├── .gitignore ├── LICENSE ├── README.md ├── codes ├── caption │ └── BLIP │ │ ├── BLIP.gif │ │ ├── CODEOWNERS │ │ ├── CODE_OF_CONDUCT.md │ │ ├── LICENSE.txt │ │ ├── README.md │ │ ├── SECURITY.md │ │ ├── cog.yaml │ │ ├── configs │ │ ├── bert_config.json │ │ ├── caption_coco.yaml │ │ ├── med_config.json │ │ ├── nlvr.yaml │ │ ├── nocaps.yaml │ │ ├── pretrain.yaml │ │ ├── retrieval_coco.yaml │ │ ├── retrieval_flickr.yaml │ │ ├── retrieval_msrvtt.yaml │ │ └── vqa.yaml │ │ ├── data │ │ ├── __init__.py │ │ ├── coco_karpathy_dataset.py │ │ ├── flickr30k_dataset.py │ │ ├── nlvr_dataset.py │ │ ├── nocaps_dataset.py │ │ ├── pretrain_dataset.py │ │ ├── utils.py │ │ ├── video_dataset.py │ │ └── vqa_dataset.py │ │ ├── decode_captions.py │ │ ├── demo.ipynb │ │ ├── eval_nocaps.py │ │ ├── eval_retrieval_video.py │ │ ├── img2feat_blip.py │ │ ├── models │ │ ├── __init__.py │ │ ├── blip.py │ │ ├── blip_itm.py │ │ ├── blip_nlvr.py │ │ ├── blip_pretrain.py │ │ ├── blip_retrieval.py │ │ ├── blip_vqa.py │ │ ├── med.py │ │ ├── nlvr_encoder.py │ │ └── vit.py │ │ ├── predict.py │ │ ├── pretrain.py │ │ ├── requirements.txt │ │ ├── train_caption.py │ │ ├── train_nlvr.py │ │ ├── train_retrieval.py │ │ ├── train_vqa.py │ │ ├── transform │ │ └── randaugment.py │ │ └── utils.py ├── depth │ ├── dptemb2dpt.py │ └── img2feat_dpt.py ├── diffusion_sd1 │ ├── diffusion_decoding.py │ └── stable-diffusion │ │ ├── LICENSE │ │ ├── README.md │ │ ├── Stable_Diffusion_v1_Model_Card.md │ │ ├── assets │ │ ├── a-painting-of-a-fire.png │ │ ├── a-photograph-of-a-fire.png │ │ ├── a-shirt-with-a-fire-printed-on-it.png │ │ ├── a-shirt-with-the-inscription-'fire'.png │ │ ├── a-watercolor-painting-of-a-fire.png │ │ ├── birdhouse.png │ │ ├── fire.png │ │ ├── inpainting.png │ │ ├── modelfigure.png │ │ ├── rdm-preview.jpg │ │ ├── reconstruction1.png │ │ ├── reconstruction2.png │ │ ├── results.gif │ │ ├── rick.jpeg │ │ ├── stable-samples │ │ │ ├── img2img │ │ │ │ ├── mountains-1.png │ │ │ │ ├── mountains-2.png │ │ │ │ ├── mountains-3.png │ │ │ │ ├── sketch-mountains-input.jpg │ │ │ │ ├── upscaling-in.png │ │ │ │ └── upscaling-out.png │ │ │ └── txt2img │ │ │ │ ├── 000002025.png │ │ │ │ ├── 000002035.png │ │ │ │ ├── merged-0005.png │ │ │ │ ├── merged-0006.png │ │ │ │ └── merged-0007.png │ │ ├── the-earth-is-on-fire,-oil-on-canvas.png │ │ ├── txt2img-convsample.png │ │ ├── txt2img-preview.png │ │ └── v1-variants-scores.jpg │ │ ├── configs │ │ ├── autoencoder │ │ │ ├── autoencoder_kl_16x16x16.yaml │ │ │ ├── autoencoder_kl_32x32x4.yaml │ │ │ ├── autoencoder_kl_64x64x3.yaml │ │ │ └── autoencoder_kl_8x8x64.yaml │ │ ├── latent-diffusion │ │ │ ├── celebahq-ldm-vq-4.yaml │ │ │ ├── cin-ldm-vq-f8.yaml │ │ │ ├── cin256-v2.yaml │ │ │ ├── ffhq-ldm-vq-4.yaml │ │ │ ├── lsun_bedrooms-ldm-vq-4.yaml │ │ │ ├── lsun_churches-ldm-kl-8.yaml │ │ │ └── txt2img-1p4B-eval.yaml │ │ ├── retrieval-augmented-diffusion │ │ │ └── 768x768.yaml │ │ └── stable-diffusion │ │ │ └── v1-inference.yaml │ │ ├── data │ │ ├── DejaVuSans.ttf │ │ ├── example_conditioning │ │ │ ├── superresolution │ │ │ │ └── sample_0.jpg │ │ │ └── text_conditional │ │ │ │ └── sample_0.txt │ │ ├── imagenet_clsidx_to_label.txt │ │ ├── imagenet_train_hr_indices.p │ │ ├── imagenet_val_hr_indices.p │ │ ├── index_synset.yaml │ │ └── inpainting_examples │ │ │ ├── 6458524847_2f4c361183_k.png │ │ │ ├── 6458524847_2f4c361183_k_mask.png │ │ │ ├── 8399166846_f6fb4e4b8e_k.png │ │ │ ├── 8399166846_f6fb4e4b8e_k_mask.png │ │ │ ├── alex-iby-G_Pk4D9rMLs.png │ │ │ ├── alex-iby-G_Pk4D9rMLs_mask.png │ │ │ ├── bench2.png │ │ │ ├── 
bench2_mask.png │ │ │ ├── bertrand-gabioud-CpuFzIsHYJ0.png │ │ │ ├── bertrand-gabioud-CpuFzIsHYJ0_mask.png │ │ │ ├── billow926-12-Wc-Zgx6Y.png │ │ │ ├── billow926-12-Wc-Zgx6Y_mask.png │ │ │ ├── overture-creations-5sI6fQgYIuo.png │ │ │ ├── overture-creations-5sI6fQgYIuo_mask.png │ │ │ ├── photo-1583445095369-9c651e7e5d34.png │ │ │ └── photo-1583445095369-9c651e7e5d34_mask.png │ │ ├── environment.yaml │ │ ├── ldm │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── imagenet.py │ │ │ └── lsun.py │ │ ├── lr_scheduler.py │ │ ├── models │ │ │ ├── autoencoder.py │ │ │ └── diffusion │ │ │ │ ├── __init__.py │ │ │ │ ├── classifier.py │ │ │ │ ├── ddim.py │ │ │ │ ├── ddpm.py │ │ │ │ ├── dpm_solver │ │ │ │ ├── __init__.py │ │ │ │ ├── dpm_solver.py │ │ │ │ └── sampler.py │ │ │ │ └── plms.py │ │ ├── modules │ │ │ ├── attention.py │ │ │ ├── diffusionmodules │ │ │ │ ├── __init__.py │ │ │ │ ├── model.py │ │ │ │ ├── openaimodel.py │ │ │ │ └── util.py │ │ │ ├── distributions │ │ │ │ ├── __init__.py │ │ │ │ └── distributions.py │ │ │ ├── ema.py │ │ │ ├── encoders │ │ │ │ ├── __init__.py │ │ │ │ └── modules.py │ │ │ ├── image_degradation │ │ │ │ ├── __init__.py │ │ │ │ ├── bsrgan.py │ │ │ │ ├── bsrgan_light.py │ │ │ │ ├── utils │ │ │ │ │ └── test.png │ │ │ │ └── utils_image.py │ │ │ ├── losses │ │ │ │ ├── __init__.py │ │ │ │ ├── contperceptual.py │ │ │ │ └── vqperceptual.py │ │ │ └── x_transformer.py │ │ └── util.py │ │ ├── main.py │ │ ├── models │ │ ├── first_stage_models │ │ │ ├── kl-f16 │ │ │ │ └── config.yaml │ │ │ ├── kl-f32 │ │ │ │ └── config.yaml │ │ │ ├── kl-f4 │ │ │ │ └── config.yaml │ │ │ ├── kl-f8 │ │ │ │ └── config.yaml │ │ │ ├── vq-f16 │ │ │ │ └── config.yaml │ │ │ ├── vq-f4-noattn │ │ │ │ └── config.yaml │ │ │ ├── vq-f4 │ │ │ │ └── config.yaml │ │ │ ├── vq-f8-n256 │ │ │ │ └── config.yaml │ │ │ └── vq-f8 │ │ │ │ └── config.yaml │ │ └── ldm │ │ │ ├── bsr_sr │ │ │ └── config.yaml │ │ │ ├── celeba256 │ │ │ └── config.yaml │ │ │ ├── cin256 │ │ │ └── config.yaml │ │ │ ├── ffhq256 │ │ │ └── config.yaml │ │ │ ├── inpainting_big │ │ │ └── config.yaml │ │ │ ├── layout2img-openimages256 │ │ │ └── config.yaml │ │ │ ├── lsun_beds256 │ │ │ └── config.yaml │ │ │ ├── lsun_churches256 │ │ │ └── config.yaml │ │ │ ├── semantic_synthesis256 │ │ │ └── config.yaml │ │ │ ├── semantic_synthesis512 │ │ │ └── config.yaml │ │ │ └── text2img256 │ │ │ └── config.yaml │ │ ├── notebook_helpers.py │ │ ├── scripts │ │ ├── download_first_stages.sh │ │ ├── download_models.sh │ │ ├── img2img.py │ │ ├── inpaint.py │ │ ├── knn2img.py │ │ ├── latent_imagenet_diffusion.ipynb │ │ ├── sample_diffusion.py │ │ ├── tests │ │ │ └── test_watermark.py │ │ ├── train_searcher.py │ │ └── txt2img.py │ │ └── setup.py ├── diffusion_sd2 │ ├── diffusion_decoding.py │ └── stablediffusion │ │ ├── .gitignore │ │ ├── LICENSE │ │ ├── LICENSE-MODEL │ │ ├── README.md │ │ ├── assets │ │ ├── model-variants.jpg │ │ ├── modelfigure.png │ │ ├── rick.jpeg │ │ ├── stable-inpainting │ │ │ ├── inpainting.gif │ │ │ └── merged-leopards.png │ │ └── stable-samples │ │ │ ├── depth2img │ │ │ ├── d2i.gif │ │ │ ├── depth2fantasy.jpeg │ │ │ ├── depth2img01.png │ │ │ ├── depth2img02.png │ │ │ ├── merged-0000.png │ │ │ ├── merged-0004.png │ │ │ ├── merged-0005.png │ │ │ ├── midas.jpeg │ │ │ └── old_man.png │ │ │ ├── img2img │ │ │ ├── mountains-1.png │ │ │ ├── mountains-2.png │ │ │ ├── mountains-3.png │ │ │ ├── sketch-mountains-input.jpg │ │ │ ├── upscaling-in.png │ │ │ └── upscaling-out.png │ │ │ ├── stable-unclip │ │ │ ├── houses_out.jpeg │ │ │ ├── oldcar000.jpeg │ │ 
│ ├── oldcar500.jpeg │ │ │ ├── oldcar800.jpeg │ │ │ ├── panda.jpg │ │ │ ├── plates_out.jpeg │ │ │ ├── unclip-variations.png │ │ │ └── unclip-variations_noise.png │ │ │ ├── txt2img │ │ │ ├── 768 │ │ │ │ ├── merged-0001.png │ │ │ │ ├── merged-0002.png │ │ │ │ ├── merged-0003.png │ │ │ │ ├── merged-0004.png │ │ │ │ ├── merged-0005.png │ │ │ │ └── merged-0006.png │ │ │ ├── 000002025.png │ │ │ ├── 000002035.png │ │ │ ├── merged-0001.png │ │ │ ├── merged-0003.png │ │ │ ├── merged-0005.png │ │ │ ├── merged-0006.png │ │ │ └── merged-0007.png │ │ │ └── upscaling │ │ │ ├── merged-dog.png │ │ │ ├── sampled-bear-x4.png │ │ │ └── snow-leopard-x4.png │ │ ├── checkpoints │ │ └── checkpoints.txt │ │ ├── configs │ │ ├── karlo │ │ │ ├── decoder_900M_vit_l.yaml │ │ │ ├── improved_sr_64_256_1.4B.yaml │ │ │ └── prior_1B_vit_l.yaml │ │ └── stable-diffusion │ │ │ ├── intel │ │ │ ├── v2-inference-bf16.yaml │ │ │ ├── v2-inference-fp32.yaml │ │ │ ├── v2-inference-v-bf16.yaml │ │ │ └── v2-inference-v-fp32.yaml │ │ │ ├── v2-1-stable-unclip-h-inference.yaml │ │ │ ├── v2-1-stable-unclip-l-inference.yaml │ │ │ ├── v2-inference-v.yaml │ │ │ ├── v2-inference.yaml │ │ │ ├── v2-inpainting-inference.yaml │ │ │ ├── v2-midas-inference.yaml │ │ │ └── x4-upscaling.yaml │ │ ├── doc │ │ └── UNCLIP.MD │ │ ├── environment.yaml │ │ ├── ldm │ │ ├── data │ │ │ ├── __init__.py │ │ │ └── util.py │ │ ├── models │ │ │ ├── autoencoder.py │ │ │ └── diffusion │ │ │ │ ├── __init__.py │ │ │ │ ├── ddim.py │ │ │ │ ├── ddpm.py │ │ │ │ ├── dpm_solver │ │ │ │ ├── __init__.py │ │ │ │ ├── dpm_solver.py │ │ │ │ └── sampler.py │ │ │ │ ├── plms.py │ │ │ │ └── sampling_util.py │ │ ├── modules │ │ │ ├── attention.py │ │ │ ├── diffusionmodules │ │ │ │ ├── __init__.py │ │ │ │ ├── model.py │ │ │ │ ├── openaimodel.py │ │ │ │ ├── upscaling.py │ │ │ │ └── util.py │ │ │ ├── distributions │ │ │ │ ├── __init__.py │ │ │ │ └── distributions.py │ │ │ ├── ema.py │ │ │ ├── encoders │ │ │ │ ├── __init__.py │ │ │ │ └── modules.py │ │ │ ├── image_degradation │ │ │ │ ├── __init__.py │ │ │ │ ├── bsrgan.py │ │ │ │ ├── bsrgan_light.py │ │ │ │ ├── utils │ │ │ │ │ └── test.png │ │ │ │ └── utils_image.py │ │ │ ├── karlo │ │ │ │ ├── __init__.py │ │ │ │ ├── diffusers_pipeline.py │ │ │ │ └── kakao │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── models │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── clip.py │ │ │ │ │ ├── decoder_model.py │ │ │ │ │ ├── prior_model.py │ │ │ │ │ ├── sr_256_1k.py │ │ │ │ │ └── sr_64_256.py │ │ │ │ │ ├── modules │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── diffusion │ │ │ │ │ │ ├── gaussian_diffusion.py │ │ │ │ │ │ └── respace.py │ │ │ │ │ ├── nn.py │ │ │ │ │ ├── resample.py │ │ │ │ │ ├── unet.py │ │ │ │ │ └── xf.py │ │ │ │ │ ├── sampler.py │ │ │ │ │ └── template.py │ │ │ └── midas │ │ │ │ ├── __init__.py │ │ │ │ ├── api.py │ │ │ │ ├── midas │ │ │ │ ├── __init__.py │ │ │ │ ├── base_model.py │ │ │ │ ├── blocks.py │ │ │ │ ├── dpt_depth.py │ │ │ │ ├── midas_net.py │ │ │ │ ├── midas_net_custom.py │ │ │ │ ├── transforms.py │ │ │ │ └── vit.py │ │ │ │ └── utils.py │ │ └── util.py │ │ ├── modelcard.md │ │ ├── requirements.txt │ │ ├── scripts │ │ ├── gradio │ │ │ ├── depth2img.py │ │ │ ├── inpainting.py │ │ │ └── superresolution.py │ │ ├── img2img.py │ │ ├── streamlit │ │ │ ├── depth2img.py │ │ │ ├── inpainting.py │ │ │ ├── stableunclip.py │ │ │ └── superresolution.py │ │ ├── tests │ │ │ └── test_watermark.py │ │ └── txt2img.py │ │ └── setup.py ├── gan │ ├── bdpy │ │ ├── .gitignore │ │ ├── LICENSE │ │ ├── README.md │ │ ├── bdpy │ │ │ ├── __init__.py │ │ │ ├── bdata │ │ │ │ ├── 
__init__.py │ │ │ │ ├── bdata.py │ │ │ │ ├── featureselector.py │ │ │ │ ├── metadata.py │ │ │ │ └── utils.py │ │ │ ├── dataform │ │ │ │ ├── __init__.py │ │ │ │ ├── datastore.py │ │ │ │ ├── features.py │ │ │ │ ├── pd.py │ │ │ │ └── sparse.py │ │ │ ├── distcomp │ │ │ │ ├── __init__.py │ │ │ │ └── distcomp.py │ │ │ ├── dl │ │ │ │ ├── __init__.py │ │ │ │ ├── caffe.py │ │ │ │ └── torch │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── models.py │ │ │ │ │ └── torch.py │ │ │ ├── evals │ │ │ │ ├── __init__.py │ │ │ │ └── metrics.py │ │ │ ├── feature │ │ │ │ ├── __init__.py │ │ │ │ └── feature.py │ │ │ ├── fig │ │ │ │ ├── __init__.py │ │ │ │ ├── draw_group_image_set.py │ │ │ │ ├── fig.py │ │ │ │ ├── makeplots.py │ │ │ │ └── tile_images.py │ │ │ ├── ml │ │ │ │ ├── __init__.py │ │ │ │ ├── crossvalidation.py │ │ │ │ ├── ensemble.py │ │ │ │ ├── learning.py │ │ │ │ ├── regress.py │ │ │ │ └── searchlight.py │ │ │ ├── mri │ │ │ │ ├── __init__.py │ │ │ │ ├── fmriprep.py │ │ │ │ ├── glm.py │ │ │ │ ├── image.py │ │ │ │ ├── load_epi.py │ │ │ │ ├── load_mri.py │ │ │ │ ├── roi.py │ │ │ │ └── spm.py │ │ │ ├── opendata │ │ │ │ ├── __init__.py │ │ │ │ └── openneuro.py │ │ │ ├── preproc │ │ │ │ ├── __init__.py │ │ │ │ ├── interface.py │ │ │ │ ├── preprocessor.py │ │ │ │ ├── select_top.py │ │ │ │ └── util.py │ │ │ ├── recon │ │ │ │ ├── __init__.py │ │ │ │ ├── torch │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── icnn.py │ │ │ │ └── utils.py │ │ │ ├── stats │ │ │ │ ├── __init__.py │ │ │ │ └── corr.py │ │ │ └── util │ │ │ │ ├── __init__.py │ │ │ │ ├── info.py │ │ │ │ ├── math.py │ │ │ │ └── utils.py │ │ ├── docs │ │ │ ├── _config.yml │ │ │ ├── bdata_api_examples.md │ │ │ ├── dataform_features.md │ │ │ └── index.md │ │ ├── examples │ │ │ └── .gitignore │ │ ├── setup.py │ │ └── test │ │ │ ├── .gitignore │ │ │ ├── data │ │ │ └── mri │ │ │ │ ├── epi0001.hdr │ │ │ │ ├── epi0001.img │ │ │ │ ├── epi0002.hdr │ │ │ │ ├── epi0002.img │ │ │ │ ├── epi0003.hdr │ │ │ │ ├── epi0003.img │ │ │ │ ├── epi0004.hdr │ │ │ │ ├── epi0004.img │ │ │ │ ├── epi0005.hdr │ │ │ │ └── epi0005.img │ │ │ ├── test_bdata.py │ │ │ ├── test_bdata_metadata.py │ │ │ ├── test_bdata_utils.py │ │ │ ├── test_cv.py │ │ │ ├── test_dataform_sparse.py │ │ │ ├── test_distcomp.py │ │ │ ├── test_evals.py │ │ │ ├── test_feature.py │ │ │ ├── test_featureselector.py │ │ │ ├── test_ml.py │ │ │ ├── test_ml_utils.py │ │ │ ├── test_mri.py │ │ │ ├── test_preproc.py │ │ │ ├── test_stats.py │ │ │ ├── test_util.py │ │ │ └── test_util_math.py │ ├── make_subjstim_vgg19.py │ ├── make_vgg19bdpy.py │ ├── make_vgg19fromdecode.py │ └── recon_icnn_image_vgg19_dgn_relu7gen_gd.py └── utils │ ├── identification.py │ ├── img2feat_decoded.py │ ├── img2feat_sd.py │ ├── make_subjmri.py │ ├── make_subjstim.py │ ├── nsd_access │ ├── __init__.py │ └── nsda.py │ └── ridge.py ├── requirements.txt ├── results_tech_paper.jpg ├── visual_summary.jpg └── visual_summary_techpaper.jpg /.gitignore: -------------------------------------------------------------------------------- 1 | # Project specific 2 | decoded/** 3 | identification/** 4 | mrifeat/** 5 | nsd/** 6 | nsdfeat/** 7 | 8 | # Thumbnails 9 | ._* 10 | .DS_Store 11 | # Python .gitignore 12 | 13 | # Byte-compiled / optimized / DLL files 14 | __pycache__/ 15 | *.py[cod] 16 | *$py.class 17 | 18 | # C extensions 19 | *.so 20 | 21 | # Distribution / packaging 22 | .Python 23 | build/ 24 | develop-eggs/ 25 | dist/ 26 | downloads/ 27 | eggs/ 28 | .eggs/ 29 | lib/ 30 | lib64/ 31 | parts/ 32 | sdist/ 33 | var/ 34 | wheels/ 35 | share/python-wheels/ 36 | *.egg-info/ 37 | 
.installed.cfg 38 | *.egg 39 | MANIFEST 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .nox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | *.py,cover 56 | .hypothesis/ 57 | .pytest_cache/ 58 | cover/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Jupyter Notebook 65 | .ipynb_checkpoints 66 | 67 | # IPython 68 | profile_default/ 69 | ipython_config.py 70 | 71 | # Environments 72 | .env 73 | .venv 74 | env/ 75 | venv/ 76 | ENV/ 77 | env.bak/ 78 | venv.bak/ 79 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Yu Takagi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /codes/caption/BLIP/BLIP.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/caption/BLIP/BLIP.gif -------------------------------------------------------------------------------- /codes/caption/BLIP/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Comment line immediately above ownership line is reserved for related gus information. Please be careful while editing. 2 | #ECCN:Open Source 3 | -------------------------------------------------------------------------------- /codes/caption/BLIP/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2022, Salesforce.com, Inc. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 5 | 6 | * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 7 | 8 | * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
9 | 10 | * Neither the name of Salesforce.com nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 11 | 12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 13 | -------------------------------------------------------------------------------- /codes/caption/BLIP/SECURITY.md: -------------------------------------------------------------------------------- 1 | ## Security 2 | 3 | Please report any security issue to [security@salesforce.com](mailto:security@salesforce.com) 4 | as soon as it is discovered. This library limits its runtime dependencies in 5 | order to reduce the total cost of ownership as much as can be, but all consumers 6 | should remain vigilant and have their security stakeholders review all third-party 7 | products (3PP) like this one and their dependencies. 8 | -------------------------------------------------------------------------------- /codes/caption/BLIP/cog.yaml: -------------------------------------------------------------------------------- 1 | build: 2 | gpu: true 3 | cuda: "11.1" 4 | python_version: "3.8" 5 | system_packages: 6 | - "libgl1-mesa-glx" 7 | - "libglib2.0-0" 8 | python_packages: 9 | - "ipython==7.30.1" 10 | - "torchvision==0.11.1" 11 | - "torch==1.10.0" 12 | - "timm==0.4.12" 13 | - "transformers==4.15.0" 14 | - "fairscale==0.4.4" 15 | - "pycocoevalcap==1.2" 16 | 17 | predict: "predict.py:Predictor" 18 | -------------------------------------------------------------------------------- /codes/caption/BLIP/configs/bert_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "BertModel" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "hidden_act": "gelu", 7 | "hidden_dropout_prob": 0.1, 8 | "hidden_size": 768, 9 | "initializer_range": 0.02, 10 | "intermediate_size": 3072, 11 | "layer_norm_eps": 1e-12, 12 | "max_position_embeddings": 512, 13 | "model_type": "bert", 14 | "num_attention_heads": 12, 15 | "num_hidden_layers": 12, 16 | "pad_token_id": 0, 17 | "type_vocab_size": 2, 18 | "vocab_size": 30522, 19 | "encoder_width": 768, 20 | "add_cross_attention": true 21 | } 22 | -------------------------------------------------------------------------------- /codes/caption/BLIP/configs/caption_coco.yaml: -------------------------------------------------------------------------------- 1 | image_root: '/export/share/datasets/vision/coco/images/' 2 | ann_root: 'annotation' 3 | coco_gt_root: 'annotation/coco_gt' 4 | 5 | # set pretrained as a file path or an url 6 | pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth' 7 | 8 | # size of vit model; base or large 9 | vit: 'base' 10 | vit_grad_ckpt: False 11 | vit_ckpt_layer: 0 12 | 
batch_size: 32 13 | init_lr: 1e-5 14 | 15 | # vit: 'large' 16 | # vit_grad_ckpt: True 17 | # vit_ckpt_layer: 5 18 | # batch_size: 16 19 | # init_lr: 2e-6 20 | 21 | image_size: 384 22 | 23 | # generation configs 24 | max_length: 20 25 | min_length: 5 26 | num_beams: 3 27 | prompt: 'a picture of ' 28 | 29 | # optimizer 30 | weight_decay: 0.05 31 | min_lr: 0 32 | max_epoch: 5 33 | 34 | -------------------------------------------------------------------------------- /codes/caption/BLIP/configs/med_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "BertModel" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "hidden_act": "gelu", 7 | "hidden_dropout_prob": 0.1, 8 | "hidden_size": 768, 9 | "initializer_range": 0.02, 10 | "intermediate_size": 3072, 11 | "layer_norm_eps": 1e-12, 12 | "max_position_embeddings": 512, 13 | "model_type": "bert", 14 | "num_attention_heads": 12, 15 | "num_hidden_layers": 12, 16 | "pad_token_id": 0, 17 | "type_vocab_size": 2, 18 | "vocab_size": 30524, 19 | "encoder_width": 768, 20 | "add_cross_attention": true 21 | } 22 | -------------------------------------------------------------------------------- /codes/caption/BLIP/configs/nlvr.yaml: -------------------------------------------------------------------------------- 1 | image_root: '/export/share/datasets/vision/NLVR2/' 2 | ann_root: 'annotation' 3 | 4 | # set pretrained as a file path or an url 5 | pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_nlvr.pth' 6 | 7 | #size of vit model; base or large 8 | vit: 'base' 9 | batch_size_train: 16 10 | batch_size_test: 64 11 | vit_grad_ckpt: False 12 | vit_ckpt_layer: 0 13 | max_epoch: 15 14 | 15 | image_size: 384 16 | 17 | # optimizer 18 | weight_decay: 0.05 19 | init_lr: 3e-5 20 | min_lr: 0 21 | 22 | -------------------------------------------------------------------------------- /codes/caption/BLIP/configs/nocaps.yaml: -------------------------------------------------------------------------------- 1 | image_root: '/export/share/datasets/vision/nocaps/' 2 | ann_root: 'annotation' 3 | 4 | # set pretrained as a file path or an url 5 | pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth' 6 | 7 | vit: 'base' 8 | batch_size: 32 9 | 10 | image_size: 384 11 | 12 | max_length: 20 13 | min_length: 5 14 | num_beams: 3 15 | prompt: 'a picture of ' -------------------------------------------------------------------------------- /codes/caption/BLIP/configs/pretrain.yaml: -------------------------------------------------------------------------------- 1 | train_file: ['/export/share/junnan-li/VL_pretrain/annotation/coco_karpathy_train.json', 2 | '/export/share/junnan-li/VL_pretrain/annotation/vg_caption.json', 3 | ] 4 | laion_path: '' 5 | 6 | # size of vit model; base or large 7 | vit: 'base' 8 | vit_grad_ckpt: False 9 | vit_ckpt_layer: 0 10 | 11 | image_size: 224 12 | batch_size: 75 13 | 14 | queue_size: 57600 15 | alpha: 0.4 16 | 17 | # optimizer 18 | weight_decay: 0.05 19 | init_lr: 3e-4 20 | min_lr: 1e-6 21 | warmup_lr: 1e-6 22 | lr_decay_rate: 0.9 23 | max_epoch: 20 24 | warmup_steps: 3000 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /codes/caption/BLIP/configs/retrieval_coco.yaml: -------------------------------------------------------------------------------- 1 | image_root: '/export/share/datasets/vision/coco/images/' 2 | ann_root: 
'annotation' 3 | dataset: 'coco' 4 | 5 | # set pretrained as a file path or an url 6 | pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_retrieval_coco.pth' 7 | 8 | # size of vit model; base or large 9 | 10 | vit: 'base' 11 | batch_size_train: 32 12 | batch_size_test: 64 13 | vit_grad_ckpt: True 14 | vit_ckpt_layer: 4 15 | init_lr: 1e-5 16 | 17 | # vit: 'large' 18 | # batch_size_train: 16 19 | # batch_size_test: 32 20 | # vit_grad_ckpt: True 21 | # vit_ckpt_layer: 12 22 | # init_lr: 5e-6 23 | 24 | image_size: 384 25 | queue_size: 57600 26 | alpha: 0.4 27 | k_test: 256 28 | negative_all_rank: True 29 | 30 | # optimizer 31 | weight_decay: 0.05 32 | min_lr: 0 33 | max_epoch: 6 34 | 35 | -------------------------------------------------------------------------------- /codes/caption/BLIP/configs/retrieval_flickr.yaml: -------------------------------------------------------------------------------- 1 | image_root: '/export/share/datasets/vision/flickr30k/' 2 | ann_root: 'annotation' 3 | dataset: 'flickr' 4 | 5 | # set pretrained as a file path or an url 6 | pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_retrieval_flickr.pth' 7 | 8 | # size of vit model; base or large 9 | 10 | vit: 'base' 11 | batch_size_train: 32 12 | batch_size_test: 64 13 | vit_grad_ckpt: True 14 | vit_ckpt_layer: 4 15 | init_lr: 1e-5 16 | 17 | # vit: 'large' 18 | # batch_size_train: 16 19 | # batch_size_test: 32 20 | # vit_grad_ckpt: True 21 | # vit_ckpt_layer: 10 22 | # init_lr: 5e-6 23 | 24 | image_size: 384 25 | queue_size: 57600 26 | alpha: 0.4 27 | k_test: 128 28 | negative_all_rank: False 29 | 30 | # optimizer 31 | weight_decay: 0.05 32 | min_lr: 0 33 | max_epoch: 6 34 | 35 | -------------------------------------------------------------------------------- /codes/caption/BLIP/configs/retrieval_msrvtt.yaml: -------------------------------------------------------------------------------- 1 | video_root: '/export/share/dongxuli/data/msrvtt_retrieval/videos' 2 | ann_root: 'annotation' 3 | 4 | # set pretrained as a file path or an url 5 | pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_retrieval_coco.pth' 6 | 7 | # size of vit model; base or large 8 | vit: 'base' 9 | batch_size: 64 10 | k_test: 128 11 | image_size: 384 12 | num_frm_test: 8 -------------------------------------------------------------------------------- /codes/caption/BLIP/configs/vqa.yaml: -------------------------------------------------------------------------------- 1 | vqa_root: '/export/share/datasets/vision/VQA/Images/mscoco/' #followed by train2014/ 2 | vg_root: '/export/share/datasets/vision/visual-genome/' #followed by image/ 3 | train_files: ['vqa_train','vqa_val','vg_qa'] 4 | ann_root: 'annotation' 5 | 6 | # set pretrained as a file path or an url 7 | pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_vqa_capfilt_large.pth' 8 | 9 | # size of vit model; base or large 10 | vit: 'base' 11 | batch_size_train: 16 12 | batch_size_test: 32 13 | vit_grad_ckpt: False 14 | vit_ckpt_layer: 0 15 | init_lr: 2e-5 16 | 17 | image_size: 480 18 | 19 | k_test: 128 20 | inference: 'rank' 21 | 22 | # optimizer 23 | weight_decay: 0.05 24 | min_lr: 0 25 | max_epoch: 10 -------------------------------------------------------------------------------- /codes/caption/BLIP/data/nocaps_dataset.py: -------------------------------------------------------------------------------- 1 | import os 
2 | import json 3 | 4 | from torch.utils.data import Dataset 5 | from torchvision.datasets.utils import download_url 6 | 7 | from PIL import Image 8 | 9 | class nocaps_eval(Dataset): 10 | def __init__(self, transform, image_root, ann_root, split): 11 | urls = {'val':'https://storage.googleapis.com/sfr-vision-language-research/datasets/nocaps_val.json', 12 | 'test':'https://storage.googleapis.com/sfr-vision-language-research/datasets/nocaps_test.json'} 13 | filenames = {'val':'nocaps_val.json','test':'nocaps_test.json'} 14 | 15 | download_url(urls[split],ann_root) 16 | 17 | self.annotation = json.load(open(os.path.join(ann_root,filenames[split]),'r')) 18 | self.transform = transform 19 | self.image_root = image_root 20 | 21 | def __len__(self): 22 | return len(self.annotation) 23 | 24 | def __getitem__(self, index): 25 | 26 | ann = self.annotation[index] 27 | 28 | image_path = os.path.join(self.image_root,ann['image']) 29 | image = Image.open(image_path).convert('RGB') 30 | image = self.transform(image) 31 | 32 | return image, int(ann['img_id']) -------------------------------------------------------------------------------- /codes/caption/BLIP/data/pretrain_dataset.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import random 4 | 5 | from torch.utils.data import Dataset 6 | 7 | from PIL import Image 8 | from PIL import ImageFile 9 | ImageFile.LOAD_TRUNCATED_IMAGES = True 10 | Image.MAX_IMAGE_PIXELS = None 11 | 12 | from data.utils import pre_caption 13 | import os,glob 14 | 15 | class pretrain_dataset(Dataset): 16 | def __init__(self, ann_file, laion_path, transform): 17 | 18 | self.ann_pretrain = [] 19 | for f in ann_file: 20 | print('loading '+f) 21 | ann = json.load(open(f,'r')) 22 | self.ann_pretrain += ann 23 | 24 | self.laion_path = laion_path 25 | if self.laion_path: 26 | self.laion_files = glob.glob(os.path.join(laion_path,'*.json')) 27 | 28 | print('loading '+self.laion_files[0]) 29 | with open(self.laion_files[0],'r') as f: 30 | self.ann_laion = json.load(f) 31 | 32 | self.annotation = self.ann_pretrain + self.ann_laion 33 | else: 34 | self.annotation = self.ann_pretrain 35 | 36 | self.transform = transform 37 | 38 | 39 | def reload_laion(self, epoch): 40 | n = epoch%len(self.laion_files) 41 | print('loading '+self.laion_files[n]) 42 | with open(self.laion_files[n],'r') as f: 43 | self.ann_laion = json.load(f) 44 | 45 | self.annotation = self.ann_pretrain + self.ann_laion 46 | 47 | 48 | def __len__(self): 49 | return len(self.annotation) 50 | 51 | def __getitem__(self, index): 52 | 53 | ann = self.annotation[index] 54 | 55 | image = Image.open(ann['image']).convert('RGB') 56 | image = self.transform(image) 57 | caption = pre_caption(ann['caption'],30) 58 | 59 | return image, caption -------------------------------------------------------------------------------- /codes/caption/BLIP/img2feat_blip.py: -------------------------------------------------------------------------------- 1 | import argparse, os 2 | from PIL import Image 3 | import torch 4 | from torchvision import transforms 5 | from models.blip import blip_decoder 6 | import sys 7 | sys.path.append("../../utils/") 8 | from nsd_access.nsda import NSDAccess 9 | from tqdm import tqdm 10 | import numpy as np 11 | 12 | def main(): 13 | parser = argparse.ArgumentParser() 14 | 15 | parser.add_argument( 16 | "--gpu", 17 | required=True, 18 | type=int, 19 | help="gpu" 20 | ) 21 | 22 | # Set parameters 23 | opt = parser.parse_args() 24 | gpu = opt.gpu 25 | 
torch.cuda.set_device(gpu) 26 | nimage = 73000 27 | image_size = 240 28 | model_url = "https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_capfilt_large.pth" 29 | device = torch.device(f"cuda:{gpu}" if torch.cuda.is_available() else "cpu") 30 | model = blip_decoder(pretrained=model_url, image_size=image_size, vit="base") 31 | model.eval() 32 | model = model.to(device) 33 | savedir = f'../../../nsdfeat/blip/' 34 | os.makedirs(savedir, exist_ok=True) 35 | 36 | # Make feature 37 | nsda = NSDAccess('../../../nsd/') 38 | for s in tqdm(range(nimage)): 39 | img_arr = nsda.read_images(s) 40 | image = Image.fromarray(img_arr).convert("RGB").resize((image_size,image_size), resample=Image.LANCZOS) 41 | img_arr = transforms.ToTensor()(image).to('cuda').unsqueeze(0) 42 | with torch.no_grad(): 43 | vit_feat = model.visual_encoder(img_arr).cpu().detach().numpy().squeeze() 44 | np.save(f'{savedir}/{s:06}.npy',vit_feat) 45 | 46 | if __name__ == "__main__": 47 | main() 48 | -------------------------------------------------------------------------------- /codes/caption/BLIP/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/caption/BLIP/models/__init__.py -------------------------------------------------------------------------------- /codes/caption/BLIP/requirements.txt: -------------------------------------------------------------------------------- 1 | timm==0.4.12 2 | transformers==4.15.0 3 | fairscale==0.4.4 4 | pycocoevalcap 5 | -------------------------------------------------------------------------------- /codes/depth/img2feat_dpt.py: -------------------------------------------------------------------------------- 1 | import argparse, os, sys 2 | from tqdm import tqdm 3 | import torch 4 | import numpy as np 5 | import PIL 6 | from transformers import AutoImageProcessor, DPTForDepthEstimation 7 | sys.path.append("../utils/") 8 | from nsd_access.nsda import NSDAccess 9 | from PIL import Image 10 | 11 | def main(): 12 | 13 | parser = argparse.ArgumentParser() 14 | 15 | parser.add_argument( 16 | "--imgidx", 17 | required=True, 18 | nargs="*", 19 | type=int, 20 | help="start and end imgs" 21 | ) 22 | parser.add_argument( 23 | "--gpu", 24 | required=True, 25 | type=int, 26 | help="gpu" 27 | ) 28 | 29 | opt = parser.parse_args() 30 | imgidx = opt.imgidx 31 | gpu = opt.gpu 32 | resolution = 512 33 | nsda = NSDAccess('../../nsd/') 34 | 35 | # Save Directories 36 | os.makedirs(f'../../nsdfeat/dpt/', exist_ok=True) 37 | for i in range(4): 38 | os.makedirs(f'../../nsdfeat/dpt_emb{i}/', exist_ok=True) 39 | 40 | device = torch.device(f"cuda:{gpu}") if torch.cuda.is_available() else torch.device("cpu") 41 | image_processor = AutoImageProcessor.from_pretrained("Intel/dpt-large") 42 | model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large") 43 | model.to(device) 44 | 45 | 46 | for s in tqdm(range(imgidx[0],imgidx[1])): 47 | print(f"Now processing image {s:06}") 48 | img_arr = nsda.read_images(s) 49 | image = Image.fromarray(img_arr).convert("RGB").resize((resolution, resolution), resample=PIL.Image.LANCZOS) 50 | inputs = image_processor(images=image, return_tensors="pt").to(device) 51 | with torch.no_grad(): 52 | outputs = model(**inputs,output_hidden_states=True) 53 | predicted_depth = outputs.predicted_depth 54 | hidden_states = [ 55 | feature.to('cpu').detach().numpy() for idx, feature in 
enumerate(outputs.hidden_states[1:]) if idx in model.config.backbone_out_indices 56 | ] 57 | 58 | predicted_depth = predicted_depth.to('cpu').detach().numpy() 59 | 60 | for idx, dpt_idx in enumerate(model.config.backbone_out_indices): 61 | np.save(f'../../nsdfeat/dpt_emb{idx}/{s:06}.npy',hidden_states[idx]) 62 | np.save(f'../../nsdfeat/dpt/{s:06}.npy',predicted_depth) 63 | 64 | if __name__ == "__main__": 65 | main() 66 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/a-painting-of-a-fire.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/a-painting-of-a-fire.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/a-photograph-of-a-fire.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/a-photograph-of-a-fire.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/a-shirt-with-a-fire-printed-on-it.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/a-shirt-with-a-fire-printed-on-it.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/a-shirt-with-the-inscription-'fire'.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/a-shirt-with-the-inscription-'fire'.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/a-watercolor-painting-of-a-fire.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/a-watercolor-painting-of-a-fire.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/birdhouse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/birdhouse.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/fire.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/fire.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/inpainting.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/inpainting.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/modelfigure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/modelfigure.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/rdm-preview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/rdm-preview.jpg -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/reconstruction1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/reconstruction1.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/reconstruction2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/reconstruction2.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/results.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/results.gif -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/rick.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/rick.jpeg -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/stable-samples/img2img/mountains-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/stable-samples/img2img/mountains-1.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/stable-samples/img2img/mountains-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/stable-samples/img2img/mountains-2.png 
-------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/stable-samples/img2img/mountains-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/stable-samples/img2img/mountains-3.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/stable-samples/img2img/sketch-mountains-input.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/stable-samples/img2img/sketch-mountains-input.jpg -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/stable-samples/img2img/upscaling-in.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/stable-samples/img2img/upscaling-in.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/stable-samples/img2img/upscaling-out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/stable-samples/img2img/upscaling-out.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/stable-samples/txt2img/000002025.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/stable-samples/txt2img/000002025.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/stable-samples/txt2img/000002035.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/stable-samples/txt2img/000002035.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/stable-samples/txt2img/merged-0005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/stable-samples/txt2img/merged-0005.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/stable-samples/txt2img/merged-0006.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/stable-samples/txt2img/merged-0006.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/stable-samples/txt2img/merged-0007.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/stable-samples/txt2img/merged-0007.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/the-earth-is-on-fire,-oil-on-canvas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/the-earth-is-on-fire,-oil-on-canvas.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/txt2img-convsample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/txt2img-convsample.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/txt2img-preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/txt2img-preview.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/v1-variants-scores.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/v1-variants-scores.jpg -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/configs/autoencoder/autoencoder_kl_16x16x16.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-6 3 | target: ldm.models.autoencoder.AutoencoderKL 4 | params: 5 | monitor: "val/rec_loss" 6 | embed_dim: 16 7 | lossconfig: 8 | target: ldm.modules.losses.LPIPSWithDiscriminator 9 | params: 10 | disc_start: 50001 11 | kl_weight: 0.000001 12 | disc_weight: 0.5 13 | 14 | ddconfig: 15 | double_z: True 16 | z_channels: 16 17 | resolution: 256 18 | in_channels: 3 19 | out_ch: 3 20 | ch: 128 21 | ch_mult: [ 1,1,2,2,4] # num_down = len(ch_mult)-1 22 | num_res_blocks: 2 23 | attn_resolutions: [16] 24 | dropout: 0.0 25 | 26 | 27 | data: 28 | target: main.DataModuleFromConfig 29 | params: 30 | batch_size: 12 31 | wrap: True 32 | train: 33 | target: ldm.data.imagenet.ImageNetSRTrain 34 | params: 35 | size: 256 36 | degradation: pil_nearest 37 | validation: 38 | target: ldm.data.imagenet.ImageNetSRValidation 39 | params: 40 | size: 256 41 | degradation: pil_nearest 42 | 43 | lightning: 44 | callbacks: 45 | image_logger: 46 | target: 
main.ImageLogger 47 | params: 48 | batch_frequency: 1000 49 | max_images: 8 50 | increase_log_steps: True 51 | 52 | trainer: 53 | benchmark: True 54 | accumulate_grad_batches: 2 55 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/configs/autoencoder/autoencoder_kl_32x32x4.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-6 3 | target: ldm.models.autoencoder.AutoencoderKL 4 | params: 5 | monitor: "val/rec_loss" 6 | embed_dim: 4 7 | lossconfig: 8 | target: ldm.modules.losses.LPIPSWithDiscriminator 9 | params: 10 | disc_start: 50001 11 | kl_weight: 0.000001 12 | disc_weight: 0.5 13 | 14 | ddconfig: 15 | double_z: True 16 | z_channels: 4 17 | resolution: 256 18 | in_channels: 3 19 | out_ch: 3 20 | ch: 128 21 | ch_mult: [ 1,2,4,4 ] # num_down = len(ch_mult)-1 22 | num_res_blocks: 2 23 | attn_resolutions: [ ] 24 | dropout: 0.0 25 | 26 | data: 27 | target: main.DataModuleFromConfig 28 | params: 29 | batch_size: 12 30 | wrap: True 31 | train: 32 | target: ldm.data.imagenet.ImageNetSRTrain 33 | params: 34 | size: 256 35 | degradation: pil_nearest 36 | validation: 37 | target: ldm.data.imagenet.ImageNetSRValidation 38 | params: 39 | size: 256 40 | degradation: pil_nearest 41 | 42 | lightning: 43 | callbacks: 44 | image_logger: 45 | target: main.ImageLogger 46 | params: 47 | batch_frequency: 1000 48 | max_images: 8 49 | increase_log_steps: True 50 | 51 | trainer: 52 | benchmark: True 53 | accumulate_grad_batches: 2 54 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/configs/autoencoder/autoencoder_kl_64x64x3.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-6 3 | target: ldm.models.autoencoder.AutoencoderKL 4 | params: 5 | monitor: "val/rec_loss" 6 | embed_dim: 3 7 | lossconfig: 8 | target: ldm.modules.losses.LPIPSWithDiscriminator 9 | params: 10 | disc_start: 50001 11 | kl_weight: 0.000001 12 | disc_weight: 0.5 13 | 14 | ddconfig: 15 | double_z: True 16 | z_channels: 3 17 | resolution: 256 18 | in_channels: 3 19 | out_ch: 3 20 | ch: 128 21 | ch_mult: [ 1,2,4 ] # num_down = len(ch_mult)-1 22 | num_res_blocks: 2 23 | attn_resolutions: [ ] 24 | dropout: 0.0 25 | 26 | 27 | data: 28 | target: main.DataModuleFromConfig 29 | params: 30 | batch_size: 12 31 | wrap: True 32 | train: 33 | target: ldm.data.imagenet.ImageNetSRTrain 34 | params: 35 | size: 256 36 | degradation: pil_nearest 37 | validation: 38 | target: ldm.data.imagenet.ImageNetSRValidation 39 | params: 40 | size: 256 41 | degradation: pil_nearest 42 | 43 | lightning: 44 | callbacks: 45 | image_logger: 46 | target: main.ImageLogger 47 | params: 48 | batch_frequency: 1000 49 | max_images: 8 50 | increase_log_steps: True 51 | 52 | trainer: 53 | benchmark: True 54 | accumulate_grad_batches: 2 55 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/configs/autoencoder/autoencoder_kl_8x8x64.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-6 3 | target: ldm.models.autoencoder.AutoencoderKL 4 | params: 5 | monitor: "val/rec_loss" 6 | embed_dim: 64 7 | lossconfig: 8 | target: ldm.modules.losses.LPIPSWithDiscriminator 9 | params: 10 | disc_start: 50001 11 | kl_weight: 0.000001 12 | disc_weight: 0.5 13 | 14 | ddconfig: 
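# Annotation (not part of the original config): the ddconfig block below defines the
# convolutional KL autoencoder backbone. ch_mult [1,1,2,2,4,4] gives len(ch_mult)-1 = 5
# downsampling stages, so a 256px input becomes an 8x8 latent grid with z_channels = 64
# channels (hence "kl_8x8x64"), with self-attention applied at the 16 and 8 feature resolutions.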
15 | double_z: True 16 | z_channels: 64 17 | resolution: 256 18 | in_channels: 3 19 | out_ch: 3 20 | ch: 128 21 | ch_mult: [ 1,1,2,2,4,4] # num_down = len(ch_mult)-1 22 | num_res_blocks: 2 23 | attn_resolutions: [16,8] 24 | dropout: 0.0 25 | 26 | data: 27 | target: main.DataModuleFromConfig 28 | params: 29 | batch_size: 12 30 | wrap: True 31 | train: 32 | target: ldm.data.imagenet.ImageNetSRTrain 33 | params: 34 | size: 256 35 | degradation: pil_nearest 36 | validation: 37 | target: ldm.data.imagenet.ImageNetSRValidation 38 | params: 39 | size: 256 40 | degradation: pil_nearest 41 | 42 | lightning: 43 | callbacks: 44 | image_logger: 45 | target: main.ImageLogger 46 | params: 47 | batch_frequency: 1000 48 | max_images: 8 49 | increase_log_steps: True 50 | 51 | trainer: 52 | benchmark: True 53 | accumulate_grad_batches: 2 54 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/configs/latent-diffusion/celebahq-ldm-vq-4.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 2.0e-06 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0195 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: image 11 | image_size: 64 12 | channels: 3 13 | monitor: val/loss_simple_ema 14 | 15 | unet_config: 16 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 17 | params: 18 | image_size: 64 19 | in_channels: 3 20 | out_channels: 3 21 | model_channels: 224 22 | attention_resolutions: 23 | # note: this isn\t actually the resolution but 24 | # the downsampling factor, i.e. this corresnponds to 25 | # attention on spatial resolution 8,16,32, as the 26 | # spatial reolution of the latents is 64 for f4 27 | - 8 28 | - 4 29 | - 2 30 | num_res_blocks: 2 31 | channel_mult: 32 | - 1 33 | - 2 34 | - 3 35 | - 4 36 | num_head_channels: 32 37 | first_stage_config: 38 | target: ldm.models.autoencoder.VQModelInterface 39 | params: 40 | embed_dim: 3 41 | n_embed: 8192 42 | ckpt_path: models/first_stage_models/vq-f4/model.ckpt 43 | ddconfig: 44 | double_z: false 45 | z_channels: 3 46 | resolution: 256 47 | in_channels: 3 48 | out_ch: 3 49 | ch: 128 50 | ch_mult: 51 | - 1 52 | - 2 53 | - 4 54 | num_res_blocks: 2 55 | attn_resolutions: [] 56 | dropout: 0.0 57 | lossconfig: 58 | target: torch.nn.Identity 59 | cond_stage_config: __is_unconditional__ 60 | data: 61 | target: main.DataModuleFromConfig 62 | params: 63 | batch_size: 48 64 | num_workers: 5 65 | wrap: false 66 | train: 67 | target: taming.data.faceshq.CelebAHQTrain 68 | params: 69 | size: 256 70 | validation: 71 | target: taming.data.faceshq.CelebAHQValidation 72 | params: 73 | size: 256 74 | 75 | 76 | lightning: 77 | callbacks: 78 | image_logger: 79 | target: main.ImageLogger 80 | params: 81 | batch_frequency: 5000 82 | max_images: 8 83 | increase_log_steps: False 84 | 85 | trainer: 86 | benchmark: True -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/configs/latent-diffusion/cin256-v2.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 0.0001 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0195 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: image 11 | cond_stage_key: class_label 12 | image_size: 64 
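# Annotation (not part of the original config): image_size and channels here describe the
# diffusion latent space, not the RGB input. The VQ-f4 first stage below (ch_mult [1,2,4],
# z_channels 3) maps 256x256x3 images to 64x64x3 latents, and conditioning cross-attends to a
# learned 512-dim class embedding (n_classes: 1001, presumably 1000 ImageNet classes plus an
# extra unconditional label).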
13 | channels: 3 14 | cond_stage_trainable: true 15 | conditioning_key: crossattn 16 | monitor: val/loss 17 | use_ema: False 18 | 19 | unet_config: 20 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 21 | params: 22 | image_size: 64 23 | in_channels: 3 24 | out_channels: 3 25 | model_channels: 192 26 | attention_resolutions: 27 | - 8 28 | - 4 29 | - 2 30 | num_res_blocks: 2 31 | channel_mult: 32 | - 1 33 | - 2 34 | - 3 35 | - 5 36 | num_heads: 1 37 | use_spatial_transformer: true 38 | transformer_depth: 1 39 | context_dim: 512 40 | 41 | first_stage_config: 42 | target: ldm.models.autoencoder.VQModelInterface 43 | params: 44 | embed_dim: 3 45 | n_embed: 8192 46 | ddconfig: 47 | double_z: false 48 | z_channels: 3 49 | resolution: 256 50 | in_channels: 3 51 | out_ch: 3 52 | ch: 128 53 | ch_mult: 54 | - 1 55 | - 2 56 | - 4 57 | num_res_blocks: 2 58 | attn_resolutions: [] 59 | dropout: 0.0 60 | lossconfig: 61 | target: torch.nn.Identity 62 | 63 | cond_stage_config: 64 | target: ldm.modules.encoders.modules.ClassEmbedder 65 | params: 66 | n_classes: 1001 67 | embed_dim: 512 68 | key: class_label 69 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/configs/latent-diffusion/ffhq-ldm-vq-4.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 2.0e-06 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0195 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: image 11 | image_size: 64 12 | channels: 3 13 | monitor: val/loss_simple_ema 14 | unet_config: 15 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 16 | params: 17 | image_size: 64 18 | in_channels: 3 19 | out_channels: 3 20 | model_channels: 224 21 | attention_resolutions: 22 | # note: this isn\t actually the resolution but 23 | # the downsampling factor, i.e. 
this corresnponds to 24 | # attention on spatial resolution 8,16,32, as the 25 | # spatial reolution of the latents is 64 for f4 26 | - 8 27 | - 4 28 | - 2 29 | num_res_blocks: 2 30 | channel_mult: 31 | - 1 32 | - 2 33 | - 3 34 | - 4 35 | num_head_channels: 32 36 | first_stage_config: 37 | target: ldm.models.autoencoder.VQModelInterface 38 | params: 39 | embed_dim: 3 40 | n_embed: 8192 41 | ckpt_path: configs/first_stage_models/vq-f4/model.yaml 42 | ddconfig: 43 | double_z: false 44 | z_channels: 3 45 | resolution: 256 46 | in_channels: 3 47 | out_ch: 3 48 | ch: 128 49 | ch_mult: 50 | - 1 51 | - 2 52 | - 4 53 | num_res_blocks: 2 54 | attn_resolutions: [] 55 | dropout: 0.0 56 | lossconfig: 57 | target: torch.nn.Identity 58 | cond_stage_config: __is_unconditional__ 59 | data: 60 | target: main.DataModuleFromConfig 61 | params: 62 | batch_size: 42 63 | num_workers: 5 64 | wrap: false 65 | train: 66 | target: taming.data.faceshq.FFHQTrain 67 | params: 68 | size: 256 69 | validation: 70 | target: taming.data.faceshq.FFHQValidation 71 | params: 72 | size: 256 73 | 74 | 75 | lightning: 76 | callbacks: 77 | image_logger: 78 | target: main.ImageLogger 79 | params: 80 | batch_frequency: 5000 81 | max_images: 8 82 | increase_log_steps: False 83 | 84 | trainer: 85 | benchmark: True -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/configs/latent-diffusion/lsun_bedrooms-ldm-vq-4.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 2.0e-06 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0195 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: image 11 | image_size: 64 12 | channels: 3 13 | monitor: val/loss_simple_ema 14 | unet_config: 15 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 16 | params: 17 | image_size: 64 18 | in_channels: 3 19 | out_channels: 3 20 | model_channels: 224 21 | attention_resolutions: 22 | # note: this isn\t actually the resolution but 23 | # the downsampling factor, i.e. 
this corresnponds to 24 | # attention on spatial resolution 8,16,32, as the 25 | # spatial reolution of the latents is 64 for f4 26 | - 8 27 | - 4 28 | - 2 29 | num_res_blocks: 2 30 | channel_mult: 31 | - 1 32 | - 2 33 | - 3 34 | - 4 35 | num_head_channels: 32 36 | first_stage_config: 37 | target: ldm.models.autoencoder.VQModelInterface 38 | params: 39 | ckpt_path: configs/first_stage_models/vq-f4/model.yaml 40 | embed_dim: 3 41 | n_embed: 8192 42 | ddconfig: 43 | double_z: false 44 | z_channels: 3 45 | resolution: 256 46 | in_channels: 3 47 | out_ch: 3 48 | ch: 128 49 | ch_mult: 50 | - 1 51 | - 2 52 | - 4 53 | num_res_blocks: 2 54 | attn_resolutions: [] 55 | dropout: 0.0 56 | lossconfig: 57 | target: torch.nn.Identity 58 | cond_stage_config: __is_unconditional__ 59 | data: 60 | target: main.DataModuleFromConfig 61 | params: 62 | batch_size: 48 63 | num_workers: 5 64 | wrap: false 65 | train: 66 | target: ldm.data.lsun.LSUNBedroomsTrain 67 | params: 68 | size: 256 69 | validation: 70 | target: ldm.data.lsun.LSUNBedroomsValidation 71 | params: 72 | size: 256 73 | 74 | 75 | lightning: 76 | callbacks: 77 | image_logger: 78 | target: main.ImageLogger 79 | params: 80 | batch_frequency: 5000 81 | max_images: 8 82 | increase_log_steps: False 83 | 84 | trainer: 85 | benchmark: True -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/configs/latent-diffusion/lsun_churches-ldm-kl-8.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 5.0e-5 # set to target_lr by starting main.py with '--scale_lr False' 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0155 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | loss_type: l1 11 | first_stage_key: "image" 12 | cond_stage_key: "image" 13 | image_size: 32 14 | channels: 4 15 | cond_stage_trainable: False 16 | concat_mode: False 17 | scale_by_std: True 18 | monitor: 'val/loss_simple_ema' 19 | 20 | scheduler_config: # 10000 warmup steps 21 | target: ldm.lr_scheduler.LambdaLinearScheduler 22 | params: 23 | warm_up_steps: [10000] 24 | cycle_lengths: [10000000000000] 25 | f_start: [1.e-6] 26 | f_max: [1.] 27 | f_min: [ 1.] 
28 | 29 | unet_config: 30 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 31 | params: 32 | image_size: 32 33 | in_channels: 4 34 | out_channels: 4 35 | model_channels: 192 36 | attention_resolutions: [ 1, 2, 4, 8 ] # 32, 16, 8, 4 37 | num_res_blocks: 2 38 | channel_mult: [ 1,2,2,4,4 ] # 32, 16, 8, 4, 2 39 | num_heads: 8 40 | use_scale_shift_norm: True 41 | resblock_updown: True 42 | 43 | first_stage_config: 44 | target: ldm.models.autoencoder.AutoencoderKL 45 | params: 46 | embed_dim: 4 47 | monitor: "val/rec_loss" 48 | ckpt_path: "models/first_stage_models/kl-f8/model.ckpt" 49 | ddconfig: 50 | double_z: True 51 | z_channels: 4 52 | resolution: 256 53 | in_channels: 3 54 | out_ch: 3 55 | ch: 128 56 | ch_mult: [ 1,2,4,4 ] # num_down = len(ch_mult)-1 57 | num_res_blocks: 2 58 | attn_resolutions: [ ] 59 | dropout: 0.0 60 | lossconfig: 61 | target: torch.nn.Identity 62 | 63 | cond_stage_config: "__is_unconditional__" 64 | 65 | data: 66 | target: main.DataModuleFromConfig 67 | params: 68 | batch_size: 96 69 | num_workers: 5 70 | wrap: False 71 | train: 72 | target: ldm.data.lsun.LSUNChurchesTrain 73 | params: 74 | size: 256 75 | validation: 76 | target: ldm.data.lsun.LSUNChurchesValidation 77 | params: 78 | size: 256 79 | 80 | lightning: 81 | callbacks: 82 | image_logger: 83 | target: main.ImageLogger 84 | params: 85 | batch_frequency: 5000 86 | max_images: 8 87 | increase_log_steps: False 88 | 89 | 90 | trainer: 91 | benchmark: True -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/configs/latent-diffusion/txt2img-1p4B-eval.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 5.0e-05 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.012 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: image 11 | cond_stage_key: caption 12 | image_size: 32 13 | channels: 4 14 | cond_stage_trainable: true 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | use_ema: False 19 | 20 | unet_config: 21 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 22 | params: 23 | image_size: 32 24 | in_channels: 4 25 | out_channels: 4 26 | model_channels: 320 27 | attention_resolutions: 28 | - 4 29 | - 2 30 | - 1 31 | num_res_blocks: 2 32 | channel_mult: 33 | - 1 34 | - 2 35 | - 4 36 | - 4 37 | num_heads: 8 38 | use_spatial_transformer: true 39 | transformer_depth: 1 40 | context_dim: 1280 41 | use_checkpoint: true 42 | legacy: False 43 | 44 | first_stage_config: 45 | target: ldm.models.autoencoder.AutoencoderKL 46 | params: 47 | embed_dim: 4 48 | monitor: val/rec_loss 49 | ddconfig: 50 | double_z: true 51 | z_channels: 4 52 | resolution: 256 53 | in_channels: 3 54 | out_ch: 3 55 | ch: 128 56 | ch_mult: 57 | - 1 58 | - 2 59 | - 4 60 | - 4 61 | num_res_blocks: 2 62 | attn_resolutions: [] 63 | dropout: 0.0 64 | lossconfig: 65 | target: torch.nn.Identity 66 | 67 | cond_stage_config: 68 | target: ldm.modules.encoders.modules.BERTEmbedder 69 | params: 70 | n_embed: 1280 71 | n_layer: 32 72 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/configs/retrieval-augmented-diffusion/768x768.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 0.0001 3 | target: 
ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.015 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: jpg 11 | cond_stage_key: nix 12 | image_size: 48 13 | channels: 16 14 | cond_stage_trainable: false 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | scale_by_std: false 18 | scale_factor: 0.22765929 19 | unet_config: 20 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 21 | params: 22 | image_size: 48 23 | in_channels: 16 24 | out_channels: 16 25 | model_channels: 448 26 | attention_resolutions: 27 | - 4 28 | - 2 29 | - 1 30 | num_res_blocks: 2 31 | channel_mult: 32 | - 1 33 | - 2 34 | - 3 35 | - 4 36 | use_scale_shift_norm: false 37 | resblock_updown: false 38 | num_head_channels: 32 39 | use_spatial_transformer: true 40 | transformer_depth: 1 41 | context_dim: 768 42 | use_checkpoint: true 43 | first_stage_config: 44 | target: ldm.models.autoencoder.AutoencoderKL 45 | params: 46 | monitor: val/rec_loss 47 | embed_dim: 16 48 | ddconfig: 49 | double_z: true 50 | z_channels: 16 51 | resolution: 256 52 | in_channels: 3 53 | out_ch: 3 54 | ch: 128 55 | ch_mult: 56 | - 1 57 | - 1 58 | - 2 59 | - 2 60 | - 4 61 | num_res_blocks: 2 62 | attn_resolutions: 63 | - 16 64 | dropout: 0.0 65 | lossconfig: 66 | target: torch.nn.Identity 67 | cond_stage_config: 68 | target: torch.nn.Identity -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/configs/stable-diffusion/v1-inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false # Note: different from the one we trained before 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | use_ema: False 19 | 20 | scheduler_config: # 10000 warmup steps 21 | target: ldm.lr_scheduler.LambdaLinearScheduler 22 | params: 23 | warm_up_steps: [ 10000 ] 24 | cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases 25 | f_start: [ 1.e-6 ] 26 | f_max: [ 1. ] 27 | f_min: [ 1. 
] 28 | 29 | unet_config: 30 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 31 | params: 32 | image_size: 32 # unused 33 | in_channels: 4 34 | out_channels: 4 35 | model_channels: 320 36 | attention_resolutions: [ 4, 2, 1 ] 37 | num_res_blocks: 2 38 | channel_mult: [ 1, 2, 4, 4 ] 39 | num_heads: 8 40 | use_spatial_transformer: True 41 | transformer_depth: 1 42 | context_dim: 768 43 | use_checkpoint: True 44 | legacy: False 45 | 46 | first_stage_config: 47 | target: ldm.models.autoencoder.AutoencoderKL 48 | params: 49 | embed_dim: 4 50 | monitor: val/rec_loss 51 | ddconfig: 52 | double_z: true 53 | z_channels: 4 54 | resolution: 256 55 | in_channels: 3 56 | out_ch: 3 57 | ch: 128 58 | ch_mult: 59 | - 1 60 | - 2 61 | - 4 62 | - 4 63 | num_res_blocks: 2 64 | attn_resolutions: [] 65 | dropout: 0.0 66 | lossconfig: 67 | target: torch.nn.Identity 68 | 69 | cond_stage_config: 70 | target: ldm.modules.encoders.modules.FrozenCLIPEmbedder 71 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/DejaVuSans.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/DejaVuSans.ttf -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/example_conditioning/superresolution/sample_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/example_conditioning/superresolution/sample_0.jpg -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/example_conditioning/text_conditional/sample_0.txt: -------------------------------------------------------------------------------- 1 | A basket of cerries 2 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/imagenet_train_hr_indices.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/imagenet_train_hr_indices.p -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/imagenet_val_hr_indices.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/imagenet_val_hr_indices.p -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/6458524847_2f4c361183_k.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/6458524847_2f4c361183_k.png -------------------------------------------------------------------------------- 
/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/6458524847_2f4c361183_k_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/6458524847_2f4c361183_k_mask.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/8399166846_f6fb4e4b8e_k.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/8399166846_f6fb4e4b8e_k.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/8399166846_f6fb4e4b8e_k_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/8399166846_f6fb4e4b8e_k_mask.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/alex-iby-G_Pk4D9rMLs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/alex-iby-G_Pk4D9rMLs.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/alex-iby-G_Pk4D9rMLs_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/alex-iby-G_Pk4D9rMLs_mask.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/bench2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/bench2.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/bench2_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/bench2_mask.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/bertrand-gabioud-CpuFzIsHYJ0.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/bertrand-gabioud-CpuFzIsHYJ0.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/bertrand-gabioud-CpuFzIsHYJ0_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/bertrand-gabioud-CpuFzIsHYJ0_mask.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/billow926-12-Wc-Zgx6Y.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/billow926-12-Wc-Zgx6Y.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/billow926-12-Wc-Zgx6Y_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/billow926-12-Wc-Zgx6Y_mask.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/photo-1583445095369-9c651e7e5d34.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/photo-1583445095369-9c651e7e5d34.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/photo-1583445095369-9c651e7e5d34_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/photo-1583445095369-9c651e7e5d34_mask.png 
-------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/environment.yaml: -------------------------------------------------------------------------------- 1 | name: ldm 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - python=3.8.5 7 | - pip=20.3 8 | - cudatoolkit=11.3 9 | - pytorch=1.11.0 10 | - torchvision=0.12.0 11 | - numpy=1.19.2 12 | - pip: 13 | - albumentations==0.4.3 14 | - diffusers 15 | - opencv-python==4.1.2.30 16 | - pudb==2019.2 17 | - invisible-watermark 18 | - imageio==2.9.0 19 | - imageio-ffmpeg==0.4.2 20 | - pytorch-lightning==1.4.2 21 | - omegaconf==2.1.1 22 | - test-tube>=0.7.5 23 | - streamlit>=0.73.1 24 | - einops==0.3.0 25 | - torch-fidelity==0.3.0 26 | - transformers==4.19.2 27 | - torchmetrics==0.6.0 28 | - kornia==0.6 29 | - -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers 30 | - -e git+https://github.com/openai/CLIP.git@main#egg=clip 31 | - -e . 32 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/ldm/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/ldm/data/__init__.py -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/ldm/data/base.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | from torch.utils.data import Dataset, ConcatDataset, ChainDataset, IterableDataset 3 | 4 | 5 | class Txt2ImgIterableBaseDataset(IterableDataset): 6 | ''' 7 | Define an interface to make the IterableDatasets for text2img data chainable 8 | ''' 9 | def __init__(self, num_records=0, valid_ids=None, size=256): 10 | super().__init__() 11 | self.num_records = num_records 12 | self.valid_ids = valid_ids 13 | self.sample_ids = valid_ids 14 | self.size = size 15 | 16 | print(f'{self.__class__.__name__} dataset contains {self.__len__()} examples.') 17 | 18 | def __len__(self): 19 | return self.num_records 20 | 21 | @abstractmethod 22 | def __iter__(self): 23 | pass -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/ldm/models/diffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/ldm/models/diffusion/__init__.py -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/ldm/models/diffusion/dpm_solver/__init__.py: -------------------------------------------------------------------------------- 1 | from .sampler import DPMSolverSampler -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/ldm/modules/diffusionmodules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/ldm/modules/diffusionmodules/__init__.py -------------------------------------------------------------------------------- 
/codes/diffusion_sd1/stable-diffusion/ldm/modules/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/ldm/modules/distributions/__init__.py -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/ldm/modules/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/ldm/modules/encoders/__init__.py -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/ldm/modules/image_degradation/__init__.py: -------------------------------------------------------------------------------- 1 | from ldm.modules.image_degradation.bsrgan import degradation_bsrgan_variant as degradation_fn_bsr 2 | from ldm.modules.image_degradation.bsrgan_light import degradation_bsrgan_variant as degradation_fn_bsr_light 3 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/ldm/modules/image_degradation/utils/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/ldm/modules/image_degradation/utils/test.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/ldm/modules/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from ldm.modules.losses.contperceptual import LPIPSWithDiscriminator -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/first_stage_models/kl-f16/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.AutoencoderKL 4 | params: 5 | monitor: val/rec_loss 6 | embed_dim: 16 7 | lossconfig: 8 | target: ldm.modules.losses.LPIPSWithDiscriminator 9 | params: 10 | disc_start: 50001 11 | kl_weight: 1.0e-06 12 | disc_weight: 0.5 13 | ddconfig: 14 | double_z: true 15 | z_channels: 16 16 | resolution: 256 17 | in_channels: 3 18 | out_ch: 3 19 | ch: 128 20 | ch_mult: 21 | - 1 22 | - 1 23 | - 2 24 | - 2 25 | - 4 26 | num_res_blocks: 2 27 | attn_resolutions: 28 | - 16 29 | dropout: 0.0 30 | data: 31 | target: main.DataModuleFromConfig 32 | params: 33 | batch_size: 6 34 | wrap: true 35 | train: 36 | target: ldm.data.openimages.FullOpenImagesTrain 37 | params: 38 | size: 384 39 | crop_size: 256 40 | validation: 41 | target: ldm.data.openimages.FullOpenImagesValidation 42 | params: 43 | size: 384 44 | crop_size: 256 45 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/first_stage_models/kl-f32/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.AutoencoderKL 4 | params: 5 | monitor: val/rec_loss 6 | embed_dim: 64 7 | lossconfig: 8 | target: 
ldm.modules.losses.LPIPSWithDiscriminator 9 | params: 10 | disc_start: 50001 11 | kl_weight: 1.0e-06 12 | disc_weight: 0.5 13 | ddconfig: 14 | double_z: true 15 | z_channels: 64 16 | resolution: 256 17 | in_channels: 3 18 | out_ch: 3 19 | ch: 128 20 | ch_mult: 21 | - 1 22 | - 1 23 | - 2 24 | - 2 25 | - 4 26 | - 4 27 | num_res_blocks: 2 28 | attn_resolutions: 29 | - 16 30 | - 8 31 | dropout: 0.0 32 | data: 33 | target: main.DataModuleFromConfig 34 | params: 35 | batch_size: 6 36 | wrap: true 37 | train: 38 | target: ldm.data.openimages.FullOpenImagesTrain 39 | params: 40 | size: 384 41 | crop_size: 256 42 | validation: 43 | target: ldm.data.openimages.FullOpenImagesValidation 44 | params: 45 | size: 384 46 | crop_size: 256 47 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/first_stage_models/kl-f4/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.AutoencoderKL 4 | params: 5 | monitor: val/rec_loss 6 | embed_dim: 3 7 | lossconfig: 8 | target: ldm.modules.losses.LPIPSWithDiscriminator 9 | params: 10 | disc_start: 50001 11 | kl_weight: 1.0e-06 12 | disc_weight: 0.5 13 | ddconfig: 14 | double_z: true 15 | z_channels: 3 16 | resolution: 256 17 | in_channels: 3 18 | out_ch: 3 19 | ch: 128 20 | ch_mult: 21 | - 1 22 | - 2 23 | - 4 24 | num_res_blocks: 2 25 | attn_resolutions: [] 26 | dropout: 0.0 27 | data: 28 | target: main.DataModuleFromConfig 29 | params: 30 | batch_size: 10 31 | wrap: true 32 | train: 33 | target: ldm.data.openimages.FullOpenImagesTrain 34 | params: 35 | size: 384 36 | crop_size: 256 37 | validation: 38 | target: ldm.data.openimages.FullOpenImagesValidation 39 | params: 40 | size: 384 41 | crop_size: 256 42 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/first_stage_models/kl-f8/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.AutoencoderKL 4 | params: 5 | monitor: val/rec_loss 6 | embed_dim: 4 7 | lossconfig: 8 | target: ldm.modules.losses.LPIPSWithDiscriminator 9 | params: 10 | disc_start: 50001 11 | kl_weight: 1.0e-06 12 | disc_weight: 0.5 13 | ddconfig: 14 | double_z: true 15 | z_channels: 4 16 | resolution: 256 17 | in_channels: 3 18 | out_ch: 3 19 | ch: 128 20 | ch_mult: 21 | - 1 22 | - 2 23 | - 4 24 | - 4 25 | num_res_blocks: 2 26 | attn_resolutions: [] 27 | dropout: 0.0 28 | data: 29 | target: main.DataModuleFromConfig 30 | params: 31 | batch_size: 4 32 | wrap: true 33 | train: 34 | target: ldm.data.openimages.FullOpenImagesTrain 35 | params: 36 | size: 384 37 | crop_size: 256 38 | validation: 39 | target: ldm.data.openimages.FullOpenImagesValidation 40 | params: 41 | size: 384 42 | crop_size: 256 43 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/first_stage_models/vq-f16/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.VQModel 4 | params: 5 | embed_dim: 8 6 | n_embed: 16384 7 | ddconfig: 8 | double_z: false 9 | z_channels: 8 10 | resolution: 256 11 | in_channels: 3 12 | out_ch: 3 13 | ch: 128 14 | ch_mult: 15 | - 1 16 | - 1 17 | - 2 18 | - 2 19 | - 4 20 | num_res_blocks: 
2 21 | attn_resolutions: 22 | - 16 23 | dropout: 0.0 24 | lossconfig: 25 | target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator 26 | params: 27 | disc_conditional: false 28 | disc_in_channels: 3 29 | disc_start: 250001 30 | disc_weight: 0.75 31 | disc_num_layers: 2 32 | codebook_weight: 1.0 33 | 34 | data: 35 | target: main.DataModuleFromConfig 36 | params: 37 | batch_size: 14 38 | num_workers: 20 39 | wrap: true 40 | train: 41 | target: ldm.data.openimages.FullOpenImagesTrain 42 | params: 43 | size: 384 44 | crop_size: 256 45 | validation: 46 | target: ldm.data.openimages.FullOpenImagesValidation 47 | params: 48 | size: 384 49 | crop_size: 256 50 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/first_stage_models/vq-f4-noattn/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.VQModel 4 | params: 5 | embed_dim: 3 6 | n_embed: 8192 7 | monitor: val/rec_loss 8 | 9 | ddconfig: 10 | attn_type: none 11 | double_z: false 12 | z_channels: 3 13 | resolution: 256 14 | in_channels: 3 15 | out_ch: 3 16 | ch: 128 17 | ch_mult: 18 | - 1 19 | - 2 20 | - 4 21 | num_res_blocks: 2 22 | attn_resolutions: [] 23 | dropout: 0.0 24 | lossconfig: 25 | target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator 26 | params: 27 | disc_conditional: false 28 | disc_in_channels: 3 29 | disc_start: 11 30 | disc_weight: 0.75 31 | codebook_weight: 1.0 32 | 33 | data: 34 | target: main.DataModuleFromConfig 35 | params: 36 | batch_size: 8 37 | num_workers: 12 38 | wrap: true 39 | train: 40 | target: ldm.data.openimages.FullOpenImagesTrain 41 | params: 42 | crop_size: 256 43 | validation: 44 | target: ldm.data.openimages.FullOpenImagesValidation 45 | params: 46 | crop_size: 256 47 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/first_stage_models/vq-f4/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.VQModel 4 | params: 5 | embed_dim: 3 6 | n_embed: 8192 7 | monitor: val/rec_loss 8 | 9 | ddconfig: 10 | double_z: false 11 | z_channels: 3 12 | resolution: 256 13 | in_channels: 3 14 | out_ch: 3 15 | ch: 128 16 | ch_mult: 17 | - 1 18 | - 2 19 | - 4 20 | num_res_blocks: 2 21 | attn_resolutions: [] 22 | dropout: 0.0 23 | lossconfig: 24 | target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator 25 | params: 26 | disc_conditional: false 27 | disc_in_channels: 3 28 | disc_start: 0 29 | disc_weight: 0.75 30 | codebook_weight: 1.0 31 | 32 | data: 33 | target: main.DataModuleFromConfig 34 | params: 35 | batch_size: 8 36 | num_workers: 16 37 | wrap: true 38 | train: 39 | target: ldm.data.openimages.FullOpenImagesTrain 40 | params: 41 | crop_size: 256 42 | validation: 43 | target: ldm.data.openimages.FullOpenImagesValidation 44 | params: 45 | crop_size: 256 46 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/first_stage_models/vq-f8-n256/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.VQModel 4 | params: 5 | embed_dim: 4 6 | n_embed: 256 7 | monitor: val/rec_loss 8 | ddconfig: 9 | double_z: false 10 | 
z_channels: 4 11 | resolution: 256 12 | in_channels: 3 13 | out_ch: 3 14 | ch: 128 15 | ch_mult: 16 | - 1 17 | - 2 18 | - 2 19 | - 4 20 | num_res_blocks: 2 21 | attn_resolutions: 22 | - 32 23 | dropout: 0.0 24 | lossconfig: 25 | target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator 26 | params: 27 | disc_conditional: false 28 | disc_in_channels: 3 29 | disc_start: 250001 30 | disc_weight: 0.75 31 | codebook_weight: 1.0 32 | 33 | data: 34 | target: main.DataModuleFromConfig 35 | params: 36 | batch_size: 10 37 | num_workers: 20 38 | wrap: true 39 | train: 40 | target: ldm.data.openimages.FullOpenImagesTrain 41 | params: 42 | size: 384 43 | crop_size: 256 44 | validation: 45 | target: ldm.data.openimages.FullOpenImagesValidation 46 | params: 47 | size: 384 48 | crop_size: 256 49 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/first_stage_models/vq-f8/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.VQModel 4 | params: 5 | embed_dim: 4 6 | n_embed: 16384 7 | monitor: val/rec_loss 8 | ddconfig: 9 | double_z: false 10 | z_channels: 4 11 | resolution: 256 12 | in_channels: 3 13 | out_ch: 3 14 | ch: 128 15 | ch_mult: 16 | - 1 17 | - 2 18 | - 2 19 | - 4 20 | num_res_blocks: 2 21 | attn_resolutions: 22 | - 32 23 | dropout: 0.0 24 | lossconfig: 25 | target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator 26 | params: 27 | disc_conditional: false 28 | disc_in_channels: 3 29 | disc_num_layers: 2 30 | disc_start: 1 31 | disc_weight: 0.6 32 | codebook_weight: 1.0 33 | data: 34 | target: main.DataModuleFromConfig 35 | params: 36 | batch_size: 10 37 | num_workers: 20 38 | wrap: true 39 | train: 40 | target: ldm.data.openimages.FullOpenImagesTrain 41 | params: 42 | size: 384 43 | crop_size: 256 44 | validation: 45 | target: ldm.data.openimages.FullOpenImagesValidation 46 | params: 47 | size: 384 48 | crop_size: 256 49 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/ldm/bsr_sr/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-06 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0155 7 | log_every_t: 100 8 | timesteps: 1000 9 | loss_type: l2 10 | first_stage_key: image 11 | cond_stage_key: LR_image 12 | image_size: 64 13 | channels: 3 14 | concat_mode: true 15 | cond_stage_trainable: false 16 | unet_config: 17 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 18 | params: 19 | image_size: 64 20 | in_channels: 6 21 | out_channels: 3 22 | model_channels: 160 23 | attention_resolutions: 24 | - 16 25 | - 8 26 | num_res_blocks: 2 27 | channel_mult: 28 | - 1 29 | - 2 30 | - 2 31 | - 4 32 | num_head_channels: 32 33 | first_stage_config: 34 | target: ldm.models.autoencoder.VQModelInterface 35 | params: 36 | embed_dim: 3 37 | n_embed: 8192 38 | monitor: val/rec_loss 39 | ddconfig: 40 | double_z: false 41 | z_channels: 3 42 | resolution: 256 43 | in_channels: 3 44 | out_ch: 3 45 | ch: 128 46 | ch_mult: 47 | - 1 48 | - 2 49 | - 4 50 | num_res_blocks: 2 51 | attn_resolutions: [] 52 | dropout: 0.0 53 | lossconfig: 54 | target: torch.nn.Identity 55 | cond_stage_config: 56 | target: torch.nn.Identity 57 | data: 58 | target: main.DataModuleFromConfig 59 | params: 60 | 
batch_size: 64 61 | wrap: false 62 | num_workers: 12 63 | train: 64 | target: ldm.data.openimages.SuperresOpenImagesAdvancedTrain 65 | params: 66 | size: 256 67 | degradation: bsrgan_light 68 | downscale_f: 4 69 | min_crop_f: 0.5 70 | max_crop_f: 1.0 71 | random_crop: true 72 | validation: 73 | target: ldm.data.openimages.SuperresOpenImagesAdvancedValidation 74 | params: 75 | size: 256 76 | degradation: bsrgan_light 77 | downscale_f: 4 78 | min_crop_f: 0.5 79 | max_crop_f: 1.0 80 | random_crop: true 81 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/ldm/celeba256/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 2.0e-06 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0195 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: image 11 | cond_stage_key: class_label 12 | image_size: 64 13 | channels: 3 14 | cond_stage_trainable: false 15 | concat_mode: false 16 | monitor: val/loss 17 | unet_config: 18 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 19 | params: 20 | image_size: 64 21 | in_channels: 3 22 | out_channels: 3 23 | model_channels: 224 24 | attention_resolutions: 25 | - 8 26 | - 4 27 | - 2 28 | num_res_blocks: 2 29 | channel_mult: 30 | - 1 31 | - 2 32 | - 3 33 | - 4 34 | num_head_channels: 32 35 | first_stage_config: 36 | target: ldm.models.autoencoder.VQModelInterface 37 | params: 38 | embed_dim: 3 39 | n_embed: 8192 40 | ddconfig: 41 | double_z: false 42 | z_channels: 3 43 | resolution: 256 44 | in_channels: 3 45 | out_ch: 3 46 | ch: 128 47 | ch_mult: 48 | - 1 49 | - 2 50 | - 4 51 | num_res_blocks: 2 52 | attn_resolutions: [] 53 | dropout: 0.0 54 | lossconfig: 55 | target: torch.nn.Identity 56 | cond_stage_config: __is_unconditional__ 57 | data: 58 | target: main.DataModuleFromConfig 59 | params: 60 | batch_size: 48 61 | num_workers: 5 62 | wrap: false 63 | train: 64 | target: ldm.data.faceshq.CelebAHQTrain 65 | params: 66 | size: 256 67 | validation: 68 | target: ldm.data.faceshq.CelebAHQValidation 69 | params: 70 | size: 256 71 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/ldm/cin256/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-06 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0195 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: image 11 | cond_stage_key: class_label 12 | image_size: 32 13 | channels: 4 14 | cond_stage_trainable: true 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | unet_config: 18 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 19 | params: 20 | image_size: 32 21 | in_channels: 4 22 | out_channels: 4 23 | model_channels: 256 24 | attention_resolutions: 25 | - 4 26 | - 2 27 | - 1 28 | num_res_blocks: 2 29 | channel_mult: 30 | - 1 31 | - 2 32 | - 4 33 | num_head_channels: 32 34 | use_spatial_transformer: true 35 | transformer_depth: 1 36 | context_dim: 512 37 | first_stage_config: 38 | target: ldm.models.autoencoder.VQModelInterface 39 | params: 40 | embed_dim: 4 41 | n_embed: 16384 42 | ddconfig: 43 | double_z: false 44 | z_channels: 4 45 | resolution: 256 46 | in_channels: 3 47 | out_ch: 3 48 | 
ch: 128 49 | ch_mult: 50 | - 1 51 | - 2 52 | - 2 53 | - 4 54 | num_res_blocks: 2 55 | attn_resolutions: 56 | - 32 57 | dropout: 0.0 58 | lossconfig: 59 | target: torch.nn.Identity 60 | cond_stage_config: 61 | target: ldm.modules.encoders.modules.ClassEmbedder 62 | params: 63 | embed_dim: 512 64 | key: class_label 65 | data: 66 | target: main.DataModuleFromConfig 67 | params: 68 | batch_size: 64 69 | num_workers: 12 70 | wrap: false 71 | train: 72 | target: ldm.data.imagenet.ImageNetTrain 73 | params: 74 | config: 75 | size: 256 76 | validation: 77 | target: ldm.data.imagenet.ImageNetValidation 78 | params: 79 | config: 80 | size: 256 81 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/ldm/ffhq256/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 2.0e-06 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0195 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: image 11 | cond_stage_key: class_label 12 | image_size: 64 13 | channels: 3 14 | cond_stage_trainable: false 15 | concat_mode: false 16 | monitor: val/loss 17 | unet_config: 18 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 19 | params: 20 | image_size: 64 21 | in_channels: 3 22 | out_channels: 3 23 | model_channels: 224 24 | attention_resolutions: 25 | - 8 26 | - 4 27 | - 2 28 | num_res_blocks: 2 29 | channel_mult: 30 | - 1 31 | - 2 32 | - 3 33 | - 4 34 | num_head_channels: 32 35 | first_stage_config: 36 | target: ldm.models.autoencoder.VQModelInterface 37 | params: 38 | embed_dim: 3 39 | n_embed: 8192 40 | ddconfig: 41 | double_z: false 42 | z_channels: 3 43 | resolution: 256 44 | in_channels: 3 45 | out_ch: 3 46 | ch: 128 47 | ch_mult: 48 | - 1 49 | - 2 50 | - 4 51 | num_res_blocks: 2 52 | attn_resolutions: [] 53 | dropout: 0.0 54 | lossconfig: 55 | target: torch.nn.Identity 56 | cond_stage_config: __is_unconditional__ 57 | data: 58 | target: main.DataModuleFromConfig 59 | params: 60 | batch_size: 42 61 | num_workers: 5 62 | wrap: false 63 | train: 64 | target: ldm.data.faceshq.FFHQTrain 65 | params: 66 | size: 256 67 | validation: 68 | target: ldm.data.faceshq.FFHQValidation 69 | params: 70 | size: 256 71 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/ldm/inpainting_big/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-06 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0205 7 | log_every_t: 100 8 | timesteps: 1000 9 | loss_type: l1 10 | first_stage_key: image 11 | cond_stage_key: masked_image 12 | image_size: 64 13 | channels: 3 14 | concat_mode: true 15 | monitor: val/loss 16 | scheduler_config: 17 | target: ldm.lr_scheduler.LambdaWarmUpCosineScheduler 18 | params: 19 | verbosity_interval: 0 20 | warm_up_steps: 1000 21 | max_decay_steps: 50000 22 | lr_start: 0.001 23 | lr_max: 0.1 24 | lr_min: 0.0001 25 | unet_config: 26 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 27 | params: 28 | image_size: 64 29 | in_channels: 7 30 | out_channels: 3 31 | model_channels: 256 32 | attention_resolutions: 33 | - 8 34 | - 4 35 | - 2 36 | num_res_blocks: 2 37 | channel_mult: 38 | - 1 39 | - 2 40 | - 3 41 | - 4 42 | num_heads: 8 43 | 
resblock_updown: true 44 | first_stage_config: 45 | target: ldm.models.autoencoder.VQModelInterface 46 | params: 47 | embed_dim: 3 48 | n_embed: 8192 49 | monitor: val/rec_loss 50 | ddconfig: 51 | attn_type: none 52 | double_z: false 53 | z_channels: 3 54 | resolution: 256 55 | in_channels: 3 56 | out_ch: 3 57 | ch: 128 58 | ch_mult: 59 | - 1 60 | - 2 61 | - 4 62 | num_res_blocks: 2 63 | attn_resolutions: [] 64 | dropout: 0.0 65 | lossconfig: 66 | target: ldm.modules.losses.contperceptual.DummyLoss 67 | cond_stage_config: __is_first_stage__ 68 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/ldm/layout2img-openimages256/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 2.0e-06 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0205 7 | log_every_t: 100 8 | timesteps: 1000 9 | loss_type: l1 10 | first_stage_key: image 11 | cond_stage_key: coordinates_bbox 12 | image_size: 64 13 | channels: 3 14 | conditioning_key: crossattn 15 | cond_stage_trainable: true 16 | unet_config: 17 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 18 | params: 19 | image_size: 64 20 | in_channels: 3 21 | out_channels: 3 22 | model_channels: 128 23 | attention_resolutions: 24 | - 8 25 | - 4 26 | - 2 27 | num_res_blocks: 2 28 | channel_mult: 29 | - 1 30 | - 2 31 | - 3 32 | - 4 33 | num_head_channels: 32 34 | use_spatial_transformer: true 35 | transformer_depth: 3 36 | context_dim: 512 37 | first_stage_config: 38 | target: ldm.models.autoencoder.VQModelInterface 39 | params: 40 | embed_dim: 3 41 | n_embed: 8192 42 | monitor: val/rec_loss 43 | ddconfig: 44 | double_z: false 45 | z_channels: 3 46 | resolution: 256 47 | in_channels: 3 48 | out_ch: 3 49 | ch: 128 50 | ch_mult: 51 | - 1 52 | - 2 53 | - 4 54 | num_res_blocks: 2 55 | attn_resolutions: [] 56 | dropout: 0.0 57 | lossconfig: 58 | target: torch.nn.Identity 59 | cond_stage_config: 60 | target: ldm.modules.encoders.modules.BERTEmbedder 61 | params: 62 | n_embed: 512 63 | n_layer: 16 64 | vocab_size: 8192 65 | max_seq_len: 92 66 | use_tokenizer: false 67 | monitor: val/loss_simple_ema 68 | data: 69 | target: main.DataModuleFromConfig 70 | params: 71 | batch_size: 24 72 | wrap: false 73 | num_workers: 10 74 | train: 75 | target: ldm.data.openimages.OpenImagesBBoxTrain 76 | params: 77 | size: 256 78 | validation: 79 | target: ldm.data.openimages.OpenImagesBBoxValidation 80 | params: 81 | size: 256 82 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/ldm/lsun_beds256/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 2.0e-06 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0195 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: image 11 | cond_stage_key: class_label 12 | image_size: 64 13 | channels: 3 14 | cond_stage_trainable: false 15 | concat_mode: false 16 | monitor: val/loss 17 | unet_config: 18 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 19 | params: 20 | image_size: 64 21 | in_channels: 3 22 | out_channels: 3 23 | model_channels: 224 24 | attention_resolutions: 25 | - 8 26 | - 4 27 | - 2 28 | num_res_blocks: 2 29 | channel_mult: 30 | - 1 31 | - 2 32 | - 3 33 | - 4 34 | 
num_head_channels: 32 35 | first_stage_config: 36 | target: ldm.models.autoencoder.VQModelInterface 37 | params: 38 | embed_dim: 3 39 | n_embed: 8192 40 | ddconfig: 41 | double_z: false 42 | z_channels: 3 43 | resolution: 256 44 | in_channels: 3 45 | out_ch: 3 46 | ch: 128 47 | ch_mult: 48 | - 1 49 | - 2 50 | - 4 51 | num_res_blocks: 2 52 | attn_resolutions: [] 53 | dropout: 0.0 54 | lossconfig: 55 | target: torch.nn.Identity 56 | cond_stage_config: __is_unconditional__ 57 | data: 58 | target: main.DataModuleFromConfig 59 | params: 60 | batch_size: 48 61 | num_workers: 5 62 | wrap: false 63 | train: 64 | target: ldm.data.lsun.LSUNBedroomsTrain 65 | params: 66 | size: 256 67 | validation: 68 | target: ldm.data.lsun.LSUNBedroomsValidation 69 | params: 70 | size: 256 71 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/ldm/lsun_churches256/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 5.0e-05 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0155 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | loss_type: l1 11 | first_stage_key: image 12 | cond_stage_key: image 13 | image_size: 32 14 | channels: 4 15 | cond_stage_trainable: false 16 | concat_mode: false 17 | scale_by_std: true 18 | monitor: val/loss_simple_ema 19 | scheduler_config: 20 | target: ldm.lr_scheduler.LambdaLinearScheduler 21 | params: 22 | warm_up_steps: 23 | - 10000 24 | cycle_lengths: 25 | - 10000000000000 26 | f_start: 27 | - 1.0e-06 28 | f_max: 29 | - 1.0 30 | f_min: 31 | - 1.0 32 | unet_config: 33 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 34 | params: 35 | image_size: 32 36 | in_channels: 4 37 | out_channels: 4 38 | model_channels: 192 39 | attention_resolutions: 40 | - 1 41 | - 2 42 | - 4 43 | - 8 44 | num_res_blocks: 2 45 | channel_mult: 46 | - 1 47 | - 2 48 | - 2 49 | - 4 50 | - 4 51 | num_heads: 8 52 | use_scale_shift_norm: true 53 | resblock_updown: true 54 | first_stage_config: 55 | target: ldm.models.autoencoder.AutoencoderKL 56 | params: 57 | embed_dim: 4 58 | monitor: val/rec_loss 59 | ddconfig: 60 | double_z: true 61 | z_channels: 4 62 | resolution: 256 63 | in_channels: 3 64 | out_ch: 3 65 | ch: 128 66 | ch_mult: 67 | - 1 68 | - 2 69 | - 4 70 | - 4 71 | num_res_blocks: 2 72 | attn_resolutions: [] 73 | dropout: 0.0 74 | lossconfig: 75 | target: torch.nn.Identity 76 | 77 | cond_stage_config: '__is_unconditional__' 78 | 79 | data: 80 | target: main.DataModuleFromConfig 81 | params: 82 | batch_size: 96 83 | num_workers: 5 84 | wrap: false 85 | train: 86 | target: ldm.data.lsun.LSUNChurchesTrain 87 | params: 88 | size: 256 89 | validation: 90 | target: ldm.data.lsun.LSUNChurchesValidation 91 | params: 92 | size: 256 93 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/ldm/semantic_synthesis256/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-06 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0205 7 | log_every_t: 100 8 | timesteps: 1000 9 | loss_type: l1 10 | first_stage_key: image 11 | cond_stage_key: segmentation 12 | image_size: 64 13 | channels: 3 14 | concat_mode: true 15 | cond_stage_trainable: true 16 | unet_config: 17 | target: 
ldm.modules.diffusionmodules.openaimodel.UNetModel 18 | params: 19 | image_size: 64 20 | in_channels: 6 21 | out_channels: 3 22 | model_channels: 128 23 | attention_resolutions: 24 | - 32 25 | - 16 26 | - 8 27 | num_res_blocks: 2 28 | channel_mult: 29 | - 1 30 | - 4 31 | - 8 32 | num_heads: 8 33 | first_stage_config: 34 | target: ldm.models.autoencoder.VQModelInterface 35 | params: 36 | embed_dim: 3 37 | n_embed: 8192 38 | ddconfig: 39 | double_z: false 40 | z_channels: 3 41 | resolution: 256 42 | in_channels: 3 43 | out_ch: 3 44 | ch: 128 45 | ch_mult: 46 | - 1 47 | - 2 48 | - 4 49 | num_res_blocks: 2 50 | attn_resolutions: [] 51 | dropout: 0.0 52 | lossconfig: 53 | target: torch.nn.Identity 54 | cond_stage_config: 55 | target: ldm.modules.encoders.modules.SpatialRescaler 56 | params: 57 | n_stages: 2 58 | in_channels: 182 59 | out_channels: 3 60 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/ldm/semantic_synthesis512/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-06 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0205 7 | log_every_t: 100 8 | timesteps: 1000 9 | loss_type: l1 10 | first_stage_key: image 11 | cond_stage_key: segmentation 12 | image_size: 128 13 | channels: 3 14 | concat_mode: true 15 | cond_stage_trainable: true 16 | unet_config: 17 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 18 | params: 19 | image_size: 128 20 | in_channels: 6 21 | out_channels: 3 22 | model_channels: 128 23 | attention_resolutions: 24 | - 32 25 | - 16 26 | - 8 27 | num_res_blocks: 2 28 | channel_mult: 29 | - 1 30 | - 4 31 | - 8 32 | num_heads: 8 33 | first_stage_config: 34 | target: ldm.models.autoencoder.VQModelInterface 35 | params: 36 | embed_dim: 3 37 | n_embed: 8192 38 | monitor: val/rec_loss 39 | ddconfig: 40 | double_z: false 41 | z_channels: 3 42 | resolution: 256 43 | in_channels: 3 44 | out_ch: 3 45 | ch: 128 46 | ch_mult: 47 | - 1 48 | - 2 49 | - 4 50 | num_res_blocks: 2 51 | attn_resolutions: [] 52 | dropout: 0.0 53 | lossconfig: 54 | target: torch.nn.Identity 55 | cond_stage_config: 56 | target: ldm.modules.encoders.modules.SpatialRescaler 57 | params: 58 | n_stages: 2 59 | in_channels: 182 60 | out_channels: 3 61 | data: 62 | target: main.DataModuleFromConfig 63 | params: 64 | batch_size: 8 65 | wrap: false 66 | num_workers: 10 67 | train: 68 | target: ldm.data.landscapes.RFWTrain 69 | params: 70 | size: 768 71 | crop_size: 512 72 | segmentation_to_float32: true 73 | validation: 74 | target: ldm.data.landscapes.RFWValidation 75 | params: 76 | size: 768 77 | crop_size: 512 78 | segmentation_to_float32: true 79 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/ldm/text2img256/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 2.0e-06 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0195 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: image 11 | cond_stage_key: caption 12 | image_size: 64 13 | channels: 3 14 | cond_stage_trainable: true 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | unet_config: 18 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 19 | params: 20 
| image_size: 64 21 | in_channels: 3 22 | out_channels: 3 23 | model_channels: 192 24 | attention_resolutions: 25 | - 8 26 | - 4 27 | - 2 28 | num_res_blocks: 2 29 | channel_mult: 30 | - 1 31 | - 2 32 | - 3 33 | - 5 34 | num_head_channels: 32 35 | use_spatial_transformer: true 36 | transformer_depth: 1 37 | context_dim: 640 38 | first_stage_config: 39 | target: ldm.models.autoencoder.VQModelInterface 40 | params: 41 | embed_dim: 3 42 | n_embed: 8192 43 | ddconfig: 44 | double_z: false 45 | z_channels: 3 46 | resolution: 256 47 | in_channels: 3 48 | out_ch: 3 49 | ch: 128 50 | ch_mult: 51 | - 1 52 | - 2 53 | - 4 54 | num_res_blocks: 2 55 | attn_resolutions: [] 56 | dropout: 0.0 57 | lossconfig: 58 | target: torch.nn.Identity 59 | cond_stage_config: 60 | target: ldm.modules.encoders.modules.BERTEmbedder 61 | params: 62 | n_embed: 640 63 | n_layer: 32 64 | data: 65 | target: main.DataModuleFromConfig 66 | params: 67 | batch_size: 28 68 | num_workers: 10 69 | wrap: false 70 | train: 71 | target: ldm.data.previews.pytorch_dataset.PreviewsTrain 72 | params: 73 | size: 256 74 | validation: 75 | target: ldm.data.previews.pytorch_dataset.PreviewsValidation 76 | params: 77 | size: 256 78 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/scripts/download_first_stages.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | wget -O models/first_stage_models/kl-f4/model.zip https://ommer-lab.com/files/latent-diffusion/kl-f4.zip 3 | wget -O models/first_stage_models/kl-f8/model.zip https://ommer-lab.com/files/latent-diffusion/kl-f8.zip 4 | wget -O models/first_stage_models/kl-f16/model.zip https://ommer-lab.com/files/latent-diffusion/kl-f16.zip 5 | wget -O models/first_stage_models/kl-f32/model.zip https://ommer-lab.com/files/latent-diffusion/kl-f32.zip 6 | wget -O models/first_stage_models/vq-f4/model.zip https://ommer-lab.com/files/latent-diffusion/vq-f4.zip 7 | wget -O models/first_stage_models/vq-f4-noattn/model.zip https://ommer-lab.com/files/latent-diffusion/vq-f4-noattn.zip 8 | wget -O models/first_stage_models/vq-f8/model.zip https://ommer-lab.com/files/latent-diffusion/vq-f8.zip 9 | wget -O models/first_stage_models/vq-f8-n256/model.zip https://ommer-lab.com/files/latent-diffusion/vq-f8-n256.zip 10 | wget -O models/first_stage_models/vq-f16/model.zip https://ommer-lab.com/files/latent-diffusion/vq-f16.zip 11 | 12 | 13 | 14 | cd models/first_stage_models/kl-f4 15 | unzip -o model.zip 16 | 17 | cd ../kl-f8 18 | unzip -o model.zip 19 | 20 | cd ../kl-f16 21 | unzip -o model.zip 22 | 23 | cd ../kl-f32 24 | unzip -o model.zip 25 | 26 | cd ../vq-f4 27 | unzip -o model.zip 28 | 29 | cd ../vq-f4-noattn 30 | unzip -o model.zip 31 | 32 | cd ../vq-f8 33 | unzip -o model.zip 34 | 35 | cd ../vq-f8-n256 36 | unzip -o model.zip 37 | 38 | cd ../vq-f16 39 | unzip -o model.zip 40 | 41 | cd ../.. 
-------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/scripts/download_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | wget -O models/ldm/celeba256/celeba-256.zip https://ommer-lab.com/files/latent-diffusion/celeba.zip 3 | wget -O models/ldm/ffhq256/ffhq-256.zip https://ommer-lab.com/files/latent-diffusion/ffhq.zip 4 | wget -O models/ldm/lsun_churches256/lsun_churches-256.zip https://ommer-lab.com/files/latent-diffusion/lsun_churches.zip 5 | wget -O models/ldm/lsun_beds256/lsun_beds-256.zip https://ommer-lab.com/files/latent-diffusion/lsun_bedrooms.zip 6 | wget -O models/ldm/text2img256/model.zip https://ommer-lab.com/files/latent-diffusion/text2img.zip 7 | wget -O models/ldm/cin256/model.zip https://ommer-lab.com/files/latent-diffusion/cin.zip 8 | wget -O models/ldm/semantic_synthesis512/model.zip https://ommer-lab.com/files/latent-diffusion/semantic_synthesis.zip 9 | wget -O models/ldm/semantic_synthesis256/model.zip https://ommer-lab.com/files/latent-diffusion/semantic_synthesis256.zip 10 | wget -O models/ldm/bsr_sr/model.zip https://ommer-lab.com/files/latent-diffusion/sr_bsr.zip 11 | wget -O models/ldm/layout2img-openimages256/model.zip https://ommer-lab.com/files/latent-diffusion/layout2img_model.zip 12 | wget -O models/ldm/inpainting_big/model.zip https://ommer-lab.com/files/latent-diffusion/inpainting_big.zip 13 | 14 | 15 | 16 | cd models/ldm/celeba256 17 | unzip -o celeba-256.zip 18 | 19 | cd ../ffhq256 20 | unzip -o ffhq-256.zip 21 | 22 | cd ../lsun_churches256 23 | unzip -o lsun_churches-256.zip 24 | 25 | cd ../lsun_beds256 26 | unzip -o lsun_beds-256.zip 27 | 28 | cd ../text2img256 29 | unzip -o model.zip 30 | 31 | cd ../cin256 32 | unzip -o model.zip 33 | 34 | cd ../semantic_synthesis512 35 | unzip -o model.zip 36 | 37 | cd ../semantic_synthesis256 38 | unzip -o model.zip 39 | 40 | cd ../bsr_sr 41 | unzip -o model.zip 42 | 43 | cd ../layout2img-openimages256 44 | unzip -o model.zip 45 | 46 | cd ../inpainting_big 47 | unzip -o model.zip 48 | 49 | cd ../.. 
50 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/scripts/tests/test_watermark.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import fire 3 | from imwatermark import WatermarkDecoder 4 | 5 | 6 | def testit(img_path): 7 | bgr = cv2.imread(img_path) 8 | decoder = WatermarkDecoder('bytes', 136) 9 | watermark = decoder.decode(bgr, 'dwtDct') 10 | try: 11 | dec = watermark.decode('utf-8') 12 | except: 13 | dec = "null" 14 | print(dec) 15 | 16 | 17 | if __name__ == "__main__": 18 | fire.Fire(testit) -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='latent-diffusion', 5 | version='0.0.1', 6 | description='', 7 | packages=find_packages(), 8 | install_requires=[ 9 | 'torch', 10 | 'numpy', 11 | 'tqdm', 12 | ], 13 | ) -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Stability AI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/model-variants.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/model-variants.jpg -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/modelfigure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/modelfigure.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/rick.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/rick.jpeg -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-inpainting/inpainting.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-inpainting/inpainting.gif -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-inpainting/merged-leopards.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-inpainting/merged-leopards.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/d2i.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/d2i.gif -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/depth2fantasy.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/depth2fantasy.jpeg -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/depth2img01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/depth2img01.png -------------------------------------------------------------------------------- 
/codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/depth2img02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/depth2img02.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/merged-0000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/merged-0000.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/merged-0004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/merged-0004.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/merged-0005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/merged-0005.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/midas.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/midas.jpeg -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/old_man.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/old_man.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/img2img/mountains-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/img2img/mountains-1.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/img2img/mountains-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/img2img/mountains-2.png -------------------------------------------------------------------------------- 
/codes/diffusion_sd2/stablediffusion/assets/stable-samples/img2img/mountains-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/img2img/mountains-3.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/img2img/sketch-mountains-input.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/img2img/sketch-mountains-input.jpg -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/img2img/upscaling-in.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/img2img/upscaling-in.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/img2img/upscaling-out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/img2img/upscaling-out.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/houses_out.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/houses_out.jpeg -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/oldcar000.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/oldcar000.jpeg -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/oldcar500.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/oldcar500.jpeg -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/oldcar800.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/oldcar800.jpeg 
-------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/panda.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/panda.jpg -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/plates_out.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/plates_out.jpeg -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/unclip-variations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/unclip-variations.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/unclip-variations_noise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/unclip-variations_noise.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/000002025.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/000002025.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/000002035.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/000002035.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/768/merged-0001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/768/merged-0001.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/768/merged-0002.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/768/merged-0002.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/768/merged-0003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/768/merged-0003.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/768/merged-0004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/768/merged-0004.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/768/merged-0005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/768/merged-0005.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/768/merged-0006.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/768/merged-0006.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/merged-0001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/merged-0001.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/merged-0003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/merged-0003.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/merged-0005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/merged-0005.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/merged-0006.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/merged-0006.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/merged-0007.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/merged-0007.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/upscaling/merged-dog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/upscaling/merged-dog.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/upscaling/sampled-bear-x4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/upscaling/sampled-bear-x4.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/upscaling/snow-leopard-x4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/upscaling/snow-leopard-x4.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/checkpoints/checkpoints.txt: -------------------------------------------------------------------------------- 1 | Put unCLIP checkpoints here. 
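
The `checkpoints/checkpoints.txt` placeholder above marks where the stable-unclip weights are expected to live; the karlo configs that follow and the `v2-1-stable-unclip-*-inference.yaml` configs further below read files from this directory (for example `checkpoints/karlo_models/ViT-L-14_stats.th`). A small, hedged sanity check before launching the unCLIP scripts might look like the sketch below; only the stats filename is taken from the configs, the `.ckpt` name is a stand-in for whichever unCLIP checkpoint you downloaded.

# Hedged sketch: verify assumed unCLIP files are present under checkpoints/.
from pathlib import Path

expected = [
    Path("checkpoints") / "your-unclip-model.ckpt",              # placeholder name
    Path("checkpoints") / "karlo_models" / "ViT-L-14_stats.th",  # referenced by v2-1-stable-unclip-l-inference.yaml
]
for p in expected:
    print(f"{p}: {'found' if p.exists() else 'MISSING'}")
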
-------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/configs/karlo/decoder_900M_vit_l.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | type: t2i-decoder 3 | diffusion_sampler: uniform 4 | hparams: 5 | image_size: 64 6 | num_channels: 320 7 | num_res_blocks: 3 8 | channel_mult: '' 9 | attention_resolutions: 32,16,8 10 | num_heads: -1 11 | num_head_channels: 64 12 | num_heads_upsample: -1 13 | use_scale_shift_norm: true 14 | dropout: 0.1 15 | clip_dim: 768 16 | clip_emb_mult: 4 17 | text_ctx: 77 18 | xf_width: 1536 19 | xf_layers: 0 20 | xf_heads: 0 21 | xf_final_ln: false 22 | resblock_updown: true 23 | learn_sigma: true 24 | text_drop: 0.3 25 | clip_emb_type: image 26 | clip_emb_drop: 0.1 27 | use_plm: true 28 | 29 | diffusion: 30 | steps: 1000 31 | learn_sigma: true 32 | sigma_small: false 33 | noise_schedule: squaredcos_cap_v2 34 | use_kl: false 35 | predict_xstart: false 36 | rescale_learned_sigmas: true 37 | timestep_respacing: '' 38 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/configs/karlo/improved_sr_64_256_1.4B.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | type: improved_sr_64_256 3 | diffusion_sampler: uniform 4 | hparams: 5 | channels: 320 6 | depth: 3 7 | channels_multiple: 8 | - 1 9 | - 2 10 | - 3 11 | - 4 12 | dropout: 0.0 13 | 14 | diffusion: 15 | steps: 1000 16 | learn_sigma: false 17 | sigma_small: true 18 | noise_schedule: squaredcos_cap_v2 19 | use_kl: false 20 | predict_xstart: false 21 | rescale_learned_sigmas: true 22 | timestep_respacing: '7' 23 | 24 | 25 | sampling: 26 | timestep_respacing: '7' # fix 27 | clip_denoise: true 28 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/configs/karlo/prior_1B_vit_l.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | type: prior 3 | diffusion_sampler: uniform 4 | hparams: 5 | text_ctx: 77 6 | xf_width: 2048 7 | xf_layers: 20 8 | xf_heads: 32 9 | xf_final_ln: true 10 | text_drop: 0.2 11 | clip_dim: 768 12 | 13 | diffusion: 14 | steps: 1000 15 | learn_sigma: false 16 | sigma_small: true 17 | noise_schedule: squaredcos_cap_v2 18 | use_kl: false 19 | predict_xstart: true 20 | rescale_learned_sigmas: false 21 | timestep_respacing: '' 22 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/configs/stable-diffusion/intel/v2-inference-bf16.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 Intel Corporation 2 | # SPDX-License-Identifier: MIT 3 | 4 | model: 5 | base_learning_rate: 1.0e-4 6 | target: ldm.models.diffusion.ddpm.LatentDiffusion 7 | params: 8 | linear_start: 0.00085 9 | linear_end: 0.0120 10 | num_timesteps_cond: 1 11 | log_every_t: 200 12 | timesteps: 1000 13 | first_stage_key: "jpg" 14 | cond_stage_key: "txt" 15 | image_size: 64 16 | channels: 4 17 | cond_stage_trainable: false 18 | conditioning_key: crossattn 19 | monitor: val/loss_simple_ema 20 | scale_factor: 0.18215 21 | use_ema: False # we set this to false because this is an inference only config 22 | 23 | unet_config: 24 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 25 | params: 26 | use_checkpoint: False 27 | use_fp16: False 28 | use_bf16: True 29 | image_size: 32 
# unused 30 | in_channels: 4 31 | out_channels: 4 32 | model_channels: 320 33 | attention_resolutions: [ 4, 2, 1 ] 34 | num_res_blocks: 2 35 | channel_mult: [ 1, 2, 4, 4 ] 36 | num_head_channels: 64 # need to fix for flash-attn 37 | use_spatial_transformer: True 38 | use_linear_in_transformer: True 39 | transformer_depth: 1 40 | context_dim: 1024 41 | legacy: False 42 | 43 | first_stage_config: 44 | target: ldm.models.autoencoder.AutoencoderKL 45 | params: 46 | embed_dim: 4 47 | monitor: val/rec_loss 48 | ddconfig: 49 | #attn_type: "vanilla-xformers" 50 | double_z: true 51 | z_channels: 4 52 | resolution: 256 53 | in_channels: 3 54 | out_ch: 3 55 | ch: 128 56 | ch_mult: 57 | - 1 58 | - 2 59 | - 4 60 | - 4 61 | num_res_blocks: 2 62 | attn_resolutions: [] 63 | dropout: 0.0 64 | lossconfig: 65 | target: torch.nn.Identity 66 | 67 | cond_stage_config: 68 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 69 | params: 70 | freeze: True 71 | layer: "penultimate" 72 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/configs/stable-diffusion/intel/v2-inference-fp32.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 Intel Corporation 2 | # SPDX-License-Identifier: MIT 3 | 4 | model: 5 | base_learning_rate: 1.0e-4 6 | target: ldm.models.diffusion.ddpm.LatentDiffusion 7 | params: 8 | linear_start: 0.00085 9 | linear_end: 0.0120 10 | num_timesteps_cond: 1 11 | log_every_t: 200 12 | timesteps: 1000 13 | first_stage_key: "jpg" 14 | cond_stage_key: "txt" 15 | image_size: 64 16 | channels: 4 17 | cond_stage_trainable: false 18 | conditioning_key: crossattn 19 | monitor: val/loss_simple_ema 20 | scale_factor: 0.18215 21 | use_ema: False # we set this to false because this is an inference only config 22 | 23 | unet_config: 24 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 25 | params: 26 | use_checkpoint: False 27 | use_fp16: False 28 | image_size: 32 # unused 29 | in_channels: 4 30 | out_channels: 4 31 | model_channels: 320 32 | attention_resolutions: [ 4, 2, 1 ] 33 | num_res_blocks: 2 34 | channel_mult: [ 1, 2, 4, 4 ] 35 | num_head_channels: 64 # need to fix for flash-attn 36 | use_spatial_transformer: True 37 | use_linear_in_transformer: True 38 | transformer_depth: 1 39 | context_dim: 1024 40 | legacy: False 41 | 42 | first_stage_config: 43 | target: ldm.models.autoencoder.AutoencoderKL 44 | params: 45 | embed_dim: 4 46 | monitor: val/rec_loss 47 | ddconfig: 48 | #attn_type: "vanilla-xformers" 49 | double_z: true 50 | z_channels: 4 51 | resolution: 256 52 | in_channels: 3 53 | out_ch: 3 54 | ch: 128 55 | ch_mult: 56 | - 1 57 | - 2 58 | - 4 59 | - 4 60 | num_res_blocks: 2 61 | attn_resolutions: [] 62 | dropout: 0.0 63 | lossconfig: 64 | target: torch.nn.Identity 65 | 66 | cond_stage_config: 67 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 68 | params: 69 | freeze: True 70 | layer: "penultimate" 71 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/configs/stable-diffusion/intel/v2-inference-v-bf16.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 Intel Corporation 2 | # SPDX-License-Identifier: MIT 3 | 4 | model: 5 | base_learning_rate: 1.0e-4 6 | target: ldm.models.diffusion.ddpm.LatentDiffusion 7 | params: 8 | parameterization: "v" 9 | linear_start: 0.00085 10 | linear_end: 0.0120 11 | num_timesteps_cond: 1 
12 | log_every_t: 200 13 | timesteps: 1000 14 | first_stage_key: "jpg" 15 | cond_stage_key: "txt" 16 | image_size: 64 17 | channels: 4 18 | cond_stage_trainable: false 19 | conditioning_key: crossattn 20 | monitor: val/loss_simple_ema 21 | scale_factor: 0.18215 22 | use_ema: False # we set this to false because this is an inference only config 23 | 24 | unet_config: 25 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 26 | params: 27 | use_checkpoint: False 28 | use_fp16: False 29 | use_bf16: True 30 | image_size: 32 # unused 31 | in_channels: 4 32 | out_channels: 4 33 | model_channels: 320 34 | attention_resolutions: [ 4, 2, 1 ] 35 | num_res_blocks: 2 36 | channel_mult: [ 1, 2, 4, 4 ] 37 | num_head_channels: 64 # need to fix for flash-attn 38 | use_spatial_transformer: True 39 | use_linear_in_transformer: True 40 | transformer_depth: 1 41 | context_dim: 1024 42 | legacy: False 43 | 44 | first_stage_config: 45 | target: ldm.models.autoencoder.AutoencoderKL 46 | params: 47 | embed_dim: 4 48 | monitor: val/rec_loss 49 | ddconfig: 50 | #attn_type: "vanilla-xformers" 51 | double_z: true 52 | z_channels: 4 53 | resolution: 256 54 | in_channels: 3 55 | out_ch: 3 56 | ch: 128 57 | ch_mult: 58 | - 1 59 | - 2 60 | - 4 61 | - 4 62 | num_res_blocks: 2 63 | attn_resolutions: [] 64 | dropout: 0.0 65 | lossconfig: 66 | target: torch.nn.Identity 67 | 68 | cond_stage_config: 69 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 70 | params: 71 | freeze: True 72 | layer: "penultimate" 73 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/configs/stable-diffusion/intel/v2-inference-v-fp32.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 Intel Corporation 2 | # SPDX-License-Identifier: MIT 3 | 4 | model: 5 | base_learning_rate: 1.0e-4 6 | target: ldm.models.diffusion.ddpm.LatentDiffusion 7 | params: 8 | parameterization: "v" 9 | linear_start: 0.00085 10 | linear_end: 0.0120 11 | num_timesteps_cond: 1 12 | log_every_t: 200 13 | timesteps: 1000 14 | first_stage_key: "jpg" 15 | cond_stage_key: "txt" 16 | image_size: 64 17 | channels: 4 18 | cond_stage_trainable: false 19 | conditioning_key: crossattn 20 | monitor: val/loss_simple_ema 21 | scale_factor: 0.18215 22 | use_ema: False # we set this to false because this is an inference only config 23 | 24 | unet_config: 25 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 26 | params: 27 | use_checkpoint: False 28 | use_fp16: False 29 | image_size: 32 # unused 30 | in_channels: 4 31 | out_channels: 4 32 | model_channels: 320 33 | attention_resolutions: [ 4, 2, 1 ] 34 | num_res_blocks: 2 35 | channel_mult: [ 1, 2, 4, 4 ] 36 | num_head_channels: 64 # need to fix for flash-attn 37 | use_spatial_transformer: True 38 | use_linear_in_transformer: True 39 | transformer_depth: 1 40 | context_dim: 1024 41 | legacy: False 42 | 43 | first_stage_config: 44 | target: ldm.models.autoencoder.AutoencoderKL 45 | params: 46 | embed_dim: 4 47 | monitor: val/rec_loss 48 | ddconfig: 49 | #attn_type: "vanilla-xformers" 50 | double_z: true 51 | z_channels: 4 52 | resolution: 256 53 | in_channels: 3 54 | out_ch: 3 55 | ch: 128 56 | ch_mult: 57 | - 1 58 | - 2 59 | - 4 60 | - 4 61 | num_res_blocks: 2 62 | attn_resolutions: [] 63 | dropout: 0.0 64 | lossconfig: 65 | target: torch.nn.Identity 66 | 67 | cond_stage_config: 68 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 69 | params: 70 | freeze: True 71 | layer: 
"penultimate" 72 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/configs/stable-diffusion/v2-1-stable-unclip-h-inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: ldm.models.diffusion.ddpm.ImageEmbeddingConditionedLatentDiffusion 4 | params: 5 | embedding_dropout: 0.25 6 | parameterization: "v" 7 | linear_start: 0.00085 8 | linear_end: 0.0120 9 | log_every_t: 200 10 | timesteps: 1000 11 | first_stage_key: "jpg" 12 | cond_stage_key: "txt" 13 | image_size: 96 14 | channels: 4 15 | cond_stage_trainable: false 16 | conditioning_key: crossattn-adm 17 | scale_factor: 0.18215 18 | monitor: val/loss_simple_ema 19 | use_ema: False 20 | 21 | embedder_config: 22 | target: ldm.modules.encoders.modules.FrozenOpenCLIPImageEmbedder 23 | 24 | noise_aug_config: 25 | target: ldm.modules.encoders.modules.CLIPEmbeddingNoiseAugmentation 26 | params: 27 | timestep_dim: 1024 28 | noise_schedule_config: 29 | timesteps: 1000 30 | beta_schedule: squaredcos_cap_v2 31 | 32 | unet_config: 33 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 34 | params: 35 | num_classes: "sequential" 36 | adm_in_channels: 2048 37 | use_checkpoint: True 38 | image_size: 32 # unused 39 | in_channels: 4 40 | out_channels: 4 41 | model_channels: 320 42 | attention_resolutions: [ 4, 2, 1 ] 43 | num_res_blocks: 2 44 | channel_mult: [ 1, 2, 4, 4 ] 45 | num_head_channels: 64 # need to fix for flash-attn 46 | use_spatial_transformer: True 47 | use_linear_in_transformer: True 48 | transformer_depth: 1 49 | context_dim: 1024 50 | legacy: False 51 | 52 | first_stage_config: 53 | target: ldm.models.autoencoder.AutoencoderKL 54 | params: 55 | embed_dim: 4 56 | monitor: val/rec_loss 57 | ddconfig: 58 | attn_type: "vanilla-xformers" 59 | double_z: true 60 | z_channels: 4 61 | resolution: 256 62 | in_channels: 3 63 | out_ch: 3 64 | ch: 128 65 | ch_mult: 66 | - 1 67 | - 2 68 | - 4 69 | - 4 70 | num_res_blocks: 2 71 | attn_resolutions: [ ] 72 | dropout: 0.0 73 | lossconfig: 74 | target: torch.nn.Identity 75 | 76 | cond_stage_config: 77 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 78 | params: 79 | freeze: True 80 | layer: "penultimate" 81 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/configs/stable-diffusion/v2-1-stable-unclip-l-inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: ldm.models.diffusion.ddpm.ImageEmbeddingConditionedLatentDiffusion 4 | params: 5 | embedding_dropout: 0.25 6 | parameterization: "v" 7 | linear_start: 0.00085 8 | linear_end: 0.0120 9 | log_every_t: 200 10 | timesteps: 1000 11 | first_stage_key: "jpg" 12 | cond_stage_key: "txt" 13 | image_size: 96 14 | channels: 4 15 | cond_stage_trainable: false 16 | conditioning_key: crossattn-adm 17 | scale_factor: 0.18215 18 | monitor: val/loss_simple_ema 19 | use_ema: False 20 | 21 | embedder_config: 22 | target: ldm.modules.encoders.modules.ClipImageEmbedder 23 | params: 24 | model: "ViT-L/14" 25 | 26 | noise_aug_config: 27 | target: ldm.modules.encoders.modules.CLIPEmbeddingNoiseAugmentation 28 | params: 29 | clip_stats_path: "checkpoints/karlo_models/ViT-L-14_stats.th" 30 | timestep_dim: 768 31 | noise_schedule_config: 32 | timesteps: 1000 33 | beta_schedule: squaredcos_cap_v2 34 | 35 | unet_config: 36 | target: 
ldm.modules.diffusionmodules.openaimodel.UNetModel 37 | params: 38 | num_classes: "sequential" 39 | adm_in_channels: 1536 40 | use_checkpoint: True 41 | image_size: 32 # unused 42 | in_channels: 4 43 | out_channels: 4 44 | model_channels: 320 45 | attention_resolutions: [ 4, 2, 1 ] 46 | num_res_blocks: 2 47 | channel_mult: [ 1, 2, 4, 4 ] 48 | num_head_channels: 64 # need to fix for flash-attn 49 | use_spatial_transformer: True 50 | use_linear_in_transformer: True 51 | transformer_depth: 1 52 | context_dim: 1024 53 | legacy: False 54 | 55 | first_stage_config: 56 | target: ldm.models.autoencoder.AutoencoderKL 57 | params: 58 | embed_dim: 4 59 | monitor: val/rec_loss 60 | ddconfig: 61 | attn_type: "vanilla-xformers" 62 | double_z: true 63 | z_channels: 4 64 | resolution: 256 65 | in_channels: 3 66 | out_ch: 3 67 | ch: 128 68 | ch_mult: 69 | - 1 70 | - 2 71 | - 4 72 | - 4 73 | num_res_blocks: 2 74 | attn_resolutions: [ ] 75 | dropout: 0.0 76 | lossconfig: 77 | target: torch.nn.Identity 78 | 79 | cond_stage_config: 80 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 81 | params: 82 | freeze: True 83 | layer: "penultimate" -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/configs/stable-diffusion/v2-inference-v.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-4 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | parameterization: "v" 6 | linear_start: 0.00085 7 | linear_end: 0.0120 8 | num_timesteps_cond: 1 9 | log_every_t: 200 10 | timesteps: 1000 11 | first_stage_key: "jpg" 12 | cond_stage_key: "txt" 13 | image_size: 64 14 | channels: 4 15 | cond_stage_trainable: false 16 | conditioning_key: crossattn 17 | monitor: val/loss_simple_ema 18 | scale_factor: 0.18215 19 | use_ema: False # we set this to false because this is an inference only config 20 | 21 | unet_config: 22 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 23 | params: 24 | use_checkpoint: True 25 | use_fp16: True 26 | image_size: 32 # unused 27 | in_channels: 4 28 | out_channels: 4 29 | model_channels: 320 30 | attention_resolutions: [ 4, 2, 1 ] 31 | num_res_blocks: 2 32 | channel_mult: [ 1, 2, 4, 4 ] 33 | num_head_channels: 64 # need to fix for flash-attn 34 | use_spatial_transformer: True 35 | use_linear_in_transformer: True 36 | transformer_depth: 1 37 | context_dim: 1024 38 | legacy: False 39 | 40 | first_stage_config: 41 | target: ldm.models.autoencoder.AutoencoderKL 42 | params: 43 | embed_dim: 4 44 | monitor: val/rec_loss 45 | ddconfig: 46 | #attn_type: "vanilla-xformers" 47 | double_z: true 48 | z_channels: 4 49 | resolution: 256 50 | in_channels: 3 51 | out_ch: 3 52 | ch: 128 53 | ch_mult: 54 | - 1 55 | - 2 56 | - 4 57 | - 4 58 | num_res_blocks: 2 59 | attn_resolutions: [] 60 | dropout: 0.0 61 | lossconfig: 62 | target: torch.nn.Identity 63 | 64 | cond_stage_config: 65 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 66 | params: 67 | freeze: True 68 | layer: "penultimate" 69 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/configs/stable-diffusion/v2-inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-4 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | 
timesteps: 1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | use_ema: False # we set this to false because this is an inference only config 19 | 20 | unet_config: 21 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 22 | params: 23 | use_checkpoint: True 24 | use_fp16: True 25 | image_size: 32 # unused 26 | in_channels: 4 27 | out_channels: 4 28 | model_channels: 320 29 | attention_resolutions: [ 4, 2, 1 ] 30 | num_res_blocks: 2 31 | channel_mult: [ 1, 2, 4, 4 ] 32 | num_head_channels: 64 # need to fix for flash-attn 33 | use_spatial_transformer: True 34 | use_linear_in_transformer: True 35 | transformer_depth: 1 36 | context_dim: 1024 37 | legacy: False 38 | 39 | first_stage_config: 40 | target: ldm.models.autoencoder.AutoencoderKL 41 | params: 42 | embed_dim: 4 43 | monitor: val/rec_loss 44 | ddconfig: 45 | #attn_type: "vanilla-xformers" 46 | double_z: true 47 | z_channels: 4 48 | resolution: 256 49 | in_channels: 3 50 | out_ch: 3 51 | ch: 128 52 | ch_mult: 53 | - 1 54 | - 2 55 | - 4 56 | - 4 57 | num_res_blocks: 2 58 | attn_resolutions: [] 59 | dropout: 0.0 60 | lossconfig: 61 | target: torch.nn.Identity 62 | 63 | cond_stage_config: 64 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 65 | params: 66 | freeze: True 67 | layer: "penultimate" 68 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/configs/stable-diffusion/v2-midas-inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 5.0e-07 3 | target: ldm.models.diffusion.ddpm.LatentDepth2ImageDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false 15 | conditioning_key: hybrid 16 | scale_factor: 0.18215 17 | monitor: val/loss_simple_ema 18 | finetune_keys: null 19 | use_ema: False 20 | 21 | depth_stage_config: 22 | target: ldm.modules.midas.api.MiDaSInference 23 | params: 24 | model_type: "dpt_hybrid" 25 | 26 | unet_config: 27 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 28 | params: 29 | use_checkpoint: True 30 | image_size: 32 # unused 31 | in_channels: 5 32 | out_channels: 4 33 | model_channels: 320 34 | attention_resolutions: [ 4, 2, 1 ] 35 | num_res_blocks: 2 36 | channel_mult: [ 1, 2, 4, 4 ] 37 | num_head_channels: 64 # need to fix for flash-attn 38 | use_spatial_transformer: True 39 | use_linear_in_transformer: True 40 | transformer_depth: 1 41 | context_dim: 1024 42 | legacy: False 43 | 44 | first_stage_config: 45 | target: ldm.models.autoencoder.AutoencoderKL 46 | params: 47 | embed_dim: 4 48 | monitor: val/rec_loss 49 | ddconfig: 50 | #attn_type: "vanilla-xformers" 51 | double_z: true 52 | z_channels: 4 53 | resolution: 256 54 | in_channels: 3 55 | out_ch: 3 56 | ch: 128 57 | ch_mult: 58 | - 1 59 | - 2 60 | - 4 61 | - 4 62 | num_res_blocks: 2 63 | attn_resolutions: [ ] 64 | dropout: 0.0 65 | lossconfig: 66 | target: torch.nn.Identity 67 | 68 | cond_stage_config: 69 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 70 | params: 71 | freeze: True 72 | layer: "penultimate" 73 | 74 | 75 | 
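
All of the SD2 inference configs above share the same `AutoencoderKL` first stage and `scale_factor: 0.18215`; the UNet never sees raw autoencoder latents, only latents multiplied by this factor. The sketch below is a hedged illustration of that round trip on a model already loaded from `v2-inference.yaml` (as in the earlier loading sketch); the random tensor stands in for a real image batch normalised to [-1, 1].

# Hedged sketch: first-stage encode/decode for a loaded LatentDiffusion model.
# get_first_stage_encoding applies scale_factor; decode_first_stage undoes it.
import torch

x = torch.randn(1, 3, 512, 512)                      # stand-in for a [-1, 1] RGB batch
with torch.no_grad():
    posterior = model.encode_first_stage(x)          # AutoencoderKL posterior
    z = model.get_first_stage_encoding(posterior)    # 1 x 4 x 64 x 64, already scaled
    x_rec = model.decode_first_stage(z)              # back to image space
print(z.shape, x_rec.shape)
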
-------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/configs/stable-diffusion/x4-upscaling.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: ldm.models.diffusion.ddpm.LatentUpscaleDiffusion 4 | params: 5 | parameterization: "v" 6 | low_scale_key: "lr" 7 | linear_start: 0.0001 8 | linear_end: 0.02 9 | num_timesteps_cond: 1 10 | log_every_t: 200 11 | timesteps: 1000 12 | first_stage_key: "jpg" 13 | cond_stage_key: "txt" 14 | image_size: 128 15 | channels: 4 16 | cond_stage_trainable: false 17 | conditioning_key: "hybrid-adm" 18 | monitor: val/loss_simple_ema 19 | scale_factor: 0.08333 20 | use_ema: False 21 | 22 | low_scale_config: 23 | target: ldm.modules.diffusionmodules.upscaling.ImageConcatWithNoiseAugmentation 24 | params: 25 | noise_schedule_config: # image space 26 | linear_start: 0.0001 27 | linear_end: 0.02 28 | max_noise_level: 350 29 | 30 | unet_config: 31 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 32 | params: 33 | use_checkpoint: True 34 | num_classes: 1000 # timesteps for noise conditioning (here constant, just need one) 35 | image_size: 128 36 | in_channels: 7 37 | out_channels: 4 38 | model_channels: 256 39 | attention_resolutions: [ 2,4,8] 40 | num_res_blocks: 2 41 | channel_mult: [ 1, 2, 2, 4] 42 | disable_self_attentions: [True, True, True, False] 43 | disable_middle_self_attn: False 44 | num_heads: 8 45 | use_spatial_transformer: True 46 | transformer_depth: 1 47 | context_dim: 1024 48 | legacy: False 49 | use_linear_in_transformer: True 50 | 51 | first_stage_config: 52 | target: ldm.models.autoencoder.AutoencoderKL 53 | params: 54 | embed_dim: 4 55 | ddconfig: 56 | # attn_type: "vanilla-xformers" this model needs efficient attention to be feasible on HR data, also the decoder seems to break in half precision (UNet is fine though) 57 | double_z: True 58 | z_channels: 4 59 | resolution: 256 60 | in_channels: 3 61 | out_ch: 3 62 | ch: 128 63 | ch_mult: [ 1,2,4 ] # num_down = len(ch_mult)-1 64 | num_res_blocks: 2 65 | attn_resolutions: [ ] 66 | dropout: 0.0 67 | 68 | lossconfig: 69 | target: torch.nn.Identity 70 | 71 | cond_stage_config: 72 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 73 | params: 74 | freeze: True 75 | layer: "penultimate" 76 | 77 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/environment.yaml: -------------------------------------------------------------------------------- 1 | name: ldm 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - python=3.8.5 7 | - pip=20.3 8 | - cudatoolkit=11.3 9 | - pytorch=1.12.1 10 | - torchvision=0.13.1 11 | - numpy=1.23.1 12 | - pip: 13 | - albumentations==1.3.0 14 | - opencv-python==4.6.0.66 15 | - imageio==2.9.0 16 | - imageio-ffmpeg==0.4.2 17 | - pytorch-lightning==1.4.2 18 | - omegaconf==2.1.1 19 | - test-tube>=0.7.5 20 | - streamlit==1.12.1 21 | - einops==0.3.0 22 | - transformers==4.19.2 23 | - webdataset==0.2.5 24 | - kornia==0.6 25 | - open_clip_torch==2.0.2 26 | - invisible-watermark>=0.1.5 27 | - streamlit-drawable-canvas==0.8.0 28 | - torchmetrics==0.6.0 29 | - -e . 
30 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/ldm/data/__init__.py -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/data/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ldm.modules.midas.api import load_midas_transform 4 | 5 | 6 | class AddMiDaS(object): 7 | def __init__(self, model_type): 8 | super().__init__() 9 | self.transform = load_midas_transform(model_type) 10 | 11 | def pt2np(self, x): 12 | x = ((x + 1.0) * .5).detach().cpu().numpy() 13 | return x 14 | 15 | def np2pt(self, x): 16 | x = torch.from_numpy(x) * 2 - 1. 17 | return x 18 | 19 | def __call__(self, sample): 20 | # sample['jpg'] is tensor hwc in [-1, 1] at this point 21 | x = self.pt2np(sample['jpg']) 22 | x = self.transform({"image": x})["image"] 23 | sample['midas_in'] = x 24 | return sample -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/models/diffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/ldm/models/diffusion/__init__.py -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/models/diffusion/dpm_solver/__init__.py: -------------------------------------------------------------------------------- 1 | from .sampler import DPMSolverSampler -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/models/diffusion/sampling_util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def append_dims(x, target_dims): 6 | """Appends dimensions to the end of a tensor until it has target_dims dimensions. 
7 | From https://github.com/crowsonkb/k-diffusion/blob/master/k_diffusion/utils.py""" 8 | dims_to_append = target_dims - x.ndim 9 | if dims_to_append < 0: 10 | raise ValueError(f'input has {x.ndim} dims but target_dims is {target_dims}, which is less') 11 | return x[(...,) + (None,) * dims_to_append] 12 | 13 | 14 | def norm_thresholding(x0, value): 15 | s = append_dims(x0.pow(2).flatten(1).mean(1).sqrt().clamp(min=value), x0.ndim) 16 | return x0 * (value / s) 17 | 18 | 19 | def spatial_norm_thresholding(x0, value): 20 | # b c h w 21 | s = x0.pow(2).mean(1, keepdim=True).sqrt().clamp(min=value) 22 | return x0 * (value / s) -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/modules/diffusionmodules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/ldm/modules/diffusionmodules/__init__.py -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/modules/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/ldm/modules/distributions/__init__.py -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/modules/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/ldm/modules/encoders/__init__.py -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/modules/image_degradation/__init__.py: -------------------------------------------------------------------------------- 1 | from ldm.modules.image_degradation.bsrgan import degradation_bsrgan_variant as degradation_fn_bsr 2 | from ldm.modules.image_degradation.bsrgan_light import degradation_bsrgan_variant as degradation_fn_bsr_light 3 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/modules/image_degradation/utils/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/ldm/modules/image_degradation/utils/test.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/modules/karlo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/ldm/modules/karlo/__init__.py -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/modules/karlo/kakao/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/ldm/modules/karlo/kakao/__init__.py -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/modules/karlo/kakao/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/ldm/modules/karlo/kakao/models/__init__.py -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/modules/karlo/kakao/models/sr_256_1k.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------ 2 | # Karlo-v1.0.alpha 3 | # Copyright (c) 2022 KakaoBrain. All Rights Reserved. 4 | # ------------------------------------------------------------------------------------ 5 | 6 | from ldm.modules.karlo.kakao.models.sr_64_256 import SupRes64to256Progressive 7 | 8 | 9 | class SupRes256to1kProgressive(SupRes64to256Progressive): 10 | pass # no difference currently 11 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/modules/karlo/kakao/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------ 2 | # Adapted from Guided-Diffusion repo (https://github.com/openai/guided-diffusion) 3 | # ------------------------------------------------------------------------------------ 4 | 5 | 6 | from .diffusion import gaussian_diffusion as gd 7 | from .diffusion.respace import ( 8 | SpacedDiffusion, 9 | space_timesteps, 10 | ) 11 | 12 | 13 | def create_gaussian_diffusion( 14 | steps, 15 | learn_sigma, 16 | sigma_small, 17 | noise_schedule, 18 | use_kl, 19 | predict_xstart, 20 | rescale_learned_sigmas, 21 | timestep_respacing, 22 | ): 23 | betas = gd.get_named_beta_schedule(noise_schedule, steps) 24 | if use_kl: 25 | loss_type = gd.LossType.RESCALED_KL 26 | elif rescale_learned_sigmas: 27 | loss_type = gd.LossType.RESCALED_MSE 28 | else: 29 | loss_type = gd.LossType.MSE 30 | if not timestep_respacing: 31 | timestep_respacing = [steps] 32 | 33 | return SpacedDiffusion( 34 | use_timesteps=space_timesteps(steps, timestep_respacing), 35 | betas=betas, 36 | model_mean_type=( 37 | gd.ModelMeanType.EPSILON if not predict_xstart else gd.ModelMeanType.START_X 38 | ), 39 | model_var_type=( 40 | ( 41 | gd.ModelVarType.FIXED_LARGE 42 | if not sigma_small 43 | else gd.ModelVarType.FIXED_SMALL 44 | ) 45 | if not learn_sigma 46 | else gd.ModelVarType.LEARNED_RANGE 47 | ), 48 | loss_type=loss_type, 49 | ) 50 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/modules/karlo/kakao/modules/resample.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------ 2 | # Modified from Guided-Diffusion (https://github.com/openai/guided-diffusion) 3 | # ------------------------------------------------------------------------------------ 4 | 5 | from abc import abstractmethod 6 | 7 | import torch as th 8 | 9 | 
10 | def create_named_schedule_sampler(name, diffusion): 11 | """ 12 | Create a ScheduleSampler from a library of pre-defined samplers. 13 | 14 | :param name: the name of the sampler. 15 | :param diffusion: the diffusion object to sample for. 16 | """ 17 | if name == "uniform": 18 | return UniformSampler(diffusion) 19 | else: 20 | raise NotImplementedError(f"unknown schedule sampler: {name}") 21 | 22 | 23 | class ScheduleSampler(th.nn.Module): 24 | """ 25 | A distribution over timesteps in the diffusion process, intended to reduce 26 | variance of the objective. 27 | 28 | By default, samplers perform unbiased importance sampling, in which the 29 | objective's mean is unchanged. 30 | However, subclasses may override sample() to change how the resampled 31 | terms are reweighted, allowing for actual changes in the objective. 32 | """ 33 | 34 | @abstractmethod 35 | def weights(self): 36 | """ 37 | Get a numpy array of weights, one per diffusion step. 38 | 39 | The weights needn't be normalized, but must be positive. 40 | """ 41 | 42 | def sample(self, batch_size, device): 43 | """ 44 | Importance-sample timesteps for a batch. 45 | 46 | :param batch_size: the number of timesteps. 47 | :param device: the torch device to save to. 48 | :return: a tuple (timesteps, weights): 49 | - timesteps: a tensor of timestep indices. 50 | - weights: a tensor of weights to scale the resulting losses. 51 | """ 52 | w = self.weights() 53 | p = w / th.sum(w) 54 | indices = p.multinomial(batch_size, replacement=True) 55 | weights = 1 / (len(p) * p[indices]) 56 | return indices, weights 57 | 58 | 59 | class UniformSampler(ScheduleSampler): 60 | def __init__(self, diffusion): 61 | super(UniformSampler, self).__init__() 62 | self.diffusion = diffusion 63 | self.register_buffer( 64 | "_weights", th.ones([diffusion.num_timesteps]), persistent=False 65 | ) 66 | 67 | def weights(self): 68 | return self._weights 69 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/modules/midas/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/ldm/modules/midas/__init__.py -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/modules/midas/midas/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/ldm/modules/midas/midas/__init__.py -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/modules/midas/midas/base_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class BaseModel(torch.nn.Module): 5 | def load(self, path): 6 | """Load model from file. 
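The two Karlo helpers shown above, `create_gaussian_diffusion` (in `kakao/modules/__init__.py`) and `create_named_schedule_sampler` (in `kakao/modules/resample.py`), are normally used together: the first builds a (possibly respaced) `SpacedDiffusion` from a handful of flags, and the second draws the timesteps at which a training loss would be evaluated. The sketch below is illustrative only; it assumes the `stablediffusion` package above is importable, and every parameter value is a made-up example rather than a setting taken from this repository.

```python
# Illustrative sketch only; assumes the stablediffusion package above is on
# PYTHONPATH and that "linear" is an available named beta schedule. All values
# here are hypothetical examples, not settings from this repository.
import torch as th

from ldm.modules.karlo.kakao.modules import create_gaussian_diffusion
from ldm.modules.karlo.kakao.modules.resample import create_named_schedule_sampler

diffusion = create_gaussian_diffusion(
    steps=1000,                 # length of the training noise schedule
    learn_sigma=False,          # with sigma_small=False -> ModelVarType.FIXED_LARGE
    sigma_small=False,
    noise_schedule="linear",    # name resolved by gd.get_named_beta_schedule
    use_kl=False,               # use_kl=False, rescale_learned_sigmas=False -> plain MSE
    predict_xstart=False,       # predict epsilon rather than x_0
    rescale_learned_sigmas=False,
    timestep_respacing="",      # falsy -> keep all `steps` timesteps
)

sampler = create_named_schedule_sampler("uniform", diffusion)
t, weights = sampler.sample(batch_size=8, device=th.device("cpu"))
# t: (8,) tensor of timestep indices; for the uniform sampler every weight is
# 1.0 (1 / (T * 1/T)), so the importance-weighted loss reduces to a plain mean.
```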
7 | 8 | Args: 9 | path (str): file path 10 | """ 11 | parameters = torch.load(path, map_location=torch.device('cpu')) 12 | 13 | if "optimizer" in parameters: 14 | parameters = parameters["model"] 15 | 16 | self.load_state_dict(parameters) 17 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/requirements.txt: -------------------------------------------------------------------------------- 1 | albumentations==0.4.3 2 | opencv-python 3 | pudb==2019.2 4 | imageio==2.9.0 5 | imageio-ffmpeg==0.4.2 6 | pytorch-lightning==1.4.2 7 | torchmetrics==0.6 8 | omegaconf==2.1.1 9 | test-tube>=0.7.5 10 | streamlit>=0.73.1 11 | einops==0.3.0 12 | transformers==4.19.2 13 | webdataset==0.2.5 14 | open-clip-torch==2.7.0 15 | gradio==3.13.2 16 | kornia==0.6 17 | invisible-watermark>=0.1.5 18 | streamlit-drawable-canvas==0.8.0 19 | -e . 20 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/scripts/tests/test_watermark.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import fire 3 | from imwatermark import WatermarkDecoder 4 | 5 | 6 | def testit(img_path): 7 | bgr = cv2.imread(img_path) 8 | decoder = WatermarkDecoder('bytes', 136) 9 | watermark = decoder.decode(bgr, 'dwtDct') 10 | try: 11 | dec = watermark.decode('utf-8') 12 | except: 13 | dec = "null" 14 | print(dec) 15 | 16 | 17 | if __name__ == "__main__": 18 | fire.Fire(testit) -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='stable-diffusion', 5 | version='0.0.1', 6 | description='', 7 | packages=find_packages(), 8 | install_requires=[ 9 | 'torch', 10 | 'numpy', 11 | 'tqdm', 12 | ], 13 | ) -------------------------------------------------------------------------------- /codes/gan/bdpy/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *~ 3 | 4 | *.pyc 5 | .python-version 6 | .pydevproject 7 | .project 8 | *.ipynb 9 | .ipynb_checkpoints 10 | build 11 | dist 12 | *.egg-info 13 | 14 | *.npy 15 | *.mat 16 | *.h5 17 | 18 | .coverage 19 | htmlcov 20 | .pylintrc 21 | 22 | junk 23 | tmp 24 | test_local 25 | test_versions 26 | -------------------------------------------------------------------------------- /codes/gan/bdpy/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017-2018 Kamitani Lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /codes/gan/bdpy/README.md: -------------------------------------------------------------------------------- 1 | # BdPy 2 | 3 | [![PyPI version](https://badge.fury.io/py/bdpy.svg)](https://badge.fury.io/py/bdpy) 4 | [![GitHub license](https://img.shields.io/github/license/KamitaniLab/bdpy)](https://github.com/KamitaniLab/bdpy/blob/master/LICENSE) 5 | 6 | Python package for brain decoding analysis 7 | 8 | ## Requirements 9 | 10 | - Python 2.7, 3.6, or later 11 | - numpy 12 | - scipy 13 | - scikit-learn 14 | - h5py 15 | - hdf5storage 16 | - pyyaml 17 | 18 | ### Optional requirements 19 | 20 | - `dataform` module 21 | - pandas 22 | - `dl.caffe` module 23 | - Caffe 24 | - Pillow 25 | - tqdm 26 | - `dl.torch` module 27 | - PyTorch 28 | - Pillow 29 | - `fig` module 30 | - matplotlib 31 | - Pillow 32 | - `mri` module 33 | - nipy 34 | - nibabel 35 | - pandas 36 | - `recon.torch` module 37 | - PyTorch 38 | - Pillow 39 | 40 | ## Installation 41 | 42 | Latest stable release: 43 | 44 | ``` shell 45 | $ pip install bdpy 46 | ``` 47 | 48 | To install the latest development version ("master" branch of the repository), please run the following command. 49 | 50 | ```shell 51 | $ pip install git+https://github.com/KamitaniLab/bdpy.git 52 | ``` 53 | 54 | ## Packages 55 | 56 | - bdata: BdPy data format (BData) core package 57 | - dataform: Utilities for various data format 58 | - distcomp: Distributed computation utilities 59 | - dl: Deep learning utilities 60 | - feature: Utilities for DNN features 61 | - fig: Utilities for figure creation 62 | - ml: Machine learning utilities 63 | - mri: MRI utilities 64 | - opendata: Open data utilities 65 | - preproc: Utilities for preprocessing 66 | - recon: Reconstruction methods 67 | - stats: Utilities for statistics 68 | - util: Miscellaneous utilities 69 | 70 | ## BdPy data format 71 | 72 | BdPy data format (or BrainDecoderToolbox2 data format; BData) consists of two variables: dataset and metadata. **dataset** stores brain activity data (e.g., voxel signal value for fMRI data), target variables (e.g., ID of stimuli for vision experiments), and additional information specifying experimental design (e.g., run and block numbers for fMRI experiments). Each row corresponds to a single 'sample', and each column representes either single feature (voxel), target, or experiment design information. **metadata** contains data describing meta-information for each column in dataset. 73 | 74 | See [BData API examples](https://github.com/KamitaniLab/bdpy/blob/main/docs/bdata_api_examples.md) for useage of BData. 75 | 76 | ## Developers 77 | 78 | - Shuntaro C. Aoki (Kyoto Univ) 79 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | BdPy: Brain decoding toolbox for Python 3 | 4 | Developed by Kamitani Lab, Kyoto Univ. and ATR 5 | """ 6 | 7 | 8 | # `import bdpy` implicitly imports class `BData` (in package `bdata`) and 9 | # package `util`. 
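The README above describes the BData layout: a `dataset` matrix plus per-column `metadata`. As a compact, hypothetical illustration of that structure, the calls below mirror the repository's own `docs/bdata_api_examples.md` (reproduced later in this listing); the file name and metadata keys are placeholders.

```python
# Hypothetical sketch of the BData structure described above; 'data_file.h5'
# and the keys are placeholders taken from the bdpy docs, not files in this repo.
from bdpy import BData

bdata = BData('data_file.h5')

fmri = bdata.select('ROI_V1')          # (n_samples, n_voxels) slice of `dataset`
labels = bdata.select('image_index')   # target column(s), e.g. stimulus IDs
voxel_x = bdata.get_metadata('voxel_x', where='VoxelData')  # per-column metadata
```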
10 | from .bdata import BData 11 | from .bdata import vstack, metadata_equal 12 | from .util import * 13 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/bdata/__init__.py: -------------------------------------------------------------------------------- 1 | '''BdPy data package 2 | 3 | This package is a part of BdPy 4 | ''' 5 | 6 | 7 | from .bdata import BData 8 | from .utils import concat_dataset, vstack, metadata_equal 9 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/dataform/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | BdPy data format package 3 | 4 | This package is a part of BdPy 5 | """ 6 | 7 | from .pd import * 8 | from .datastore import * 9 | from .sparse import * 10 | from .features import * 11 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/dataform/pd.py: -------------------------------------------------------------------------------- 1 | '''Utilities for Pandas dataframe 2 | 3 | This file is a part of BdPy 4 | ''' 5 | 6 | 7 | __all__ = ['convert_dataframe', 'append_dataframe'] 8 | 9 | 10 | import pandas as pd 11 | 12 | 13 | def convert_dataframe(lst): 14 | '''Convert `lst` to Pandas dataframe 15 | 16 | Parameters 17 | ---------- 18 | lst : list of dicts 19 | 20 | Returns 21 | ------- 22 | Pandas dataframe 23 | ''' 24 | 25 | df_lst = (pd.DataFrame([item.values()], columns=item.keys()) for item in lst) 26 | df = pd.concat(df_lst, axis=0, ignore_index=True) 27 | return df 28 | 29 | 30 | def append_dataframe(df, **kwargs): 31 | '''Append a row to Pandas dataframe `df` 32 | 33 | Parameters 34 | ---------- 35 | df : Pandas dataframe 36 | kwargs : key-value of data to be added in `df` 37 | 38 | Returns 39 | ------- 40 | Pandas dataframe 41 | ''' 42 | 43 | df_append = pd.DataFrame({k : [kwargs[k]] for k in kwargs}) 44 | return df.append(df_append, ignore_index=True) 45 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/distcomp/__init__.py: -------------------------------------------------------------------------------- 1 | '''Distributed computation package 2 | 3 | This package is a part of BdPy. 
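As a quick, hypothetical illustration of the two Pandas helpers in `bdpy/dataform/pd.py` above (the column names are made up): note that `append_dataframe` calls the legacy `DataFrame.append`, which was removed in pandas 2.0, so the sketch assumes an older pandas.

```python
# Hypothetical usage of convert_dataframe / append_dataframe (defined above).
# Assumes pandas < 2.0, where DataFrame.append still exists.
from bdpy.dataform.pd import convert_dataframe, append_dataframe

rows = [
    {'subject': 'subj01', 'roi': 'early', 'score': 0.81},
    {'subject': 'subj02', 'roi': 'early', 'score': 0.78},
]
df = convert_dataframe(rows)                                  # one row per dict
df = append_dataframe(df, subject='subj05', roi='early', score=0.84)
print(df)
```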
4 | ''' 5 | 6 | from .distcomp import * 7 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/dl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/gan/bdpy/bdpy/dl/__init__.py -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/dl/torch/__init__.py: -------------------------------------------------------------------------------- 1 | from .torch import FeatureExtractor, ImageDataset 2 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/evals/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/gan/bdpy/bdpy/evals/__init__.py -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/feature/__init__.py: -------------------------------------------------------------------------------- 1 | '''Feature engineering module.''' 2 | 3 | from .feature import normalize_feature 4 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/feature/feature.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def normalize_feature(feature, 5 | channel_wise_mean=True, channel_wise_std=True, 6 | channel_axis=0, 7 | std_ddof=1, 8 | shift=None, scale=None, 9 | scaling_only=False): 10 | '''Normalize feature. 11 | 12 | Parameters 13 | ---------- 14 | feature : ndarray 15 | Feature to be normalized. 16 | channel_wise_mean, channel_wise_std : bool (default: True) 17 | If `True`, run channel-wise mean/SD normalization. 18 | channel_axis : int (default: 0) 19 | Channel axis. 20 | shift, scale : None, 'self', or ndarray (default: None) 21 | If shift/scale is `None`, nothing will be added/multiplied to the normalized features. 22 | If `'self'`, mean/SD of `feature` will be added/multiplied to the normalized features. 23 | If ndarrays are given, the arrays will be added/multiplied to the normalized features. 24 | std_ddof : int (default: 1) 25 | Delta degree of freedom for SD. 26 | 27 | Returns 28 | ------- 29 | ndarray 30 | Normalized (and scaled/shifted) features. 
31 | ''' 32 | 33 | if feature.ndim == 1: 34 | axes_along = None 35 | else: 36 | axes = list(range(feature.ndim)) 37 | axes.remove(channel_axis) 38 | axes_along = tuple(axes) 39 | 40 | if channel_wise_mean: 41 | feat_mean = np.mean(feature, axis=axes_along, keepdims=True) 42 | else: 43 | feat_mean = np.mean(feature, keepdims=True) 44 | 45 | if channel_wise_std: 46 | feat_std = np.std(feature, axis=axes_along, ddof=std_ddof, keepdims=True) 47 | else: 48 | feat_std = np.mean(np.std(feature, axis=axes_along, ddof=std_ddof, keepdims=True), keepdims=True) 49 | 50 | if isinstance(shift, str) and shift == 'self': 51 | shift = feat_mean 52 | 53 | if isinstance(scale, str) and scale == 'self': 54 | scale = feat_std 55 | 56 | if scaling_only: 57 | feat_n = (feature / feat_std) * scale 58 | else: 59 | feat_n = ((feature - feat_mean) / feat_std) 60 | 61 | if not scale is None: 62 | feat_n = feat_n * scale 63 | if not shift is None: 64 | feat_n = feat_n + shift 65 | 66 | if not feature.shape == feat_n.shape: 67 | try: 68 | feat_n.reshape(feature.shape) 69 | except: 70 | raise ValueError('Invalid shape of normalized features (original: %s, normalized: %s). ' 71 | + 'Possibly incorrect shift and/or scale.' 72 | % (str(feature.shape), str(feat_n.shape))) 73 | 74 | return feat_n 75 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/fig/__init__.py: -------------------------------------------------------------------------------- 1 | '''Figure package 2 | 3 | This package is a part of BdPy. 4 | ''' 5 | 6 | from .fig import * 7 | from .tile_images import tile_images 8 | from .draw_group_image_set import draw_group_image_set 9 | from .makeplots import makeplots 10 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/ml/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | BdPy machine learning package 3 | 4 | This package is a part of BdPy 5 | """ 6 | 7 | 8 | from .learning import Classification, CrossValidation, ModelTraining, ModelTest 9 | from .crossvalidation import make_cvindex, make_crossvalidationindex, make_cvindex_generator 10 | from .crossvalidation import cvindex_groupwise 11 | from .ensemble import * 12 | from .regress import * 13 | from .searchlight import * 14 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/ml/ensemble.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utilities for ensemble learning 3 | """ 4 | 5 | from collections import Counter 6 | 7 | import numpy as np 8 | 9 | 10 | __all__ = ['get_majority'] 11 | 12 | 13 | def get_majority(data, axis=0): 14 | """ 15 | Returns a list of majority elements in each row or column. 16 | 17 | If more than two elements occupies the same numbers in each row or column, 18 | 'get_majority' returns the first-sorted element. 
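A minimal, hypothetical sketch of `normalize_feature` above: z-score a convolutional feature map channel by channel, then shift and scale it to reference statistics (for example, statistics of true DNN features). The array shapes and reference values are made up.

```python
# Hypothetical sketch for normalize_feature (defined above); shapes are made up.
import numpy as np

from bdpy.feature import normalize_feature

feat = np.random.randn(256, 56, 56)      # (channels, H, W); channel_axis=0
ref_mean = np.zeros((256, 1, 1))         # assumed reference statistics
ref_std = np.ones((256, 1, 1))

feat_norm = normalize_feature(
    feat,
    channel_wise_mean=True,
    channel_wise_std=True,
    channel_axis=0,
    shift=ref_mean,                      # added back after z-scoring
    scale=ref_std,                       # multiplied in after z-scoring
)
assert feat_norm.shape == feat.shape
```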
19 | 20 | Parameters 21 | ---------- 22 | data : array_like 23 | axis : 0 or 1, optional 24 | Axis in which elements are counted (default: 0) 25 | 26 | 27 | Returns 28 | ------- 29 | majority_list : list 30 | A list of majority elements 31 | """ 32 | 33 | majority_list = [] 34 | 35 | if axis == 0: 36 | data = np.transpose(data) 37 | 38 | for i in range(data.shape[0]): 39 | target = data[i].tolist() 40 | # Change KS for returning first element if the same numbers 41 | #c = Counter(target) 42 | c = Counter(np.sort(target)) 43 | majority = c.most_common(1) 44 | majority_list.append(majority[0][0]) 45 | 46 | return majority_list 47 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/ml/regress.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file is a part of BdPy 3 | """ 4 | 5 | 6 | __all__ = ['add_bias'] 7 | 8 | 9 | import numpy as np 10 | 11 | 12 | def add_bias(x, axis=0): 13 | """ 14 | Add bias terms to x 15 | 16 | Parameters 17 | ---------- 18 | x : array_like 19 | Data matrix 20 | axis : 0 or 1, optional 21 | Axis in which bias terms are added (default: 0) 22 | 23 | Returns 24 | ------- 25 | y : array_like 26 | Data matrix with bias terms 27 | """ 28 | 29 | if axis == 0: 30 | vlen = x.shape[1] 31 | y = np.concatenate((x, np.array([np.ones(vlen)])), axis=0) 32 | elif axis == 1: 33 | vlen = x.shape[0] 34 | y = np.concatenate((x, np.array([np.ones(vlen)]).T), axis=1) 35 | else: 36 | raise ValueError('axis should be either 0 or 1') 37 | 38 | return y 39 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/ml/searchlight.py: -------------------------------------------------------------------------------- 1 | '''Utilities for searchlight analysis.''' 2 | 3 | 4 | __all__ = ['get_neighbors'] 5 | 6 | 7 | import numpy as np 8 | 9 | 10 | def get_neighbors(xyz, space_xyz, shape='sphere', size=9): 11 | ''' 12 | Returns neighboring voxels (cluster). 13 | 14 | Parameters 15 | ---------- 16 | xyz : array_like, shape=(3,) or len=3 17 | Voxel XYZ coordinate in the center of the cluster. 18 | space_xyz : array_like, shape=(3, N) or (N, 3) 19 | XYZ coordinate of all voxels. 20 | shape : {'sphere'}, optional 21 | Shape of the cluster. 22 | size : float, optional 23 | Size of the cluster. 24 | 25 | Returns 26 | ------- 27 | cluster_index : array_like, dtype=bool, shape=(N,) 28 | Boolean index of voxels in the cluster. 
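Two small, hypothetical examples for the helpers above: a per-column majority vote with `get_majority`, and appending an intercept column with `add_bias`. All values are illustrative.

```python
# Hypothetical usage of get_majority and add_bias (both defined above).
import numpy as np

from bdpy.ml import add_bias, get_majority

# Majority vote across three classifiers (rows) for each sample (column).
preds = np.array([[1, 0, 2],
                  [1, 2, 2],
                  [0, 0, 2]])
print(get_majority(preds))    # default axis=0 -> per-column majority: [1, 0, 2]

# Append a bias (intercept) column of ones to a samples-by-features matrix.
X = np.random.randn(100, 5)
Xb = add_bias(X, axis=1)      # shape (100, 6); the last column is all ones
```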
29 | ''' 30 | 31 | # Input check 32 | if isinstance(xyz, list): 33 | xyz = np.array(xyz) 34 | 35 | if xyz.ndim != 1: 36 | raise TypeError('xyz should be 1-D array') 37 | 38 | if space_xyz.ndim != 2: 39 | raise TypeError('space_xyz should be 2-D array') 40 | 41 | # Fix input shape 42 | if space_xyz.shape[0] == 3: 43 | space_xyz = space_xyz.T 44 | 45 | if shape == 'sphere': 46 | dist = np.sum((space_xyz - xyz) ** 2, axis=1) 47 | cluster_index = dist <= size ** 2 48 | else: 49 | raise ValueError('Unknown shape: %s' % shape) 50 | 51 | return cluster_index 52 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/mri/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | BdPy MRI package 3 | 4 | This package is a part of BdPy 5 | """ 6 | 7 | from .load_epi import load_epi 8 | from .load_mri import load_mri 9 | from .roi import add_roimask, get_roiflag, add_roilabel, add_rois, merge_rois, add_hcp_rois, add_hcp_visual_cortex 10 | from .fmriprep import create_bdata_fmriprep, FmriprepData 11 | from .spm import create_bdata_spm_domestic 12 | from .image import export_brain_image 13 | from .glm import make_paradigm 14 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/mri/image.py: -------------------------------------------------------------------------------- 1 | '''bdpy.mri.image''' 2 | 3 | 4 | from itertools import product 5 | 6 | import numpy as np 7 | import nibabel 8 | 9 | from bdpy.mri import load_mri 10 | 11 | 12 | def export_brain_image(brain_data, template, xyz=None, out_file=None): 13 | '''Export a brain data array as a brain image. 14 | 15 | Parameters 16 | ---------- 17 | brain_data : array 18 | Brain data array, shape = (n_sample, n_voxels) 19 | template : str 20 | Path to a template brain image file 21 | xyz : array, optional 22 | Voxel xyz coordinates of the brain data array 23 | 24 | Returns 25 | ------- 26 | nibabel.Nifti1Image 27 | ''' 28 | 29 | if brain_data.ndim == 1: 30 | brain_data = brain_data[np.newaxis, :] 31 | 32 | if brain_data.shape[0] > 1: 33 | raise RuntimeError('4-D image is not supported yet.') 34 | 35 | template_image = nibabel.load(template) 36 | _, brain_xyz, _ = load_mri(template) 37 | 38 | out_table = {} 39 | if xyz is None: 40 | xyz = brain_xyz 41 | 42 | for i in range(brain_data.shape[1]): 43 | x, y, z = xyz[0, i], xyz[1, i], xyz[2, i] 44 | out_table.update({(x, y, z): brain_data[0, i]}) 45 | 46 | out_image_array = np.zeros(template_image.shape[:3]) 47 | for i, j, k in product(range(template_image.shape[0]), range(template_image.shape[1]), range(template_image.shape[2])): 48 | x, y, z = template_image.affine[:3, :3].dot([i, j, k]) + template_image.affine[:3, 3] 49 | if (x, y, z) in out_table: 50 | out_image_array[i, j, k] = out_table[(x, y, z)] 51 | 52 | out_image = nibabel.Nifti1Image(out_image_array, template_image.affine) 53 | 54 | return out_image 55 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/mri/load_epi.py: -------------------------------------------------------------------------------- 1 | '''Loading EPIs module. 2 | 3 | This file is a part of BdPy. 4 | ''' 5 | 6 | 7 | import itertools as itr 8 | import os 9 | import re 10 | import string 11 | 12 | import nipy 13 | import numpy as np 14 | import scipy.io as sio 15 | 16 | 17 | def load_epi(datafiles): 18 | '''Load EPI files. 19 | 20 | The returned data and xyz are flattened by C-like order. 
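A hypothetical sketch of the searchlight helper `get_neighbors` above: given a center coordinate and the coordinates of all voxels, it returns a boolean mask of the voxels inside a sphere of radius `size` (in the same units as the coordinates). The coordinates below are made up.

```python
# Hypothetical usage of get_neighbors (defined above); coordinates are made up.
import numpy as np

from bdpy.ml import get_neighbors

space_xyz = np.array([[ 0.0,  0.0,  0.0],    # one row per voxel, shape (N, 3)
                      [ 3.0,  0.0,  0.0],
                      [ 0.0,  8.0,  0.0],
                      [10.0, 10.0, 10.0]])

mask = get_neighbors([0, 0, 0], space_xyz, shape='sphere', size=5)
# mask -> [ True  True False False]: only voxels within 5 units of the center.
voxels_in_cluster = space_xyz[mask]
```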
21 | 22 | Parameters 23 | ---------- 24 | datafiles: list 25 | EPI image files. 26 | 27 | Returns 28 | ------- 29 | data: array_like, shape = (M, N) 30 | Voxel signal values (M: the number of samples, N: the nubmer of 31 | voxels). 32 | xyz_array: array_like, shape = (3, N) 33 | XYZ Coordiantes of voxels. 34 | ''' 35 | 36 | data_list = [] 37 | xyz = np.array([]) 38 | 39 | for df in datafiles: 40 | print("Loading %s" % df) 41 | 42 | # Load an EPI image 43 | img = nipy.load_image(df) 44 | 45 | xyz = _check_xyz(xyz, img) 46 | data_list.append(np.array(img.get_data().flatten(), dtype=np.float64)) 47 | 48 | data = np.vstack(data_list) 49 | 50 | return data, xyz 51 | 52 | 53 | def _check_xyz(xyz, img): 54 | '''Check voxel xyz consistency.''' 55 | 56 | xyz_current = _get_xyz(img.coordmap.affine, img.get_data().shape) 57 | 58 | if xyz.size == 0: 59 | xyz = xyz_current 60 | elif (xyz != xyz_current).any(): 61 | raise ValueError("Voxel XYZ coordinates are inconsistent across volumes") 62 | 63 | return xyz 64 | 65 | 66 | def _get_xyz(affine, volume_shape): 67 | '''Return voxel XYZ coordinates based on an affine matrix. 68 | 69 | Parameters 70 | ---------- 71 | affine : array 72 | Affine matrix. 73 | volume_shape : list 74 | Shape of the volume (i, j, k lnegth). 75 | 76 | Returns 77 | ------- 78 | array, shape = (3, N) 79 | x-, y-, and z-coordinates (N: the number of voxels). 80 | ''' 81 | 82 | i_len, j_len, k_len = volume_shape 83 | ijk = np.array(list(itr.product(range(i_len), 84 | range(j_len), 85 | range(k_len), 86 | [1]))).T 87 | 88 | return np.dot(affine, ijk)[:-1] 89 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/mri/load_mri.py: -------------------------------------------------------------------------------- 1 | '''load_mri''' 2 | 3 | 4 | import numpy as np 5 | import nipy 6 | 7 | 8 | def load_mri(fpath): 9 | '''Load a MRI image. 
10 | 11 | - Returns data as 2D array (sample x voxel) 12 | - Returns voxle xyz coordinates (3 x voxel) 13 | - Returns voxel ijk indexes (3 x voxel) 14 | - Data, xyz, and ijk are flattened by Fortran-like index order 15 | ''' 16 | img = nipy.load_image(fpath) 17 | 18 | data = img.get_data() 19 | if data.ndim == 4: 20 | data = data.reshape(-1, data.shape[-1], order='F').T 21 | i_len, j_len, k_len, t = img.shape 22 | affine = np.delete(np.delete(img.coordmap.affine, 3, axis=0), 3, axis=1) 23 | elif data.ndim == 3: 24 | data = data.flatten(order='F') 25 | i_len, j_len, k_len = img.shape 26 | affine = img.coordmap.affine 27 | else: 28 | raise ValueError('Invalid shape.') 29 | 30 | ijk = np.array(np.unravel_index(np.arange(i_len * j_len * k_len), 31 | (i_len, j_len, k_len), order='F')) 32 | ijk_b = np.vstack([ijk, np.ones((1, i_len * j_len * k_len))]) 33 | xyz_b = np.dot(affine, ijk_b) 34 | xyz = xyz_b[:-1] 35 | 36 | return data, xyz, ijk 37 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/opendata/__init__.py: -------------------------------------------------------------------------------- 1 | from .openneuro import makedata 2 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/preproc/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | BdPy preprocessing package 3 | 4 | This package is a part of BdPy 5 | """ 6 | 7 | 8 | from .interface import * 9 | from .select_top import * 10 | from .preprocessor import Preprocessor 11 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/preproc/select_top.py: -------------------------------------------------------------------------------- 1 | """ 2 | select_top 3 | 4 | This file is a part of BdPy. 
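A hypothetical call to `load_mri` above (the older `load_epi` shown earlier behaves analogously for lists of EPI volumes). The path is a placeholder, and the function needs the optional MRI dependencies (nipy, nibabel, pandas) listed in the bdpy README.

```python
# Hypothetical sketch for load_mri (defined above); the path is a placeholder
# and nipy (an optional bdpy dependency) must be installed.
from bdpy.mri import load_mri

data, xyz, ijk = load_mri('/path/to/epi.nii')
# 4-D input: data has shape (n_volumes, n_voxels); 3-D input: (n_voxels,).
# xyz: (3, n_voxels) world coordinates; ijk: (3, n_voxels) voxel indices,
# all flattened in Fortran order as described in the docstring above.
```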
5 | """ 6 | 7 | 8 | __all__ = ['select_top'] 9 | 10 | 11 | import numpy as np 12 | from .util import print_start_msg, print_finish_msg 13 | 14 | 15 | def select_top(data, value, num, axis=0, verbose=True): 16 | """ 17 | Select top `num` features of `value` from `data` 18 | 19 | Parameters 20 | ---------- 21 | data : array 22 | Data matrix 23 | value : array_like 24 | Vector of values 25 | num : int 26 | Number of selected features 27 | 28 | Returns 29 | ------- 30 | selected_data : array 31 | Selected data matrix 32 | selected_index : array 33 | Index of selected data 34 | """ 35 | 36 | if verbose: 37 | print_start_msg() 38 | 39 | num_elem = data.shape[axis] 40 | 41 | sorted_index = np.argsort(value)[::-1] 42 | 43 | rank = np.zeros(num_elem, dtype=np.int) 44 | rank[sorted_index] = np.array(range(0, num_elem)) 45 | 46 | selected_index_bool = rank < num 47 | 48 | if axis == 0: 49 | selected_data = data[selected_index_bool, :] 50 | selected_index = np.array(range(0, num_elem), dtype=np.int)[selected_index_bool] 51 | elif axis == 1: 52 | selected_data = data[:, selected_index_bool] 53 | selected_index = np.array(range(0, num_elem), dtype=np.int)[selected_index_bool] 54 | else: 55 | raise ValueError('Invalid axis') 56 | 57 | if verbose: 58 | print_finish_msg() 59 | 60 | return selected_data, selected_index 61 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/preproc/util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions for preprocessing 3 | """ 4 | 5 | 6 | import inspect 7 | from datetime import datetime 8 | 9 | 10 | def print_start_msg(): 11 | """ 12 | Print process starting message 13 | """ 14 | print("%s Running %s" % (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 15 | inspect.currentframe().f_back.f_code.co_name)) 16 | 17 | 18 | def print_finish_msg(): 19 | """ 20 | Print process finishing message 21 | """ 22 | print("%s DONE" % datetime.now().strftime('%Y-%m-%d %H:%M:%S')) 23 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/recon/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/recon/torch/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/stats/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | BdPy statistics package 3 | 4 | This package is a part of BdPy 5 | 6 | 7 | Functions: 8 | 9 | - `corrcoef` : Returns correlation coefficient between `x` and `y` 10 | - `corrmat` : Returns correlation matrix between `x` and `y` 11 | """ 12 | 13 | 14 | from .corr import * 15 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/util/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | BdPy utility package 3 | 4 | This package is a part of BdPy 5 | """ 6 | 7 | 8 | from .utils import * 9 | from .info import dump_info 10 | from .math import average_elemwise 11 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/util/info.py: -------------------------------------------------------------------------------- 1 | 
import datetime 2 | import hashlib 3 | import os 4 | import sys 5 | import time 6 | import uuid 7 | import warnings 8 | import yaml 9 | 10 | 11 | def dump_info(output_dir, script=None, parameters=None, info_file='info.yaml'): 12 | '''Dump runtime information.''' 13 | 14 | if script is not None: 15 | script_path = os.path.abspath(script) 16 | with open(script_path, 'r') as f: 17 | script_txt = f.read() 18 | if sys.version_info.major == 2: 19 | script_md5 = hashlib.md5(script_txt).hexdigest() 20 | else: 21 | script_md5 = hashlib.md5(script_txt.encode()).hexdigest() 22 | else: 23 | script_path = None 24 | script_txt = None 25 | script_md5 = None 26 | 27 | run_id = str(uuid.uuid1()) 28 | run_time = time.time() 29 | run_info = { 30 | 'run_time' : run_time, 31 | 'time_stamp' : datetime.datetime.fromtimestamp(run_time).strftime('%Y-%m-%d %H:%M:%S'), 32 | 'host' : os.uname()[1], 33 | 'hardware' : os.uname()[4], 34 | 'os' : os.uname()[0], 35 | 'os_release' : os.uname()[2], 36 | 'os_version' : os.uname()[3], 37 | 'user' : os.getlogin(), 38 | 'script_path': script_path, 39 | 'script_txt' : script_txt, 40 | 'script_md5' : script_md5, 41 | } 42 | 43 | if parameters is not None: 44 | parameters_fixed = {} 45 | for k, v in parameters.items(): 46 | if isinstance(v, type({}.keys())): 47 | v = list(v) 48 | parameters_fixed.update({k: v}) 49 | run_info.update({'parameters': parameters_fixed}) 50 | 51 | run_info_file = os.path.join(output_dir, info_file) 52 | 53 | if os.path.isfile(run_info_file): 54 | with open(run_info_file, 'r') as f: 55 | info_yaml = yaml.load(f, Loader=yaml.SafeLoader) 56 | while info_yaml is None: 57 | warnings.warn('Failed to load info from %s. Retrying...' 58 | % run_info_file) 59 | with open(run_info_file, 'r') as f: 60 | info_yaml = yaml.load(f, Loader=yaml.SafeLoader) 61 | 62 | else: 63 | info_yaml = {} 64 | 65 | if '_runtime_info' in info_yaml: 66 | pass 67 | else: 68 | info_yaml.update({'_runtime_info' : {}}) 69 | 70 | info_yaml['_runtime_info'].update({run_id: run_info}) 71 | 72 | with open(run_info_file, 'w') as f: 73 | f.write(yaml.dump(info_yaml, default_flow_style=False)) 74 | 75 | return run_info 76 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/util/math.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def average_elemwise(arrays, keepdims=False): 5 | '''Return element-wise mean of arrays. 6 | 7 | Parameters 8 | ---------- 9 | arrays : list of ndarrays 10 | keepdims : bool 11 | 12 | Raises 13 | ------ 14 | ndarray 15 | ''' 16 | 17 | n_array = len(arrays) 18 | 19 | max_dim_i = np.argmax([a.ndim for a in arrays]) 20 | max_array_shape = arrays[max_dim_i].shape 21 | 22 | arrays_sum = np.zeros(max_array_shape) 23 | 24 | for a in arrays: 25 | arrays_sum += a 26 | 27 | mean_array = arrays_sum / n_array 28 | 29 | if not keepdims: 30 | mean_array = np.squeeze(mean_array) 31 | 32 | return mean_array 33 | -------------------------------------------------------------------------------- /codes/gan/bdpy/docs/_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /codes/gan/bdpy/docs/bdata_api_examples.md: -------------------------------------------------------------------------------- 1 | # BData API examples 2 | 3 | ### Data API 4 | 5 | #### Import module and initialization. 
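A hypothetical call to `dump_info` above, which appends a runtime record (host, script text and MD5, parameters, timestamp) under `_runtime_info` in an `info.yaml` file inside the output directory. The directory and parameter names are made up.

```python
# Hypothetical usage of dump_info (defined above); paths and parameters are made up.
import os

from bdpy.util import dump_info

output_dir = './results/example_run'
os.makedirs(output_dir, exist_ok=True)

run_info = dump_info(
    output_dir,
    script=__file__,                      # the script's text and MD5 are recorded
    parameters={'subject': 'subj01', 'roi': 'early'},
)
# Writes/updates ./results/example_run/info.yaml with a new '_runtime_info' entry.
```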
6 | 7 | from bdpy import BData 8 | 9 | # Create an empty BData instance 10 | bdata = BData() 11 | 12 | # Load BData from a file 13 | bdata = BData('data_file.h5') 14 | 15 | #### Load data 16 | 17 | # Load BData from 'data_file.h5' 18 | bdata.load('data_file.h5') 19 | 20 | #### Show data 21 | 22 | # Show 'key' and 'description' of metadata 23 | bdata.show_meatadata() 24 | 25 | # Get 'value' of the metadata specified by 'key' 26 | voxel_x = bdata.get_metadata('voxel_x', where='VoxelData') 27 | 28 | #### Data extraction 29 | 30 | # Get an array of voxel data in V1 31 | data_v1 = bdata.select('ROI_V1') # shape=(M, num voxels in V1) 32 | 33 | # `select` accepts some operators 34 | data_v1v2 = bdata.select('ROI_V1 + ROI_V2') 35 | data_hvc = bdata.select('ROI_LOC + ROI_FFA + ROI_PPA - LOC_LVC') 36 | 37 | # Wildcard 38 | data_visual = data.select('ROI_V*') 39 | 40 | # Get labels ('image_index') in the dataset 41 | label_a = bdata.select('image_index') 42 | 43 | #### Data creation 44 | 45 | # Add new data 46 | x = numpy.random.rand(bdata.dataset.shape[0]) 47 | bdata.add(x, 'random_data') 48 | 49 | # Set description of metadata 50 | bdata.set_metadatadescription('random_data', 'Random data') 51 | 52 | # Save data 53 | bdata.save('output_file.h5') # File format is selected automatically by extension. .mat, .h5,and .npy are supported. 54 | -------------------------------------------------------------------------------- /codes/gan/bdpy/docs/dataform_features.md: -------------------------------------------------------------------------------- 1 | # Features and DecodedFeatures 2 | 3 | bdpy provides classes to handle DNN's (true) features and decoded features: `dataform.Features` and `dataform.DecodedFeatures`. 4 | 5 | ## Basic usage 6 | 7 | ``` python 8 | from bdpy.dataform import Features, DecodedFeatures 9 | 10 | 11 | ## Initialize 12 | 13 | features = Features('/path/to/features/dir') 14 | 15 | decoded_features = DecodedFeatures('/path/to/decoded/features/dir') 16 | 17 | ## Get features as an array 18 | 19 | feat = features.get(layer='conv1') 20 | 21 | decfeat = decoded_features.get(layer='conv1', subject='sub-01', roi='VC', label='stimulus-0001) # Decoded features for specified sample (label) 22 | decfeat = decoded_features.get(layer='conv1', subject='sub-01', roi='VC') # Decoded features from all avaiable samples 23 | 24 | # Decoded features with CV 25 | decfeat = decoded_features.get(layer='conv1', subject='sub-01', roi='VC', fold='cv_fold1) 26 | 27 | ## List labels 28 | 29 | feat_labels = features.labels 30 | 31 | decfeat_labels = decoded_features.labels # All available labels 32 | decfeat_labels = decoded_features.selected_labels # Labels assigned to decoded features previously obtained by `get` method 33 | ``` 34 | 35 | ## Feature statistics 36 | 37 | ``` python 38 | features.statistic('mean', layer='fc8') 39 | features.statistic('std', layer='fc8') # Default ddof = 1 40 | features.statistic('std, ddof=0', layer='fc8') 41 | 42 | decoded_features.statistic('mean', layer='fc8', subject='sub-01', roi='VC') 43 | decoded_features.statistic('std', layer='fc8', subject='sub-01', roi='VC') # Default ddof = 1 44 | decoded_features.statistic('std, ddof=0', layer='fc8', subject='sub-01', roi='VC') 45 | 46 | # Decoded features with CV 47 | decoded_features.statistic('mean', layer='fc8', subject='sub-01', roi='VC', fold='cv_fold1') # Mean within the specified fold 48 | decoded_features.statistic('mean', layer='fc8', subject='sub-01', roi='VC') 49 | 50 | # If `fold` is omitted for CV decoded features, 
decoded features are pooled across add CV folds and then the statistics are calculated. 51 | 52 | ``` 53 | 54 | 55 | -------------------------------------------------------------------------------- /codes/gan/bdpy/docs/index.md: -------------------------------------------------------------------------------- 1 | # Bdpy: Python Package for Brain Decoding 2 | -------------------------------------------------------------------------------- /codes/gan/bdpy/examples/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | bdpy 3 | data 4 | figures 5 | -------------------------------------------------------------------------------- /codes/gan/bdpy/setup.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Setup script for bdpy 3 | 4 | This file is a part of BdPy. 5 | ''' 6 | 7 | 8 | from setuptools import setup 9 | 10 | 11 | VERSION = '0.18' 12 | 13 | if __name__ == '__main__': 14 | 15 | # Long description 16 | with open('./README.md') as f: 17 | long_description = f.read() 18 | 19 | # Setup 20 | setup(name='bdpy', 21 | version=VERSION, 22 | description='Brain decoder toolbox for Python', 23 | long_description=long_description, 24 | long_description_content_type='text/markdown', 25 | author='Shuntaro C. Aoki', 26 | author_email='brainliner-admin@atr.jp', 27 | maintainer='Shuntaro C. Aoki', 28 | maintainer_email='brainliner-admin@atr.jp', 29 | url='https://github.com/KamitaniLab/bdpy', 30 | license='MIT', 31 | keywords='neuroscience, neuroimaging, brain decoding, fmri, machine learning', 32 | packages=['bdpy', 33 | 'bdpy.bdata', 34 | 'bdpy.dataform', 35 | 'bdpy.distcomp', 36 | 'bdpy.dl', 37 | 'bdpy.dl.torch', 38 | 'bdpy.evals', 39 | 'bdpy.feature', 40 | 'bdpy.fig', 41 | 'bdpy.ml', 42 | 'bdpy.mri', 43 | 'bdpy.opendata', 44 | 'bdpy.preproc', 45 | 'bdpy.recon', 46 | 'bdpy.recon.torch', 47 | 'bdpy.stats', 48 | 'bdpy.util'], 49 | install_requires=[ 50 | 'numpy', 51 | 'scipy', 52 | 'scikit-learn', 53 | 'h5py', 54 | 'hdf5storage', 55 | 'pyyaml' 56 | ]) 57 | -------------------------------------------------------------------------------- /codes/gan/bdpy/test/.gitignore: -------------------------------------------------------------------------------- 1 | bdpy -------------------------------------------------------------------------------- /codes/gan/bdpy/test/data/mri/epi0001.hdr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/gan/bdpy/test/data/mri/epi0001.hdr -------------------------------------------------------------------------------- /codes/gan/bdpy/test/data/mri/epi0001.img: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/gan/bdpy/test/data/mri/epi0001.img -------------------------------------------------------------------------------- /codes/gan/bdpy/test/data/mri/epi0002.hdr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/gan/bdpy/test/data/mri/epi0002.hdr -------------------------------------------------------------------------------- /codes/gan/bdpy/test/data/mri/epi0002.img: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/gan/bdpy/test/data/mri/epi0002.img -------------------------------------------------------------------------------- /codes/gan/bdpy/test/data/mri/epi0003.hdr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/gan/bdpy/test/data/mri/epi0003.hdr -------------------------------------------------------------------------------- /codes/gan/bdpy/test/data/mri/epi0003.img: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/gan/bdpy/test/data/mri/epi0003.img -------------------------------------------------------------------------------- /codes/gan/bdpy/test/data/mri/epi0004.hdr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/gan/bdpy/test/data/mri/epi0004.hdr -------------------------------------------------------------------------------- /codes/gan/bdpy/test/data/mri/epi0004.img: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/gan/bdpy/test/data/mri/epi0004.img -------------------------------------------------------------------------------- /codes/gan/bdpy/test/data/mri/epi0005.hdr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/gan/bdpy/test/data/mri/epi0005.hdr -------------------------------------------------------------------------------- /codes/gan/bdpy/test/data/mri/epi0005.img: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/gan/bdpy/test/data/mri/epi0005.img -------------------------------------------------------------------------------- /codes/gan/bdpy/test/test_distcomp.py: -------------------------------------------------------------------------------- 1 | '''Tests for distcomp''' 2 | 3 | 4 | from unittest import TestCase, TestLoader, TextTestRunner 5 | 6 | import os 7 | import shutil 8 | 9 | from bdpy.distcomp import DistComp 10 | 11 | 12 | class TestUtil(TestCase): 13 | def test_distcomp_file(self): 14 | lockdir = './tmp' 15 | comp_id = 'test-distcomp-fs' 16 | 17 | if os.path.exists(lockdir): 18 | shutil.rmtree(lockdir) 19 | os.mkdir(lockdir) 20 | 21 | # init 22 | distcomp = DistComp(lockdir='./tmp', comp_id=comp_id) 23 | self.assertTrue(os.path.isdir(lockdir)) 24 | self.assertFalse(distcomp.islocked()) 25 | 26 | # lock 27 | distcomp.lock() 28 | self.assertTrue(os.path.isfile(os.path.join(lockdir, 29 | comp_id + '.lock'))) 30 | self.assertTrue(distcomp.islocked()) 31 | 32 | # unlock 33 | distcomp.unlock() 34 | self.assertFalse(os.path.isfile(os.path.join(lockdir, 35 | comp_id + '.lock'))) 36 | self.assertFalse(distcomp.islocked()) 37 | 38 | # islocked_lock 39 | 
distcomp.islocked_lock() 40 | self.assertTrue(os.path.isfile(os.path.join(lockdir, 41 | comp_id + '.lock'))) 42 | self.assertTrue(distcomp.islocked()) 43 | 44 | shutil.rmtree(lockdir) 45 | 46 | def test_distcomp_sqlite3(self): 47 | db_path = './tmp/distcomp.db' 48 | comp_id = 'test-distcomp-sqlite3-1' 49 | 50 | if os.path.exists(db_path): 51 | os.remove(db_path) 52 | 53 | if not os.path.exists(os.path.dirname(db_path)): 54 | os.mkdir(os.path.dirname(db_path)) 55 | 56 | # init 57 | distcomp = DistComp(backend='sqlite3', db_path=db_path) 58 | self.assertTrue(os.path.isfile(db_path)) 59 | self.assertFalse(distcomp.islocked(comp_id)) 60 | 61 | # lock 62 | distcomp.lock(comp_id) 63 | self.assertTrue(distcomp.islocked(comp_id)) 64 | 65 | # unlock 66 | distcomp.unlock(comp_id) 67 | self.assertFalse(distcomp.islocked(comp_id)) 68 | 69 | # islocked_lock 70 | with self.assertRaises(NotImplementedError): 71 | distcomp.islocked_lock(comp_id) 72 | 73 | os.remove(db_path) 74 | 75 | 76 | if __name__ == '__main__': 77 | suite = TestLoader().loadTestsFromTestCase(TestUtil) 78 | TextTestRunner(verbosity=2).run(suite) 79 | -------------------------------------------------------------------------------- /codes/gan/bdpy/test/test_util.py: -------------------------------------------------------------------------------- 1 | '''Tests for bdpy.util''' 2 | 3 | 4 | from unittest import TestCase, TestLoader, TextTestRunner 5 | 6 | import numpy as np 7 | 8 | import bdpy 9 | 10 | 11 | class TestUtil(TestCase): 12 | '''Tests for 'util' module''' 13 | 14 | def test_create_groupvector_pass0001(self): 15 | '''Test for create_groupvector (list and scalar inputs).''' 16 | 17 | x = [1, 2, 3] 18 | y = 2 19 | 20 | exp_output = [1, 1, 2, 2, 3, 3] 21 | 22 | test_output = bdpy.create_groupvector(x, y) 23 | 24 | self.assertTrue((test_output == exp_output).all()) 25 | 26 | def test_create_groupvector_pass0002(self): 27 | '''Test for create_groupvector (list and list inputs).''' 28 | 29 | x = [1, 2, 3] 30 | y = [2, 4, 2] 31 | 32 | exp_output = [1, 1, 2, 2, 2, 2, 3, 3] 33 | 34 | test_output = bdpy.create_groupvector(x, y) 35 | 36 | self.assertTrue((test_output == exp_output).all()) 37 | 38 | def test_create_groupvector_pass0003(self): 39 | '''Test for create_groupvector (Numpy array and scalar inputs).''' 40 | 41 | x = np.array([1, 2, 3]) 42 | y = 2 43 | 44 | exp_output = np.array([1, 1, 2, 2, 3, 3]) 45 | 46 | test_output = bdpy.create_groupvector(x, y) 47 | 48 | np.testing.assert_array_equal(test_output, exp_output) 49 | 50 | def test_create_groupvector_pass0005(self): 51 | '''Test for create_groupvector (Numpy arrays inputs).''' 52 | 53 | x = np.array([1, 2, 3]) 54 | y = np.array([2, 4, 2]) 55 | 56 | exp_output = np.array([1, 1, 2, 2, 2, 2, 3, 3]) 57 | 58 | test_output = bdpy.create_groupvector(x, y) 59 | 60 | np.testing.assert_array_equal(test_output, exp_output) 61 | 62 | def test_create_groupvector_error(self): 63 | '''Test for create_groupvector (ValueError).''' 64 | 65 | x = [1, 2, 3] 66 | y = [0] 67 | 68 | self.assertRaises(ValueError, bdpy.create_groupvector, x, y) 69 | 70 | def test_divide_chunks(self): 71 | '''Test for divide_chunks.''' 72 | 73 | a = [1, 2, 3, 4, 5, 6, 7] 74 | 75 | # Test 1 76 | expected = [[1, 2, 3, 4], 77 | [5, 6, 7]] 78 | actual = bdpy.divide_chunks(a, chunk_size=4) 79 | self.assertEqual(actual, expected) 80 | 81 | # Test 2 82 | expected = [[1, 2, 3], 83 | [4, 5, 6], 84 | [7]] 85 | actual = bdpy.divide_chunks(a, chunk_size=3) 86 | self.assertEqual(actual, expected) 87 | 88 | 89 | if __name__ == 
'__main__': 90 | suite = TestLoader().loadTestsFromTestCase(TestUtil) 91 | TextTestRunner(verbosity=2).run(suite) 92 | -------------------------------------------------------------------------------- /codes/gan/bdpy/test/test_util_math.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase, TestLoader, TextTestRunner 2 | 3 | import numpy as np 4 | 5 | import bdpy 6 | 7 | 8 | class TestUtilMath(TestCase): 9 | 10 | def test_average_elemwise(self): 11 | a = np.array([1, 2, 3]) 12 | b = np.array([9, 8, 7]) 13 | ans_valid = np.array([5, 5, 5]) 14 | ans_test = bdpy.average_elemwise([a, b]) 15 | np.testing.assert_array_equal(ans_test, ans_valid) 16 | 17 | a = np.array([[1, 2, 3]]) 18 | b = np.array([[9, 8, 7]]) 19 | ans_valid = np.array([5, 5, 5]) 20 | ans_test = bdpy.average_elemwise([a, b]) 21 | np.testing.assert_array_equal(ans_test, ans_valid) 22 | 23 | a = np.array([[1, 2, 3]]) 24 | b = np.array([9, 8, 7]) 25 | ans_valid = np.array([5, 5, 5]) 26 | ans_test = bdpy.average_elemwise([a, b]) 27 | np.testing.assert_array_equal(ans_test, ans_valid) 28 | 29 | a = np.array([1, 2, 3]) 30 | b = np.array([[9, 8, 7]]) 31 | ans_valid = np.array([5, 5, 5]) 32 | ans_test = bdpy.average_elemwise([a, b]) 33 | np.testing.assert_array_equal(ans_test, ans_valid) 34 | 35 | def test_average_elemwise_keepdims(self): 36 | a = np.array([1, 2, 3]) 37 | b = np.array([9, 8, 7]) 38 | ans_valid = np.array([5, 5, 5]) 39 | ans_test = bdpy.average_elemwise([a, b], keepdims=True) 40 | np.testing.assert_array_equal(ans_test, ans_valid) 41 | 42 | a = np.array([[1, 2, 3]]) 43 | b = np.array([[9, 8, 7]]) 44 | ans_valid = np.array([[5, 5, 5]]) 45 | ans_test = bdpy.average_elemwise([a, b], keepdims=True) 46 | np.testing.assert_array_equal(ans_test, ans_valid) 47 | 48 | a = np.array([[1, 2, 3]]) 49 | b = np.array([9, 8, 7]) 50 | ans_valid = np.array([[5, 5, 5]]) 51 | ans_test = bdpy.average_elemwise([a, b], keepdims=True) 52 | np.testing.assert_array_equal(ans_test, ans_valid) 53 | 54 | a = np.array([1, 2, 3]) 55 | b = np.array([[9, 8, 7]]) 56 | ans_valid = np.array([[5, 5, 5]]) 57 | ans_test = bdpy.average_elemwise([a, b], keepdims=True) 58 | np.testing.assert_array_equal(ans_test, ans_valid) 59 | 60 | 61 | if __name__ == '__main__': 62 | suite = TestLoader().loadTestsFromTestCase(TestUtilMath) 63 | TextTestRunner(verbosity=2).run(suite) 64 | -------------------------------------------------------------------------------- /codes/gan/make_subjstim_vgg19.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.io 3 | from tqdm import tqdm 4 | import argparse 5 | import os 6 | 7 | def main(): 8 | parser = argparse.ArgumentParser() 9 | 10 | parser.add_argument( 11 | "--layer", 12 | type=str, 13 | default=None, 14 | help="layer of VGG19", 15 | ) 16 | parser.add_argument( 17 | "--subject", 18 | type=str, 19 | default=None, 20 | help="subject name: subj01 or subj02 or subj05 or subj07 for full-data subjects ", 21 | ) 22 | 23 | opt = parser.parse_args() 24 | subject=opt.subject 25 | layer = opt.layer 26 | datdir = '../../nsdfeat/vgg19_features/' 27 | savedir = f'../../nsdfeat//subjfeat/' 28 | nsd_expdesign = scipy.io.loadmat('../../nsd/nsddata/experiments/nsd/nsd_expdesign.mat') 29 | 30 | # Note that most of them are 1-base index! 
31 | # This is why I subtract 1 32 | sharedix = nsd_expdesign['sharedix'] -1 33 | stims = np.load(f'../../mrifeat/{subject}/{subject}_stims_ave.npy') 34 | feats = [] 35 | tr_idx = np.zeros(len(stims)) 36 | 37 | for idx, s in tqdm(enumerate(stims)): 38 | if s in sharedix: 39 | tr_idx[idx] = 0 40 | else: 41 | tr_idx[idx] = 1 42 | feat = scipy.io.loadmat(f'{datdir}/{layer}/nsd/org/VGG19-{layer}-nsd-org-{s:06}.mat') 43 | feats.append(feat['feat'].flatten()) 44 | 45 | feats = np.stack(feats) 46 | 47 | os.makedirs(savedir, exist_ok=True) 48 | 49 | feats_tr = feats[tr_idx==1,:] 50 | feats_te = feats[tr_idx==0,:] 51 | 52 | np.save(f'{savedir}/{subject}_{layer}_tr.npy',feats_tr) 53 | np.save(f'{savedir}/{subject}_{layer}_te.npy',feats_te) 54 | 55 | 56 | if __name__ == "__main__": 57 | main() 58 | -------------------------------------------------------------------------------- /codes/gan/make_vgg19fromdecode.py: -------------------------------------------------------------------------------- 1 | import argparse, os 2 | import numpy as np 3 | from tqdm import tqdm 4 | import torch 5 | from scipy.io import savemat 6 | import torch 7 | import numpy as np 8 | 9 | def main(): 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument( 12 | "--subject", 13 | type=str, 14 | default=None, 15 | help="subject name: subj01 or subj02 or subj05 or subj07 for full-data subjects ", 16 | ) 17 | opt = parser.parse_args() 18 | subject = opt.subject 19 | roinames = ['early','ventral','midventral','midlateral','lateral','parietal'] 20 | 21 | maps = { 22 | 'conv1_1': [64,224,224], 23 | 'conv1_2': [64,224,224], 24 | 'conv2_1': [128,112,112], 25 | 'conv2_2': [128,112,112], 26 | 'conv3_1': [256,56,56], 27 | 'conv3_2': [256,56,56], 28 | 'conv3_3': [256,56,56], 29 | 'conv3_4': [256,56,56], 30 | 'conv4_1': [512,28,28], 31 | 'conv4_2': [512,28,28], 32 | 'conv4_3': [512,28,28], 33 | 'conv4_4': [512,28,28], 34 | 'conv5_1': [512,14,14], 35 | 'conv5_2': [512,14,14], 36 | 'conv5_3': [512,14,14], 37 | 'conv5_4': [512,14,14], 38 | 'fc6': [1,4096], 39 | 'fc7': [1,4096], 40 | 'fc8': [1,1000], 41 | } 42 | datdir = f'../../decoded/{subject}/' 43 | savedir = f'../../decoded/gan_mod/' 44 | os.makedirs(savedir, exist_ok=True) 45 | 46 | for layer in tqdm(maps.keys()): 47 | print(f'Now Layer: {layer}') 48 | os.makedirs(f'{savedir}/{layer}/{subject}/streams/', exist_ok=True) 49 | feat = np.load(f'{datdir}/{subject}_{"_".join(roinames)}_scores_{layer}.npy') 50 | for i in range(feat.shape[0]): 51 | cfeat = feat[i,:].reshape(maps[layer])[np.newaxis] 52 | mdic = {"feat":cfeat} 53 | savemat(f'{savedir}/{layer}/{subject}/streams/VGG19-{layer}-{subject}-streams-{i:06}.mat', mdic) 54 | 55 | if __name__ == "__main__": 56 | main() 57 | -------------------------------------------------------------------------------- /codes/utils/make_subjstim.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.io 3 | from tqdm import tqdm 4 | import argparse 5 | import os 6 | 7 | def main(): 8 | parser = argparse.ArgumentParser() 9 | 10 | parser.add_argument( 11 | "--featname", 12 | type=str, 13 | default='', 14 | help="Target variable", 15 | ) 16 | parser.add_argument( 17 | "--use_stim", 18 | type=str, 19 | default='', 20 | help="ave or each", 21 | ) 22 | parser.add_argument( 23 | "--subject", 24 | type=str, 25 | default=None, 26 | help="subject name: subj01 or subj02 or subj05 or subj07 for full-data subjects ", 27 | ) 28 | 29 | opt = parser.parse_args() 30 | subject=opt.subject 31 | 
use_stim = opt.use_stim 32 | featname = opt.featname 33 | topdir = '../../nsdfeat/' 34 | savedir = f'{topdir}/subjfeat/' 35 | featdir = f'{topdir}/{featname}/' 36 | 37 | nsd_expdesign = scipy.io.loadmat('../../nsd/nsddata/experiments/nsd/nsd_expdesign.mat') 38 | 39 | # Note that most of them are 1-base index! 40 | # This is why I subtract 1 41 | sharedix = nsd_expdesign['sharedix'] -1 42 | 43 | if use_stim == 'ave': 44 | stims = np.load(f'../../mrifeat/{subject}/{subject}_stims_ave.npy') 45 | else: # Each 46 | stims = np.load(f'../../mrifeat/{subject}/{subject}_stims.npy') 47 | 48 | feats = [] 49 | tr_idx = np.zeros(len(stims)) 50 | 51 | for idx, s in tqdm(enumerate(stims)): 52 | if s in sharedix: 53 | tr_idx[idx] = 0 54 | else: 55 | tr_idx[idx] = 1 56 | feat = np.load(f'{featdir}/{s:06}.npy') 57 | feats.append(feat) 58 | 59 | feats = np.stack(feats) 60 | 61 | os.makedirs(savedir, exist_ok=True) 62 | 63 | feats_tr = feats[tr_idx==1,:] 64 | feats_te = feats[tr_idx==0,:] 65 | np.save(f'../../mrifeat/{subject}/{subject}_stims_tridx.npy',tr_idx) 66 | 67 | np.save(f'{savedir}/{subject}_{use_stim}_{featname}_tr.npy',feats_tr) 68 | np.save(f'{savedir}/{subject}_{use_stim}_{featname}_te.npy',feats_te) 69 | 70 | 71 | if __name__ == "__main__": 72 | main() 73 | -------------------------------------------------------------------------------- /codes/utils/nsd_access/__init__.py: -------------------------------------------------------------------------------- 1 | from .nsda import * 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | nibabel 2 | h5py 3 | matplotlib 4 | pycocotools 5 | IPython 6 | himalaya 7 | timm 8 | fairscale -------------------------------------------------------------------------------- /results_tech_paper.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/results_tech_paper.jpg -------------------------------------------------------------------------------- /visual_summary.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/visual_summary.jpg -------------------------------------------------------------------------------- /visual_summary_techpaper.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/visual_summary_techpaper.jpg --------------------------------------------------------------------------------