├── .gitignore ├── LICENSE ├── README.md ├── codes ├── caption │ └── BLIP │ │ ├── BLIP.gif │ │ ├── CODEOWNERS │ │ ├── CODE_OF_CONDUCT.md │ │ ├── LICENSE.txt │ │ ├── README.md │ │ ├── SECURITY.md │ │ ├── cog.yaml │ │ ├── configs │ │ ├── bert_config.json │ │ ├── caption_coco.yaml │ │ ├── med_config.json │ │ ├── nlvr.yaml │ │ ├── nocaps.yaml │ │ ├── pretrain.yaml │ │ ├── retrieval_coco.yaml │ │ ├── retrieval_flickr.yaml │ │ ├── retrieval_msrvtt.yaml │ │ └── vqa.yaml │ │ ├── data │ │ ├── __init__.py │ │ ├── coco_karpathy_dataset.py │ │ ├── flickr30k_dataset.py │ │ ├── nlvr_dataset.py │ │ ├── nocaps_dataset.py │ │ ├── pretrain_dataset.py │ │ ├── utils.py │ │ ├── video_dataset.py │ │ └── vqa_dataset.py │ │ ├── decode_captions.py │ │ ├── demo.ipynb │ │ ├── eval_nocaps.py │ │ ├── eval_retrieval_video.py │ │ ├── img2feat_blip.py │ │ ├── models │ │ ├── __init__.py │ │ ├── blip.py │ │ ├── blip_itm.py │ │ ├── blip_nlvr.py │ │ ├── blip_pretrain.py │ │ ├── blip_retrieval.py │ │ ├── blip_vqa.py │ │ ├── med.py │ │ ├── nlvr_encoder.py │ │ └── vit.py │ │ ├── predict.py │ │ ├── pretrain.py │ │ ├── requirements.txt │ │ ├── train_caption.py │ │ ├── train_nlvr.py │ │ ├── train_retrieval.py │ │ ├── train_vqa.py │ │ ├── transform │ │ └── randaugment.py │ │ └── utils.py ├── depth │ ├── dptemb2dpt.py │ └── img2feat_dpt.py ├── diffusion_sd1 │ ├── diffusion_decoding.py │ └── stable-diffusion │ │ ├── LICENSE │ │ ├── README.md │ │ ├── Stable_Diffusion_v1_Model_Card.md │ │ ├── assets │ │ ├── a-painting-of-a-fire.png │ │ ├── a-photograph-of-a-fire.png │ │ ├── a-shirt-with-a-fire-printed-on-it.png │ │ ├── a-shirt-with-the-inscription-'fire'.png │ │ ├── a-watercolor-painting-of-a-fire.png │ │ ├── birdhouse.png │ │ ├── fire.png │ │ ├── inpainting.png │ │ ├── modelfigure.png │ │ ├── rdm-preview.jpg │ │ ├── reconstruction1.png │ │ ├── reconstruction2.png │ │ ├── results.gif │ │ ├── rick.jpeg │ │ ├── stable-samples │ │ │ ├── img2img │ │ │ │ ├── mountains-1.png │ │ │ │ ├── mountains-2.png │ │ │ │ ├── mountains-3.png │ │ │ │ ├── sketch-mountains-input.jpg │ │ │ │ ├── upscaling-in.png │ │ │ │ └── upscaling-out.png │ │ │ └── txt2img │ │ │ │ ├── 000002025.png │ │ │ │ ├── 000002035.png │ │ │ │ ├── merged-0005.png │ │ │ │ ├── merged-0006.png │ │ │ │ └── merged-0007.png │ │ ├── the-earth-is-on-fire,-oil-on-canvas.png │ │ ├── txt2img-convsample.png │ │ ├── txt2img-preview.png │ │ └── v1-variants-scores.jpg │ │ ├── configs │ │ ├── autoencoder │ │ │ ├── autoencoder_kl_16x16x16.yaml │ │ │ ├── autoencoder_kl_32x32x4.yaml │ │ │ ├── autoencoder_kl_64x64x3.yaml │ │ │ └── autoencoder_kl_8x8x64.yaml │ │ ├── latent-diffusion │ │ │ ├── celebahq-ldm-vq-4.yaml │ │ │ ├── cin-ldm-vq-f8.yaml │ │ │ ├── cin256-v2.yaml │ │ │ ├── ffhq-ldm-vq-4.yaml │ │ │ ├── lsun_bedrooms-ldm-vq-4.yaml │ │ │ ├── lsun_churches-ldm-kl-8.yaml │ │ │ └── txt2img-1p4B-eval.yaml │ │ ├── retrieval-augmented-diffusion │ │ │ └── 768x768.yaml │ │ └── stable-diffusion │ │ │ └── v1-inference.yaml │ │ ├── data │ │ ├── DejaVuSans.ttf │ │ ├── example_conditioning │ │ │ ├── superresolution │ │ │ │ └── sample_0.jpg │ │ │ └── text_conditional │ │ │ │ └── sample_0.txt │ │ ├── imagenet_clsidx_to_label.txt │ │ ├── imagenet_train_hr_indices.p │ │ ├── imagenet_val_hr_indices.p │ │ ├── index_synset.yaml │ │ └── inpainting_examples │ │ │ ├── 6458524847_2f4c361183_k.png │ │ │ ├── 6458524847_2f4c361183_k_mask.png │ │ │ ├── 8399166846_f6fb4e4b8e_k.png │ │ │ ├── 8399166846_f6fb4e4b8e_k_mask.png │ │ │ ├── alex-iby-G_Pk4D9rMLs.png │ │ │ ├── alex-iby-G_Pk4D9rMLs_mask.png │ │ │ ├── bench2.png │ │ │ ├── 
bench2_mask.png │ │ │ ├── bertrand-gabioud-CpuFzIsHYJ0.png │ │ │ ├── bertrand-gabioud-CpuFzIsHYJ0_mask.png │ │ │ ├── billow926-12-Wc-Zgx6Y.png │ │ │ ├── billow926-12-Wc-Zgx6Y_mask.png │ │ │ ├── overture-creations-5sI6fQgYIuo.png │ │ │ ├── overture-creations-5sI6fQgYIuo_mask.png │ │ │ ├── photo-1583445095369-9c651e7e5d34.png │ │ │ └── photo-1583445095369-9c651e7e5d34_mask.png │ │ ├── environment.yaml │ │ ├── ldm │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── imagenet.py │ │ │ └── lsun.py │ │ ├── lr_scheduler.py │ │ ├── models │ │ │ ├── autoencoder.py │ │ │ └── diffusion │ │ │ │ ├── __init__.py │ │ │ │ ├── classifier.py │ │ │ │ ├── ddim.py │ │ │ │ ├── ddpm.py │ │ │ │ ├── dpm_solver │ │ │ │ ├── __init__.py │ │ │ │ ├── dpm_solver.py │ │ │ │ └── sampler.py │ │ │ │ └── plms.py │ │ ├── modules │ │ │ ├── attention.py │ │ │ ├── diffusionmodules │ │ │ │ ├── __init__.py │ │ │ │ ├── model.py │ │ │ │ ├── openaimodel.py │ │ │ │ └── util.py │ │ │ ├── distributions │ │ │ │ ├── __init__.py │ │ │ │ └── distributions.py │ │ │ ├── ema.py │ │ │ ├── encoders │ │ │ │ ├── __init__.py │ │ │ │ └── modules.py │ │ │ ├── image_degradation │ │ │ │ ├── __init__.py │ │ │ │ ├── bsrgan.py │ │ │ │ ├── bsrgan_light.py │ │ │ │ ├── utils │ │ │ │ │ └── test.png │ │ │ │ └── utils_image.py │ │ │ ├── losses │ │ │ │ ├── __init__.py │ │ │ │ ├── contperceptual.py │ │ │ │ └── vqperceptual.py │ │ │ └── x_transformer.py │ │ └── util.py │ │ ├── main.py │ │ ├── models │ │ ├── first_stage_models │ │ │ ├── kl-f16 │ │ │ │ └── config.yaml │ │ │ ├── kl-f32 │ │ │ │ └── config.yaml │ │ │ ├── kl-f4 │ │ │ │ └── config.yaml │ │ │ ├── kl-f8 │ │ │ │ └── config.yaml │ │ │ ├── vq-f16 │ │ │ │ └── config.yaml │ │ │ ├── vq-f4-noattn │ │ │ │ └── config.yaml │ │ │ ├── vq-f4 │ │ │ │ └── config.yaml │ │ │ ├── vq-f8-n256 │ │ │ │ └── config.yaml │ │ │ └── vq-f8 │ │ │ │ └── config.yaml │ │ └── ldm │ │ │ ├── bsr_sr │ │ │ └── config.yaml │ │ │ ├── celeba256 │ │ │ └── config.yaml │ │ │ ├── cin256 │ │ │ └── config.yaml │ │ │ ├── ffhq256 │ │ │ └── config.yaml │ │ │ ├── inpainting_big │ │ │ └── config.yaml │ │ │ ├── layout2img-openimages256 │ │ │ └── config.yaml │ │ │ ├── lsun_beds256 │ │ │ └── config.yaml │ │ │ ├── lsun_churches256 │ │ │ └── config.yaml │ │ │ ├── semantic_synthesis256 │ │ │ └── config.yaml │ │ │ ├── semantic_synthesis512 │ │ │ └── config.yaml │ │ │ └── text2img256 │ │ │ └── config.yaml │ │ ├── notebook_helpers.py │ │ ├── scripts │ │ ├── download_first_stages.sh │ │ ├── download_models.sh │ │ ├── img2img.py │ │ ├── inpaint.py │ │ ├── knn2img.py │ │ ├── latent_imagenet_diffusion.ipynb │ │ ├── sample_diffusion.py │ │ ├── tests │ │ │ └── test_watermark.py │ │ ├── train_searcher.py │ │ └── txt2img.py │ │ └── setup.py ├── diffusion_sd2 │ ├── diffusion_decoding.py │ └── stablediffusion │ │ ├── .gitignore │ │ ├── LICENSE │ │ ├── LICENSE-MODEL │ │ ├── README.md │ │ ├── assets │ │ ├── model-variants.jpg │ │ ├── modelfigure.png │ │ ├── rick.jpeg │ │ ├── stable-inpainting │ │ │ ├── inpainting.gif │ │ │ └── merged-leopards.png │ │ └── stable-samples │ │ │ ├── depth2img │ │ │ ├── d2i.gif │ │ │ ├── depth2fantasy.jpeg │ │ │ ├── depth2img01.png │ │ │ ├── depth2img02.png │ │ │ ├── merged-0000.png │ │ │ ├── merged-0004.png │ │ │ ├── merged-0005.png │ │ │ ├── midas.jpeg │ │ │ └── old_man.png │ │ │ ├── img2img │ │ │ ├── mountains-1.png │ │ │ ├── mountains-2.png │ │ │ ├── mountains-3.png │ │ │ ├── sketch-mountains-input.jpg │ │ │ ├── upscaling-in.png │ │ │ └── upscaling-out.png │ │ │ ├── stable-unclip │ │ │ ├── houses_out.jpeg │ │ │ ├── oldcar000.jpeg │ │ 
│ ├── oldcar500.jpeg │ │ │ ├── oldcar800.jpeg │ │ │ ├── panda.jpg │ │ │ ├── plates_out.jpeg │ │ │ ├── unclip-variations.png │ │ │ └── unclip-variations_noise.png │ │ │ ├── txt2img │ │ │ ├── 768 │ │ │ │ ├── merged-0001.png │ │ │ │ ├── merged-0002.png │ │ │ │ ├── merged-0003.png │ │ │ │ ├── merged-0004.png │ │ │ │ ├── merged-0005.png │ │ │ │ └── merged-0006.png │ │ │ ├── 000002025.png │ │ │ ├── 000002035.png │ │ │ ├── merged-0001.png │ │ │ ├── merged-0003.png │ │ │ ├── merged-0005.png │ │ │ ├── merged-0006.png │ │ │ └── merged-0007.png │ │ │ └── upscaling │ │ │ ├── merged-dog.png │ │ │ ├── sampled-bear-x4.png │ │ │ └── snow-leopard-x4.png │ │ ├── checkpoints │ │ └── checkpoints.txt │ │ ├── configs │ │ ├── karlo │ │ │ ├── decoder_900M_vit_l.yaml │ │ │ ├── improved_sr_64_256_1.4B.yaml │ │ │ └── prior_1B_vit_l.yaml │ │ └── stable-diffusion │ │ │ ├── intel │ │ │ ├── v2-inference-bf16.yaml │ │ │ ├── v2-inference-fp32.yaml │ │ │ ├── v2-inference-v-bf16.yaml │ │ │ └── v2-inference-v-fp32.yaml │ │ │ ├── v2-1-stable-unclip-h-inference.yaml │ │ │ ├── v2-1-stable-unclip-l-inference.yaml │ │ │ ├── v2-inference-v.yaml │ │ │ ├── v2-inference.yaml │ │ │ ├── v2-inpainting-inference.yaml │ │ │ ├── v2-midas-inference.yaml │ │ │ └── x4-upscaling.yaml │ │ ├── doc │ │ └── UNCLIP.MD │ │ ├── environment.yaml │ │ ├── ldm │ │ ├── data │ │ │ ├── __init__.py │ │ │ └── util.py │ │ ├── models │ │ │ ├── autoencoder.py │ │ │ └── diffusion │ │ │ │ ├── __init__.py │ │ │ │ ├── ddim.py │ │ │ │ ├── ddpm.py │ │ │ │ ├── dpm_solver │ │ │ │ ├── __init__.py │ │ │ │ ├── dpm_solver.py │ │ │ │ └── sampler.py │ │ │ │ ├── plms.py │ │ │ │ └── sampling_util.py │ │ ├── modules │ │ │ ├── attention.py │ │ │ ├── diffusionmodules │ │ │ │ ├── __init__.py │ │ │ │ ├── model.py │ │ │ │ ├── openaimodel.py │ │ │ │ ├── upscaling.py │ │ │ │ └── util.py │ │ │ ├── distributions │ │ │ │ ├── __init__.py │ │ │ │ └── distributions.py │ │ │ ├── ema.py │ │ │ ├── encoders │ │ │ │ ├── __init__.py │ │ │ │ └── modules.py │ │ │ ├── image_degradation │ │ │ │ ├── __init__.py │ │ │ │ ├── bsrgan.py │ │ │ │ ├── bsrgan_light.py │ │ │ │ ├── utils │ │ │ │ │ └── test.png │ │ │ │ └── utils_image.py │ │ │ ├── karlo │ │ │ │ ├── __init__.py │ │ │ │ ├── diffusers_pipeline.py │ │ │ │ └── kakao │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── models │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── clip.py │ │ │ │ │ ├── decoder_model.py │ │ │ │ │ ├── prior_model.py │ │ │ │ │ ├── sr_256_1k.py │ │ │ │ │ └── sr_64_256.py │ │ │ │ │ ├── modules │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── diffusion │ │ │ │ │ │ ├── gaussian_diffusion.py │ │ │ │ │ │ └── respace.py │ │ │ │ │ ├── nn.py │ │ │ │ │ ├── resample.py │ │ │ │ │ ├── unet.py │ │ │ │ │ └── xf.py │ │ │ │ │ ├── sampler.py │ │ │ │ │ └── template.py │ │ │ └── midas │ │ │ │ ├── __init__.py │ │ │ │ ├── api.py │ │ │ │ ├── midas │ │ │ │ ├── __init__.py │ │ │ │ ├── base_model.py │ │ │ │ ├── blocks.py │ │ │ │ ├── dpt_depth.py │ │ │ │ ├── midas_net.py │ │ │ │ ├── midas_net_custom.py │ │ │ │ ├── transforms.py │ │ │ │ └── vit.py │ │ │ │ └── utils.py │ │ └── util.py │ │ ├── modelcard.md │ │ ├── requirements.txt │ │ ├── scripts │ │ ├── gradio │ │ │ ├── depth2img.py │ │ │ ├── inpainting.py │ │ │ └── superresolution.py │ │ ├── img2img.py │ │ ├── streamlit │ │ │ ├── depth2img.py │ │ │ ├── inpainting.py │ │ │ ├── stableunclip.py │ │ │ └── superresolution.py │ │ ├── tests │ │ │ └── test_watermark.py │ │ └── txt2img.py │ │ └── setup.py ├── gan │ ├── bdpy │ │ ├── .gitignore │ │ ├── LICENSE │ │ ├── README.md │ │ ├── bdpy │ │ │ ├── __init__.py │ │ │ ├── bdata │ │ │ │ ├── 
__init__.py │ │ │ │ ├── bdata.py │ │ │ │ ├── featureselector.py │ │ │ │ ├── metadata.py │ │ │ │ └── utils.py │ │ │ ├── dataform │ │ │ │ ├── __init__.py │ │ │ │ ├── datastore.py │ │ │ │ ├── features.py │ │ │ │ ├── pd.py │ │ │ │ └── sparse.py │ │ │ ├── distcomp │ │ │ │ ├── __init__.py │ │ │ │ └── distcomp.py │ │ │ ├── dl │ │ │ │ ├── __init__.py │ │ │ │ ├── caffe.py │ │ │ │ └── torch │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── models.py │ │ │ │ │ └── torch.py │ │ │ ├── evals │ │ │ │ ├── __init__.py │ │ │ │ └── metrics.py │ │ │ ├── feature │ │ │ │ ├── __init__.py │ │ │ │ └── feature.py │ │ │ ├── fig │ │ │ │ ├── __init__.py │ │ │ │ ├── draw_group_image_set.py │ │ │ │ ├── fig.py │ │ │ │ ├── makeplots.py │ │ │ │ └── tile_images.py │ │ │ ├── ml │ │ │ │ ├── __init__.py │ │ │ │ ├── crossvalidation.py │ │ │ │ ├── ensemble.py │ │ │ │ ├── learning.py │ │ │ │ ├── regress.py │ │ │ │ └── searchlight.py │ │ │ ├── mri │ │ │ │ ├── __init__.py │ │ │ │ ├── fmriprep.py │ │ │ │ ├── glm.py │ │ │ │ ├── image.py │ │ │ │ ├── load_epi.py │ │ │ │ ├── load_mri.py │ │ │ │ ├── roi.py │ │ │ │ └── spm.py │ │ │ ├── opendata │ │ │ │ ├── __init__.py │ │ │ │ └── openneuro.py │ │ │ ├── preproc │ │ │ │ ├── __init__.py │ │ │ │ ├── interface.py │ │ │ │ ├── preprocessor.py │ │ │ │ ├── select_top.py │ │ │ │ └── util.py │ │ │ ├── recon │ │ │ │ ├── __init__.py │ │ │ │ ├── torch │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── icnn.py │ │ │ │ └── utils.py │ │ │ ├── stats │ │ │ │ ├── __init__.py │ │ │ │ └── corr.py │ │ │ └── util │ │ │ │ ├── __init__.py │ │ │ │ ├── info.py │ │ │ │ ├── math.py │ │ │ │ └── utils.py │ │ ├── docs │ │ │ ├── _config.yml │ │ │ ├── bdata_api_examples.md │ │ │ ├── dataform_features.md │ │ │ └── index.md │ │ ├── examples │ │ │ └── .gitignore │ │ ├── setup.py │ │ └── test │ │ │ ├── .gitignore │ │ │ ├── data │ │ │ └── mri │ │ │ │ ├── epi0001.hdr │ │ │ │ ├── epi0001.img │ │ │ │ ├── epi0002.hdr │ │ │ │ ├── epi0002.img │ │ │ │ ├── epi0003.hdr │ │ │ │ ├── epi0003.img │ │ │ │ ├── epi0004.hdr │ │ │ │ ├── epi0004.img │ │ │ │ ├── epi0005.hdr │ │ │ │ └── epi0005.img │ │ │ ├── test_bdata.py │ │ │ ├── test_bdata_metadata.py │ │ │ ├── test_bdata_utils.py │ │ │ ├── test_cv.py │ │ │ ├── test_dataform_sparse.py │ │ │ ├── test_distcomp.py │ │ │ ├── test_evals.py │ │ │ ├── test_feature.py │ │ │ ├── test_featureselector.py │ │ │ ├── test_ml.py │ │ │ ├── test_ml_utils.py │ │ │ ├── test_mri.py │ │ │ ├── test_preproc.py │ │ │ ├── test_stats.py │ │ │ ├── test_util.py │ │ │ └── test_util_math.py │ ├── make_subjstim_vgg19.py │ ├── make_vgg19bdpy.py │ ├── make_vgg19fromdecode.py │ └── recon_icnn_image_vgg19_dgn_relu7gen_gd.py └── utils │ ├── identification.py │ ├── img2feat_decoded.py │ ├── img2feat_sd.py │ ├── make_subjmri.py │ ├── make_subjstim.py │ ├── nsd_access │ ├── __init__.py │ └── nsda.py │ └── ridge.py ├── requirements.txt ├── results_tech_paper.jpg ├── visual_summary.jpg └── visual_summary_techpaper.jpg /.gitignore: -------------------------------------------------------------------------------- 1 | # Project specific 2 | decoded/** 3 | identification/** 4 | mrifeat/** 5 | nsd/** 6 | nsdfeat/** 7 | 8 | # Thumbnails 9 | ._* 10 | .DS_Store 11 | # Python .gitignore 12 | 13 | # Byte-compiled / optimized / DLL files 14 | __pycache__/ 15 | *.py[cod] 16 | *$py.class 17 | 18 | # C extensions 19 | *.so 20 | 21 | # Distribution / packaging 22 | .Python 23 | build/ 24 | develop-eggs/ 25 | dist/ 26 | downloads/ 27 | eggs/ 28 | .eggs/ 29 | lib/ 30 | lib64/ 31 | parts/ 32 | sdist/ 33 | var/ 34 | wheels/ 35 | share/python-wheels/ 36 | *.egg-info/ 37 | 
.installed.cfg 38 | *.egg 39 | MANIFEST 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .nox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | *.py,cover 56 | .hypothesis/ 57 | .pytest_cache/ 58 | cover/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Jupyter Notebook 65 | .ipynb_checkpoints 66 | 67 | # IPython 68 | profile_default/ 69 | ipython_config.py 70 | 71 | # Environments 72 | .env 73 | .venv 74 | env/ 75 | venv/ 76 | ENV/ 77 | env.bak/ 78 | venv.bak/ 79 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Yu Takagi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /codes/caption/BLIP/BLIP.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/caption/BLIP/BLIP.gif -------------------------------------------------------------------------------- /codes/caption/BLIP/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Comment line immediately above ownership line is reserved for related gus information. Please be careful while editing. 2 | #ECCN:Open Source 3 | -------------------------------------------------------------------------------- /codes/caption/BLIP/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2022, Salesforce.com, Inc. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 5 | 6 | * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 7 | 8 | * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
9 | 10 | * Neither the name of Salesforce.com nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 11 | 12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 13 | -------------------------------------------------------------------------------- /codes/caption/BLIP/SECURITY.md: -------------------------------------------------------------------------------- 1 | ## Security 2 | 3 | Please report any security issue to [security@salesforce.com](mailto:security@salesforce.com) 4 | as soon as it is discovered. This library limits its runtime dependencies in 5 | order to reduce the total cost of ownership as much as can be, but all consumers 6 | should remain vigilant and have their security stakeholders review all third-party 7 | products (3PP) like this one and their dependencies. 8 | -------------------------------------------------------------------------------- /codes/caption/BLIP/cog.yaml: -------------------------------------------------------------------------------- 1 | build: 2 | gpu: true 3 | cuda: "11.1" 4 | python_version: "3.8" 5 | system_packages: 6 | - "libgl1-mesa-glx" 7 | - "libglib2.0-0" 8 | python_packages: 9 | - "ipython==7.30.1" 10 | - "torchvision==0.11.1" 11 | - "torch==1.10.0" 12 | - "timm==0.4.12" 13 | - "transformers==4.15.0" 14 | - "fairscale==0.4.4" 15 | - "pycocoevalcap==1.2" 16 | 17 | predict: "predict.py:Predictor" 18 | -------------------------------------------------------------------------------- /codes/caption/BLIP/configs/bert_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "BertModel" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "hidden_act": "gelu", 7 | "hidden_dropout_prob": 0.1, 8 | "hidden_size": 768, 9 | "initializer_range": 0.02, 10 | "intermediate_size": 3072, 11 | "layer_norm_eps": 1e-12, 12 | "max_position_embeddings": 512, 13 | "model_type": "bert", 14 | "num_attention_heads": 12, 15 | "num_hidden_layers": 12, 16 | "pad_token_id": 0, 17 | "type_vocab_size": 2, 18 | "vocab_size": 30522, 19 | "encoder_width": 768, 20 | "add_cross_attention": true 21 | } 22 | -------------------------------------------------------------------------------- /codes/caption/BLIP/configs/caption_coco.yaml: -------------------------------------------------------------------------------- 1 | image_root: '/export/share/datasets/vision/coco/images/' 2 | ann_root: 'annotation' 3 | coco_gt_root: 'annotation/coco_gt' 4 | 5 | # set pretrained as a file path or an url 6 | pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth' 7 | 8 | # size of vit model; base or large 9 | vit: 'base' 10 | vit_grad_ckpt: False 11 | vit_ckpt_layer: 0 12 | 
batch_size: 32 13 | init_lr: 1e-5 14 | 15 | # vit: 'large' 16 | # vit_grad_ckpt: True 17 | # vit_ckpt_layer: 5 18 | # batch_size: 16 19 | # init_lr: 2e-6 20 | 21 | image_size: 384 22 | 23 | # generation configs 24 | max_length: 20 25 | min_length: 5 26 | num_beams: 3 27 | prompt: 'a picture of ' 28 | 29 | # optimizer 30 | weight_decay: 0.05 31 | min_lr: 0 32 | max_epoch: 5 33 | 34 | -------------------------------------------------------------------------------- /codes/caption/BLIP/configs/med_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "BertModel" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "hidden_act": "gelu", 7 | "hidden_dropout_prob": 0.1, 8 | "hidden_size": 768, 9 | "initializer_range": 0.02, 10 | "intermediate_size": 3072, 11 | "layer_norm_eps": 1e-12, 12 | "max_position_embeddings": 512, 13 | "model_type": "bert", 14 | "num_attention_heads": 12, 15 | "num_hidden_layers": 12, 16 | "pad_token_id": 0, 17 | "type_vocab_size": 2, 18 | "vocab_size": 30524, 19 | "encoder_width": 768, 20 | "add_cross_attention": true 21 | } 22 | -------------------------------------------------------------------------------- /codes/caption/BLIP/configs/nlvr.yaml: -------------------------------------------------------------------------------- 1 | image_root: '/export/share/datasets/vision/NLVR2/' 2 | ann_root: 'annotation' 3 | 4 | # set pretrained as a file path or an url 5 | pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_nlvr.pth' 6 | 7 | #size of vit model; base or large 8 | vit: 'base' 9 | batch_size_train: 16 10 | batch_size_test: 64 11 | vit_grad_ckpt: False 12 | vit_ckpt_layer: 0 13 | max_epoch: 15 14 | 15 | image_size: 384 16 | 17 | # optimizer 18 | weight_decay: 0.05 19 | init_lr: 3e-5 20 | min_lr: 0 21 | 22 | -------------------------------------------------------------------------------- /codes/caption/BLIP/configs/nocaps.yaml: -------------------------------------------------------------------------------- 1 | image_root: '/export/share/datasets/vision/nocaps/' 2 | ann_root: 'annotation' 3 | 4 | # set pretrained as a file path or an url 5 | pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth' 6 | 7 | vit: 'base' 8 | batch_size: 32 9 | 10 | image_size: 384 11 | 12 | max_length: 20 13 | min_length: 5 14 | num_beams: 3 15 | prompt: 'a picture of ' -------------------------------------------------------------------------------- /codes/caption/BLIP/configs/pretrain.yaml: -------------------------------------------------------------------------------- 1 | train_file: ['/export/share/junnan-li/VL_pretrain/annotation/coco_karpathy_train.json', 2 | '/export/share/junnan-li/VL_pretrain/annotation/vg_caption.json', 3 | ] 4 | laion_path: '' 5 | 6 | # size of vit model; base or large 7 | vit: 'base' 8 | vit_grad_ckpt: False 9 | vit_ckpt_layer: 0 10 | 11 | image_size: 224 12 | batch_size: 75 13 | 14 | queue_size: 57600 15 | alpha: 0.4 16 | 17 | # optimizer 18 | weight_decay: 0.05 19 | init_lr: 3e-4 20 | min_lr: 1e-6 21 | warmup_lr: 1e-6 22 | lr_decay_rate: 0.9 23 | max_epoch: 20 24 | warmup_steps: 3000 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /codes/caption/BLIP/configs/retrieval_coco.yaml: -------------------------------------------------------------------------------- 1 | image_root: '/export/share/datasets/vision/coco/images/' 2 | ann_root: 
'annotation' 3 | dataset: 'coco' 4 | 5 | # set pretrained as a file path or an url 6 | pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_retrieval_coco.pth' 7 | 8 | # size of vit model; base or large 9 | 10 | vit: 'base' 11 | batch_size_train: 32 12 | batch_size_test: 64 13 | vit_grad_ckpt: True 14 | vit_ckpt_layer: 4 15 | init_lr: 1e-5 16 | 17 | # vit: 'large' 18 | # batch_size_train: 16 19 | # batch_size_test: 32 20 | # vit_grad_ckpt: True 21 | # vit_ckpt_layer: 12 22 | # init_lr: 5e-6 23 | 24 | image_size: 384 25 | queue_size: 57600 26 | alpha: 0.4 27 | k_test: 256 28 | negative_all_rank: True 29 | 30 | # optimizer 31 | weight_decay: 0.05 32 | min_lr: 0 33 | max_epoch: 6 34 | 35 | -------------------------------------------------------------------------------- /codes/caption/BLIP/configs/retrieval_flickr.yaml: -------------------------------------------------------------------------------- 1 | image_root: '/export/share/datasets/vision/flickr30k/' 2 | ann_root: 'annotation' 3 | dataset: 'flickr' 4 | 5 | # set pretrained as a file path or an url 6 | pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_retrieval_flickr.pth' 7 | 8 | # size of vit model; base or large 9 | 10 | vit: 'base' 11 | batch_size_train: 32 12 | batch_size_test: 64 13 | vit_grad_ckpt: True 14 | vit_ckpt_layer: 4 15 | init_lr: 1e-5 16 | 17 | # vit: 'large' 18 | # batch_size_train: 16 19 | # batch_size_test: 32 20 | # vit_grad_ckpt: True 21 | # vit_ckpt_layer: 10 22 | # init_lr: 5e-6 23 | 24 | image_size: 384 25 | queue_size: 57600 26 | alpha: 0.4 27 | k_test: 128 28 | negative_all_rank: False 29 | 30 | # optimizer 31 | weight_decay: 0.05 32 | min_lr: 0 33 | max_epoch: 6 34 | 35 | -------------------------------------------------------------------------------- /codes/caption/BLIP/configs/retrieval_msrvtt.yaml: -------------------------------------------------------------------------------- 1 | video_root: '/export/share/dongxuli/data/msrvtt_retrieval/videos' 2 | ann_root: 'annotation' 3 | 4 | # set pretrained as a file path or an url 5 | pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_retrieval_coco.pth' 6 | 7 | # size of vit model; base or large 8 | vit: 'base' 9 | batch_size: 64 10 | k_test: 128 11 | image_size: 384 12 | num_frm_test: 8 -------------------------------------------------------------------------------- /codes/caption/BLIP/configs/vqa.yaml: -------------------------------------------------------------------------------- 1 | vqa_root: '/export/share/datasets/vision/VQA/Images/mscoco/' #followed by train2014/ 2 | vg_root: '/export/share/datasets/vision/visual-genome/' #followed by image/ 3 | train_files: ['vqa_train','vqa_val','vg_qa'] 4 | ann_root: 'annotation' 5 | 6 | # set pretrained as a file path or an url 7 | pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_vqa_capfilt_large.pth' 8 | 9 | # size of vit model; base or large 10 | vit: 'base' 11 | batch_size_train: 16 12 | batch_size_test: 32 13 | vit_grad_ckpt: False 14 | vit_ckpt_layer: 0 15 | init_lr: 2e-5 16 | 17 | image_size: 480 18 | 19 | k_test: 128 20 | inference: 'rank' 21 | 22 | # optimizer 23 | weight_decay: 0.05 24 | min_lr: 0 25 | max_epoch: 10 -------------------------------------------------------------------------------- /codes/caption/BLIP/data/nocaps_dataset.py: -------------------------------------------------------------------------------- 1 | import os 
2 | import json 3 | 4 | from torch.utils.data import Dataset 5 | from torchvision.datasets.utils import download_url 6 | 7 | from PIL import Image 8 | 9 | class nocaps_eval(Dataset): 10 | def __init__(self, transform, image_root, ann_root, split): 11 | urls = {'val':'https://storage.googleapis.com/sfr-vision-language-research/datasets/nocaps_val.json', 12 | 'test':'https://storage.googleapis.com/sfr-vision-language-research/datasets/nocaps_test.json'} 13 | filenames = {'val':'nocaps_val.json','test':'nocaps_test.json'} 14 | 15 | download_url(urls[split],ann_root) 16 | 17 | self.annotation = json.load(open(os.path.join(ann_root,filenames[split]),'r')) 18 | self.transform = transform 19 | self.image_root = image_root 20 | 21 | def __len__(self): 22 | return len(self.annotation) 23 | 24 | def __getitem__(self, index): 25 | 26 | ann = self.annotation[index] 27 | 28 | image_path = os.path.join(self.image_root,ann['image']) 29 | image = Image.open(image_path).convert('RGB') 30 | image = self.transform(image) 31 | 32 | return image, int(ann['img_id']) -------------------------------------------------------------------------------- /codes/caption/BLIP/data/pretrain_dataset.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import random 4 | 5 | from torch.utils.data import Dataset 6 | 7 | from PIL import Image 8 | from PIL import ImageFile 9 | ImageFile.LOAD_TRUNCATED_IMAGES = True 10 | Image.MAX_IMAGE_PIXELS = None 11 | 12 | from data.utils import pre_caption 13 | import os,glob 14 | 15 | class pretrain_dataset(Dataset): 16 | def __init__(self, ann_file, laion_path, transform): 17 | 18 | self.ann_pretrain = [] 19 | for f in ann_file: 20 | print('loading '+f) 21 | ann = json.load(open(f,'r')) 22 | self.ann_pretrain += ann 23 | 24 | self.laion_path = laion_path 25 | if self.laion_path: 26 | self.laion_files = glob.glob(os.path.join(laion_path,'*.json')) 27 | 28 | print('loading '+self.laion_files[0]) 29 | with open(self.laion_files[0],'r') as f: 30 | self.ann_laion = json.load(f) 31 | 32 | self.annotation = self.ann_pretrain + self.ann_laion 33 | else: 34 | self.annotation = self.ann_pretrain 35 | 36 | self.transform = transform 37 | 38 | 39 | def reload_laion(self, epoch): 40 | n = epoch%len(self.laion_files) 41 | print('loading '+self.laion_files[n]) 42 | with open(self.laion_files[n],'r') as f: 43 | self.ann_laion = json.load(f) 44 | 45 | self.annotation = self.ann_pretrain + self.ann_laion 46 | 47 | 48 | def __len__(self): 49 | return len(self.annotation) 50 | 51 | def __getitem__(self, index): 52 | 53 | ann = self.annotation[index] 54 | 55 | image = Image.open(ann['image']).convert('RGB') 56 | image = self.transform(image) 57 | caption = pre_caption(ann['caption'],30) 58 | 59 | return image, caption -------------------------------------------------------------------------------- /codes/caption/BLIP/img2feat_blip.py: -------------------------------------------------------------------------------- 1 | import argparse, os 2 | from PIL import Image 3 | import torch 4 | from torchvision import transforms 5 | from models.blip import blip_decoder 6 | import sys 7 | sys.path.append("../../utils/") 8 | from nsd_access.nsda import NSDAccess 9 | from tqdm import tqdm 10 | import numpy as np 11 | 12 | def main(): 13 | parser = argparse.ArgumentParser() 14 | 15 | parser.add_argument( 16 | "--gpu", 17 | required=True, 18 | type=int, 19 | help="gpu" 20 | ) 21 | 22 | # Set parameters 23 | opt = parser.parse_args() 24 | gpu = opt.gpu 25 | 
torch.cuda.set_device(gpu) 26 | nimage = 73000 27 | image_size = 240 28 | model_url = "https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_capfilt_large.pth" 29 | device = torch.device(f"cuda:{gpu}" if torch.cuda.is_available() else "cpu") 30 | model = blip_decoder(pretrained=model_url, image_size=image_size, vit="base") 31 | model.eval() 32 | model = model.to(device) 33 | savedir = f'../../../nsdfeat/blip/' 34 | os.makedirs(savedir, exist_ok=True) 35 | 36 | # Make feature 37 | nsda = NSDAccess('../../../nsd/') 38 | for s in tqdm(range(nimage)): 39 | img_arr = nsda.read_images(s) 40 | image = Image.fromarray(img_arr).convert("RGB").resize((image_size,image_size), resample=Image.LANCZOS) 41 | img_arr = transforms.ToTensor()(image).to('cuda').unsqueeze(0) 42 | with torch.no_grad(): 43 | vit_feat = model.visual_encoder(img_arr).cpu().detach().numpy().squeeze() 44 | np.save(f'{savedir}/{s:06}.npy',vit_feat) 45 | 46 | if __name__ == "__main__": 47 | main() 48 | -------------------------------------------------------------------------------- /codes/caption/BLIP/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/caption/BLIP/models/__init__.py -------------------------------------------------------------------------------- /codes/caption/BLIP/requirements.txt: -------------------------------------------------------------------------------- 1 | timm==0.4.12 2 | transformers==4.15.0 3 | fairscale==0.4.4 4 | pycocoevalcap 5 | -------------------------------------------------------------------------------- /codes/depth/img2feat_dpt.py: -------------------------------------------------------------------------------- 1 | import argparse, os, sys 2 | from tqdm import tqdm 3 | import torch 4 | import numpy as np 5 | import PIL 6 | from transformers import AutoImageProcessor, DPTForDepthEstimation 7 | sys.path.append("../utils/") 8 | from nsd_access.nsda import NSDAccess 9 | from PIL import Image 10 | 11 | def main(): 12 | 13 | parser = argparse.ArgumentParser() 14 | 15 | parser.add_argument( 16 | "--imgidx", 17 | required=True, 18 | nargs="*", 19 | type=int, 20 | help="start and end imgs" 21 | ) 22 | parser.add_argument( 23 | "--gpu", 24 | required=True, 25 | type=int, 26 | help="gpu" 27 | ) 28 | 29 | opt = parser.parse_args() 30 | imgidx = opt.imgidx 31 | gpu = opt.gpu 32 | resolution = 512 33 | nsda = NSDAccess('../../nsd/') 34 | 35 | # Save Directories 36 | os.makedirs(f'../../nsdfeat/dpt/', exist_ok=True) 37 | for i in range(4): 38 | os.makedirs(f'../../nsdfeat/dpt_emb{i}/', exist_ok=True) 39 | 40 | device = torch.device(f"cuda:{gpu}") if torch.cuda.is_available() else torch.device("cpu") 41 | image_processor = AutoImageProcessor.from_pretrained("Intel/dpt-large") 42 | model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large") 43 | model.to(device) 44 | 45 | 46 | for s in tqdm(range(imgidx[0],imgidx[1])): 47 | print(f"Now processing image {s:06}") 48 | img_arr = nsda.read_images(s) 49 | image = Image.fromarray(img_arr).convert("RGB").resize((resolution, resolution), resample=PIL.Image.LANCZOS) 50 | inputs = image_processor(images=image, return_tensors="pt").to(device) 51 | with torch.no_grad(): 52 | outputs = model(**inputs,output_hidden_states=True) 53 | predicted_depth = outputs.predicted_depth 54 | hidden_states = [ 55 | feature.to('cpu').detach().numpy() for idx, feature in 
enumerate(outputs.hidden_states[1:]) if idx in model.config.backbone_out_indices 56 | ] 57 | 58 | predicted_depth = predicted_depth.to('cpu').detach().numpy() 59 | 60 | for idx, dpt_idx in enumerate(model.config.backbone_out_indices): 61 | np.save(f'../../nsdfeat/dpt_emb{idx}/{s:06}.npy',hidden_states[idx]) 62 | np.save(f'../../nsdfeat/dpt/{s:06}.npy',predicted_depth) 63 | 64 | if __name__ == "__main__": 65 | main() 66 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/a-painting-of-a-fire.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/a-painting-of-a-fire.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/a-photograph-of-a-fire.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/a-photograph-of-a-fire.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/a-shirt-with-a-fire-printed-on-it.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/a-shirt-with-a-fire-printed-on-it.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/a-shirt-with-the-inscription-'fire'.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/a-shirt-with-the-inscription-'fire'.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/a-watercolor-painting-of-a-fire.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/a-watercolor-painting-of-a-fire.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/birdhouse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/birdhouse.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/fire.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/fire.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/inpainting.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/inpainting.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/modelfigure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/modelfigure.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/rdm-preview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/rdm-preview.jpg -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/reconstruction1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/reconstruction1.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/reconstruction2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/reconstruction2.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/results.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/results.gif -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/rick.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/rick.jpeg -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/stable-samples/img2img/mountains-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/stable-samples/img2img/mountains-1.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/stable-samples/img2img/mountains-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/stable-samples/img2img/mountains-2.png 
-------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/stable-samples/img2img/mountains-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/stable-samples/img2img/mountains-3.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/stable-samples/img2img/sketch-mountains-input.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/stable-samples/img2img/sketch-mountains-input.jpg -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/stable-samples/img2img/upscaling-in.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/stable-samples/img2img/upscaling-in.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/stable-samples/img2img/upscaling-out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/stable-samples/img2img/upscaling-out.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/stable-samples/txt2img/000002025.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/stable-samples/txt2img/000002025.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/stable-samples/txt2img/000002035.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/stable-samples/txt2img/000002035.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/stable-samples/txt2img/merged-0005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/stable-samples/txt2img/merged-0005.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/stable-samples/txt2img/merged-0006.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/stable-samples/txt2img/merged-0006.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/stable-samples/txt2img/merged-0007.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/stable-samples/txt2img/merged-0007.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/the-earth-is-on-fire,-oil-on-canvas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/the-earth-is-on-fire,-oil-on-canvas.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/txt2img-convsample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/txt2img-convsample.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/txt2img-preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/txt2img-preview.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/assets/v1-variants-scores.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/assets/v1-variants-scores.jpg -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/configs/autoencoder/autoencoder_kl_16x16x16.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-6 3 | target: ldm.models.autoencoder.AutoencoderKL 4 | params: 5 | monitor: "val/rec_loss" 6 | embed_dim: 16 7 | lossconfig: 8 | target: ldm.modules.losses.LPIPSWithDiscriminator 9 | params: 10 | disc_start: 50001 11 | kl_weight: 0.000001 12 | disc_weight: 0.5 13 | 14 | ddconfig: 15 | double_z: True 16 | z_channels: 16 17 | resolution: 256 18 | in_channels: 3 19 | out_ch: 3 20 | ch: 128 21 | ch_mult: [ 1,1,2,2,4] # num_down = len(ch_mult)-1 22 | num_res_blocks: 2 23 | attn_resolutions: [16] 24 | dropout: 0.0 25 | 26 | 27 | data: 28 | target: main.DataModuleFromConfig 29 | params: 30 | batch_size: 12 31 | wrap: True 32 | train: 33 | target: ldm.data.imagenet.ImageNetSRTrain 34 | params: 35 | size: 256 36 | degradation: pil_nearest 37 | validation: 38 | target: ldm.data.imagenet.ImageNetSRValidation 39 | params: 40 | size: 256 41 | degradation: pil_nearest 42 | 43 | lightning: 44 | callbacks: 45 | image_logger: 46 | target: 
main.ImageLogger 47 | params: 48 | batch_frequency: 1000 49 | max_images: 8 50 | increase_log_steps: True 51 | 52 | trainer: 53 | benchmark: True 54 | accumulate_grad_batches: 2 55 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/configs/autoencoder/autoencoder_kl_32x32x4.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-6 3 | target: ldm.models.autoencoder.AutoencoderKL 4 | params: 5 | monitor: "val/rec_loss" 6 | embed_dim: 4 7 | lossconfig: 8 | target: ldm.modules.losses.LPIPSWithDiscriminator 9 | params: 10 | disc_start: 50001 11 | kl_weight: 0.000001 12 | disc_weight: 0.5 13 | 14 | ddconfig: 15 | double_z: True 16 | z_channels: 4 17 | resolution: 256 18 | in_channels: 3 19 | out_ch: 3 20 | ch: 128 21 | ch_mult: [ 1,2,4,4 ] # num_down = len(ch_mult)-1 22 | num_res_blocks: 2 23 | attn_resolutions: [ ] 24 | dropout: 0.0 25 | 26 | data: 27 | target: main.DataModuleFromConfig 28 | params: 29 | batch_size: 12 30 | wrap: True 31 | train: 32 | target: ldm.data.imagenet.ImageNetSRTrain 33 | params: 34 | size: 256 35 | degradation: pil_nearest 36 | validation: 37 | target: ldm.data.imagenet.ImageNetSRValidation 38 | params: 39 | size: 256 40 | degradation: pil_nearest 41 | 42 | lightning: 43 | callbacks: 44 | image_logger: 45 | target: main.ImageLogger 46 | params: 47 | batch_frequency: 1000 48 | max_images: 8 49 | increase_log_steps: True 50 | 51 | trainer: 52 | benchmark: True 53 | accumulate_grad_batches: 2 54 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/configs/autoencoder/autoencoder_kl_64x64x3.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-6 3 | target: ldm.models.autoencoder.AutoencoderKL 4 | params: 5 | monitor: "val/rec_loss" 6 | embed_dim: 3 7 | lossconfig: 8 | target: ldm.modules.losses.LPIPSWithDiscriminator 9 | params: 10 | disc_start: 50001 11 | kl_weight: 0.000001 12 | disc_weight: 0.5 13 | 14 | ddconfig: 15 | double_z: True 16 | z_channels: 3 17 | resolution: 256 18 | in_channels: 3 19 | out_ch: 3 20 | ch: 128 21 | ch_mult: [ 1,2,4 ] # num_down = len(ch_mult)-1 22 | num_res_blocks: 2 23 | attn_resolutions: [ ] 24 | dropout: 0.0 25 | 26 | 27 | data: 28 | target: main.DataModuleFromConfig 29 | params: 30 | batch_size: 12 31 | wrap: True 32 | train: 33 | target: ldm.data.imagenet.ImageNetSRTrain 34 | params: 35 | size: 256 36 | degradation: pil_nearest 37 | validation: 38 | target: ldm.data.imagenet.ImageNetSRValidation 39 | params: 40 | size: 256 41 | degradation: pil_nearest 42 | 43 | lightning: 44 | callbacks: 45 | image_logger: 46 | target: main.ImageLogger 47 | params: 48 | batch_frequency: 1000 49 | max_images: 8 50 | increase_log_steps: True 51 | 52 | trainer: 53 | benchmark: True 54 | accumulate_grad_batches: 2 55 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/configs/autoencoder/autoencoder_kl_8x8x64.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-6 3 | target: ldm.models.autoencoder.AutoencoderKL 4 | params: 5 | monitor: "val/rec_loss" 6 | embed_dim: 64 7 | lossconfig: 8 | target: ldm.modules.losses.LPIPSWithDiscriminator 9 | params: 10 | disc_start: 50001 11 | kl_weight: 0.000001 12 | disc_weight: 0.5 13 | 14 | ddconfig: 
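# Annotation (not part of the original config): the ddconfig block below defines the
# convolutional KL autoencoder backbone. ch_mult [1,1,2,2,4,4] gives len(ch_mult)-1 = 5
# downsampling stages, so a 256px input becomes an 8x8 latent grid with z_channels = 64
# channels (hence "kl_8x8x64"), with self-attention applied at the 16 and 8 feature resolutions.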
15 | double_z: True 16 | z_channels: 64 17 | resolution: 256 18 | in_channels: 3 19 | out_ch: 3 20 | ch: 128 21 | ch_mult: [ 1,1,2,2,4,4] # num_down = len(ch_mult)-1 22 | num_res_blocks: 2 23 | attn_resolutions: [16,8] 24 | dropout: 0.0 25 | 26 | data: 27 | target: main.DataModuleFromConfig 28 | params: 29 | batch_size: 12 30 | wrap: True 31 | train: 32 | target: ldm.data.imagenet.ImageNetSRTrain 33 | params: 34 | size: 256 35 | degradation: pil_nearest 36 | validation: 37 | target: ldm.data.imagenet.ImageNetSRValidation 38 | params: 39 | size: 256 40 | degradation: pil_nearest 41 | 42 | lightning: 43 | callbacks: 44 | image_logger: 45 | target: main.ImageLogger 46 | params: 47 | batch_frequency: 1000 48 | max_images: 8 49 | increase_log_steps: True 50 | 51 | trainer: 52 | benchmark: True 53 | accumulate_grad_batches: 2 54 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/configs/latent-diffusion/celebahq-ldm-vq-4.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 2.0e-06 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0195 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: image 11 | image_size: 64 12 | channels: 3 13 | monitor: val/loss_simple_ema 14 | 15 | unet_config: 16 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 17 | params: 18 | image_size: 64 19 | in_channels: 3 20 | out_channels: 3 21 | model_channels: 224 22 | attention_resolutions: 23 | # note: this isn\t actually the resolution but 24 | # the downsampling factor, i.e. this corresnponds to 25 | # attention on spatial resolution 8,16,32, as the 26 | # spatial reolution of the latents is 64 for f4 27 | - 8 28 | - 4 29 | - 2 30 | num_res_blocks: 2 31 | channel_mult: 32 | - 1 33 | - 2 34 | - 3 35 | - 4 36 | num_head_channels: 32 37 | first_stage_config: 38 | target: ldm.models.autoencoder.VQModelInterface 39 | params: 40 | embed_dim: 3 41 | n_embed: 8192 42 | ckpt_path: models/first_stage_models/vq-f4/model.ckpt 43 | ddconfig: 44 | double_z: false 45 | z_channels: 3 46 | resolution: 256 47 | in_channels: 3 48 | out_ch: 3 49 | ch: 128 50 | ch_mult: 51 | - 1 52 | - 2 53 | - 4 54 | num_res_blocks: 2 55 | attn_resolutions: [] 56 | dropout: 0.0 57 | lossconfig: 58 | target: torch.nn.Identity 59 | cond_stage_config: __is_unconditional__ 60 | data: 61 | target: main.DataModuleFromConfig 62 | params: 63 | batch_size: 48 64 | num_workers: 5 65 | wrap: false 66 | train: 67 | target: taming.data.faceshq.CelebAHQTrain 68 | params: 69 | size: 256 70 | validation: 71 | target: taming.data.faceshq.CelebAHQValidation 72 | params: 73 | size: 256 74 | 75 | 76 | lightning: 77 | callbacks: 78 | image_logger: 79 | target: main.ImageLogger 80 | params: 81 | batch_frequency: 5000 82 | max_images: 8 83 | increase_log_steps: False 84 | 85 | trainer: 86 | benchmark: True -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/configs/latent-diffusion/cin256-v2.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 0.0001 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0195 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: image 11 | cond_stage_key: class_label 12 | image_size: 64 
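# Annotation (not part of the original config): image_size and channels here describe the
# diffusion latent space, not the RGB input. The VQ-f4 first stage below (ch_mult [1,2,4],
# z_channels 3) maps 256x256x3 images to 64x64x3 latents, and conditioning cross-attends to a
# learned 512-dim class embedding (n_classes: 1001, presumably 1000 ImageNet classes plus an
# extra unconditional label).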
13 | channels: 3 14 | cond_stage_trainable: true 15 | conditioning_key: crossattn 16 | monitor: val/loss 17 | use_ema: False 18 | 19 | unet_config: 20 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 21 | params: 22 | image_size: 64 23 | in_channels: 3 24 | out_channels: 3 25 | model_channels: 192 26 | attention_resolutions: 27 | - 8 28 | - 4 29 | - 2 30 | num_res_blocks: 2 31 | channel_mult: 32 | - 1 33 | - 2 34 | - 3 35 | - 5 36 | num_heads: 1 37 | use_spatial_transformer: true 38 | transformer_depth: 1 39 | context_dim: 512 40 | 41 | first_stage_config: 42 | target: ldm.models.autoencoder.VQModelInterface 43 | params: 44 | embed_dim: 3 45 | n_embed: 8192 46 | ddconfig: 47 | double_z: false 48 | z_channels: 3 49 | resolution: 256 50 | in_channels: 3 51 | out_ch: 3 52 | ch: 128 53 | ch_mult: 54 | - 1 55 | - 2 56 | - 4 57 | num_res_blocks: 2 58 | attn_resolutions: [] 59 | dropout: 0.0 60 | lossconfig: 61 | target: torch.nn.Identity 62 | 63 | cond_stage_config: 64 | target: ldm.modules.encoders.modules.ClassEmbedder 65 | params: 66 | n_classes: 1001 67 | embed_dim: 512 68 | key: class_label 69 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/configs/latent-diffusion/ffhq-ldm-vq-4.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 2.0e-06 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0195 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: image 11 | image_size: 64 12 | channels: 3 13 | monitor: val/loss_simple_ema 14 | unet_config: 15 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 16 | params: 17 | image_size: 64 18 | in_channels: 3 19 | out_channels: 3 20 | model_channels: 224 21 | attention_resolutions: 22 | # note: this isn\t actually the resolution but 23 | # the downsampling factor, i.e. 
this corresnponds to 24 | # attention on spatial resolution 8,16,32, as the 25 | # spatial reolution of the latents is 64 for f4 26 | - 8 27 | - 4 28 | - 2 29 | num_res_blocks: 2 30 | channel_mult: 31 | - 1 32 | - 2 33 | - 3 34 | - 4 35 | num_head_channels: 32 36 | first_stage_config: 37 | target: ldm.models.autoencoder.VQModelInterface 38 | params: 39 | embed_dim: 3 40 | n_embed: 8192 41 | ckpt_path: configs/first_stage_models/vq-f4/model.yaml 42 | ddconfig: 43 | double_z: false 44 | z_channels: 3 45 | resolution: 256 46 | in_channels: 3 47 | out_ch: 3 48 | ch: 128 49 | ch_mult: 50 | - 1 51 | - 2 52 | - 4 53 | num_res_blocks: 2 54 | attn_resolutions: [] 55 | dropout: 0.0 56 | lossconfig: 57 | target: torch.nn.Identity 58 | cond_stage_config: __is_unconditional__ 59 | data: 60 | target: main.DataModuleFromConfig 61 | params: 62 | batch_size: 42 63 | num_workers: 5 64 | wrap: false 65 | train: 66 | target: taming.data.faceshq.FFHQTrain 67 | params: 68 | size: 256 69 | validation: 70 | target: taming.data.faceshq.FFHQValidation 71 | params: 72 | size: 256 73 | 74 | 75 | lightning: 76 | callbacks: 77 | image_logger: 78 | target: main.ImageLogger 79 | params: 80 | batch_frequency: 5000 81 | max_images: 8 82 | increase_log_steps: False 83 | 84 | trainer: 85 | benchmark: True -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/configs/latent-diffusion/lsun_bedrooms-ldm-vq-4.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 2.0e-06 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0195 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: image 11 | image_size: 64 12 | channels: 3 13 | monitor: val/loss_simple_ema 14 | unet_config: 15 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 16 | params: 17 | image_size: 64 18 | in_channels: 3 19 | out_channels: 3 20 | model_channels: 224 21 | attention_resolutions: 22 | # note: this isn\t actually the resolution but 23 | # the downsampling factor, i.e. 
this corresnponds to 24 | # attention on spatial resolution 8,16,32, as the 25 | # spatial reolution of the latents is 64 for f4 26 | - 8 27 | - 4 28 | - 2 29 | num_res_blocks: 2 30 | channel_mult: 31 | - 1 32 | - 2 33 | - 3 34 | - 4 35 | num_head_channels: 32 36 | first_stage_config: 37 | target: ldm.models.autoencoder.VQModelInterface 38 | params: 39 | ckpt_path: configs/first_stage_models/vq-f4/model.yaml 40 | embed_dim: 3 41 | n_embed: 8192 42 | ddconfig: 43 | double_z: false 44 | z_channels: 3 45 | resolution: 256 46 | in_channels: 3 47 | out_ch: 3 48 | ch: 128 49 | ch_mult: 50 | - 1 51 | - 2 52 | - 4 53 | num_res_blocks: 2 54 | attn_resolutions: [] 55 | dropout: 0.0 56 | lossconfig: 57 | target: torch.nn.Identity 58 | cond_stage_config: __is_unconditional__ 59 | data: 60 | target: main.DataModuleFromConfig 61 | params: 62 | batch_size: 48 63 | num_workers: 5 64 | wrap: false 65 | train: 66 | target: ldm.data.lsun.LSUNBedroomsTrain 67 | params: 68 | size: 256 69 | validation: 70 | target: ldm.data.lsun.LSUNBedroomsValidation 71 | params: 72 | size: 256 73 | 74 | 75 | lightning: 76 | callbacks: 77 | image_logger: 78 | target: main.ImageLogger 79 | params: 80 | batch_frequency: 5000 81 | max_images: 8 82 | increase_log_steps: False 83 | 84 | trainer: 85 | benchmark: True -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/configs/latent-diffusion/lsun_churches-ldm-kl-8.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 5.0e-5 # set to target_lr by starting main.py with '--scale_lr False' 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0155 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | loss_type: l1 11 | first_stage_key: "image" 12 | cond_stage_key: "image" 13 | image_size: 32 14 | channels: 4 15 | cond_stage_trainable: False 16 | concat_mode: False 17 | scale_by_std: True 18 | monitor: 'val/loss_simple_ema' 19 | 20 | scheduler_config: # 10000 warmup steps 21 | target: ldm.lr_scheduler.LambdaLinearScheduler 22 | params: 23 | warm_up_steps: [10000] 24 | cycle_lengths: [10000000000000] 25 | f_start: [1.e-6] 26 | f_max: [1.] 27 | f_min: [ 1.] 
28 | 29 | unet_config: 30 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 31 | params: 32 | image_size: 32 33 | in_channels: 4 34 | out_channels: 4 35 | model_channels: 192 36 | attention_resolutions: [ 1, 2, 4, 8 ] # 32, 16, 8, 4 37 | num_res_blocks: 2 38 | channel_mult: [ 1,2,2,4,4 ] # 32, 16, 8, 4, 2 39 | num_heads: 8 40 | use_scale_shift_norm: True 41 | resblock_updown: True 42 | 43 | first_stage_config: 44 | target: ldm.models.autoencoder.AutoencoderKL 45 | params: 46 | embed_dim: 4 47 | monitor: "val/rec_loss" 48 | ckpt_path: "models/first_stage_models/kl-f8/model.ckpt" 49 | ddconfig: 50 | double_z: True 51 | z_channels: 4 52 | resolution: 256 53 | in_channels: 3 54 | out_ch: 3 55 | ch: 128 56 | ch_mult: [ 1,2,4,4 ] # num_down = len(ch_mult)-1 57 | num_res_blocks: 2 58 | attn_resolutions: [ ] 59 | dropout: 0.0 60 | lossconfig: 61 | target: torch.nn.Identity 62 | 63 | cond_stage_config: "__is_unconditional__" 64 | 65 | data: 66 | target: main.DataModuleFromConfig 67 | params: 68 | batch_size: 96 69 | num_workers: 5 70 | wrap: False 71 | train: 72 | target: ldm.data.lsun.LSUNChurchesTrain 73 | params: 74 | size: 256 75 | validation: 76 | target: ldm.data.lsun.LSUNChurchesValidation 77 | params: 78 | size: 256 79 | 80 | lightning: 81 | callbacks: 82 | image_logger: 83 | target: main.ImageLogger 84 | params: 85 | batch_frequency: 5000 86 | max_images: 8 87 | increase_log_steps: False 88 | 89 | 90 | trainer: 91 | benchmark: True -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/configs/latent-diffusion/txt2img-1p4B-eval.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 5.0e-05 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.012 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: image 11 | cond_stage_key: caption 12 | image_size: 32 13 | channels: 4 14 | cond_stage_trainable: true 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | use_ema: False 19 | 20 | unet_config: 21 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 22 | params: 23 | image_size: 32 24 | in_channels: 4 25 | out_channels: 4 26 | model_channels: 320 27 | attention_resolutions: 28 | - 4 29 | - 2 30 | - 1 31 | num_res_blocks: 2 32 | channel_mult: 33 | - 1 34 | - 2 35 | - 4 36 | - 4 37 | num_heads: 8 38 | use_spatial_transformer: true 39 | transformer_depth: 1 40 | context_dim: 1280 41 | use_checkpoint: true 42 | legacy: False 43 | 44 | first_stage_config: 45 | target: ldm.models.autoencoder.AutoencoderKL 46 | params: 47 | embed_dim: 4 48 | monitor: val/rec_loss 49 | ddconfig: 50 | double_z: true 51 | z_channels: 4 52 | resolution: 256 53 | in_channels: 3 54 | out_ch: 3 55 | ch: 128 56 | ch_mult: 57 | - 1 58 | - 2 59 | - 4 60 | - 4 61 | num_res_blocks: 2 62 | attn_resolutions: [] 63 | dropout: 0.0 64 | lossconfig: 65 | target: torch.nn.Identity 66 | 67 | cond_stage_config: 68 | target: ldm.modules.encoders.modules.BERTEmbedder 69 | params: 70 | n_embed: 1280 71 | n_layer: 32 72 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/configs/retrieval-augmented-diffusion/768x768.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 0.0001 3 | target: 
ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.015 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: jpg 11 | cond_stage_key: nix 12 | image_size: 48 13 | channels: 16 14 | cond_stage_trainable: false 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | scale_by_std: false 18 | scale_factor: 0.22765929 19 | unet_config: 20 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 21 | params: 22 | image_size: 48 23 | in_channels: 16 24 | out_channels: 16 25 | model_channels: 448 26 | attention_resolutions: 27 | - 4 28 | - 2 29 | - 1 30 | num_res_blocks: 2 31 | channel_mult: 32 | - 1 33 | - 2 34 | - 3 35 | - 4 36 | use_scale_shift_norm: false 37 | resblock_updown: false 38 | num_head_channels: 32 39 | use_spatial_transformer: true 40 | transformer_depth: 1 41 | context_dim: 768 42 | use_checkpoint: true 43 | first_stage_config: 44 | target: ldm.models.autoencoder.AutoencoderKL 45 | params: 46 | monitor: val/rec_loss 47 | embed_dim: 16 48 | ddconfig: 49 | double_z: true 50 | z_channels: 16 51 | resolution: 256 52 | in_channels: 3 53 | out_ch: 3 54 | ch: 128 55 | ch_mult: 56 | - 1 57 | - 1 58 | - 2 59 | - 2 60 | - 4 61 | num_res_blocks: 2 62 | attn_resolutions: 63 | - 16 64 | dropout: 0.0 65 | lossconfig: 66 | target: torch.nn.Identity 67 | cond_stage_config: 68 | target: torch.nn.Identity -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/configs/stable-diffusion/v1-inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false # Note: different from the one we trained before 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | use_ema: False 19 | 20 | scheduler_config: # 10000 warmup steps 21 | target: ldm.lr_scheduler.LambdaLinearScheduler 22 | params: 23 | warm_up_steps: [ 10000 ] 24 | cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases 25 | f_start: [ 1.e-6 ] 26 | f_max: [ 1. ] 27 | f_min: [ 1. 
] 28 | 29 | unet_config: 30 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 31 | params: 32 | image_size: 32 # unused 33 | in_channels: 4 34 | out_channels: 4 35 | model_channels: 320 36 | attention_resolutions: [ 4, 2, 1 ] 37 | num_res_blocks: 2 38 | channel_mult: [ 1, 2, 4, 4 ] 39 | num_heads: 8 40 | use_spatial_transformer: True 41 | transformer_depth: 1 42 | context_dim: 768 43 | use_checkpoint: True 44 | legacy: False 45 | 46 | first_stage_config: 47 | target: ldm.models.autoencoder.AutoencoderKL 48 | params: 49 | embed_dim: 4 50 | monitor: val/rec_loss 51 | ddconfig: 52 | double_z: true 53 | z_channels: 4 54 | resolution: 256 55 | in_channels: 3 56 | out_ch: 3 57 | ch: 128 58 | ch_mult: 59 | - 1 60 | - 2 61 | - 4 62 | - 4 63 | num_res_blocks: 2 64 | attn_resolutions: [] 65 | dropout: 0.0 66 | lossconfig: 67 | target: torch.nn.Identity 68 | 69 | cond_stage_config: 70 | target: ldm.modules.encoders.modules.FrozenCLIPEmbedder 71 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/DejaVuSans.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/DejaVuSans.ttf -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/example_conditioning/superresolution/sample_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/example_conditioning/superresolution/sample_0.jpg -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/example_conditioning/text_conditional/sample_0.txt: -------------------------------------------------------------------------------- 1 | A basket of cerries 2 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/imagenet_train_hr_indices.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/imagenet_train_hr_indices.p -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/imagenet_val_hr_indices.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/imagenet_val_hr_indices.p -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/6458524847_2f4c361183_k.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/6458524847_2f4c361183_k.png -------------------------------------------------------------------------------- 
/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/6458524847_2f4c361183_k_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/6458524847_2f4c361183_k_mask.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/8399166846_f6fb4e4b8e_k.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/8399166846_f6fb4e4b8e_k.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/8399166846_f6fb4e4b8e_k_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/8399166846_f6fb4e4b8e_k_mask.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/alex-iby-G_Pk4D9rMLs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/alex-iby-G_Pk4D9rMLs.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/alex-iby-G_Pk4D9rMLs_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/alex-iby-G_Pk4D9rMLs_mask.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/bench2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/bench2.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/bench2_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/bench2_mask.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/bertrand-gabioud-CpuFzIsHYJ0.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/bertrand-gabioud-CpuFzIsHYJ0.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/bertrand-gabioud-CpuFzIsHYJ0_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/bertrand-gabioud-CpuFzIsHYJ0_mask.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/billow926-12-Wc-Zgx6Y.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/billow926-12-Wc-Zgx6Y.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/billow926-12-Wc-Zgx6Y_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/billow926-12-Wc-Zgx6Y_mask.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/photo-1583445095369-9c651e7e5d34.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/photo-1583445095369-9c651e7e5d34.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/photo-1583445095369-9c651e7e5d34_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/data/inpainting_examples/photo-1583445095369-9c651e7e5d34_mask.png 
-------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/environment.yaml: -------------------------------------------------------------------------------- 1 | name: ldm 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - python=3.8.5 7 | - pip=20.3 8 | - cudatoolkit=11.3 9 | - pytorch=1.11.0 10 | - torchvision=0.12.0 11 | - numpy=1.19.2 12 | - pip: 13 | - albumentations==0.4.3 14 | - diffusers 15 | - opencv-python==4.1.2.30 16 | - pudb==2019.2 17 | - invisible-watermark 18 | - imageio==2.9.0 19 | - imageio-ffmpeg==0.4.2 20 | - pytorch-lightning==1.4.2 21 | - omegaconf==2.1.1 22 | - test-tube>=0.7.5 23 | - streamlit>=0.73.1 24 | - einops==0.3.0 25 | - torch-fidelity==0.3.0 26 | - transformers==4.19.2 27 | - torchmetrics==0.6.0 28 | - kornia==0.6 29 | - -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers 30 | - -e git+https://github.com/openai/CLIP.git@main#egg=clip 31 | - -e . 32 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/ldm/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/ldm/data/__init__.py -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/ldm/data/base.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | from torch.utils.data import Dataset, ConcatDataset, ChainDataset, IterableDataset 3 | 4 | 5 | class Txt2ImgIterableBaseDataset(IterableDataset): 6 | ''' 7 | Define an interface to make the IterableDatasets for text2img data chainable 8 | ''' 9 | def __init__(self, num_records=0, valid_ids=None, size=256): 10 | super().__init__() 11 | self.num_records = num_records 12 | self.valid_ids = valid_ids 13 | self.sample_ids = valid_ids 14 | self.size = size 15 | 16 | print(f'{self.__class__.__name__} dataset contains {self.__len__()} examples.') 17 | 18 | def __len__(self): 19 | return self.num_records 20 | 21 | @abstractmethod 22 | def __iter__(self): 23 | pass -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/ldm/models/diffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/ldm/models/diffusion/__init__.py -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/ldm/models/diffusion/dpm_solver/__init__.py: -------------------------------------------------------------------------------- 1 | from .sampler import DPMSolverSampler -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/ldm/modules/diffusionmodules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/ldm/modules/diffusionmodules/__init__.py -------------------------------------------------------------------------------- 
/codes/diffusion_sd1/stable-diffusion/ldm/modules/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/ldm/modules/distributions/__init__.py -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/ldm/modules/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/ldm/modules/encoders/__init__.py -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/ldm/modules/image_degradation/__init__.py: -------------------------------------------------------------------------------- 1 | from ldm.modules.image_degradation.bsrgan import degradation_bsrgan_variant as degradation_fn_bsr 2 | from ldm.modules.image_degradation.bsrgan_light import degradation_bsrgan_variant as degradation_fn_bsr_light 3 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/ldm/modules/image_degradation/utils/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd1/stable-diffusion/ldm/modules/image_degradation/utils/test.png -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/ldm/modules/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from ldm.modules.losses.contperceptual import LPIPSWithDiscriminator -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/first_stage_models/kl-f16/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.AutoencoderKL 4 | params: 5 | monitor: val/rec_loss 6 | embed_dim: 16 7 | lossconfig: 8 | target: ldm.modules.losses.LPIPSWithDiscriminator 9 | params: 10 | disc_start: 50001 11 | kl_weight: 1.0e-06 12 | disc_weight: 0.5 13 | ddconfig: 14 | double_z: true 15 | z_channels: 16 16 | resolution: 256 17 | in_channels: 3 18 | out_ch: 3 19 | ch: 128 20 | ch_mult: 21 | - 1 22 | - 1 23 | - 2 24 | - 2 25 | - 4 26 | num_res_blocks: 2 27 | attn_resolutions: 28 | - 16 29 | dropout: 0.0 30 | data: 31 | target: main.DataModuleFromConfig 32 | params: 33 | batch_size: 6 34 | wrap: true 35 | train: 36 | target: ldm.data.openimages.FullOpenImagesTrain 37 | params: 38 | size: 384 39 | crop_size: 256 40 | validation: 41 | target: ldm.data.openimages.FullOpenImagesValidation 42 | params: 43 | size: 384 44 | crop_size: 256 45 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/first_stage_models/kl-f32/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.AutoencoderKL 4 | params: 5 | monitor: val/rec_loss 6 | embed_dim: 64 7 | lossconfig: 8 | target: 
ldm.modules.losses.LPIPSWithDiscriminator 9 | params: 10 | disc_start: 50001 11 | kl_weight: 1.0e-06 12 | disc_weight: 0.5 13 | ddconfig: 14 | double_z: true 15 | z_channels: 64 16 | resolution: 256 17 | in_channels: 3 18 | out_ch: 3 19 | ch: 128 20 | ch_mult: 21 | - 1 22 | - 1 23 | - 2 24 | - 2 25 | - 4 26 | - 4 27 | num_res_blocks: 2 28 | attn_resolutions: 29 | - 16 30 | - 8 31 | dropout: 0.0 32 | data: 33 | target: main.DataModuleFromConfig 34 | params: 35 | batch_size: 6 36 | wrap: true 37 | train: 38 | target: ldm.data.openimages.FullOpenImagesTrain 39 | params: 40 | size: 384 41 | crop_size: 256 42 | validation: 43 | target: ldm.data.openimages.FullOpenImagesValidation 44 | params: 45 | size: 384 46 | crop_size: 256 47 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/first_stage_models/kl-f4/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.AutoencoderKL 4 | params: 5 | monitor: val/rec_loss 6 | embed_dim: 3 7 | lossconfig: 8 | target: ldm.modules.losses.LPIPSWithDiscriminator 9 | params: 10 | disc_start: 50001 11 | kl_weight: 1.0e-06 12 | disc_weight: 0.5 13 | ddconfig: 14 | double_z: true 15 | z_channels: 3 16 | resolution: 256 17 | in_channels: 3 18 | out_ch: 3 19 | ch: 128 20 | ch_mult: 21 | - 1 22 | - 2 23 | - 4 24 | num_res_blocks: 2 25 | attn_resolutions: [] 26 | dropout: 0.0 27 | data: 28 | target: main.DataModuleFromConfig 29 | params: 30 | batch_size: 10 31 | wrap: true 32 | train: 33 | target: ldm.data.openimages.FullOpenImagesTrain 34 | params: 35 | size: 384 36 | crop_size: 256 37 | validation: 38 | target: ldm.data.openimages.FullOpenImagesValidation 39 | params: 40 | size: 384 41 | crop_size: 256 42 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/first_stage_models/kl-f8/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.AutoencoderKL 4 | params: 5 | monitor: val/rec_loss 6 | embed_dim: 4 7 | lossconfig: 8 | target: ldm.modules.losses.LPIPSWithDiscriminator 9 | params: 10 | disc_start: 50001 11 | kl_weight: 1.0e-06 12 | disc_weight: 0.5 13 | ddconfig: 14 | double_z: true 15 | z_channels: 4 16 | resolution: 256 17 | in_channels: 3 18 | out_ch: 3 19 | ch: 128 20 | ch_mult: 21 | - 1 22 | - 2 23 | - 4 24 | - 4 25 | num_res_blocks: 2 26 | attn_resolutions: [] 27 | dropout: 0.0 28 | data: 29 | target: main.DataModuleFromConfig 30 | params: 31 | batch_size: 4 32 | wrap: true 33 | train: 34 | target: ldm.data.openimages.FullOpenImagesTrain 35 | params: 36 | size: 384 37 | crop_size: 256 38 | validation: 39 | target: ldm.data.openimages.FullOpenImagesValidation 40 | params: 41 | size: 384 42 | crop_size: 256 43 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/first_stage_models/vq-f16/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.VQModel 4 | params: 5 | embed_dim: 8 6 | n_embed: 16384 7 | ddconfig: 8 | double_z: false 9 | z_channels: 8 10 | resolution: 256 11 | in_channels: 3 12 | out_ch: 3 13 | ch: 128 14 | ch_mult: 15 | - 1 16 | - 1 17 | - 2 18 | - 2 19 | - 4 20 | num_res_blocks: 
2 21 | attn_resolutions: 22 | - 16 23 | dropout: 0.0 24 | lossconfig: 25 | target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator 26 | params: 27 | disc_conditional: false 28 | disc_in_channels: 3 29 | disc_start: 250001 30 | disc_weight: 0.75 31 | disc_num_layers: 2 32 | codebook_weight: 1.0 33 | 34 | data: 35 | target: main.DataModuleFromConfig 36 | params: 37 | batch_size: 14 38 | num_workers: 20 39 | wrap: true 40 | train: 41 | target: ldm.data.openimages.FullOpenImagesTrain 42 | params: 43 | size: 384 44 | crop_size: 256 45 | validation: 46 | target: ldm.data.openimages.FullOpenImagesValidation 47 | params: 48 | size: 384 49 | crop_size: 256 50 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/first_stage_models/vq-f4-noattn/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.VQModel 4 | params: 5 | embed_dim: 3 6 | n_embed: 8192 7 | monitor: val/rec_loss 8 | 9 | ddconfig: 10 | attn_type: none 11 | double_z: false 12 | z_channels: 3 13 | resolution: 256 14 | in_channels: 3 15 | out_ch: 3 16 | ch: 128 17 | ch_mult: 18 | - 1 19 | - 2 20 | - 4 21 | num_res_blocks: 2 22 | attn_resolutions: [] 23 | dropout: 0.0 24 | lossconfig: 25 | target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator 26 | params: 27 | disc_conditional: false 28 | disc_in_channels: 3 29 | disc_start: 11 30 | disc_weight: 0.75 31 | codebook_weight: 1.0 32 | 33 | data: 34 | target: main.DataModuleFromConfig 35 | params: 36 | batch_size: 8 37 | num_workers: 12 38 | wrap: true 39 | train: 40 | target: ldm.data.openimages.FullOpenImagesTrain 41 | params: 42 | crop_size: 256 43 | validation: 44 | target: ldm.data.openimages.FullOpenImagesValidation 45 | params: 46 | crop_size: 256 47 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/first_stage_models/vq-f4/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.VQModel 4 | params: 5 | embed_dim: 3 6 | n_embed: 8192 7 | monitor: val/rec_loss 8 | 9 | ddconfig: 10 | double_z: false 11 | z_channels: 3 12 | resolution: 256 13 | in_channels: 3 14 | out_ch: 3 15 | ch: 128 16 | ch_mult: 17 | - 1 18 | - 2 19 | - 4 20 | num_res_blocks: 2 21 | attn_resolutions: [] 22 | dropout: 0.0 23 | lossconfig: 24 | target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator 25 | params: 26 | disc_conditional: false 27 | disc_in_channels: 3 28 | disc_start: 0 29 | disc_weight: 0.75 30 | codebook_weight: 1.0 31 | 32 | data: 33 | target: main.DataModuleFromConfig 34 | params: 35 | batch_size: 8 36 | num_workers: 16 37 | wrap: true 38 | train: 39 | target: ldm.data.openimages.FullOpenImagesTrain 40 | params: 41 | crop_size: 256 42 | validation: 43 | target: ldm.data.openimages.FullOpenImagesValidation 44 | params: 45 | crop_size: 256 46 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/first_stage_models/vq-f8-n256/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.VQModel 4 | params: 5 | embed_dim: 4 6 | n_embed: 256 7 | monitor: val/rec_loss 8 | ddconfig: 9 | double_z: false 10 | 
z_channels: 4 11 | resolution: 256 12 | in_channels: 3 13 | out_ch: 3 14 | ch: 128 15 | ch_mult: 16 | - 1 17 | - 2 18 | - 2 19 | - 4 20 | num_res_blocks: 2 21 | attn_resolutions: 22 | - 32 23 | dropout: 0.0 24 | lossconfig: 25 | target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator 26 | params: 27 | disc_conditional: false 28 | disc_in_channels: 3 29 | disc_start: 250001 30 | disc_weight: 0.75 31 | codebook_weight: 1.0 32 | 33 | data: 34 | target: main.DataModuleFromConfig 35 | params: 36 | batch_size: 10 37 | num_workers: 20 38 | wrap: true 39 | train: 40 | target: ldm.data.openimages.FullOpenImagesTrain 41 | params: 42 | size: 384 43 | crop_size: 256 44 | validation: 45 | target: ldm.data.openimages.FullOpenImagesValidation 46 | params: 47 | size: 384 48 | crop_size: 256 49 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/first_stage_models/vq-f8/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.VQModel 4 | params: 5 | embed_dim: 4 6 | n_embed: 16384 7 | monitor: val/rec_loss 8 | ddconfig: 9 | double_z: false 10 | z_channels: 4 11 | resolution: 256 12 | in_channels: 3 13 | out_ch: 3 14 | ch: 128 15 | ch_mult: 16 | - 1 17 | - 2 18 | - 2 19 | - 4 20 | num_res_blocks: 2 21 | attn_resolutions: 22 | - 32 23 | dropout: 0.0 24 | lossconfig: 25 | target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator 26 | params: 27 | disc_conditional: false 28 | disc_in_channels: 3 29 | disc_num_layers: 2 30 | disc_start: 1 31 | disc_weight: 0.6 32 | codebook_weight: 1.0 33 | data: 34 | target: main.DataModuleFromConfig 35 | params: 36 | batch_size: 10 37 | num_workers: 20 38 | wrap: true 39 | train: 40 | target: ldm.data.openimages.FullOpenImagesTrain 41 | params: 42 | size: 384 43 | crop_size: 256 44 | validation: 45 | target: ldm.data.openimages.FullOpenImagesValidation 46 | params: 47 | size: 384 48 | crop_size: 256 49 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/ldm/bsr_sr/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-06 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0155 7 | log_every_t: 100 8 | timesteps: 1000 9 | loss_type: l2 10 | first_stage_key: image 11 | cond_stage_key: LR_image 12 | image_size: 64 13 | channels: 3 14 | concat_mode: true 15 | cond_stage_trainable: false 16 | unet_config: 17 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 18 | params: 19 | image_size: 64 20 | in_channels: 6 21 | out_channels: 3 22 | model_channels: 160 23 | attention_resolutions: 24 | - 16 25 | - 8 26 | num_res_blocks: 2 27 | channel_mult: 28 | - 1 29 | - 2 30 | - 2 31 | - 4 32 | num_head_channels: 32 33 | first_stage_config: 34 | target: ldm.models.autoencoder.VQModelInterface 35 | params: 36 | embed_dim: 3 37 | n_embed: 8192 38 | monitor: val/rec_loss 39 | ddconfig: 40 | double_z: false 41 | z_channels: 3 42 | resolution: 256 43 | in_channels: 3 44 | out_ch: 3 45 | ch: 128 46 | ch_mult: 47 | - 1 48 | - 2 49 | - 4 50 | num_res_blocks: 2 51 | attn_resolutions: [] 52 | dropout: 0.0 53 | lossconfig: 54 | target: torch.nn.Identity 55 | cond_stage_config: 56 | target: torch.nn.Identity 57 | data: 58 | target: main.DataModuleFromConfig 59 | params: 60 | 
batch_size: 64 61 | wrap: false 62 | num_workers: 12 63 | train: 64 | target: ldm.data.openimages.SuperresOpenImagesAdvancedTrain 65 | params: 66 | size: 256 67 | degradation: bsrgan_light 68 | downscale_f: 4 69 | min_crop_f: 0.5 70 | max_crop_f: 1.0 71 | random_crop: true 72 | validation: 73 | target: ldm.data.openimages.SuperresOpenImagesAdvancedValidation 74 | params: 75 | size: 256 76 | degradation: bsrgan_light 77 | downscale_f: 4 78 | min_crop_f: 0.5 79 | max_crop_f: 1.0 80 | random_crop: true 81 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/ldm/celeba256/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 2.0e-06 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0195 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: image 11 | cond_stage_key: class_label 12 | image_size: 64 13 | channels: 3 14 | cond_stage_trainable: false 15 | concat_mode: false 16 | monitor: val/loss 17 | unet_config: 18 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 19 | params: 20 | image_size: 64 21 | in_channels: 3 22 | out_channels: 3 23 | model_channels: 224 24 | attention_resolutions: 25 | - 8 26 | - 4 27 | - 2 28 | num_res_blocks: 2 29 | channel_mult: 30 | - 1 31 | - 2 32 | - 3 33 | - 4 34 | num_head_channels: 32 35 | first_stage_config: 36 | target: ldm.models.autoencoder.VQModelInterface 37 | params: 38 | embed_dim: 3 39 | n_embed: 8192 40 | ddconfig: 41 | double_z: false 42 | z_channels: 3 43 | resolution: 256 44 | in_channels: 3 45 | out_ch: 3 46 | ch: 128 47 | ch_mult: 48 | - 1 49 | - 2 50 | - 4 51 | num_res_blocks: 2 52 | attn_resolutions: [] 53 | dropout: 0.0 54 | lossconfig: 55 | target: torch.nn.Identity 56 | cond_stage_config: __is_unconditional__ 57 | data: 58 | target: main.DataModuleFromConfig 59 | params: 60 | batch_size: 48 61 | num_workers: 5 62 | wrap: false 63 | train: 64 | target: ldm.data.faceshq.CelebAHQTrain 65 | params: 66 | size: 256 67 | validation: 68 | target: ldm.data.faceshq.CelebAHQValidation 69 | params: 70 | size: 256 71 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/ldm/cin256/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-06 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0195 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: image 11 | cond_stage_key: class_label 12 | image_size: 32 13 | channels: 4 14 | cond_stage_trainable: true 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | unet_config: 18 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 19 | params: 20 | image_size: 32 21 | in_channels: 4 22 | out_channels: 4 23 | model_channels: 256 24 | attention_resolutions: 25 | - 4 26 | - 2 27 | - 1 28 | num_res_blocks: 2 29 | channel_mult: 30 | - 1 31 | - 2 32 | - 4 33 | num_head_channels: 32 34 | use_spatial_transformer: true 35 | transformer_depth: 1 36 | context_dim: 512 37 | first_stage_config: 38 | target: ldm.models.autoencoder.VQModelInterface 39 | params: 40 | embed_dim: 4 41 | n_embed: 16384 42 | ddconfig: 43 | double_z: false 44 | z_channels: 4 45 | resolution: 256 46 | in_channels: 3 47 | out_ch: 3 48 | 
ch: 128 49 | ch_mult: 50 | - 1 51 | - 2 52 | - 2 53 | - 4 54 | num_res_blocks: 2 55 | attn_resolutions: 56 | - 32 57 | dropout: 0.0 58 | lossconfig: 59 | target: torch.nn.Identity 60 | cond_stage_config: 61 | target: ldm.modules.encoders.modules.ClassEmbedder 62 | params: 63 | embed_dim: 512 64 | key: class_label 65 | data: 66 | target: main.DataModuleFromConfig 67 | params: 68 | batch_size: 64 69 | num_workers: 12 70 | wrap: false 71 | train: 72 | target: ldm.data.imagenet.ImageNetTrain 73 | params: 74 | config: 75 | size: 256 76 | validation: 77 | target: ldm.data.imagenet.ImageNetValidation 78 | params: 79 | config: 80 | size: 256 81 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/ldm/ffhq256/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 2.0e-06 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0195 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: image 11 | cond_stage_key: class_label 12 | image_size: 64 13 | channels: 3 14 | cond_stage_trainable: false 15 | concat_mode: false 16 | monitor: val/loss 17 | unet_config: 18 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 19 | params: 20 | image_size: 64 21 | in_channels: 3 22 | out_channels: 3 23 | model_channels: 224 24 | attention_resolutions: 25 | - 8 26 | - 4 27 | - 2 28 | num_res_blocks: 2 29 | channel_mult: 30 | - 1 31 | - 2 32 | - 3 33 | - 4 34 | num_head_channels: 32 35 | first_stage_config: 36 | target: ldm.models.autoencoder.VQModelInterface 37 | params: 38 | embed_dim: 3 39 | n_embed: 8192 40 | ddconfig: 41 | double_z: false 42 | z_channels: 3 43 | resolution: 256 44 | in_channels: 3 45 | out_ch: 3 46 | ch: 128 47 | ch_mult: 48 | - 1 49 | - 2 50 | - 4 51 | num_res_blocks: 2 52 | attn_resolutions: [] 53 | dropout: 0.0 54 | lossconfig: 55 | target: torch.nn.Identity 56 | cond_stage_config: __is_unconditional__ 57 | data: 58 | target: main.DataModuleFromConfig 59 | params: 60 | batch_size: 42 61 | num_workers: 5 62 | wrap: false 63 | train: 64 | target: ldm.data.faceshq.FFHQTrain 65 | params: 66 | size: 256 67 | validation: 68 | target: ldm.data.faceshq.FFHQValidation 69 | params: 70 | size: 256 71 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/ldm/inpainting_big/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-06 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0205 7 | log_every_t: 100 8 | timesteps: 1000 9 | loss_type: l1 10 | first_stage_key: image 11 | cond_stage_key: masked_image 12 | image_size: 64 13 | channels: 3 14 | concat_mode: true 15 | monitor: val/loss 16 | scheduler_config: 17 | target: ldm.lr_scheduler.LambdaWarmUpCosineScheduler 18 | params: 19 | verbosity_interval: 0 20 | warm_up_steps: 1000 21 | max_decay_steps: 50000 22 | lr_start: 0.001 23 | lr_max: 0.1 24 | lr_min: 0.0001 25 | unet_config: 26 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 27 | params: 28 | image_size: 64 29 | in_channels: 7 30 | out_channels: 3 31 | model_channels: 256 32 | attention_resolutions: 33 | - 8 34 | - 4 35 | - 2 36 | num_res_blocks: 2 37 | channel_mult: 38 | - 1 39 | - 2 40 | - 3 41 | - 4 42 | num_heads: 8 43 | 
resblock_updown: true 44 | first_stage_config: 45 | target: ldm.models.autoencoder.VQModelInterface 46 | params: 47 | embed_dim: 3 48 | n_embed: 8192 49 | monitor: val/rec_loss 50 | ddconfig: 51 | attn_type: none 52 | double_z: false 53 | z_channels: 3 54 | resolution: 256 55 | in_channels: 3 56 | out_ch: 3 57 | ch: 128 58 | ch_mult: 59 | - 1 60 | - 2 61 | - 4 62 | num_res_blocks: 2 63 | attn_resolutions: [] 64 | dropout: 0.0 65 | lossconfig: 66 | target: ldm.modules.losses.contperceptual.DummyLoss 67 | cond_stage_config: __is_first_stage__ 68 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/ldm/layout2img-openimages256/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 2.0e-06 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0205 7 | log_every_t: 100 8 | timesteps: 1000 9 | loss_type: l1 10 | first_stage_key: image 11 | cond_stage_key: coordinates_bbox 12 | image_size: 64 13 | channels: 3 14 | conditioning_key: crossattn 15 | cond_stage_trainable: true 16 | unet_config: 17 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 18 | params: 19 | image_size: 64 20 | in_channels: 3 21 | out_channels: 3 22 | model_channels: 128 23 | attention_resolutions: 24 | - 8 25 | - 4 26 | - 2 27 | num_res_blocks: 2 28 | channel_mult: 29 | - 1 30 | - 2 31 | - 3 32 | - 4 33 | num_head_channels: 32 34 | use_spatial_transformer: true 35 | transformer_depth: 3 36 | context_dim: 512 37 | first_stage_config: 38 | target: ldm.models.autoencoder.VQModelInterface 39 | params: 40 | embed_dim: 3 41 | n_embed: 8192 42 | monitor: val/rec_loss 43 | ddconfig: 44 | double_z: false 45 | z_channels: 3 46 | resolution: 256 47 | in_channels: 3 48 | out_ch: 3 49 | ch: 128 50 | ch_mult: 51 | - 1 52 | - 2 53 | - 4 54 | num_res_blocks: 2 55 | attn_resolutions: [] 56 | dropout: 0.0 57 | lossconfig: 58 | target: torch.nn.Identity 59 | cond_stage_config: 60 | target: ldm.modules.encoders.modules.BERTEmbedder 61 | params: 62 | n_embed: 512 63 | n_layer: 16 64 | vocab_size: 8192 65 | max_seq_len: 92 66 | use_tokenizer: false 67 | monitor: val/loss_simple_ema 68 | data: 69 | target: main.DataModuleFromConfig 70 | params: 71 | batch_size: 24 72 | wrap: false 73 | num_workers: 10 74 | train: 75 | target: ldm.data.openimages.OpenImagesBBoxTrain 76 | params: 77 | size: 256 78 | validation: 79 | target: ldm.data.openimages.OpenImagesBBoxValidation 80 | params: 81 | size: 256 82 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/ldm/lsun_beds256/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 2.0e-06 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0195 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: image 11 | cond_stage_key: class_label 12 | image_size: 64 13 | channels: 3 14 | cond_stage_trainable: false 15 | concat_mode: false 16 | monitor: val/loss 17 | unet_config: 18 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 19 | params: 20 | image_size: 64 21 | in_channels: 3 22 | out_channels: 3 23 | model_channels: 224 24 | attention_resolutions: 25 | - 8 26 | - 4 27 | - 2 28 | num_res_blocks: 2 29 | channel_mult: 30 | - 1 31 | - 2 32 | - 3 33 | - 4 34 | 
num_head_channels: 32 35 | first_stage_config: 36 | target: ldm.models.autoencoder.VQModelInterface 37 | params: 38 | embed_dim: 3 39 | n_embed: 8192 40 | ddconfig: 41 | double_z: false 42 | z_channels: 3 43 | resolution: 256 44 | in_channels: 3 45 | out_ch: 3 46 | ch: 128 47 | ch_mult: 48 | - 1 49 | - 2 50 | - 4 51 | num_res_blocks: 2 52 | attn_resolutions: [] 53 | dropout: 0.0 54 | lossconfig: 55 | target: torch.nn.Identity 56 | cond_stage_config: __is_unconditional__ 57 | data: 58 | target: main.DataModuleFromConfig 59 | params: 60 | batch_size: 48 61 | num_workers: 5 62 | wrap: false 63 | train: 64 | target: ldm.data.lsun.LSUNBedroomsTrain 65 | params: 66 | size: 256 67 | validation: 68 | target: ldm.data.lsun.LSUNBedroomsValidation 69 | params: 70 | size: 256 71 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/ldm/lsun_churches256/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 5.0e-05 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0155 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | loss_type: l1 11 | first_stage_key: image 12 | cond_stage_key: image 13 | image_size: 32 14 | channels: 4 15 | cond_stage_trainable: false 16 | concat_mode: false 17 | scale_by_std: true 18 | monitor: val/loss_simple_ema 19 | scheduler_config: 20 | target: ldm.lr_scheduler.LambdaLinearScheduler 21 | params: 22 | warm_up_steps: 23 | - 10000 24 | cycle_lengths: 25 | - 10000000000000 26 | f_start: 27 | - 1.0e-06 28 | f_max: 29 | - 1.0 30 | f_min: 31 | - 1.0 32 | unet_config: 33 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 34 | params: 35 | image_size: 32 36 | in_channels: 4 37 | out_channels: 4 38 | model_channels: 192 39 | attention_resolutions: 40 | - 1 41 | - 2 42 | - 4 43 | - 8 44 | num_res_blocks: 2 45 | channel_mult: 46 | - 1 47 | - 2 48 | - 2 49 | - 4 50 | - 4 51 | num_heads: 8 52 | use_scale_shift_norm: true 53 | resblock_updown: true 54 | first_stage_config: 55 | target: ldm.models.autoencoder.AutoencoderKL 56 | params: 57 | embed_dim: 4 58 | monitor: val/rec_loss 59 | ddconfig: 60 | double_z: true 61 | z_channels: 4 62 | resolution: 256 63 | in_channels: 3 64 | out_ch: 3 65 | ch: 128 66 | ch_mult: 67 | - 1 68 | - 2 69 | - 4 70 | - 4 71 | num_res_blocks: 2 72 | attn_resolutions: [] 73 | dropout: 0.0 74 | lossconfig: 75 | target: torch.nn.Identity 76 | 77 | cond_stage_config: '__is_unconditional__' 78 | 79 | data: 80 | target: main.DataModuleFromConfig 81 | params: 82 | batch_size: 96 83 | num_workers: 5 84 | wrap: false 85 | train: 86 | target: ldm.data.lsun.LSUNChurchesTrain 87 | params: 88 | size: 256 89 | validation: 90 | target: ldm.data.lsun.LSUNChurchesValidation 91 | params: 92 | size: 256 93 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/ldm/semantic_synthesis256/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-06 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0205 7 | log_every_t: 100 8 | timesteps: 1000 9 | loss_type: l1 10 | first_stage_key: image 11 | cond_stage_key: segmentation 12 | image_size: 64 13 | channels: 3 14 | concat_mode: true 15 | cond_stage_trainable: true 16 | unet_config: 17 | target: 
ldm.modules.diffusionmodules.openaimodel.UNetModel 18 | params: 19 | image_size: 64 20 | in_channels: 6 21 | out_channels: 3 22 | model_channels: 128 23 | attention_resolutions: 24 | - 32 25 | - 16 26 | - 8 27 | num_res_blocks: 2 28 | channel_mult: 29 | - 1 30 | - 4 31 | - 8 32 | num_heads: 8 33 | first_stage_config: 34 | target: ldm.models.autoencoder.VQModelInterface 35 | params: 36 | embed_dim: 3 37 | n_embed: 8192 38 | ddconfig: 39 | double_z: false 40 | z_channels: 3 41 | resolution: 256 42 | in_channels: 3 43 | out_ch: 3 44 | ch: 128 45 | ch_mult: 46 | - 1 47 | - 2 48 | - 4 49 | num_res_blocks: 2 50 | attn_resolutions: [] 51 | dropout: 0.0 52 | lossconfig: 53 | target: torch.nn.Identity 54 | cond_stage_config: 55 | target: ldm.modules.encoders.modules.SpatialRescaler 56 | params: 57 | n_stages: 2 58 | in_channels: 182 59 | out_channels: 3 60 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/ldm/semantic_synthesis512/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-06 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0205 7 | log_every_t: 100 8 | timesteps: 1000 9 | loss_type: l1 10 | first_stage_key: image 11 | cond_stage_key: segmentation 12 | image_size: 128 13 | channels: 3 14 | concat_mode: true 15 | cond_stage_trainable: true 16 | unet_config: 17 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 18 | params: 19 | image_size: 128 20 | in_channels: 6 21 | out_channels: 3 22 | model_channels: 128 23 | attention_resolutions: 24 | - 32 25 | - 16 26 | - 8 27 | num_res_blocks: 2 28 | channel_mult: 29 | - 1 30 | - 4 31 | - 8 32 | num_heads: 8 33 | first_stage_config: 34 | target: ldm.models.autoencoder.VQModelInterface 35 | params: 36 | embed_dim: 3 37 | n_embed: 8192 38 | monitor: val/rec_loss 39 | ddconfig: 40 | double_z: false 41 | z_channels: 3 42 | resolution: 256 43 | in_channels: 3 44 | out_ch: 3 45 | ch: 128 46 | ch_mult: 47 | - 1 48 | - 2 49 | - 4 50 | num_res_blocks: 2 51 | attn_resolutions: [] 52 | dropout: 0.0 53 | lossconfig: 54 | target: torch.nn.Identity 55 | cond_stage_config: 56 | target: ldm.modules.encoders.modules.SpatialRescaler 57 | params: 58 | n_stages: 2 59 | in_channels: 182 60 | out_channels: 3 61 | data: 62 | target: main.DataModuleFromConfig 63 | params: 64 | batch_size: 8 65 | wrap: false 66 | num_workers: 10 67 | train: 68 | target: ldm.data.landscapes.RFWTrain 69 | params: 70 | size: 768 71 | crop_size: 512 72 | segmentation_to_float32: true 73 | validation: 74 | target: ldm.data.landscapes.RFWValidation 75 | params: 76 | size: 768 77 | crop_size: 512 78 | segmentation_to_float32: true 79 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/models/ldm/text2img256/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 2.0e-06 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.0015 6 | linear_end: 0.0195 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: image 11 | cond_stage_key: caption 12 | image_size: 64 13 | channels: 3 14 | cond_stage_trainable: true 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | unet_config: 18 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 19 | params: 20 
| image_size: 64 21 | in_channels: 3 22 | out_channels: 3 23 | model_channels: 192 24 | attention_resolutions: 25 | - 8 26 | - 4 27 | - 2 28 | num_res_blocks: 2 29 | channel_mult: 30 | - 1 31 | - 2 32 | - 3 33 | - 5 34 | num_head_channels: 32 35 | use_spatial_transformer: true 36 | transformer_depth: 1 37 | context_dim: 640 38 | first_stage_config: 39 | target: ldm.models.autoencoder.VQModelInterface 40 | params: 41 | embed_dim: 3 42 | n_embed: 8192 43 | ddconfig: 44 | double_z: false 45 | z_channels: 3 46 | resolution: 256 47 | in_channels: 3 48 | out_ch: 3 49 | ch: 128 50 | ch_mult: 51 | - 1 52 | - 2 53 | - 4 54 | num_res_blocks: 2 55 | attn_resolutions: [] 56 | dropout: 0.0 57 | lossconfig: 58 | target: torch.nn.Identity 59 | cond_stage_config: 60 | target: ldm.modules.encoders.modules.BERTEmbedder 61 | params: 62 | n_embed: 640 63 | n_layer: 32 64 | data: 65 | target: main.DataModuleFromConfig 66 | params: 67 | batch_size: 28 68 | num_workers: 10 69 | wrap: false 70 | train: 71 | target: ldm.data.previews.pytorch_dataset.PreviewsTrain 72 | params: 73 | size: 256 74 | validation: 75 | target: ldm.data.previews.pytorch_dataset.PreviewsValidation 76 | params: 77 | size: 256 78 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/scripts/download_first_stages.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | wget -O models/first_stage_models/kl-f4/model.zip https://ommer-lab.com/files/latent-diffusion/kl-f4.zip 3 | wget -O models/first_stage_models/kl-f8/model.zip https://ommer-lab.com/files/latent-diffusion/kl-f8.zip 4 | wget -O models/first_stage_models/kl-f16/model.zip https://ommer-lab.com/files/latent-diffusion/kl-f16.zip 5 | wget -O models/first_stage_models/kl-f32/model.zip https://ommer-lab.com/files/latent-diffusion/kl-f32.zip 6 | wget -O models/first_stage_models/vq-f4/model.zip https://ommer-lab.com/files/latent-diffusion/vq-f4.zip 7 | wget -O models/first_stage_models/vq-f4-noattn/model.zip https://ommer-lab.com/files/latent-diffusion/vq-f4-noattn.zip 8 | wget -O models/first_stage_models/vq-f8/model.zip https://ommer-lab.com/files/latent-diffusion/vq-f8.zip 9 | wget -O models/first_stage_models/vq-f8-n256/model.zip https://ommer-lab.com/files/latent-diffusion/vq-f8-n256.zip 10 | wget -O models/first_stage_models/vq-f16/model.zip https://ommer-lab.com/files/latent-diffusion/vq-f16.zip 11 | 12 | 13 | 14 | cd models/first_stage_models/kl-f4 15 | unzip -o model.zip 16 | 17 | cd ../kl-f8 18 | unzip -o model.zip 19 | 20 | cd ../kl-f16 21 | unzip -o model.zip 22 | 23 | cd ../kl-f32 24 | unzip -o model.zip 25 | 26 | cd ../vq-f4 27 | unzip -o model.zip 28 | 29 | cd ../vq-f4-noattn 30 | unzip -o model.zip 31 | 32 | cd ../vq-f8 33 | unzip -o model.zip 34 | 35 | cd ../vq-f8-n256 36 | unzip -o model.zip 37 | 38 | cd ../vq-f16 39 | unzip -o model.zip 40 | 41 | cd ../.. 
-------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/scripts/download_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | wget -O models/ldm/celeba256/celeba-256.zip https://ommer-lab.com/files/latent-diffusion/celeba.zip 3 | wget -O models/ldm/ffhq256/ffhq-256.zip https://ommer-lab.com/files/latent-diffusion/ffhq.zip 4 | wget -O models/ldm/lsun_churches256/lsun_churches-256.zip https://ommer-lab.com/files/latent-diffusion/lsun_churches.zip 5 | wget -O models/ldm/lsun_beds256/lsun_beds-256.zip https://ommer-lab.com/files/latent-diffusion/lsun_bedrooms.zip 6 | wget -O models/ldm/text2img256/model.zip https://ommer-lab.com/files/latent-diffusion/text2img.zip 7 | wget -O models/ldm/cin256/model.zip https://ommer-lab.com/files/latent-diffusion/cin.zip 8 | wget -O models/ldm/semantic_synthesis512/model.zip https://ommer-lab.com/files/latent-diffusion/semantic_synthesis.zip 9 | wget -O models/ldm/semantic_synthesis256/model.zip https://ommer-lab.com/files/latent-diffusion/semantic_synthesis256.zip 10 | wget -O models/ldm/bsr_sr/model.zip https://ommer-lab.com/files/latent-diffusion/sr_bsr.zip 11 | wget -O models/ldm/layout2img-openimages256/model.zip https://ommer-lab.com/files/latent-diffusion/layout2img_model.zip 12 | wget -O models/ldm/inpainting_big/model.zip https://ommer-lab.com/files/latent-diffusion/inpainting_big.zip 13 | 14 | 15 | 16 | cd models/ldm/celeba256 17 | unzip -o celeba-256.zip 18 | 19 | cd ../ffhq256 20 | unzip -o ffhq-256.zip 21 | 22 | cd ../lsun_churches256 23 | unzip -o lsun_churches-256.zip 24 | 25 | cd ../lsun_beds256 26 | unzip -o lsun_beds-256.zip 27 | 28 | cd ../text2img256 29 | unzip -o model.zip 30 | 31 | cd ../cin256 32 | unzip -o model.zip 33 | 34 | cd ../semantic_synthesis512 35 | unzip -o model.zip 36 | 37 | cd ../semantic_synthesis256 38 | unzip -o model.zip 39 | 40 | cd ../bsr_sr 41 | unzip -o model.zip 42 | 43 | cd ../layout2img-openimages256 44 | unzip -o model.zip 45 | 46 | cd ../inpainting_big 47 | unzip -o model.zip 48 | 49 | cd ../.. 
50 | -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/scripts/tests/test_watermark.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import fire 3 | from imwatermark import WatermarkDecoder 4 | 5 | 6 | def testit(img_path): 7 | bgr = cv2.imread(img_path) 8 | decoder = WatermarkDecoder('bytes', 136) 9 | watermark = decoder.decode(bgr, 'dwtDct') 10 | try: 11 | dec = watermark.decode('utf-8') 12 | except: 13 | dec = "null" 14 | print(dec) 15 | 16 | 17 | if __name__ == "__main__": 18 | fire.Fire(testit) -------------------------------------------------------------------------------- /codes/diffusion_sd1/stable-diffusion/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='latent-diffusion', 5 | version='0.0.1', 6 | description='', 7 | packages=find_packages(), 8 | install_requires=[ 9 | 'torch', 10 | 'numpy', 11 | 'tqdm', 12 | ], 13 | ) -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Stability AI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/model-variants.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/model-variants.jpg -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/modelfigure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/modelfigure.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/rick.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/rick.jpeg -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-inpainting/inpainting.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-inpainting/inpainting.gif -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-inpainting/merged-leopards.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-inpainting/merged-leopards.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/d2i.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/d2i.gif -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/depth2fantasy.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/depth2fantasy.jpeg -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/depth2img01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/depth2img01.png -------------------------------------------------------------------------------- 
/codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/depth2img02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/depth2img02.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/merged-0000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/merged-0000.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/merged-0004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/merged-0004.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/merged-0005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/merged-0005.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/midas.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/midas.jpeg -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/old_man.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/depth2img/old_man.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/img2img/mountains-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/img2img/mountains-1.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/img2img/mountains-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/img2img/mountains-2.png -------------------------------------------------------------------------------- 
/codes/diffusion_sd2/stablediffusion/assets/stable-samples/img2img/mountains-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/img2img/mountains-3.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/img2img/sketch-mountains-input.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/img2img/sketch-mountains-input.jpg -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/img2img/upscaling-in.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/img2img/upscaling-in.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/img2img/upscaling-out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/img2img/upscaling-out.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/houses_out.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/houses_out.jpeg -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/oldcar000.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/oldcar000.jpeg -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/oldcar500.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/oldcar500.jpeg -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/oldcar800.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/oldcar800.jpeg 
-------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/panda.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/panda.jpg -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/plates_out.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/plates_out.jpeg -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/unclip-variations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/unclip-variations.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/unclip-variations_noise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/stable-unclip/unclip-variations_noise.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/000002025.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/000002025.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/000002035.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/000002035.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/768/merged-0001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/768/merged-0001.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/768/merged-0002.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/768/merged-0002.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/768/merged-0003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/768/merged-0003.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/768/merged-0004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/768/merged-0004.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/768/merged-0005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/768/merged-0005.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/768/merged-0006.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/768/merged-0006.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/merged-0001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/merged-0001.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/merged-0003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/merged-0003.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/merged-0005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/merged-0005.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/merged-0006.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/merged-0006.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/merged-0007.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/txt2img/merged-0007.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/upscaling/merged-dog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/upscaling/merged-dog.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/upscaling/sampled-bear-x4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/upscaling/sampled-bear-x4.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/assets/stable-samples/upscaling/snow-leopard-x4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/assets/stable-samples/upscaling/snow-leopard-x4.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/checkpoints/checkpoints.txt: -------------------------------------------------------------------------------- 1 | Put unCLIP checkpoints here. 
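
The `checkpoints/checkpoints.txt` placeholder above marks where the stable-unclip weights are expected to live; the karlo configs that follow and the `v2-1-stable-unclip-*-inference.yaml` configs further below read files from this directory (for example `checkpoints/karlo_models/ViT-L-14_stats.th`). A small, hedged sanity check before launching the unCLIP scripts might look like the sketch below; only the stats filename is taken from the configs, the `.ckpt` name is a stand-in for whichever unCLIP checkpoint you downloaded.

# Hedged sketch: verify assumed unCLIP files are present under checkpoints/.
from pathlib import Path

expected = [
    Path("checkpoints") / "your-unclip-model.ckpt",              # placeholder name
    Path("checkpoints") / "karlo_models" / "ViT-L-14_stats.th",  # referenced by v2-1-stable-unclip-l-inference.yaml
]
for p in expected:
    print(f"{p}: {'found' if p.exists() else 'MISSING'}")
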
-------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/configs/karlo/decoder_900M_vit_l.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | type: t2i-decoder 3 | diffusion_sampler: uniform 4 | hparams: 5 | image_size: 64 6 | num_channels: 320 7 | num_res_blocks: 3 8 | channel_mult: '' 9 | attention_resolutions: 32,16,8 10 | num_heads: -1 11 | num_head_channels: 64 12 | num_heads_upsample: -1 13 | use_scale_shift_norm: true 14 | dropout: 0.1 15 | clip_dim: 768 16 | clip_emb_mult: 4 17 | text_ctx: 77 18 | xf_width: 1536 19 | xf_layers: 0 20 | xf_heads: 0 21 | xf_final_ln: false 22 | resblock_updown: true 23 | learn_sigma: true 24 | text_drop: 0.3 25 | clip_emb_type: image 26 | clip_emb_drop: 0.1 27 | use_plm: true 28 | 29 | diffusion: 30 | steps: 1000 31 | learn_sigma: true 32 | sigma_small: false 33 | noise_schedule: squaredcos_cap_v2 34 | use_kl: false 35 | predict_xstart: false 36 | rescale_learned_sigmas: true 37 | timestep_respacing: '' 38 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/configs/karlo/improved_sr_64_256_1.4B.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | type: improved_sr_64_256 3 | diffusion_sampler: uniform 4 | hparams: 5 | channels: 320 6 | depth: 3 7 | channels_multiple: 8 | - 1 9 | - 2 10 | - 3 11 | - 4 12 | dropout: 0.0 13 | 14 | diffusion: 15 | steps: 1000 16 | learn_sigma: false 17 | sigma_small: true 18 | noise_schedule: squaredcos_cap_v2 19 | use_kl: false 20 | predict_xstart: false 21 | rescale_learned_sigmas: true 22 | timestep_respacing: '7' 23 | 24 | 25 | sampling: 26 | timestep_respacing: '7' # fix 27 | clip_denoise: true 28 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/configs/karlo/prior_1B_vit_l.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | type: prior 3 | diffusion_sampler: uniform 4 | hparams: 5 | text_ctx: 77 6 | xf_width: 2048 7 | xf_layers: 20 8 | xf_heads: 32 9 | xf_final_ln: true 10 | text_drop: 0.2 11 | clip_dim: 768 12 | 13 | diffusion: 14 | steps: 1000 15 | learn_sigma: false 16 | sigma_small: true 17 | noise_schedule: squaredcos_cap_v2 18 | use_kl: false 19 | predict_xstart: true 20 | rescale_learned_sigmas: false 21 | timestep_respacing: '' 22 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/configs/stable-diffusion/intel/v2-inference-bf16.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 Intel Corporation 2 | # SPDX-License-Identifier: MIT 3 | 4 | model: 5 | base_learning_rate: 1.0e-4 6 | target: ldm.models.diffusion.ddpm.LatentDiffusion 7 | params: 8 | linear_start: 0.00085 9 | linear_end: 0.0120 10 | num_timesteps_cond: 1 11 | log_every_t: 200 12 | timesteps: 1000 13 | first_stage_key: "jpg" 14 | cond_stage_key: "txt" 15 | image_size: 64 16 | channels: 4 17 | cond_stage_trainable: false 18 | conditioning_key: crossattn 19 | monitor: val/loss_simple_ema 20 | scale_factor: 0.18215 21 | use_ema: False # we set this to false because this is an inference only config 22 | 23 | unet_config: 24 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 25 | params: 26 | use_checkpoint: False 27 | use_fp16: False 28 | use_bf16: True 29 | image_size: 32 
# unused 30 | in_channels: 4 31 | out_channels: 4 32 | model_channels: 320 33 | attention_resolutions: [ 4, 2, 1 ] 34 | num_res_blocks: 2 35 | channel_mult: [ 1, 2, 4, 4 ] 36 | num_head_channels: 64 # need to fix for flash-attn 37 | use_spatial_transformer: True 38 | use_linear_in_transformer: True 39 | transformer_depth: 1 40 | context_dim: 1024 41 | legacy: False 42 | 43 | first_stage_config: 44 | target: ldm.models.autoencoder.AutoencoderKL 45 | params: 46 | embed_dim: 4 47 | monitor: val/rec_loss 48 | ddconfig: 49 | #attn_type: "vanilla-xformers" 50 | double_z: true 51 | z_channels: 4 52 | resolution: 256 53 | in_channels: 3 54 | out_ch: 3 55 | ch: 128 56 | ch_mult: 57 | - 1 58 | - 2 59 | - 4 60 | - 4 61 | num_res_blocks: 2 62 | attn_resolutions: [] 63 | dropout: 0.0 64 | lossconfig: 65 | target: torch.nn.Identity 66 | 67 | cond_stage_config: 68 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 69 | params: 70 | freeze: True 71 | layer: "penultimate" 72 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/configs/stable-diffusion/intel/v2-inference-fp32.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 Intel Corporation 2 | # SPDX-License-Identifier: MIT 3 | 4 | model: 5 | base_learning_rate: 1.0e-4 6 | target: ldm.models.diffusion.ddpm.LatentDiffusion 7 | params: 8 | linear_start: 0.00085 9 | linear_end: 0.0120 10 | num_timesteps_cond: 1 11 | log_every_t: 200 12 | timesteps: 1000 13 | first_stage_key: "jpg" 14 | cond_stage_key: "txt" 15 | image_size: 64 16 | channels: 4 17 | cond_stage_trainable: false 18 | conditioning_key: crossattn 19 | monitor: val/loss_simple_ema 20 | scale_factor: 0.18215 21 | use_ema: False # we set this to false because this is an inference only config 22 | 23 | unet_config: 24 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 25 | params: 26 | use_checkpoint: False 27 | use_fp16: False 28 | image_size: 32 # unused 29 | in_channels: 4 30 | out_channels: 4 31 | model_channels: 320 32 | attention_resolutions: [ 4, 2, 1 ] 33 | num_res_blocks: 2 34 | channel_mult: [ 1, 2, 4, 4 ] 35 | num_head_channels: 64 # need to fix for flash-attn 36 | use_spatial_transformer: True 37 | use_linear_in_transformer: True 38 | transformer_depth: 1 39 | context_dim: 1024 40 | legacy: False 41 | 42 | first_stage_config: 43 | target: ldm.models.autoencoder.AutoencoderKL 44 | params: 45 | embed_dim: 4 46 | monitor: val/rec_loss 47 | ddconfig: 48 | #attn_type: "vanilla-xformers" 49 | double_z: true 50 | z_channels: 4 51 | resolution: 256 52 | in_channels: 3 53 | out_ch: 3 54 | ch: 128 55 | ch_mult: 56 | - 1 57 | - 2 58 | - 4 59 | - 4 60 | num_res_blocks: 2 61 | attn_resolutions: [] 62 | dropout: 0.0 63 | lossconfig: 64 | target: torch.nn.Identity 65 | 66 | cond_stage_config: 67 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 68 | params: 69 | freeze: True 70 | layer: "penultimate" 71 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/configs/stable-diffusion/intel/v2-inference-v-bf16.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 Intel Corporation 2 | # SPDX-License-Identifier: MIT 3 | 4 | model: 5 | base_learning_rate: 1.0e-4 6 | target: ldm.models.diffusion.ddpm.LatentDiffusion 7 | params: 8 | parameterization: "v" 9 | linear_start: 0.00085 10 | linear_end: 0.0120 11 | num_timesteps_cond: 1 
12 | log_every_t: 200 13 | timesteps: 1000 14 | first_stage_key: "jpg" 15 | cond_stage_key: "txt" 16 | image_size: 64 17 | channels: 4 18 | cond_stage_trainable: false 19 | conditioning_key: crossattn 20 | monitor: val/loss_simple_ema 21 | scale_factor: 0.18215 22 | use_ema: False # we set this to false because this is an inference only config 23 | 24 | unet_config: 25 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 26 | params: 27 | use_checkpoint: False 28 | use_fp16: False 29 | use_bf16: True 30 | image_size: 32 # unused 31 | in_channels: 4 32 | out_channels: 4 33 | model_channels: 320 34 | attention_resolutions: [ 4, 2, 1 ] 35 | num_res_blocks: 2 36 | channel_mult: [ 1, 2, 4, 4 ] 37 | num_head_channels: 64 # need to fix for flash-attn 38 | use_spatial_transformer: True 39 | use_linear_in_transformer: True 40 | transformer_depth: 1 41 | context_dim: 1024 42 | legacy: False 43 | 44 | first_stage_config: 45 | target: ldm.models.autoencoder.AutoencoderKL 46 | params: 47 | embed_dim: 4 48 | monitor: val/rec_loss 49 | ddconfig: 50 | #attn_type: "vanilla-xformers" 51 | double_z: true 52 | z_channels: 4 53 | resolution: 256 54 | in_channels: 3 55 | out_ch: 3 56 | ch: 128 57 | ch_mult: 58 | - 1 59 | - 2 60 | - 4 61 | - 4 62 | num_res_blocks: 2 63 | attn_resolutions: [] 64 | dropout: 0.0 65 | lossconfig: 66 | target: torch.nn.Identity 67 | 68 | cond_stage_config: 69 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 70 | params: 71 | freeze: True 72 | layer: "penultimate" 73 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/configs/stable-diffusion/intel/v2-inference-v-fp32.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 Intel Corporation 2 | # SPDX-License-Identifier: MIT 3 | 4 | model: 5 | base_learning_rate: 1.0e-4 6 | target: ldm.models.diffusion.ddpm.LatentDiffusion 7 | params: 8 | parameterization: "v" 9 | linear_start: 0.00085 10 | linear_end: 0.0120 11 | num_timesteps_cond: 1 12 | log_every_t: 200 13 | timesteps: 1000 14 | first_stage_key: "jpg" 15 | cond_stage_key: "txt" 16 | image_size: 64 17 | channels: 4 18 | cond_stage_trainable: false 19 | conditioning_key: crossattn 20 | monitor: val/loss_simple_ema 21 | scale_factor: 0.18215 22 | use_ema: False # we set this to false because this is an inference only config 23 | 24 | unet_config: 25 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 26 | params: 27 | use_checkpoint: False 28 | use_fp16: False 29 | image_size: 32 # unused 30 | in_channels: 4 31 | out_channels: 4 32 | model_channels: 320 33 | attention_resolutions: [ 4, 2, 1 ] 34 | num_res_blocks: 2 35 | channel_mult: [ 1, 2, 4, 4 ] 36 | num_head_channels: 64 # need to fix for flash-attn 37 | use_spatial_transformer: True 38 | use_linear_in_transformer: True 39 | transformer_depth: 1 40 | context_dim: 1024 41 | legacy: False 42 | 43 | first_stage_config: 44 | target: ldm.models.autoencoder.AutoencoderKL 45 | params: 46 | embed_dim: 4 47 | monitor: val/rec_loss 48 | ddconfig: 49 | #attn_type: "vanilla-xformers" 50 | double_z: true 51 | z_channels: 4 52 | resolution: 256 53 | in_channels: 3 54 | out_ch: 3 55 | ch: 128 56 | ch_mult: 57 | - 1 58 | - 2 59 | - 4 60 | - 4 61 | num_res_blocks: 2 62 | attn_resolutions: [] 63 | dropout: 0.0 64 | lossconfig: 65 | target: torch.nn.Identity 66 | 67 | cond_stage_config: 68 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 69 | params: 70 | freeze: True 71 | layer: 
"penultimate" 72 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/configs/stable-diffusion/v2-1-stable-unclip-h-inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: ldm.models.diffusion.ddpm.ImageEmbeddingConditionedLatentDiffusion 4 | params: 5 | embedding_dropout: 0.25 6 | parameterization: "v" 7 | linear_start: 0.00085 8 | linear_end: 0.0120 9 | log_every_t: 200 10 | timesteps: 1000 11 | first_stage_key: "jpg" 12 | cond_stage_key: "txt" 13 | image_size: 96 14 | channels: 4 15 | cond_stage_trainable: false 16 | conditioning_key: crossattn-adm 17 | scale_factor: 0.18215 18 | monitor: val/loss_simple_ema 19 | use_ema: False 20 | 21 | embedder_config: 22 | target: ldm.modules.encoders.modules.FrozenOpenCLIPImageEmbedder 23 | 24 | noise_aug_config: 25 | target: ldm.modules.encoders.modules.CLIPEmbeddingNoiseAugmentation 26 | params: 27 | timestep_dim: 1024 28 | noise_schedule_config: 29 | timesteps: 1000 30 | beta_schedule: squaredcos_cap_v2 31 | 32 | unet_config: 33 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 34 | params: 35 | num_classes: "sequential" 36 | adm_in_channels: 2048 37 | use_checkpoint: True 38 | image_size: 32 # unused 39 | in_channels: 4 40 | out_channels: 4 41 | model_channels: 320 42 | attention_resolutions: [ 4, 2, 1 ] 43 | num_res_blocks: 2 44 | channel_mult: [ 1, 2, 4, 4 ] 45 | num_head_channels: 64 # need to fix for flash-attn 46 | use_spatial_transformer: True 47 | use_linear_in_transformer: True 48 | transformer_depth: 1 49 | context_dim: 1024 50 | legacy: False 51 | 52 | first_stage_config: 53 | target: ldm.models.autoencoder.AutoencoderKL 54 | params: 55 | embed_dim: 4 56 | monitor: val/rec_loss 57 | ddconfig: 58 | attn_type: "vanilla-xformers" 59 | double_z: true 60 | z_channels: 4 61 | resolution: 256 62 | in_channels: 3 63 | out_ch: 3 64 | ch: 128 65 | ch_mult: 66 | - 1 67 | - 2 68 | - 4 69 | - 4 70 | num_res_blocks: 2 71 | attn_resolutions: [ ] 72 | dropout: 0.0 73 | lossconfig: 74 | target: torch.nn.Identity 75 | 76 | cond_stage_config: 77 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 78 | params: 79 | freeze: True 80 | layer: "penultimate" 81 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/configs/stable-diffusion/v2-1-stable-unclip-l-inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: ldm.models.diffusion.ddpm.ImageEmbeddingConditionedLatentDiffusion 4 | params: 5 | embedding_dropout: 0.25 6 | parameterization: "v" 7 | linear_start: 0.00085 8 | linear_end: 0.0120 9 | log_every_t: 200 10 | timesteps: 1000 11 | first_stage_key: "jpg" 12 | cond_stage_key: "txt" 13 | image_size: 96 14 | channels: 4 15 | cond_stage_trainable: false 16 | conditioning_key: crossattn-adm 17 | scale_factor: 0.18215 18 | monitor: val/loss_simple_ema 19 | use_ema: False 20 | 21 | embedder_config: 22 | target: ldm.modules.encoders.modules.ClipImageEmbedder 23 | params: 24 | model: "ViT-L/14" 25 | 26 | noise_aug_config: 27 | target: ldm.modules.encoders.modules.CLIPEmbeddingNoiseAugmentation 28 | params: 29 | clip_stats_path: "checkpoints/karlo_models/ViT-L-14_stats.th" 30 | timestep_dim: 768 31 | noise_schedule_config: 32 | timesteps: 1000 33 | beta_schedule: squaredcos_cap_v2 34 | 35 | unet_config: 36 | target: 
ldm.modules.diffusionmodules.openaimodel.UNetModel 37 | params: 38 | num_classes: "sequential" 39 | adm_in_channels: 1536 40 | use_checkpoint: True 41 | image_size: 32 # unused 42 | in_channels: 4 43 | out_channels: 4 44 | model_channels: 320 45 | attention_resolutions: [ 4, 2, 1 ] 46 | num_res_blocks: 2 47 | channel_mult: [ 1, 2, 4, 4 ] 48 | num_head_channels: 64 # need to fix for flash-attn 49 | use_spatial_transformer: True 50 | use_linear_in_transformer: True 51 | transformer_depth: 1 52 | context_dim: 1024 53 | legacy: False 54 | 55 | first_stage_config: 56 | target: ldm.models.autoencoder.AutoencoderKL 57 | params: 58 | embed_dim: 4 59 | monitor: val/rec_loss 60 | ddconfig: 61 | attn_type: "vanilla-xformers" 62 | double_z: true 63 | z_channels: 4 64 | resolution: 256 65 | in_channels: 3 66 | out_ch: 3 67 | ch: 128 68 | ch_mult: 69 | - 1 70 | - 2 71 | - 4 72 | - 4 73 | num_res_blocks: 2 74 | attn_resolutions: [ ] 75 | dropout: 0.0 76 | lossconfig: 77 | target: torch.nn.Identity 78 | 79 | cond_stage_config: 80 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 81 | params: 82 | freeze: True 83 | layer: "penultimate" -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/configs/stable-diffusion/v2-inference-v.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-4 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | parameterization: "v" 6 | linear_start: 0.00085 7 | linear_end: 0.0120 8 | num_timesteps_cond: 1 9 | log_every_t: 200 10 | timesteps: 1000 11 | first_stage_key: "jpg" 12 | cond_stage_key: "txt" 13 | image_size: 64 14 | channels: 4 15 | cond_stage_trainable: false 16 | conditioning_key: crossattn 17 | monitor: val/loss_simple_ema 18 | scale_factor: 0.18215 19 | use_ema: False # we set this to false because this is an inference only config 20 | 21 | unet_config: 22 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 23 | params: 24 | use_checkpoint: True 25 | use_fp16: True 26 | image_size: 32 # unused 27 | in_channels: 4 28 | out_channels: 4 29 | model_channels: 320 30 | attention_resolutions: [ 4, 2, 1 ] 31 | num_res_blocks: 2 32 | channel_mult: [ 1, 2, 4, 4 ] 33 | num_head_channels: 64 # need to fix for flash-attn 34 | use_spatial_transformer: True 35 | use_linear_in_transformer: True 36 | transformer_depth: 1 37 | context_dim: 1024 38 | legacy: False 39 | 40 | first_stage_config: 41 | target: ldm.models.autoencoder.AutoencoderKL 42 | params: 43 | embed_dim: 4 44 | monitor: val/rec_loss 45 | ddconfig: 46 | #attn_type: "vanilla-xformers" 47 | double_z: true 48 | z_channels: 4 49 | resolution: 256 50 | in_channels: 3 51 | out_ch: 3 52 | ch: 128 53 | ch_mult: 54 | - 1 55 | - 2 56 | - 4 57 | - 4 58 | num_res_blocks: 2 59 | attn_resolutions: [] 60 | dropout: 0.0 61 | lossconfig: 62 | target: torch.nn.Identity 63 | 64 | cond_stage_config: 65 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 66 | params: 67 | freeze: True 68 | layer: "penultimate" 69 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/configs/stable-diffusion/v2-inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-4 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | 
timesteps: 1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | use_ema: False # we set this to false because this is an inference only config 19 | 20 | unet_config: 21 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 22 | params: 23 | use_checkpoint: True 24 | use_fp16: True 25 | image_size: 32 # unused 26 | in_channels: 4 27 | out_channels: 4 28 | model_channels: 320 29 | attention_resolutions: [ 4, 2, 1 ] 30 | num_res_blocks: 2 31 | channel_mult: [ 1, 2, 4, 4 ] 32 | num_head_channels: 64 # need to fix for flash-attn 33 | use_spatial_transformer: True 34 | use_linear_in_transformer: True 35 | transformer_depth: 1 36 | context_dim: 1024 37 | legacy: False 38 | 39 | first_stage_config: 40 | target: ldm.models.autoencoder.AutoencoderKL 41 | params: 42 | embed_dim: 4 43 | monitor: val/rec_loss 44 | ddconfig: 45 | #attn_type: "vanilla-xformers" 46 | double_z: true 47 | z_channels: 4 48 | resolution: 256 49 | in_channels: 3 50 | out_ch: 3 51 | ch: 128 52 | ch_mult: 53 | - 1 54 | - 2 55 | - 4 56 | - 4 57 | num_res_blocks: 2 58 | attn_resolutions: [] 59 | dropout: 0.0 60 | lossconfig: 61 | target: torch.nn.Identity 62 | 63 | cond_stage_config: 64 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 65 | params: 66 | freeze: True 67 | layer: "penultimate" 68 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/configs/stable-diffusion/v2-midas-inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 5.0e-07 3 | target: ldm.models.diffusion.ddpm.LatentDepth2ImageDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false 15 | conditioning_key: hybrid 16 | scale_factor: 0.18215 17 | monitor: val/loss_simple_ema 18 | finetune_keys: null 19 | use_ema: False 20 | 21 | depth_stage_config: 22 | target: ldm.modules.midas.api.MiDaSInference 23 | params: 24 | model_type: "dpt_hybrid" 25 | 26 | unet_config: 27 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 28 | params: 29 | use_checkpoint: True 30 | image_size: 32 # unused 31 | in_channels: 5 32 | out_channels: 4 33 | model_channels: 320 34 | attention_resolutions: [ 4, 2, 1 ] 35 | num_res_blocks: 2 36 | channel_mult: [ 1, 2, 4, 4 ] 37 | num_head_channels: 64 # need to fix for flash-attn 38 | use_spatial_transformer: True 39 | use_linear_in_transformer: True 40 | transformer_depth: 1 41 | context_dim: 1024 42 | legacy: False 43 | 44 | first_stage_config: 45 | target: ldm.models.autoencoder.AutoencoderKL 46 | params: 47 | embed_dim: 4 48 | monitor: val/rec_loss 49 | ddconfig: 50 | #attn_type: "vanilla-xformers" 51 | double_z: true 52 | z_channels: 4 53 | resolution: 256 54 | in_channels: 3 55 | out_ch: 3 56 | ch: 128 57 | ch_mult: 58 | - 1 59 | - 2 60 | - 4 61 | - 4 62 | num_res_blocks: 2 63 | attn_resolutions: [ ] 64 | dropout: 0.0 65 | lossconfig: 66 | target: torch.nn.Identity 67 | 68 | cond_stage_config: 69 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 70 | params: 71 | freeze: True 72 | layer: "penultimate" 73 | 74 | 75 | 
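
All of the SD2 inference configs above share the same `AutoencoderKL` first stage and `scale_factor: 0.18215`; the UNet never sees raw autoencoder latents, only latents multiplied by this factor. The sketch below is a hedged illustration of that round trip on a model already loaded from `v2-inference.yaml` (as in the earlier loading sketch); the random tensor stands in for a real image batch normalised to [-1, 1].

# Hedged sketch: first-stage encode/decode for a loaded LatentDiffusion model.
# get_first_stage_encoding applies scale_factor; decode_first_stage undoes it.
import torch

x = torch.randn(1, 3, 512, 512)                      # stand-in for a [-1, 1] RGB batch
with torch.no_grad():
    posterior = model.encode_first_stage(x)          # AutoencoderKL posterior
    z = model.get_first_stage_encoding(posterior)    # 1 x 4 x 64 x 64, already scaled
    x_rec = model.decode_first_stage(z)              # back to image space
print(z.shape, x_rec.shape)
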
-------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/configs/stable-diffusion/x4-upscaling.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: ldm.models.diffusion.ddpm.LatentUpscaleDiffusion 4 | params: 5 | parameterization: "v" 6 | low_scale_key: "lr" 7 | linear_start: 0.0001 8 | linear_end: 0.02 9 | num_timesteps_cond: 1 10 | log_every_t: 200 11 | timesteps: 1000 12 | first_stage_key: "jpg" 13 | cond_stage_key: "txt" 14 | image_size: 128 15 | channels: 4 16 | cond_stage_trainable: false 17 | conditioning_key: "hybrid-adm" 18 | monitor: val/loss_simple_ema 19 | scale_factor: 0.08333 20 | use_ema: False 21 | 22 | low_scale_config: 23 | target: ldm.modules.diffusionmodules.upscaling.ImageConcatWithNoiseAugmentation 24 | params: 25 | noise_schedule_config: # image space 26 | linear_start: 0.0001 27 | linear_end: 0.02 28 | max_noise_level: 350 29 | 30 | unet_config: 31 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 32 | params: 33 | use_checkpoint: True 34 | num_classes: 1000 # timesteps for noise conditioning (here constant, just need one) 35 | image_size: 128 36 | in_channels: 7 37 | out_channels: 4 38 | model_channels: 256 39 | attention_resolutions: [ 2,4,8] 40 | num_res_blocks: 2 41 | channel_mult: [ 1, 2, 2, 4] 42 | disable_self_attentions: [True, True, True, False] 43 | disable_middle_self_attn: False 44 | num_heads: 8 45 | use_spatial_transformer: True 46 | transformer_depth: 1 47 | context_dim: 1024 48 | legacy: False 49 | use_linear_in_transformer: True 50 | 51 | first_stage_config: 52 | target: ldm.models.autoencoder.AutoencoderKL 53 | params: 54 | embed_dim: 4 55 | ddconfig: 56 | # attn_type: "vanilla-xformers" this model needs efficient attention to be feasible on HR data, also the decoder seems to break in half precision (UNet is fine though) 57 | double_z: True 58 | z_channels: 4 59 | resolution: 256 60 | in_channels: 3 61 | out_ch: 3 62 | ch: 128 63 | ch_mult: [ 1,2,4 ] # num_down = len(ch_mult)-1 64 | num_res_blocks: 2 65 | attn_resolutions: [ ] 66 | dropout: 0.0 67 | 68 | lossconfig: 69 | target: torch.nn.Identity 70 | 71 | cond_stage_config: 72 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 73 | params: 74 | freeze: True 75 | layer: "penultimate" 76 | 77 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/environment.yaml: -------------------------------------------------------------------------------- 1 | name: ldm 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - python=3.8.5 7 | - pip=20.3 8 | - cudatoolkit=11.3 9 | - pytorch=1.12.1 10 | - torchvision=0.13.1 11 | - numpy=1.23.1 12 | - pip: 13 | - albumentations==1.3.0 14 | - opencv-python==4.6.0.66 15 | - imageio==2.9.0 16 | - imageio-ffmpeg==0.4.2 17 | - pytorch-lightning==1.4.2 18 | - omegaconf==2.1.1 19 | - test-tube>=0.7.5 20 | - streamlit==1.12.1 21 | - einops==0.3.0 22 | - transformers==4.19.2 23 | - webdataset==0.2.5 24 | - kornia==0.6 25 | - open_clip_torch==2.0.2 26 | - invisible-watermark>=0.1.5 27 | - streamlit-drawable-canvas==0.8.0 28 | - torchmetrics==0.6.0 29 | - -e . 
30 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/ldm/data/__init__.py -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/data/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ldm.modules.midas.api import load_midas_transform 4 | 5 | 6 | class AddMiDaS(object): 7 | def __init__(self, model_type): 8 | super().__init__() 9 | self.transform = load_midas_transform(model_type) 10 | 11 | def pt2np(self, x): 12 | x = ((x + 1.0) * .5).detach().cpu().numpy() 13 | return x 14 | 15 | def np2pt(self, x): 16 | x = torch.from_numpy(x) * 2 - 1. 17 | return x 18 | 19 | def __call__(self, sample): 20 | # sample['jpg'] is tensor hwc in [-1, 1] at this point 21 | x = self.pt2np(sample['jpg']) 22 | x = self.transform({"image": x})["image"] 23 | sample['midas_in'] = x 24 | return sample -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/models/diffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/ldm/models/diffusion/__init__.py -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/models/diffusion/dpm_solver/__init__.py: -------------------------------------------------------------------------------- 1 | from .sampler import DPMSolverSampler -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/models/diffusion/sampling_util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def append_dims(x, target_dims): 6 | """Appends dimensions to the end of a tensor until it has target_dims dimensions. 
7 | From https://github.com/crowsonkb/k-diffusion/blob/master/k_diffusion/utils.py""" 8 | dims_to_append = target_dims - x.ndim 9 | if dims_to_append < 0: 10 | raise ValueError(f'input has {x.ndim} dims but target_dims is {target_dims}, which is less') 11 | return x[(...,) + (None,) * dims_to_append] 12 | 13 | 14 | def norm_thresholding(x0, value): 15 | s = append_dims(x0.pow(2).flatten(1).mean(1).sqrt().clamp(min=value), x0.ndim) 16 | return x0 * (value / s) 17 | 18 | 19 | def spatial_norm_thresholding(x0, value): 20 | # b c h w 21 | s = x0.pow(2).mean(1, keepdim=True).sqrt().clamp(min=value) 22 | return x0 * (value / s) -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/modules/diffusionmodules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/ldm/modules/diffusionmodules/__init__.py -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/modules/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/ldm/modules/distributions/__init__.py -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/modules/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/ldm/modules/encoders/__init__.py -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/modules/image_degradation/__init__.py: -------------------------------------------------------------------------------- 1 | from ldm.modules.image_degradation.bsrgan import degradation_bsrgan_variant as degradation_fn_bsr 2 | from ldm.modules.image_degradation.bsrgan_light import degradation_bsrgan_variant as degradation_fn_bsr_light 3 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/modules/image_degradation/utils/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/ldm/modules/image_degradation/utils/test.png -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/modules/karlo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/ldm/modules/karlo/__init__.py -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/modules/karlo/kakao/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/ldm/modules/karlo/kakao/__init__.py -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/modules/karlo/kakao/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/ldm/modules/karlo/kakao/models/__init__.py -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/modules/karlo/kakao/models/sr_256_1k.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------ 2 | # Karlo-v1.0.alpha 3 | # Copyright (c) 2022 KakaoBrain. All Rights Reserved. 4 | # ------------------------------------------------------------------------------------ 5 | 6 | from ldm.modules.karlo.kakao.models.sr_64_256 import SupRes64to256Progressive 7 | 8 | 9 | class SupRes256to1kProgressive(SupRes64to256Progressive): 10 | pass # no difference currently 11 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/modules/karlo/kakao/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------ 2 | # Adapted from Guided-Diffusion repo (https://github.com/openai/guided-diffusion) 3 | # ------------------------------------------------------------------------------------ 4 | 5 | 6 | from .diffusion import gaussian_diffusion as gd 7 | from .diffusion.respace import ( 8 | SpacedDiffusion, 9 | space_timesteps, 10 | ) 11 | 12 | 13 | def create_gaussian_diffusion( 14 | steps, 15 | learn_sigma, 16 | sigma_small, 17 | noise_schedule, 18 | use_kl, 19 | predict_xstart, 20 | rescale_learned_sigmas, 21 | timestep_respacing, 22 | ): 23 | betas = gd.get_named_beta_schedule(noise_schedule, steps) 24 | if use_kl: 25 | loss_type = gd.LossType.RESCALED_KL 26 | elif rescale_learned_sigmas: 27 | loss_type = gd.LossType.RESCALED_MSE 28 | else: 29 | loss_type = gd.LossType.MSE 30 | if not timestep_respacing: 31 | timestep_respacing = [steps] 32 | 33 | return SpacedDiffusion( 34 | use_timesteps=space_timesteps(steps, timestep_respacing), 35 | betas=betas, 36 | model_mean_type=( 37 | gd.ModelMeanType.EPSILON if not predict_xstart else gd.ModelMeanType.START_X 38 | ), 39 | model_var_type=( 40 | ( 41 | gd.ModelVarType.FIXED_LARGE 42 | if not sigma_small 43 | else gd.ModelVarType.FIXED_SMALL 44 | ) 45 | if not learn_sigma 46 | else gd.ModelVarType.LEARNED_RANGE 47 | ), 48 | loss_type=loss_type, 49 | ) 50 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/modules/karlo/kakao/modules/resample.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------ 2 | # Modified from Guided-Diffusion (https://github.com/openai/guided-diffusion) 3 | # ------------------------------------------------------------------------------------ 4 | 5 | from abc import abstractmethod 6 | 7 | import torch as th 8 | 9 | 
10 | def create_named_schedule_sampler(name, diffusion): 11 | """ 12 | Create a ScheduleSampler from a library of pre-defined samplers. 13 | 14 | :param name: the name of the sampler. 15 | :param diffusion: the diffusion object to sample for. 16 | """ 17 | if name == "uniform": 18 | return UniformSampler(diffusion) 19 | else: 20 | raise NotImplementedError(f"unknown schedule sampler: {name}") 21 | 22 | 23 | class ScheduleSampler(th.nn.Module): 24 | """ 25 | A distribution over timesteps in the diffusion process, intended to reduce 26 | variance of the objective. 27 | 28 | By default, samplers perform unbiased importance sampling, in which the 29 | objective's mean is unchanged. 30 | However, subclasses may override sample() to change how the resampled 31 | terms are reweighted, allowing for actual changes in the objective. 32 | """ 33 | 34 | @abstractmethod 35 | def weights(self): 36 | """ 37 | Get a numpy array of weights, one per diffusion step. 38 | 39 | The weights needn't be normalized, but must be positive. 40 | """ 41 | 42 | def sample(self, batch_size, device): 43 | """ 44 | Importance-sample timesteps for a batch. 45 | 46 | :param batch_size: the number of timesteps. 47 | :param device: the torch device to save to. 48 | :return: a tuple (timesteps, weights): 49 | - timesteps: a tensor of timestep indices. 50 | - weights: a tensor of weights to scale the resulting losses. 51 | """ 52 | w = self.weights() 53 | p = w / th.sum(w) 54 | indices = p.multinomial(batch_size, replacement=True) 55 | weights = 1 / (len(p) * p[indices]) 56 | return indices, weights 57 | 58 | 59 | class UniformSampler(ScheduleSampler): 60 | def __init__(self, diffusion): 61 | super(UniformSampler, self).__init__() 62 | self.diffusion = diffusion 63 | self.register_buffer( 64 | "_weights", th.ones([diffusion.num_timesteps]), persistent=False 65 | ) 66 | 67 | def weights(self): 68 | return self._weights 69 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/modules/midas/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/ldm/modules/midas/__init__.py -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/modules/midas/midas/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/diffusion_sd2/stablediffusion/ldm/modules/midas/midas/__init__.py -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/ldm/modules/midas/midas/base_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class BaseModel(torch.nn.Module): 5 | def load(self, path): 6 | """Load model from file. 
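The two Karlo helpers shown above, `create_gaussian_diffusion` (in `kakao/modules/__init__.py`) and `create_named_schedule_sampler` (in `kakao/modules/resample.py`), are normally used together: the first builds a (possibly respaced) `SpacedDiffusion` from a handful of flags, and the second draws the timesteps at which a training loss would be evaluated. The sketch below is illustrative only; it assumes the `stablediffusion` package above is importable, and every parameter value is a made-up example rather than a setting taken from this repository.

```python
# Illustrative sketch only; assumes the stablediffusion package above is on
# PYTHONPATH and that "linear" is an available named beta schedule. All values
# here are hypothetical examples, not settings from this repository.
import torch as th

from ldm.modules.karlo.kakao.modules import create_gaussian_diffusion
from ldm.modules.karlo.kakao.modules.resample import create_named_schedule_sampler

diffusion = create_gaussian_diffusion(
    steps=1000,                 # length of the training noise schedule
    learn_sigma=False,          # with sigma_small=False -> ModelVarType.FIXED_LARGE
    sigma_small=False,
    noise_schedule="linear",    # name resolved by gd.get_named_beta_schedule
    use_kl=False,               # use_kl=False, rescale_learned_sigmas=False -> plain MSE
    predict_xstart=False,       # predict epsilon rather than x_0
    rescale_learned_sigmas=False,
    timestep_respacing="",      # falsy -> keep all `steps` timesteps
)

sampler = create_named_schedule_sampler("uniform", diffusion)
t, weights = sampler.sample(batch_size=8, device=th.device("cpu"))
# t: (8,) tensor of timestep indices; for the uniform sampler every weight is
# 1.0 (1 / (T * 1/T)), so the importance-weighted loss reduces to a plain mean.
```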
7 | 8 | Args: 9 | path (str): file path 10 | """ 11 | parameters = torch.load(path, map_location=torch.device('cpu')) 12 | 13 | if "optimizer" in parameters: 14 | parameters = parameters["model"] 15 | 16 | self.load_state_dict(parameters) 17 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/requirements.txt: -------------------------------------------------------------------------------- 1 | albumentations==0.4.3 2 | opencv-python 3 | pudb==2019.2 4 | imageio==2.9.0 5 | imageio-ffmpeg==0.4.2 6 | pytorch-lightning==1.4.2 7 | torchmetrics==0.6 8 | omegaconf==2.1.1 9 | test-tube>=0.7.5 10 | streamlit>=0.73.1 11 | einops==0.3.0 12 | transformers==4.19.2 13 | webdataset==0.2.5 14 | open-clip-torch==2.7.0 15 | gradio==3.13.2 16 | kornia==0.6 17 | invisible-watermark>=0.1.5 18 | streamlit-drawable-canvas==0.8.0 19 | -e . 20 | -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/scripts/tests/test_watermark.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import fire 3 | from imwatermark import WatermarkDecoder 4 | 5 | 6 | def testit(img_path): 7 | bgr = cv2.imread(img_path) 8 | decoder = WatermarkDecoder('bytes', 136) 9 | watermark = decoder.decode(bgr, 'dwtDct') 10 | try: 11 | dec = watermark.decode('utf-8') 12 | except: 13 | dec = "null" 14 | print(dec) 15 | 16 | 17 | if __name__ == "__main__": 18 | fire.Fire(testit) -------------------------------------------------------------------------------- /codes/diffusion_sd2/stablediffusion/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='stable-diffusion', 5 | version='0.0.1', 6 | description='', 7 | packages=find_packages(), 8 | install_requires=[ 9 | 'torch', 10 | 'numpy', 11 | 'tqdm', 12 | ], 13 | ) -------------------------------------------------------------------------------- /codes/gan/bdpy/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *~ 3 | 4 | *.pyc 5 | .python-version 6 | .pydevproject 7 | .project 8 | *.ipynb 9 | .ipynb_checkpoints 10 | build 11 | dist 12 | *.egg-info 13 | 14 | *.npy 15 | *.mat 16 | *.h5 17 | 18 | .coverage 19 | htmlcov 20 | .pylintrc 21 | 22 | junk 23 | tmp 24 | test_local 25 | test_versions 26 | -------------------------------------------------------------------------------- /codes/gan/bdpy/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017-2018 Kamitani Lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /codes/gan/bdpy/README.md: -------------------------------------------------------------------------------- 1 | # BdPy 2 | 3 | [![PyPI version](https://badge.fury.io/py/bdpy.svg)](https://badge.fury.io/py/bdpy) 4 | [![GitHub license](https://img.shields.io/github/license/KamitaniLab/bdpy)](https://github.com/KamitaniLab/bdpy/blob/master/LICENSE) 5 | 6 | Python package for brain decoding analysis 7 | 8 | ## Requirements 9 | 10 | - Python 2.7, 3.6, or later 11 | - numpy 12 | - scipy 13 | - scikit-learn 14 | - h5py 15 | - hdf5storage 16 | - pyyaml 17 | 18 | ### Optional requirements 19 | 20 | - `dataform` module 21 | - pandas 22 | - `dl.caffe` module 23 | - Caffe 24 | - Pillow 25 | - tqdm 26 | - `dl.torch` module 27 | - PyTorch 28 | - Pillow 29 | - `fig` module 30 | - matplotlib 31 | - Pillow 32 | - `mri` module 33 | - nipy 34 | - nibabel 35 | - pandas 36 | - `recon.torch` module 37 | - PyTorch 38 | - Pillow 39 | 40 | ## Installation 41 | 42 | Latest stable release: 43 | 44 | ``` shell 45 | $ pip install bdpy 46 | ``` 47 | 48 | To install the latest development version ("master" branch of the repository), please run the following command. 49 | 50 | ```shell 51 | $ pip install git+https://github.com/KamitaniLab/bdpy.git 52 | ``` 53 | 54 | ## Packages 55 | 56 | - bdata: BdPy data format (BData) core package 57 | - dataform: Utilities for various data format 58 | - distcomp: Distributed computation utilities 59 | - dl: Deep learning utilities 60 | - feature: Utilities for DNN features 61 | - fig: Utilities for figure creation 62 | - ml: Machine learning utilities 63 | - mri: MRI utilities 64 | - opendata: Open data utilities 65 | - preproc: Utilities for preprocessing 66 | - recon: Reconstruction methods 67 | - stats: Utilities for statistics 68 | - util: Miscellaneous utilities 69 | 70 | ## BdPy data format 71 | 72 | BdPy data format (or BrainDecoderToolbox2 data format; BData) consists of two variables: dataset and metadata. **dataset** stores brain activity data (e.g., voxel signal value for fMRI data), target variables (e.g., ID of stimuli for vision experiments), and additional information specifying experimental design (e.g., run and block numbers for fMRI experiments). Each row corresponds to a single 'sample', and each column representes either single feature (voxel), target, or experiment design information. **metadata** contains data describing meta-information for each column in dataset. 73 | 74 | See [BData API examples](https://github.com/KamitaniLab/bdpy/blob/main/docs/bdata_api_examples.md) for useage of BData. 75 | 76 | ## Developers 77 | 78 | - Shuntaro C. Aoki (Kyoto Univ) 79 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | BdPy: Brain decoding toolbox for Python 3 | 4 | Developed by Kamitani Lab, Kyoto Univ. and ATR 5 | """ 6 | 7 | 8 | # `import bdpy` implicitly imports class `BData` (in package `bdata`) and 9 | # package `util`. 
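The README above describes the BData layout: a `dataset` matrix plus per-column `metadata`. As a compact, hypothetical illustration of that structure, the calls below mirror the repository's own `docs/bdata_api_examples.md` (reproduced later in this listing); the file name and metadata keys are placeholders.

```python
# Hypothetical sketch of the BData structure described above; 'data_file.h5'
# and the keys are placeholders taken from the bdpy docs, not files in this repo.
from bdpy import BData

bdata = BData('data_file.h5')

fmri = bdata.select('ROI_V1')          # (n_samples, n_voxels) slice of `dataset`
labels = bdata.select('image_index')   # target column(s), e.g. stimulus IDs
voxel_x = bdata.get_metadata('voxel_x', where='VoxelData')  # per-column metadata
```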
10 | from .bdata import BData 11 | from .bdata import vstack, metadata_equal 12 | from .util import * 13 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/bdata/__init__.py: -------------------------------------------------------------------------------- 1 | '''BdPy data package 2 | 3 | This package is a part of BdPy 4 | ''' 5 | 6 | 7 | from .bdata import BData 8 | from .utils import concat_dataset, vstack, metadata_equal 9 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/dataform/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | BdPy data format package 3 | 4 | This package is a part of BdPy 5 | """ 6 | 7 | from .pd import * 8 | from .datastore import * 9 | from .sparse import * 10 | from .features import * 11 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/dataform/pd.py: -------------------------------------------------------------------------------- 1 | '''Utilities for Pandas dataframe 2 | 3 | This file is a part of BdPy 4 | ''' 5 | 6 | 7 | __all__ = ['convert_dataframe', 'append_dataframe'] 8 | 9 | 10 | import pandas as pd 11 | 12 | 13 | def convert_dataframe(lst): 14 | '''Convert `lst` to Pandas dataframe 15 | 16 | Parameters 17 | ---------- 18 | lst : list of dicts 19 | 20 | Returns 21 | ------- 22 | Pandas dataframe 23 | ''' 24 | 25 | df_lst = (pd.DataFrame([item.values()], columns=item.keys()) for item in lst) 26 | df = pd.concat(df_lst, axis=0, ignore_index=True) 27 | return df 28 | 29 | 30 | def append_dataframe(df, **kwargs): 31 | '''Append a row to Pandas dataframe `df` 32 | 33 | Parameters 34 | ---------- 35 | df : Pandas dataframe 36 | kwargs : key-value of data to be added in `df` 37 | 38 | Returns 39 | ------- 40 | Pandas dataframe 41 | ''' 42 | 43 | df_append = pd.DataFrame({k : [kwargs[k]] for k in kwargs}) 44 | return df.append(df_append, ignore_index=True) 45 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/distcomp/__init__.py: -------------------------------------------------------------------------------- 1 | '''Distributed computation package 2 | 3 | This package is a part of BdPy. 
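As a quick, hypothetical illustration of the two Pandas helpers in `bdpy/dataform/pd.py` above (the column names are made up): note that `append_dataframe` calls the legacy `DataFrame.append`, which was removed in pandas 2.0, so the sketch assumes an older pandas.

```python
# Hypothetical usage of convert_dataframe / append_dataframe (defined above).
# Assumes pandas < 2.0, where DataFrame.append still exists.
from bdpy.dataform.pd import convert_dataframe, append_dataframe

rows = [
    {'subject': 'subj01', 'roi': 'early', 'score': 0.81},
    {'subject': 'subj02', 'roi': 'early', 'score': 0.78},
]
df = convert_dataframe(rows)                                  # one row per dict
df = append_dataframe(df, subject='subj05', roi='early', score=0.84)
print(df)
```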
4 | ''' 5 | 6 | from .distcomp import * 7 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/dl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/gan/bdpy/bdpy/dl/__init__.py -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/dl/torch/__init__.py: -------------------------------------------------------------------------------- 1 | from .torch import FeatureExtractor, ImageDataset 2 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/evals/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/gan/bdpy/bdpy/evals/__init__.py -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/feature/__init__.py: -------------------------------------------------------------------------------- 1 | '''Feature engineering module.''' 2 | 3 | from .feature import normalize_feature 4 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/feature/feature.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def normalize_feature(feature, 5 | channel_wise_mean=True, channel_wise_std=True, 6 | channel_axis=0, 7 | std_ddof=1, 8 | shift=None, scale=None, 9 | scaling_only=False): 10 | '''Normalize feature. 11 | 12 | Parameters 13 | ---------- 14 | feature : ndarray 15 | Feature to be normalized. 16 | channel_wise_mean, channel_wise_std : bool (default: True) 17 | If `True`, run channel-wise mean/SD normalization. 18 | channel_axis : int (default: 0) 19 | Channel axis. 20 | shift, scale : None, 'self', or ndarray (default: None) 21 | If shift/scale is `None`, nothing will be added/multiplied to the normalized features. 22 | If `'self'`, mean/SD of `feature` will be added/multiplied to the normalized features. 23 | If ndarrays are given, the arrays will be added/multiplied to the normalized features. 24 | std_ddof : int (default: 1) 25 | Delta degree of freedom for SD. 26 | 27 | Returns 28 | ------- 29 | ndarray 30 | Normalized (and scaled/shifted) features. 
31 | ''' 32 | 33 | if feature.ndim == 1: 34 | axes_along = None 35 | else: 36 | axes = list(range(feature.ndim)) 37 | axes.remove(channel_axis) 38 | axes_along = tuple(axes) 39 | 40 | if channel_wise_mean: 41 | feat_mean = np.mean(feature, axis=axes_along, keepdims=True) 42 | else: 43 | feat_mean = np.mean(feature, keepdims=True) 44 | 45 | if channel_wise_std: 46 | feat_std = np.std(feature, axis=axes_along, ddof=std_ddof, keepdims=True) 47 | else: 48 | feat_std = np.mean(np.std(feature, axis=axes_along, ddof=std_ddof, keepdims=True), keepdims=True) 49 | 50 | if isinstance(shift, str) and shift == 'self': 51 | shift = feat_mean 52 | 53 | if isinstance(scale, str) and scale == 'self': 54 | scale = feat_std 55 | 56 | if scaling_only: 57 | feat_n = (feature / feat_std) * scale 58 | else: 59 | feat_n = ((feature - feat_mean) / feat_std) 60 | 61 | if not scale is None: 62 | feat_n = feat_n * scale 63 | if not shift is None: 64 | feat_n = feat_n + shift 65 | 66 | if not feature.shape == feat_n.shape: 67 | try: 68 | feat_n.reshape(feature.shape) 69 | except: 70 | raise ValueError('Invalid shape of normalized features (original: %s, normalized: %s). ' 71 | + 'Possibly incorrect shift and/or scale.' 72 | % (str(feature.shape), str(feat_n.shape))) 73 | 74 | return feat_n 75 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/fig/__init__.py: -------------------------------------------------------------------------------- 1 | '''Figure package 2 | 3 | This package is a part of BdPy. 4 | ''' 5 | 6 | from .fig import * 7 | from .tile_images import tile_images 8 | from .draw_group_image_set import draw_group_image_set 9 | from .makeplots import makeplots 10 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/ml/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | BdPy machine learning package 3 | 4 | This package is a part of BdPy 5 | """ 6 | 7 | 8 | from .learning import Classification, CrossValidation, ModelTraining, ModelTest 9 | from .crossvalidation import make_cvindex, make_crossvalidationindex, make_cvindex_generator 10 | from .crossvalidation import cvindex_groupwise 11 | from .ensemble import * 12 | from .regress import * 13 | from .searchlight import * 14 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/ml/ensemble.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utilities for ensemble learning 3 | """ 4 | 5 | from collections import Counter 6 | 7 | import numpy as np 8 | 9 | 10 | __all__ = ['get_majority'] 11 | 12 | 13 | def get_majority(data, axis=0): 14 | """ 15 | Returns a list of majority elements in each row or column. 16 | 17 | If more than two elements occupies the same numbers in each row or column, 18 | 'get_majority' returns the first-sorted element. 
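A minimal, hypothetical sketch of `normalize_feature` above: z-score a convolutional feature map channel by channel, then shift and scale it to reference statistics (for example, statistics of true DNN features). The array shapes and reference values are made up.

```python
# Hypothetical sketch for normalize_feature (defined above); shapes are made up.
import numpy as np

from bdpy.feature import normalize_feature

feat = np.random.randn(256, 56, 56)      # (channels, H, W); channel_axis=0
ref_mean = np.zeros((256, 1, 1))         # assumed reference statistics
ref_std = np.ones((256, 1, 1))

feat_norm = normalize_feature(
    feat,
    channel_wise_mean=True,
    channel_wise_std=True,
    channel_axis=0,
    shift=ref_mean,                      # added back after z-scoring
    scale=ref_std,                       # multiplied in after z-scoring
)
assert feat_norm.shape == feat.shape
```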
19 | 20 | Parameters 21 | ---------- 22 | data : array_like 23 | axis : 0 or 1, optional 24 | Axis in which elements are counted (default: 0) 25 | 26 | 27 | Returns 28 | ------- 29 | majority_list : list 30 | A list of majority elements 31 | """ 32 | 33 | majority_list = [] 34 | 35 | if axis == 0: 36 | data = np.transpose(data) 37 | 38 | for i in range(data.shape[0]): 39 | target = data[i].tolist() 40 | # Change KS for returning first element if the same numbers 41 | #c = Counter(target) 42 | c = Counter(np.sort(target)) 43 | majority = c.most_common(1) 44 | majority_list.append(majority[0][0]) 45 | 46 | return majority_list 47 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/ml/regress.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file is a part of BdPy 3 | """ 4 | 5 | 6 | __all__ = ['add_bias'] 7 | 8 | 9 | import numpy as np 10 | 11 | 12 | def add_bias(x, axis=0): 13 | """ 14 | Add bias terms to x 15 | 16 | Parameters 17 | ---------- 18 | x : array_like 19 | Data matrix 20 | axis : 0 or 1, optional 21 | Axis in which bias terms are added (default: 0) 22 | 23 | Returns 24 | ------- 25 | y : array_like 26 | Data matrix with bias terms 27 | """ 28 | 29 | if axis == 0: 30 | vlen = x.shape[1] 31 | y = np.concatenate((x, np.array([np.ones(vlen)])), axis=0) 32 | elif axis == 1: 33 | vlen = x.shape[0] 34 | y = np.concatenate((x, np.array([np.ones(vlen)]).T), axis=1) 35 | else: 36 | raise ValueError('axis should be either 0 or 1') 37 | 38 | return y 39 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/ml/searchlight.py: -------------------------------------------------------------------------------- 1 | '''Utilities for searchlight analysis.''' 2 | 3 | 4 | __all__ = ['get_neighbors'] 5 | 6 | 7 | import numpy as np 8 | 9 | 10 | def get_neighbors(xyz, space_xyz, shape='sphere', size=9): 11 | ''' 12 | Returns neighboring voxels (cluster). 13 | 14 | Parameters 15 | ---------- 16 | xyz : array_like, shape=(3,) or len=3 17 | Voxel XYZ coordinate in the center of the cluster. 18 | space_xyz : array_like, shape=(3, N) or (N, 3) 19 | XYZ coordinate of all voxels. 20 | shape : {'sphere'}, optional 21 | Shape of the cluster. 22 | size : float, optional 23 | Size of the cluster. 24 | 25 | Returns 26 | ------- 27 | cluster_index : array_like, dtype=bool, shape=(N,) 28 | Boolean index of voxels in the cluster. 
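Two small, hypothetical examples for the helpers above: a per-column majority vote with `get_majority`, and appending an intercept column with `add_bias`. All values are illustrative.

```python
# Hypothetical usage of get_majority and add_bias (both defined above).
import numpy as np

from bdpy.ml import add_bias, get_majority

# Majority vote across three classifiers (rows) for each sample (column).
preds = np.array([[1, 0, 2],
                  [1, 2, 2],
                  [0, 0, 2]])
print(get_majority(preds))    # default axis=0 -> per-column majority: [1, 0, 2]

# Append a bias (intercept) column of ones to a samples-by-features matrix.
X = np.random.randn(100, 5)
Xb = add_bias(X, axis=1)      # shape (100, 6); the last column is all ones
```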
29 | ''' 30 | 31 | # Input check 32 | if isinstance(xyz, list): 33 | xyz = np.array(xyz) 34 | 35 | if xyz.ndim != 1: 36 | raise TypeError('xyz should be 1-D array') 37 | 38 | if space_xyz.ndim != 2: 39 | raise TypeError('space_xyz should be 2-D array') 40 | 41 | # Fix input shape 42 | if space_xyz.shape[0] == 3: 43 | space_xyz = space_xyz.T 44 | 45 | if shape == 'sphere': 46 | dist = np.sum((space_xyz - xyz) ** 2, axis=1) 47 | cluster_index = dist <= size ** 2 48 | else: 49 | raise ValueError('Unknown shape: %s' % shape) 50 | 51 | return cluster_index 52 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/mri/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | BdPy MRI package 3 | 4 | This package is a part of BdPy 5 | """ 6 | 7 | from .load_epi import load_epi 8 | from .load_mri import load_mri 9 | from .roi import add_roimask, get_roiflag, add_roilabel, add_rois, merge_rois, add_hcp_rois, add_hcp_visual_cortex 10 | from .fmriprep import create_bdata_fmriprep, FmriprepData 11 | from .spm import create_bdata_spm_domestic 12 | from .image import export_brain_image 13 | from .glm import make_paradigm 14 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/mri/image.py: -------------------------------------------------------------------------------- 1 | '''bdpy.mri.image''' 2 | 3 | 4 | from itertools import product 5 | 6 | import numpy as np 7 | import nibabel 8 | 9 | from bdpy.mri import load_mri 10 | 11 | 12 | def export_brain_image(brain_data, template, xyz=None, out_file=None): 13 | '''Export a brain data array as a brain image. 14 | 15 | Parameters 16 | ---------- 17 | brain_data : array 18 | Brain data array, shape = (n_sample, n_voxels) 19 | template : str 20 | Path to a template brain image file 21 | xyz : array, optional 22 | Voxel xyz coordinates of the brain data array 23 | 24 | Returns 25 | ------- 26 | nibabel.Nifti1Image 27 | ''' 28 | 29 | if brain_data.ndim == 1: 30 | brain_data = brain_data[np.newaxis, :] 31 | 32 | if brain_data.shape[0] > 1: 33 | raise RuntimeError('4-D image is not supported yet.') 34 | 35 | template_image = nibabel.load(template) 36 | _, brain_xyz, _ = load_mri(template) 37 | 38 | out_table = {} 39 | if xyz is None: 40 | xyz = brain_xyz 41 | 42 | for i in range(brain_data.shape[1]): 43 | x, y, z = xyz[0, i], xyz[1, i], xyz[2, i] 44 | out_table.update({(x, y, z): brain_data[0, i]}) 45 | 46 | out_image_array = np.zeros(template_image.shape[:3]) 47 | for i, j, k in product(range(template_image.shape[0]), range(template_image.shape[1]), range(template_image.shape[2])): 48 | x, y, z = template_image.affine[:3, :3].dot([i, j, k]) + template_image.affine[:3, 3] 49 | if (x, y, z) in out_table: 50 | out_image_array[i, j, k] = out_table[(x, y, z)] 51 | 52 | out_image = nibabel.Nifti1Image(out_image_array, template_image.affine) 53 | 54 | return out_image 55 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/mri/load_epi.py: -------------------------------------------------------------------------------- 1 | '''Loading EPIs module. 2 | 3 | This file is a part of BdPy. 4 | ''' 5 | 6 | 7 | import itertools as itr 8 | import os 9 | import re 10 | import string 11 | 12 | import nipy 13 | import numpy as np 14 | import scipy.io as sio 15 | 16 | 17 | def load_epi(datafiles): 18 | '''Load EPI files. 19 | 20 | The returned data and xyz are flattened by C-like order. 
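A hypothetical sketch of the searchlight helper `get_neighbors` above: given a center coordinate and the coordinates of all voxels, it returns a boolean mask of the voxels inside a sphere of radius `size` (in the same units as the coordinates). The coordinates below are made up.

```python
# Hypothetical usage of get_neighbors (defined above); coordinates are made up.
import numpy as np

from bdpy.ml import get_neighbors

space_xyz = np.array([[ 0.0,  0.0,  0.0],    # one row per voxel, shape (N, 3)
                      [ 3.0,  0.0,  0.0],
                      [ 0.0,  8.0,  0.0],
                      [10.0, 10.0, 10.0]])

mask = get_neighbors([0, 0, 0], space_xyz, shape='sphere', size=5)
# mask -> [ True  True False False]: only voxels within 5 units of the center.
voxels_in_cluster = space_xyz[mask]
```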
21 | 22 | Parameters 23 | ---------- 24 | datafiles: list 25 | EPI image files. 26 | 27 | Returns 28 | ------- 29 | data: array_like, shape = (M, N) 30 | Voxel signal values (M: the number of samples, N: the nubmer of 31 | voxels). 32 | xyz_array: array_like, shape = (3, N) 33 | XYZ Coordiantes of voxels. 34 | ''' 35 | 36 | data_list = [] 37 | xyz = np.array([]) 38 | 39 | for df in datafiles: 40 | print("Loading %s" % df) 41 | 42 | # Load an EPI image 43 | img = nipy.load_image(df) 44 | 45 | xyz = _check_xyz(xyz, img) 46 | data_list.append(np.array(img.get_data().flatten(), dtype=np.float64)) 47 | 48 | data = np.vstack(data_list) 49 | 50 | return data, xyz 51 | 52 | 53 | def _check_xyz(xyz, img): 54 | '''Check voxel xyz consistency.''' 55 | 56 | xyz_current = _get_xyz(img.coordmap.affine, img.get_data().shape) 57 | 58 | if xyz.size == 0: 59 | xyz = xyz_current 60 | elif (xyz != xyz_current).any(): 61 | raise ValueError("Voxel XYZ coordinates are inconsistent across volumes") 62 | 63 | return xyz 64 | 65 | 66 | def _get_xyz(affine, volume_shape): 67 | '''Return voxel XYZ coordinates based on an affine matrix. 68 | 69 | Parameters 70 | ---------- 71 | affine : array 72 | Affine matrix. 73 | volume_shape : list 74 | Shape of the volume (i, j, k lnegth). 75 | 76 | Returns 77 | ------- 78 | array, shape = (3, N) 79 | x-, y-, and z-coordinates (N: the number of voxels). 80 | ''' 81 | 82 | i_len, j_len, k_len = volume_shape 83 | ijk = np.array(list(itr.product(range(i_len), 84 | range(j_len), 85 | range(k_len), 86 | [1]))).T 87 | 88 | return np.dot(affine, ijk)[:-1] 89 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/mri/load_mri.py: -------------------------------------------------------------------------------- 1 | '''load_mri''' 2 | 3 | 4 | import numpy as np 5 | import nipy 6 | 7 | 8 | def load_mri(fpath): 9 | '''Load a MRI image. 
10 | 11 | - Returns data as 2D array (sample x voxel) 12 | - Returns voxle xyz coordinates (3 x voxel) 13 | - Returns voxel ijk indexes (3 x voxel) 14 | - Data, xyz, and ijk are flattened by Fortran-like index order 15 | ''' 16 | img = nipy.load_image(fpath) 17 | 18 | data = img.get_data() 19 | if data.ndim == 4: 20 | data = data.reshape(-1, data.shape[-1], order='F').T 21 | i_len, j_len, k_len, t = img.shape 22 | affine = np.delete(np.delete(img.coordmap.affine, 3, axis=0), 3, axis=1) 23 | elif data.ndim == 3: 24 | data = data.flatten(order='F') 25 | i_len, j_len, k_len = img.shape 26 | affine = img.coordmap.affine 27 | else: 28 | raise ValueError('Invalid shape.') 29 | 30 | ijk = np.array(np.unravel_index(np.arange(i_len * j_len * k_len), 31 | (i_len, j_len, k_len), order='F')) 32 | ijk_b = np.vstack([ijk, np.ones((1, i_len * j_len * k_len))]) 33 | xyz_b = np.dot(affine, ijk_b) 34 | xyz = xyz_b[:-1] 35 | 36 | return data, xyz, ijk 37 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/opendata/__init__.py: -------------------------------------------------------------------------------- 1 | from .openneuro import makedata 2 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/preproc/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | BdPy preprocessing package 3 | 4 | This package is a part of BdPy 5 | """ 6 | 7 | 8 | from .interface import * 9 | from .select_top import * 10 | from .preprocessor import Preprocessor 11 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/preproc/select_top.py: -------------------------------------------------------------------------------- 1 | """ 2 | select_top 3 | 4 | This file is a part of BdPy. 
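A hypothetical call to `load_mri` above (the older `load_epi` shown earlier behaves analogously for lists of EPI volumes). The path is a placeholder, and the function needs the optional MRI dependencies (nipy, nibabel, pandas) listed in the bdpy README.

```python
# Hypothetical sketch for load_mri (defined above); the path is a placeholder
# and nipy (an optional bdpy dependency) must be installed.
from bdpy.mri import load_mri

data, xyz, ijk = load_mri('/path/to/epi.nii')
# 4-D input: data has shape (n_volumes, n_voxels); 3-D input: (n_voxels,).
# xyz: (3, n_voxels) world coordinates; ijk: (3, n_voxels) voxel indices,
# all flattened in Fortran order as described in the docstring above.
```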
5 | """ 6 | 7 | 8 | __all__ = ['select_top'] 9 | 10 | 11 | import numpy as np 12 | from .util import print_start_msg, print_finish_msg 13 | 14 | 15 | def select_top(data, value, num, axis=0, verbose=True): 16 | """ 17 | Select top `num` features of `value` from `data` 18 | 19 | Parameters 20 | ---------- 21 | data : array 22 | Data matrix 23 | value : array_like 24 | Vector of values 25 | num : int 26 | Number of selected features 27 | 28 | Returns 29 | ------- 30 | selected_data : array 31 | Selected data matrix 32 | selected_index : array 33 | Index of selected data 34 | """ 35 | 36 | if verbose: 37 | print_start_msg() 38 | 39 | num_elem = data.shape[axis] 40 | 41 | sorted_index = np.argsort(value)[::-1] 42 | 43 | rank = np.zeros(num_elem, dtype=np.int) 44 | rank[sorted_index] = np.array(range(0, num_elem)) 45 | 46 | selected_index_bool = rank < num 47 | 48 | if axis == 0: 49 | selected_data = data[selected_index_bool, :] 50 | selected_index = np.array(range(0, num_elem), dtype=np.int)[selected_index_bool] 51 | elif axis == 1: 52 | selected_data = data[:, selected_index_bool] 53 | selected_index = np.array(range(0, num_elem), dtype=np.int)[selected_index_bool] 54 | else: 55 | raise ValueError('Invalid axis') 56 | 57 | if verbose: 58 | print_finish_msg() 59 | 60 | return selected_data, selected_index 61 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/preproc/util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions for preprocessing 3 | """ 4 | 5 | 6 | import inspect 7 | from datetime import datetime 8 | 9 | 10 | def print_start_msg(): 11 | """ 12 | Print process starting message 13 | """ 14 | print("%s Running %s" % (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 15 | inspect.currentframe().f_back.f_code.co_name)) 16 | 17 | 18 | def print_finish_msg(): 19 | """ 20 | Print process finishing message 21 | """ 22 | print("%s DONE" % datetime.now().strftime('%Y-%m-%d %H:%M:%S')) 23 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/recon/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/recon/torch/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/stats/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | BdPy statistics package 3 | 4 | This package is a part of BdPy 5 | 6 | 7 | Functions: 8 | 9 | - `corrcoef` : Returns correlation coefficient between `x` and `y` 10 | - `corrmat` : Returns correlation matrix between `x` and `y` 11 | """ 12 | 13 | 14 | from .corr import * 15 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/util/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | BdPy utility package 3 | 4 | This package is a part of BdPy 5 | """ 6 | 7 | 8 | from .utils import * 9 | from .info import dump_info 10 | from .math import average_elemwise 11 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/util/info.py: -------------------------------------------------------------------------------- 1 | 
import datetime 2 | import hashlib 3 | import os 4 | import sys 5 | import time 6 | import uuid 7 | import warnings 8 | import yaml 9 | 10 | 11 | def dump_info(output_dir, script=None, parameters=None, info_file='info.yaml'): 12 | '''Dump runtime information.''' 13 | 14 | if script is not None: 15 | script_path = os.path.abspath(script) 16 | with open(script_path, 'r') as f: 17 | script_txt = f.read() 18 | if sys.version_info.major == 2: 19 | script_md5 = hashlib.md5(script_txt).hexdigest() 20 | else: 21 | script_md5 = hashlib.md5(script_txt.encode()).hexdigest() 22 | else: 23 | script_path = None 24 | script_txt = None 25 | script_md5 = None 26 | 27 | run_id = str(uuid.uuid1()) 28 | run_time = time.time() 29 | run_info = { 30 | 'run_time' : run_time, 31 | 'time_stamp' : datetime.datetime.fromtimestamp(run_time).strftime('%Y-%m-%d %H:%M:%S'), 32 | 'host' : os.uname()[1], 33 | 'hardware' : os.uname()[4], 34 | 'os' : os.uname()[0], 35 | 'os_release' : os.uname()[2], 36 | 'os_version' : os.uname()[3], 37 | 'user' : os.getlogin(), 38 | 'script_path': script_path, 39 | 'script_txt' : script_txt, 40 | 'script_md5' : script_md5, 41 | } 42 | 43 | if parameters is not None: 44 | parameters_fixed = {} 45 | for k, v in parameters.items(): 46 | if isinstance(v, type({}.keys())): 47 | v = list(v) 48 | parameters_fixed.update({k: v}) 49 | run_info.update({'parameters': parameters_fixed}) 50 | 51 | run_info_file = os.path.join(output_dir, info_file) 52 | 53 | if os.path.isfile(run_info_file): 54 | with open(run_info_file, 'r') as f: 55 | info_yaml = yaml.load(f, Loader=yaml.SafeLoader) 56 | while info_yaml is None: 57 | warnings.warn('Failed to load info from %s. Retrying...' 58 | % run_info_file) 59 | with open(run_info_file, 'r') as f: 60 | info_yaml = yaml.load(f, Loader=yaml.SafeLoader) 61 | 62 | else: 63 | info_yaml = {} 64 | 65 | if '_runtime_info' in info_yaml: 66 | pass 67 | else: 68 | info_yaml.update({'_runtime_info' : {}}) 69 | 70 | info_yaml['_runtime_info'].update({run_id: run_info}) 71 | 72 | with open(run_info_file, 'w') as f: 73 | f.write(yaml.dump(info_yaml, default_flow_style=False)) 74 | 75 | return run_info 76 | -------------------------------------------------------------------------------- /codes/gan/bdpy/bdpy/util/math.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def average_elemwise(arrays, keepdims=False): 5 | '''Return element-wise mean of arrays. 6 | 7 | Parameters 8 | ---------- 9 | arrays : list of ndarrays 10 | keepdims : bool 11 | 12 | Raises 13 | ------ 14 | ndarray 15 | ''' 16 | 17 | n_array = len(arrays) 18 | 19 | max_dim_i = np.argmax([a.ndim for a in arrays]) 20 | max_array_shape = arrays[max_dim_i].shape 21 | 22 | arrays_sum = np.zeros(max_array_shape) 23 | 24 | for a in arrays: 25 | arrays_sum += a 26 | 27 | mean_array = arrays_sum / n_array 28 | 29 | if not keepdims: 30 | mean_array = np.squeeze(mean_array) 31 | 32 | return mean_array 33 | -------------------------------------------------------------------------------- /codes/gan/bdpy/docs/_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /codes/gan/bdpy/docs/bdata_api_examples.md: -------------------------------------------------------------------------------- 1 | # BData API examples 2 | 3 | ### Data API 4 | 5 | #### Import module and initialization. 
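A hypothetical call to `dump_info` above, which appends a runtime record (host, script text and MD5, parameters, timestamp) under `_runtime_info` in an `info.yaml` file inside the output directory. The directory and parameter names are made up.

```python
# Hypothetical usage of dump_info (defined above); paths and parameters are made up.
import os

from bdpy.util import dump_info

output_dir = './results/example_run'
os.makedirs(output_dir, exist_ok=True)

run_info = dump_info(
    output_dir,
    script=__file__,                      # the script's text and MD5 are recorded
    parameters={'subject': 'subj01', 'roi': 'early'},
)
# Writes/updates ./results/example_run/info.yaml with a new '_runtime_info' entry.
```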
6 | 7 | from bdpy import BData 8 | 9 | # Create an empty BData instance 10 | bdata = BData() 11 | 12 | # Load BData from a file 13 | bdata = BData('data_file.h5') 14 | 15 | #### Load data 16 | 17 | # Load BData from 'data_file.h5' 18 | bdata.load('data_file.h5') 19 | 20 | #### Show data 21 | 22 | # Show 'key' and 'description' of metadata 23 | bdata.show_meatadata() 24 | 25 | # Get 'value' of the metadata specified by 'key' 26 | voxel_x = bdata.get_metadata('voxel_x', where='VoxelData') 27 | 28 | #### Data extraction 29 | 30 | # Get an array of voxel data in V1 31 | data_v1 = bdata.select('ROI_V1') # shape=(M, num voxels in V1) 32 | 33 | # `select` accepts some operators 34 | data_v1v2 = bdata.select('ROI_V1 + ROI_V2') 35 | data_hvc = bdata.select('ROI_LOC + ROI_FFA + ROI_PPA - LOC_LVC') 36 | 37 | # Wildcard 38 | data_visual = data.select('ROI_V*') 39 | 40 | # Get labels ('image_index') in the dataset 41 | label_a = bdata.select('image_index') 42 | 43 | #### Data creation 44 | 45 | # Add new data 46 | x = numpy.random.rand(bdata.dataset.shape[0]) 47 | bdata.add(x, 'random_data') 48 | 49 | # Set description of metadata 50 | bdata.set_metadatadescription('random_data', 'Random data') 51 | 52 | # Save data 53 | bdata.save('output_file.h5') # File format is selected automatically by extension. .mat, .h5,and .npy are supported. 54 | -------------------------------------------------------------------------------- /codes/gan/bdpy/docs/dataform_features.md: -------------------------------------------------------------------------------- 1 | # Features and DecodedFeatures 2 | 3 | bdpy provides classes to handle DNN's (true) features and decoded features: `dataform.Features` and `dataform.DecodedFeatures`. 4 | 5 | ## Basic usage 6 | 7 | ``` python 8 | from bdpy.dataform import Features, DecodedFeatures 9 | 10 | 11 | ## Initialize 12 | 13 | features = Features('/path/to/features/dir') 14 | 15 | decoded_features = DecodedFeatures('/path/to/decoded/features/dir') 16 | 17 | ## Get features as an array 18 | 19 | feat = features.get(layer='conv1') 20 | 21 | decfeat = decoded_features.get(layer='conv1', subject='sub-01', roi='VC', label='stimulus-0001) # Decoded features for specified sample (label) 22 | decfeat = decoded_features.get(layer='conv1', subject='sub-01', roi='VC') # Decoded features from all avaiable samples 23 | 24 | # Decoded features with CV 25 | decfeat = decoded_features.get(layer='conv1', subject='sub-01', roi='VC', fold='cv_fold1) 26 | 27 | ## List labels 28 | 29 | feat_labels = features.labels 30 | 31 | decfeat_labels = decoded_features.labels # All available labels 32 | decfeat_labels = decoded_features.selected_labels # Labels assigned to decoded features previously obtained by `get` method 33 | ``` 34 | 35 | ## Feature statistics 36 | 37 | ``` python 38 | features.statistic('mean', layer='fc8') 39 | features.statistic('std', layer='fc8') # Default ddof = 1 40 | features.statistic('std, ddof=0', layer='fc8') 41 | 42 | decoded_features.statistic('mean', layer='fc8', subject='sub-01', roi='VC') 43 | decoded_features.statistic('std', layer='fc8', subject='sub-01', roi='VC') # Default ddof = 1 44 | decoded_features.statistic('std, ddof=0', layer='fc8', subject='sub-01', roi='VC') 45 | 46 | # Decoded features with CV 47 | decoded_features.statistic('mean', layer='fc8', subject='sub-01', roi='VC', fold='cv_fold1') # Mean within the specified fold 48 | decoded_features.statistic('mean', layer='fc8', subject='sub-01', roi='VC') 49 | 50 | # If `fold` is omitted for CV decoded features, 
decoded features are pooled across add CV folds and then the statistics are calculated. 51 | 52 | ``` 53 | 54 | 55 | -------------------------------------------------------------------------------- /codes/gan/bdpy/docs/index.md: -------------------------------------------------------------------------------- 1 | # Bdpy: Python Package for Brain Decoding 2 | -------------------------------------------------------------------------------- /codes/gan/bdpy/examples/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | bdpy 3 | data 4 | figures 5 | -------------------------------------------------------------------------------- /codes/gan/bdpy/setup.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Setup script for bdpy 3 | 4 | This file is a part of BdPy. 5 | ''' 6 | 7 | 8 | from setuptools import setup 9 | 10 | 11 | VERSION = '0.18' 12 | 13 | if __name__ == '__main__': 14 | 15 | # Long description 16 | with open('./README.md') as f: 17 | long_description = f.read() 18 | 19 | # Setup 20 | setup(name='bdpy', 21 | version=VERSION, 22 | description='Brain decoder toolbox for Python', 23 | long_description=long_description, 24 | long_description_content_type='text/markdown', 25 | author='Shuntaro C. Aoki', 26 | author_email='brainliner-admin@atr.jp', 27 | maintainer='Shuntaro C. Aoki', 28 | maintainer_email='brainliner-admin@atr.jp', 29 | url='https://github.com/KamitaniLab/bdpy', 30 | license='MIT', 31 | keywords='neuroscience, neuroimaging, brain decoding, fmri, machine learning', 32 | packages=['bdpy', 33 | 'bdpy.bdata', 34 | 'bdpy.dataform', 35 | 'bdpy.distcomp', 36 | 'bdpy.dl', 37 | 'bdpy.dl.torch', 38 | 'bdpy.evals', 39 | 'bdpy.feature', 40 | 'bdpy.fig', 41 | 'bdpy.ml', 42 | 'bdpy.mri', 43 | 'bdpy.opendata', 44 | 'bdpy.preproc', 45 | 'bdpy.recon', 46 | 'bdpy.recon.torch', 47 | 'bdpy.stats', 48 | 'bdpy.util'], 49 | install_requires=[ 50 | 'numpy', 51 | 'scipy', 52 | 'scikit-learn', 53 | 'h5py', 54 | 'hdf5storage', 55 | 'pyyaml' 56 | ]) 57 | -------------------------------------------------------------------------------- /codes/gan/bdpy/test/.gitignore: -------------------------------------------------------------------------------- 1 | bdpy -------------------------------------------------------------------------------- /codes/gan/bdpy/test/data/mri/epi0001.hdr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/gan/bdpy/test/data/mri/epi0001.hdr -------------------------------------------------------------------------------- /codes/gan/bdpy/test/data/mri/epi0001.img: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/gan/bdpy/test/data/mri/epi0001.img -------------------------------------------------------------------------------- /codes/gan/bdpy/test/data/mri/epi0002.hdr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/gan/bdpy/test/data/mri/epi0002.hdr -------------------------------------------------------------------------------- /codes/gan/bdpy/test/data/mri/epi0002.img: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/gan/bdpy/test/data/mri/epi0002.img -------------------------------------------------------------------------------- /codes/gan/bdpy/test/data/mri/epi0003.hdr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/gan/bdpy/test/data/mri/epi0003.hdr -------------------------------------------------------------------------------- /codes/gan/bdpy/test/data/mri/epi0003.img: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/gan/bdpy/test/data/mri/epi0003.img -------------------------------------------------------------------------------- /codes/gan/bdpy/test/data/mri/epi0004.hdr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/gan/bdpy/test/data/mri/epi0004.hdr -------------------------------------------------------------------------------- /codes/gan/bdpy/test/data/mri/epi0004.img: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/gan/bdpy/test/data/mri/epi0004.img -------------------------------------------------------------------------------- /codes/gan/bdpy/test/data/mri/epi0005.hdr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/gan/bdpy/test/data/mri/epi0005.hdr -------------------------------------------------------------------------------- /codes/gan/bdpy/test/data/mri/epi0005.img: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/codes/gan/bdpy/test/data/mri/epi0005.img -------------------------------------------------------------------------------- /codes/gan/bdpy/test/test_distcomp.py: -------------------------------------------------------------------------------- 1 | '''Tests for distcomp''' 2 | 3 | 4 | from unittest import TestCase, TestLoader, TextTestRunner 5 | 6 | import os 7 | import shutil 8 | 9 | from bdpy.distcomp import DistComp 10 | 11 | 12 | class TestUtil(TestCase): 13 | def test_distcomp_file(self): 14 | lockdir = './tmp' 15 | comp_id = 'test-distcomp-fs' 16 | 17 | if os.path.exists(lockdir): 18 | shutil.rmtree(lockdir) 19 | os.mkdir(lockdir) 20 | 21 | # init 22 | distcomp = DistComp(lockdir='./tmp', comp_id=comp_id) 23 | self.assertTrue(os.path.isdir(lockdir)) 24 | self.assertFalse(distcomp.islocked()) 25 | 26 | # lock 27 | distcomp.lock() 28 | self.assertTrue(os.path.isfile(os.path.join(lockdir, 29 | comp_id + '.lock'))) 30 | self.assertTrue(distcomp.islocked()) 31 | 32 | # unlock 33 | distcomp.unlock() 34 | self.assertFalse(os.path.isfile(os.path.join(lockdir, 35 | comp_id + '.lock'))) 36 | self.assertFalse(distcomp.islocked()) 37 | 38 | # islocked_lock 39 | 
distcomp.islocked_lock() 40 | self.assertTrue(os.path.isfile(os.path.join(lockdir, 41 | comp_id + '.lock'))) 42 | self.assertTrue(distcomp.islocked()) 43 | 44 | shutil.rmtree(lockdir) 45 | 46 | def test_distcomp_sqlite3(self): 47 | db_path = './tmp/distcomp.db' 48 | comp_id = 'test-distcomp-sqlite3-1' 49 | 50 | if os.path.exists(db_path): 51 | os.remove(db_path) 52 | 53 | if not os.path.exists(os.path.dirname(db_path)): 54 | os.mkdir(os.path.dirname(db_path)) 55 | 56 | # init 57 | distcomp = DistComp(backend='sqlite3', db_path=db_path) 58 | self.assertTrue(os.path.isfile(db_path)) 59 | self.assertFalse(distcomp.islocked(comp_id)) 60 | 61 | # lock 62 | distcomp.lock(comp_id) 63 | self.assertTrue(distcomp.islocked(comp_id)) 64 | 65 | # unlock 66 | distcomp.unlock(comp_id) 67 | self.assertFalse(distcomp.islocked(comp_id)) 68 | 69 | # islocked_lock 70 | with self.assertRaises(NotImplementedError): 71 | distcomp.islocked_lock(comp_id) 72 | 73 | os.remove(db_path) 74 | 75 | 76 | if __name__ == '__main__': 77 | suite = TestLoader().loadTestsFromTestCase(TestUtil) 78 | TextTestRunner(verbosity=2).run(suite) 79 | -------------------------------------------------------------------------------- /codes/gan/bdpy/test/test_util.py: -------------------------------------------------------------------------------- 1 | '''Tests for bdpy.util''' 2 | 3 | 4 | from unittest import TestCase, TestLoader, TextTestRunner 5 | 6 | import numpy as np 7 | 8 | import bdpy 9 | 10 | 11 | class TestUtil(TestCase): 12 | '''Tests for 'util' module''' 13 | 14 | def test_create_groupvector_pass0001(self): 15 | '''Test for create_groupvector (list and scalar inputs).''' 16 | 17 | x = [1, 2, 3] 18 | y = 2 19 | 20 | exp_output = [1, 1, 2, 2, 3, 3] 21 | 22 | test_output = bdpy.create_groupvector(x, y) 23 | 24 | self.assertTrue((test_output == exp_output).all()) 25 | 26 | def test_create_groupvector_pass0002(self): 27 | '''Test for create_groupvector (list and list inputs).''' 28 | 29 | x = [1, 2, 3] 30 | y = [2, 4, 2] 31 | 32 | exp_output = [1, 1, 2, 2, 2, 2, 3, 3] 33 | 34 | test_output = bdpy.create_groupvector(x, y) 35 | 36 | self.assertTrue((test_output == exp_output).all()) 37 | 38 | def test_create_groupvector_pass0003(self): 39 | '''Test for create_groupvector (Numpy array and scalar inputs).''' 40 | 41 | x = np.array([1, 2, 3]) 42 | y = 2 43 | 44 | exp_output = np.array([1, 1, 2, 2, 3, 3]) 45 | 46 | test_output = bdpy.create_groupvector(x, y) 47 | 48 | np.testing.assert_array_equal(test_output, exp_output) 49 | 50 | def test_create_groupvector_pass0005(self): 51 | '''Test for create_groupvector (Numpy arrays inputs).''' 52 | 53 | x = np.array([1, 2, 3]) 54 | y = np.array([2, 4, 2]) 55 | 56 | exp_output = np.array([1, 1, 2, 2, 2, 2, 3, 3]) 57 | 58 | test_output = bdpy.create_groupvector(x, y) 59 | 60 | np.testing.assert_array_equal(test_output, exp_output) 61 | 62 | def test_create_groupvector_error(self): 63 | '''Test for create_groupvector (ValueError).''' 64 | 65 | x = [1, 2, 3] 66 | y = [0] 67 | 68 | self.assertRaises(ValueError, bdpy.create_groupvector, x, y) 69 | 70 | def test_divide_chunks(self): 71 | '''Test for divide_chunks.''' 72 | 73 | a = [1, 2, 3, 4, 5, 6, 7] 74 | 75 | # Test 1 76 | expected = [[1, 2, 3, 4], 77 | [5, 6, 7]] 78 | actual = bdpy.divide_chunks(a, chunk_size=4) 79 | self.assertEqual(actual, expected) 80 | 81 | # Test 2 82 | expected = [[1, 2, 3], 83 | [4, 5, 6], 84 | [7]] 85 | actual = bdpy.divide_chunks(a, chunk_size=3) 86 | self.assertEqual(actual, expected) 87 | 88 | 89 | if __name__ == 
'__main__': 90 | suite = TestLoader().loadTestsFromTestCase(TestUtil) 91 | TextTestRunner(verbosity=2).run(suite) 92 | -------------------------------------------------------------------------------- /codes/gan/bdpy/test/test_util_math.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase, TestLoader, TextTestRunner 2 | 3 | import numpy as np 4 | 5 | import bdpy 6 | 7 | 8 | class TestUtilMath(TestCase): 9 | 10 | def test_average_elemwise(self): 11 | a = np.array([1, 2, 3]) 12 | b = np.array([9, 8, 7]) 13 | ans_valid = np.array([5, 5, 5]) 14 | ans_test = bdpy.average_elemwise([a, b]) 15 | np.testing.assert_array_equal(ans_test, ans_valid) 16 | 17 | a = np.array([[1, 2, 3]]) 18 | b = np.array([[9, 8, 7]]) 19 | ans_valid = np.array([5, 5, 5]) 20 | ans_test = bdpy.average_elemwise([a, b]) 21 | np.testing.assert_array_equal(ans_test, ans_valid) 22 | 23 | a = np.array([[1, 2, 3]]) 24 | b = np.array([9, 8, 7]) 25 | ans_valid = np.array([5, 5, 5]) 26 | ans_test = bdpy.average_elemwise([a, b]) 27 | np.testing.assert_array_equal(ans_test, ans_valid) 28 | 29 | a = np.array([1, 2, 3]) 30 | b = np.array([[9, 8, 7]]) 31 | ans_valid = np.array([5, 5, 5]) 32 | ans_test = bdpy.average_elemwise([a, b]) 33 | np.testing.assert_array_equal(ans_test, ans_valid) 34 | 35 | def test_average_elemwise_keepdims(self): 36 | a = np.array([1, 2, 3]) 37 | b = np.array([9, 8, 7]) 38 | ans_valid = np.array([5, 5, 5]) 39 | ans_test = bdpy.average_elemwise([a, b], keepdims=True) 40 | np.testing.assert_array_equal(ans_test, ans_valid) 41 | 42 | a = np.array([[1, 2, 3]]) 43 | b = np.array([[9, 8, 7]]) 44 | ans_valid = np.array([[5, 5, 5]]) 45 | ans_test = bdpy.average_elemwise([a, b], keepdims=True) 46 | np.testing.assert_array_equal(ans_test, ans_valid) 47 | 48 | a = np.array([[1, 2, 3]]) 49 | b = np.array([9, 8, 7]) 50 | ans_valid = np.array([[5, 5, 5]]) 51 | ans_test = bdpy.average_elemwise([a, b], keepdims=True) 52 | np.testing.assert_array_equal(ans_test, ans_valid) 53 | 54 | a = np.array([1, 2, 3]) 55 | b = np.array([[9, 8, 7]]) 56 | ans_valid = np.array([[5, 5, 5]]) 57 | ans_test = bdpy.average_elemwise([a, b], keepdims=True) 58 | np.testing.assert_array_equal(ans_test, ans_valid) 59 | 60 | 61 | if __name__ == '__main__': 62 | suite = TestLoader().loadTestsFromTestCase(TestUtilMath) 63 | TextTestRunner(verbosity=2).run(suite) 64 | -------------------------------------------------------------------------------- /codes/gan/make_subjstim_vgg19.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.io 3 | from tqdm import tqdm 4 | import argparse 5 | import os 6 | 7 | def main(): 8 | parser = argparse.ArgumentParser() 9 | 10 | parser.add_argument( 11 | "--layer", 12 | type=str, 13 | default=None, 14 | help="layer of VGG19", 15 | ) 16 | parser.add_argument( 17 | "--subject", 18 | type=str, 19 | default=None, 20 | help="subject name: subj01 or subj02 or subj05 or subj07 for full-data subjects ", 21 | ) 22 | 23 | opt = parser.parse_args() 24 | subject=opt.subject 25 | layer = opt.layer 26 | datdir = '../../nsdfeat/vgg19_features/' 27 | savedir = f'../../nsdfeat//subjfeat/' 28 | nsd_expdesign = scipy.io.loadmat('../../nsd/nsddata/experiments/nsd/nsd_expdesign.mat') 29 | 30 | # Note that most of them are 1-base index! 
31 | # This is why I subtract 1 32 | sharedix = nsd_expdesign['sharedix'] -1 33 | stims = np.load(f'../../mrifeat/{subject}/{subject}_stims_ave.npy') 34 | feats = [] 35 | tr_idx = np.zeros(len(stims)) 36 | 37 | for idx, s in tqdm(enumerate(stims)): 38 | if s in sharedix: 39 | tr_idx[idx] = 0 40 | else: 41 | tr_idx[idx] = 1 42 | feat = scipy.io.loadmat(f'{datdir}/{layer}/nsd/org/VGG19-{layer}-nsd-org-{s:06}.mat') 43 | feats.append(feat['feat'].flatten()) 44 | 45 | feats = np.stack(feats) 46 | 47 | os.makedirs(savedir, exist_ok=True) 48 | 49 | feats_tr = feats[tr_idx==1,:] 50 | feats_te = feats[tr_idx==0,:] 51 | 52 | np.save(f'{savedir}/{subject}_{layer}_tr.npy',feats_tr) 53 | np.save(f'{savedir}/{subject}_{layer}_te.npy',feats_te) 54 | 55 | 56 | if __name__ == "__main__": 57 | main() 58 | -------------------------------------------------------------------------------- /codes/gan/make_vgg19fromdecode.py: -------------------------------------------------------------------------------- 1 | import argparse, os 2 | import numpy as np 3 | from tqdm import tqdm 4 | import torch 5 | from scipy.io import savemat 6 | import torch 7 | import numpy as np 8 | 9 | def main(): 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument( 12 | "--subject", 13 | type=str, 14 | default=None, 15 | help="subject name: subj01 or subj02 or subj05 or subj07 for full-data subjects ", 16 | ) 17 | opt = parser.parse_args() 18 | subject = opt.subject 19 | roinames = ['early','ventral','midventral','midlateral','lateral','parietal'] 20 | 21 | maps = { 22 | 'conv1_1': [64,224,224], 23 | 'conv1_2': [64,224,224], 24 | 'conv2_1': [128,112,112], 25 | 'conv2_2': [128,112,112], 26 | 'conv3_1': [256,56,56], 27 | 'conv3_2': [256,56,56], 28 | 'conv3_3': [256,56,56], 29 | 'conv3_4': [256,56,56], 30 | 'conv4_1': [512,28,28], 31 | 'conv4_2': [512,28,28], 32 | 'conv4_3': [512,28,28], 33 | 'conv4_4': [512,28,28], 34 | 'conv5_1': [512,14,14], 35 | 'conv5_2': [512,14,14], 36 | 'conv5_3': [512,14,14], 37 | 'conv5_4': [512,14,14], 38 | 'fc6': [1,4096], 39 | 'fc7': [1,4096], 40 | 'fc8': [1,1000], 41 | } 42 | datdir = f'../../decoded/{subject}/' 43 | savedir = f'../../decoded/gan_mod/' 44 | os.makedirs(savedir, exist_ok=True) 45 | 46 | for layer in tqdm(maps.keys()): 47 | print(f'Now Layer: {layer}') 48 | os.makedirs(f'{savedir}/{layer}/{subject}/streams/', exist_ok=True) 49 | feat = np.load(f'{datdir}/{subject}_{"_".join(roinames)}_scores_{layer}.npy') 50 | for i in range(feat.shape[0]): 51 | cfeat = feat[i,:].reshape(maps[layer])[np.newaxis] 52 | mdic = {"feat":cfeat} 53 | savemat(f'{savedir}/{layer}/{subject}/streams/VGG19-{layer}-{subject}-streams-{i:06}.mat', mdic) 54 | 55 | if __name__ == "__main__": 56 | main() 57 | -------------------------------------------------------------------------------- /codes/utils/make_subjstim.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.io 3 | from tqdm import tqdm 4 | import argparse 5 | import os 6 | 7 | def main(): 8 | parser = argparse.ArgumentParser() 9 | 10 | parser.add_argument( 11 | "--featname", 12 | type=str, 13 | default='', 14 | help="Target variable", 15 | ) 16 | parser.add_argument( 17 | "--use_stim", 18 | type=str, 19 | default='', 20 | help="ave or each", 21 | ) 22 | parser.add_argument( 23 | "--subject", 24 | type=str, 25 | default=None, 26 | help="subject name: subj01 or subj02 or subj05 or subj07 for full-data subjects ", 27 | ) 28 | 29 | opt = parser.parse_args() 30 | subject=opt.subject 31 | 
use_stim = opt.use_stim 32 | featname = opt.featname 33 | topdir = '../../nsdfeat/' 34 | savedir = f'{topdir}/subjfeat/' 35 | featdir = f'{topdir}/{featname}/' 36 | 37 | nsd_expdesign = scipy.io.loadmat('../../nsd/nsddata/experiments/nsd/nsd_expdesign.mat') 38 | 39 | # Note that most of them are 1-base index! 40 | # This is why I subtract 1 41 | sharedix = nsd_expdesign['sharedix'] -1 42 | 43 | if use_stim == 'ave': 44 | stims = np.load(f'../../mrifeat/{subject}/{subject}_stims_ave.npy') 45 | else: # Each 46 | stims = np.load(f'../../mrifeat/{subject}/{subject}_stims.npy') 47 | 48 | feats = [] 49 | tr_idx = np.zeros(len(stims)) 50 | 51 | for idx, s in tqdm(enumerate(stims)): 52 | if s in sharedix: 53 | tr_idx[idx] = 0 54 | else: 55 | tr_idx[idx] = 1 56 | feat = np.load(f'{featdir}/{s:06}.npy') 57 | feats.append(feat) 58 | 59 | feats = np.stack(feats) 60 | 61 | os.makedirs(savedir, exist_ok=True) 62 | 63 | feats_tr = feats[tr_idx==1,:] 64 | feats_te = feats[tr_idx==0,:] 65 | np.save(f'../../mrifeat/{subject}/{subject}_stims_tridx.npy',tr_idx) 66 | 67 | np.save(f'{savedir}/{subject}_{use_stim}_{featname}_tr.npy',feats_tr) 68 | np.save(f'{savedir}/{subject}_{use_stim}_{featname}_te.npy',feats_te) 69 | 70 | 71 | if __name__ == "__main__": 72 | main() 73 | -------------------------------------------------------------------------------- /codes/utils/nsd_access/__init__.py: -------------------------------------------------------------------------------- 1 | from .nsda import * 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | nibabel 2 | h5py 3 | matplotlib 4 | pycocotools 5 | IPython 6 | himalaya 7 | timm 8 | fairscale -------------------------------------------------------------------------------- /results_tech_paper.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/results_tech_paper.jpg -------------------------------------------------------------------------------- /visual_summary.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/visual_summary.jpg -------------------------------------------------------------------------------- /visual_summary_techpaper.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yu-takagi/StableDiffusionReconstruction/e187d4b3db1d647ee3e1b4256a2068ffd15df683/visual_summary_techpaper.jpg --------------------------------------------------------------------------------