├── assets
│   ├── didex-dark.png
│   └── didex-light.png
├── dataset_creation
│   ├── README.txt
│   ├── GTA5_to_PTD_uni_cls.py
│   ├── SYNTHIA_to_PTD_uni_cls.py
│   ├── GTA5_to_PTD_rand_cond.py
│   ├── GTA5_to_PTD_uni_cls_rand_cond.py
│   ├── GTA5_to_PTD_rand_location_uni_cls_rand_cond.py
│   ├── GTA5_to_PTD_uni_cls_rand_location.py
│   └── GTA5_to_PTD_rand_location.py
├── generalization_experiments
│   └── README.txt
└── README.md

/assets/didex-dark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JNiemeijer/DIDEX/HEAD/assets/didex-dark.png

--------------------------------------------------------------------------------
/assets/didex-light.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JNiemeijer/DIDEX/HEAD/assets/didex-light.png

--------------------------------------------------------------------------------
/dataset_creation/README.txt:
--------------------------------------------------------------------------------
1 | This folder contains scripts that can be integrated into Stable Diffusion to generate the pseudo-target domains.

--------------------------------------------------------------------------------
/generalization_experiments/README.txt:
--------------------------------------------------------------------------------
1 | This folder contains the config files for starting the experiments for adaptation to the pseudo-target domain.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ![Fancy logo](assets/didex-light.png#gh-dark-mode-only)
2 | ![Fancy logo](assets/didex-dark.png#gh-light-mode-only)
3 | 
4 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/generalization-by-adaptation-diffusion-based/domain-generalization-on-gta-to-avg)](https://paperswithcode.com/sota/domain-generalization-on-gta-to-avg?p=generalization-by-adaptation-diffusion-based)
5 | ### [Paper](https://arxiv.org/abs/2312.01850)
6 | 
7 | 
8 | **Generalization by Adaptation: Diffusion-Based Domain Extension for Domain-Generalized Semantic Segmentation**<br>
9 | [Joshua Niemeijer*](https://scholar.google.com/citations?user=SK0mAJ0AAAAJ&hl), [Manuel Schwonberg*](https://scholar.google.com/citations?user=eqsXwGIAAAAJ&hl), [Jan-Aike Termöhlen*](https://scholar.google.com/citations?user=LkhzlxIAAAAJ&hl), [Nico M. Schmidt](https://scholar.google.com/citations?user=Kaei5zsAAAAJ&hl), and [Tim Fingscheidt](https://scholar.google.com/citations?user=KDgUWRMAAAAJ&hl)
10 | Winter Conference on Applications of Computer Vision (WACV) 2024
11 | (* indicates equal contribution)
12 | 
13 | The full code will be published soon.
14 | 
15 | ## Installation
16 | To utilize DIDEX, please follow these steps:
17 | 
18 | For the creation of the pseudo-target domain, we build on the following repos:
19 | 1. https://github.com/Stability-AI/stablediffusion.git
20 | 2. https://github.com/lllyasviel/ControlNet.git
21 | 
22 | For the adaptation to the pseudo-target domain, we utilize the following repo:
23 | 1. https://github.com/lhoyer/MIC.git
24 | 
25 | To utilize our code, please set up these repos following the descriptions they provide.
26 | 
27 | ## Diffusion-Based Domain Extension (Pseudo-Target Domain Generation)
28 | To create the pseudo-target domains, please utilize the scripts in the folder `dataset_creation`.
29 | 
30 | ## Adaptation To Pseudo-Target Domain
31 | To train the model for domain generalization, please utilize the config files in `generalization_experiments`.
32 | 
33 | ## Datasets
34 | We used the dataset structure ...
35 | 
36 | ## Evaluation
37 | 
38 | ## BibTeX
39 | ```
40 | @article{Niemeijer2023DIDEX,
41 | author = {Niemeijer, Joshua and Schwonberg, Manuel and Termöhlen, Jan-Aike and Schmidt, Nico M. and Fingscheidt, Tim},
42 | title = {{Generalization by Adaptation: Diffusion-Based Domain Extension for Domain-Generalized Semantic Segmentation}},
43 | year = {2023},
44 | month = dec,
45 | pages = {1--16},
46 | eprint = {2312.01850},
47 | archivePrefix = {arXiv}
48 | }
49 | ```
50 | 
51 | 
52 | 
53 | 
--------------------------------------------------------------------------------
/dataset_creation/GTA5_to_PTD_uni_cls.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import torch
3 | import numpy as np
4 | import gradio as gr
5 | from PIL import Image
6 | from omegaconf import OmegaConf
7 | from einops import repeat, rearrange
8 | from pytorch_lightning import seed_everything
9 | from imwatermark import WatermarkEncoder
10 | 
11 | from scripts.txt2img import put_watermark
12 | from ldm.util import instantiate_from_config
13 | from ldm.models.diffusion.ddim import DDIMSampler
14 | from ldm.data.util import AddMiDaS
15 | 
16 | torch.set_grad_enabled(False)  # inference only
17 | 
18 | 
19 | def initialize_model(config, ckpt):
20 |     config = OmegaConf.load(config)
21 |     print(config.model)
22 |     model = instantiate_from_config(config.model)
23 |     model.load_state_dict(torch.load(ckpt)["state_dict"], strict=False)
24 | 
25 |     device = torch.device(
26 |         "cuda") if torch.cuda.is_available() else torch.device("cpu")
27 |     model = model.to(device)
28 |     sampler = DDIMSampler(model)
29 |     return sampler
30 | 
31 | 
32 | def make_batch_sd(
33 |         image,
34 |         txt,
35 |         device,
36 |         num_samples=1,
37 |         model_type="dpt_hybrid"
38 | ):
39 |     image = np.array(image.convert("RGB"))
40 |     image = torch.from_numpy(image).to(dtype=torch.float32) / 127.5 - 1.0
41 |     # sample['jpg'] is tensor hwc in [-1, 1] at this point
42 |     midas_trafo = AddMiDaS(model_type=model_type)
43 |     batch = {
44 |         "jpg": image,
45 |         "txt": num_samples * [txt],
46 |     }
47 |     batch = midas_trafo(batch)
48 |     batch["jpg"] = rearrange(batch["jpg"], 'h w c -> 1 c h w')
49 |     batch["jpg"] = repeat(batch["jpg"].to(device=device),
50 |                           "1 ... -> n ...", n=num_samples)
51 |     batch["midas_in"] = repeat(torch.from_numpy(batch["midas_in"][None, ...]).to(
52 |         device=device), "1 ... -> n ...", n=num_samples)
-> n ...", n=num_samples) 53 | return batch 54 | 55 | 56 | def paint(sampler, image, prompt, t_enc, seed, scale, num_samples=1, callback=None, 57 | do_full_sample=False): 58 | device = torch.device( 59 | "cuda") if torch.cuda.is_available() else torch.device("cpu") 60 | model = sampler.model 61 | seed_everything(seed) 62 | 63 | print("Creating invisible watermark encoder (see https://github.com/ShieldMnt/invisible-watermark)...") 64 | wm = "SDV2" 65 | wm_encoder = WatermarkEncoder() 66 | wm_encoder.set_watermark('bytes', wm.encode('utf-8')) 67 | 68 | with torch.no_grad(),\ 69 | torch.autocast("cuda"): 70 | batch = make_batch_sd( 71 | image, txt=prompt, device=device, num_samples=num_samples) 72 | z = model.get_first_stage_encoding(model.encode_first_stage( 73 | batch[model.first_stage_key])) # move to latent space 74 | c = model.cond_stage_model.encode(batch["txt"]) 75 | c_cat = list() 76 | for ck in model.concat_keys: 77 | cc = batch[ck] 78 | cc = model.depth_model(cc) 79 | depth_min, depth_max = torch.amin(cc, dim=[1, 2, 3], keepdim=True), torch.amax(cc, dim=[1, 2, 3], 80 | keepdim=True) 81 | display_depth = (cc - depth_min) / (depth_max - depth_min) 82 | depth_image = Image.fromarray( 83 | (display_depth[0, 0, ...].cpu().numpy() * 255.).astype(np.uint8)) 84 | cc = torch.nn.functional.interpolate( 85 | cc, 86 | size=z.shape[2:], 87 | mode="bicubic", 88 | align_corners=False, 89 | ) 90 | depth_min, depth_max = torch.amin(cc, dim=[1, 2, 3], keepdim=True), torch.amax(cc, dim=[1, 2, 3], 91 | keepdim=True) 92 | cc = 2. * (cc - depth_min) / (depth_max - depth_min) - 1. 93 | c_cat.append(cc) 94 | c_cat = torch.cat(c_cat, dim=1) 95 | # cond 96 | cond = {"c_concat": [c_cat], "c_crossattn": [c]} 97 | 98 | # uncond cond 99 | uc_cross = model.get_unconditional_conditioning(num_samples, "") 100 | uc_full = {"c_concat": [c_cat], "c_crossattn": [uc_cross]} 101 | if not do_full_sample: 102 | # encode (scaled latent) 103 | z_enc = sampler.stochastic_encode( 104 | z, torch.tensor([t_enc] * num_samples).to(model.device)) 105 | else: 106 | z_enc = torch.randn_like(z) 107 | # decode it 108 | samples = sampler.decode(z_enc, cond, t_enc, unconditional_guidance_scale=scale, 109 | unconditional_conditioning=uc_full, callback=callback) 110 | x_samples_ddim = model.decode_first_stage(samples) 111 | result = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0) 112 | result = result.cpu().numpy().transpose(0, 2, 3, 1) * 255 113 | return [depth_image] + [put_watermark(Image.fromarray(img.astype(np.uint8)), wm_encoder) for img in result] 114 | 115 | 116 | def pad_image(input_image): 117 | pad_w, pad_h = np.max(((2, 2), np.ceil( 118 | np.array(input_image.size) / 64).astype(int)), axis=0) * 64 - input_image.size 119 | im_padded = Image.fromarray( 120 | np.pad(np.array(input_image), ((0, pad_h), (0, pad_w), (0, 0)), mode='edge')) 121 | return im_padded 122 | 123 | 124 | def predict(input_image, prompt, steps, num_samples, scale, seed, eta, strength): 125 | init_image = input_image.convert("RGB") 126 | image = pad_image(init_image) # resize to integer multiple of 32 127 | 128 | sampler.make_schedule(steps, ddim_eta=eta, verbose=True) 129 | assert 0. <= strength <= 1., 'can only work with strength in [0.0, 1.0]' 130 | do_full_sample = strength == 1. 
131 | t_enc = min(int(strength * steps), steps-1) 132 | result = paint( 133 | sampler=sampler, 134 | image=image, 135 | prompt=prompt, 136 | t_enc=t_enc, 137 | seed=seed, 138 | scale=scale, 139 | num_samples=num_samples, 140 | callback=None, 141 | do_full_sample=do_full_sample 142 | ) 143 | return result 144 | 145 | 146 | sampler = initialize_model(sys.argv[1], sys.argv[2]) 147 | 148 | from PIL import Image 149 | import os 150 | import random 151 | def process_image(file_path, prompt, ddim_steps, num_samples, scale, seed, eta, strength): 152 | input_image = Image.open(file_path) 153 | width, height = input_image.size 154 | new_width = int(width * 0.5) 155 | new_height = int(height * 0.5) 156 | # Resize the image 157 | input_image = input_image.resize((new_width, new_height)) 158 | 159 | result = predict(input_image, prompt, ddim_steps, num_samples, scale, seed, eta, strength) 160 | return result 161 | 162 | def save_result(result, output_file_path, original_size): 163 | # Upsample the result back to the original size 164 | result_image = result[1].resize(original_size) 165 | result_image.save(output_file_path) 166 | 167 | prompt_1 = "A high quality photo; europe" 168 | prompt_2 = "A high quality photo; europe;Highway" 169 | prompt_3 = "A high quality photo; europe;City" 170 | prompt_4 = "A high quality photo; germany" 171 | prompt_5 = "A high quality photo; germany;Highway" 172 | prompt_6 = "A high quality photo; germany;City" 173 | promts=[prompt_1,prompt_2,prompt_3,prompt_4,prompt_5,prompt_6] 174 | ddim_steps = 25 #50 175 | num_samples = 1 176 | scale = 9 # 9 177 | seed = 0 178 | eta = 0 179 | strength = 0.9 180 | 181 | # Replace with the actual path to the folder containing images / labels 182 | input_folder = 'GTA5/images/train' 183 | input_folder_label = 'GTA5/labels/train' 184 | # Replace with the actual path to the folder where you want to save the processed images 185 | output_folder = 'pseudo_target_domain/GTA5/uni_cls/' 186 | CLASSES = ('road', 'sidewalk', 'building', 'wall', 'fence', 'pole', 187 | 'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky', 188 | 'person', 'rider', 'car', 'truck', 'bus', 'tram/ train/ trolley', 'motorcycle', 189 | 'bicycle') 190 | # List all PNG files in the input folder 191 | png_files = [file for file in os.listdir(input_folder) if file.endswith('.png')] 192 | 193 | hist = np.zeros(19) 194 | 195 | for png_file in png_files: 196 | file_path = os.path.join(input_folder, png_file) 197 | label_path = os.path.join(input_folder_label, png_file.replace('.png', '_labelTrainIds.png')) 198 | label = Image.open(label_path) 199 | label_array = np.array(label) 200 | classes_present = np.unique(label_array) 201 | classes_present = [i for i in classes_present if i != 255] 202 | addressed_classes = [CLASSES[i] for i in classes_present] 203 | addressed_classes_string = ', '.join(addressed_classes) 204 | 205 | # Update the histogram with the current image's class occurrences 206 | hist[classes_present] +=1 207 | current_least_often_cls = np.argmin(hist) 208 | current_least_often_cls_string = CLASSES[current_least_often_cls] 209 | hist[np.argmin(hist)] +=1 210 | 211 | # Process the image 212 | random.seed() 213 | prompt = random.choice(promts)+ ", " + current_least_often_cls_string + ", " + addressed_classes_string 214 | result = process_image(file_path, prompt, ddim_steps, num_samples, scale, seed, eta, strength) 215 | 216 | # Get the original size of the image 217 | original_size = Image.open(file_path).size 218 | 219 | # Save the result in the output 
folder with the same filename 220 | output_file_path = os.path.join(output_folder, png_file) 221 | save_result(result, output_file_path, original_size) 222 | -------------------------------------------------------------------------------- /dataset_creation/SYNTHIA_to_PTD_uni_cls.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import torch 3 | import numpy as np 4 | import gradio as gr 5 | from PIL import Image 6 | from omegaconf import OmegaConf 7 | from einops import repeat, rearrange 8 | from pytorch_lightning import seed_everything 9 | from imwatermark import WatermarkEncoder 10 | 11 | from scripts.txt2img import put_watermark 12 | from ldm.util import instantiate_from_config 13 | from ldm.models.diffusion.ddim import DDIMSampler 14 | from ldm.data.util import AddMiDaS 15 | 16 | torch.set_grad_enabled(False) 17 | 18 | 19 | def initialize_model(config, ckpt): 20 | config = OmegaConf.load(config) 21 | print(config.model) 22 | model = instantiate_from_config(config.model) 23 | model.load_state_dict(torch.load(ckpt)["state_dict"], strict=False) 24 | 25 | device = torch.device( 26 | "cuda") if torch.cuda.is_available() else torch.device("cpu") 27 | model = model.to(device) 28 | sampler = DDIMSampler(model) 29 | return sampler 30 | 31 | 32 | def make_batch_sd( 33 | image, 34 | txt, 35 | device, 36 | num_samples=1, 37 | model_type="dpt_hybrid" 38 | ): 39 | image = np.array(image.convert("RGB")) 40 | image = torch.from_numpy(image).to(dtype=torch.float32) / 127.5 - 1.0 41 | # sample['jpg'] is tensor hwc in [-1, 1] at this point 42 | midas_trafo = AddMiDaS(model_type=model_type) 43 | batch = { 44 | "jpg": image, 45 | "txt": num_samples * [txt], 46 | } 47 | batch = midas_trafo(batch) 48 | batch["jpg"] = rearrange(batch["jpg"], 'h w c -> 1 c h w') 49 | batch["jpg"] = repeat(batch["jpg"].to(device=device), 50 | "1 ... -> n ...", n=num_samples) 51 | batch["midas_in"] = repeat(torch.from_numpy(batch["midas_in"][None, ...]).to( 52 | device=device), "1 ... 
-> n ...", n=num_samples) 53 | return batch 54 | 55 | 56 | def paint(sampler, image, prompt, t_enc, seed, scale, num_samples=1, callback=None, 57 | do_full_sample=False): 58 | device = torch.device( 59 | "cuda") if torch.cuda.is_available() else torch.device("cpu") 60 | model = sampler.model 61 | seed_everything(seed) 62 | 63 | print("Creating invisible watermark encoder (see https://github.com/ShieldMnt/invisible-watermark)...") 64 | wm = "SDV2" 65 | wm_encoder = WatermarkEncoder() 66 | wm_encoder.set_watermark('bytes', wm.encode('utf-8')) 67 | 68 | with torch.no_grad(),\ 69 | torch.autocast("cuda"): 70 | batch = make_batch_sd( 71 | image, txt=prompt, device=device, num_samples=num_samples) 72 | z = model.get_first_stage_encoding(model.encode_first_stage( 73 | batch[model.first_stage_key])) # move to latent space 74 | c = model.cond_stage_model.encode(batch["txt"]) 75 | c_cat = list() 76 | for ck in model.concat_keys: 77 | cc = batch[ck] 78 | cc = model.depth_model(cc) 79 | depth_min, depth_max = torch.amin(cc, dim=[1, 2, 3], keepdim=True), torch.amax(cc, dim=[1, 2, 3], 80 | keepdim=True) 81 | display_depth = (cc - depth_min) / (depth_max - depth_min) 82 | depth_image = Image.fromarray( 83 | (display_depth[0, 0, ...].cpu().numpy() * 255.).astype(np.uint8)) 84 | cc = torch.nn.functional.interpolate( 85 | cc, 86 | size=z.shape[2:], 87 | mode="bicubic", 88 | align_corners=False, 89 | ) 90 | depth_min, depth_max = torch.amin(cc, dim=[1, 2, 3], keepdim=True), torch.amax(cc, dim=[1, 2, 3], 91 | keepdim=True) 92 | cc = 2. * (cc - depth_min) / (depth_max - depth_min) - 1. 93 | c_cat.append(cc) 94 | c_cat = torch.cat(c_cat, dim=1) 95 | # cond 96 | cond = {"c_concat": [c_cat], "c_crossattn": [c]} 97 | 98 | # uncond cond 99 | uc_cross = model.get_unconditional_conditioning(num_samples, "") 100 | uc_full = {"c_concat": [c_cat], "c_crossattn": [uc_cross]} 101 | if not do_full_sample: 102 | # encode (scaled latent) 103 | z_enc = sampler.stochastic_encode( 104 | z, torch.tensor([t_enc] * num_samples).to(model.device)) 105 | else: 106 | z_enc = torch.randn_like(z) 107 | # decode it 108 | samples = sampler.decode(z_enc, cond, t_enc, unconditional_guidance_scale=scale, 109 | unconditional_conditioning=uc_full, callback=callback) 110 | x_samples_ddim = model.decode_first_stage(samples) 111 | result = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0) 112 | result = result.cpu().numpy().transpose(0, 2, 3, 1) * 255 113 | return [depth_image] + [put_watermark(Image.fromarray(img.astype(np.uint8)), wm_encoder) for img in result] 114 | 115 | 116 | def pad_image(input_image): 117 | pad_w, pad_h = np.max(((2, 2), np.ceil( 118 | np.array(input_image.size) / 64).astype(int)), axis=0) * 64 - input_image.size 119 | im_padded = Image.fromarray( 120 | np.pad(np.array(input_image), ((0, pad_h), (0, pad_w), (0, 0)), mode='edge')) 121 | return im_padded 122 | 123 | 124 | def predict(input_image, prompt, steps, num_samples, scale, seed, eta, strength): 125 | init_image = input_image.convert("RGB") 126 | image = pad_image(init_image) # resize to integer multiple of 32 127 | 128 | sampler.make_schedule(steps, ddim_eta=eta, verbose=True) 129 | assert 0. <= strength <= 1., 'can only work with strength in [0.0, 1.0]' 130 | do_full_sample = strength == 1. 
131 | t_enc = min(int(strength * steps), steps-1) 132 | result = paint( 133 | sampler=sampler, 134 | image=image, 135 | prompt=prompt, 136 | t_enc=t_enc, 137 | seed=seed, 138 | scale=scale, 139 | num_samples=num_samples, 140 | callback=None, 141 | do_full_sample=do_full_sample 142 | ) 143 | return result 144 | 145 | 146 | sampler = initialize_model(sys.argv[1], sys.argv[2]) 147 | 148 | from PIL import Image 149 | import os 150 | import random 151 | def process_image(file_path, prompt, ddim_steps, num_samples, scale, seed, eta, strength): 152 | input_image = Image.open(file_path) 153 | width, height = input_image.size 154 | new_width = int(width)# * 0.5) 155 | new_height = int(height)# * 0.5) 156 | # Resize the image 157 | input_image = input_image.resize((new_width, new_height)) 158 | 159 | result = predict(input_image, prompt, ddim_steps, num_samples, scale, seed, eta, strength) 160 | return result 161 | 162 | def save_result(result, output_file_path, original_size): 163 | # Upsample the result back to the original size 164 | result_image = result[1].resize(original_size) 165 | result_image.save(output_file_path) 166 | 167 | prompt_1 = "A high quality photo; europe" # of a german traffic scene" 168 | prompt_2 = "A high quality photo; europe;Highway" 169 | prompt_3 = "A high quality photo; europe;City" 170 | prompt_4 = "A high quality photo; germany" # of a german traffic scene" 171 | prompt_5 = "A high quality photo; germany;Highway" 172 | prompt_6 = "A high quality photo; germany;City" 173 | promts=[prompt_1,prompt_2,prompt_3,prompt_4,prompt_5,prompt_6] 174 | ddim_steps = 25 #50 175 | num_samples = 1 176 | scale = 9 # 9 177 | seed = 0 178 | eta = 0 179 | strength = 0.9 180 | 181 | # Replace with the actual path to the folder containing PNG images 182 | input_folder = 'Datasets/Synthia/train/RAND_CITYSCAPES/RGB' 183 | input_folder_label = 'Synthia/train/RAND_CITYSCAPES/GT/LABELS' 184 | # Replace with the actual path to the folder where you want to save the processed images 185 | output_folder = 'pseudo_target_domain/SYNTHIA/uni_cls_rand_location' 186 | 187 | CLASSES = ('road', 'sidewalk', 'building', 'wall', 'fence', 'pole', 188 | 'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky', 189 | 'person', 'rider', 'car', 'truck', 'bus', 'tram/ train/ trolley', 'motorcycle', 190 | 'bicycle') 191 | # the following classes are not in Synthia terrain, truck, train, 192 | # List all PNG files in the input folder 193 | png_files = [file for file in os.listdir(input_folder) if file.endswith('.png')] 194 | 195 | hist = np.zeros(19) 196 | hist[9] = 100000000000000 197 | hist[14] = 100000000000000 198 | hist[16] = 100000000000000 199 | 200 | for png_file in png_files: 201 | file_path = os.path.join(input_folder, png_file) 202 | label_path = os.path.join(input_folder_label, png_file.replace('.png', '_labelTrainIds.png')) 203 | label = Image.open(label_path) 204 | label_array = np.array(label) 205 | classes_present = np.unique(label_array) 206 | classes_present = [i for i in classes_present if i != 255] 207 | addressed_classes = [CLASSES[i] for i in classes_present] 208 | addressed_classes_string = ', '.join(addressed_classes) 209 | print(classes_present, addressed_classes_string) 210 | 211 | # Update the histogram with the current image's class occurrences 212 | hist[classes_present] +=1 213 | current_least_often_cls = np.argmin(hist) 214 | current_least_often_cls_string = CLASSES[current_least_often_cls] 215 | hist[np.argmin(hist)] +=1 216 | 217 | # Process the image 218 | random.seed() 219 | 
prompt = random.choice(promts)+ ", " + current_least_often_cls_string + ", " + addressed_classes_string 220 | print(prompt) 221 | result = process_image(file_path, prompt, ddim_steps, num_samples, scale, seed, eta, strength) 222 | 223 | # Get the original size of the image 224 | original_size = Image.open(file_path).size 225 | 226 | # Save the result in the output folder with the same filename 227 | output_file_path = os.path.join(output_folder, png_file) 228 | save_result(result, output_file_path, original_size) 229 | -------------------------------------------------------------------------------- /dataset_creation/GTA5_to_PTD_rand_cond.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import torch 3 | import numpy as np 4 | import gradio as gr 5 | from PIL import Image 6 | from omegaconf import OmegaConf 7 | from einops import repeat, rearrange 8 | from pytorch_lightning import seed_everything 9 | from imwatermark import WatermarkEncoder 10 | 11 | from scripts.txt2img import put_watermark 12 | from ldm.util import instantiate_from_config 13 | from ldm.models.diffusion.ddim import DDIMSampler 14 | from ldm.data.util import AddMiDaS 15 | 16 | torch.set_grad_enabled(False) 17 | 18 | 19 | def initialize_model(config, ckpt): 20 | config = OmegaConf.load(config) 21 | print(config.model) 22 | model = instantiate_from_config(config.model) 23 | model.load_state_dict(torch.load(ckpt)["state_dict"], strict=False) 24 | 25 | device = torch.device( 26 | "cuda") if torch.cuda.is_available() else torch.device("cpu") 27 | model = model.to(device) 28 | sampler = DDIMSampler(model) 29 | return sampler 30 | 31 | 32 | def make_batch_sd( 33 | image, 34 | txt, 35 | device, 36 | num_samples=1, 37 | model_type="dpt_hybrid" 38 | ): 39 | image = np.array(image.convert("RGB")) 40 | image = torch.from_numpy(image).to(dtype=torch.float32) / 127.5 - 1.0 41 | # sample['jpg'] is tensor hwc in [-1, 1] at this point 42 | midas_trafo = AddMiDaS(model_type=model_type) 43 | batch = { 44 | "jpg": image, 45 | "txt": num_samples * [txt], 46 | } 47 | batch = midas_trafo(batch) 48 | batch["jpg"] = rearrange(batch["jpg"], 'h w c -> 1 c h w') 49 | batch["jpg"] = repeat(batch["jpg"].to(device=device), 50 | "1 ... -> n ...", n=num_samples) 51 | batch["midas_in"] = repeat(torch.from_numpy(batch["midas_in"][None, ...]).to( 52 | device=device), "1 ... 
-> n ...", n=num_samples) 53 | return batch 54 | 55 | 56 | def paint(sampler, image, prompt, t_enc, seed, scale, num_samples=1, callback=None, 57 | do_full_sample=False): 58 | device = torch.device( 59 | "cuda") if torch.cuda.is_available() else torch.device("cpu") 60 | model = sampler.model 61 | seed_everything(seed) 62 | 63 | print("Creating invisible watermark encoder (see https://github.com/ShieldMnt/invisible-watermark)...") 64 | wm = "SDV2" 65 | wm_encoder = WatermarkEncoder() 66 | wm_encoder.set_watermark('bytes', wm.encode('utf-8')) 67 | 68 | with torch.no_grad(),\ 69 | torch.autocast("cuda"): 70 | batch = make_batch_sd( 71 | image, txt=prompt, device=device, num_samples=num_samples) 72 | z = model.get_first_stage_encoding(model.encode_first_stage( 73 | batch[model.first_stage_key])) # move to latent space 74 | c = model.cond_stage_model.encode(batch["txt"]) 75 | c_cat = list() 76 | for ck in model.concat_keys: 77 | cc = batch[ck] 78 | cc = model.depth_model(cc) 79 | depth_min, depth_max = torch.amin(cc, dim=[1, 2, 3], keepdim=True), torch.amax(cc, dim=[1, 2, 3], 80 | keepdim=True) 81 | display_depth = (cc - depth_min) / (depth_max - depth_min) 82 | depth_image = Image.fromarray( 83 | (display_depth[0, 0, ...].cpu().numpy() * 255.).astype(np.uint8)) 84 | cc = torch.nn.functional.interpolate( 85 | cc, 86 | size=z.shape[2:], 87 | mode="bicubic", 88 | align_corners=False, 89 | ) 90 | depth_min, depth_max = torch.amin(cc, dim=[1, 2, 3], keepdim=True), torch.amax(cc, dim=[1, 2, 3], 91 | keepdim=True) 92 | cc = 2. * (cc - depth_min) / (depth_max - depth_min) - 1. 93 | c_cat.append(cc) 94 | c_cat = torch.cat(c_cat, dim=1) 95 | # cond 96 | cond = {"c_concat": [c_cat], "c_crossattn": [c]} 97 | 98 | # uncond cond 99 | uc_cross = model.get_unconditional_conditioning(num_samples, "") 100 | uc_full = {"c_concat": [c_cat], "c_crossattn": [uc_cross]} 101 | if not do_full_sample: 102 | # encode (scaled latent) 103 | z_enc = sampler.stochastic_encode( 104 | z, torch.tensor([t_enc] * num_samples).to(model.device)) 105 | else: 106 | z_enc = torch.randn_like(z) 107 | # decode it 108 | samples = sampler.decode(z_enc, cond, t_enc, unconditional_guidance_scale=scale, 109 | unconditional_conditioning=uc_full, callback=callback) 110 | x_samples_ddim = model.decode_first_stage(samples) 111 | result = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0) 112 | result = result.cpu().numpy().transpose(0, 2, 3, 1) * 255 113 | return [depth_image] + [put_watermark(Image.fromarray(img.astype(np.uint8)), wm_encoder) for img in result] 114 | 115 | 116 | def pad_image(input_image): 117 | pad_w, pad_h = np.max(((2, 2), np.ceil( 118 | np.array(input_image.size) / 64).astype(int)), axis=0) * 64 - input_image.size 119 | im_padded = Image.fromarray( 120 | np.pad(np.array(input_image), ((0, pad_h), (0, pad_w), (0, 0)), mode='edge')) 121 | return im_padded 122 | 123 | 124 | def predict(input_image, prompt, steps, num_samples, scale, seed, eta, strength): 125 | init_image = input_image.convert("RGB") 126 | image = pad_image(init_image) # resize to integer multiple of 32 127 | 128 | sampler.make_schedule(steps, ddim_eta=eta, verbose=True) 129 | assert 0. <= strength <= 1., 'can only work with strength in [0.0, 1.0]' 130 | do_full_sample = strength == 1. 
131 | t_enc = min(int(strength * steps), steps-1) 132 | result = paint( 133 | sampler=sampler, 134 | image=image, 135 | prompt=prompt, 136 | t_enc=t_enc, 137 | seed=seed, 138 | scale=scale, 139 | num_samples=num_samples, 140 | callback=None, 141 | do_full_sample=do_full_sample 142 | ) 143 | return result 144 | 145 | 146 | sampler = initialize_model(sys.argv[1], sys.argv[2]) 147 | 148 | from PIL import Image 149 | import os 150 | import random 151 | def process_image(file_path, prompt, ddim_steps, num_samples, scale, seed, eta, strength): 152 | input_image = Image.open(file_path) 153 | width, height = input_image.size 154 | new_width = int(width * 0.5) 155 | new_height = int(height * 0.5) 156 | # Resize the image 157 | input_image = input_image.resize((new_width, new_height)) 158 | 159 | result = predict(input_image, prompt, ddim_steps, num_samples, scale, seed, eta, strength) 160 | return result 161 | 162 | def save_result(result, output_file_path, original_size): 163 | # Upsample the result back to the original size 164 | result_image = result[1].resize(original_size) 165 | result_image.save(output_file_path) 166 | 167 | prompt_1 = "A high quality photo; europe" 168 | prompt_2 = "A high quality photo; europe;Highway" 169 | prompt_3 = "A high quality photo; europe;City" 170 | prompt_4 = "A high quality photo; germany" 171 | prompt_5 = "A high quality photo; germany;Highway" 172 | prompt_6 = "A high quality photo; germany;City" 173 | promts=[prompt_1,prompt_2,prompt_3,prompt_4,prompt_5,prompt_6] 174 | 175 | codition_1 = "rain" 176 | codition_2 = "Fog/Mist" 177 | codition_3 = "Snowy" 178 | codition_4 = "Sunny" 179 | codition_5 = "Overcast" 180 | codition_6 = "Stormy" 181 | codition_7 = "overexposure" 182 | codition_8 = "underexposure" 183 | codition_9 = "evening" 184 | codition_10 = "morning" 185 | codition_11 = "Night/Darkness" 186 | codition_12 = "Backlighting" 187 | codition_13 = "Artificial Lighting" 188 | codition_14 = "Harsh Light" 189 | codition_15 = "Dappled Light" 190 | codition_16 = "Sun Flare" 191 | codition_17 = "Hazy/Haze" 192 | codition_18 = "Spring" 193 | codition_19 = "Autumn" 194 | codition_20 = "Winter" 195 | codition_21 = "Summer" 196 | coditions=[codition_1,codition_2,codition_3,codition_4,codition_5,codition_6,codition_7, codition_8, codition_9, codition_10, 197 | codition_11, codition_12, codition_13, codition_14, codition_15, codition_16, codition_17,codition_18,codition_19, 198 | codition_20, codition_21] 199 | 200 | ddim_steps = 25 #50 201 | num_samples = 1 202 | scale = 9 # 9 203 | seed = 0 204 | eta = 0 205 | strength = 0.9 206 | 207 | ## Replace with the actual path to the folder containing PNG images 208 | input_folder = 'GTA5/images/train' 209 | input_folder_label = 'GTA5/labels/train' 210 | # Replace with the actual path to the folder where you want to save the processed images 211 | output_folder = 'pseudo_target_domain/GTA5/rand_locations_rand_cond' 212 | 213 | CLASSES = ('road', 'sidewalk', 'building', 'wall', 'fence', 'pole', 214 | 'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky', 215 | 'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', 216 | 'bicycle') 217 | # List all PNG files in the input folder 218 | png_files = [file for file in os.listdir(input_folder) if file.endswith('.png')] 219 | 220 | hist = np.zeros(19) 221 | 222 | for png_file in png_files: 223 | file_path = os.path.join(input_folder, png_file) 224 | label_path = os.path.join(input_folder_label, png_file.replace('.png', '_labelTrainIds.png')) 225 | label = 
Image.open(label_path) 226 | label_array = np.array(label) 227 | classes_present = np.unique(label_array) 228 | classes_present = [i for i in classes_present if i != 255] 229 | addressed_classes = [CLASSES[i] for i in classes_present] 230 | addressed_classes_string = ', '.join(addressed_classes) 231 | print(classes_present, addressed_classes_string) 232 | 233 | # Update the histogram with the current image's class occurrences 234 | hist[classes_present] +=1 235 | current_least_often_cls = np.argmin(hist) 236 | current_least_often_cls_string = CLASSES[current_least_often_cls] 237 | hist[np.argmin(hist)] +=1 238 | 239 | # Process the image 240 | random.seed() 241 | prompt = random.choice(promts)+ ", " + random.choice(coditions) + ", " + addressed_classes_string 242 | print(prompt) 243 | result = process_image(file_path, prompt, ddim_steps, num_samples, scale, seed, eta, strength) 244 | 245 | # Get the original size of the image 246 | original_size = Image.open(file_path).size 247 | 248 | # Save the result in the output folder with the same filename 249 | output_file_path = os.path.join(output_folder, png_file) 250 | save_result(result, output_file_path, original_size) 251 | -------------------------------------------------------------------------------- /dataset_creation/GTA5_to_PTD_uni_cls_rand_cond.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import torch 3 | import numpy as np 4 | import gradio as gr 5 | from PIL import Image 6 | from omegaconf import OmegaConf 7 | from einops import repeat, rearrange 8 | from pytorch_lightning import seed_everything 9 | from imwatermark import WatermarkEncoder 10 | 11 | from scripts.txt2img import put_watermark 12 | from ldm.util import instantiate_from_config 13 | from ldm.models.diffusion.ddim import DDIMSampler 14 | from ldm.data.util import AddMiDaS 15 | 16 | torch.set_grad_enabled(False) 17 | 18 | 19 | def initialize_model(config, ckpt): 20 | config = OmegaConf.load(config) 21 | print(config.model) 22 | model = instantiate_from_config(config.model) 23 | model.load_state_dict(torch.load(ckpt)["state_dict"], strict=False) 24 | 25 | device = torch.device( 26 | "cuda") if torch.cuda.is_available() else torch.device("cpu") 27 | model = model.to(device) 28 | sampler = DDIMSampler(model) 29 | return sampler 30 | 31 | 32 | def make_batch_sd( 33 | image, 34 | txt, 35 | device, 36 | num_samples=1, 37 | model_type="dpt_hybrid" 38 | ): 39 | image = np.array(image.convert("RGB")) 40 | image = torch.from_numpy(image).to(dtype=torch.float32) / 127.5 - 1.0 41 | # sample['jpg'] is tensor hwc in [-1, 1] at this point 42 | midas_trafo = AddMiDaS(model_type=model_type) 43 | batch = { 44 | "jpg": image, 45 | "txt": num_samples * [txt], 46 | } 47 | batch = midas_trafo(batch) 48 | batch["jpg"] = rearrange(batch["jpg"], 'h w c -> 1 c h w') 49 | batch["jpg"] = repeat(batch["jpg"].to(device=device), 50 | "1 ... -> n ...", n=num_samples) 51 | batch["midas_in"] = repeat(torch.from_numpy(batch["midas_in"][None, ...]).to( 52 | device=device), "1 ... 
-> n ...", n=num_samples) 53 | return batch 54 | 55 | 56 | def paint(sampler, image, prompt, t_enc, seed, scale, num_samples=1, callback=None, 57 | do_full_sample=False): 58 | device = torch.device( 59 | "cuda") if torch.cuda.is_available() else torch.device("cpu") 60 | model = sampler.model 61 | seed_everything(seed) 62 | 63 | print("Creating invisible watermark encoder (see https://github.com/ShieldMnt/invisible-watermark)...") 64 | wm = "SDV2" 65 | wm_encoder = WatermarkEncoder() 66 | wm_encoder.set_watermark('bytes', wm.encode('utf-8')) 67 | 68 | with torch.no_grad(),\ 69 | torch.autocast("cuda"): 70 | batch = make_batch_sd( 71 | image, txt=prompt, device=device, num_samples=num_samples) 72 | z = model.get_first_stage_encoding(model.encode_first_stage( 73 | batch[model.first_stage_key])) # move to latent space 74 | c = model.cond_stage_model.encode(batch["txt"]) 75 | c_cat = list() 76 | for ck in model.concat_keys: 77 | cc = batch[ck] 78 | cc = model.depth_model(cc) 79 | depth_min, depth_max = torch.amin(cc, dim=[1, 2, 3], keepdim=True), torch.amax(cc, dim=[1, 2, 3], 80 | keepdim=True) 81 | display_depth = (cc - depth_min) / (depth_max - depth_min) 82 | depth_image = Image.fromarray( 83 | (display_depth[0, 0, ...].cpu().numpy() * 255.).astype(np.uint8)) 84 | cc = torch.nn.functional.interpolate( 85 | cc, 86 | size=z.shape[2:], 87 | mode="bicubic", 88 | align_corners=False, 89 | ) 90 | depth_min, depth_max = torch.amin(cc, dim=[1, 2, 3], keepdim=True), torch.amax(cc, dim=[1, 2, 3], 91 | keepdim=True) 92 | cc = 2. * (cc - depth_min) / (depth_max - depth_min) - 1. 93 | c_cat.append(cc) 94 | c_cat = torch.cat(c_cat, dim=1) 95 | # cond 96 | cond = {"c_concat": [c_cat], "c_crossattn": [c]} 97 | 98 | # uncond cond 99 | uc_cross = model.get_unconditional_conditioning(num_samples, "") 100 | uc_full = {"c_concat": [c_cat], "c_crossattn": [uc_cross]} 101 | if not do_full_sample: 102 | # encode (scaled latent) 103 | z_enc = sampler.stochastic_encode( 104 | z, torch.tensor([t_enc] * num_samples).to(model.device)) 105 | else: 106 | z_enc = torch.randn_like(z) 107 | # decode it 108 | samples = sampler.decode(z_enc, cond, t_enc, unconditional_guidance_scale=scale, 109 | unconditional_conditioning=uc_full, callback=callback) 110 | x_samples_ddim = model.decode_first_stage(samples) 111 | result = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0) 112 | result = result.cpu().numpy().transpose(0, 2, 3, 1) * 255 113 | return [depth_image] + [put_watermark(Image.fromarray(img.astype(np.uint8)), wm_encoder) for img in result] 114 | 115 | 116 | def pad_image(input_image): 117 | pad_w, pad_h = np.max(((2, 2), np.ceil( 118 | np.array(input_image.size) / 64).astype(int)), axis=0) * 64 - input_image.size 119 | im_padded = Image.fromarray( 120 | np.pad(np.array(input_image), ((0, pad_h), (0, pad_w), (0, 0)), mode='edge')) 121 | return im_padded 122 | 123 | 124 | def predict(input_image, prompt, steps, num_samples, scale, seed, eta, strength): 125 | init_image = input_image.convert("RGB") 126 | image = pad_image(init_image) # resize to integer multiple of 32 127 | 128 | sampler.make_schedule(steps, ddim_eta=eta, verbose=True) 129 | assert 0. <= strength <= 1., 'can only work with strength in [0.0, 1.0]' 130 | do_full_sample = strength == 1. 
131 | t_enc = min(int(strength * steps), steps-1) 132 | result = paint( 133 | sampler=sampler, 134 | image=image, 135 | prompt=prompt, 136 | t_enc=t_enc, 137 | seed=seed, 138 | scale=scale, 139 | num_samples=num_samples, 140 | callback=None, 141 | do_full_sample=do_full_sample 142 | ) 143 | return result 144 | 145 | 146 | sampler = initialize_model(sys.argv[1], sys.argv[2]) 147 | 148 | from PIL import Image 149 | import os 150 | import random 151 | def process_image(file_path, prompt, ddim_steps, num_samples, scale, seed, eta, strength): 152 | input_image = Image.open(file_path) 153 | width, height = input_image.size 154 | new_width = int(width * 0.5) 155 | new_height = int(height * 0.5) 156 | # Resize the image 157 | input_image = input_image.resize((new_width, new_height)) 158 | 159 | result = predict(input_image, prompt, ddim_steps, num_samples, scale, seed, eta, strength) 160 | return result 161 | 162 | def save_result(result, output_file_path, original_size): 163 | # Upsample the result back to the original size 164 | result_image = result[1].resize(original_size) 165 | result_image.save(output_file_path) 166 | 167 | 168 | codition_1 = "rain" 169 | codition_2 = "Fog/Mist" 170 | codition_3 = "Snowy" 171 | codition_4 = "Sunny" 172 | codition_5 = "Overcast" 173 | codition_6 = "Stormy" 174 | codition_7 = "overexposure" 175 | codition_8 = "underexposure" 176 | codition_9 = "evening" 177 | codition_10 = "morning" 178 | codition_11 = "Night/Darkness" 179 | codition_12 = "Backlighting" 180 | codition_13 = "Artificial Lighting" 181 | codition_14 = "Harsh Light" 182 | codition_15 = "Dappled Light" 183 | codition_16 = "Sun Flare" 184 | codition_17 = "Hazy/Haze" 185 | codition_18 = "Spring" 186 | codition_19 = "Autumn" 187 | codition_20 = "Winter" 188 | codition_21 = "Summer" 189 | coditions=[codition_1,codition_2,codition_3,codition_4,codition_5,codition_6,codition_7, codition_8, codition_9, codition_10, 190 | codition_11, codition_12, codition_13, codition_14, codition_15, codition_16, codition_17,codition_18,codition_19, 191 | codition_20, codition_21] 192 | 193 | location_1 = "europe;" 194 | location_2 = "germany;" 195 | 196 | locations = [location_1, location_2] 197 | 198 | traffic_location_1 = "" 199 | traffic_location_2 = "Highway" 200 | traffic_location_3 = "City" 201 | traffic_locations = [traffic_location_1, traffic_location_2, traffic_location_3] 202 | 203 | base_prompt = "A high quality photo; " 204 | 205 | ddim_steps = 25 #50 206 | num_samples = 1 207 | scale = 9 # 9 208 | seed = 0 209 | eta = 0 210 | strength = 0.9 211 | 212 | # Replace with the actual path to the folder containing PNG images 213 | input_folder = 'GTA5/images/train' 214 | input_folder_label = 'GTA5/labels/train' 215 | # Replace with the actual path to the folder where you want to save the processed images 216 | output_folder = 'pseudo_target_domain/GTA5/uni_cls_rand_condition' 217 | 218 | CLASSES = ('road', 'sidewalk', 'building', 'wall', 'fence', 'pole', 219 | 'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky', 220 | 'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', 221 | 'bicycle') 222 | # List all PNG files in the input folder 223 | png_files = [file for file in os.listdir(input_folder) if file.endswith('.png')] 224 | 225 | hist = np.zeros(19) 226 | 227 | for png_file in png_files: 228 | file_path = os.path.join(input_folder, png_file) 229 | label_path = os.path.join(input_folder_label, png_file.replace('.png', '_labelTrainIds.png')) 230 | label = Image.open(label_path) 231 | 
label_array = np.array(label) 232 | classes_present = np.unique(label_array) 233 | classes_present = [i for i in classes_present if i != 255] 234 | addressed_classes = [CLASSES[i] for i in classes_present] 235 | addressed_classes_string = ', '.join(addressed_classes) 236 | print(classes_present, addressed_classes_string) 237 | 238 | # Update the histogram with the current image's class occurrences 239 | hist[classes_present] +=1 240 | current_least_often_cls = np.argmin(hist) 241 | current_least_often_cls_string = CLASSES[current_least_often_cls] 242 | hist[np.argmin(hist)] +=1 243 | 244 | # Process the image 245 | random.seed() 246 | # prompt = random.choice(promts)+ ", " + random.choice(coditions) + ", " + addressed_classes_string 247 | prompt = base_prompt + random.choice(locations) + random.choice(traffic_locations) + ", " + current_least_often_cls_string + ", " + addressed_classes_string + ", " + random.choice(coditions) 248 | print(prompt) 249 | result = process_image(file_path, prompt, ddim_steps, num_samples, scale, seed, eta, strength) 250 | 251 | # Get the original size of the image 252 | original_size = Image.open(file_path).size 253 | 254 | # Save the result in the output folder with the same filename 255 | output_file_path = os.path.join(output_folder, png_file) 256 | save_result(result, output_file_path, original_size) 257 | -------------------------------------------------------------------------------- /dataset_creation/GTA5_to_PTD_rand_location_uni_cls_rand_cond.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import torch 3 | import numpy as np 4 | import gradio as gr 5 | from PIL import Image 6 | from omegaconf import OmegaConf 7 | from einops import repeat, rearrange 8 | from pytorch_lightning import seed_everything 9 | from imwatermark import WatermarkEncoder 10 | 11 | from scripts.txt2img import put_watermark 12 | from ldm.util import instantiate_from_config 13 | from ldm.models.diffusion.ddim import DDIMSampler 14 | from ldm.data.util import AddMiDaS 15 | 16 | torch.set_grad_enabled(False) 17 | 18 | 19 | def initialize_model(config, ckpt): 20 | config = OmegaConf.load(config) 21 | print(config.model) 22 | model = instantiate_from_config(config.model) 23 | model.load_state_dict(torch.load(ckpt)["state_dict"], strict=False) 24 | 25 | device = torch.device( 26 | "cuda") if torch.cuda.is_available() else torch.device("cpu") 27 | model = model.to(device) 28 | sampler = DDIMSampler(model) 29 | return sampler 30 | 31 | 32 | def make_batch_sd( 33 | image, 34 | txt, 35 | device, 36 | num_samples=1, 37 | model_type="dpt_hybrid" 38 | ): 39 | image = np.array(image.convert("RGB")) 40 | image = torch.from_numpy(image).to(dtype=torch.float32) / 127.5 - 1.0 41 | # sample['jpg'] is tensor hwc in [-1, 1] at this point 42 | midas_trafo = AddMiDaS(model_type=model_type) 43 | batch = { 44 | "jpg": image, 45 | "txt": num_samples * [txt], 46 | } 47 | batch = midas_trafo(batch) 48 | batch["jpg"] = rearrange(batch["jpg"], 'h w c -> 1 c h w') 49 | batch["jpg"] = repeat(batch["jpg"].to(device=device), 50 | "1 ... -> n ...", n=num_samples) 51 | batch["midas_in"] = repeat(torch.from_numpy(batch["midas_in"][None, ...]).to( 52 | device=device), "1 ... 
-> n ...", n=num_samples) 53 | return batch 54 | 55 | 56 | def paint(sampler, image, prompt, t_enc, seed, scale, num_samples=1, callback=None, 57 | do_full_sample=False): 58 | device = torch.device( 59 | "cuda") if torch.cuda.is_available() else torch.device("cpu") 60 | model = sampler.model 61 | seed_everything(seed) 62 | 63 | print("Creating invisible watermark encoder (see https://github.com/ShieldMnt/invisible-watermark)...") 64 | wm = "SDV2" 65 | wm_encoder = WatermarkEncoder() 66 | wm_encoder.set_watermark('bytes', wm.encode('utf-8')) 67 | 68 | with torch.no_grad(),\ 69 | torch.autocast("cuda"): 70 | batch = make_batch_sd( 71 | image, txt=prompt, device=device, num_samples=num_samples) 72 | z = model.get_first_stage_encoding(model.encode_first_stage( 73 | batch[model.first_stage_key])) # move to latent space 74 | c = model.cond_stage_model.encode(batch["txt"]) 75 | c_cat = list() 76 | for ck in model.concat_keys: 77 | cc = batch[ck] 78 | cc = model.depth_model(cc) 79 | depth_min, depth_max = torch.amin(cc, dim=[1, 2, 3], keepdim=True), torch.amax(cc, dim=[1, 2, 3], 80 | keepdim=True) 81 | display_depth = (cc - depth_min) / (depth_max - depth_min) 82 | depth_image = Image.fromarray( 83 | (display_depth[0, 0, ...].cpu().numpy() * 255.).astype(np.uint8)) 84 | cc = torch.nn.functional.interpolate( 85 | cc, 86 | size=z.shape[2:], 87 | mode="bicubic", 88 | align_corners=False, 89 | ) 90 | depth_min, depth_max = torch.amin(cc, dim=[1, 2, 3], keepdim=True), torch.amax(cc, dim=[1, 2, 3], 91 | keepdim=True) 92 | cc = 2. * (cc - depth_min) / (depth_max - depth_min) - 1. 93 | c_cat.append(cc) 94 | c_cat = torch.cat(c_cat, dim=1) 95 | # cond 96 | cond = {"c_concat": [c_cat], "c_crossattn": [c]} 97 | 98 | # uncond cond 99 | uc_cross = model.get_unconditional_conditioning(num_samples, "") 100 | uc_full = {"c_concat": [c_cat], "c_crossattn": [uc_cross]} 101 | if not do_full_sample: 102 | # encode (scaled latent) 103 | z_enc = sampler.stochastic_encode( 104 | z, torch.tensor([t_enc] * num_samples).to(model.device)) 105 | else: 106 | z_enc = torch.randn_like(z) 107 | # decode it 108 | samples = sampler.decode(z_enc, cond, t_enc, unconditional_guidance_scale=scale, 109 | unconditional_conditioning=uc_full, callback=callback) 110 | x_samples_ddim = model.decode_first_stage(samples) 111 | result = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0) 112 | result = result.cpu().numpy().transpose(0, 2, 3, 1) * 255 113 | return [depth_image] + [put_watermark(Image.fromarray(img.astype(np.uint8)), wm_encoder) for img in result] 114 | 115 | 116 | def pad_image(input_image): 117 | pad_w, pad_h = np.max(((2, 2), np.ceil( 118 | np.array(input_image.size) / 64).astype(int)), axis=0) * 64 - input_image.size 119 | im_padded = Image.fromarray( 120 | np.pad(np.array(input_image), ((0, pad_h), (0, pad_w), (0, 0)), mode='edge')) 121 | return im_padded 122 | 123 | 124 | def predict(input_image, prompt, steps, num_samples, scale, seed, eta, strength): 125 | init_image = input_image.convert("RGB") 126 | image = pad_image(init_image) # resize to integer multiple of 32 127 | 128 | sampler.make_schedule(steps, ddim_eta=eta, verbose=True) 129 | assert 0. <= strength <= 1., 'can only work with strength in [0.0, 1.0]' 130 | do_full_sample = strength == 1. 
131 | t_enc = min(int(strength * steps), steps-1) 132 | result = paint( 133 | sampler=sampler, 134 | image=image, 135 | prompt=prompt, 136 | t_enc=t_enc, 137 | seed=seed, 138 | scale=scale, 139 | num_samples=num_samples, 140 | callback=None, 141 | do_full_sample=do_full_sample 142 | ) 143 | return result 144 | 145 | 146 | sampler = initialize_model(sys.argv[1], sys.argv[2]) 147 | 148 | from PIL import Image 149 | import os 150 | import random 151 | def process_image(file_path, prompt, ddim_steps, num_samples, scale, seed, eta, strength): 152 | input_image = Image.open(file_path) 153 | width, height = input_image.size 154 | new_width = int(width * 0.5) 155 | new_height = int(height * 0.5) 156 | # Resize the image 157 | input_image = input_image.resize((new_width, new_height)) 158 | 159 | result = predict(input_image, prompt, ddim_steps, num_samples, scale, seed, eta, strength) 160 | return result 161 | 162 | def save_result(result, output_file_path, original_size): 163 | # Upsample the result back to the original size 164 | result_image = result[1].resize(original_size) 165 | result_image.save(output_file_path) 166 | 167 | 168 | codition_1 = "rain" 169 | codition_2 = "Fog/Mist" 170 | codition_3 = "Snowy" 171 | codition_4 = "Sunny" 172 | codition_5 = "Overcast" 173 | codition_6 = "Stormy" 174 | codition_7 = "overexposure" 175 | codition_8 = "underexposure" 176 | codition_9 = "evening" 177 | codition_10 = "morning" 178 | codition_11 = "Night/Darkness" 179 | codition_12 = "Backlighting" 180 | codition_13 = "Artificial Lighting" 181 | codition_14 = "Harsh Light" 182 | codition_15 = "Dappled Light" 183 | codition_16 = "Sun Flare" 184 | codition_17 = "Hazy/Haze" 185 | codition_18 = "Spring" 186 | codition_19 = "Autumn" 187 | codition_20 = "Winter" 188 | codition_21 = "Summer" 189 | coditions=[codition_1,codition_2,codition_3,codition_4,codition_5,codition_6,codition_7, codition_8, codition_9, codition_10, 190 | codition_11, codition_12, codition_13, codition_14, codition_15, codition_16, codition_17,codition_18,codition_19, 191 | codition_20, codition_21] 192 | 193 | location_1 = "europe;" 194 | location_2 = "germany;" 195 | location_3 = "China;" 196 | location_4 = "USA;" 197 | location_5 = "India;" 198 | 199 | locations = [location_1, location_2, location_3, location_4, location_5] 200 | 201 | traffic_location_1 = "" 202 | traffic_location_2 = "Highway" 203 | traffic_location_3 = "City" 204 | traffic_locations = [traffic_location_1, traffic_location_2, traffic_location_3] 205 | 206 | base_prompt = "A high quality photo; " 207 | 208 | ddim_steps = 25 #50 209 | num_samples = 1 210 | scale = 9 # 9 211 | seed = 0 212 | eta = 0 213 | strength = 0.9 214 | 215 | # Replace with the actual path to the folder containing PNG images 216 | input_folder = 'GTA5/images/train' 217 | input_folder_label = 'GTA5/labels/train' 218 | # Replace with the actual path to the folder where you want to save the processed images 219 | output_folder = 'pseudo_target_domain/GTA5/rand_locations_uni_cls_rand_cond' 220 | 221 | CLASSES = ('road', 'sidewalk', 'building', 'wall', 'fence', 'pole', 222 | 'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky', 223 | 'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', 224 | 'bicycle') 225 | # List all PNG files in the input folder 226 | png_files = [file for file in os.listdir(input_folder) if file.endswith('.png')] 227 | 228 | hist = np.zeros(19) 229 | 230 | for png_file in png_files: 231 | file_path = os.path.join(input_folder, png_file) 232 | label_path = 
os.path.join(input_folder_label, png_file.replace('.png', '_labelTrainIds.png')) 233 | label = Image.open(label_path) 234 | label_array = np.array(label) 235 | classes_present = np.unique(label_array) 236 | classes_present = [i for i in classes_present if i != 255] 237 | addressed_classes = [CLASSES[i] for i in classes_present] 238 | addressed_classes_string = ', '.join(addressed_classes) 239 | print(classes_present, addressed_classes_string) 240 | 241 | # Update the histogram with the current image's class occurrences 242 | hist[classes_present] +=1 243 | current_least_often_cls = np.argmin(hist) 244 | current_least_often_cls_string = CLASSES[current_least_often_cls] 245 | hist[np.argmin(hist)] +=1 246 | 247 | # Process the image 248 | random.seed() 249 | prompt = base_prompt + random.choice(locations) + random.choice(traffic_locations) + ", " + current_least_often_cls_string + ", " + addressed_classes_string + ", " + random.choice(coditions) 250 | print(prompt) 251 | result = process_image(file_path, prompt, ddim_steps, num_samples, scale, seed, eta, strength) 252 | 253 | # Get the original size of the image 254 | original_size = Image.open(file_path).size 255 | 256 | # Save the result in the output folder with the same filename 257 | output_file_path = os.path.join(output_folder, png_file) 258 | save_result(result, output_file_path, original_size) 259 | -------------------------------------------------------------------------------- /dataset_creation/GTA5_to_PTD_uni_cls_rand_location.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import torch 3 | import numpy as np 4 | import gradio as gr 5 | from PIL import Image 6 | from omegaconf import OmegaConf 7 | from einops import repeat, rearrange 8 | from pytorch_lightning import seed_everything 9 | from imwatermark import WatermarkEncoder 10 | 11 | from scripts.txt2img import put_watermark 12 | from ldm.util import instantiate_from_config 13 | from ldm.models.diffusion.ddim import DDIMSampler 14 | from ldm.data.util import AddMiDaS 15 | 16 | torch.set_grad_enabled(False) 17 | 18 | 19 | def initialize_model(config, ckpt): 20 | config = OmegaConf.load(config) 21 | print(config.model) 22 | model = instantiate_from_config(config.model) 23 | model.load_state_dict(torch.load(ckpt)["state_dict"], strict=False) 24 | 25 | device = torch.device( 26 | "cuda") if torch.cuda.is_available() else torch.device("cpu") 27 | model = model.to(device) 28 | sampler = DDIMSampler(model) 29 | return sampler 30 | 31 | 32 | def make_batch_sd( 33 | image, 34 | txt, 35 | device, 36 | num_samples=1, 37 | model_type="dpt_hybrid" 38 | ): 39 | image = np.array(image.convert("RGB")) 40 | image = torch.from_numpy(image).to(dtype=torch.float32) / 127.5 - 1.0 41 | # sample['jpg'] is tensor hwc in [-1, 1] at this point 42 | midas_trafo = AddMiDaS(model_type=model_type) 43 | batch = { 44 | "jpg": image, 45 | "txt": num_samples * [txt], 46 | } 47 | batch = midas_trafo(batch) 48 | batch["jpg"] = rearrange(batch["jpg"], 'h w c -> 1 c h w') 49 | batch["jpg"] = repeat(batch["jpg"].to(device=device), 50 | "1 ... -> n ...", n=num_samples) 51 | batch["midas_in"] = repeat(torch.from_numpy(batch["midas_in"][None, ...]).to( 52 | device=device), "1 ... 
-> n ...", n=num_samples) 53 | return batch 54 | 55 | 56 | def paint(sampler, image, prompt, t_enc, seed, scale, num_samples=1, callback=None, 57 | do_full_sample=False): 58 | device = torch.device( 59 | "cuda") if torch.cuda.is_available() else torch.device("cpu") 60 | model = sampler.model 61 | seed_everything(seed) 62 | 63 | print("Creating invisible watermark encoder (see https://github.com/ShieldMnt/invisible-watermark)...") 64 | wm = "SDV2" 65 | wm_encoder = WatermarkEncoder() 66 | wm_encoder.set_watermark('bytes', wm.encode('utf-8')) 67 | 68 | with torch.no_grad(),\ 69 | torch.autocast("cuda"): 70 | batch = make_batch_sd( 71 | image, txt=prompt, device=device, num_samples=num_samples) 72 | z = model.get_first_stage_encoding(model.encode_first_stage( 73 | batch[model.first_stage_key])) # move to latent space 74 | c = model.cond_stage_model.encode(batch["txt"]) 75 | c_cat = list() 76 | for ck in model.concat_keys: 77 | cc = batch[ck] 78 | cc = model.depth_model(cc) 79 | depth_min, depth_max = torch.amin(cc, dim=[1, 2, 3], keepdim=True), torch.amax(cc, dim=[1, 2, 3], 80 | keepdim=True) 81 | display_depth = (cc - depth_min) / (depth_max - depth_min) 82 | depth_image = Image.fromarray( 83 | (display_depth[0, 0, ...].cpu().numpy() * 255.).astype(np.uint8)) 84 | cc = torch.nn.functional.interpolate( 85 | cc, 86 | size=z.shape[2:], 87 | mode="bicubic", 88 | align_corners=False, 89 | ) 90 | depth_min, depth_max = torch.amin(cc, dim=[1, 2, 3], keepdim=True), torch.amax(cc, dim=[1, 2, 3], 91 | keepdim=True) 92 | cc = 2. * (cc - depth_min) / (depth_max - depth_min) - 1. 93 | c_cat.append(cc) 94 | c_cat = torch.cat(c_cat, dim=1) 95 | # cond 96 | cond = {"c_concat": [c_cat], "c_crossattn": [c]} 97 | 98 | # uncond cond 99 | uc_cross = model.get_unconditional_conditioning(num_samples, "") 100 | uc_full = {"c_concat": [c_cat], "c_crossattn": [uc_cross]} 101 | if not do_full_sample: 102 | # encode (scaled latent) 103 | z_enc = sampler.stochastic_encode( 104 | z, torch.tensor([t_enc] * num_samples).to(model.device)) 105 | else: 106 | z_enc = torch.randn_like(z) 107 | # decode it 108 | samples = sampler.decode(z_enc, cond, t_enc, unconditional_guidance_scale=scale, 109 | unconditional_conditioning=uc_full, callback=callback) 110 | x_samples_ddim = model.decode_first_stage(samples) 111 | result = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0) 112 | result = result.cpu().numpy().transpose(0, 2, 3, 1) * 255 113 | return [depth_image] + [put_watermark(Image.fromarray(img.astype(np.uint8)), wm_encoder) for img in result] 114 | 115 | 116 | def pad_image(input_image): 117 | pad_w, pad_h = np.max(((2, 2), np.ceil( 118 | np.array(input_image.size) / 64).astype(int)), axis=0) * 64 - input_image.size 119 | im_padded = Image.fromarray( 120 | np.pad(np.array(input_image), ((0, pad_h), (0, pad_w), (0, 0)), mode='edge')) 121 | return im_padded 122 | 123 | 124 | def predict(input_image, prompt, steps, num_samples, scale, seed, eta, strength): 125 | init_image = input_image.convert("RGB") 126 | image = pad_image(init_image) # resize to integer multiple of 32 127 | 128 | sampler.make_schedule(steps, ddim_eta=eta, verbose=True) 129 | assert 0. <= strength <= 1., 'can only work with strength in [0.0, 1.0]' 130 | do_full_sample = strength == 1. 
131 |     t_enc = min(int(strength * steps), steps-1)
132 |     result = paint(
133 |         sampler=sampler,
134 |         image=image,
135 |         prompt=prompt,
136 |         t_enc=t_enc,
137 |         seed=seed,
138 |         scale=scale,
139 |         num_samples=num_samples,
140 |         callback=None,
141 |         do_full_sample=do_full_sample
142 |     )
143 |     return result
144 | 
145 | 
146 | sampler = initialize_model(sys.argv[1], sys.argv[2])
147 | 
148 | from PIL import Image
149 | import os
150 | import random
151 | def process_image(file_path, prompt, ddim_steps, num_samples, scale, seed, eta, strength):
152 |     input_image = Image.open(file_path)
153 |     width, height = input_image.size
154 |     new_width = int(width * 0.5)
155 |     new_height = int(height * 0.5)
156 |     # Downscale the image by a factor of 2 to keep memory and runtime manageable
157 |     input_image = input_image.resize((new_width, new_height))
158 | 
159 |     result = predict(input_image, prompt, ddim_steps, num_samples, scale, seed, eta, strength)
160 |     return result
161 | 
162 | def save_result(result, output_file_path, original_size):
163 |     # Upsample the result back to the original size
164 |     result_image = result[1].resize(original_size)  # result[0] is the depth map; result[1] is the generated sample
165 |     result_image.save(output_file_path)
166 | 
167 | 
168 | condition_1 = "rain"
169 | condition_2 = "Fog/Mist"
170 | condition_3 = "Snowy"
171 | condition_4 = "Sunny"
172 | condition_5 = "Overcast"
173 | condition_6 = "Stormy"
174 | condition_7 = "overexposure"
175 | condition_8 = "underexposure"
176 | condition_9 = "evening"
177 | condition_10 = "morning"
178 | condition_11 = "Night/Darkness"
179 | condition_12 = "Backlighting"
180 | condition_13 = "Artificial Lighting"
181 | condition_14 = "Harsh Light"
182 | condition_15 = "Dappled Light"
183 | condition_16 = "Sun Flare"
184 | condition_17 = "Hazy/Haze"
185 | condition_18 = "Spring"
186 | condition_19 = "Autumn"
187 | condition_20 = "Winter"
188 | condition_21 = "Summer"
189 | conditions = [condition_1, condition_2, condition_3, condition_4, condition_5, condition_6, condition_7, condition_8, condition_9, condition_10,
190 |               condition_11, condition_12, condition_13, condition_14, condition_15, condition_16, condition_17, condition_18, condition_19,
191 |               condition_20, condition_21]
192 | 
193 | location_1 = "europe;"
194 | location_2 = "germany;"
195 | location_3 = "China;"
196 | location_4 = "USA;"
197 | location_5 = "India;"
198 | 
199 | locations = [location_1, location_2, location_3, location_4, location_5]
200 | 
201 | traffic_location_1 = ""
202 | traffic_location_2 = "Highway"
203 | traffic_location_3 = "City"
204 | traffic_locations = [traffic_location_1, traffic_location_2, traffic_location_3]
205 | 
206 | base_prompt = "A high quality photo; "
207 | 
208 | ddim_steps = 25  # 50 for higher quality at roughly twice the runtime
209 | num_samples = 1
210 | scale = 9  # classifier-free guidance scale
211 | seed = 0
212 | eta = 0  # deterministic DDIM sampling
213 | strength = 0.9
214 | 
215 | # Replace with the actual path to the folder containing PNG images
216 | input_folder = 'GTA5/images/train'
217 | input_folder_label = 'GTA5/labels/train'
218 | # Replace with the actual path to the folder where you want to save the processed images
219 | output_folder = 'pseudo_target_domain/GTA5/rand_locations_uni_cls'
220 | 
221 | CLASSES = ('road', 'sidewalk', 'building', 'wall', 'fence', 'pole',
222 |            'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky',
223 |            'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle',
224 |            'bicycle')
225 | # List all PNG files in the input folder
226 | png_files = [file for file in os.listdir(input_folder) if file.endswith('.png')]
227 | 
228 | hist = np.zeros(19)  # per-class usage counts over the 19 classes
229 | 
230 | for png_file in png_files:
231 |     file_path = os.path.join(input_folder, png_file)
232 |     label_path = os.path.join(input_folder_label, png_file.replace('.png', '_labelTrainIds.png'))
233 |     label = Image.open(label_path)
234 |     label_array = np.array(label)
235 |     classes_present = np.unique(label_array)
236 |     classes_present = [i for i in classes_present if i != 255]  # drop the ignore label
237 |     addressed_classes = [CLASSES[i] for i in classes_present]
238 |     addressed_classes_string = ', '.join(addressed_classes)
239 |     print(classes_present, addressed_classes_string)
240 | 
241 |     # Update the histogram with the current image's class occurrences
242 |     hist[classes_present] += 1
243 |     current_least_often_cls = np.argmin(hist)
244 |     current_least_often_cls_string = CLASSES[current_least_often_cls]
245 |     hist[current_least_often_cls] += 1  # count the prompted class so the next image favors a different one
246 | 
247 |     # Process the image
248 |     random.seed()  # re-seed from system entropy so the prompt choices vary per image
249 |     # To additionally randomize the weather/lighting condition, uncomment the last term below
250 |     prompt = base_prompt + random.choice(locations) + random.choice(traffic_locations) + ", " + current_least_often_cls_string + ", " + addressed_classes_string  # + ", " + random.choice(conditions)
251 |     print(prompt)
252 |     result = process_image(file_path, prompt, ddim_steps, num_samples, scale, seed, eta, strength)
253 | 
254 |     # Get the original size of the image
255 |     original_size = Image.open(file_path).size
256 | 
257 |     # Save the result in the output folder with the same filename
258 |     output_file_path = os.path.join(output_folder, png_file)
259 |     save_result(result, output_file_path, original_size)
260 | 
--------------------------------------------------------------------------------
/dataset_creation/GTA5_to_PTD_rand_location.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import torch
3 | import numpy as np
4 | import gradio as gr  # unused; leftover from the upstream gradio demo
5 | from PIL import Image
6 | from omegaconf import OmegaConf
7 | from einops import repeat, rearrange
8 | from pytorch_lightning import seed_everything
9 | from imwatermark import WatermarkEncoder
10 | 
11 | from scripts.txt2img import put_watermark
12 | from ldm.util import instantiate_from_config
13 | from ldm.models.diffusion.ddim import DDIMSampler
14 | from ldm.data.util import AddMiDaS
15 | 
16 | torch.set_grad_enabled(False)
17 | 
18 | 
19 | def initialize_model(config, ckpt):
20 |     config = OmegaConf.load(config)
21 |     print(config.model)
22 |     model = instantiate_from_config(config.model)
23 |     model.load_state_dict(torch.load(ckpt)["state_dict"], strict=False)
24 | 
25 |     device = torch.device(
26 |         "cuda") if torch.cuda.is_available() else torch.device("cpu")
27 |     model = model.to(device)
28 |     sampler = DDIMSampler(model)
29 |     return sampler
30 | 
31 | 
32 | def make_batch_sd(
33 |         image,
34 |         txt,
35 |         device,
36 |         num_samples=1,
37 |         model_type="dpt_hybrid"
38 | ):
39 |     image = np.array(image.convert("RGB"))
40 |     image = torch.from_numpy(image).to(dtype=torch.float32) / 127.5 - 1.0
41 |     # sample['jpg'] is tensor hwc in [-1, 1] at this point
42 |     midas_trafo = AddMiDaS(model_type=model_type)
43 |     batch = {
44 |         "jpg": image,
45 |         "txt": num_samples * [txt],
46 |     }
47 |     batch = midas_trafo(batch)
48 |     batch["jpg"] = rearrange(batch["jpg"], 'h w c -> 1 c h w')
49 |     batch["jpg"] = repeat(batch["jpg"].to(device=device),
50 |                           "1 ... -> n ...", n=num_samples)
51 |     batch["midas_in"] = repeat(torch.from_numpy(batch["midas_in"][None, ...]).to(
52 |         device=device), "1 ... -> n ...", n=num_samples)
-> n ...", n=num_samples) 53 | return batch 54 | 55 | 56 | def paint(sampler, image, prompt, t_enc, seed, scale, num_samples=1, callback=None, 57 | do_full_sample=False): 58 | device = torch.device( 59 | "cuda") if torch.cuda.is_available() else torch.device("cpu") 60 | model = sampler.model 61 | seed_everything(seed) 62 | 63 | print("Creating invisible watermark encoder (see https://github.com/ShieldMnt/invisible-watermark)...") 64 | wm = "SDV2" 65 | wm_encoder = WatermarkEncoder() 66 | wm_encoder.set_watermark('bytes', wm.encode('utf-8')) 67 | 68 | with torch.no_grad(),\ 69 | torch.autocast("cuda"): 70 | batch = make_batch_sd( 71 | image, txt=prompt, device=device, num_samples=num_samples) 72 | z = model.get_first_stage_encoding(model.encode_first_stage( 73 | batch[model.first_stage_key])) # move to latent space 74 | c = model.cond_stage_model.encode(batch["txt"]) 75 | c_cat = list() 76 | for ck in model.concat_keys: 77 | cc = batch[ck] 78 | cc = model.depth_model(cc) 79 | depth_min, depth_max = torch.amin(cc, dim=[1, 2, 3], keepdim=True), torch.amax(cc, dim=[1, 2, 3], 80 | keepdim=True) 81 | display_depth = (cc - depth_min) / (depth_max - depth_min) 82 | depth_image = Image.fromarray( 83 | (display_depth[0, 0, ...].cpu().numpy() * 255.).astype(np.uint8)) 84 | cc = torch.nn.functional.interpolate( 85 | cc, 86 | size=z.shape[2:], 87 | mode="bicubic", 88 | align_corners=False, 89 | ) 90 | depth_min, depth_max = torch.amin(cc, dim=[1, 2, 3], keepdim=True), torch.amax(cc, dim=[1, 2, 3], 91 | keepdim=True) 92 | cc = 2. * (cc - depth_min) / (depth_max - depth_min) - 1. 93 | c_cat.append(cc) 94 | c_cat = torch.cat(c_cat, dim=1) 95 | # cond 96 | cond = {"c_concat": [c_cat], "c_crossattn": [c]} 97 | 98 | # uncond cond 99 | uc_cross = model.get_unconditional_conditioning(num_samples, "") 100 | uc_full = {"c_concat": [c_cat], "c_crossattn": [uc_cross]} 101 | if not do_full_sample: 102 | # encode (scaled latent) 103 | z_enc = sampler.stochastic_encode( 104 | z, torch.tensor([t_enc] * num_samples).to(model.device)) 105 | else: 106 | z_enc = torch.randn_like(z) 107 | # decode it 108 | samples = sampler.decode(z_enc, cond, t_enc, unconditional_guidance_scale=scale, 109 | unconditional_conditioning=uc_full, callback=callback) 110 | x_samples_ddim = model.decode_first_stage(samples) 111 | result = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0) 112 | result = result.cpu().numpy().transpose(0, 2, 3, 1) * 255 113 | return [depth_image] + [put_watermark(Image.fromarray(img.astype(np.uint8)), wm_encoder) for img in result] 114 | 115 | 116 | def pad_image(input_image): 117 | pad_w, pad_h = np.max(((2, 2), np.ceil( 118 | np.array(input_image.size) / 64).astype(int)), axis=0) * 64 - input_image.size 119 | im_padded = Image.fromarray( 120 | np.pad(np.array(input_image), ((0, pad_h), (0, pad_w), (0, 0)), mode='edge')) 121 | return im_padded 122 | 123 | 124 | def predict(input_image, prompt, steps, num_samples, scale, seed, eta, strength): 125 | init_image = input_image.convert("RGB") 126 | image = pad_image(init_image) # resize to integer multiple of 32 127 | 128 | sampler.make_schedule(steps, ddim_eta=eta, verbose=True) 129 | assert 0. <= strength <= 1., 'can only work with strength in [0.0, 1.0]' 130 | do_full_sample = strength == 1. 
131 |     t_enc = min(int(strength * steps), steps-1)
132 |     result = paint(
133 |         sampler=sampler,
134 |         image=image,
135 |         prompt=prompt,
136 |         t_enc=t_enc,
137 |         seed=seed,
138 |         scale=scale,
139 |         num_samples=num_samples,
140 |         callback=None,
141 |         do_full_sample=do_full_sample
142 |     )
143 |     return result
144 | 
145 | 
146 | sampler = initialize_model(sys.argv[1], sys.argv[2])
147 | 
148 | from PIL import Image
149 | import os
150 | import random
151 | def process_image(file_path, prompt, ddim_steps, num_samples, scale, seed, eta, strength):
152 |     input_image = Image.open(file_path)
153 |     width, height = input_image.size
154 |     new_width = int(width * 0.5)
155 |     new_height = int(height * 0.5)
156 |     # Downscale the image by a factor of 2 to keep memory and runtime manageable
157 |     input_image = input_image.resize((new_width, new_height))
158 | 
159 |     result = predict(input_image, prompt, ddim_steps, num_samples, scale, seed, eta, strength)
160 |     return result
161 | 
162 | def save_result(result, output_file_path, original_size):
163 |     # Upsample the result back to the original size
164 |     result_image = result[1].resize(original_size)  # result[0] is the depth map; result[1] is the generated sample
165 |     result_image.save(output_file_path)
166 | 
167 | prompt_1 = "A high quality photo; europe"
168 | prompt_2 = "A high quality photo; europe;Highway"
169 | prompt_3 = "A high quality photo; europe;City"
170 | prompt_4 = "A high quality photo; germany"
171 | prompt_5 = "A high quality photo; germany;Highway"
172 | prompt_6 = "A high quality photo; germany;City"
173 | prompts = [prompt_1, prompt_2, prompt_3, prompt_4, prompt_5, prompt_6]  # only used by the commented-out prompt variant below
174 | 
175 | 
176 | condition_1 = "rain"
177 | condition_2 = "Fog/Mist"
178 | condition_3 = "Snowy"
179 | condition_4 = "Sunny"
180 | condition_5 = "Overcast"
181 | condition_6 = "Stormy"
182 | condition_7 = "overexposure"
183 | condition_8 = "underexposure"
184 | condition_9 = "evening"
185 | condition_10 = "morning"
186 | condition_11 = "Night/Darkness"
187 | condition_12 = "Backlighting"
188 | condition_13 = "Artificial Lighting"
189 | condition_14 = "Harsh Light"
190 | condition_15 = "Dappled Light"
191 | condition_16 = "Sun Flare"
192 | condition_17 = "Hazy/Haze"
193 | condition_18 = "Spring"
194 | condition_19 = "Autumn"
195 | condition_20 = "Winter"
196 | condition_21 = "Summer"
197 | conditions = [condition_1, condition_2, condition_3, condition_4, condition_5, condition_6, condition_7, condition_8, condition_9, condition_10,
198 |               condition_11, condition_12, condition_13, condition_14, condition_15, condition_16, condition_17, condition_18, condition_19,
199 |               condition_20, condition_21]
200 | 
201 | location_1 = "europe;"
202 | location_2 = "germany;"
203 | location_3 = "China;"
204 | location_4 = "USA;"
205 | location_5 = "India;"
206 | 
207 | locations = [location_1, location_2, location_3, location_4, location_5]
208 | 
209 | traffic_location_1 = ""
210 | traffic_location_2 = "Highway"
211 | traffic_location_3 = "City"
212 | traffic_locations = [traffic_location_1, traffic_location_2, traffic_location_3]
213 | 
214 | base_prompt = "A high quality photo; "
215 | 
216 | ddim_steps = 25  # 50 for higher quality at roughly twice the runtime
217 | num_samples = 1
218 | scale = 9  # classifier-free guidance scale
219 | seed = 0
220 | eta = 0  # deterministic DDIM sampling
221 | strength = 0.9
222 | 
223 | # Replace with the actual path to the folder containing PNG images
224 | input_folder = 'GTA5/images/train'
225 | input_folder_label = 'GTA5/labels/train'
226 | # Replace with the actual path to the folder where you want to save the processed images
227 | output_folder = 'pseudo_target_domain/GTA5/rand_location'
228 | 
229 | CLASSES = ('road', 'sidewalk', 'building', 'wall', 'fence', 'pole',
230 |            'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky',
231 |            'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle',
232 |            'bicycle')
233 | # List all PNG files in the input folder
234 | png_files = [file for file in os.listdir(input_folder) if file.endswith('.png')]
235 | 
236 | hist = np.zeros(19)  # per-class usage counts over the 19 classes
237 | 
238 | for png_file in png_files:
239 |     file_path = os.path.join(input_folder, png_file)
240 |     label_path = os.path.join(input_folder_label, png_file.replace('.png', '_labelTrainIds.png'))
241 |     label = Image.open(label_path)
242 |     label_array = np.array(label)
243 |     classes_present = np.unique(label_array)
244 |     classes_present = [i for i in classes_present if i != 255]  # drop the ignore label
245 |     addressed_classes = [CLASSES[i] for i in classes_present]
246 |     addressed_classes_string = ', '.join(addressed_classes)
247 |     print(classes_present, addressed_classes_string)
248 | 
249 |     # Update the histogram with the current image's class occurrences
250 |     hist[classes_present] += 1
251 |     current_least_often_cls = np.argmin(hist)
252 |     current_least_often_cls_string = CLASSES[current_least_often_cls]  # computed but not used in this random-location variant
253 |     hist[current_least_often_cls] += 1
254 | 
255 |     # Process the image
256 |     random.seed()  # re-seed from system entropy so the prompt choices vary per image
257 |     # prompt = random.choice(prompts) + ", " + random.choice(conditions) + ", " + addressed_classes_string
258 |     prompt = base_prompt + random.choice(locations) + random.choice(traffic_locations) + ", " + addressed_classes_string
259 |     print(prompt)
260 |     result = process_image(file_path, prompt, ddim_steps, num_samples, scale, seed, eta, strength)
261 | 
262 |     # Get the original size of the image
263 |     original_size = Image.open(file_path).size
264 | 
265 |     # Save the result in the output folder with the same filename
266 |     output_file_path = os.path.join(output_folder, png_file)
267 |     save_result(result, output_file_path, original_size)
268 | 
--------------------------------------------------------------------------------
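Usage note: each dataset_creation script reads the diffusion config and checkpoint from its command line (`initialize_model(sys.argv[1], sys.argv[2])`), so it is meant to be dropped into the stablediffusion repo and launched like the depth2img demo, e.g. `python GTA5_to_PTD_rand_location.py configs/stable-diffusion/v2-midas-inference.yaml 512-depth-ema.ckpt` (config and checkpoint names as documented in the stablediffusion README).

The sketch below isolates the prompt construction shared by the scripts above so it can be checked without a GPU. It is a minimal sketch, not part of the repository: the function name `build_rand_location_prompt` and the toy label map are illustrative only.

```python
import random
import numpy as np

# Constants copied from the scripts above.
CLASSES = ('road', 'sidewalk', 'building', 'wall', 'fence', 'pole',
           'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky',
           'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle',
           'bicycle')
base_prompt = "A high quality photo; "
locations = ["europe;", "germany;", "China;", "USA;", "India;"]
traffic_locations = ["", "Highway", "City"]

def build_rand_location_prompt(label_array: np.ndarray) -> str:
    """Random-location prompt from a trainId label map (255 = ignore).

    Hypothetical helper for illustration; the scripts above inline this logic.
    """
    classes_present = [i for i in np.unique(label_array) if i != 255]
    addressed = ', '.join(CLASSES[i] for i in classes_present)
    return (base_prompt + random.choice(locations)
            + random.choice(traffic_locations) + ", " + addressed)

# Toy label map containing road (0), sky (10), and car (13):
toy_label = np.array([[0, 0, 10], [0, 13, 10]], dtype=np.uint8)
print(build_rand_location_prompt(toy_label))
# e.g. "A high quality photo; germany;City, road, sky, car"
```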