├── assets
├── didex-dark.png
└── didex-light.png
├── dataset_creation
├── README.txt
├── GTA5_to_PTD_uni_cls.py
├── SYNTHIA_to_PTD_uni_cls.py
├── GTA5_to_PTD_rand_cond.py
├── GTA5_to_PTD_uni_cls_rand_cond.py
├── GTA5_to_PTD_rand_location_uni_cls_rand_cond.py
├── GTA5_to_PTD_uni_cls_rand_location.py
└── GTA5_to_PTD_rand_location.py
├── generalization_experiments
└── README.txt
└── README.md
/assets/didex-dark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JNiemeijer/DIDEX/HEAD/assets/didex-dark.png
--------------------------------------------------------------------------------
/assets/didex-light.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JNiemeijer/DIDEX/HEAD/assets/didex-light.png
--------------------------------------------------------------------------------
/dataset_creation/README.txt:
--------------------------------------------------------------------------------
1 | This folder contains scripts that can be integrated into Stable Diffusion to generate the pseudo-target domains.
--------------------------------------------------------------------------------
/generalization_experiments/README.txt:
--------------------------------------------------------------------------------
1 | This folder contains the config files for starting the experiments for adaptation to the pseudo-target domain.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 
3 |
4 | [](https://paperswithcode.com/sota/domain-generalization-on-gta-to-avg?p=generalization-by-adaptation-diffusion-based)
5 | ### [Paper](https://arxiv.org/abs/2312.01850)
6 |
7 |
8 | **Generalization by Adaptation: Diffusion-Based Domain Extension for Domain-Generalized Semantic Segmentation**
9 | [Joshua Niemeijer*](https://scholar.google.com/citations?user=SK0mAJ0AAAAJ&hl), [Manuel Schwonberg*](https://scholar.google.com/citations?user=eqsXwGIAAAAJ&hl), [Jan-Aike Termöhlen*](https://scholar.google.com/citations?user=LkhzlxIAAAAJ&hl), [Nico M. Schmidt](https://scholar.google.com/citations?user=Kaei5zsAAAAJ&hl), and [Tim Fingscheidt](https://scholar.google.com/citations?user=KDgUWRMAAAAJ&hl)
10 | Winter Conference on Applications of Computer Vision (WACV) 2024
11 | (* indicates equal contribution)
12 |
13 | The full code will be published soon.
14 |
15 | ## Installation
16 | To utilize DIDEX please follow the following steps:
17 |
18 | For the creation of the pseudo target domain we build on the following repos:
19 | 1. https://github.com/Stability-AI/stablediffusion.git
20 | 2. https://github.com/lllyasviel/ControlNet.git
21 |
22 | For the adaptation to the pseudo target domain we utilize the following repo:
23 | 1. https://github.com/lhoyer/MIC.git
24 |
25 | To utilize our code please set up the repos following the descriptions they provide.
26 |
27 | ## Diffusion-Based Domain Extension (Pseudo-Target Domain Generation)
28 | To create the Pseudo target domains please utilize the scripts in the folder dataset_creation.
29 |
30 | ## Adaptation To Pseudo-Target Domain
31 | To train the model for domain generalization please utilize the scripts in generalization_experiments
32 |
33 | ## Datasets
34 | We used the dataset structure ...
35 |
36 | ## Evaluation
37 |
38 | ## BibTeX
39 | ```
40 | @article{Niemeijer2023DIDEX,
41 | author = {Niemeijer, Joshua and Schwonberg, Manuel and Termöhlen, Jan-Aike and Schmidt, Nico M. and Fingscheidt, Tim},
42 | title = {{Generalization by Adaptation: Diffusion-Based Domain Extension for Domain-Generalized Semantic Segmentation}},
43 | year = {2023},
44 | month = dec,
45 | pages = {1--16},
46 | eprint = {2312.01850},
47 | archivePrefix = {arXiv}
48 | }
49 | ```
50 |
51 |
52 |
53 |
--------------------------------------------------------------------------------
/dataset_creation/GTA5_to_PTD_uni_cls.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import torch
3 | import numpy as np
4 | import gradio as gr
5 | from PIL import Image
6 | from omegaconf import OmegaConf
7 | from einops import repeat, rearrange
8 | from pytorch_lightning import seed_everything
9 | from imwatermark import WatermarkEncoder
10 |
11 | from scripts.txt2img import put_watermark
12 | from ldm.util import instantiate_from_config
13 | from ldm.models.diffusion.ddim import DDIMSampler
14 | from ldm.data.util import AddMiDaS
15 |
16 | torch.set_grad_enabled(False)
17 |
18 |
def initialize_model(config, ckpt):
    """Build the diffusion model described by *config* and wrap it in a DDIM sampler.

    Args:
        config: Path to an OmegaConf-readable config file with a ``model`` node.
        ckpt: Path to a checkpoint whose ``"state_dict"`` entry holds the weights.

    Returns:
        A ``DDIMSampler`` around the model, which has been moved to CUDA when
        available and to the CPU otherwise.
    """
    config = OmegaConf.load(config)
    print(config.model)
    model = instantiate_from_config(config.model)

    # Deserialize onto the CPU first: without map_location a checkpoint saved
    # from a GPU cannot be loaded on a CPU-only machine, although this function
    # explicitly falls back to the CPU below.
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    state = torch.load(ckpt, map_location="cpu")
    model.load_state_dict(state["state_dict"], strict=False)

    device = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    model = model.to(device)
    sampler = DDIMSampler(model)
    return sampler
30 |
31 |
def make_batch_sd(image, txt, device, num_samples=1, model_type="dpt_hybrid"):
    """Assemble the model input batch for one image and one prompt.

    The image is converted to RGB, scaled to [-1, 1], passed through the
    ``AddMiDaS`` transform, and then both ``"jpg"`` and ``"midas_in"`` are
    moved to *device* and repeated *num_samples* times along a new leading axis.

    Returns:
        The batch dict with keys ``"jpg"``, ``"txt"`` and ``"midas_in"``.
    """
    pixels = np.array(image.convert("RGB"))
    # HWC float tensor in [-1, 1]
    scaled = torch.from_numpy(pixels).to(dtype=torch.float32) / 127.5 - 1.0

    add_midas = AddMiDaS(model_type=model_type)
    batch = add_midas({"jpg": scaled, "txt": num_samples * [txt]})

    jpg = rearrange(batch["jpg"], 'h w c -> 1 c h w').to(device=device)
    batch["jpg"] = repeat(jpg, "1 ... -> n ...", n=num_samples)

    midas_in = torch.from_numpy(batch["midas_in"][None, ...]).to(device=device)
    batch["midas_in"] = repeat(midas_in, "1 ... -> n ...", n=num_samples)
    return batch
54 |
55 |
def paint(sampler, image, prompt, t_enc, seed, scale, num_samples=1, callback=None,
          do_full_sample=False):
    """Sample new images from *image*, conditioned on *prompt* and on the
    output of ``model.depth_model``.

    Args:
        sampler: Sampler whose ``model`` attribute is the diffusion model.
        image: PIL image handed to ``make_batch_sd``.
        prompt: Text prompt.
        t_enc: Number of encoding/decoding steps passed to the sampler.
        seed: Seed handed to ``seed_everything`` before sampling.
        scale: Unconditional guidance scale.
        num_samples: Batch size.
        callback: Forwarded to ``sampler.decode``.
        do_full_sample: When true, start from pure noise instead of the
            stochastically encoded input latent.

    Returns:
        A list: first the normalized depth preview (first batch element of the
        last concat key), then one watermarked PIL image per sample.
    """
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    model = sampler.model
    seed_everything(seed)

    print("Creating invisible watermark encoder (see https://github.com/ShieldMnt/invisible-watermark)...")
    wm_encoder = WatermarkEncoder()
    wm_encoder.set_watermark('bytes', "SDV2".encode('utf-8'))

    # NOTE(review): autocast is hard-wired to "cuda" even when `device` is the CPU.
    with torch.no_grad(), torch.autocast("cuda"):
        batch = make_batch_sd(
            image, txt=prompt, device=device, num_samples=num_samples)

        # move to latent space
        encoded = model.encode_first_stage(batch[model.first_stage_key])
        z = model.get_first_stage_encoding(encoded)
        text_cond = model.cond_stage_model.encode(batch["txt"])

        concat_parts = []
        for key in model.concat_keys:
            depth = model.depth_model(batch[key])

            # Preview image: per-sample min/max normalization to [0, 1].
            lo = torch.amin(depth, dim=[1, 2, 3], keepdim=True)
            hi = torch.amax(depth, dim=[1, 2, 3], keepdim=True)
            preview = (depth - lo) / (hi - lo)
            depth_image = Image.fromarray(
                (preview[0, 0, ...].cpu().numpy() * 255.).astype(np.uint8))

            # Conditioning: resize to the latent resolution, rescale to [-1, 1].
            depth = torch.nn.functional.interpolate(
                depth,
                size=z.shape[2:],
                mode="bicubic",
                align_corners=False,
            )
            lo = torch.amin(depth, dim=[1, 2, 3], keepdim=True)
            hi = torch.amax(depth, dim=[1, 2, 3], keepdim=True)
            concat_parts.append(2. * (depth - lo) / (hi - lo) - 1.)
        c_cat = torch.cat(concat_parts, dim=1)

        cond = {"c_concat": [c_cat], "c_crossattn": [text_cond]}

        # Unconditional branch: same depth conditioning, empty prompt.
        uc_cross = model.get_unconditional_conditioning(num_samples, "")
        uc_full = {"c_concat": [c_cat], "c_crossattn": [uc_cross]}

        if do_full_sample:
            z_enc = torch.randn_like(z)
        else:
            # encode (scaled latent)
            steps = torch.tensor([t_enc] * num_samples).to(model.device)
            z_enc = sampler.stochastic_encode(z, steps)

        samples = sampler.decode(z_enc, cond, t_enc, unconditional_guidance_scale=scale,
                                 unconditional_conditioning=uc_full, callback=callback)
        decoded = model.decode_first_stage(samples)
        result = torch.clamp((decoded + 1.0) / 2.0, min=0.0, max=1.0)
        result = result.cpu().numpy().transpose(0, 2, 3, 1) * 255

    outputs = [depth_image]
    for img in result:
        outputs.append(put_watermark(Image.fromarray(img.astype(np.uint8)), wm_encoder))
    return outputs
114 |
115 |
def pad_image(input_image):
    """Pad the right and bottom of *input_image* by edge replication.

    Width and height are each rounded up to a multiple of 64, with a minimum
    of 128. The pad spec has three axes, so the image array must be
    three-dimensional.
    """
    size = np.array(input_image.size)
    blocks = np.maximum(2, np.ceil(size / 64).astype(int))
    pad_w, pad_h = blocks * 64 - size

    pixels = np.array(input_image)
    padded = np.pad(pixels, ((0, pad_h), (0, pad_w), (0, 0)), mode='edge')
    return Image.fromarray(padded)
122 |
123 |
def predict(input_image, prompt, steps, num_samples, scale, seed, eta, strength):
    """Pad *input_image*, configure the module-level ``sampler`` and run ``paint``.

    *strength* must lie in [0, 1]; a value of exactly 1 makes ``paint`` start
    from pure noise. Returns whatever ``paint`` returns.
    """
    rgb = input_image.convert("RGB")
    # pad_image pads (it does not resize) each side up to a multiple of 64.
    padded = pad_image(rgb)

    sampler.make_schedule(steps, ddim_eta=eta, verbose=True)
    assert 0. <= strength <= 1., 'can only work with strength in [0.0, 1.0]'
    full_sample = strength == 1.
    # Never encode past the last scheduled step.
    t_enc = min(int(strength * steps), steps-1)

    return paint(
        sampler=sampler,
        image=padded,
        prompt=prompt,
        t_enc=t_enc,
        seed=seed,
        scale=scale,
        num_samples=num_samples,
        callback=None,
        do_full_sample=full_sample,
    )
144 |
145 |
# Usage: python <script> <config.yaml> <checkpoint.ckpt>
# Fail with a readable message instead of a bare IndexError when the
# config or checkpoint path is missing from the command line.
if len(sys.argv) < 3:
    raise SystemExit(f"usage: {sys.argv[0]} <config.yaml> <checkpoint.ckpt>")
sampler = initialize_model(sys.argv[1], sys.argv[2])
147 |
148 | from PIL import Image
149 | import os
150 | import random
def process_image(file_path, prompt, ddim_steps, num_samples, scale, seed, eta, strength,
                  resize_factor=0.5):
    """Load an image, rescale it and run it through ``predict``.

    Args:
        file_path: Path of the source image.
        prompt, ddim_steps, num_samples, scale, seed, eta, strength:
            Forwarded unchanged to ``predict``.
        resize_factor: Factor applied to width and height before sampling
            (default 0.5, the value that was previously hard-coded).

    Returns:
        The list returned by ``predict``.
    """
    # resize() returns a new in-memory image, so the file handle can be
    # released before sampling instead of staying open for every image
    # in the dataset.
    with Image.open(file_path) as source:
        width, height = source.size
        new_size = (int(width * resize_factor), int(height * resize_factor))
        input_image = source.resize(new_size)

    return predict(input_image, prompt, ddim_steps, num_samples, scale, seed, eta, strength)
161 |
def save_result(result, output_file_path, original_size):
    """Resize the first generated sample to *original_size* and write it to disk.

    ``result[0]`` is skipped; ``result[1]`` is the image that gets saved.

    NOTE(review): ``predict`` pads its input before sampling, so the image
    resized here presumably still contains that padding - confirm the
    resulting scale mismatch against the labels is acceptable.
    """
    generated = result[1]
    generated.resize(original_size).save(output_file_path)
166 |
# Location prompts; one is drawn at random for every image.
prompt_1 = "A high quality photo; europe"
prompt_2 = "A high quality photo; europe;Highway"
prompt_3 = "A high quality photo; europe;City"
prompt_4 = "A high quality photo; germany"
prompt_5 = "A high quality photo; germany;Highway"
prompt_6 = "A high quality photo; germany;City"
promts = [prompt_1, prompt_2, prompt_3, prompt_4, prompt_5, prompt_6]

# Sampling settings forwarded to process_image / predict.
ddim_steps = 25  # 50
num_samples = 1
scale = 9
seed = 0
eta = 0
strength = 0.9

# Replace with the actual path to the folder containing images / labels
input_folder = 'GTA5/images/train'
input_folder_label = 'GTA5/labels/train'
# Replace with the actual path to the folder where you want to save the processed images
output_folder = 'pseudo_target_domain/GTA5/uni_cls/'
# Class names indexed by train id; id 255 is skipped below.
CLASSES = ('road', 'sidewalk', 'building', 'wall', 'fence', 'pole',
           'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky',
           'person', 'rider', 'car', 'truck', 'bus', 'tram/ train/ trolley', 'motorcycle',
           'bicycle')

# Create the destination up front so the first save cannot fail after an
# expensive sampling run.
os.makedirs(output_folder, exist_ok=True)

# List all PNG files in the input folder
png_files = [file for file in os.listdir(input_folder) if file.endswith('.png')]

# Running count of how often each class has been present or prompted so far.
hist = np.zeros(19)

for png_file in png_files:
    file_path = os.path.join(input_folder, png_file)
    label_path = os.path.join(input_folder_label, png_file.replace('.png', '_labelTrainIds.png'))

    with Image.open(label_path) as label:
        classes_present = [i for i in np.unique(np.array(label)) if i != 255]
    addressed_classes_string = ', '.join(CLASSES[i] for i in classes_present)

    # Count the classes in this image, then add the class seen least often
    # so far to the prompt and count it as well.
    hist[classes_present] += 1
    current_least_often_cls = np.argmin(hist)
    current_least_often_cls_string = CLASSES[current_least_often_cls]
    hist[current_least_often_cls] += 1

    # paint() calls seed_everything(seed) for every image; re-seed from the OS
    # here so the prompt choice is not the same draw each iteration.
    random.seed()
    prompt = random.choice(promts) + ", " + current_least_often_cls_string + ", " + addressed_classes_string
    result = process_image(file_path, prompt, ddim_steps, num_samples, scale, seed, eta, strength)

    # Get the original size of the image without leaving the file open.
    with Image.open(file_path) as source:
        original_size = source.size

    # Save the result in the output folder with the same filename
    output_file_path = os.path.join(output_folder, png_file)
    save_result(result, output_file_path, original_size)
222 |
--------------------------------------------------------------------------------
/dataset_creation/SYNTHIA_to_PTD_uni_cls.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import torch
3 | import numpy as np
4 | import gradio as gr
5 | from PIL import Image
6 | from omegaconf import OmegaConf
7 | from einops import repeat, rearrange
8 | from pytorch_lightning import seed_everything
9 | from imwatermark import WatermarkEncoder
10 |
11 | from scripts.txt2img import put_watermark
12 | from ldm.util import instantiate_from_config
13 | from ldm.models.diffusion.ddim import DDIMSampler
14 | from ldm.data.util import AddMiDaS
15 |
16 | torch.set_grad_enabled(False)
17 |
18 |
def initialize_model(config, ckpt):
    """Build the diffusion model described by *config* and wrap it in a DDIM sampler.

    Args:
        config: Path to an OmegaConf-readable config file with a ``model`` node.
        ckpt: Path to a checkpoint whose ``"state_dict"`` entry holds the weights.

    Returns:
        A ``DDIMSampler`` around the model, which has been moved to CUDA when
        available and to the CPU otherwise.
    """
    config = OmegaConf.load(config)
    print(config.model)
    model = instantiate_from_config(config.model)

    # Deserialize onto the CPU first: without map_location a checkpoint saved
    # from a GPU cannot be loaded on a CPU-only machine, although this function
    # explicitly falls back to the CPU below.
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    state = torch.load(ckpt, map_location="cpu")
    model.load_state_dict(state["state_dict"], strict=False)

    device = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    model = model.to(device)
    sampler = DDIMSampler(model)
    return sampler
30 |
31 |
def make_batch_sd(image, txt, device, num_samples=1, model_type="dpt_hybrid"):
    """Assemble the model input batch for one image and one prompt.

    The image is converted to RGB, scaled to [-1, 1], passed through the
    ``AddMiDaS`` transform, and then both ``"jpg"`` and ``"midas_in"`` are
    moved to *device* and repeated *num_samples* times along a new leading axis.

    Returns:
        The batch dict with keys ``"jpg"``, ``"txt"`` and ``"midas_in"``.
    """
    pixels = np.array(image.convert("RGB"))
    # HWC float tensor in [-1, 1]
    scaled = torch.from_numpy(pixels).to(dtype=torch.float32) / 127.5 - 1.0

    add_midas = AddMiDaS(model_type=model_type)
    batch = add_midas({"jpg": scaled, "txt": num_samples * [txt]})

    jpg = rearrange(batch["jpg"], 'h w c -> 1 c h w').to(device=device)
    batch["jpg"] = repeat(jpg, "1 ... -> n ...", n=num_samples)

    midas_in = torch.from_numpy(batch["midas_in"][None, ...]).to(device=device)
    batch["midas_in"] = repeat(midas_in, "1 ... -> n ...", n=num_samples)
    return batch
54 |
55 |
def paint(sampler, image, prompt, t_enc, seed, scale, num_samples=1, callback=None,
          do_full_sample=False):
    """Sample new images from *image*, conditioned on *prompt* and on the
    output of ``model.depth_model``.

    Args:
        sampler: Sampler whose ``model`` attribute is the diffusion model.
        image: PIL image handed to ``make_batch_sd``.
        prompt: Text prompt.
        t_enc: Number of encoding/decoding steps passed to the sampler.
        seed: Seed handed to ``seed_everything`` before sampling.
        scale: Unconditional guidance scale.
        num_samples: Batch size.
        callback: Forwarded to ``sampler.decode``.
        do_full_sample: When true, start from pure noise instead of the
            stochastically encoded input latent.

    Returns:
        A list: first the normalized depth preview (first batch element of the
        last concat key), then one watermarked PIL image per sample.
    """
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    model = sampler.model
    seed_everything(seed)

    print("Creating invisible watermark encoder (see https://github.com/ShieldMnt/invisible-watermark)...")
    wm_encoder = WatermarkEncoder()
    wm_encoder.set_watermark('bytes', "SDV2".encode('utf-8'))

    # NOTE(review): autocast is hard-wired to "cuda" even when `device` is the CPU.
    with torch.no_grad(), torch.autocast("cuda"):
        batch = make_batch_sd(
            image, txt=prompt, device=device, num_samples=num_samples)

        # move to latent space
        encoded = model.encode_first_stage(batch[model.first_stage_key])
        z = model.get_first_stage_encoding(encoded)
        text_cond = model.cond_stage_model.encode(batch["txt"])

        concat_parts = []
        for key in model.concat_keys:
            depth = model.depth_model(batch[key])

            # Preview image: per-sample min/max normalization to [0, 1].
            lo = torch.amin(depth, dim=[1, 2, 3], keepdim=True)
            hi = torch.amax(depth, dim=[1, 2, 3], keepdim=True)
            preview = (depth - lo) / (hi - lo)
            depth_image = Image.fromarray(
                (preview[0, 0, ...].cpu().numpy() * 255.).astype(np.uint8))

            # Conditioning: resize to the latent resolution, rescale to [-1, 1].
            depth = torch.nn.functional.interpolate(
                depth,
                size=z.shape[2:],
                mode="bicubic",
                align_corners=False,
            )
            lo = torch.amin(depth, dim=[1, 2, 3], keepdim=True)
            hi = torch.amax(depth, dim=[1, 2, 3], keepdim=True)
            concat_parts.append(2. * (depth - lo) / (hi - lo) - 1.)
        c_cat = torch.cat(concat_parts, dim=1)

        cond = {"c_concat": [c_cat], "c_crossattn": [text_cond]}

        # Unconditional branch: same depth conditioning, empty prompt.
        uc_cross = model.get_unconditional_conditioning(num_samples, "")
        uc_full = {"c_concat": [c_cat], "c_crossattn": [uc_cross]}

        if do_full_sample:
            z_enc = torch.randn_like(z)
        else:
            # encode (scaled latent)
            steps = torch.tensor([t_enc] * num_samples).to(model.device)
            z_enc = sampler.stochastic_encode(z, steps)

        samples = sampler.decode(z_enc, cond, t_enc, unconditional_guidance_scale=scale,
                                 unconditional_conditioning=uc_full, callback=callback)
        decoded = model.decode_first_stage(samples)
        result = torch.clamp((decoded + 1.0) / 2.0, min=0.0, max=1.0)
        result = result.cpu().numpy().transpose(0, 2, 3, 1) * 255

    outputs = [depth_image]
    for img in result:
        outputs.append(put_watermark(Image.fromarray(img.astype(np.uint8)), wm_encoder))
    return outputs
114 |
115 |
def pad_image(input_image):
    """Pad the right and bottom of *input_image* by edge replication.

    Width and height are each rounded up to a multiple of 64, with a minimum
    of 128. The pad spec has three axes, so the image array must be
    three-dimensional.
    """
    size = np.array(input_image.size)
    blocks = np.maximum(2, np.ceil(size / 64).astype(int))
    pad_w, pad_h = blocks * 64 - size

    pixels = np.array(input_image)
    padded = np.pad(pixels, ((0, pad_h), (0, pad_w), (0, 0)), mode='edge')
    return Image.fromarray(padded)
122 |
123 |
def predict(input_image, prompt, steps, num_samples, scale, seed, eta, strength):
    """Pad *input_image*, configure the module-level ``sampler`` and run ``paint``.

    *strength* must lie in [0, 1]; a value of exactly 1 makes ``paint`` start
    from pure noise. Returns whatever ``paint`` returns.
    """
    rgb = input_image.convert("RGB")
    # pad_image pads (it does not resize) each side up to a multiple of 64.
    padded = pad_image(rgb)

    sampler.make_schedule(steps, ddim_eta=eta, verbose=True)
    assert 0. <= strength <= 1., 'can only work with strength in [0.0, 1.0]'
    full_sample = strength == 1.
    # Never encode past the last scheduled step.
    t_enc = min(int(strength * steps), steps-1)

    return paint(
        sampler=sampler,
        image=padded,
        prompt=prompt,
        t_enc=t_enc,
        seed=seed,
        scale=scale,
        num_samples=num_samples,
        callback=None,
        do_full_sample=full_sample,
    )
144 |
145 |
# Usage: python <script> <config.yaml> <checkpoint.ckpt>
# Fail with a readable message instead of a bare IndexError when the
# config or checkpoint path is missing from the command line.
if len(sys.argv) < 3:
    raise SystemExit(f"usage: {sys.argv[0]} <config.yaml> <checkpoint.ckpt>")
sampler = initialize_model(sys.argv[1], sys.argv[2])
147 |
148 | from PIL import Image
149 | import os
150 | import random
def process_image(file_path, prompt, ddim_steps, num_samples, scale, seed, eta, strength,
                  resize_factor=1.0):
    """Load an image, optionally rescale it and run it through ``predict``.

    Args:
        file_path: Path of the source image.
        prompt, ddim_steps, num_samples, scale, seed, eta, strength:
            Forwarded unchanged to ``predict``.
        resize_factor: Factor applied to width and height before sampling.
            Defaults to 1.0, matching the previous behaviour of this script
            (the 0.5 downscale was commented out).

    Returns:
        The list returned by ``predict``.
    """
    # resize() returns a new in-memory image, so the file handle can be
    # released before sampling instead of staying open for every image
    # in the dataset.
    with Image.open(file_path) as source:
        width, height = source.size
        new_size = (int(width * resize_factor), int(height * resize_factor))
        input_image = source.resize(new_size)

    return predict(input_image, prompt, ddim_steps, num_samples, scale, seed, eta, strength)
161 |
def save_result(result, output_file_path, original_size):
    """Resize the first generated sample to *original_size* and write it to disk.

    ``result[0]`` is skipped; ``result[1]`` is the image that gets saved.

    NOTE(review): ``predict`` pads its input before sampling, so the image
    resized here presumably still contains that padding - confirm the
    resulting scale mismatch against the labels is acceptable.
    """
    generated = result[1]
    generated.resize(original_size).save(output_file_path)
166 |
# Location prompts; one is drawn at random for every image.
prompt_1 = "A high quality photo; europe"
prompt_2 = "A high quality photo; europe;Highway"
prompt_3 = "A high quality photo; europe;City"
prompt_4 = "A high quality photo; germany"
prompt_5 = "A high quality photo; germany;Highway"
prompt_6 = "A high quality photo; germany;City"
promts = [prompt_1, prompt_2, prompt_3, prompt_4, prompt_5, prompt_6]

# Sampling settings forwarded to process_image / predict.
ddim_steps = 25  # 50
num_samples = 1
scale = 9
seed = 0
eta = 0
strength = 0.9

# Replace with the actual path to the folder containing PNG images
input_folder = 'Datasets/Synthia/train/RAND_CITYSCAPES/RGB'
input_folder_label = 'Synthia/train/RAND_CITYSCAPES/GT/LABELS'
# Replace with the actual path to the folder where you want to save the processed images
output_folder = 'pseudo_target_domain/SYNTHIA/uni_cls_rand_location'

# Class names indexed by train id; id 255 is skipped below.
CLASSES = ('road', 'sidewalk', 'building', 'wall', 'fence', 'pole',
           'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky',
           'person', 'rider', 'car', 'truck', 'bus', 'tram/ train/ trolley', 'motorcycle',
           'bicycle')

# Create the destination up front so the first save cannot fail after an
# expensive sampling run.
os.makedirs(output_folder, exist_ok=True)

# List all PNG files in the input folder
png_files = [file for file in os.listdir(input_folder) if file.endswith('.png')]

# Running count of how often each class has been present or prompted so far.
hist = np.zeros(19)
# terrain (9), truck (14) and train (16) are not in Synthia: give them a huge
# count so argmin never selects them as the least-seen class.
hist[9] = 100000000000000
hist[14] = 100000000000000
hist[16] = 100000000000000

for png_file in png_files:
    file_path = os.path.join(input_folder, png_file)
    label_path = os.path.join(input_folder_label, png_file.replace('.png', '_labelTrainIds.png'))

    with Image.open(label_path) as label:
        classes_present = [i for i in np.unique(np.array(label)) if i != 255]
    addressed_classes_string = ', '.join(CLASSES[i] for i in classes_present)
    print(classes_present, addressed_classes_string)

    # Count the classes in this image, then add the class seen least often
    # so far to the prompt and count it as well.
    hist[classes_present] += 1
    current_least_often_cls = np.argmin(hist)
    current_least_often_cls_string = CLASSES[current_least_often_cls]
    hist[current_least_often_cls] += 1

    # paint() calls seed_everything(seed) for every image; re-seed from the OS
    # here so the prompt choice is not the same draw each iteration.
    random.seed()
    prompt = random.choice(promts) + ", " + current_least_often_cls_string + ", " + addressed_classes_string
    print(prompt)
    result = process_image(file_path, prompt, ddim_steps, num_samples, scale, seed, eta, strength)

    # Get the original size of the image without leaving the file open.
    with Image.open(file_path) as source:
        original_size = source.size

    # Save the result in the output folder with the same filename
    output_file_path = os.path.join(output_folder, png_file)
    save_result(result, output_file_path, original_size)
229 |
--------------------------------------------------------------------------------
/dataset_creation/GTA5_to_PTD_rand_cond.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import torch
3 | import numpy as np
4 | import gradio as gr
5 | from PIL import Image
6 | from omegaconf import OmegaConf
7 | from einops import repeat, rearrange
8 | from pytorch_lightning import seed_everything
9 | from imwatermark import WatermarkEncoder
10 |
11 | from scripts.txt2img import put_watermark
12 | from ldm.util import instantiate_from_config
13 | from ldm.models.diffusion.ddim import DDIMSampler
14 | from ldm.data.util import AddMiDaS
15 |
16 | torch.set_grad_enabled(False)
17 |
18 |
def initialize_model(config, ckpt):
    """Build the diffusion model described by *config* and wrap it in a DDIM sampler.

    Args:
        config: Path to an OmegaConf-readable config file with a ``model`` node.
        ckpt: Path to a checkpoint whose ``"state_dict"`` entry holds the weights.

    Returns:
        A ``DDIMSampler`` around the model, which has been moved to CUDA when
        available and to the CPU otherwise.
    """
    config = OmegaConf.load(config)
    print(config.model)
    model = instantiate_from_config(config.model)

    # Deserialize onto the CPU first: without map_location a checkpoint saved
    # from a GPU cannot be loaded on a CPU-only machine, although this function
    # explicitly falls back to the CPU below.
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    state = torch.load(ckpt, map_location="cpu")
    model.load_state_dict(state["state_dict"], strict=False)

    device = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    model = model.to(device)
    sampler = DDIMSampler(model)
    return sampler
30 |
31 |
def make_batch_sd(image, txt, device, num_samples=1, model_type="dpt_hybrid"):
    """Assemble the model input batch for one image and one prompt.

    The image is converted to RGB, scaled to [-1, 1], passed through the
    ``AddMiDaS`` transform, and then both ``"jpg"`` and ``"midas_in"`` are
    moved to *device* and repeated *num_samples* times along a new leading axis.

    Returns:
        The batch dict with keys ``"jpg"``, ``"txt"`` and ``"midas_in"``.
    """
    pixels = np.array(image.convert("RGB"))
    # HWC float tensor in [-1, 1]
    scaled = torch.from_numpy(pixels).to(dtype=torch.float32) / 127.5 - 1.0

    add_midas = AddMiDaS(model_type=model_type)
    batch = add_midas({"jpg": scaled, "txt": num_samples * [txt]})

    jpg = rearrange(batch["jpg"], 'h w c -> 1 c h w').to(device=device)
    batch["jpg"] = repeat(jpg, "1 ... -> n ...", n=num_samples)

    midas_in = torch.from_numpy(batch["midas_in"][None, ...]).to(device=device)
    batch["midas_in"] = repeat(midas_in, "1 ... -> n ...", n=num_samples)
    return batch
54 |
55 |
def paint(sampler, image, prompt, t_enc, seed, scale, num_samples=1, callback=None,
          do_full_sample=False):
    """Sample new images from *image*, conditioned on *prompt* and on the
    output of ``model.depth_model``.

    Args:
        sampler: Sampler whose ``model`` attribute is the diffusion model.
        image: PIL image handed to ``make_batch_sd``.
        prompt: Text prompt.
        t_enc: Number of encoding/decoding steps passed to the sampler.
        seed: Seed handed to ``seed_everything`` before sampling.
        scale: Unconditional guidance scale.
        num_samples: Batch size.
        callback: Forwarded to ``sampler.decode``.
        do_full_sample: When true, start from pure noise instead of the
            stochastically encoded input latent.

    Returns:
        A list: first the normalized depth preview (first batch element of the
        last concat key), then one watermarked PIL image per sample.
    """
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    model = sampler.model
    seed_everything(seed)

    print("Creating invisible watermark encoder (see https://github.com/ShieldMnt/invisible-watermark)...")
    wm_encoder = WatermarkEncoder()
    wm_encoder.set_watermark('bytes', "SDV2".encode('utf-8'))

    # NOTE(review): autocast is hard-wired to "cuda" even when `device` is the CPU.
    with torch.no_grad(), torch.autocast("cuda"):
        batch = make_batch_sd(
            image, txt=prompt, device=device, num_samples=num_samples)

        # move to latent space
        encoded = model.encode_first_stage(batch[model.first_stage_key])
        z = model.get_first_stage_encoding(encoded)
        text_cond = model.cond_stage_model.encode(batch["txt"])

        concat_parts = []
        for key in model.concat_keys:
            depth = model.depth_model(batch[key])

            # Preview image: per-sample min/max normalization to [0, 1].
            lo = torch.amin(depth, dim=[1, 2, 3], keepdim=True)
            hi = torch.amax(depth, dim=[1, 2, 3], keepdim=True)
            preview = (depth - lo) / (hi - lo)
            depth_image = Image.fromarray(
                (preview[0, 0, ...].cpu().numpy() * 255.).astype(np.uint8))

            # Conditioning: resize to the latent resolution, rescale to [-1, 1].
            depth = torch.nn.functional.interpolate(
                depth,
                size=z.shape[2:],
                mode="bicubic",
                align_corners=False,
            )
            lo = torch.amin(depth, dim=[1, 2, 3], keepdim=True)
            hi = torch.amax(depth, dim=[1, 2, 3], keepdim=True)
            concat_parts.append(2. * (depth - lo) / (hi - lo) - 1.)
        c_cat = torch.cat(concat_parts, dim=1)

        cond = {"c_concat": [c_cat], "c_crossattn": [text_cond]}

        # Unconditional branch: same depth conditioning, empty prompt.
        uc_cross = model.get_unconditional_conditioning(num_samples, "")
        uc_full = {"c_concat": [c_cat], "c_crossattn": [uc_cross]}

        if do_full_sample:
            z_enc = torch.randn_like(z)
        else:
            # encode (scaled latent)
            steps = torch.tensor([t_enc] * num_samples).to(model.device)
            z_enc = sampler.stochastic_encode(z, steps)

        samples = sampler.decode(z_enc, cond, t_enc, unconditional_guidance_scale=scale,
                                 unconditional_conditioning=uc_full, callback=callback)
        decoded = model.decode_first_stage(samples)
        result = torch.clamp((decoded + 1.0) / 2.0, min=0.0, max=1.0)
        result = result.cpu().numpy().transpose(0, 2, 3, 1) * 255

    outputs = [depth_image]
    for img in result:
        outputs.append(put_watermark(Image.fromarray(img.astype(np.uint8)), wm_encoder))
    return outputs
114 |
115 |
def pad_image(input_image):
    """Pad the right and bottom of *input_image* by edge replication.

    Width and height are each rounded up to a multiple of 64, with a minimum
    of 128. The pad spec has three axes, so the image array must be
    three-dimensional.
    """
    size = np.array(input_image.size)
    blocks = np.maximum(2, np.ceil(size / 64).astype(int))
    pad_w, pad_h = blocks * 64 - size

    pixels = np.array(input_image)
    padded = np.pad(pixels, ((0, pad_h), (0, pad_w), (0, 0)), mode='edge')
    return Image.fromarray(padded)
122 |
123 |
def predict(input_image, prompt, steps, num_samples, scale, seed, eta, strength):
    """Pad *input_image*, configure the module-level ``sampler`` and run ``paint``.

    *strength* must lie in [0, 1]; a value of exactly 1 makes ``paint`` start
    from pure noise. Returns whatever ``paint`` returns.
    """
    rgb = input_image.convert("RGB")
    # pad_image pads (it does not resize) each side up to a multiple of 64.
    padded = pad_image(rgb)

    sampler.make_schedule(steps, ddim_eta=eta, verbose=True)
    assert 0. <= strength <= 1., 'can only work with strength in [0.0, 1.0]'
    full_sample = strength == 1.
    # Never encode past the last scheduled step.
    t_enc = min(int(strength * steps), steps-1)

    return paint(
        sampler=sampler,
        image=padded,
        prompt=prompt,
        t_enc=t_enc,
        seed=seed,
        scale=scale,
        num_samples=num_samples,
        callback=None,
        do_full_sample=full_sample,
    )
144 |
145 |
# Usage: python <script> <config.yaml> <checkpoint.ckpt>
# Fail with a readable message instead of a bare IndexError when the
# config or checkpoint path is missing from the command line.
if len(sys.argv) < 3:
    raise SystemExit(f"usage: {sys.argv[0]} <config.yaml> <checkpoint.ckpt>")
sampler = initialize_model(sys.argv[1], sys.argv[2])
147 |
148 | from PIL import Image
149 | import os
150 | import random
def process_image(file_path, prompt, ddim_steps, num_samples, scale, seed, eta, strength,
                  resize_factor=0.5):
    """Load an image, rescale it and run it through ``predict``.

    Args:
        file_path: Path of the source image.
        prompt, ddim_steps, num_samples, scale, seed, eta, strength:
            Forwarded unchanged to ``predict``.
        resize_factor: Factor applied to width and height before sampling
            (default 0.5, the value that was previously hard-coded).

    Returns:
        The list returned by ``predict``.
    """
    # resize() returns a new in-memory image, so the file handle can be
    # released before sampling instead of staying open for every image
    # in the dataset.
    with Image.open(file_path) as source:
        width, height = source.size
        new_size = (int(width * resize_factor), int(height * resize_factor))
        input_image = source.resize(new_size)

    return predict(input_image, prompt, ddim_steps, num_samples, scale, seed, eta, strength)
161 |
def save_result(result, output_file_path, original_size):
    """Resize the first generated sample to *original_size* and write it to disk.

    ``result[0]`` is skipped; ``result[1]`` is the image that gets saved.

    NOTE(review): ``predict`` pads its input before sampling, so the image
    resized here presumably still contains that padding - confirm the
    resulting scale mismatch against the labels is acceptable.
    """
    generated = result[1]
    generated.resize(original_size).save(output_file_path)
166 |
# Location prompts; one is drawn at random for every image.
prompt_1 = "A high quality photo; europe"
prompt_2 = "A high quality photo; europe;Highway"
prompt_3 = "A high quality photo; europe;City"
prompt_4 = "A high quality photo; germany"
prompt_5 = "A high quality photo; germany;Highway"
prompt_6 = "A high quality photo; germany;City"
promts = [prompt_1, prompt_2, prompt_3, prompt_4, prompt_5, prompt_6]

# Weather / lighting / season conditions; one is drawn at random per image.
codition_1 = "rain"
codition_2 = "Fog/Mist"
codition_3 = "Snowy"
codition_4 = "Sunny"
codition_5 = "Overcast"
codition_6 = "Stormy"
codition_7 = "overexposure"
codition_8 = "underexposure"
codition_9 = "evening"
codition_10 = "morning"
codition_11 = "Night/Darkness"
codition_12 = "Backlighting"
codition_13 = "Artificial Lighting"
codition_14 = "Harsh Light"
codition_15 = "Dappled Light"
codition_16 = "Sun Flare"
codition_17 = "Hazy/Haze"
codition_18 = "Spring"
codition_19 = "Autumn"
codition_20 = "Winter"
codition_21 = "Summer"
coditions = [
    codition_1, codition_2, codition_3, codition_4, codition_5, codition_6, codition_7,
    codition_8, codition_9, codition_10, codition_11, codition_12, codition_13, codition_14,
    codition_15, codition_16, codition_17, codition_18, codition_19, codition_20, codition_21,
]

# Sampling settings forwarded to process_image / predict.
ddim_steps = 25  # 50
num_samples = 1
scale = 9
seed = 0
eta = 0
strength = 0.9

# Replace with the actual path to the folder containing PNG images
input_folder = 'GTA5/images/train'
input_folder_label = 'GTA5/labels/train'
# Replace with the actual path to the folder where you want to save the processed images
output_folder = 'pseudo_target_domain/GTA5/rand_locations_rand_cond'

# Class names indexed by train id; id 255 is skipped below.
CLASSES = ('road', 'sidewalk', 'building', 'wall', 'fence', 'pole',
           'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky',
           'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle',
           'bicycle')

# Create the destination up front so the first save cannot fail after an
# expensive sampling run.
os.makedirs(output_folder, exist_ok=True)

# List all PNG files in the input folder
png_files = [file for file in os.listdir(input_folder) if file.endswith('.png')]

# This variant builds the prompt from a random condition instead of the
# least-seen class, so the class histogram that was updated here (but never
# read for the prompt) has been dropped.
for png_file in png_files:
    file_path = os.path.join(input_folder, png_file)
    label_path = os.path.join(input_folder_label, png_file.replace('.png', '_labelTrainIds.png'))

    with Image.open(label_path) as label:
        classes_present = [i for i in np.unique(np.array(label)) if i != 255]
    addressed_classes_string = ', '.join(CLASSES[i] for i in classes_present)
    print(classes_present, addressed_classes_string)

    # paint() calls seed_everything(seed) for every image; re-seed from the OS
    # here so the random choices are not the same draws each iteration.
    random.seed()
    prompt = random.choice(promts) + ", " + random.choice(coditions) + ", " + addressed_classes_string
    print(prompt)
    result = process_image(file_path, prompt, ddim_steps, num_samples, scale, seed, eta, strength)

    # Get the original size of the image without leaving the file open.
    with Image.open(file_path) as source:
        original_size = source.size

    # Save the result in the output folder with the same filename
    output_file_path = os.path.join(output_folder, png_file)
    save_result(result, output_file_path, original_size)
251 |
--------------------------------------------------------------------------------
/dataset_creation/GTA5_to_PTD_uni_cls_rand_cond.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import torch
3 | import numpy as np
4 | import gradio as gr
5 | from PIL import Image
6 | from omegaconf import OmegaConf
7 | from einops import repeat, rearrange
8 | from pytorch_lightning import seed_everything
9 | from imwatermark import WatermarkEncoder
10 |
11 | from scripts.txt2img import put_watermark
12 | from ldm.util import instantiate_from_config
13 | from ldm.models.diffusion.ddim import DDIMSampler
14 | from ldm.data.util import AddMiDaS
15 |
16 | torch.set_grad_enabled(False)
17 |
18 |
def initialize_model(config, ckpt):
    """Build the model described by a config file and wrap it in a DDIM sampler.

    Args:
        config: Path to the config file; its ``model`` entry is instantiated.
        ckpt: Path to a checkpoint file containing a ``"state_dict"`` entry.

    Returns:
        A ``DDIMSampler`` around the model, which is placed on CUDA when
        available and on the CPU otherwise.
    """
    config = OmegaConf.load(config)
    print(config.model)
    model = instantiate_from_config(config.model)
    # Deserialize onto the CPU so that a checkpoint saved from a GPU run also
    # loads on a CPU-only machine; the model is moved to its device below.
    state_dict = torch.load(ckpt, map_location="cpu")["state_dict"]
    # strict=False tolerates missing or unexpected keys in the checkpoint.
    model.load_state_dict(state_dict, strict=False)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    return DDIMSampler(model)
30 |
31 |
def make_batch_sd(
        image,
        txt,
        device,
        num_samples=1,
        model_type="dpt_hybrid"
):
    """Turn one image and one prompt into the batch dict used for sampling.

    Args:
        image: Object with a ``convert("RGB")`` method (a PIL image).
        txt: Prompt; it is repeated ``num_samples`` times under ``"txt"``.
        device: Device the ``"jpg"`` and ``"midas_in"`` tensors are moved to.
        num_samples: Size of the leading batch dimension.
        model_type: Model type forwarded to ``AddMiDaS``.

    Returns:
        The dict produced by the ``AddMiDaS`` transform with ``"jpg"`` and
        ``"midas_in"`` replaced by tensors repeated along a new batch axis.
    """
    rgb_pixels = np.array(image.convert("RGB"))
    # Map 8-bit values to [-1, 1]; the layout is still (h, w, c) here.
    scaled = torch.from_numpy(rgb_pixels).to(dtype=torch.float32) / 127.5 - 1.0

    # The transform is expected to add the "midas_in" entry read below.
    add_midas = AddMiDaS(model_type=model_type)
    sample = add_midas({
        "jpg": scaled,
        "txt": num_samples * [txt],
    })

    jpg = rearrange(sample["jpg"], 'h w c -> 1 c h w')
    sample["jpg"] = repeat(jpg.to(device=device), "1 ... -> n ...", n=num_samples)

    midas_in = torch.from_numpy(sample["midas_in"][None, ...]).to(device=device)
    sample["midas_in"] = repeat(midas_in, "1 ... -> n ...", n=num_samples)
    return sample
54 |
55 |
def paint(sampler, image, prompt, t_enc, seed, scale, num_samples=1, callback=None,
          do_full_sample=False):
    """Run depth-conditioned image-to-image sampling for one input image.

    Args:
        sampler: Sampler object exposing ``model``, ``stochastic_encode`` and
            ``decode``.
        image: Input image; it is handed to ``make_batch_sd`` unchanged.
        prompt: Text prompt used as cross-attention conditioning.
        t_enc: Step value passed to ``stochastic_encode`` and ``decode``.
        seed: Value passed to ``seed_everything`` before sampling.
        scale: Unconditional guidance scale passed to ``sampler.decode``.
        num_samples: Number of samples generated for the prompt.
        callback: Optional callback forwarded to ``sampler.decode``.
        do_full_sample: When true, start from Gaussian noise instead of a
            noised encoding of ``image``.

    Returns:
        A list whose first element is the depth map rendered as an 8-bit PIL
        image, followed by one watermarked PIL image per sample.
    """
    device = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    model = sampler.model
    seed_everything(seed)

    print("Creating invisible watermark encoder (see https://github.com/ShieldMnt/invisible-watermark)...")
    wm = "SDV2"
    wm_encoder = WatermarkEncoder()
    wm_encoder.set_watermark('bytes', wm.encode('utf-8'))

    # NOTE(review): autocast is hard-coded to "cuda" although `device` may be
    # the CPU - confirm the intended behaviour on CPU-only hosts.
    with torch.no_grad(),\
            torch.autocast("cuda"):
        batch = make_batch_sd(
            image, txt=prompt, device=device, num_samples=num_samples)
        z = model.get_first_stage_encoding(model.encode_first_stage(
            batch[model.first_stage_key]))  # move to latent space
        # Text conditioning for the prompt.
        c = model.cond_stage_model.encode(batch["txt"])
        c_cat = list()
        # NOTE(review): depth_image is only assigned inside this loop, so the
        # return statement relies on model.concat_keys being non-empty.
        for ck in model.concat_keys:
            cc = batch[ck]
            cc = model.depth_model(cc)
            # Normalise to [0, 1] per sample; used only for the preview image.
            # NOTE(review): a constant depth map would divide by zero here.
            depth_min, depth_max = torch.amin(cc, dim=[1, 2, 3], keepdim=True), torch.amax(cc, dim=[1, 2, 3],
                                                                                           keepdim=True)
            display_depth = (cc - depth_min) / (depth_max - depth_min)
            depth_image = Image.fromarray(
                (display_depth[0, 0, ...].cpu().numpy() * 255.).astype(np.uint8))
            # Resample the depth map to the spatial size of the latent.
            cc = torch.nn.functional.interpolate(
                cc,
                size=z.shape[2:],
                mode="bicubic",
                align_corners=False,
            )
            # Re-normalise after interpolation, this time to [-1, 1].
            depth_min, depth_max = torch.amin(cc, dim=[1, 2, 3], keepdim=True), torch.amax(cc, dim=[1, 2, 3],
                                                                                           keepdim=True)
            cc = 2. * (cc - depth_min) / (depth_max - depth_min) - 1.
            c_cat.append(cc)
        c_cat = torch.cat(c_cat, dim=1)
        # cond
        cond = {"c_concat": [c_cat], "c_crossattn": [c]}

        # uncond cond: same depth conditioning, empty prompt
        uc_cross = model.get_unconditional_conditioning(num_samples, "")
        uc_full = {"c_concat": [c_cat], "c_crossattn": [uc_cross]}
        if not do_full_sample:
            # encode (scaled latent)
            z_enc = sampler.stochastic_encode(
                z, torch.tensor([t_enc] * num_samples).to(model.device))
        else:
            # Full sample: ignore the encoded image and start from pure noise.
            z_enc = torch.randn_like(z)
        # decode it
        samples = sampler.decode(z_enc, cond, t_enc, unconditional_guidance_scale=scale,
                                 unconditional_conditioning=uc_full, callback=callback)
        x_samples_ddim = model.decode_first_stage(samples)
        # Map the decoder output from [-1, 1] to [0, 255], channels last.
        result = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
        result = result.cpu().numpy().transpose(0, 2, 3, 1) * 255
    return [depth_image] + [put_watermark(Image.fromarray(img.astype(np.uint8)), wm_encoder) for img in result]
114 |
115 |
def pad_image(input_image):
    """Pad an image on the right and bottom up to multiples of 64 pixels.

    Each side is rounded up to the next multiple of 64, with a minimum of
    128 pixels, and the added area repeats the nearest edge pixels.  The pad
    specification has three axes, so the image array must be
    (height, width, channels).
    """
    size = np.array(input_image.size)  # (width, height)
    blocks = np.ceil(size / 64).astype(int)
    target = np.maximum(blocks, 2) * 64
    pad_w, pad_h = target - size

    pixels = np.array(input_image)
    padded = np.pad(pixels, ((0, pad_h), (0, pad_w), (0, 0)), mode='edge')
    return Image.fromarray(padded)
122 |
123 |
def predict(input_image, prompt, steps, num_samples, scale, seed, eta, strength):
    """Pad the input image, set up the DDIM schedule and run ``paint``.

    ``strength`` must lie in [0.0, 1.0] (checked with an assertion after the
    schedule has been built).  A strength of exactly 1.0 requests a full
    sample.  Uses the module-level ``sampler`` and returns whatever ``paint``
    returns.
    """
    # Pad (not resize) so that both sides become integer multiples of 64.
    padded = pad_image(input_image.convert("RGB"))

    sampler.make_schedule(steps, ddim_eta=eta, verbose=True)
    assert 0. <= strength <= 1., 'can only work with strength in [0.0, 1.0]'
    # Number of encoding steps, capped at steps - 1.
    encode_steps = min(int(strength * steps), steps-1)
    return paint(
        sampler=sampler,
        image=padded,
        prompt=prompt,
        t_enc=encode_steps,
        seed=seed,
        scale=scale,
        num_samples=num_samples,
        callback=None,
        do_full_sample=(strength == 1.)
    )
144 |
145 |
# Module-level sampler read by predict(); the first command-line argument is
# the config path and the second the checkpoint path.
sampler = initialize_model(sys.argv[1], sys.argv[2])
147 |
148 | from PIL import Image
149 | import os
150 | import random
def process_image(file_path, prompt, ddim_steps, num_samples, scale, seed, eta, strength):
    """Load an image, halve its resolution and run it through ``predict``.

    Args:
        file_path: Path of the image file to open.
        prompt: Text prompt forwarded to ``predict``.
        ddim_steps, num_samples, scale, seed, eta, strength: Sampling
            parameters forwarded to ``predict`` unchanged.

    Returns:
        The value returned by ``predict``.
    """
    # Open inside a context manager so the file handle is released as soon as
    # the pixels are read; resize() returns an independent image that stays
    # valid after the source has been closed.
    with Image.open(file_path) as source_image:
        width, height = source_image.size
        # Resize the image to half its width and height
        input_image = source_image.resize((int(width * 0.5), int(height * 0.5)))

    return predict(input_image, prompt, ddim_steps, num_samples, scale, seed, eta, strength)
161 |
def save_result(result, output_file_path, original_size):
    """Resize the generated image to ``original_size`` and save it.

    ``result`` is the list returned by ``paint``: index 0 holds the depth
    preview, so the image at index 1 is the one that gets saved.
    """
    generated = result[1]
    # Upsample the result back to the original size
    generated.resize(original_size).save(output_file_path)
166 |
167 |
# Weather, lighting, time-of-day and season keywords; the generation loop
# appends one randomly chosen entry to every prompt.
coditions = [
    "rain",
    "Fog/Mist",
    "Snowy",
    "Sunny",
    "Overcast",
    "Stormy",
    "overexposure",
    "underexposure",
    "evening",
    "morning",
    "Night/Darkness",
    "Backlighting",
    "Artificial Lighting",
    "Harsh Light",
    "Dappled Light",
    "Sun Flare",
    "Hazy/Haze",
    "Spring",
    "Autumn",
    "Winter",
    "Summer",
]
# The numbered names stay available at module level.
(codition_1, codition_2, codition_3, codition_4, codition_5, codition_6,
 codition_7, codition_8, codition_9, codition_10, codition_11, codition_12,
 codition_13, codition_14, codition_15, codition_16, codition_17,
 codition_18, codition_19, codition_20, codition_21) = coditions

# Geographic keywords; the trailing ';' is part of the prompt text.
locations = ["europe;", "germany;"]
location_1, location_2 = locations

# Scene type keywords; the empty string means "no scene type".
traffic_locations = ["", "Highway", "City"]
traffic_location_1, traffic_location_2, traffic_location_3 = traffic_locations

# Every prompt starts with this text.
base_prompt = "A high quality photo; "

# Sampling parameters handed to process_image() for every image.
ddim_steps = 25  # alternative value noted by the author: 50
num_samples = 1
scale = 9
seed = 0
eta = 0
strength = 0.9
211 |
# Replace with the actual path to the folder containing PNG images
input_folder = 'GTA5/images/train'
input_folder_label = 'GTA5/labels/train'
# Replace with the actual path to the folder where you want to save the processed images
output_folder = 'pseudo_target_domain/GTA5/uni_cls_rand_condition'

# Class names indexed by train id; 255 is filtered out below and has no name.
CLASSES = ('road', 'sidewalk', 'building', 'wall', 'fence', 'pole',
           'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky',
           'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle',
           'bicycle')
# List all PNG files in the input folder
png_files = [file for file in os.listdir(input_folder) if file.endswith('.png')]

# Per-class counter: how often each class has been named in a prompt so far.
hist = np.zeros(19)

# save_result() writes straight into output_folder and does not create it, so
# make sure it exists before the (slow) generation starts.
os.makedirs(output_folder, exist_ok=True)

for png_file in png_files:
    file_path = os.path.join(input_folder, png_file)
    label_path = os.path.join(input_folder_label, png_file.replace('.png', '_labelTrainIds.png'))
    # Copy what is needed out of both files right away so that no file handle
    # stays open while the image is being generated.
    with Image.open(label_path) as label:
        label_array = np.array(label)
    with Image.open(file_path) as source_image:
        # Get the original size of the image
        original_size = source_image.size

    classes_present = np.unique(label_array)
    classes_present = [i for i in classes_present if i != 255]
    addressed_classes = [CLASSES[i] for i in classes_present]
    addressed_classes_string = ', '.join(addressed_classes)
    print(classes_present, addressed_classes_string)

    # Update the histogram with the current image's class occurrences, then
    # pick the class that is rarest so far and count it as well, because its
    # name is added to the prompt.
    hist[classes_present] += 1
    current_least_often_cls = np.argmin(hist)
    current_least_often_cls_string = CLASSES[current_least_often_cls]
    hist[current_least_often_cls] += 1

    # Process the image
    # paint() calls seed_everything(seed) for every image; re-seed `random`
    # from system entropy so the prompt choices differ between images.
    random.seed()
    prompt = base_prompt + random.choice(locations) + random.choice(traffic_locations) + ", " + current_least_often_cls_string + ", " + addressed_classes_string + ", " + random.choice(coditions)
    print(prompt)
    result = process_image(file_path, prompt, ddim_steps, num_samples, scale, seed, eta, strength)

    # Save the result in the output folder with the same filename
    output_file_path = os.path.join(output_folder, png_file)
    save_result(result, output_file_path, original_size)
257 |
--------------------------------------------------------------------------------
/dataset_creation/GTA5_to_PTD_rand_location_uni_cls_rand_cond.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import torch
3 | import numpy as np
4 | import gradio as gr
5 | from PIL import Image
6 | from omegaconf import OmegaConf
7 | from einops import repeat, rearrange
8 | from pytorch_lightning import seed_everything
9 | from imwatermark import WatermarkEncoder
10 |
11 | from scripts.txt2img import put_watermark
12 | from ldm.util import instantiate_from_config
13 | from ldm.models.diffusion.ddim import DDIMSampler
14 | from ldm.data.util import AddMiDaS
15 |
16 | torch.set_grad_enabled(False)
17 |
18 |
def initialize_model(config, ckpt):
    """Build the model described by a config file and wrap it in a DDIM sampler.

    Args:
        config: Path to the config file; its ``model`` entry is instantiated.
        ckpt: Path to a checkpoint file containing a ``"state_dict"`` entry.

    Returns:
        A ``DDIMSampler`` around the model, which is placed on CUDA when
        available and on the CPU otherwise.
    """
    config = OmegaConf.load(config)
    print(config.model)
    model = instantiate_from_config(config.model)
    # Deserialize onto the CPU so that a checkpoint saved from a GPU run also
    # loads on a CPU-only machine; the model is moved to its device below.
    state_dict = torch.load(ckpt, map_location="cpu")["state_dict"]
    # strict=False tolerates missing or unexpected keys in the checkpoint.
    model.load_state_dict(state_dict, strict=False)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    return DDIMSampler(model)
30 |
31 |
def make_batch_sd(
        image,
        txt,
        device,
        num_samples=1,
        model_type="dpt_hybrid"
):
    """Turn one image and one prompt into the batch dict used for sampling.

    Args:
        image: Object with a ``convert("RGB")`` method (a PIL image).
        txt: Prompt; it is repeated ``num_samples`` times under ``"txt"``.
        device: Device the ``"jpg"`` and ``"midas_in"`` tensors are moved to.
        num_samples: Size of the leading batch dimension.
        model_type: Model type forwarded to ``AddMiDaS``.

    Returns:
        The dict produced by the ``AddMiDaS`` transform with ``"jpg"`` and
        ``"midas_in"`` replaced by tensors repeated along a new batch axis.
    """
    rgb_pixels = np.array(image.convert("RGB"))
    # Map 8-bit values to [-1, 1]; the layout is still (h, w, c) here.
    scaled = torch.from_numpy(rgb_pixels).to(dtype=torch.float32) / 127.5 - 1.0

    # The transform is expected to add the "midas_in" entry read below.
    add_midas = AddMiDaS(model_type=model_type)
    sample = add_midas({
        "jpg": scaled,
        "txt": num_samples * [txt],
    })

    jpg = rearrange(sample["jpg"], 'h w c -> 1 c h w')
    sample["jpg"] = repeat(jpg.to(device=device), "1 ... -> n ...", n=num_samples)

    midas_in = torch.from_numpy(sample["midas_in"][None, ...]).to(device=device)
    sample["midas_in"] = repeat(midas_in, "1 ... -> n ...", n=num_samples)
    return sample
54 |
55 |
def paint(sampler, image, prompt, t_enc, seed, scale, num_samples=1, callback=None,
          do_full_sample=False):
    """Run depth-conditioned image-to-image sampling for one input image.

    Args:
        sampler: Sampler object exposing ``model``, ``stochastic_encode`` and
            ``decode``.
        image: Input image; it is handed to ``make_batch_sd`` unchanged.
        prompt: Text prompt used as cross-attention conditioning.
        t_enc: Step value passed to ``stochastic_encode`` and ``decode``.
        seed: Value passed to ``seed_everything`` before sampling.
        scale: Unconditional guidance scale passed to ``sampler.decode``.
        num_samples: Number of samples generated for the prompt.
        callback: Optional callback forwarded to ``sampler.decode``.
        do_full_sample: When true, start from Gaussian noise instead of a
            noised encoding of ``image``.

    Returns:
        A list whose first element is the depth map rendered as an 8-bit PIL
        image, followed by one watermarked PIL image per sample.
    """
    device = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    model = sampler.model
    seed_everything(seed)

    print("Creating invisible watermark encoder (see https://github.com/ShieldMnt/invisible-watermark)...")
    wm = "SDV2"
    wm_encoder = WatermarkEncoder()
    wm_encoder.set_watermark('bytes', wm.encode('utf-8'))

    # NOTE(review): autocast is hard-coded to "cuda" although `device` may be
    # the CPU - confirm the intended behaviour on CPU-only hosts.
    with torch.no_grad(),\
            torch.autocast("cuda"):
        batch = make_batch_sd(
            image, txt=prompt, device=device, num_samples=num_samples)
        z = model.get_first_stage_encoding(model.encode_first_stage(
            batch[model.first_stage_key]))  # move to latent space
        # Text conditioning for the prompt.
        c = model.cond_stage_model.encode(batch["txt"])
        c_cat = list()
        # NOTE(review): depth_image is only assigned inside this loop, so the
        # return statement relies on model.concat_keys being non-empty.
        for ck in model.concat_keys:
            cc = batch[ck]
            cc = model.depth_model(cc)
            # Normalise to [0, 1] per sample; used only for the preview image.
            # NOTE(review): a constant depth map would divide by zero here.
            depth_min, depth_max = torch.amin(cc, dim=[1, 2, 3], keepdim=True), torch.amax(cc, dim=[1, 2, 3],
                                                                                           keepdim=True)
            display_depth = (cc - depth_min) / (depth_max - depth_min)
            depth_image = Image.fromarray(
                (display_depth[0, 0, ...].cpu().numpy() * 255.).astype(np.uint8))
            # Resample the depth map to the spatial size of the latent.
            cc = torch.nn.functional.interpolate(
                cc,
                size=z.shape[2:],
                mode="bicubic",
                align_corners=False,
            )
            # Re-normalise after interpolation, this time to [-1, 1].
            depth_min, depth_max = torch.amin(cc, dim=[1, 2, 3], keepdim=True), torch.amax(cc, dim=[1, 2, 3],
                                                                                           keepdim=True)
            cc = 2. * (cc - depth_min) / (depth_max - depth_min) - 1.
            c_cat.append(cc)
        c_cat = torch.cat(c_cat, dim=1)
        # cond
        cond = {"c_concat": [c_cat], "c_crossattn": [c]}

        # uncond cond: same depth conditioning, empty prompt
        uc_cross = model.get_unconditional_conditioning(num_samples, "")
        uc_full = {"c_concat": [c_cat], "c_crossattn": [uc_cross]}
        if not do_full_sample:
            # encode (scaled latent)
            z_enc = sampler.stochastic_encode(
                z, torch.tensor([t_enc] * num_samples).to(model.device))
        else:
            # Full sample: ignore the encoded image and start from pure noise.
            z_enc = torch.randn_like(z)
        # decode it
        samples = sampler.decode(z_enc, cond, t_enc, unconditional_guidance_scale=scale,
                                 unconditional_conditioning=uc_full, callback=callback)
        x_samples_ddim = model.decode_first_stage(samples)
        # Map the decoder output from [-1, 1] to [0, 255], channels last.
        result = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
        result = result.cpu().numpy().transpose(0, 2, 3, 1) * 255
    return [depth_image] + [put_watermark(Image.fromarray(img.astype(np.uint8)), wm_encoder) for img in result]
114 |
115 |
def pad_image(input_image):
    """Pad an image on the right and bottom up to multiples of 64 pixels.

    Each side is rounded up to the next multiple of 64, with a minimum of
    128 pixels, and the added area repeats the nearest edge pixels.  The pad
    specification has three axes, so the image array must be
    (height, width, channels).
    """
    size = np.array(input_image.size)  # (width, height)
    blocks = np.ceil(size / 64).astype(int)
    target = np.maximum(blocks, 2) * 64
    pad_w, pad_h = target - size

    pixels = np.array(input_image)
    padded = np.pad(pixels, ((0, pad_h), (0, pad_w), (0, 0)), mode='edge')
    return Image.fromarray(padded)
122 |
123 |
def predict(input_image, prompt, steps, num_samples, scale, seed, eta, strength):
    """Pad the input image, set up the DDIM schedule and run ``paint``.

    ``strength`` must lie in [0.0, 1.0] (checked with an assertion after the
    schedule has been built).  A strength of exactly 1.0 requests a full
    sample.  Uses the module-level ``sampler`` and returns whatever ``paint``
    returns.
    """
    # Pad (not resize) so that both sides become integer multiples of 64.
    padded = pad_image(input_image.convert("RGB"))

    sampler.make_schedule(steps, ddim_eta=eta, verbose=True)
    assert 0. <= strength <= 1., 'can only work with strength in [0.0, 1.0]'
    # Number of encoding steps, capped at steps - 1.
    encode_steps = min(int(strength * steps), steps-1)
    return paint(
        sampler=sampler,
        image=padded,
        prompt=prompt,
        t_enc=encode_steps,
        seed=seed,
        scale=scale,
        num_samples=num_samples,
        callback=None,
        do_full_sample=(strength == 1.)
    )
144 |
145 |
# Module-level sampler read by predict(); the first command-line argument is
# the config path and the second the checkpoint path.
sampler = initialize_model(sys.argv[1], sys.argv[2])
147 |
148 | from PIL import Image
149 | import os
150 | import random
def process_image(file_path, prompt, ddim_steps, num_samples, scale, seed, eta, strength):
    """Load an image, halve its resolution and run it through ``predict``.

    Args:
        file_path: Path of the image file to open.
        prompt: Text prompt forwarded to ``predict``.
        ddim_steps, num_samples, scale, seed, eta, strength: Sampling
            parameters forwarded to ``predict`` unchanged.

    Returns:
        The value returned by ``predict``.
    """
    # Open inside a context manager so the file handle is released as soon as
    # the pixels are read; resize() returns an independent image that stays
    # valid after the source has been closed.
    with Image.open(file_path) as source_image:
        width, height = source_image.size
        # Resize the image to half its width and height
        input_image = source_image.resize((int(width * 0.5), int(height * 0.5)))

    return predict(input_image, prompt, ddim_steps, num_samples, scale, seed, eta, strength)
161 |
def save_result(result, output_file_path, original_size):
    """Resize the generated image to ``original_size`` and save it.

    ``result`` is the list returned by ``paint``: index 0 holds the depth
    preview, so the image at index 1 is the one that gets saved.
    """
    generated = result[1]
    # Upsample the result back to the original size
    generated.resize(original_size).save(output_file_path)
166 |
167 |
# Weather, lighting, time-of-day and season keywords; the generation loop
# appends one randomly chosen entry to every prompt.
coditions = [
    "rain",
    "Fog/Mist",
    "Snowy",
    "Sunny",
    "Overcast",
    "Stormy",
    "overexposure",
    "underexposure",
    "evening",
    "morning",
    "Night/Darkness",
    "Backlighting",
    "Artificial Lighting",
    "Harsh Light",
    "Dappled Light",
    "Sun Flare",
    "Hazy/Haze",
    "Spring",
    "Autumn",
    "Winter",
    "Summer",
]
# The numbered names stay available at module level.
(codition_1, codition_2, codition_3, codition_4, codition_5, codition_6,
 codition_7, codition_8, codition_9, codition_10, codition_11, codition_12,
 codition_13, codition_14, codition_15, codition_16, codition_17,
 codition_18, codition_19, codition_20, codition_21) = coditions

# Geographic keywords; the trailing ';' is part of the prompt text.
locations = ["europe;", "germany;", "China;", "USA;", "India;"]
location_1, location_2, location_3, location_4, location_5 = locations

# Scene type keywords; the empty string means "no scene type".
traffic_locations = ["", "Highway", "City"]
traffic_location_1, traffic_location_2, traffic_location_3 = traffic_locations

# Every prompt starts with this text.
base_prompt = "A high quality photo; "

# Sampling parameters handed to process_image() for every image.
ddim_steps = 25  # alternative value noted by the author: 50
num_samples = 1
scale = 9
seed = 0
eta = 0
strength = 0.9
214 |
# Replace with the actual path to the folder containing PNG images
input_folder = 'GTA5/images/train'
input_folder_label = 'GTA5/labels/train'
# Replace with the actual path to the folder where you want to save the processed images
output_folder = 'pseudo_target_domain/GTA5/rand_locations_uni_cls_rand_cond'

# Class names indexed by train id; 255 is filtered out below and has no name.
CLASSES = ('road', 'sidewalk', 'building', 'wall', 'fence', 'pole',
           'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky',
           'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle',
           'bicycle')
# List all PNG files in the input folder
png_files = [file for file in os.listdir(input_folder) if file.endswith('.png')]

# Per-class counter: how often each class has been named in a prompt so far.
hist = np.zeros(19)

# save_result() writes straight into output_folder and does not create it, so
# make sure it exists before the (slow) generation starts.
os.makedirs(output_folder, exist_ok=True)

for png_file in png_files:
    file_path = os.path.join(input_folder, png_file)
    label_path = os.path.join(input_folder_label, png_file.replace('.png', '_labelTrainIds.png'))
    # Copy what is needed out of both files right away so that no file handle
    # stays open while the image is being generated.
    with Image.open(label_path) as label:
        label_array = np.array(label)
    with Image.open(file_path) as source_image:
        # Get the original size of the image
        original_size = source_image.size

    classes_present = np.unique(label_array)
    classes_present = [i for i in classes_present if i != 255]
    addressed_classes = [CLASSES[i] for i in classes_present]
    addressed_classes_string = ', '.join(addressed_classes)
    print(classes_present, addressed_classes_string)

    # Update the histogram with the current image's class occurrences, then
    # pick the class that is rarest so far and count it as well, because its
    # name is added to the prompt.
    hist[classes_present] += 1
    current_least_often_cls = np.argmin(hist)
    current_least_often_cls_string = CLASSES[current_least_often_cls]
    hist[current_least_often_cls] += 1

    # Process the image
    # paint() calls seed_everything(seed) for every image; re-seed `random`
    # from system entropy so the prompt choices differ between images.
    random.seed()
    prompt = base_prompt + random.choice(locations) + random.choice(traffic_locations) + ", " + current_least_often_cls_string + ", " + addressed_classes_string + ", " + random.choice(coditions)
    print(prompt)
    result = process_image(file_path, prompt, ddim_steps, num_samples, scale, seed, eta, strength)

    # Save the result in the output folder with the same filename
    output_file_path = os.path.join(output_folder, png_file)
    save_result(result, output_file_path, original_size)
259 |
--------------------------------------------------------------------------------
/dataset_creation/GTA5_to_PTD_uni_cls_rand_location.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import torch
3 | import numpy as np
4 | import gradio as gr
5 | from PIL import Image
6 | from omegaconf import OmegaConf
7 | from einops import repeat, rearrange
8 | from pytorch_lightning import seed_everything
9 | from imwatermark import WatermarkEncoder
10 |
11 | from scripts.txt2img import put_watermark
12 | from ldm.util import instantiate_from_config
13 | from ldm.models.diffusion.ddim import DDIMSampler
14 | from ldm.data.util import AddMiDaS
15 |
16 | torch.set_grad_enabled(False)
17 |
18 |
def initialize_model(config, ckpt):
    """Build the model described by a config file and wrap it in a DDIM sampler.

    Args:
        config: Path to the config file; its ``model`` entry is instantiated.
        ckpt: Path to a checkpoint file containing a ``"state_dict"`` entry.

    Returns:
        A ``DDIMSampler`` around the model, which is placed on CUDA when
        available and on the CPU otherwise.
    """
    config = OmegaConf.load(config)
    print(config.model)
    model = instantiate_from_config(config.model)
    # Deserialize onto the CPU so that a checkpoint saved from a GPU run also
    # loads on a CPU-only machine; the model is moved to its device below.
    state_dict = torch.load(ckpt, map_location="cpu")["state_dict"]
    # strict=False tolerates missing or unexpected keys in the checkpoint.
    model.load_state_dict(state_dict, strict=False)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    return DDIMSampler(model)
30 |
31 |
def make_batch_sd(
        image,
        txt,
        device,
        num_samples=1,
        model_type="dpt_hybrid"
):
    """Turn one image and one prompt into the batch dict used for sampling.

    Args:
        image: Object with a ``convert("RGB")`` method (a PIL image).
        txt: Prompt; it is repeated ``num_samples`` times under ``"txt"``.
        device: Device the ``"jpg"`` and ``"midas_in"`` tensors are moved to.
        num_samples: Size of the leading batch dimension.
        model_type: Model type forwarded to ``AddMiDaS``.

    Returns:
        The dict produced by the ``AddMiDaS`` transform with ``"jpg"`` and
        ``"midas_in"`` replaced by tensors repeated along a new batch axis.
    """
    rgb_pixels = np.array(image.convert("RGB"))
    # Map 8-bit values to [-1, 1]; the layout is still (h, w, c) here.
    scaled = torch.from_numpy(rgb_pixels).to(dtype=torch.float32) / 127.5 - 1.0

    # The transform is expected to add the "midas_in" entry read below.
    add_midas = AddMiDaS(model_type=model_type)
    sample = add_midas({
        "jpg": scaled,
        "txt": num_samples * [txt],
    })

    jpg = rearrange(sample["jpg"], 'h w c -> 1 c h w')
    sample["jpg"] = repeat(jpg.to(device=device), "1 ... -> n ...", n=num_samples)

    midas_in = torch.from_numpy(sample["midas_in"][None, ...]).to(device=device)
    sample["midas_in"] = repeat(midas_in, "1 ... -> n ...", n=num_samples)
    return sample
54 |
55 |
def paint(sampler, image, prompt, t_enc, seed, scale, num_samples=1, callback=None,
          do_full_sample=False):
    """Run depth-conditioned image-to-image sampling for one input image.

    Args:
        sampler: Sampler object exposing ``model``, ``stochastic_encode`` and
            ``decode``.
        image: Input image; it is handed to ``make_batch_sd`` unchanged.
        prompt: Text prompt used as cross-attention conditioning.
        t_enc: Step value passed to ``stochastic_encode`` and ``decode``.
        seed: Value passed to ``seed_everything`` before sampling.
        scale: Unconditional guidance scale passed to ``sampler.decode``.
        num_samples: Number of samples generated for the prompt.
        callback: Optional callback forwarded to ``sampler.decode``.
        do_full_sample: When true, start from Gaussian noise instead of a
            noised encoding of ``image``.

    Returns:
        A list whose first element is the depth map rendered as an 8-bit PIL
        image, followed by one watermarked PIL image per sample.
    """
    device = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    model = sampler.model
    seed_everything(seed)

    print("Creating invisible watermark encoder (see https://github.com/ShieldMnt/invisible-watermark)...")
    wm = "SDV2"
    wm_encoder = WatermarkEncoder()
    wm_encoder.set_watermark('bytes', wm.encode('utf-8'))

    # NOTE(review): autocast is hard-coded to "cuda" although `device` may be
    # the CPU - confirm the intended behaviour on CPU-only hosts.
    with torch.no_grad(),\
            torch.autocast("cuda"):
        batch = make_batch_sd(
            image, txt=prompt, device=device, num_samples=num_samples)
        z = model.get_first_stage_encoding(model.encode_first_stage(
            batch[model.first_stage_key]))  # move to latent space
        # Text conditioning for the prompt.
        c = model.cond_stage_model.encode(batch["txt"])
        c_cat = list()
        # NOTE(review): depth_image is only assigned inside this loop, so the
        # return statement relies on model.concat_keys being non-empty.
        for ck in model.concat_keys:
            cc = batch[ck]
            cc = model.depth_model(cc)
            # Normalise to [0, 1] per sample; used only for the preview image.
            # NOTE(review): a constant depth map would divide by zero here.
            depth_min, depth_max = torch.amin(cc, dim=[1, 2, 3], keepdim=True), torch.amax(cc, dim=[1, 2, 3],
                                                                                           keepdim=True)
            display_depth = (cc - depth_min) / (depth_max - depth_min)
            depth_image = Image.fromarray(
                (display_depth[0, 0, ...].cpu().numpy() * 255.).astype(np.uint8))
            # Resample the depth map to the spatial size of the latent.
            cc = torch.nn.functional.interpolate(
                cc,
                size=z.shape[2:],
                mode="bicubic",
                align_corners=False,
            )
            # Re-normalise after interpolation, this time to [-1, 1].
            depth_min, depth_max = torch.amin(cc, dim=[1, 2, 3], keepdim=True), torch.amax(cc, dim=[1, 2, 3],
                                                                                           keepdim=True)
            cc = 2. * (cc - depth_min) / (depth_max - depth_min) - 1.
            c_cat.append(cc)
        c_cat = torch.cat(c_cat, dim=1)
        # cond
        cond = {"c_concat": [c_cat], "c_crossattn": [c]}

        # uncond cond: same depth conditioning, empty prompt
        uc_cross = model.get_unconditional_conditioning(num_samples, "")
        uc_full = {"c_concat": [c_cat], "c_crossattn": [uc_cross]}
        if not do_full_sample:
            # encode (scaled latent)
            z_enc = sampler.stochastic_encode(
                z, torch.tensor([t_enc] * num_samples).to(model.device))
        else:
            # Full sample: ignore the encoded image and start from pure noise.
            z_enc = torch.randn_like(z)
        # decode it
        samples = sampler.decode(z_enc, cond, t_enc, unconditional_guidance_scale=scale,
                                 unconditional_conditioning=uc_full, callback=callback)
        x_samples_ddim = model.decode_first_stage(samples)
        # Map the decoder output from [-1, 1] to [0, 255], channels last.
        result = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
        result = result.cpu().numpy().transpose(0, 2, 3, 1) * 255
    return [depth_image] + [put_watermark(Image.fromarray(img.astype(np.uint8)), wm_encoder) for img in result]
114 |
115 |
def pad_image(input_image):
    """Pad an image on the right and bottom up to multiples of 64 pixels.

    Each side is rounded up to the next multiple of 64, with a minimum of
    128 pixels, and the added area repeats the nearest edge pixels.  The pad
    specification has three axes, so the image array must be
    (height, width, channels).
    """
    size = np.array(input_image.size)  # (width, height)
    blocks = np.ceil(size / 64).astype(int)
    target = np.maximum(blocks, 2) * 64
    pad_w, pad_h = target - size

    pixels = np.array(input_image)
    padded = np.pad(pixels, ((0, pad_h), (0, pad_w), (0, 0)), mode='edge')
    return Image.fromarray(padded)
122 |
123 |
def predict(input_image, prompt, steps, num_samples, scale, seed, eta, strength):
    """Pad the input image, set up the DDIM schedule and run ``paint``.

    ``strength`` must lie in [0.0, 1.0] (checked with an assertion after the
    schedule has been built).  A strength of exactly 1.0 requests a full
    sample.  Uses the module-level ``sampler`` and returns whatever ``paint``
    returns.
    """
    # Pad (not resize) so that both sides become integer multiples of 64.
    padded = pad_image(input_image.convert("RGB"))

    sampler.make_schedule(steps, ddim_eta=eta, verbose=True)
    assert 0. <= strength <= 1., 'can only work with strength in [0.0, 1.0]'
    # Number of encoding steps, capped at steps - 1.
    encode_steps = min(int(strength * steps), steps-1)
    return paint(
        sampler=sampler,
        image=padded,
        prompt=prompt,
        t_enc=encode_steps,
        seed=seed,
        scale=scale,
        num_samples=num_samples,
        callback=None,
        do_full_sample=(strength == 1.)
    )
144 |
145 |
# Module-level sampler read by predict(); the first command-line argument is
# the config path and the second the checkpoint path.
sampler = initialize_model(sys.argv[1], sys.argv[2])
147 |
148 | from PIL import Image
149 | import os
150 | import random
def process_image(file_path, prompt, ddim_steps, num_samples, scale, seed, eta, strength):
    """Load an image, halve its resolution and run it through ``predict``.

    Args:
        file_path: Path of the image file to open.
        prompt: Text prompt forwarded to ``predict``.
        ddim_steps, num_samples, scale, seed, eta, strength: Sampling
            parameters forwarded to ``predict`` unchanged.

    Returns:
        The value returned by ``predict``.
    """
    # Open inside a context manager so the file handle is released as soon as
    # the pixels are read; resize() returns an independent image that stays
    # valid after the source has been closed.
    with Image.open(file_path) as source_image:
        width, height = source_image.size
        # Resize the image to half its width and height
        input_image = source_image.resize((int(width * 0.5), int(height * 0.5)))

    return predict(input_image, prompt, ddim_steps, num_samples, scale, seed, eta, strength)
161 |
def save_result(result, output_file_path, original_size):
    """Resize the generated image to ``original_size`` and save it.

    ``result`` is the list returned by ``paint``: index 0 holds the depth
    preview, so the image at index 1 is the one that gets saved.
    """
    generated = result[1]
    # Upsample the result back to the original size
    generated.resize(original_size).save(output_file_path)
166 |
167 |
# Weather, lighting, time-of-day and season keywords.  The prompt built by
# the generation loop of this script does not append a condition; the list
# is defined but unused there.
coditions = [
    "rain",
    "Fog/Mist",
    "Snowy",
    "Sunny",
    "Overcast",
    "Stormy",
    "overexposure",
    "underexposure",
    "evening",
    "morning",
    "Night/Darkness",
    "Backlighting",
    "Artificial Lighting",
    "Harsh Light",
    "Dappled Light",
    "Sun Flare",
    "Hazy/Haze",
    "Spring",
    "Autumn",
    "Winter",
    "Summer",
]
# The numbered names stay available at module level.
(codition_1, codition_2, codition_3, codition_4, codition_5, codition_6,
 codition_7, codition_8, codition_9, codition_10, codition_11, codition_12,
 codition_13, codition_14, codition_15, codition_16, codition_17,
 codition_18, codition_19, codition_20, codition_21) = coditions

# Geographic keywords; the trailing ';' is part of the prompt text.
locations = ["europe;", "germany;", "China;", "USA;", "India;"]
location_1, location_2, location_3, location_4, location_5 = locations

# Scene type keywords; the empty string means "no scene type".
traffic_locations = ["", "Highway", "City"]
traffic_location_1, traffic_location_2, traffic_location_3 = traffic_locations

# Every prompt starts with this text.
base_prompt = "A high quality photo; "

# Sampling parameters handed to process_image() for every image.
ddim_steps = 25  # alternative value noted by the author: 50
num_samples = 1
scale = 9
seed = 0
eta = 0
strength = 0.9
214 |
# Replace with the actual path to the folder containing PNG images
input_folder = 'GTA5/images/train'
input_folder_label = 'GTA5/labels/train'
# Replace with the actual path to the folder where you want to save the processed images
output_folder = 'pseudo_target_domain/GTA5/rand_locations_uni_cls'

# Class names indexed by train id; 255 is filtered out below and has no name.
CLASSES = ('road', 'sidewalk', 'building', 'wall', 'fence', 'pole',
           'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky',
           'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle',
           'bicycle')
# List all PNG files in the input folder
png_files = [file for file in os.listdir(input_folder) if file.endswith('.png')]

# Per-class counter: how often each class has been named in a prompt so far.
hist = np.zeros(19)

# save_result() writes straight into output_folder and does not create it, so
# make sure it exists before the (slow) generation starts.
os.makedirs(output_folder, exist_ok=True)

for png_file in png_files:
    file_path = os.path.join(input_folder, png_file)
    label_path = os.path.join(input_folder_label, png_file.replace('.png', '_labelTrainIds.png'))
    # Copy what is needed out of both files right away so that no file handle
    # stays open while the image is being generated.
    with Image.open(label_path) as label:
        label_array = np.array(label)
    with Image.open(file_path) as source_image:
        # Get the original size of the image
        original_size = source_image.size

    classes_present = np.unique(label_array)
    classes_present = [i for i in classes_present if i != 255]
    addressed_classes = [CLASSES[i] for i in classes_present]
    addressed_classes_string = ', '.join(addressed_classes)
    print(classes_present, addressed_classes_string)

    # Update the histogram with the current image's class occurrences, then
    # pick the class that is rarest so far and count it as well, because its
    # name is added to the prompt.
    hist[classes_present] += 1
    current_least_often_cls = np.argmin(hist)
    current_least_often_cls_string = CLASSES[current_least_often_cls]
    hist[current_least_often_cls] += 1

    # Process the image
    # paint() calls seed_everything(seed) for every image; re-seed `random`
    # from system entropy so the prompt choices differ between images.
    random.seed()
    # This variant deliberately appends no weather/lighting condition.
    prompt = base_prompt + random.choice(locations) + random.choice(traffic_locations) + ", " + current_least_often_cls_string + ", " + addressed_classes_string
    print(prompt)
    result = process_image(file_path, prompt, ddim_steps, num_samples, scale, seed, eta, strength)

    # Save the result in the output folder with the same filename
    output_file_path = os.path.join(output_folder, png_file)
    save_result(result, output_file_path, original_size)
260 |
--------------------------------------------------------------------------------
/dataset_creation/GTA5_to_PTD_rand_location.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import torch
3 | import numpy as np
4 | import gradio as gr
5 | from PIL import Image
6 | from omegaconf import OmegaConf
7 | from einops import repeat, rearrange
8 | from pytorch_lightning import seed_everything
9 | from imwatermark import WatermarkEncoder
10 |
11 | from scripts.txt2img import put_watermark
12 | from ldm.util import instantiate_from_config
13 | from ldm.models.diffusion.ddim import DDIMSampler
14 | from ldm.data.util import AddMiDaS
15 |
16 | torch.set_grad_enabled(False)
17 |
18 |
def initialize_model(config, ckpt):
    """Build the model described by a config file and wrap it in a DDIM sampler.

    Args:
        config: Path to the config file; its ``model`` entry is instantiated.
        ckpt: Path to a checkpoint file containing a ``"state_dict"`` entry.

    Returns:
        A ``DDIMSampler`` around the model, which is placed on CUDA when
        available and on the CPU otherwise.
    """
    config = OmegaConf.load(config)
    print(config.model)
    model = instantiate_from_config(config.model)
    # Deserialize onto the CPU so that a checkpoint saved from a GPU run also
    # loads on a CPU-only machine; the model is moved to its device below.
    state_dict = torch.load(ckpt, map_location="cpu")["state_dict"]
    # strict=False tolerates missing or unexpected keys in the checkpoint.
    model.load_state_dict(state_dict, strict=False)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    return DDIMSampler(model)
30 |
31 |
def make_batch_sd(
        image,
        txt,
        device,
        num_samples=1,
        model_type="dpt_hybrid"
):
    """Turn one image and one prompt into the batch dict used for sampling.

    Args:
        image: Object with a ``convert("RGB")`` method (a PIL image).
        txt: Prompt; it is repeated ``num_samples`` times under ``"txt"``.
        device: Device the ``"jpg"`` and ``"midas_in"`` tensors are moved to.
        num_samples: Size of the leading batch dimension.
        model_type: Model type forwarded to ``AddMiDaS``.

    Returns:
        The dict produced by the ``AddMiDaS`` transform with ``"jpg"`` and
        ``"midas_in"`` replaced by tensors repeated along a new batch axis.
    """
    rgb_pixels = np.array(image.convert("RGB"))
    # Map 8-bit values to [-1, 1]; the layout is still (h, w, c) here.
    scaled = torch.from_numpy(rgb_pixels).to(dtype=torch.float32) / 127.5 - 1.0

    # The transform is expected to add the "midas_in" entry read below.
    add_midas = AddMiDaS(model_type=model_type)
    sample = add_midas({
        "jpg": scaled,
        "txt": num_samples * [txt],
    })

    jpg = rearrange(sample["jpg"], 'h w c -> 1 c h w')
    sample["jpg"] = repeat(jpg.to(device=device), "1 ... -> n ...", n=num_samples)

    midas_in = torch.from_numpy(sample["midas_in"][None, ...]).to(device=device)
    sample["midas_in"] = repeat(midas_in, "1 ... -> n ...", n=num_samples)
    return sample
54 |
55 |
def _depth_min_and_span(depth):
    """Return the per-sample minimum and value range of a depth batch.

    The range is ``max - min`` over dimensions 1-3. A range of exactly zero
    (a constant depth map) is replaced by one, so dividing by it yields
    zeros instead of NaNs.
    """
    depth_min = torch.amin(depth, dim=[1, 2, 3], keepdim=True)
    depth_max = torch.amax(depth, dim=[1, 2, 3], keepdim=True)
    span = depth_max - depth_min
    return depth_min, torch.where(span > 0, span, torch.ones_like(span))


def paint(sampler, image, prompt, t_enc, seed, scale, num_samples=1, callback=None,
          do_full_sample=False):
    """Run depth-conditioned image-to-image sampling for one input image.

    Args:
        sampler: Sampler whose ``model`` attribute provides the encoders,
            the depth model and the decoder used here.
        image: Input image, forwarded to ``make_batch_sd``.
        prompt: Text prompt used for the cross-attention conditioning.
        t_enc: Step count handed to ``sampler.stochastic_encode`` and
            ``sampler.decode``.
        seed: Seed passed to ``seed_everything`` before any sampling.
        scale: Unconditional guidance scale.
        num_samples: Number of images generated for the prompt.
        callback: Optional callback forwarded to ``sampler.decode``.
        do_full_sample: If true, decoding starts from pure Gaussian noise
            instead of the noised latent of the input image.

    Returns:
        A list whose first element is the depth visualisation (8-bit image)
        and whose remaining elements are the watermarked generated images.
    """
    device = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    model = sampler.model
    seed_everything(seed)

    print("Creating invisible watermark encoder (see https://github.com/ShieldMnt/invisible-watermark)...")
    wm = "SDV2"
    wm_encoder = WatermarkEncoder()
    wm_encoder.set_watermark('bytes', wm.encode('utf-8'))

    # NOTE(review): autocast is hard-coded to "cuda" although `device` may
    # fall back to the CPU -- confirm whether the CPU path is supported.
    with torch.no_grad(), \
            torch.autocast("cuda"):
        batch = make_batch_sd(
            image, txt=prompt, device=device, num_samples=num_samples)
        z = model.get_first_stage_encoding(model.encode_first_stage(
            batch[model.first_stage_key]))  # move to latent space
        c = model.cond_stage_model.encode(batch["txt"])
        c_cat = list()
        for ck in model.concat_keys:
            cc = batch[ck]
            cc = model.depth_model(cc)
            # Depth scaled to [0, 1] is used for the returned preview image only.
            depth_min, depth_span = _depth_min_and_span(cc)
            display_depth = (cc - depth_min) / depth_span
            depth_image = Image.fromarray(
                (display_depth[0, 0, ...].cpu().numpy() * 255.).astype(np.uint8))
            # Bring the depth map to the latent resolution before conditioning on it.
            cc = torch.nn.functional.interpolate(
                cc,
                size=z.shape[2:],
                mode="bicubic",
                align_corners=False,
            )
            # Re-normalise after resampling, this time to [-1, 1].
            depth_min, depth_span = _depth_min_and_span(cc)
            cc = 2. * (cc - depth_min) / depth_span - 1.
            c_cat.append(cc)
        c_cat = torch.cat(c_cat, dim=1)
        # cond
        cond = {"c_concat": [c_cat], "c_crossattn": [c]}

        # uncond cond: same depth conditioning, empty prompt
        uc_cross = model.get_unconditional_conditioning(num_samples, "")
        uc_full = {"c_concat": [c_cat], "c_crossattn": [uc_cross]}
        if not do_full_sample:
            # encode (scaled latent)
            z_enc = sampler.stochastic_encode(
                z, torch.tensor([t_enc] * num_samples).to(model.device))
        else:
            z_enc = torch.randn_like(z)
        # decode it
        samples = sampler.decode(z_enc, cond, t_enc, unconditional_guidance_scale=scale,
                                 unconditional_conditioning=uc_full, callback=callback)
        x_samples_ddim = model.decode_first_stage(samples)
        # Map the decoder output from [-1, 1] to [0, 255], channels last.
        result = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
        result = result.cpu().numpy().transpose(0, 2, 3, 1) * 255
    return [depth_image] + [put_watermark(Image.fromarray(img.astype(np.uint8)), wm_encoder) for img in result]
114 |
115 |
def pad_image(input_image):
    """Pad an image on the right and bottom to a multiple of 64 pixels per side.

    Each side grows to the next multiple of 64, with a minimum of 128, by
    replicating the edge pixels. The array form of *input_image* must be
    three-dimensional (height, width, channels).

    Returns:
        A new image built from the padded array.
    """
    size = np.array(input_image.size)  # (width, height)
    # Number of 64-pixel blocks per side, never fewer than two.
    blocks = np.maximum(np.ceil(size / 64).astype(int), 2)
    pad_w, pad_h = blocks * 64 - size
    padding = ((0, pad_h), (0, pad_w), (0, 0))
    return Image.fromarray(np.pad(np.array(input_image), padding, mode='edge'))
122 |
123 |
def predict(input_image, prompt, steps, num_samples, scale, seed, eta, strength):
    """Prepare the sampler schedule and generate images for one input image.

    Args:
        input_image: Source image; converted to RGB and padded before use.
        prompt: Text prompt for the generation.
        steps: Number of DDIM steps for the schedule.
        num_samples: Number of images to generate.
        scale: Unconditional guidance scale.
        seed: Random seed forwarded to ``paint``.
        eta: DDIM eta for the schedule.
        strength: Value in [0.0, 1.0]; 1.0 selects full sampling from noise.

    Returns:
        The list returned by ``paint``.

    Raises:
        AssertionError: If ``strength`` lies outside [0.0, 1.0].
    """
    # pad_image extends both sides to a multiple of 64 pixels
    image = pad_image(input_image.convert("RGB"))

    sampler.make_schedule(steps, ddim_eta=eta, verbose=True)
    assert 0. <= strength <= 1., 'can only work with strength in [0.0, 1.0]'
    # Number of encoding steps derived from the strength, capped at steps - 1.
    t_enc = min(int(strength * steps), steps - 1)
    return paint(
        sampler=sampler,
        image=image,
        prompt=prompt,
        t_enc=t_enc,
        seed=seed,
        scale=scale,
        num_samples=num_samples,
        callback=None,
        do_full_sample=(strength == 1.),
    )
144 |
145 |
# Command line: <config> <checkpoint>. Fail with a usage message rather than
# a bare IndexError when an argument is missing.
if len(sys.argv) < 3:
    sys.exit(f"usage: {sys.argv[0]} <config> <checkpoint>")
# Module-level sampler shared by predict().
sampler = initialize_model(sys.argv[1], sys.argv[2])
147 |
148 | from PIL import Image
149 | import os
150 | import random
def process_image(file_path, prompt, ddim_steps, num_samples, scale, seed, eta, strength):
    """Load an image, halve its resolution and run ``predict`` on it.

    Args:
        file_path: Path of the source image.
        prompt: Text prompt for the generation.
        ddim_steps: Number of DDIM steps.
        num_samples: Number of images to generate.
        scale: Unconditional guidance scale.
        seed: Random seed.
        eta: DDIM eta.
        strength: Value in [0.0, 1.0] forwarded to ``predict``.

    Returns:
        The list returned by ``predict``.
    """
    # The context manager closes the underlying file once the pixel data
    # has been copied into the resized image.
    with Image.open(file_path) as source_image:
        width, height = source_image.size
        half_size = (int(width * 0.5), int(height * 0.5))
        # Resize the image
        input_image = source_image.resize(half_size)

    return predict(input_image, prompt, ddim_steps, num_samples, scale, seed, eta, strength)
161 |
def save_result(result, output_file_path, original_size):
    """Resize the first generated image to its original size and save it.

    Args:
        result: Sequence as returned by ``paint``: index 0 is the depth
            visualisation, index 1 the first generated image.
        output_file_path: Destination file path. Missing parent directories
            are created.
        original_size: ``(width, height)`` the image is resized to.
    """
    # Create the target directory up front so that the finished (and
    # expensive) sample is not lost to a missing folder.
    output_dir = os.path.dirname(output_file_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    # Upsample the result back to the original size
    result_image = result[1].resize(original_size)
    result_image.save(output_file_path)
166 |
# Complete prompt strings; in this script they are referenced only by the
# commented-out prompt variant inside the generation loop.
promts = [
    "A high quality photo; europe",
    "A high quality photo; europe;Highway",
    "A high quality photo; europe;City",
    "A high quality photo; germany",
    "A high quality photo; germany;Highway",
    "A high quality photo; germany;City",
]
# Keep the individual names available as well.
prompt_1, prompt_2, prompt_3, prompt_4, prompt_5, prompt_6 = promts
174 |
175 |
# Weather, lighting, time-of-day and season keywords; in this script they
# are referenced only by the commented-out prompt variant inside the
# generation loop.
coditions = [
    "rain",
    "Fog/Mist",
    "Snowy",
    "Sunny",
    "Overcast",
    "Stormy",
    "overexposure",
    "underexposure",
    "evening",
    "morning",
    "Night/Darkness",
    "Backlighting",
    "Artificial Lighting",
    "Harsh Light",
    "Dappled Light",
    "Sun Flare",
    "Hazy/Haze",
    "Spring",
    "Autumn",
    "Winter",
    "Summer",
]
# Keep the individual names available as well.
(codition_1, codition_2, codition_3, codition_4, codition_5, codition_6,
 codition_7, codition_8, codition_9, codition_10, codition_11, codition_12,
 codition_13, codition_14, codition_15, codition_16, codition_17,
 codition_18, codition_19, codition_20, codition_21) = coditions
200 |
# Geographic part of the prompt; every entry already ends with ';'.
locations = ["europe;", "germany;", "China;", "USA;", "India;"]
location_1, location_2, location_3, location_4, location_5 = locations

# Traffic-scene part of the prompt; the empty string adds no scene keyword.
traffic_locations = ["", "Highway", "City"]
traffic_location_1, traffic_location_2, traffic_location_3 = traffic_locations

# Common prefix of every generated prompt.
base_prompt = "A high quality photo; "
215 |
# Sampling settings handed to process_image() for every image.
ddim_steps = 25  # number of DDIM steps; alternative noted originally: 50
num_samples = 1  # images generated per source image
scale = 9  # unconditional guidance scale
seed = 0  # the same seed is used for every image
eta = 0  # DDIM eta used when building the sampler schedule
strength = 0.9  # must lie in [0.0, 1.0]; 1.0 would sample from pure noise

# Replace with the actual path to the folder containing PNG images
input_folder = 'GTA5/images/train'
input_folder_label = 'GTA5/labels/train'
# Replace with the actual path to the folder where you want to save the processed images
# NOTE(review): the folder name says "uni_cls_rand_location" although this is
# the rand_location script -- confirm this is the intended destination.
output_folder = 'pseudo_target_domain/GTA5/uni_cls_rand_location'
# Create the destination up front so saving does not fail on a missing folder.
os.makedirs(output_folder, exist_ok=True)

# Class names indexed by the ids found in the label images.
CLASSES = ('road', 'sidewalk', 'building', 'wall', 'fence', 'pole',
           'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky',
           'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle',
           'bicycle')
# List all PNG files in the input folder; sorted because os.listdir returns
# entries in arbitrary order.
png_files = sorted(file for file in os.listdir(input_folder) if file.endswith('.png'))

# Per-class occurrence counter, one slot per entry in CLASSES.
hist = np.zeros(len(CLASSES))
237 |
# Generate one pseudo-target-domain image per source image.
for png_file in png_files:
    file_path = os.path.join(input_folder, png_file)
    label_path = os.path.join(input_folder_label, png_file.replace('.png', '_labelTrainIds.png'))

    # Read what is needed from the label map and the source image up front
    # and close both files right away instead of leaking the handles.
    with Image.open(label_path) as label:
        label_array = np.array(label)
    with Image.open(file_path) as source_image:
        # Get the original size of the image
        original_size = source_image.size

    # Class ids present in this image; 255 is excluded.
    classes_present = [i for i in np.unique(label_array) if i != 255]
    addressed_classes = [CLASSES[i] for i in classes_present]
    addressed_classes_string = ', '.join(addressed_classes)
    print(classes_present, addressed_classes_string)

    # Update the histogram with the current image's class occurrences.
    # The least frequent class is tracked but not used in this script's prompt.
    hist[classes_present] += 1
    current_least_often_cls = np.argmin(hist)
    current_least_often_cls_string = CLASSES[current_least_often_cls]
    hist[current_least_often_cls] += 1

    # Process the image
    # Re-seed from system entropy on every iteration: paint() calls
    # seed_everything(seed) with a fixed seed, which presumably resets
    # Python's `random` as well (verify against the PyTorch Lightning docs);
    # without this the same location would then be drawn for every image.
    random.seed()
    # prompt = random.choice(promts)+ ", " + random.choice(coditions) + ", " + addressed_classes_string
    prompt = base_prompt + random.choice(locations) + random.choice(traffic_locations) + ", " + addressed_classes_string
    print(prompt)
    result = process_image(file_path, prompt, ddim_steps, num_samples, scale, seed, eta, strength)

    # Save the result in the output folder with the same filename
    output_file_path = os.path.join(output_folder, png_file)
    save_result(result, output_file_path, original_size)
268 |
--------------------------------------------------------------------------------