├── PIT_tensor.py
├── README.md
├── pit_annotations.py
├── pit_images_in_root_folder.py
├── test_annotations
│   └── aachen_000000_000019_leftImg8bit.xml
└── test_images
    ├── RGB.png
    └── gray.png

/PIT_tensor.py:
--------------------------------------------------------------------------------
# Position-Invariant Transform (PIT)

import math
import os
import sys
import time
import numpy as np
from PIL import Image
import torch
from itertools import product


class PIT_module:
    def __init__(self, w, h, fovx = 0, fovy = 0, isPITedSize = False):
        '''
        w and h: the width and height of the input image.
        fovx, fovy: intrinsic parameters of the camera, in radians. One is enough;
            the other is derived from the aspect ratio.
        isPITedSize = False: the size (w and h) comes from an original image.
        isPITedSize = True: the size (w and h) comes from a PITed image. (Used when you need to
            reverse-PIT a PITed image and don't know the original size.)

        If you need to transform an image cyclically (original -> PITed -> original), you don't
        need to create two PIT_module instances: set the "reverse" parameter of the "pit" method.
        '''
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.plain_width, self.plain_height, self.arc_width, self.arc_height = 0, 0, 0, 0

        if isPITedSize:
            self.arc_width, self.arc_height = w, h
            self.cal_fov_with_PITed_image_size(fovx, fovy)
            self.cal_plain_size()
        else:
            self.plain_width, self.plain_height = w, h
            self.aspect_ratio = self.plain_width / self.plain_height
            self.cal_fov_with_original_image_size(fovx, fovy)
            self.cal_arc_size()

        # Position lists are computed lazily on the first call to pit().
        self.arc_pos_list = None
        self.plain_pos_list = None

    def cal_fov_with_PITed_image_size(self, fovx, fovy):
        '''Calculate the focal lengths (and fovx or fovy, if not provided).'''
        self.fovx = fovx
        self.fovy = fovy
        if fovx != 0 and fovy != 0:
            self.fx = self.arc_width / fovx
            self.fy = self.arc_height / fovy
        elif fovx != 0:
            self.fx = self.arc_width / fovx
            self.fy = self.fx
            self.fovy = self.arc_height / self.fy
        elif fovy != 0:
            self.fy = self.arc_height / fovy
            self.fx = self.fy
            self.fovx = self.arc_width / self.fx

    def cal_fov_with_original_image_size(self, fovx, fovy):
        self.fovx = fovx
        self.fovy = fovy
        if fovx != 0 and fovy != 0:
            pass
        elif fovx != 0:
            self.fovy = 2 * math.atan(1 / self.aspect_ratio * math.tan(fovx / 2))
        else:  # fovy != 0
            self.fovx = 2 * math.atan(self.aspect_ratio * math.tan(fovy / 2))

        self.fx = self.plain_width / (2 * math.tan(self.fovx / 2))   # focal length x
        self.fy = self.plain_height / (2 * math.tan(self.fovy / 2))  # focal length y

    def cal_arc_size(self):
        '''Given the size of the original image, calculate the size of the PITed image.'''
        self.arc_width = int(2 * math.atan(self.plain_width / self.fx / 2) * self.fx)
        self.arc_height = int(2 * math.atan(self.plain_height / self.fy / 2) * self.fy)

    def cal_plain_size(self):
        '''Given the size of the PITed image, calculate the size of the original image.'''
        self.plain_width = int(2 * math.tan(self.arc_width / self.fx / 2) * self.fx)
        self.plain_height = int(2 * math.tan(self.arc_height / self.fy / 2) * self.fy)

    def coord_plain_to_arc(self, pos_list):
        x = pos_list[:, 0] + 0.5
        y = pos_list[:, 1] + 0.5
        u = self.fx * (self.fovx / 2 - torch.atan((self.plain_width / 2 - x) / self.fx))
        v = self.fy * (self.fovy / 2 - torch.atan((self.plain_height / 2 - y) / self.fy))
        u -= 0.5
        v -= 0.5
        new_pos_list = torch.stack([u, v], dim=1)
        return new_pos_list

    def coord_arc_to_plain(self, pos_list):
        u = pos_list[:, 0] + 0.5
        v = pos_list[:, 1] + 0.5
        x = self.plain_width / 2 - self.fx * torch.tan(self.fovx / 2 - u / self.fx)
        y = self.plain_height / 2 - self.fy * torch.tan(self.fovy / 2 - v / self.fy)
        x -= 0.5
        y -= 0.5
        new_pos_list = torch.stack([x, y], dim=1)
        return new_pos_list
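    # Worked example (illustrative numbers, not from the paper): for a 2048-pixel-wide
    # image with fovx = pi/2, cal_fov_with_original_image_size gives
    #     fx = 2048 / (2 * tan(pi/4)) = 1024,
    # and cal_arc_size gives
    #     arc_width = int(2 * atan(2048 / 1024 / 2) * 1024) = int(pi / 2 * 1024) = 1608,
    # i.e. the PITed image is narrower, because off-center columns are compressed.
    # coord_plain_to_arc and coord_arc_to_plain are exact inverses on these continuous
    # coordinates; the 0.5 offsets convert between pixel indices and pixel centers.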
    def coord_plain_to_arc_scalar(self, x, y):
        '''Scalar version, used for PITing annotations.'''
        x += 0.5
        y += 0.5
        u = self.fx * (self.fovx / 2 - math.atan((self.plain_width / 2 - x) / self.fx))
        v = self.fy * (self.fovy / 2 - math.atan((self.plain_height / 2 - y) / self.fy))
        u -= 0.5
        v -= 0.5
        return u, v

    def coord_arc_to_plain_scalar(self, u, v):
        '''Scalar version, used for reverse-PITing annotations.'''
        u += 0.5
        v += 0.5
        x = self.plain_width / 2 - self.fx * math.tan(self.fovx / 2 - u / self.fx)
        y = self.plain_height / 2 - self.fy * math.tan(self.fovy / 2 - v / self.fy)
        x -= 0.5
        y -= 0.5
        return x, y

    def create_pos_list(self, w, h):
        a = list(range(w))  # x
        b = list(range(h))  # y
        pos = [i for i in product(a, b)]
        pos = torch.Tensor(pos).to(self.device)
        return pos

    def limit_range(self, t, min_value, max_value):
        '''Clamp t: values below min_value become min_value, values above max_value become max_value.'''
        t[t < min_value] = min_value
        t[t > max_value] = max_value
        return t

    def pit_cal_pos_list(self):
        '''Precompute sampling positions for PIT.'''
        self.arc_pos_list = self.create_pos_list(self.arc_width, self.arc_height)
        self.pos_in_plain = self.coord_arc_to_plain(self.arc_pos_list)
        pos_rounded = torch.round(self.pos_in_plain)
        # Clamp x and y separately: y must be limited by the height, not the width.
        pos_rounded[:, 0] = self.limit_range(pos_rounded[:, 0], 0, self.plain_width - 1)
        pos_rounded[:, 1] = self.limit_range(pos_rounded[:, 1], 0, self.plain_height - 1)
        self.pos_in_plain_nearest = self.change_2d_pos_into_1d_index(pos_rounded, self.plain_width)
        self.pos_in_plain_4_vtx = self.cal_4_vertex(self.pos_in_plain, self.plain_width, self.plain_height)
        self.pos_in_plain_16_vtx = self.cal_16_vertex(self.pos_in_plain, self.plain_width, self.plain_height)

    def rpit_cal_pos_list(self):
        '''Precompute sampling positions for reverse PIT.'''
        self.plain_pos_list = self.create_pos_list(self.plain_width, self.plain_height)
        self.pos_in_arc = self.coord_plain_to_arc(self.plain_pos_list)
        pos_rounded = torch.round(self.pos_in_arc)
        pos_rounded[:, 0] = self.limit_range(pos_rounded[:, 0], 0, self.arc_width - 1)
        pos_rounded[:, 1] = self.limit_range(pos_rounded[:, 1], 0, self.arc_height - 1)
        self.pos_in_arc_nearest = self.change_2d_pos_into_1d_index(pos_rounded, self.arc_width)
        self.pos_in_arc_4_vtx = self.cal_4_vertex(self.pos_in_arc, self.arc_width, self.arc_height)
        self.pos_in_arc_16_vtx = self.cal_16_vertex(self.pos_in_arc, self.arc_width, self.arc_height)

    def change_2d_pos_into_1d_index(self, pos, w):
        '''torch.take() needs 1-d indices, so flatten (x, y) to y * w + x.'''
        x = pos[:, 0]
        y = pos[:, 1]
        pos_1dim = (y * w + x).long()
        return pos_1dim

    def cal_4_vertex(self, pos_in_float, w, h):
        '''Used for bilinear interpolation.'''
        x = self.limit_range(pos_in_float[:, 0], 0, w - 1)
        y = self.limit_range(pos_in_float[:, 1], 0, h - 1)

        pos_4_vtx = [[0 for i in range(2)] for i in range(2)]

        x_list = [torch.floor(x), torch.ceil(x)]
        y_list = [torch.floor(y), torch.ceil(y)]

        for i in range(2):
            for j in range(2):
                pos = torch.stack([x_list[i], y_list[j]], dim=1)
                pos_4_vtx[i][j] = self.change_2d_pos_into_1d_index(pos, w)

        return pos_4_vtx
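    # For example (illustrative): a sampling position (x, y) = (2.3, 5.7) yields the four
    # neighbours (2, 5), (2, 6), (3, 5) and (3, 6); pos_4_vtx[i][j] holds the flattened
    # index of the corner with x from {floor, ceil}[i] and y from {floor, ceil}[j].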
    def cal_16_vertex(self, pos_in_float, w, h):
        '''Used for bicubic interpolation.'''
        x = self.limit_range(pos_in_float[:, 0], 0, w - 1)
        y = self.limit_range(pos_in_float[:, 1], 0, h - 1)
        # The 4x4 neighbourhood spans floor - 1 .. ceil + 1 on each axis, clamped to the image.
        x_list = [self.limit_range(torch.floor(x) - 1, 0, w - 1), torch.floor(x), torch.ceil(x),
                  self.limit_range(torch.ceil(x) + 1, 0, w - 1)]
        y_list = [self.limit_range(torch.floor(y) - 1, 0, h - 1), torch.floor(y), torch.ceil(y),
                  self.limit_range(torch.ceil(y) + 1, 0, h - 1)]

        pos_16_vtx = [[0 for i in range(4)] for i in range(4)]
        for i in range(4):
            for j in range(4):
                pos = torch.stack([x_list[i], y_list[j]], dim=1)
                pos_16_vtx[i][j] = self.change_2d_pos_into_1d_index(pos, w)
        return pos_16_vtx

    def nearest_interpolation(self, this_channel, pos):
        res = torch.take(this_channel, pos)
        return res

    def bilinear_interpolation(self, this_channel, real_pos, pos_4_vtx):
        pix_lt = torch.take(this_channel, pos_4_vtx[0][0])  # left top
        pix_lb = torch.take(this_channel, pos_4_vtx[0][1])  # left bottom
        pix_rt = torch.take(this_channel, pos_4_vtx[1][0])  # right top
        pix_rb = torch.take(this_channel, pos_4_vtx[1][1])  # right bottom

        x = real_pos[:, 0]
        y = real_pos[:, 1]
        xfrac, yfrac = torch.frac(x), torch.frac(y)

        t = pix_lt + (pix_rt - pix_lt) * xfrac  # interpolate along x on the top edge
        b = pix_lb + (pix_rb - pix_lb) * xfrac  # interpolate along x on the bottom edge
        res = t + (b - t) * yfrac               # interpolate along y between the two
        return res

    def W(self, x):
        '''Piecewise cubic weight function used for bicubic interpolation.'''
        x = torch.abs(x)
        a = 1 - 2 * x.pow(2) + x.pow(3)
        b = 4 - 8 * x + 5 * x.pow(2) - x.pow(3)
        c = torch.zeros_like(x)
        res1 = torch.where(x <= 1, a, c)
        res2 = torch.where(x < 2, b, c)
        res = torch.where(res1 == c, res2, res1)
        return res
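    # W is the standard cubic convolution kernel with parameter a = -1:
    #     W(x) = 1 - 2|x|^2 + |x|^3           for |x| <= 1
    #     W(x) = 4 - 8|x| + 5|x|^2 - |x|^3    for 1 < |x| < 2
    #     W(x) = 0                            otherwise
    # e.g. W(0) = 1 and W(1) = W(-1) = 0, so a sample that lands exactly on a grid
    # point keeps its original value.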
    def bicubic_interpolation(self, this_channel, real_pos, pos_16_vtx):
        '''
        The calculation method comes from:
        https://blog.csdn.net/yycocl/article/details/102588362
        '''
        x = real_pos[:, 0]
        y = real_pos[:, 1]
        u, v = torch.frac(x), torch.frac(y)

        pix = [[0 for i in range(4)] for i in range(4)]
        for i in range(4):
            for j in range(4):
                pix[i][j] = torch.take(this_channel, pos_16_vtx[i][j])

        res = 0
        for i in range(4):
            for j in range(4):
                res += pix[i][j] * self.W(i - 1 - u) * self.W(j - 1 - v)
        res = self.limit_range(res, 0, 255)
        return res

    def pit(self, im, interpolation = 'bilinear', reverse = False, ori_w = 0, ori_h = 0):
        '''
        im: image as a torch tensor, [N, C, H, W] (or [N, H, W] for single-channel batches).
        reverse: False = PIT, True = reverse PIT (rPIT).
        interpolation: 'nearest' (1), 'bilinear' (2) or 'bicubic' (3).
        ori_w / ori_h: the width and height of the original image. When reverse == True, it is
            better to provide them, to avoid off-by-one errors from the int() rounding in
            cal_plain_size.
        '''
        pix = im  # [n, c, h, w]
        start_dim = len(pix.shape)
        assert start_dim == 3 or start_dim == 4
        if start_dim == 3:  # [n, h, w]
            pix = pix[:, None, :, :]

        self.n, self.c = pix.shape[0], pix.shape[1]

        pos, pos_nearest, pos_4_vtx, pos_16_vtx = None, None, None, None
        new_w, new_h = 0, 0

        if not reverse:
            new_w, new_h = self.arc_width, self.arc_height
            if self.arc_pos_list is None:
                self.pit_cal_pos_list()
            pos = self.pos_in_plain
            pos_nearest = self.pos_in_plain_nearest
            pos_4_vtx = self.pos_in_plain_4_vtx
            pos_16_vtx = self.pos_in_plain_16_vtx
        if reverse:
            if ori_w:
                self.plain_width = ori_w
            if ori_h:
                self.plain_height = ori_h
            new_w, new_h = self.plain_width, self.plain_height
            if self.plain_pos_list is None:
                self.rpit_cal_pos_list()
            pos = self.pos_in_arc
            pos_nearest = self.pos_in_arc_nearest
            pos_4_vtx = self.pos_in_arc_4_vtx
            pos_16_vtx = self.pos_in_arc_16_vtx

        batch_new = []
        for i in range(self.n):
            pix_new = []
            for j in range(self.c):
                res = 0
                this_channel = pix[i, j, :, :].float().squeeze()  # dtype: torch.float32
                if interpolation == 'nearest' or interpolation == 1:
                    res = self.nearest_interpolation(this_channel, pos_nearest)
                elif interpolation == 'bilinear' or interpolation == 2:
                    res = self.bilinear_interpolation(this_channel, pos, pos_4_vtx)
                elif interpolation == 'bicubic' or interpolation == 3:
                    res = self.bicubic_interpolation(this_channel, pos, pos_16_vtx)
                else:
                    print('"' + str(interpolation) + '" is not an interpolation mode!')

                res = torch.round(res).reshape(new_w, new_h).int()  # [w, h]
                res = torch.transpose(res, 0, 1)  # [h, w]
                pix_new.append(res)
            pix_new = torch.stack(pix_new, dim=0)
            batch_new.append(pix_new)
        batch_new = torch.stack(batch_new, dim=0)
        if start_dim == 3:
            batch_new = batch_new.flatten(0, 1)
        return batch_new


'''Conversion between tensors and PIL images'''
def tensor_to_image(t):
    if t.shape[1] == 1:  # gray image
        im = t[0, 0, ...]
    else:  # RGB image
        im = t[0, ...].permute(1, 2, 0)  # n c h w -> h w c
    if t.device.type != 'cpu':
        im = im.cpu()
    im = im.numpy().astype('uint8')
    im = Image.fromarray(im)
    return im

def image_to_tensor(im):
    t = torch.from_numpy(np.array(im))  # [h, w, channel]
    if len(t.shape) == 2:  # gray images
        t = t[None, None, ...]  # n c h w
    else:  # RGB images
        t = t.permute(2, 0, 1)[None, ...]  # n c h w
    return t

if __name__ == "__main__":
    '''Usage example for PIT_module'''

    interpolation = 2  # bilinear

    '''Test on a gray image (1 channel)'''
    im_path = 'test_images/gray.png'
    im = Image.open(im_path)
    t = image_to_tensor(im).cuda()  # create input

    width, height = t.shape[3], t.shape[2]
    proj = PIT_module(width, height, fovx=math.pi / 2)

    t_new = proj.pit(t, interpolation = interpolation)
    im_new = tensor_to_image(t_new)
    im_new.save('test_images/gray_pit_tensor.png')

    t_cycle = proj.pit(t_new, interpolation = interpolation, reverse = True, ori_w = width, ori_h = height)
    im_cycle = tensor_to_image(t_cycle)
    im_cycle.save('test_images/gray_cycle_tensor.png')

    '''Test on an RGB image (3 channels)'''
    im_path = 'test_images/RGB.png'
    im = Image.open(im_path)
    t = image_to_tensor(im).cuda()  # create input

    width, height = t.shape[3], t.shape[2]
    proj = PIT_module(width, height, fovx=math.pi / 2)

    t_new = proj.pit(t, interpolation = interpolation)
    im_new = tensor_to_image(t_new)
    im_new.save('test_images/RGB_pit_tensor.png')

    t_cycle = proj.pit(t_new, interpolation = interpolation, reverse = True, ori_w = width, ori_h = height)
    im_cycle = tensor_to_image(t_cycle)
    im_cycle.save('test_images/RGB_cycle_tensor.png')
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## [PIT: Position-Invariant Transform for Cross-FoV Domain Adaptation (ICCV 2021)](https://arxiv.org/abs/2108.07142)

### Getting started
***
##### Clone the repo:

```
git clone https://github.com/sheepooo/pit-Position-Invariant-Transform/
cd pit-Position-Invariant-Transform
```

##### Requirements:

* Python >= 3.6 (with numpy; itertools and argparse are in the standard library)
* PyTorch >= 0.4.1

##### To test the PIT function, you can run:
```
python PIT_tensor.py
```
The images in the "test_images" folder will be PITed and reverse-PITed, and saved in the same folder.


##### To PIT all images in a folder, you can run:
```
python pit_images_in_root_folder.py --fovx 'YourFovx' --root_path 'YourImageFolderName'
```
Either fovx or fovy is enough (both are OK, too).
**NOTICE:** this script changes the images in the root folder in place, so you may want to back up the original images.

##### To PIT annotations for object detection (XML files in Pascal VOC format, as shown in the "test_annotations" folder), you can run:
```
python pit_annotations.py --fovx 'YourFovx' --root_path 'YourAnnotationFolderName'
```
Either fovx or fovy is enough (both are OK, too).
**NOTICE:** this script changes the annotations in the root folder in place, so you may want to back up the original annotations.
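
##### To use PIT inside your own Python code:
A minimal sketch (the image path and the 90° FoV below are illustrative placeholders, not recommended values):
```
import math
import torch
from PIL import Image
from PIT_tensor import PIT_module, image_to_tensor, tensor_to_image

im = Image.open('your_image.png')  # placeholder path
t = image_to_tensor(im)            # [1, C, H, W] tensor
if torch.cuda.is_available():
    t = t.cuda()  # PIT_module keeps its lookup tables on the GPU when one is available

proj = PIT_module(t.shape[3], t.shape[2], fovx=math.pi / 2)  # FoV in radians

t_pit = proj.pit(t, interpolation='bilinear')                # PIT
t_back = proj.pit(t_pit, interpolation='bilinear', reverse=True,
                  ori_w=t.shape[3], ori_h=t.shape[2])        # reverse PIT
tensor_to_image(t_pit).save('your_image_pit.png')
```
One PIT_module instance handles both directions, so the round trip above reuses the cached sampling positions.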
--------------------------------------------------------------------------------
/pit_annotations.py:
--------------------------------------------------------------------------------
import os
import math
import numpy as np
import argparse
import xml.etree.ElementTree as ET
from PIT_tensor import *

def parse_args():
    """
    Parse input arguments
    """
    parser = argparse.ArgumentParser(description='FOV')
    parser.add_argument('--dataset', dest='dataset',
                        help='cityscapes, foggy_cityscapes, kitti, vkitti',
                        default='kitti', type=str)
    parser.add_argument('--fovx', dest='fovx',
                        help='fovx, in degrees',
                        default=0, type=int)
    parser.add_argument('--fovy', dest='fovy',
                        help='fovy, in degrees',
                        default=0, type=int)
    parser.add_argument('--root_path', dest='root_path',
                        help='the path of the annotation folder',
                        default='./', type=str)
    args = parser.parse_args()
    return args

if __name__ == "__main__":
    args = parse_args()
    print(args)

    for root_folder, dirs, files in os.walk(args.root_path, topdown=True):
        print(root_folder)
        for file_name in files:
            if not file_name.endswith('.xml'):
                continue
            print(file_name)
            file_path = os.path.join(root_folder, file_name)
            tree = ET.parse(file_path)
            root = tree.getroot()

            size = root.find('size')
            width, height = int(size[0].text), int(size[1].text)
            pit = PIT_module(width, height, args.fovx / 180 * math.pi, args.fovy / 180 * math.pi)

            # The far corner of the image maps (approximately) to the PITed image size.
            new_width, new_height = pit.coord_plain_to_arc_scalar(width, height)
            new_width, new_height = int(new_width), int(new_height)
            size[0].text, size[1].text = str(new_width), str(new_height)

            for child in root:
                if child.tag == 'object':
                    bbox = child.find('bndbox')
                    xmin, ymin = int(bbox[0].text), int(bbox[1].text)
                    xmax, ymax = int(bbox[2].text), int(bbox[3].text)
                    xmin, ymin = pit.coord_plain_to_arc_scalar(xmin, ymin)
                    xmax, ymax = pit.coord_plain_to_arc_scalar(xmax, ymax)
                    xmin, ymin = int(np.around(xmin)), int(np.around(ymin))
                    xmax, ymax = int(np.around(xmax)), int(np.around(ymax))
                    assert xmin >= 0 and ymin >= 0 and xmax - 1 <= new_width and ymax - 1 <= new_height
                    if xmax > new_width:
                        xmax = new_width
                    if ymax > new_height:
                        ymax = new_height
                    bbox[0].text, bbox[1].text = str(xmin), str(ymin)
                    bbox[2].text, bbox[3].text = str(xmax), str(ymax)
            tree.write(file_path)

    print('All annotations projected')
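# Example invocation (illustrative values): transform the bundled test annotation
# with a 90-degree horizontal FoV:
#     python pit_annotations.py --fovx 90 --root_path ./test_annotations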
--------------------------------------------------------------------------------
/pit_images_in_root_folder.py:
--------------------------------------------------------------------------------
import argparse
from PIT_tensor import *

def parse_args():
    """
    Parse input arguments
    """
    parser = argparse.ArgumentParser(description='position-invariant transform')
    parser.add_argument('--fovx', dest='fovx',
                        help='fovx, in degrees',
                        default=0, type=int)
    parser.add_argument('--fovy', dest='fovy',
                        help='fovy, in degrees',
                        default=0, type=int)
    parser.add_argument('--root_path', dest='root_path',
                        help='the path of the image folder',
                        default='./', type=str)
    parser.add_argument('--itp', dest='itp',
                        help='interpolation mode: 1 = nearest, 2 = bilinear, 3 = bicubic',
                        default=2, type=int)
    args = parser.parse_args()
    return args

if __name__ == "__main__":
    args = parse_args()
    print(args)

    for root, dirs, files in os.walk(args.root_path, topdown=True):
        for name in files:
            if not name.endswith('.png') and not name.endswith('.jpg'):
                continue
            im_path = os.path.join(root, name)

            img = Image.open(im_path)
            im = image_to_tensor(img).cuda()  # create input
            width, height = im.shape[3], im.shape[2]
            proj = PIT_module(width, height, fovx = args.fovx / 180 * math.pi, fovy = args.fovy / 180 * math.pi)

            im_new = proj.pit(im, interpolation=args.itp, reverse=False)
            im_new = tensor_to_image(im_new)
            im_new.save(im_path)
            img.close()

            print('Image done:', im_path)
--------------------------------------------------------------------------------
/test_annotations/aachen_000000_000019_leftImg8bit.xml:
--------------------------------------------------------------------------------
<annotation>
    <folder>VOC2007</folder>
    <filename>source_aachen_000000_000019_leftImg8bit.jpg</filename>
    <source>
        <database>The VOC2007 Database</database>
        <annotation>PASCAL VOC2007</annotation>
        <image>flickr</image>
        <flickrid>0</flickrid>
    </source>
    <owner>
        <flickrid>rtz</flickrid>
        <name>?</name>
    </owner>
    <size>
        <width>2048</width>
        <height>1024</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>car</name>
        <pose>Frontal</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>609</xmin>
            <ymin>420</ymin>
            <xmax>807</xmax>
            <ymax>532</ymax>
        </bndbox>
    </object>
    <object>
        <name>car</name>
        <pose>Frontal</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>145</xmin>
            <ymin>429</ymin>
            <xmax>304</xmax>
            <ymax>502</ymax>
        </bndbox>
    </object>
    <object>
        <name>car</name>
        <pose>Frontal</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>145</xmin>
            <ymin>429</ymin>
            <xmax>304</xmax>
            <ymax>502</ymax>
        </bndbox>
    </object>
    <object>
        <name>car</name>
        <pose>Frontal</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>1962</xmin>
            <ymin>488</ymin>
            <xmax>2047</xmax>
            <ymax>526</ymax>
        </bndbox>
    </object>
    <object>
        <name>car</name>
        <pose>Frontal</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>1512</xmin>
            <ymin>446</ymin>
            <xmax>1660</xmax>
            <ymax>499</ymax>
        </bndbox>
    </object>
    <object>
        <name>car</name>
        <pose>Frontal</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>1479</xmin>
            <ymin>456</ymin>
            <xmax>1634</xmax>
            <ymax>514</ymax>
        </bndbox>
    </object>
    <object>
        <name>car</name>
        <pose>Frontal</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>1862</xmin>
            <ymin>481</ymin>
            <xmax>1937</xmax>
            <ymax>500</ymax>
        </bndbox>
    </object>
    <object>
        <name>car</name>
        <pose>Frontal</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>1872</xmin>
            <ymin>486</ymin>
            <xmax>1956</xmax>
            <ymax>513</ymax>
        </bndbox>
    </object>
    <object>
        <name>car</name>
        <pose>Frontal</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>1721</xmin>
            <ymin>450</ymin>
            <xmax>1864</xmax>
            <ymax>509</ymax>
        </bndbox>
    </object>
    <object>
        <name>car</name>
        <pose>Frontal</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>145</xmin>
            <ymin>429</ymin>
            <xmax>304</xmax>
            <ymax>502</ymax>
        </bndbox>
    </object>
    <object>
        <name>bicycle</name>
        <pose>Frontal</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>787</xmin>
            <ymin>446</ymin>
            <xmax>828</xmax>
            <ymax>490</ymax>
        </bndbox>
    </object>
    <object>
        <name>person</name>
        <pose>Frontal</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>887</xmin>
            <ymin>446</ymin>
            <xmax>913</xmax>
            <ymax>498</ymax>
        </bndbox>
    </object>
    <object>
        <name>person</name>
        <pose>Frontal</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>901</xmin>
            <ymin>444</ymin>
            <xmax>934</xmax>
            <ymax>498</ymax>
        </bndbox>
    </object>
    <object>
        <name>car</name>
        <pose>Frontal</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>609</xmin>
            <ymin>420</ymin>
            <xmax>807</xmax>
            <ymax>532</ymax>
        </bndbox>
    </object>
    <object>
        <name>rider</name>
        <pose>Frontal</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>1831</xmin>
            <ymin>436</ymin>
            <xmax>1887</xmax>
            <ymax>548</ymax>
        </bndbox>
    </object>
    <object>
        <name>bicycle</name>
        <pose>Frontal</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>1839</xmin>
            <ymin>468</ymin>
            <xmax>1877</xmax>
            <ymax>547</ymax>
        </bndbox>
    </object>
</annotation>
--------------------------------------------------------------------------------
/test_images/RGB.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sheepooo/PIT-Position-Invariant-Transform/990e71d3f1971299328a31c81268641809530d1e/test_images/RGB.png
--------------------------------------------------------------------------------
/test_images/gray.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sheepooo/PIT-Position-Invariant-Transform/990e71d3f1971299328a31c81268641809530d1e/test_images/gray.png
--------------------------------------------------------------------------------