├── .gitignore ├── README.md ├── affine_transform.py ├── augmentation-helpers ├── background │ ├── background_1.jpg │ ├── background_2.jpg │ ├── background_3.jpg │ ├── background_4.jpg │ ├── background_5.jpg │ └── background_6.jpg └── overlays │ ├── monitor │ ├── monitor_1.jpg │ ├── monitor_2.jpg │ ├── monitor_3.jpg │ ├── monitor_5.jpg │ ├── monitor_6.jpg │ └── monitor_7.jpg │ └── wrinkle │ ├── wrinkle_1.jpg │ ├── wrinkle_2.jpg │ └── wrinkle_3.jpg ├── basic_transform.py ├── composite_transform.py ├── data └── sample.jpg ├── demo.py ├── distortion.py ├── main.py ├── output ├── blur.gif ├── contrast_and_brighten.gif ├── distort.gif ├── gamma_saturation.gif ├── lcd_overlay.gif ├── noise.gif ├── perspective.gif ├── rotate.gif ├── scanner_like.gif ├── shadow.gif ├── stretch.gif ├── watermark.gif └── wrinkles.gif ├── requirements.txt ├── utility.py └── warp_mls.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.py[cod] 3 | *$py.class 4 | .vscode/ 5 | .ipynb_checkpoints 6 | profile_default/ 7 | ipython_config.py 8 | output/* 9 | !output/.gitkeep 10 | !output/*.gif 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Form data augmentation 2 | 3 | ## Available augmentations 4 | 1. Shadow 5 | ![](https://github.com/gautam-aayush/form-data-augmentation/blob/main/output/shadow.gif) 6 | 7 | 2. Wrinkles 8 | ![](https://github.com/gautam-aayush/form-data-augmentation/blob/main/output/wrinkles.gif) 9 | 10 | 3. Saturation 11 | ![](https://github.com/gautam-aayush/form-data-augmentation/blob/main/output/gamma_saturation.gif) 12 | 13 | 4. Watermark 14 | ![](https://github.com/gautam-aayush/form-data-augmentation/blob/main/output/watermark.gif?raw=true) 15 | 16 | 5. 
Binarize 17 | ![](https://user-images.githubusercontent.com/70262751/111758313-71413b00-88c4-11eb-846e-4380ee32d606.png) 18 | 19 | 6. Perspective Distortion 20 | ![](https://github.com/gautam-aayush/form-data-augmentation/blob/main/output/perspective.gif?raw=true) 21 | 22 | 7. Stretch Distortion 23 | ![](https://github.com/gautam-aayush/form-data-augmentation/blob/main/output/stretch.gif?raw=true) 24 | 25 | 8. LCD Texture 26 | ![](https://github.com/gautam-aayush/form-data-augmentation/blob/main/output/lcd_overlay.gif?raw=true) 27 | 28 | ## Steps: 29 | 30 | ### Install requirements 31 | 32 | * `pip install -r requirements.txt` 33 | 34 | ### Run demo to see the effect of individual augmentations 35 | * `python demo.py` 36 | 37 | `demo.py` uses the sample data in `data/` and generates `GIF` outputs in `output/`. 38 | 39 | ### Run the augmentation pipeline 40 | * `python main.py --data-root data/ --output-dir output/ --aug-prob 0.1` 41 | 42 | * `--data-root`: path to data directory 43 | * `--output-dir`: path to outputs directory 44 | * `--aug-prob`: probability with which each augmentation is applied. When the value is `1`, every augmentation is applied; when it is `0.1`, each augmentation is applied independently with probability `0.1` 45 | -------------------------------------------------------------------------------- /affine_transform.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import cv2 4 | import numpy as np 5 | from tqdm import tqdm 6 | 7 | 8 | def displacement( 9 | image: np.ndarray, horizontal_scale: float = 0.1, vertical_scale: float = 0.1 10 | ) -> np.ndarray: 11 | """ 12 | Displaces an image horzontally and vertically by respective scales 13 | Args: 14 | image (np.ndarray): BGR image 15 | horizontal_scale (float, optional): Fraction of original image width to displace. 16 | Defaults to 0.1. 
17 | vertical_scale (float, optional): Fraction of original image height to displace. 18 | Defaults to 0.1. 19 | 20 | Returns: 21 | np.ndarray: Displaced image 22 | """ 23 | height, width = image.shape[:2] 24 | horizontal = int(horizontal_scale * width) 25 | vertical = int(vertical_scale * height) 26 | translation_mat = np.array([[1, 0, horizontal], [0, 1, vertical]], dtype=np.float32) 27 | border_color = (255, 255, 255) # white borders 28 | displaced_image = cv2.warpAffine( 29 | image, 30 | translation_mat, 31 | (width + horizontal, height + vertical), 32 | borderValue=border_color, 33 | ) 34 | return displaced_image 35 | 36 | 37 | def rotation(image: np.ndarray, angle: int = 90, same: bool = False) -> np.ndarray: 38 | """ 39 | Rotates an image through given angle 40 | Args: 41 | image (np.ndarray): BGR image 42 | angle (int, optional): Rotation angle (counterclockwise) about its center. Defaults to 90. 43 | same (bool, optional): When True the output image is of the same size as input 44 | however, some portion of the original image may be lost. When False, height and width 45 | are adjusted to preserve original image content. 46 | Defaults to False. 
47 | 48 | Returns: 49 | np.ndarray: Rotated image 50 | """ 51 | height, width = image.shape[:2] 52 | centerX = (width - 1) / 2 53 | centerY = (height - 1) / 2 54 | rotation_mat = cv2.getRotationMatrix2D((centerX, centerY), angle, 1) 55 | 56 | if same: 57 | new_width = width 58 | new_height = height 59 | else: 60 | cos = np.abs(rotation_mat[0, 0]) 61 | sin = np.abs(rotation_mat[0, 1]) 62 | # compute the new bounding dimensions of the image 63 | new_width = int((height * sin) + (width * cos)) 64 | new_height = int((height * cos) + (width * sin)) 65 | # adjust the rotation matrix to take into account translation 66 | rotation_mat[0, 2] += (new_width / 2) - centerX 67 | rotation_mat[1, 2] += (new_height / 2) - centerY 68 | 69 | border_color = (255, 255, 255) # white borders 70 | rotated_image = cv2.warpAffine( 71 | image, rotation_mat, (new_width, new_height), borderValue=border_color 72 | ) 73 | 74 | return rotated_image 75 | 76 | 77 | def shear(image: np.ndarray, shear_X: float = 0.1, shear_Y: float = 0.1) -> np.ndarray: 78 | """ 79 | Shears the image along x and y directions 80 | Args: 81 | image (np.ndarray): BGR image 82 | shear_X (float, optional): Value of horizontal shear. Defaults to 0.1. 83 | shear_Y (float, optional): Value of vertical shear. Defaults to 0.1. 
84 | 85 | Returns: 86 | np.ndarray: Sheared image 87 | """ 88 | width, height = image.shape[:2] 89 | 90 | # increase image height and width to preserve image content 91 | new_width = int(2 * width) 92 | new_height = int(2 * height) 93 | M2 = np.float32([[1, shear_Y, 0], [shear_X, 1, 0]]) 94 | # M2[0,2] = -M2[0,1] * W/2 95 | # M2[1,2] = -M2[1,0] * H/2 96 | centerX = (width - 1) / 2 97 | centerY = (height - 1) / 2 98 | M2[0, 2] += (new_width / 2) - centerX 99 | M2[1, 2] += (new_height / 2) - centerY 100 | 101 | sheared_image = cv2.warpAffine(image, M2, (new_width, new_height)) 102 | return sheared_image 103 | 104 | 105 | def arg_to_string(arg): 106 | result = "(" 107 | i = 0 108 | for key, value in arg.items(): 109 | if i != 0: 110 | result += "_" 111 | result += key 112 | result += "_" + str(value) 113 | i += 1 114 | result += ")" 115 | return result 116 | 117 | 118 | if __name__ == "__main__": 119 | data_dir = Path("../fuse/notebooks/exploratory/data/Ncell-Phase3") 120 | output_dir = Path("../fuse/notebooks/exploratory/outputs/Ncell/augmented_data2") 121 | output_dir.mkdir(exist_ok=True) 122 | # transformations = [displacement, rotation, noise, shear] 123 | # transformation_names = ['displacement', 'rotation', 'gaussian_noise', 'shear'] 124 | # transform_params = {'displacement': [{'horizontal': 100, 'vertical': 100}, {'horizontal': 200, 'vertical': 200}, 125 | # {'horizontal': 500, 'vertical': 500}], 126 | # 'rotation': [{'angle': 30}, {'angle': 45}, {'angle': 60}, {'angle': 90}], 127 | # 'gaussian_noise': [{'var': 10}, {'var': 20}, {'var': 30}], 128 | # 'shear': [{'shear_X': 0.1, 'shear_Y': 0.1}, {'shear_X': 0.1, 'shear_Y': 0.2}, 129 | # {'shear_X': 0.2, 'shear_Y': 0.1}]} 130 | # 131 | # for file in tqdm(list(data_dir.glob('*.jpg'))): 132 | # for i, transformation in enumerate(transformations): 133 | # for arg in transform_params[transformation_names[i]]: 134 | # image = cv2.imread(str(file)) 135 | # transformed_img = transformation(image, **arg) 136 | # 
new_filename = f'{transformation_names[i]}_{arg_to_string(arg)}_{file.parts[-1]}' 137 | # cv2.imwrite(str(Path(output_dir, new_filename)), transformed_img) 138 | 139 | transformations = [rotation] 140 | transformation_names = ["rotation", "shear"] 141 | transform_params = {"rotation": [{"angle": 11}, {"angle": 180}]} 142 | 143 | for file in tqdm(list(data_dir.glob("*.jpg"))): 144 | for i, transformation in enumerate(transformations): 145 | for arg in transform_params[transformation_names[i]]: 146 | image = cv2.imread(str(file)) 147 | transformed_img = transformation(image, **arg) 148 | new_filename = ( 149 | f"{transformation_names[i]}_{arg_to_string(arg)}_{file.parts[-1]}" 150 | ) 151 | cv2.imwrite(str(Path(output_dir, new_filename)), transformed_img) 152 | -------------------------------------------------------------------------------- /augmentation-helpers/background/background_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/background/background_1.jpg -------------------------------------------------------------------------------- /augmentation-helpers/background/background_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/background/background_2.jpg -------------------------------------------------------------------------------- /augmentation-helpers/background/background_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/background/background_3.jpg -------------------------------------------------------------------------------- 
/augmentation-helpers/background/background_4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/background/background_4.jpg -------------------------------------------------------------------------------- /augmentation-helpers/background/background_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/background/background_5.jpg -------------------------------------------------------------------------------- /augmentation-helpers/background/background_6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/background/background_6.jpg -------------------------------------------------------------------------------- /augmentation-helpers/overlays/monitor/monitor_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/overlays/monitor/monitor_1.jpg -------------------------------------------------------------------------------- /augmentation-helpers/overlays/monitor/monitor_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/overlays/monitor/monitor_2.jpg -------------------------------------------------------------------------------- /augmentation-helpers/overlays/monitor/monitor_3.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/overlays/monitor/monitor_3.jpg -------------------------------------------------------------------------------- /augmentation-helpers/overlays/monitor/monitor_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/overlays/monitor/monitor_5.jpg -------------------------------------------------------------------------------- /augmentation-helpers/overlays/monitor/monitor_6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/overlays/monitor/monitor_6.jpg -------------------------------------------------------------------------------- /augmentation-helpers/overlays/monitor/monitor_7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/overlays/monitor/monitor_7.jpg -------------------------------------------------------------------------------- /augmentation-helpers/overlays/wrinkle/wrinkle_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/overlays/wrinkle/wrinkle_1.jpg -------------------------------------------------------------------------------- /augmentation-helpers/overlays/wrinkle/wrinkle_2.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/overlays/wrinkle/wrinkle_2.jpg -------------------------------------------------------------------------------- /augmentation-helpers/overlays/wrinkle/wrinkle_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/overlays/wrinkle/wrinkle_3.jpg -------------------------------------------------------------------------------- /basic_transform.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import cv2 4 | import numpy as np 5 | 6 | from affine_transform import rotation 7 | from distortion import distort 8 | from utility import (_add_texture, _generate_shadow_coordinates, 9 | _perspective_warp) 10 | 11 | PATH_TO_WRINKLED_TEXTURE = Path("augmentation-helpers/overlays/wrinkle") 12 | PATH_TO_MONITOR_TEXTURE = Path("augmentation-helpers/overlays/monitor") 13 | PATH_TO_BG_IMAGES = Path("augmentation-helpers/background") 14 | 15 | assert PATH_TO_WRINKLED_TEXTURE.exists() 16 | assert PATH_TO_MONITOR_TEXTURE.exists() 17 | assert PATH_TO_BG_IMAGES.exists() 18 | 19 | 20 | def noise(image: np.ndarray, noise_typ: str = None) -> np.ndarray: 21 | """ 22 | Adds noise to an image. Avaiable noise_types "gauss", 23 | "s&p" (salt and pepper) 24 | Args: 25 | image (np.ndarray): BGR image on which to add noise 26 | noise_typ (str, optional): type of noise to add: "gauss" or "s&p". 27 | Defaults to None. 
28 | 29 | Returns: 30 | np.ndarray: BGR image with noise added 31 | """ 32 | noise_types = ["gauss", "s&p"] 33 | if not noise_typ: 34 | noise_typ = np.random.choice(noise_types) 35 | if noise_typ == "gauss": 36 | height, width, ch = image.shape 37 | mean = 0 # gaussian mean 38 | var = 30 # gaussian variance 39 | sigma = var ** 0.5 40 | gauss = np.random.normal(mean, sigma, (height, width, ch)) 41 | noisy = image + gauss 42 | return noisy.astype(np.uint8) 43 | elif noise_typ == "s&p": 44 | height, width, ch = image.shape 45 | s_vs_p = 0.5 46 | amount = 0.004 # fraction of image to be converted to noise 47 | out = np.copy(image) 48 | # Salt mode 49 | num_salt = np.ceil(amount * image.size * s_vs_p) 50 | # get random coordinates for sale noise 51 | coords = [np.random.randint(0, i - 1, int(num_salt)) for i in image.shape] 52 | out[coords] = 1 53 | 54 | # Pepper mode 55 | num_pepper = np.ceil(amount * image.size * (1.0 - s_vs_p)) 56 | # get random coordinated for pepper noise 57 | coords = [np.random.randint(0, i - 1, int(num_pepper)) for i in image.shape] 58 | out[coords] = 0 59 | 60 | return out 61 | 62 | 63 | def shadow(image: np.ndarray, no_of_shadows: int = 1) -> np.ndarray: 64 | """Add shadow to an image by decreasing lightness of 65 | random polygonal regions in an image 66 | Note: As the number of shadows increase, there are chances of overlapping 67 | of shadows which causes the brightness of overlapped region to decrease further 68 | Args: 69 | image (np.ndarray): BGR image to add shadow on 70 | no_of_shadows (int, optional): Number of shadows to add. Defaults to 1. 
71 | 72 | Returns: 73 | np.ndarray: image with shadows 74 | """ 75 | # convert to HLS 76 | image_HLS = cv2.cvtColor(image, cv2.COLOR_BGR2HLS) 77 | mask = np.zeros_like(image) 78 | imshape = image.shape[:2] 79 | vertices_list = _generate_shadow_coordinates(imshape, no_of_shadows) 80 | # get list of shadow vertices 81 | for vertices in vertices_list: 82 | # add all shadow polygons on empty mask, 83 | # single 255 denotes only blue channel 84 | cv2.fillPoly(mask, vertices, 255) 85 | # if blue channel is hot, lower the birghtness for light channel 86 | image_HLS[:, :, 1][mask[:, :, 0] == 255] = ( 87 | image_HLS[:, :, 1][mask[:, :, 0] == 255] * 0.75 88 | ) 89 | # convert to BGR 90 | image_BGR = cv2.cvtColor(image_HLS, cv2.COLOR_HLS2BGR) 91 | return image_BGR 92 | 93 | 94 | def virtual_background( 95 | image: np.ndarray, 96 | bg_image: np.ndarray = None, 97 | scale: float = 1.25, 98 | interactive: bool = False, 99 | ) -> np.ndarray: 100 | """ 101 | Adds a background to an image by warping the image into the perspective of background. 102 | Args: 103 | image (np.ndarray): BGR image, foreground 104 | bg_image (np.ndarray, optional): BGR image background. Defaults to None. 105 | If None, a random image from a predefined list is chosen as background 106 | scale (float, optional): scale of background with respect to foreground. Defaults to 1.25. 107 | interactive (bool, optional): If True, an interactive window allows to choose 108 | the perspective points in the background image, otherwise random points are chosen. 109 | Defaults to False. 
110 | 111 | Returns: 112 | np.ndarray: [description] 113 | """ 114 | if not bg_image: 115 | files = sorted(PATH_TO_BG_IMAGES.glob("*.jpg")) 116 | chosen_file = np.random.choice(files) 117 | bg_image = cv2.imread(str(chosen_file)) 118 | 119 | # resize bg image to approproate scale 120 | fg_height, fg_width = image.shape[:2] 121 | new_height, new_width = int(fg_height * scale), int(fg_width * scale) 122 | bg_image = cv2.resize(bg_image, (new_width, new_height)) 123 | 124 | # warp image in perspective of background 125 | warped_fg_image, pts = _perspective_warp(bg_image, image) 126 | 127 | # create a white image like with same shape as bg 128 | img_buffer = np.ones(bg_image.shape, dtype=np.uint8) * 255 129 | # blacken the part where fg image goes to create a warped image template 130 | warp_template = cv2.fillPoly(img_buffer, np.int32([pts]), (0, 0, 0)) 131 | 132 | # mask out bg image with the warp_template 133 | masked = cv2.bitwise_and(bg_image, warp_template) 134 | # combine the warped_fg_image to the masked bg 135 | final_image = cv2.bitwise_or(masked, warped_fg_image) 136 | 137 | return final_image 138 | 139 | 140 | def watermark(image: np.ndarray, text: str = None) -> np.ndarray: 141 | """ 142 | Add watermark text to an image 143 | Args: 144 | image (np.ndarray): BGR image 145 | text (str, optional): text for watermark. Defaults to None. 
146 | When None, a random text is chosen from a pre-defined list 147 | 148 | Returns: 149 | np.ndarray: BGR image with watermark added 150 | """ 151 | texts = ["confidential", "fusemachines", "official", "W2-Tax"] 152 | if not text: 153 | text = np.random.choice(texts) 154 | 155 | # choose a random location for watermark 156 | loc = np.random.randint(image.shape[0] // 4, image.shape[1] // 2, 2) 157 | 158 | # write text in solid on an all black image 159 | image_with_text = cv2.putText( 160 | np.zeros(image.shape, image.dtype), 161 | text, 162 | tuple(loc), 163 | cv2.FONT_HERSHEY_PLAIN, 164 | 20, 165 | tuple(map(int, np.random.randint(0, 255, 3))), 166 | 15, 167 | cv2.LINE_AA, 168 | ) 169 | 170 | # rotate the text at a random angle 171 | rotation_angle = np.random.rand() * 90 - 90 172 | image_with_text = rotation(image_with_text, rotation_angle, same=True) 173 | 174 | # add rotated text to image 175 | image_with_text += image 176 | # again add the original image to the image with text with 177 | # different weights to get a semi transparent look 178 | alpha = 0.7 179 | beta = 1 - alpha 180 | final_image = cv2.addWeighted(image, alpha, image_with_text, beta, 0) 181 | return final_image 182 | 183 | 184 | def wrinkles(image: np.ndarray, wrinkled_overlay: np.ndarray = None) -> np.ndarray: 185 | """ 186 | Adds wrinkles to an image 187 | Args: 188 | image (np.ndarray): Original BGR image 189 | wrinkled_overlay (np.ndarray, optional): Wrinkled texture image to overlay. 190 | Defaults to None. 
191 | 192 | Returns: 193 | np.ndarray: wrinkled image 194 | """ 195 | if not wrinkled_overlay: 196 | # randomly choose a texture for overlay 197 | files = sorted(PATH_TO_WRINKLED_TEXTURE.glob("*.jpg")) 198 | chosen_file = np.random.choice(files) 199 | wrinkled_overlay = cv2.imread(str(chosen_file)) 200 | # add wrinkled texture 201 | textured = _add_texture(image, wrinkled_overlay) 202 | # get a distortion in text 203 | distorted = distort(textured) 204 | 205 | return distorted 206 | 207 | 208 | def lcd_overlay(image: np.ndarray, overlay: np.ndarray = None) -> np.ndarray: 209 | """ 210 | Add a LCD texture to an image 211 | Args: 212 | image (np.ndarray): BGR image 213 | overlay (np.ndarray, optional): BGR image with LCD texture. 214 | Defaults to None. 215 | 216 | Returns: 217 | np.ndarray: [description] 218 | """ 219 | if not overlay: 220 | # randomly choose a texture for overlay 221 | files = sorted(PATH_TO_MONITOR_TEXTURE.glob("*.jpg")) 222 | index = np.random.randint(0, len(files)) 223 | overlay = cv2.imread(str(files[index])) 224 | return _add_texture(image, overlay) 225 | 226 | 227 | def rotate(image: np.ndarray, angle: int = None) -> np.ndarray: 228 | """[summary] 229 | 230 | Args: 231 | image (np.ndarray): image to be rotated 232 | angle (int, optional): angle to rotate. Defaults to None. 
233 | 234 | Returns: 235 | np.ndarray: rotated image 236 | """ 237 | if not angle: 238 | # Randomly choose an angle 239 | random = np.random.rand() 240 | # 50% of the time choose an angle between -10 and 10 degrees 241 | if random < 0.5: 242 | angle = np.random.rand() * 10 - 10 243 | elif random < 0.75: 244 | # 25% of the time 90 degrees 245 | angle = 90 246 | else: 247 | # 25% of the time 180 degrees 248 | angle = 180 249 | rotated_img = rotation(image, angle) 250 | return rotated_img 251 | 252 | 253 | def blur(image: np.ndarray, sigma_x: int = None, sigma_y: int = None) -> np.ndarray: 254 | """ 255 | Applies Gussian blur to an image 256 | Args: 257 | image (np.ndarray): BGR image 258 | sigma_x (int, optional): Standard deviation along x-axis. 259 | When None a value is randomly chosen. Defaults to None. 260 | sigma_y (int, optional): Standard deviation along y-axis. 261 | When None a value is randomly chosen. Defaults to None. 262 | 263 | Returns: 264 | np.ndarray: [description] 265 | """ 266 | if not sigma_x: 267 | sigma_x = np.random.randint(50, 200) 268 | if not sigma_y: 269 | sigma_y = np.random.randint(50, 200) 270 | blurred_image = cv2.GaussianBlur(image, (5, 5), sigma_x, sigma_y) 271 | return blurred_image 272 | 273 | 274 | def contrast_and_brighten( 275 | image: np.ndarray, contrast: float = None, brightness: int = None 276 | ) -> np.ndarray: 277 | """ 278 | Use alpha-beta method to contrast and brightness images 279 | Args: 280 | image (np.ndarray): BGR image 281 | contrast (float, optional): Contrast value (multiplicative factor) to be applied. 282 | Good results for values between 0.5 and 3.0. Defaults to None. 283 | brightness (int, optional): Brightness value to be added. Negative values decrease brightness. 284 | Good results between -50 to 100. Defaults to None. 285 | 286 | Returns: 287 | np.ndarray: image with brightness and contrast values altered. 
288 | """ 289 | if not contrast: 290 | contrast = np.random.rand(1) + 0.5 291 | if not brightness: 292 | brightness = np.random.randint(-50, 100) 293 | 294 | # g(x,y) = contrast * f(x,y) + brightness 295 | new_image = np.clip(image.astype(np.int64) * contrast + brightness, 0, 255) 296 | new_image = new_image.astype(image.dtype) 297 | return new_image 298 | 299 | 300 | def scanner_like(image: np.ndarray) -> np.ndarray: 301 | """ 302 | Binarizes image and thresholds it to get a photocopier/scanner like look. 303 | Args: 304 | image (np.ndarray): BGR image 305 | 306 | Returns: 307 | np.ndarray: BGR image 308 | """ 309 | image_grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 310 | thresh = cv2.adaptiveThreshold( 311 | image_grayscale, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2 312 | ) 313 | image_bgr = cv2.cvtColor(thresh, cv2.COLOR_GRAY2BGR) 314 | return image_bgr 315 | 316 | 317 | def gamma_saturation(image: np.ndarray, gamma: float = None) -> np.ndarray: 318 | """ 319 | Applies gamma saturation to an image. 320 | Args: 321 | image (np.ndarray): [BGR image 322 | gamma (float, optional): Values between 0 and 1 decrease contrast. 323 | Values greater than 1 increase contrast. Defaults to None. 
324 | 325 | Returns: 326 | np.ndarray: BGR image 327 | """ 328 | if not gamma: 329 | if np.random.rand(1) < 0.5: 330 | # lower saturation 331 | gamma = np.random.rand(1) 332 | else: 333 | # increase saturation 334 | gamma = np.random.randint(1, 11) 335 | lookup_table = np.zeros((1, 256), np.uint8) 336 | for i in range(256): 337 | # output_intensity = (input_intensity/255)** gamma × 255 338 | lookup_table[0, i] = np.clip(pow(i / 255.0, gamma) * 255.0, 0, 255) 339 | new_img = cv2.LUT(image, lookup_table) 340 | return new_img 341 | -------------------------------------------------------------------------------- /composite_transform.py: -------------------------------------------------------------------------------- 1 | from basic_transform import (lcd_overlay, noise, rotate, virtual_background, 2 | wrinkles) 3 | from distortion import stretch 4 | 5 | 6 | def rotation_with_lcd(image): 7 | rotated_img = rotate(image) 8 | final_img = lcd_overlay(rotated_img) 9 | return final_img 10 | 11 | 12 | def wrinkle_with_noise(image): 13 | wrinkled_img = wrinkles(image) 14 | final_img = noise(wrinkled_img) 15 | return final_img 16 | 17 | 18 | def background_with_lcd_stretch(image): 19 | img_with_bg = virtual_background(image) 20 | img_with_overlay = lcd_overlay(img_with_bg) 21 | final_img = stretch(img_with_overlay) 22 | return final_img 23 | -------------------------------------------------------------------------------- /data/sample.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/data/sample.jpg -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import imageio 3 | from tqdm import tqdm 4 | 5 | from basic_transform import (blur, contrast_and_brighten, gamma_saturation, 6 | lcd_overlay, noise, 
rotate, scanner_like, shadow, 7 | watermark, wrinkles) 8 | from distortion import distort, perspective, stretch 9 | 10 | augmentations = [ 11 | rotate, 12 | shadow, 13 | watermark, 14 | wrinkles, 15 | lcd_overlay, 16 | gamma_saturation, 17 | contrast_and_brighten, 18 | scanner_like, 19 | distort, 20 | perspective, 21 | stretch, 22 | blur, 23 | noise, 24 | ] 25 | 26 | image = cv2.imread("data/sample.jpg") 27 | 28 | 29 | def create_gif(image_list, gif_name, duration=1): 30 | frames = [] 31 | for image in image_list: 32 | frames.append(image) 33 | imageio.mimsave(gif_name, frames, "GIF", duration=duration) 34 | return 35 | 36 | 37 | for aug in tqdm(augmentations): 38 | aug_list = [] 39 | for i in range(5): 40 | result = aug(image) 41 | aug_list.append(result[:, :, ::-1]) 42 | create_gif(aug_list, f"output/{aug.__name__}.gif") 43 | -------------------------------------------------------------------------------- /distortion.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Author: RubanSeven 3 | # Github repo: https://github.com/RubanSeven/Text-Image-Augmentation-python 4 | 5 | # import cv2 6 | import numpy as np 7 | 8 | # from transform import get_perspective_transform, warp_perspective 9 | from warp_mls import WarpMLS 10 | 11 | 12 | def distort(src, segment=5): 13 | img_h, img_w = src.shape[:2] 14 | 15 | cut = img_w // segment 16 | thresh = cut // 3 17 | # thresh = img_h // segment // 3 18 | # thresh = img_h // 5 19 | 20 | src_pts = list() 21 | dst_pts = list() 22 | 23 | src_pts.append([0, 0]) 24 | src_pts.append([img_w, 0]) 25 | src_pts.append([img_w, img_h]) 26 | src_pts.append([0, img_h]) 27 | 28 | dst_pts.append([np.random.randint(thresh), np.random.randint(thresh)]) 29 | dst_pts.append([img_w - np.random.randint(thresh), np.random.randint(thresh)]) 30 | dst_pts.append( 31 | [img_w - np.random.randint(thresh), img_h - np.random.randint(thresh)] 32 | ) 33 | 
dst_pts.append([np.random.randint(thresh), img_h - np.random.randint(thresh)]) 34 | 35 | half_thresh = thresh * 0.5 36 | 37 | for cut_idx in np.arange(1, segment, 1): 38 | src_pts.append([cut * cut_idx, 0]) 39 | src_pts.append([cut * cut_idx, img_h]) 40 | dst_pts.append( 41 | [ 42 | cut * cut_idx + np.random.randint(thresh) - half_thresh, 43 | np.random.randint(thresh) - half_thresh, 44 | ] 45 | ) 46 | dst_pts.append( 47 | [ 48 | cut * cut_idx + np.random.randint(thresh) - half_thresh, 49 | img_h + np.random.randint(thresh) - half_thresh, 50 | ] 51 | ) 52 | 53 | trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h) 54 | dst = trans.generate() 55 | 56 | return dst 57 | 58 | 59 | def stretch(src, segment=5): 60 | img_h, img_w = src.shape[:2] 61 | 62 | cut = img_w // segment 63 | thresh = cut * 4 // 5 64 | # thresh = img_h // segment // 3 65 | # thresh = img_h // 5 66 | 67 | src_pts = list() 68 | dst_pts = list() 69 | 70 | src_pts.append([0, 0]) 71 | src_pts.append([img_w, 0]) 72 | src_pts.append([img_w, img_h]) 73 | src_pts.append([0, img_h]) 74 | 75 | dst_pts.append([0, 0]) 76 | dst_pts.append([img_w, 0]) 77 | dst_pts.append([img_w, img_h]) 78 | dst_pts.append([0, img_h]) 79 | 80 | half_thresh = thresh * 0.5 81 | 82 | for cut_idx in np.arange(1, segment, 1): 83 | move = np.random.randint(thresh) - half_thresh 84 | src_pts.append([cut * cut_idx, 0]) 85 | src_pts.append([cut * cut_idx, img_h]) 86 | dst_pts.append([cut * cut_idx + move, 0]) 87 | dst_pts.append([cut * cut_idx + move, img_h]) 88 | 89 | trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h) 90 | dst = trans.generate() 91 | 92 | return dst 93 | 94 | 95 | def perspective(src): 96 | img_h, img_w = src.shape[:2] 97 | 98 | thresh = img_h // 4 99 | 100 | src_pts = list() 101 | dst_pts = list() 102 | 103 | src_pts.append([0, 0]) 104 | src_pts.append([img_w, 0]) 105 | src_pts.append([img_w, img_h]) 106 | src_pts.append([0, img_h]) 107 | 108 | dst_pts.append([0, np.random.randint(thresh)]) 109 | 
def get_image(filename, page=1):
    """
    Loads an image file, or one page of a PDF, as a BGR array together with
    its grayscale version
    Args:
        filename (str): path to a PDF or to an image readable by cv2.imread
        page (int, optional): 1-based page number to rasterise when filename
            is a PDF. Defaults to 1.

    Returns:
        Tuple of (BGR image, grayscale image)

    Raises:
        ValueError: if the requested PDF page does not exist or the image
            cannot be read
    """
    if filename.lower().endswith(".pdf"):
        # NOTE(review): pdf2image is imported by this file but is not listed
        # in requirements.txt - confirm it is installed.
        # Rasterise only the requested page instead of the whole document.
        pages = convert_from_path(filename, 600, first_page=page, last_page=page)
        if not pages:
            raise ValueError(f"Page {page} not found in {filename}")
        # pdf2image yields PIL images in RGB order while the rest of the
        # pipeline (cv2.cvtColor below, cv2.imwrite in main) expects BGR.
        image = cv2.cvtColor(np.asarray(pages[0]), cv2.COLOR_RGB2BGR)
    else:
        image = cv2.imread(filename)
        if image is None:
            # cv2.imread signals failure by returning None
            raise ValueError(f"Could not read image: {filename}")
    img_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return image, img_gray


# augmentations applied (each independently, with probability aug_prob)
# to every input file by main()
augmentations = [
    rotate,
    shadow,
    watermark,
    wrinkles,
    lcd_overlay,
    gamma_saturation,
    contrast_and_brighten,
    scanner_like,
    virtual_background,
    rotation_with_lcd,
    wrinkle_with_noise,
    background_with_lcd_stretch,
]


@click.command()
@click.option("--data-root", type=click.Path(exists=True), default="data/")
@click.option("--output-dir", type=click.Path(exists=True), default="output/")
@click.option("--aug-prob", type=float, default=1.0)
def main(data_root, output_dir, aug_prob):
    """
    Applies every augmentation with probability aug_prob to each file found
    under data_root and writes the results as JPEGs below output_dir,
    mirroring the sub-directory layout of data_root
    Args:
        data_root: directory that is searched recursively for input files
        output_dir: directory the augmented images are written to
        aug_prob (float): probability with which each augmentation is applied
    """
    data_root = Path(data_root)
    output_dir = Path(output_dir)

    # list all jpg, pdf or png files
    aug_files = list(data_root.rglob("*.[jp][pnd][gf]"))

    for file in tqdm(aug_files):
        org_img, _ = get_image(str(file))
        # Mirror the location of the file relative to data_root; slicing
        # file.parts only works when data_root is a single path component.
        target_dir = output_dir / file.relative_to(data_root).parent
        for aug in augmentations:
            if np.random.rand() < aug_prob:
                result = aug(org_img)
                # cv2.imwrite does not create missing directories
                target_dir.mkdir(parents=True, exist_ok=True)
                # file.stem keeps dots inside the name ("a.1.jpg" -> "a.1")
                new_filename = f"{file.stem}_{aug.__name__}.jpg"
                cv2.imwrite(str(target_dir / new_filename), result)


if __name__ == "__main__":
    main()
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/output/lcd_overlay.gif -------------------------------------------------------------------------------- /output/noise.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/output/noise.gif -------------------------------------------------------------------------------- /output/perspective.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/output/perspective.gif -------------------------------------------------------------------------------- /output/rotate.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/output/rotate.gif -------------------------------------------------------------------------------- /output/scanner_like.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/output/scanner_like.gif -------------------------------------------------------------------------------- /output/shadow.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/output/shadow.gif -------------------------------------------------------------------------------- /output/stretch.gif: -------------------------------------------------------------------------------- 
def _add_texture(
    image: np.ndarray, overlay: np.ndarray, alpha: float = 0.7
) -> np.ndarray:
    """
    Overlays overlay on top of image to get the texture of overlay on the image
    Args:
        image (np.ndarray): original image
        overlay (np.ndarray): overlay image, resized to the size of image
        alpha (float, optional): weight of the original image in the blend;
            the overlay gets 1 - alpha. Defaults to 0.7.

    Returns:
        np.ndarray: image with overlay added
    """
    height, width = image.shape[:2]
    # resize overlay to the size of original image
    overlay = cv2.resize(overlay, (width, height))
    return cv2.addWeighted(image, alpha, overlay, 1 - alpha, 0)


def _get_perspective_points(
    image: np.ndarray,
    min_height: int = None,
    min_width: int = None,
    interactive: bool = False,
) -> np.ndarray:
    """
    Gets perspective points from an image for perspective warp. When interactive
    is True an interactive window allows selection of points from the image
    (left click to add a point, Esc to finish), otherwise a random quadrilateral
    at least min_height high and min_width wide is selected
    Args:
        image (np.ndarray): image on which perspective points are to be selected
        min_height (int, optional): minimum height of the perspective trapezoid.
            Required when interactive is False. Defaults to None.
        min_width (int, optional): minimum width of the perspective trapezoid.
            Required when interactive is False. Defaults to None.
        interactive (bool, optional): When True, an interactive window allows
            manual selection of perspective points. Defaults to False.

    Returns:
        np.ndarray: Array of perspective points as ordered by _order_points

    Raises:
        ValueError: if fewer than four points were selected interactively, if
            min_height/min_width are missing in non-interactive mode, or if
            the image is not at least 2 pixels larger than the minimum size
            in both dimensions
    """
    if interactive:
        positions = []
        window_name = "Select perspective points"
        # markers are drawn on a copy so the caller's image is left untouched
        canvas = image.copy()
        cv2.namedWindow(window_name)
        cv2.setMouseCallback(
            window_name, partial(_draw_circle, positions=positions, image=canvas)
        )
        while True:
            cv2.imshow(window_name, canvas)
            # 27 is the key code of Esc
            if cv2.waitKey(20) & 0xFF == 27:
                break
        cv2.destroyAllWindows()
        if len(positions) < 4:
            raise ValueError("Four perspective points must be selected")
        return _order_points(positions)

    if min_height is None or min_width is None:
        raise ValueError(
            "min_height and min_width are required when interactive is False"
        )
    image_height, image_width = image.shape[:2]
    # the random ranges below need at least one candidate for x1 and y1
    if image_width - min_width < 2 or image_height - min_height < 2:
        raise ValueError(
            "image must be at least 2 pixels larger than min_width and min_height"
        )

    # random rectangular portion of the image, with min_height and min_width
    x1 = np.random.randint(0, (image_width - min_width) // 2)
    y1 = np.random.randint(0, (image_height - min_height) // 2)
    x2 = np.random.randint(x1 + min_width, image_width)
    y2 = np.random.randint(y1 + min_height, image_height)

    # change in height and width from the default rectangle; the bounds are
    # truncated to integers and an empty range means no change
    delta = 0.1
    max_dh = int(delta * min_height)
    max_dw = int(delta * min_width)
    delta_height = np.random.randint(-max_dh, max_dh) if max_dh > 0 else 0
    delta_width = np.random.randint(-max_dw, max_dw) if max_dw > 0 else 0

    positions = [
        (x1, y1 + delta_height),
        (x1, y2),
        (x2, y1),
        (x2 + delta_width, y2),
    ]
    return _order_points(positions)


def _perspective_warp(
    bg_image: np.ndarray, fg_image: np.ndarray, interactive: bool = False
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Perspective warp foreground image on the perspective of background image
    Args:
        bg_image (np.ndarray): image in which the target quadrilateral is
            selected; its size is the size of the warped output
        fg_image (np.ndarray): image that is warped onto the quadrilateral
        interactive (bool, optional): select the quadrilateral manually
            instead of randomly. Defaults to False.

    Returns:
        Tuple[np.ndarray, np.ndarray]: the warped foreground image and the
            ordered target points in bg_image
    """
    fg_height, fg_width = fg_image.shape[:2]
    pts1 = _get_perspective_points(
        bg_image, min_height=fg_height, min_width=fg_width, interactive=interactive,
    )

    # corners of the foreground image, ordered like pts1
    pts2 = _order_points(
        np.array([[0, 0], [fg_width, 0], [0, fg_height], [fg_width, fg_height]])
    )
    h, _ = cv2.findHomography(pts2, pts1, cv2.RANSAC, 5.0)
    warped_image = cv2.warpPerspective(
        fg_image, h, (bg_image.shape[1], bg_image.shape[0])
    )
    return warped_image, pts1


def _draw_circle(
    event: int, x: int, y: int, flags: int, param, positions: List, image: np.ndarray
):
    """
    Callback function to draw a circle on the given image, when event is triggered
    Args:
        event (int): cv2 event
        x (int): x coordinate
        y (int): y coordinate
        flags (int): unused here
        param: unused here
        positions (List): list the clicked [x, y] coordinate is appended to
        image (np.ndarray): image the circle is drawn on (modified in place)
    """
    # when the left button is released, mark the point and store its coordinate
    if event == cv2.EVENT_LBUTTONUP:
        cv2.circle(image, (x, y), 2, (255, 0, 0), -1)
        positions.append([x, y])


def _order_points(pts: List) -> np.ndarray:
    """
    Order the points of a rectangle in top-left, top-right, bottom-right
    and bottom-left order
    Args:
        pts (List): List (or array) of (x, y) points to be ordered

    Returns:
        np.ndarray: Float array of shape (4, 2) with the ordered points
    """
    pts = np.asarray(pts)
    final_rect = np.zeros((4, 2))

    sums = pts.sum(axis=1)
    # y - x for every point
    diff = np.diff(pts, axis=1)

    # top left has the smallest x + y, bottom right the largest
    final_rect[0] = pts[np.argmin(sums)]
    final_rect[2] = pts[np.argmax(sums)]
    # top right has the smallest y - x, bottom left the largest
    final_rect[1] = pts[np.argmin(diff)]
    final_rect[3] = pts[np.argmax(diff)]

    return final_rect


def _generate_shadow_coordinates(
    imshape: Tuple[int, ...], no_of_shadows: int = 1
) -> List[np.ndarray]:
    """
    Generates 2D coordinates for polygons with a random number of vertices
    (3 or 4). The first coordinate of a vertex is drawn from
    [0, imshape[1]), the second from imshape[0] // 3 + [0, imshape[0])
    Args:
        imshape (Tuple[int, ...]): limits for the coordinates; only the first
            two entries are used, so a full image shape may be passed
        no_of_shadows (int, optional): Number of polygons to generate.
            Defaults to 1.

    Returns:
        List[np.ndarray]: List of polygon coordinates, each an int32 array of
            shape (1, number of vertices, 2)
    """
    x_lim, y_lim = imshape[:2]
    # the upper bound is exclusive in np.random.randint
    min_vertices, max_vertices = 3, 5
    vertices_list = []
    for _ in range(no_of_shadows):
        vertex = [
            (y_lim * np.random.uniform(), x_lim // 3 + x_lim * np.random.uniform())
            for _ in range(np.random.randint(min_vertices, max_vertices))
        ]
        # polygon vertices
        vertices_list.append(np.array([vertex], dtype=np.int32))
    return vertices_list
class WarpMLS:
    """
    Warps an image so that the control points src_pts end up at dst_pts.

    A displacement is computed on a coarse grid of the output image
    (calc_delta) and then bilinearly interpolated for every pixel, which is
    sampled from the source image with bilinear interpolation (gen_img).
    NOTE(review): the per-node solve looks like the similarity variant of
    moving-least-squares image deformation - confirm against the upstream
    project before relying on that name.
    """

    def __init__(self, src, src_pts, dst_pts, dst_w, dst_h, trans_ratio=1.0):
        """
        Args:
            src: source image array of shape (H, W) or (H, W, C)
            src_pts: control points [x, y] in the source image
            dst_pts: positions [x, y] of the control points in the output
            dst_w: width of the output image
            dst_h: height of the output image
            trans_ratio: factor applied to the displacement (1.0 = full warp)
        """
        self.src = src
        self.src_pts = src_pts
        self.dst_pts = dst_pts
        self.pt_count = len(self.dst_pts)
        self.dst_w = dst_w
        self.dst_h = dst_h
        self.trans_ratio = trans_ratio
        # spacing of the grid on which displacements are solved
        self.grid_size = 100
        # per-pixel storage, only filled at the grid nodes by calc_delta
        self.rdx = np.zeros((self.dst_h, self.dst_w))
        self.rdy = np.zeros((self.dst_h, self.dst_w))

    @staticmethod
    def __bilinear_interp(x, y, v11, v12, v21, v22):
        """Bilinear blend of four values with weights x (first axis), y (second)."""
        return (v11 * (1 - y) + v12 * y) * (1 - x) + (v21 * (1 - y) + v22 * y) * x

    def generate(self):
        """Computes the displacement grid and returns the warped image."""
        self.calc_delta()
        return self.gen_img()

    def _grid_coords(self, limit):
        """
        Grid node coordinates along one axis: every grid_size-th position
        below limit, plus the last position limit - 1
        """
        coords = list(range(0, limit, self.grid_size))
        if coords and coords[-1] != limit - 1:
            coords.append(limit - 1)
        return coords

    @staticmethod
    def _map_point(i, j, src_pts, dst_pts):
        """
        Source position for the output position (i, j) = (x, y), given the
        control points as float arrays of shape (n, 2)
        """
        offsets = dst_pts - np.array([i, j], dtype=np.float64)
        dist_sq = np.sum(offsets * offsets, axis=1)

        # A node lying exactly on a control point maps to that point's
        # source position; this also avoids a division by zero below.
        hits = np.flatnonzero(dist_sq == 0)
        if hits.size:
            return src_pts[hits[0]]

        # inverse squared distance weights and weighted centroids
        w = 1.0 / dist_sq
        sw = w.sum()
        pstar = w @ dst_pts / sw
        qstar = w @ src_pts / sw

        p_hat = dst_pts - pstar
        p_perp = np.stack([-p_hat[:, 1], p_hat[:, 0]], axis=1)
        miu_s = np.sum(w * np.sum(p_hat * p_hat, axis=1))

        cur = np.array([i, j], dtype=np.float64) - pstar
        cur_perp = np.array([-cur[1], cur[0]])

        new_x = np.sum(
            w * ((p_hat @ cur) * src_pts[:, 0] - (p_perp @ cur) * src_pts[:, 1])
        )
        new_y = np.sum(
            w
            * (
                -(p_hat @ cur_perp) * src_pts[:, 0]
                + (p_perp @ cur_perp) * src_pts[:, 1]
            )
        )
        return np.array([new_x, new_y]) / miu_s + qstar

    def calc_delta(self):
        """
        Fills rdx / rdy at every grid node with the offset from the node to
        its source position. Does nothing for fewer than two control points.
        """
        if self.pt_count < 2:
            return

        src_pts = np.asarray(self.src_pts, dtype=np.float64)
        dst_pts = np.asarray(self.dst_pts, dtype=np.float64)

        for i in self._grid_coords(self.dst_w):
            for j in self._grid_coords(self.dst_h):
                new_pt = self._map_point(i, j, src_pts, dst_pts)
                self.rdx[j, i] = new_pt[0] - i
                self.rdy[j, i] = new_pt[1] - j

    def gen_img(self):
        """
        Builds the output image from the displacements stored in rdx / rdy.

        Returns:
            uint8 array of shape (dst_h, dst_w) plus the channel dimension of
            the source image, if it has one
        """
        src_h, src_w = self.src.shape[:2]
        dst = np.zeros(
            (self.dst_h, self.dst_w) + self.src.shape[2:], dtype=np.float32
        )

        for i in range(0, self.dst_h, self.grid_size):
            for j in range(0, self.dst_w, self.grid_size):
                ni = i + self.grid_size
                nj = j + self.grid_size
                w = h = self.grid_size
                # the last cell of a row / column ends on the image border
                if ni >= self.dst_h:
                    ni = self.dst_h - 1
                    h = ni - i + 1
                if nj >= self.dst_w:
                    nj = self.dst_w - 1
                    w = nj - j + 1

                di = np.arange(h).reshape(-1, 1)
                dj = np.arange(w).reshape(1, -1)
                # displacement of every pixel of the cell, interpolated from
                # the four nodes of the cell
                delta_x = self.__bilinear_interp(
                    di / h,
                    dj / w,
                    self.rdx[i, j],
                    self.rdx[i, nj],
                    self.rdx[ni, j],
                    self.rdx[ni, nj],
                )
                delta_y = self.__bilinear_interp(
                    di / h,
                    dj / w,
                    self.rdy[i, j],
                    self.rdy[i, nj],
                    self.rdy[ni, j],
                    self.rdy[ni, nj],
                )
                # sampling positions, kept inside the source image
                nx = np.clip(j + dj + delta_x * self.trans_ratio, 0, src_w - 1)
                ny = np.clip(i + di + delta_y * self.trans_ratio, 0, src_h - 1)
                nxi = np.array(np.floor(nx), dtype=np.int32)
                nyi = np.array(np.floor(ny), dtype=np.int32)
                nxi1 = np.array(np.ceil(nx), dtype=np.int32)
                nyi1 = np.array(np.ceil(ny), dtype=np.int32)

                x = ny - nyi
                y = nx - nxi
                if self.src.ndim == 3:
                    # trailing axis broadcasts over any number of channels
                    x = x[..., np.newaxis]
                    y = y[..., np.newaxis]
                dst[i : i + h, j : j + w] = self.__bilinear_interp(
                    x,
                    y,
                    self.src[nyi, nxi],
                    self.src[nyi, nxi1],
                    self.src[nyi1, nxi],
                    self.src[nyi1, nxi1],
                )

        dst = np.clip(dst, 0, 255)
        dst = np.array(dst, dtype=np.uint8)

        return dst