├── .gitignore
├── README.md
├── affine_transform.py
├── augmentation-helpers
    ├── background
    │   ├── background_1.jpg
    │   ├── background_2.jpg
    │   ├── background_3.jpg
    │   ├── background_4.jpg
    │   ├── background_5.jpg
    │   └── background_6.jpg
    └── overlays
    │   ├── monitor
    │       ├── monitor_1.jpg
    │       ├── monitor_2.jpg
    │       ├── monitor_3.jpg
    │       ├── monitor_5.jpg
    │       ├── monitor_6.jpg
    │       └── monitor_7.jpg
    │   └── wrinkle
    │       ├── wrinkle_1.jpg
    │       ├── wrinkle_2.jpg
    │       └── wrinkle_3.jpg
├── basic_transform.py
├── composite_transform.py
├── data
    └── sample.jpg
├── demo.py
├── distortion.py
├── main.py
├── output
    ├── blur.gif
    ├── contrast_and_brighten.gif
    ├── distort.gif
    ├── gamma_saturation.gif
    ├── lcd_overlay.gif
    ├── noise.gif
    ├── perspective.gif
    ├── rotate.gif
    ├── scanner_like.gif
    ├── shadow.gif
    ├── stretch.gif
    ├── watermark.gif
    └── wrinkles.gif
├── requirements.txt
├── utility.py
└── warp_mls.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | __pycache__/
 2 | *.py[cod]
 3 | *$py.class
 4 | .vscode/
 5 | .ipynb_checkpoints
 6 | profile_default/
 7 | ipython_config.py
 8 | output/*
 9 | !output/.gitkeep
10 | !output/*.gif
11 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Form data augmentation
 2 | 
 3 | ## Available augmentations
 4 | 1. Shadow
 5 | ![](https://github.com/gautam-aayush/form-data-augmentation/blob/main/output/shadow.gif)
 6 | 
 7 | 2. Wrinkles
 8 | ![](https://github.com/gautam-aayush/form-data-augmentation/blob/main/output/wrinkles.gif)
 9 | 
10 | 3. Saturation
11 | ![](https://github.com/gautam-aayush/form-data-augmentation/blob/main/output/gamma_saturation.gif)
12 | 
13 | 4. Watermark
14 | ![](https://github.com/gautam-aayush/form-data-augmentation/blob/main/output/watermark.gif?raw=true)
15 | 
16 | 5. Binarize
17 | ![](https://user-images.githubusercontent.com/70262751/111758313-71413b00-88c4-11eb-846e-4380ee32d606.png)
18 | 
19 | 6. Perspective Distortion
20 | ![](https://github.com/gautam-aayush/form-data-augmentation/blob/main/output/perspective.gif?raw=true)
21 | 
22 | 7. Stretch Distortion
23 | ![](https://github.com/gautam-aayush/form-data-augmentation/blob/main/output/stretch.gif?raw=true)
24 | 
25 | 8. LCD Texture
26 | ![](https://github.com/gautam-aayush/form-data-augmentation/blob/main/output/lcd_overlay.gif?raw=true)
27 | 
28 | ## Steps:
29 | 
30 | ### Install requirements
31 | 
32 | * `pip install -r requirements.txt`
33 | 
34 | ### Run demo to see the effect of individual augmentations
35 | * `python demo.py`
36 | 
 37 | `demo.py` uses the sample data in `data/` and writes one `GIF` per augmentation to `output/`.
38 | 
39 | ### Run the augmentation pipeline
 40 | * `python main.py --data-root data/ --output-dir output/ --aug-prob 0.1`
41 | 
42 |     *  `--data-root`: path to data directory
43 |     * `--output-dir`: path to outputs directory
44 |     * `--aug_prob`: probability with which each augmentation is applied, when the value is equal to `1`, all augmentations are applied, and when the value is equal to `0.1` an augmentation is applied with probability equal to `0.1`
45 | 


--------------------------------------------------------------------------------
/affine_transform.py:
--------------------------------------------------------------------------------
  1 | from pathlib import Path
  2 | 
  3 | import cv2
  4 | import numpy as np
  5 | from tqdm import tqdm
  6 | 
  7 | 
  8 | def displacement(
  9 |     image: np.ndarray, horizontal_scale: float = 0.1, vertical_scale: float = 0.1
 10 | ) -> np.ndarray:
 11 |     """
 12 |     Displaces an image horzontally and vertically by respective scales
 13 |     Args:
 14 |         image (np.ndarray): BGR image
 15 |         horizontal_scale (float, optional): Fraction of original image width to displace.
 16 |         Defaults to 0.1.
 17 |         vertical_scale (float, optional): Fraction of original image width to displace.
 18 |         Defaults to 0.1.
 19 | 
 20 |     Returns:
 21 |         np.ndarray: Displaced image
 22 |     """
 23 |     height, width = image.shape[:2]
 24 |     horizontal = int(horizontal_scale * width)
 25 |     vertical = int(vertical_scale * height)
 26 |     translation_mat = np.array([[1, 0, horizontal], [0, 1, vertical]], dtype=np.float32)
 27 |     border_color = (255, 255, 255)  # white borders
 28 |     displaced_image = cv2.warpAffine(
 29 |         image,
 30 |         translation_mat,
 31 |         (width + horizontal, height + vertical),
 32 |         borderValue=border_color,
 33 |     )
 34 |     return displaced_image
 35 | 
 36 | 
 37 | def rotation(image: np.ndarray, angle: int = 90, same: bool = False) -> np.ndarray:
 38 |     """
 39 |     Rotates an image through given angle
 40 |     Args:
 41 |         image (np.ndarray): BGR image
 42 |         angle (int, optional): Rotation angle (counterclockwise) about its center. Defaults to 90.
 43 |         same (bool, optional): When True the output image is of the same size as input
 44 |         however, some portion of the original image may be lost. When False, height and width
 45 |         are adjusted to preserve original image content.
 46 |         Defaults to False.
 47 | 
 48 |     Returns:
 49 |         np.ndarray: Rotated image
 50 |     """
 51 |     height, width = image.shape[:2]
 52 |     centerX = (width - 1) / 2
 53 |     centerY = (height - 1) / 2
 54 |     rotation_mat = cv2.getRotationMatrix2D((centerX, centerY), angle, 1)
 55 | 
 56 |     if same:
 57 |         new_width = width
 58 |         new_height = height
 59 |     else:
 60 |         cos = np.abs(rotation_mat[0, 0])
 61 |         sin = np.abs(rotation_mat[0, 1])
 62 |         # compute the new bounding dimensions of the image
 63 |         new_width = int((height * sin) + (width * cos))
 64 |         new_height = int((height * cos) + (width * sin))
 65 |         # adjust the rotation matrix to take into account translation
 66 |         rotation_mat[0, 2] += (new_width / 2) - centerX
 67 |         rotation_mat[1, 2] += (new_height / 2) - centerY
 68 | 
 69 |     border_color = (255, 255, 255)  # white borders
 70 |     rotated_image = cv2.warpAffine(
 71 |         image, rotation_mat, (new_width, new_height), borderValue=border_color
 72 |     )
 73 | 
 74 |     return rotated_image
 75 | 
 76 | 
 77 | def shear(image: np.ndarray, shear_X: float = 0.1, shear_Y: float = 0.1) -> np.ndarray:
 78 |     """
 79 |     Shears the image along x and y directions
 80 |     Args:
 81 |         image (np.ndarray): BGR image
 82 |         shear_X (float, optional): Value of horizontal shear. Defaults to 0.1.
 83 |         shear_Y (float, optional): Value of vertical shear. Defaults to 0.1.
 84 | 
 85 |     Returns:
 86 |         np.ndarray: Sheared image
 87 |     """
 88 |     width, height = image.shape[:2]
 89 | 
 90 |     # increase image height and width to preserve image content
 91 |     new_width = int(2 * width)
 92 |     new_height = int(2 * height)
 93 |     M2 = np.float32([[1, shear_Y, 0], [shear_X, 1, 0]])
 94 |     #     M2[0,2] = -M2[0,1] * W/2
 95 |     #     M2[1,2] = -M2[1,0] * H/2
 96 |     centerX = (width - 1) / 2
 97 |     centerY = (height - 1) / 2
 98 |     M2[0, 2] += (new_width / 2) - centerX
 99 |     M2[1, 2] += (new_height / 2) - centerY
100 | 
101 |     sheared_image = cv2.warpAffine(image, M2, (new_width, new_height))
102 |     return sheared_image
103 | 
104 | 
def arg_to_string(arg):
    """
    Flattens a keyword-argument dict into a filename-friendly tag
    Args:
        arg (dict): mapping of string parameter names to values

    Returns:
        str: "(key1_value1_key2_value2...)" in the dict's iteration order
    """
    pieces = [name + "_" + str(val) for name, val in arg.items()]
    return "(" + "_".join(pieces) + ")"
116 | 
117 | 
if __name__ == "__main__":
    # Ad-hoc batch run: apply each configured transformation to every JPEG in
    # data_dir and write the results, tagged with their parameters, to output_dir.
    data_dir = Path("../fuse/notebooks/exploratory/data/Ncell-Phase3")
    output_dir = Path("../fuse/notebooks/exploratory/outputs/Ncell/augmented_data2")
    # parents=True so a missing intermediate directory does not abort the run
    output_dir.mkdir(parents=True, exist_ok=True)
    # transformations = [displacement, rotation, noise, shear]
    # transformation_names = ['displacement', 'rotation', 'gaussian_noise', 'shear']
    # transform_params = {'displacement': [{'horizontal': 100, 'vertical': 100}, {'horizontal': 200, 'vertical': 200},
    #                                      {'horizontal': 500, 'vertical': 500}],
    #                     'rotation': [{'angle': 30}, {'angle': 45}, {'angle': 60}, {'angle': 90}],
    #                     'gaussian_noise': [{'var': 10}, {'var': 20}, {'var': 30}],
    #                     'shear': [{'shear_X': 0.1, 'shear_Y': 0.1}, {'shear_X': 0.1, 'shear_Y': 0.2},
    #                               {'shear_X': 0.2, 'shear_Y': 0.1}]}
    #
    # for file in tqdm(list(data_dir.glob('*.jpg'))):
    #     for i, transformation in enumerate(transformations):
    #         for arg in transform_params[transformation_names[i]]:
    #             image = cv2.imread(str(file))
    #             transformed_img = transformation(image, **arg)
    #             new_filename = f'{transformation_names[i]}_{arg_to_string(arg)}_{file.parts[-1]}'
    #             cv2.imwrite(str(Path(output_dir, new_filename)), transformed_img)

    transformations = [rotation]
    transformation_names = ["rotation", "shear"]
    transform_params = {"rotation": [{"angle": 11}, {"angle": 180}]}

    for file in tqdm(list(data_dir.glob("*.jpg"))):
        # decode each file once; the transformations return new arrays
        image = cv2.imread(str(file))
        if image is None:
            # cv2.imread signals an unreadable file by returning None
            print(f"Skipping unreadable image: {file}")
            continue
        # zip stops at the shorter list, so names without a matching
        # transformation (here "shear") are ignored, as before
        for name, transformation in zip(transformation_names, transformations):
            for arg in transform_params[name]:
                transformed_img = transformation(image, **arg)
                new_filename = f"{name}_{arg_to_string(arg)}_{file.name}"
                cv2.imwrite(str(Path(output_dir, new_filename)), transformed_img)
152 | 


--------------------------------------------------------------------------------
/augmentation-helpers/background/background_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/background/background_1.jpg


--------------------------------------------------------------------------------
/augmentation-helpers/background/background_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/background/background_2.jpg


--------------------------------------------------------------------------------
/augmentation-helpers/background/background_3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/background/background_3.jpg


--------------------------------------------------------------------------------
/augmentation-helpers/background/background_4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/background/background_4.jpg


--------------------------------------------------------------------------------
/augmentation-helpers/background/background_5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/background/background_5.jpg


--------------------------------------------------------------------------------
/augmentation-helpers/background/background_6.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/background/background_6.jpg


--------------------------------------------------------------------------------
/augmentation-helpers/overlays/monitor/monitor_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/overlays/monitor/monitor_1.jpg


--------------------------------------------------------------------------------
/augmentation-helpers/overlays/monitor/monitor_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/overlays/monitor/monitor_2.jpg


--------------------------------------------------------------------------------
/augmentation-helpers/overlays/monitor/monitor_3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/overlays/monitor/monitor_3.jpg


--------------------------------------------------------------------------------
/augmentation-helpers/overlays/monitor/monitor_5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/overlays/monitor/monitor_5.jpg


--------------------------------------------------------------------------------
/augmentation-helpers/overlays/monitor/monitor_6.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/overlays/monitor/monitor_6.jpg


--------------------------------------------------------------------------------
/augmentation-helpers/overlays/monitor/monitor_7.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/overlays/monitor/monitor_7.jpg


--------------------------------------------------------------------------------
/augmentation-helpers/overlays/wrinkle/wrinkle_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/overlays/wrinkle/wrinkle_1.jpg


--------------------------------------------------------------------------------
/augmentation-helpers/overlays/wrinkle/wrinkle_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/overlays/wrinkle/wrinkle_2.jpg


--------------------------------------------------------------------------------
/augmentation-helpers/overlays/wrinkle/wrinkle_3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/augmentation-helpers/overlays/wrinkle/wrinkle_3.jpg


--------------------------------------------------------------------------------
/basic_transform.py:
--------------------------------------------------------------------------------
  1 | from pathlib import Path
  2 | 
  3 | import cv2
  4 | import numpy as np
  5 | 
  6 | from affine_transform import rotation
  7 | from distortion import distort
  8 | from utility import (_add_texture, _generate_shadow_coordinates,
  9 |                      _perspective_warp)
 10 | 
 11 | PATH_TO_WRINKLED_TEXTURE = Path("augmentation-helpers/overlays/wrinkle")
 12 | PATH_TO_MONITOR_TEXTURE = Path("augmentation-helpers/overlays/monitor")
 13 | PATH_TO_BG_IMAGES = Path("augmentation-helpers/background")
 14 | 
 15 | assert PATH_TO_WRINKLED_TEXTURE.exists()
 16 | assert PATH_TO_MONITOR_TEXTURE.exists()
 17 | assert PATH_TO_BG_IMAGES.exists()
 18 | 
 19 | 
 20 | def noise(image: np.ndarray, noise_typ: str = None) -> np.ndarray:
 21 |     """
 22 |     Adds noise to an image. Avaiable noise_types "gauss",
 23 |     "s&p" (salt and pepper)
 24 |     Args:
 25 |         image (np.ndarray): BGR image on which to add noise
 26 |         noise_typ (str, optional): type of noise to add: "gauss" or "s&p".
 27 |         Defaults to None.
 28 | 
 29 |     Returns:
 30 |         np.ndarray: BGR image with noise added
 31 |     """
 32 |     noise_types = ["gauss", "s&p"]
 33 |     if not noise_typ:
 34 |         noise_typ = np.random.choice(noise_types)
 35 |     if noise_typ == "gauss":
 36 |         height, width, ch = image.shape
 37 |         mean = 0  # gaussian mean
 38 |         var = 30  # gaussian variance
 39 |         sigma = var ** 0.5
 40 |         gauss = np.random.normal(mean, sigma, (height, width, ch))
 41 |         noisy = image + gauss
 42 |         return noisy.astype(np.uint8)
 43 |     elif noise_typ == "s&p":
 44 |         height, width, ch = image.shape
 45 |         s_vs_p = 0.5
 46 |         amount = 0.004  # fraction of image to be converted to noise
 47 |         out = np.copy(image)
 48 |         # Salt mode
 49 |         num_salt = np.ceil(amount * image.size * s_vs_p)
 50 |         # get random coordinates for sale noise
 51 |         coords = [np.random.randint(0, i - 1, int(num_salt)) for i in image.shape]
 52 |         out[coords] = 1
 53 | 
 54 |         # Pepper mode
 55 |         num_pepper = np.ceil(amount * image.size * (1.0 - s_vs_p))
 56 |         # get random coordinated for pepper noise
 57 |         coords = [np.random.randint(0, i - 1, int(num_pepper)) for i in image.shape]
 58 |         out[coords] = 0
 59 | 
 60 |         return out
 61 | 
 62 | 
 63 | def shadow(image: np.ndarray, no_of_shadows: int = 1) -> np.ndarray:
 64 |     """Add shadow to an image by decreasing lightness of
 65 |     random polygonal regions in an image
 66 |     Note: As the number of shadows increase, there are chances of overlapping
 67 |     of shadows which causes the brightness of overlapped region to decrease further
 68 |     Args:
 69 |         image (np.ndarray): BGR image to add shadow on
 70 |         no_of_shadows (int, optional): Number of shadows to add. Defaults to 1.
 71 | 
 72 |     Returns:
 73 |         np.ndarray: image with shadows
 74 |     """
 75 |     # convert to HLS
 76 |     image_HLS = cv2.cvtColor(image, cv2.COLOR_BGR2HLS)
 77 |     mask = np.zeros_like(image)
 78 |     imshape = image.shape[:2]
 79 |     vertices_list = _generate_shadow_coordinates(imshape, no_of_shadows)
 80 |     # get list of shadow vertices
 81 |     for vertices in vertices_list:
 82 |         # add all shadow polygons on empty mask,
 83 |         # single 255 denotes only blue channel
 84 |         cv2.fillPoly(mask, vertices, 255)
 85 |         # if blue channel is hot, lower the birghtness for light channel
 86 |         image_HLS[:, :, 1][mask[:, :, 0] == 255] = (
 87 |             image_HLS[:, :, 1][mask[:, :, 0] == 255] * 0.75
 88 |         )
 89 |         # convert to BGR
 90 |         image_BGR = cv2.cvtColor(image_HLS, cv2.COLOR_HLS2BGR)
 91 |     return image_BGR
 92 | 
 93 | 
 94 | def virtual_background(
 95 |     image: np.ndarray,
 96 |     bg_image: np.ndarray = None,
 97 |     scale: float = 1.25,
 98 |     interactive: bool = False,
 99 | ) -> np.ndarray:
100 |     """
101 |     Adds a background to an image by warping the image into the perspective of background.
102 |     Args:
103 |         image (np.ndarray): BGR image, foreground
104 |         bg_image (np.ndarray, optional): BGR image background. Defaults to None.
105 |         If None, a random image from a predefined list is chosen as background
106 |         scale (float, optional): scale of background with respect to foreground. Defaults to 1.25.
107 |         interactive (bool, optional): If True, an interactive window allows to choose
108 |         the perspective points in the background image, otherwise random points are chosen.
109 |         Defaults to False.
110 | 
111 |     Returns:
112 |         np.ndarray: [description]
113 |     """
114 |     if not bg_image:
115 |         files = sorted(PATH_TO_BG_IMAGES.glob("*.jpg"))
116 |         chosen_file = np.random.choice(files)
117 |         bg_image = cv2.imread(str(chosen_file))
118 | 
119 |     # resize bg image to approproate scale
120 |     fg_height, fg_width = image.shape[:2]
121 |     new_height, new_width = int(fg_height * scale), int(fg_width * scale)
122 |     bg_image = cv2.resize(bg_image, (new_width, new_height))
123 | 
124 |     # warp image in perspective of background
125 |     warped_fg_image, pts = _perspective_warp(bg_image, image)
126 | 
127 |     # create a white image like with same shape as bg
128 |     img_buffer = np.ones(bg_image.shape, dtype=np.uint8) * 255
129 |     # blacken the part where fg image goes to create a warped image template
130 |     warp_template = cv2.fillPoly(img_buffer, np.int32([pts]), (0, 0, 0))
131 | 
132 |     # mask out bg image with the warp_template
133 |     masked = cv2.bitwise_and(bg_image, warp_template)
134 |     # combine the warped_fg_image to the masked bg
135 |     final_image = cv2.bitwise_or(masked, warped_fg_image)
136 | 
137 |     return final_image
138 | 
139 | 
def watermark(image: np.ndarray, text: str = None) -> np.ndarray:
    """
    Add watermark text to an image
    Args:
        image (np.ndarray): BGR image
        text (str, optional): text for watermark. Defaults to None.
        When None, a random text is chosen from a pre-defined list

    Returns:
        np.ndarray: BGR image with watermark added
    """
    texts = ["confidential", "fusemachines", "official", "W2-Tax"]
    if not text:
        text = np.random.choice(texts)

    # choose a random location for watermark: x is drawn from the width and
    # y from the height. Drawing both from [height // 4, width // 2) raised
    # ValueError for images at least twice as tall as they are wide.
    height, width = image.shape[:2]
    x_low, y_low = width // 4, height // 4
    loc = (
        int(np.random.randint(x_low, max(width // 2, x_low + 1))),
        int(np.random.randint(y_low, max(height // 2, y_low + 1))),
    )

    # write text in solid on an all black image
    image_with_text = cv2.putText(
        np.zeros(image.shape, image.dtype),
        text,
        loc,
        cv2.FONT_HERSHEY_PLAIN,
        20,
        tuple(map(int, np.random.randint(0, 255, 3))),
        15,
        cv2.LINE_AA,
    )

    # rotate the text at a random angle in [-90, 0)
    rotation_angle = np.random.rand() * 90 - 90
    image_with_text = rotation(image_with_text, rotation_angle, same=True)

    # add rotated text to image (in-place integer addition; for uint8 input
    # this wraps modulo 256 — kept as-is)
    image_with_text += image
    # again add the original image to the image with text with
    # different weights to get a semi transparent look
    alpha = 0.7
    beta = 1 - alpha
    final_image = cv2.addWeighted(image, alpha, image_with_text, beta, 0)
    return final_image
182 | 
183 | 
def wrinkles(image: np.ndarray, wrinkled_overlay: np.ndarray = None) -> np.ndarray:
    """
    Adds wrinkles to an image
    Args:
        image (np.ndarray): Original BGR image
        wrinkled_overlay (np.ndarray, optional): Wrinkled texture image to overlay.
        When None, a random texture from the wrinkle overlay directory is used.
        Defaults to None.

    Returns:
        np.ndarray: wrinkled image
    """
    # `not wrinkled_overlay` raises for a multi-element ndarray, so a
    # caller-supplied overlay could never be used; test identity instead
    if wrinkled_overlay is None:
        # randomly choose a texture for overlay
        files = sorted(PATH_TO_WRINKLED_TEXTURE.glob("*.jpg"))
        chosen_file = np.random.choice(files)
        wrinkled_overlay = cv2.imread(str(chosen_file))
    # add wrinkled texture
    textured = _add_texture(image, wrinkled_overlay)
    # get a distortion in text
    distorted = distort(textured)

    return distorted
206 | 
207 | 
def lcd_overlay(image: np.ndarray, overlay: np.ndarray = None) -> np.ndarray:
    """
    Add a LCD texture to an image
    Args:
        image (np.ndarray): BGR image
        overlay (np.ndarray, optional): BGR image with LCD texture.
        When None, a random texture from the monitor overlay directory is used.
        Defaults to None.

    Returns:
        np.ndarray: image with the texture applied
    """
    # `not overlay` raises for a multi-element ndarray, so a caller-supplied
    # overlay could never be used; test identity instead
    if overlay is None:
        # randomly choose a texture for overlay
        files = sorted(PATH_TO_MONITOR_TEXTURE.glob("*.jpg"))
        index = np.random.randint(0, len(files))
        overlay = cv2.imread(str(files[index]))
    return _add_texture(image, overlay)
225 | 
226 | 
def rotate(image: np.ndarray, angle: int = None) -> np.ndarray:
    """
    Rotates an image by the given angle, or by a randomly chosen one
    Args:
        image (np.ndarray): image to be rotated
        angle (int, optional): angle to rotate. When None, a random angle is
        chosen: 50% a small skew between -10 and 10 degrees, 25% 90 degrees,
        25% 180 degrees. Defaults to None.

    Returns:
        np.ndarray: rotated image
    """
    # identity check: an explicit angle of 0 must not trigger a random choice
    if angle is None:
        # Randomly choose an angle
        random = np.random.rand()
        # 50% of the time choose an angle between -10 and 10 degrees
        if random < 0.5:
            angle = np.random.uniform(-10, 10)
        elif random < 0.75:
            # 25% of the time 90 degrees
            angle = 90
        else:
            # 25% of the time 180 degrees
            angle = 180
    rotated_img = rotation(image, angle)
    return rotated_img
251 | 
252 | 
def blur(image: np.ndarray, sigma_x: int = None, sigma_y: int = None) -> np.ndarray:
    """
    Applies Gaussian blur to an image with a fixed 5x5 kernel
    Args:
        image (np.ndarray): BGR image
        sigma_x (int, optional): Standard deviation along x-axis.
        When None a value is randomly chosen. Defaults to None.
        sigma_y (int, optional): Standard deviation along y-axis.
        When None a value is randomly chosen. Defaults to None.

    Returns:
        np.ndarray: blurred image
    """
    # identity checks so that an explicit 0 is passed through to OpenCV
    if sigma_x is None:
        sigma_x = np.random.randint(50, 200)
    if sigma_y is None:
        sigma_y = np.random.randint(50, 200)
    # sigmaY must be passed by keyword: in the Python binding the fourth
    # positional parameter of GaussianBlur is `dst`, not sigmaY
    blurred_image = cv2.GaussianBlur(image, (5, 5), sigmaX=sigma_x, sigmaY=sigma_y)
    return blurred_image
272 | 
273 | 
def contrast_and_brighten(
    image: np.ndarray, contrast: float = None, brightness: int = None
) -> np.ndarray:
    """
    Use alpha-beta method to change the contrast and brightness of an image
    Args:
        image (np.ndarray): BGR image
        contrast (float, optional): Contrast value (multiplicative factor) to be applied.
        Good results for values between 0.5 and 3.0. When None, a random value in
        [0.5, 1.5) is used. Defaults to None.
        brightness (int, optional): Brightness value to be added. Negative values decrease brightness.
        Good results between -50 to 100. When None, a random integer in [-50, 100)
        is used. Defaults to None.

    Returns:
        np.ndarray: image with brightness and contrast values altered, same dtype as input.
    """
    # identity checks: an explicit 0 (e.g. brightness=0 for "no change") is a
    # valid request and must not be replaced by a random value
    if contrast is None:
        contrast = np.random.rand() + 0.5
    if brightness is None:
        brightness = np.random.randint(-50, 100)

    # g(x,y) = contrast * f(x,y) + brightness, computed in a wide type and
    # clipped to the 8-bit range before casting back
    new_image = np.clip(image.astype(np.int64) * contrast + brightness, 0, 255)
    new_image = new_image.astype(image.dtype)
    return new_image
298 | 
299 | 
def scanner_like(image: np.ndarray) -> np.ndarray:
    """
    Produces a photocopier/scanner-like look by adaptively thresholding
    the grayscale version of the image.
    Args:
        image (np.ndarray): BGR image

    Returns:
        np.ndarray: BGR image containing only the values 0 and 255
    """
    max_value = 255  # value given to pixels that pass the threshold
    block_size = 11  # neighbourhood size used for the local threshold
    offset = 2  # constant subtracted from the weighted local mean

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    binary = cv2.adaptiveThreshold(
        gray,
        max_value,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        block_size,
        offset,
    )
    # back to three channels so the result has the same layout as the input
    return cv2.cvtColor(binary, cv2.COLOR_GRAY2BGR)
315 | 
316 | 
def gamma_saturation(image: np.ndarray, gamma: float = None) -> np.ndarray:
    """
    Applies a gamma curve to an image through a 256-entry lookup table:
    output = (input / 255) ** gamma * 255
    Args:
        image (np.ndarray): BGR image
        gamma (float, optional): Exponent of the curve. Values between 0 and 1
        raise intensities (brighter result); values greater than 1 lower them
        (darker result). When None, a random value is used: half of the time
        from [0, 1), otherwise an integer from 1 to 10. Defaults to None.

    Returns:
        np.ndarray: BGR image
    """
    if gamma is None:
        # draw scalars: a 1-element array would end up being assigned into a
        # scalar table slot, which NumPy has deprecated
        if np.random.rand() < 0.5:
            # exponent below 1
            gamma = np.random.rand()
        else:
            # exponent of 1 or more
            gamma = np.random.randint(1, 11)
    # build the whole table in one vectorised step; the cast truncates,
    # exactly like the element-wise assignment into a uint8 array did
    levels = np.arange(256) / 255.0
    lookup_table = np.clip(levels ** gamma * 255.0, 0, 255).astype(np.uint8)
    lookup_table = lookup_table.reshape(1, 256)
    new_img = cv2.LUT(image, lookup_table)
    return new_img
341 | 


--------------------------------------------------------------------------------
/composite_transform.py:
--------------------------------------------------------------------------------
 1 | from basic_transform import (lcd_overlay, noise, rotate, virtual_background,
 2 |                              wrinkles)
 3 | from distortion import stretch
 4 | 
 5 | 
 6 | def rotation_with_lcd(image):
 7 |     rotated_img = rotate(image)
 8 |     final_img = lcd_overlay(rotated_img)
 9 |     return final_img
10 | 
11 | 
def wrinkle_with_noise(image):
    """Apply `wrinkles` to the image, then `noise` to the wrinkled result."""
    return noise(wrinkles(image))
16 | 
17 | 
def background_with_lcd_stretch(image):
    """Chain `virtual_background`, `lcd_overlay` and `stretch`, in that order."""
    result = image
    for step in (virtual_background, lcd_overlay, stretch):
        result = step(result)
    return result
23 | 


--------------------------------------------------------------------------------
/data/sample.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/data/sample.jpg


--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | import imageio
 3 | from tqdm import tqdm
 4 | 
 5 | from basic_transform import (blur, contrast_and_brighten, gamma_saturation,
 6 |                              lcd_overlay, noise, rotate, scanner_like, shadow,
 7 |                              watermark, wrinkles)
 8 | from distortion import distort, perspective, stretch
 9 | 
# Augmentations demonstrated by this script; each is called with the sample
# image as its only argument, so it runs with its default (random) settings.
augmentations = [
    rotate,
    shadow,
    watermark,
    wrinkles,
    lcd_overlay,
    gamma_saturation,
    contrast_and_brighten,
    scanner_like,
    distort,
    perspective,
    stretch,
    blur,
    noise,
]

image = cv2.imread("data/sample.jpg")
if image is None:
    # cv2.imread returns None instead of raising when the file is missing or
    # unreadable; fail here rather than inside the first augmentation
    raise FileNotFoundError("Could not read sample image: data/sample.jpg")
27 | 
28 | 
def create_gif(image_list, gif_name, duration=1):
    """
    Save a sequence of images as an animated GIF.

    Args:
        image_list: iterable of frames, written in iteration order.
        gif_name: path of the GIF file to write.
        duration: forwarded unchanged to ``imageio.mimsave``. Defaults to 1.
    """
    # Materialise the frames into a fresh list before handing them to imageio.
    imageio.mimsave(gif_name, list(image_list), "GIF", duration=duration)
35 | 
36 | 
for aug in tqdm(augmentations):
    # Run each augmentation five times on the sample image. The last axis is
    # reversed (channel order flipped) before the frames are written.
    frames = [aug(image)[:, :, ::-1] for _ in range(5)]
    create_gif(frames, f"output/{aug.__name__}.gif")
43 | 


--------------------------------------------------------------------------------
/distortion.py:
--------------------------------------------------------------------------------
  1 | # -*- coding:utf-8 -*-
  2 | # Author: RubanSeven
  3 | # Github repo: https://github.com/RubanSeven/Text-Image-Augmentation-python
  4 | 
  5 | # import cv2
  6 | import numpy as np
  7 | 
  8 | # from transform import get_perspective_transform, warp_perspective
  9 | from warp_mls import WarpMLS
 10 | 
 11 | 
 12 | def distort(src, segment=5):
 13 |     img_h, img_w = src.shape[:2]
 14 | 
 15 |     cut = img_w // segment
 16 |     thresh = cut // 3
 17 |     # thresh = img_h // segment // 3
 18 |     # thresh = img_h // 5
 19 | 
 20 |     src_pts = list()
 21 |     dst_pts = list()
 22 | 
 23 |     src_pts.append([0, 0])
 24 |     src_pts.append([img_w, 0])
 25 |     src_pts.append([img_w, img_h])
 26 |     src_pts.append([0, img_h])
 27 | 
 28 |     dst_pts.append([np.random.randint(thresh), np.random.randint(thresh)])
 29 |     dst_pts.append([img_w - np.random.randint(thresh), np.random.randint(thresh)])
 30 |     dst_pts.append(
 31 |         [img_w - np.random.randint(thresh), img_h - np.random.randint(thresh)]
 32 |     )
 33 |     dst_pts.append([np.random.randint(thresh), img_h - np.random.randint(thresh)])
 34 | 
 35 |     half_thresh = thresh * 0.5
 36 | 
 37 |     for cut_idx in np.arange(1, segment, 1):
 38 |         src_pts.append([cut * cut_idx, 0])
 39 |         src_pts.append([cut * cut_idx, img_h])
 40 |         dst_pts.append(
 41 |             [
 42 |                 cut * cut_idx + np.random.randint(thresh) - half_thresh,
 43 |                 np.random.randint(thresh) - half_thresh,
 44 |             ]
 45 |         )
 46 |         dst_pts.append(
 47 |             [
 48 |                 cut * cut_idx + np.random.randint(thresh) - half_thresh,
 49 |                 img_h + np.random.randint(thresh) - half_thresh,
 50 |             ]
 51 |         )
 52 | 
 53 |     trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h)
 54 |     dst = trans.generate()
 55 | 
 56 |     return dst
 57 | 
 58 | 
 59 | def stretch(src, segment=5):
 60 |     img_h, img_w = src.shape[:2]
 61 | 
 62 |     cut = img_w // segment
 63 |     thresh = cut * 4 // 5
 64 |     # thresh = img_h // segment // 3
 65 |     # thresh = img_h // 5
 66 | 
 67 |     src_pts = list()
 68 |     dst_pts = list()
 69 | 
 70 |     src_pts.append([0, 0])
 71 |     src_pts.append([img_w, 0])
 72 |     src_pts.append([img_w, img_h])
 73 |     src_pts.append([0, img_h])
 74 | 
 75 |     dst_pts.append([0, 0])
 76 |     dst_pts.append([img_w, 0])
 77 |     dst_pts.append([img_w, img_h])
 78 |     dst_pts.append([0, img_h])
 79 | 
 80 |     half_thresh = thresh * 0.5
 81 | 
 82 |     for cut_idx in np.arange(1, segment, 1):
 83 |         move = np.random.randint(thresh) - half_thresh
 84 |         src_pts.append([cut * cut_idx, 0])
 85 |         src_pts.append([cut * cut_idx, img_h])
 86 |         dst_pts.append([cut * cut_idx + move, 0])
 87 |         dst_pts.append([cut * cut_idx + move, img_h])
 88 | 
 89 |     trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h)
 90 |     dst = trans.generate()
 91 | 
 92 |     return dst
 93 | 
 94 | 
 95 | def perspective(src):
 96 |     img_h, img_w = src.shape[:2]
 97 | 
 98 |     thresh = img_h // 4
 99 | 
100 |     src_pts = list()
101 |     dst_pts = list()
102 | 
103 |     src_pts.append([0, 0])
104 |     src_pts.append([img_w, 0])
105 |     src_pts.append([img_w, img_h])
106 |     src_pts.append([0, img_h])
107 | 
108 |     dst_pts.append([0, np.random.randint(thresh)])
109 |     dst_pts.append([img_w, np.random.randint(thresh)])
110 |     dst_pts.append([img_w, img_h - np.random.randint(thresh)])
111 |     dst_pts.append([0, img_h - np.random.randint(thresh)])
112 | 
113 |     trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h)
114 |     dst = trans.generate()
115 | 
116 |     return dst
117 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | import click
 4 | import cv2
 5 | import numpy as np
 6 | from pdf2image import convert_from_path
 7 | from tqdm import tqdm
 8 | 
 9 | from basic_transform import (contrast_and_brighten, gamma_saturation,
10 |                              lcd_overlay, rotate, scanner_like, shadow,
11 |                              virtual_background, watermark, wrinkles)
12 | from composite_transform import (background_with_lcd_stretch,
13 |                                  rotation_with_lcd, wrinkle_with_noise)
14 | 
15 | 
def get_image(filename, page=1):
    """
    Load an image file, or one page of a PDF, as an OpenCV-style array.

    Args:
        filename (str): path to an image or a PDF (detected by a
            case-insensitive "pdf" suffix).
        page (int, optional): 1-based PDF page to render. Ignored for
            non-PDF inputs. Defaults to 1.

    Returns:
        tuple: ``(image, img_gray)`` where ``image`` is the BGR image and
        ``img_gray`` its grayscale conversion.

    Raises:
        ValueError: if the requested PDF page could not be rendered.
        OSError: if OpenCV could not read the image file.
    """
    if filename.lower().endswith("pdf"):
        # Render only the requested page instead of the whole document.
        pages = convert_from_path(filename, 600, first_page=page, last_page=page)
        if not pages:
            raise ValueError(f"Page {page} could not be rendered from {filename}")
        # pdf2image returns RGB PIL images; the rest of the pipeline
        # (cvtColor below, cv2.imwrite in main) expects BGR.
        image = cv2.cvtColor(np.asarray(pages[0]), cv2.COLOR_RGB2BGR)
    else:
        image = cv2.imread(filename)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f"Could not read image: {filename}")
    img_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return image, img_gray
24 | 
25 | 
# Augmentations that main() considers for every input file; each one is
# applied independently with probability --aug-prob and saved under a file
# name suffixed with the function's __name__.
augmentations = [
    rotate,
    shadow,
    watermark,
    wrinkles,
    lcd_overlay,
    gamma_saturation,
    contrast_and_brighten,
    scanner_like,
    virtual_background,
    rotation_with_lcd,
    wrinkle_with_noise,
    background_with_lcd_stretch,
]
40 | 
41 | 
@click.command()
@click.option("--data-root", type=click.Path(exists=True), default="data/")
@click.option("--output-dir", type=click.Path(exists=True), default="output/")
@click.option("--aug-prob", type=float, default=1.0)
def main(data_root, output_dir, aug_prob):
    """
    Augment every jpg/jpeg/png/pdf file found under ``data_root``.

    For each input file, every entry of ``augmentations`` is applied with
    probability ``aug_prob``. Results are written as
    ``<stem>_<augmentation name>.jpg`` under ``output_dir``, mirroring the
    input's sub-directory layout relative to ``data_root``.

    Args:
        data_root: existing directory searched recursively for inputs.
        output_dir: existing directory that receives the augmented images.
        aug_prob: probability in [0, 1] of applying each augmentation.
    """
    data_root = Path(data_root)
    output_dir = Path(output_dir)

    # Match by real suffix (case-insensitive) rather than glob character
    # classes, which also accept bogus extensions such as ".pnf".
    supported = {".jpg", ".jpeg", ".png", ".pdf"}
    aug_files = sorted(
        path
        for path in data_root.rglob("*")
        if path.is_file() and path.suffix.lower() in supported
    )

    for file in tqdm(aug_files):
        org_img, _ = get_image(str(file))
        # relative_to() works for nested or absolute data roots as well.
        target_dir = output_dir / file.relative_to(data_root).parent
        for aug in augmentations:
            if np.random.rand() < aug_prob:
                result = aug(org_img)
                # cv2.imwrite does not create missing directories.
                target_dir.mkdir(parents=True, exist_ok=True)
                # file.stem keeps dotted names ("scan.v1.pdf" -> "scan.v1").
                output_path = target_dir / f"{file.stem}_{aug.__name__}.jpg"
                cv2.imwrite(str(output_path), result)
67 | 
68 | 
# Run the click command only when this file is executed as a script.
if __name__ == "__main__":
    main()
71 | 


--------------------------------------------------------------------------------
/output/blur.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/output/blur.gif


--------------------------------------------------------------------------------
/output/contrast_and_brighten.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/output/contrast_and_brighten.gif


--------------------------------------------------------------------------------
/output/distort.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/output/distort.gif


--------------------------------------------------------------------------------
/output/gamma_saturation.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/output/gamma_saturation.gif


--------------------------------------------------------------------------------
/output/lcd_overlay.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/output/lcd_overlay.gif


--------------------------------------------------------------------------------
/output/noise.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/output/noise.gif


--------------------------------------------------------------------------------
/output/perspective.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/output/perspective.gif


--------------------------------------------------------------------------------
/output/rotate.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/output/rotate.gif


--------------------------------------------------------------------------------
/output/scanner_like.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/output/scanner_like.gif


--------------------------------------------------------------------------------
/output/shadow.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/output/shadow.gif


--------------------------------------------------------------------------------
/output/stretch.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/output/stretch.gif


--------------------------------------------------------------------------------
/output/watermark.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/output/watermark.gif


--------------------------------------------------------------------------------
/output/wrinkles.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gautam-aayush/form-data-augmentation/1be8bcfd4b798f17a8290cead19aba4f97fc3edb/output/wrinkles.gif


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | opencv-python==3.4.5.20
2 | numpy==1.16.1
3 | tqdm==4.56.0
4 | imageio==2.9.0
5 | click==7.1.2


--------------------------------------------------------------------------------
/utility.py:
--------------------------------------------------------------------------------
  1 | from functools import partial
  2 | from typing import List, Tuple
  3 | 
  4 | import cv2
  5 | import numpy as np
  6 | 
  7 | 
  8 | def _add_texture(image: np.ndarray, overlay: np.ndarray) -> np.ndarray:
  9 |     """
 10 |     Overlays overlay on top of image to get the texture of overlay on the image
 11 |     Args:
 12 |         image (np.ndarray): original image
 13 |         overlay (np.ndarray): overlay image
 14 | 
 15 |     Returns:
 16 |         np.ndarray: image with overlay added
 17 |     """
 18 |     alpha = 0.7
 19 |     beta = 1 - alpha
 20 |     height, width = image.shape[:2]
 21 |     # resize overlay to the size of original image
 22 |     overlay = cv2.resize(overlay, (width, height))
 23 |     texturized_image = cv2.addWeighted(image, alpha, overlay, beta, 0)
 24 |     return texturized_image
 25 | 
 26 | 
 27 | def _get_perspective_points(
 28 |     image: np.ndarray,
 29 |     min_height: int = None,
 30 |     min_width: int = None,
 31 |     interactive: bool = False,
 32 | ) -> np.ndarray:
 33 |     """
 34 |     Gets perspective points from an image for perspective warp. When interactive is True
 35 |     an interactive window allows selection of points from the image,
 36 |     otherwise a random perspective rectangle with min_height and min_width are selected
 37 |     Args:
 38 |         image (np.ndarray): BGR image on which perspective points are to be selected
 39 |         interactive (bool, optional): When True, an interactive window allows
 40 |         manual selection of perspective points. Defaults to False.
 41 |         min_height (int, optional): minimum height of the perspective trapezoid.
 42 |         Required when interactive is False. Defaults to None.
 43 |         min_width (int, optional): minimum width of the perspective trapezoid.
 44 |         Required when interactive is False. Defaults to None.
 45 | 
 46 |     Returns:
 47 |         np.ndarray: Array of perspective points
 48 |     """
 49 |     positions = []
 50 |     if interactive:
 51 |         window_name = "Select perspective points"
 52 |         cv2.namedWindow(window_name)
 53 |         cv2.setMouseCallback(
 54 |             window_name, partial(_draw_circle, positions=positions, image=image)
 55 |         )
 56 |         while True:
 57 |             cv2.imshow(window_name, image)
 58 |             k = cv2.waitKey(20) & 0xFF
 59 |             if k == 27:
 60 |                 break
 61 |         cv2.destroyAllWindows()
 62 |     else:
 63 |         # get a rectangle with min_height and min_width
 64 |         image_height, image_width = image.shape[:2]
 65 |         assert int((image.shape[1] - min_width)) / 2 > 0
 66 | 
 67 |         # random rectangular portion of the image, with min_height and min_width
 68 |         x1 = np.random.randint(0, (image_width - min_width) // 2)
 69 |         y1 = np.random.randint(0, (image_height - min_height) // 2)
 70 |         x2 = np.random.randint(x1 + min_width, image_width)
 71 |         y2 = np.random.randint(y1 + min_height, image_height)
 72 | 
 73 |         # change in height and width from the default rectangle
 74 |         delta = 0.1
 75 |         delta_height = np.random.randint(-delta * min_height, delta * min_height)
 76 |         delta_width = np.random.randint(-delta * min_width, delta * min_width)
 77 | 
 78 |         positions = [
 79 |             (x1, y1 + delta_height),
 80 |             (x1, y2),
 81 |             (x2, y1),
 82 |             (x2 + delta_width, y2),
 83 |         ]
 84 |     return _order_points(positions)
 85 | 
 86 | 
 87 | def _perspective_warp(
 88 |     bg_image: np.ndarray, fg_image: np.ndarray, interactive: bool = False
 89 | ) -> np.ndarray:
 90 |     """
 91 |     Perspective warp foreground image on the perspective of background image
 92 |     Args:
 93 |         bg_image (np.ndarray): [description]
 94 |         fg_image (np.ndarray): [description]
 95 |         interactive (bool, optional): Defaults to False.
 96 | 
 97 |     Returns:
 98 |         np.ndarray: [description]
 99 |     """
100 |     pts1 = _get_perspective_points(
101 |         bg_image,
102 |         min_height=fg_image.shape[0],
103 |         min_width=fg_image.shape[1],
104 |         interactive=interactive,
105 |     )
106 | 
107 |     pts2 = _order_points(
108 |         np.array(
109 |             [
110 |                 [0, 0],
111 |                 [fg_image.shape[1], 0],
112 |                 [0, fg_image.shape[0]],
113 |                 [fg_image.shape[1], fg_image.shape[0]],
114 |             ]
115 |         )
116 |     )
117 |     h, mask = cv2.findHomography(pts2, pts1, cv2.RANSAC, 5.0)
118 |     warped_image = cv2.warpPerspective(
119 |         fg_image, h, (bg_image.shape[1], bg_image.shape[0])
120 |     )
121 |     return warped_image, pts1
122 | 
123 | 
def _draw_circle(
    event: int, x: int, y: int, flags: int, param, positions: List, image: np.ndarray
):
    """
    Mouse callback that records a point when the left button is released.

    On ``cv2.EVENT_LBUTTONUP`` a small filled circle is drawn on ``image`` at
    the cursor position and ``[x, y]`` is appended to ``positions``. Every
    other event is ignored.
    Args:
        event (int): cv2 event
        x (int): x coordinate
        y (int): y coordinate
        flags (int): unused by this callback
        param: unused by this callback
        positions (List): list that collects the selected points
        image (np.ndarray): BGR image, modified in place
    """
    if event != cv2.EVENT_LBUTTONUP:
        return
    marker_radius, marker_color = 2, (255, 0, 0)
    # negative thickness draws a filled circle
    cv2.circle(image, (x, y), marker_radius, marker_color, -1)
    positions.append([x, y])
142 | 
143 | 
144 | def _order_points(pts: List) -> np.ndarray:
145 |     """
146 |     Order the points of a rectangle in top-left, top-right, bottom-right
147 |     and bottom-left order
148 |     Args:
149 |         pts (List): List of points to be ordered
150 | 
151 |     Returns:
152 |         np.ndarray: Array of ordered points
153 |     """
154 |     final_rect = np.zeros((4, 2))
155 | 
156 |     sums = np.sum(pts, axis=1)
157 | 
158 |     # top left
159 |     final_rect[0] = pts[np.argmin(sums)]
160 |     # bottom right
161 |     final_rect[2] = pts[np.argmax(sums)]
162 | 
163 |     diff = np.diff(pts, axis=1)
164 | 
165 |     # top right
166 |     final_rect[1] = pts[np.argmin(diff)]
167 |     # bottom left
168 |     final_rect[3] = pts[np.argmax(diff)]
169 | 
170 |     return final_rect
171 | 
172 | 
173 | def _generate_shadow_coordinates(
174 |     imshape: Tuple[int], no_of_shadows: int = 1
175 | ) -> List[np.ndarray]:
176 |     """
177 |     Generates 2D coordinates for a polygon of random dimensionality
178 |     where the value of the coordinates are limited by imshape
179 |     Args:
180 |         imshape (Tuple[int]): maximum values for x and y coordinate
181 |         no_of_shadows (int, optional): Number of polygons to generate.
182 |         Defaults to 1.
183 | 
184 |     Returns:
185 |         List[np.ndarray]: List of polygon coordiantes
186 |     """
187 |     vertices_list = []
188 |     x_lim, y_lim = imshape
189 |     for index in range(no_of_shadows):
190 |         vertex = []
191 |         min_vertices, max_vertices = 3, 5
192 |         for dimensions in range(np.random.randint(min_vertices, max_vertices)):
193 |             vertex.append(
194 |                 (y_lim * np.random.uniform(), x_lim // 3 + x_lim * np.random.uniform(),)
195 |             )
196 |         # polygon vertices
197 |         vertices = np.array([vertex], dtype=np.int32)
198 |         vertices_list.append(vertices)
199 |     return vertices_list
200 | 


--------------------------------------------------------------------------------
/warp_mls.py:
--------------------------------------------------------------------------------
  1 | # -*- coding:utf-8 -*-
  2 | # Author: RubanSeven
  3 | # Github repo: https://github.com/RubanSeven/Text-Image-Augmentation-python
  4 | 
  5 | 
  6 | import numpy as np
  7 | 
  8 | 
  9 | class WarpMLS:
 10 |     def __init__(self, src, src_pts, dst_pts, dst_w, dst_h, trans_ratio=1.0):
 11 |         self.src = src
 12 |         self.src_pts = src_pts
 13 |         self.dst_pts = dst_pts
 14 |         self.pt_count = len(self.dst_pts)
 15 |         self.dst_w = dst_w
 16 |         self.dst_h = dst_h
 17 |         self.trans_ratio = trans_ratio
 18 |         self.grid_size = 100
 19 |         self.rdx = np.zeros((self.dst_h, self.dst_w))
 20 |         self.rdy = np.zeros((self.dst_h, self.dst_w))
 21 | 
 22 |     @staticmethod
 23 |     def __bilinear_interp(x, y, v11, v12, v21, v22):
 24 |         return (v11 * (1 - y) + v12 * y) * (1 - x) + (v21 * (1 - y) + v22 * y) * x
 25 | 
 26 |     def generate(self):
 27 |         self.calc_delta()
 28 |         return self.gen_img()
 29 | 
 30 |     def calc_delta(self):
 31 |         w = np.zeros(self.pt_count, dtype=np.float32)
 32 | 
 33 |         if self.pt_count < 2:
 34 |             return
 35 | 
 36 |         i = 0
 37 |         while 1:
 38 |             if self.dst_w <= i < self.dst_w + self.grid_size - 1:
 39 |                 i = self.dst_w - 1
 40 |             elif i >= self.dst_w:
 41 |                 break
 42 | 
 43 |             j = 0
 44 |             while 1:
 45 |                 if self.dst_h <= j < self.dst_h + self.grid_size - 1:
 46 |                     j = self.dst_h - 1
 47 |                 elif j >= self.dst_h:
 48 |                     break
 49 | 
 50 |                 sw = 0
 51 |                 swp = np.zeros(2, dtype=np.float32)
 52 |                 swq = np.zeros(2, dtype=np.float32)
 53 |                 new_pt = np.zeros(2, dtype=np.float32)
 54 |                 cur_pt = np.array([i, j], dtype=np.float32)
 55 | 
 56 |                 k = 0
 57 |                 for k in range(self.pt_count):
 58 |                     if i == self.dst_pts[k][0] and j == self.dst_pts[k][1]:
 59 |                         break
 60 | 
 61 |                     w[k] = 1.0 / (
 62 |                         (i - self.dst_pts[k][0]) * (i - self.dst_pts[k][0])
 63 |                         + (j - self.dst_pts[k][1]) * (j - self.dst_pts[k][1])
 64 |                     )
 65 | 
 66 |                     sw += w[k]
 67 |                     swp = swp + w[k] * np.array(self.dst_pts[k])
 68 |                     swq = swq + w[k] * np.array(self.src_pts[k])
 69 | 
 70 |                 if k == self.pt_count - 1:
 71 |                     pstar = 1 / sw * swp
 72 |                     qstar = 1 / sw * swq
 73 | 
 74 |                     miu_s = 0
 75 |                     for k in range(self.pt_count):
 76 |                         if i == self.dst_pts[k][0] and j == self.dst_pts[k][1]:
 77 |                             continue
 78 |                         pt_i = self.dst_pts[k] - pstar
 79 |                         miu_s += w[k] * np.sum(pt_i * pt_i)
 80 | 
 81 |                     cur_pt -= pstar
 82 |                     cur_pt_j = np.array([-cur_pt[1], cur_pt[0]])
 83 | 
 84 |                     for k in range(self.pt_count):
 85 |                         if i == self.dst_pts[k][0] and j == self.dst_pts[k][1]:
 86 |                             continue
 87 | 
 88 |                         pt_i = self.dst_pts[k] - pstar
 89 |                         pt_j = np.array([-pt_i[1], pt_i[0]])
 90 | 
 91 |                         tmp_pt = np.zeros(2, dtype=np.float32)
 92 |                         tmp_pt[0] = (
 93 |                             np.sum(pt_i * cur_pt) * self.src_pts[k][0]
 94 |                             - np.sum(pt_j * cur_pt) * self.src_pts[k][1]
 95 |                         )
 96 |                         tmp_pt[1] = (
 97 |                             -np.sum(pt_i * cur_pt_j) * self.src_pts[k][0]
 98 |                             + np.sum(pt_j * cur_pt_j) * self.src_pts[k][1]
 99 |                         )
100 |                         tmp_pt *= w[k] / miu_s
101 |                         new_pt += tmp_pt
102 | 
103 |                     new_pt += qstar
104 |                 else:
105 |                     new_pt = self.src_pts[k]
106 | 
107 |                 self.rdx[j, i] = new_pt[0] - i
108 |                 self.rdy[j, i] = new_pt[1] - j
109 | 
110 |                 j += self.grid_size
111 |             i += self.grid_size
112 | 
113 |     def gen_img(self):
114 |         src_h, src_w = self.src.shape[:2]
115 |         dst = np.zeros_like(self.src, dtype=np.float32)
116 | 
117 |         for i in np.arange(0, self.dst_h, self.grid_size):
118 |             for j in np.arange(0, self.dst_w, self.grid_size):
119 |                 ni = i + self.grid_size
120 |                 nj = j + self.grid_size
121 |                 w = h = self.grid_size
122 |                 if ni >= self.dst_h:
123 |                     ni = self.dst_h - 1
124 |                     h = ni - i + 1
125 |                 if nj >= self.dst_w:
126 |                     nj = self.dst_w - 1
127 |                     w = nj - j + 1
128 | 
129 |                 di = np.reshape(np.arange(h), (-1, 1))
130 |                 dj = np.reshape(np.arange(w), (1, -1))
131 |                 delta_x = self.__bilinear_interp(
132 |                     di / h,
133 |                     dj / w,
134 |                     self.rdx[i, j],
135 |                     self.rdx[i, nj],
136 |                     self.rdx[ni, j],
137 |                     self.rdx[ni, nj],
138 |                 )
139 |                 delta_y = self.__bilinear_interp(
140 |                     di / h,
141 |                     dj / w,
142 |                     self.rdy[i, j],
143 |                     self.rdy[i, nj],
144 |                     self.rdy[ni, j],
145 |                     self.rdy[ni, nj],
146 |                 )
147 |                 nx = j + dj + delta_x * self.trans_ratio
148 |                 ny = i + di + delta_y * self.trans_ratio
149 |                 nx = np.clip(nx, 0, src_w - 1)
150 |                 ny = np.clip(ny, 0, src_h - 1)
151 |                 nxi = np.array(np.floor(nx), dtype=np.int32)
152 |                 nyi = np.array(np.floor(ny), dtype=np.int32)
153 |                 nxi1 = np.array(np.ceil(nx), dtype=np.int32)
154 |                 nyi1 = np.array(np.ceil(ny), dtype=np.int32)
155 | 
156 |                 if len(self.src.shape) == 3:
157 |                     x = np.tile(np.expand_dims(ny - nyi, axis=-1), (1, 1, 3))
158 |                     y = np.tile(np.expand_dims(nx - nxi, axis=-1), (1, 1, 3))
159 |                 else:
160 |                     x = ny - nyi
161 |                     y = nx - nxi
162 |                 dst[i : i + h, j : j + w] = self.__bilinear_interp(
163 |                     x,
164 |                     y,
165 |                     self.src[nyi, nxi],
166 |                     self.src[nyi, nxi1],
167 |                     self.src[nyi1, nxi],
168 |                     self.src[nyi1, nxi1],
169 |                 )
170 | 
171 |                 # for di in range(h):
172 |                 #     for dj in range(w):
173 |                 #         # print(ni, nj, i, j)
174 |                 #         delta_x = self.__bilinear_interp(di / h, dj / w, self.rdx[i, j], self.rdx[i, nj],
175 |                 #                                          self.rdx[ni, j], self.rdx[ni, nj])
176 |                 #         delta_y = self.__bilinear_interp(di / h, dj / w, self.rdy[i, j], self.rdy[i, nj],
177 |                 #                                          self.rdy[ni, j], self.rdy[ni, nj])
178 |                 #         nx = j + dj + delta_x * self.trans_ratio
179 |                 #         ny = i + di + delta_y * self.trans_ratio
180 |                 #         nx = min(src_w - 1, max(0, nx))
181 |                 #         ny = min(src_h - 1, max(0, ny))
182 |                 #         nxi = int(nx)
183 |                 #         nyi = int(ny)
184 |                 #         nxi1 = math.ceil(nx)
185 |                 #         nyi1 = math.ceil(ny)
186 |                 #
187 |                 #         dst[i + di, j + dj] = self.__bilinear_interp(ny - nyi, nx - nxi,
188 |                 #                                                      self.src[nyi, nxi],
189 |                 #                                                      self.src[nyi, nxi1],
190 |                 #                                                      self.src[nyi1, nxi],
191 |                 #                                                      self.src[nyi1, nxi1]
192 |                 #                                                      )
193 | 
194 |         dst = np.clip(dst, 0, 255)
195 |         dst = np.array(dst, dtype=np.uint8)
196 | 
197 |         return dst
198 | 


--------------------------------------------------------------------------------