#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# (README.md content from the repository dump, preserved as comments)
#   # deep-learning-tools
#   ## NMS Demo Image
#   ![nms](https://user-images.githubusercontent.com/21311442/34034495-c630ce9c-e1b9-11e7-86db-2ca88bb32dcd.jpg)
#   ## Transform Demo Image
#   ![Transform](https://user-images.githubusercontent.com/21311442/84614447-c2b53780-aef8-11ea-9278-ea964f6109b0.png)
#
# nms.py — non-maximum suppression demo.

import numpy as np


def nms(bounding_boxes, confidence_score, threshold):
    """Perform non-maximum suppression on candidate bounding boxes.

    Parameters
    ----------
    bounding_boxes : list of (x1, y1, x2, y2)
        Corner coordinates of each candidate box (inclusive pixel
        coordinates, hence the ``+ 1`` in the area computation).
    confidence_score : list of float
        Confidence score of each box, parallel to ``bounding_boxes``.
    threshold : float
        IoU threshold; any box whose IoU with an already-picked box is
        >= ``threshold`` is suppressed.

    Returns
    -------
    tuple of (list, list)
        The surviving boxes and their scores, highest score first.
        ``([], [])`` when no boxes are given.
    """
    # If no bounding boxes, return empty list
    if len(bounding_boxes) == 0:
        return [], []

    # Bounding boxes as an array for vectorized coordinate math.
    boxes = np.array(bounding_boxes)

    # Coordinates of bounding boxes.
    start_x = boxes[:, 0]
    start_y = boxes[:, 1]
    end_x = boxes[:, 2]
    end_y = boxes[:, 3]

    # Confidence scores of bounding boxes.
    score = np.array(confidence_score)

    # Picked bounding boxes and their scores.
    picked_boxes = []
    picked_score = []

    # Areas of all boxes; +1 because coordinates are inclusive.
    areas = (end_x - start_x + 1) * (end_y - start_y + 1)

    # Ascending sort by score: the best remaining box is always order[-1].
    order = np.argsort(score)

    while order.size > 0:
        # Index of the largest remaining confidence score.
        index = order[-1]

        # Pick the bounding box with the largest confidence score.
        picked_boxes.append(bounding_boxes[index])
        picked_score.append(confidence_score[index])

        # Intersection rectangle between the picked box and the rest.
        x1 = np.maximum(start_x[index], start_x[order[:-1]])
        x2 = np.minimum(end_x[index], end_x[order[:-1]])
        y1 = np.maximum(start_y[index], start_y[order[:-1]])
        y2 = np.minimum(end_y[index], end_y[order[:-1]])

        # Clamp to zero so disjoint boxes contribute no intersection.
        w = np.maximum(0.0, x2 - x1 + 1)
        h = np.maximum(0.0, y2 - y1 + 1)
        intersection = w * h

        # IoU = intersection / union.
        ratio = intersection / (areas[index] + areas[order[:-1]] - intersection)

        # Keep only the boxes that do not overlap the picked box too much.
        left = np.where(ratio < threshold)
        order = order[left]

    return picked_boxes, picked_score


def main():
    """Draw the demo boxes before and after NMS and show both images."""
    # Imported lazily so nms() stays importable without OpenCV installed.
    import cv2

    # Image name
    image_name = 'nms.jpg'

    # Bounding boxes
    bounding_boxes = [(187, 82, 337, 317), (150, 67, 305, 282), (246, 121, 368, 304)]
    confidence_score = [0.9, 0.75, 0.8]

    # Read image
    image = cv2.imread(image_name)
    if image is None:
        # cv2.imread returns None (no exception) when the file is missing
        # or unreadable; fail loudly instead of crashing later on .copy().
        raise FileNotFoundError('could not read image: %s' % image_name)

    # Copy image as original
    org = image.copy()

    # Draw parameters
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1
    thickness = 2

    # IoU threshold
    threshold = 0.4

    # Draw all candidate boxes and confidence scores on the "original" copy.
    for (start_x, start_y, end_x, end_y), confidence in zip(bounding_boxes, confidence_score):
        (w, h), baseline = cv2.getTextSize(str(confidence), font, font_scale, thickness)
        cv2.rectangle(org, (start_x, start_y - (2 * baseline + 5)), (start_x + w, start_y), (0, 255, 255), -1)
        cv2.rectangle(org, (start_x, start_y), (end_x, end_y), (0, 255, 255), 2)
        cv2.putText(org, str(confidence), (start_x, start_y), font, font_scale, (0, 0, 0), thickness)

    # Run non-max suppression algorithm
    picked_boxes, picked_score = nms(bounding_boxes, confidence_score, threshold)

    # Draw the surviving boxes after non-maximum suppression.
    for (start_x, start_y, end_x, end_y), confidence in zip(picked_boxes, picked_score):
        (w, h), baseline = cv2.getTextSize(str(confidence), font, font_scale, thickness)
        cv2.rectangle(image, (start_x, start_y - (2 * baseline + 5)), (start_x + w, start_y), (0, 255, 255), -1)
        cv2.rectangle(image, (start_x, start_y), (end_x, end_y), (0, 255, 255), 2)
        cv2.putText(image, str(confidence), (start_x, start_y), font, font_scale, (0, 0, 0), thickness)

    # Show image
    cv2.imshow('Original', org)
    cv2.imshow('NMS', image)
    cv2.waitKey(0)


if __name__ == '__main__':
    main()
# ---------------------------------------------------------------------------
# transforms.py — torchvision transform demos.  Every demo except the active
# RandomErasing one is kept inside the block-comment string below, exactly as
# in the original file.
# ---------------------------------------------------------------------------
import torchvision.transforms as transforms

from PIL import Image

img = Image.open('tina.jpg')

'''
# CenterCrop
size = (224, 224)
transform = transforms.CenterCrop(size)
center_crop = transform(img)
center_crop.save('center_crop.jpg')

# ColorJitter
brightness = (1, 10)
contrast = (1, 10)
saturation = (1, 10)
hue = (0.2, 0.4)
transform = transforms.ColorJitter(brightness, contrast, saturation, hue)
color_jitter = transform(img)
color_jitter.save('color_jitter.jpg')

# FiveCrop
size = (224, 224)
transform = transforms.FiveCrop(size)
five_crop = transform(img)
for index, img in enumerate(five_crop):
    img.save(str(index) + '.jpg')

# Grayscale
transform = transforms.Grayscale()
grayscale = transform(img)
grayscale.save('grayscale.jpg')

# Compose, Pad
size = (224, 224)
padding = 16
fill = (0, 0, 255)
transform = transforms.Compose([
    transforms.CenterCrop(size),
    transforms.Pad(padding, fill)
])
pad = transform(img)
pad.save('pad.jpg')

# RandomAffine
degrees = (15, 30)
translate=(0, 0.2)
scale=(0.8, 1)
fillcolor = (0, 0, 255)
transform = transforms.RandomAffine(degrees=degrees, translate=translate, scale=scale, fillcolor=fillcolor)
random_affine = transform(img)
random_affine.save('random_affine.jpg')

# RandomApply
size = (224, 224)
padding = 16
fill = (0, 0, 255)
transform = transforms.RandomApply([transforms.CenterCrop(size), transforms.Pad(padding, fill)])
for i in range(3):
    random_apply = transform(img)
    random_apply.save(str(i) + '.jpg')

# RandomChoice
transform = transforms.RandomChoice([transforms.RandomAffine(degrees),
                                     transforms.CenterCrop(size),
                                     transforms.Pad(padding, fill)])
for i in range(3):
    random_order = transform(img)
    random_order.save(str(i) + '.jpg')

# RandomCrop
size = (224, 224)
transform = transforms.RandomCrop(size)
random_crop = transform(img)
random_crop.save('p.jpg')

# RandomGrayscale
p = 0.5
transform = transforms.RandomGrayscale(p)
for i in range(3):
    random_grayscale = transform(img)
    random_grayscale.save(str(i) + '.jpg')

# RandomHorizontalFlip
p = 0.5
transform = transforms.RandomHorizontalFlip(p)
for i in range(3):
    random_horizontal_filp = transform(img)
    random_horizontal_filp.save(str(i) + '.jpg')

# RandomOrder
size = (224, 224)
padding = 16
fill = (0, 0, 255)
degrees = (15, 30)
transform = transforms.RandomOrder([transforms.RandomAffine(degrees),
                                    transforms.CenterCrop(size),
                                    transforms.Pad(padding, fill)])
for i in range(3):
    random_order = transform(img)
    random_order.save(str(i) + '.jpg')

# RandomPerspective
distortion_scale = 1
p = 1
fill = (0, 0, 255)
transform = transforms.RandomPerspective(distortion_scale=distortion_scale, p=p, fill=fill)
random_perspective = transform(img)
random_perspective.save('random_perspective.jpg')

# RandomResizedCrop
size = (256, 256)
scale=(0.8, 1.0)
ratio=(0.75, 1.0)
transform = transforms.RandomResizedCrop(size=size, scale=scale, ratio=ratio)
random_resized_crop = transform(img)
random_resized_crop.save('random_resized_crop.jpg')

# RandomRotation
degrees = (15, 30)
fill = (0, 0, 255)
transform = transforms.RandomRotation(degrees=degrees, fill=fill)
random_rotation = transform(img)
random_rotation.save('random_rotation.jpg')

# RandomVerticalFlip
p = 1
transform = transforms.RandomVerticalFlip(p)
random_vertical_filp = transform(img)
random_vertical_filp.save('random_vertical_filp.jpg')

# Resize
size = (224, 224)
transform = transforms.Resize(size)
resize_img = transform(img)
resize_img.save('resize_img.jpg')

# ToPILImage
img = Image.open('tina.jpg')
transform = transforms.ToTensor()
img = transform(img)
print(img.size())
img_r = img[0, :, :]
img_g = img[1, :, :]
img_b = img[2, :, :]
print(type(img_r))
print(img_r.size())
transform = transforms.ToPILImage()
img_r = transform(img_r)
img_g = transform(img_g)
img_b = transform(img_b)
print(type(img_r))
img_r.save('img_r.jpg')
img_g.save('img_g.jpg')
img_b.save('img_b.jpg')

# ToTensor
img = Image.open('tina.jpg')
print(type(img))
print(img.size)
transform = transforms.ToTensor()
img = transform(img)
print(type(img))
print(img.size())
'''

# RandomErasing
p = 1.0
scale = (0.2, 0.3)
ratio = (0.5, 1.0)
# BUG FIX: RandomErasing runs AFTER ToTensor(), which scales the image to the
# [0, 1] float range, so the per-channel erase value must be on that same
# scale.  The original value (0, 0, 255) is on the 0-255 scale; when
# ToPILImage() converts back to uint8 (multiply by 255, cast), the blue
# channel overflows and the erased patch is not the intended solid blue.
value = (0, 0, 1.0)

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomErasing(p=p, scale=scale, ratio=ratio, value=value),
    transforms.ToPILImage()
])
random_erasing = transform(img)
random_erasing.save('random_erasing.jpg')