├── .gitignore ├── Readme.md ├── Resources ├── coco.names.txt ├── yolov4-tiny.cfg └── yolov4-tiny.weights ├── Result ├── input │ ├── bus1.png │ ├── cars.png │ ├── city.png │ ├── traffic_light.png │ └── truck.jpg └── output │ ├── bus1.png │ ├── cars.png │ ├── city.png │ ├── traffic_light.png │ └── truck.png ├── libraries.bat ├── main.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | Readme.md 2 | Result -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # Finding objects on the image 2 | In this project, I'll show you how to find objects in an image, select them, and count them. This program can work with recaptcha 3 | 4 | ## Features 5 | * Works with image 6 | * Distinguishes 80 objects 7 | * The user can specify which object to look for on the image 8 | 9 | ## How to install 10 | 1. Clone this repository on your computer 11 | `https://github.com/paveldat/objects_on_image.git` 12 | 2. Install all the requirements 13 | `run libraries.bat` or 14 | `pip install -r requirements.txt` 15 | 3. Run the program 16 | `python main.py` 17 | 18 | ## Help 19 | When you start the program, you will be prompted to enter the path to the image and the name of the object that you need to find and calculate it. 20 | If you need to find several objects in the image, write them separated by commas. 
21 | Names of possible objects: 22 | ``` 23 | 'person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 24 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 25 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 26 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 27 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 28 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 29 | 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 30 | 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 31 | 'teddy bear', 'hair drier', 'toothbrush' 32 | ``` 33 | 34 | ## Result 35 | ``` 36 | Path to image(recapcha): Result\input\bus1.png 37 | What we are looking for: bus 38 | ``` 39 | ![image_input_1](https://github.com/paveldat/objects_on_image/blob/main/Result/input/bus1.png) 40 | ![image_output_1](https://github.com/paveldat/objects_on_image/blob/main/Result/output/bus1.png) 41 | 42 | ``` 43 | Path to image(recapcha): Result\input\truck.jpg 44 | What we are looking for: truck 45 | ``` 46 | 47 | 48 | ``` 49 | Path to image(recapcha): Result\input\city.png 50 | What we are looking for: car, person, traffic light 51 | ``` 52 | 53 | -------------------------------------------------------------------------------- /Resources/coco.names.txt: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | 
zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush -------------------------------------------------------------------------------- /Resources/yolov4-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.00261 19 | burn_in=1000 20 | 21 | max_batches = 2000200 22 | policy=steps 23 | steps=1600000,1800000 24 | scales=.1,.1 25 | 26 | 27 | #weights_reject_freq=1001 28 | #ema_alpha=0.9998 29 | #equidistant_point=1000 30 | #num_sigmas_reject_badlabels=3 31 | #badlabels_rejection_percentage=0.2 32 | 33 | 34 | [convolutional] 35 | batch_normalize=1 36 | filters=32 37 | size=3 38 | stride=2 39 | pad=1 40 | activation=leaky 41 | 42 | [convolutional] 43 | batch_normalize=1 44 | filters=64 45 | size=3 46 | stride=2 47 | pad=1 48 | activation=leaky 49 | 50 | [convolutional] 51 | batch_normalize=1 52 | filters=64 53 | size=3 54 | stride=1 55 | pad=1 56 | activation=leaky 57 | 58 | [route] 59 | layers=-1 60 | groups=2 61 | group_id=1 62 | 63 | [convolutional] 64 | batch_normalize=1 65 
| filters=32 66 | size=3 67 | stride=1 68 | pad=1 69 | activation=leaky 70 | 71 | [convolutional] 72 | batch_normalize=1 73 | filters=32 74 | size=3 75 | stride=1 76 | pad=1 77 | activation=leaky 78 | 79 | [route] 80 | layers = -1,-2 81 | 82 | [convolutional] 83 | batch_normalize=1 84 | filters=64 85 | size=1 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | [route] 91 | layers = -6,-1 92 | 93 | [maxpool] 94 | size=2 95 | stride=2 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=128 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | [route] 106 | layers=-1 107 | groups=2 108 | group_id=1 109 | 110 | [convolutional] 111 | batch_normalize=1 112 | filters=64 113 | size=3 114 | stride=1 115 | pad=1 116 | activation=leaky 117 | 118 | [convolutional] 119 | batch_normalize=1 120 | filters=64 121 | size=3 122 | stride=1 123 | pad=1 124 | activation=leaky 125 | 126 | [route] 127 | layers = -1,-2 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=128 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [route] 138 | layers = -6,-1 139 | 140 | [maxpool] 141 | size=2 142 | stride=2 143 | 144 | [convolutional] 145 | batch_normalize=1 146 | filters=256 147 | size=3 148 | stride=1 149 | pad=1 150 | activation=leaky 151 | 152 | [route] 153 | layers=-1 154 | groups=2 155 | group_id=1 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=128 160 | size=3 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=128 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [route] 174 | layers = -1,-2 175 | 176 | [convolutional] 177 | batch_normalize=1 178 | filters=256 179 | size=1 180 | stride=1 181 | pad=1 182 | activation=leaky 183 | 184 | [route] 185 | layers = -6,-1 186 | 187 | [maxpool] 188 | size=2 189 | stride=2 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=512 194 | size=3 195 | stride=1 
196 | pad=1 197 | activation=leaky 198 | 199 | ################################## 200 | 201 | [convolutional] 202 | batch_normalize=1 203 | filters=256 204 | size=1 205 | stride=1 206 | pad=1 207 | activation=leaky 208 | 209 | [convolutional] 210 | batch_normalize=1 211 | filters=512 212 | size=3 213 | stride=1 214 | pad=1 215 | activation=leaky 216 | 217 | [convolutional] 218 | size=1 219 | stride=1 220 | pad=1 221 | filters=255 222 | activation=linear 223 | 224 | 225 | 226 | [yolo] 227 | mask = 3,4,5 228 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 229 | classes=80 230 | num=6 231 | jitter=.3 232 | scale_x_y = 1.05 233 | cls_normalizer=1.0 234 | iou_normalizer=0.07 235 | iou_loss=ciou 236 | ignore_thresh = .7 237 | truth_thresh = 1 238 | random=0 239 | resize=1.5 240 | nms_kind=greedynms 241 | beta_nms=0.6 242 | #new_coords=1 243 | #scale_x_y = 2.0 244 | 245 | [route] 246 | layers = -4 247 | 248 | [convolutional] 249 | batch_normalize=1 250 | filters=128 251 | size=1 252 | stride=1 253 | pad=1 254 | activation=leaky 255 | 256 | [upsample] 257 | stride=2 258 | 259 | [route] 260 | layers = -1, 23 261 | 262 | [convolutional] 263 | batch_normalize=1 264 | filters=256 265 | size=3 266 | stride=1 267 | pad=1 268 | activation=leaky 269 | 270 | [convolutional] 271 | size=1 272 | stride=1 273 | pad=1 274 | filters=255 275 | activation=linear 276 | 277 | [yolo] 278 | mask = 1,2,3 279 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 280 | classes=80 281 | num=6 282 | jitter=.3 283 | scale_x_y = 1.05 284 | cls_normalizer=1.0 285 | iou_normalizer=0.07 286 | iou_loss=ciou 287 | ignore_thresh = .7 288 | truth_thresh = 1 289 | random=0 290 | resize=1.5 291 | nms_kind=greedynms 292 | beta_nms=0.6 293 | #new_coords=1 294 | #scale_x_y = 2.0 -------------------------------------------------------------------------------- /Resources/yolov4-tiny.weights: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/paveldat/objects_detection_on_image/8d4f3f7673d54679d4e2957d38f3dc344c6ab5ee/Resources/yolov4-tiny.weights -------------------------------------------------------------------------------- /Result/input/bus1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paveldat/objects_detection_on_image/8d4f3f7673d54679d4e2957d38f3dc344c6ab5ee/Result/input/bus1.png -------------------------------------------------------------------------------- /Result/input/cars.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paveldat/objects_detection_on_image/8d4f3f7673d54679d4e2957d38f3dc344c6ab5ee/Result/input/cars.png -------------------------------------------------------------------------------- /Result/input/city.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paveldat/objects_detection_on_image/8d4f3f7673d54679d4e2957d38f3dc344c6ab5ee/Result/input/city.png -------------------------------------------------------------------------------- /Result/input/traffic_light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paveldat/objects_detection_on_image/8d4f3f7673d54679d4e2957d38f3dc344c6ab5ee/Result/input/traffic_light.png -------------------------------------------------------------------------------- /Result/input/truck.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paveldat/objects_detection_on_image/8d4f3f7673d54679d4e2957d38f3dc344c6ab5ee/Result/input/truck.jpg -------------------------------------------------------------------------------- /Result/output/bus1.png: -------------------------------------------------------------------------------- 
import cv2
import numpy as np
from art import tprint


def apply_yolo_object_detection(image_to_process):
    """
    Recognition and determination of the coordinates of objects on the image.

    :param image_to_process: original image (BGR, as returned by cv2.imread)
    :return: image with marked objects and captions to them
    """

    height, width, _ = image_to_process.shape
    # Normalize pixel values to [0, 1] and resize to the network input size.
    blob = cv2.dnn.blobFromImage(image_to_process, 1 / 255, (608, 608),
                                 (0, 0, 0), swapRB=True, crop=False)
    net.setInput(blob)
    outs = net.forward(out_layers)
    class_indexes, class_scores, boxes = ([] for _ in range(3))
    objects_count = 0

    # Starting a search for objects in an image
    for out in outs:
        for obj in out:
            # obj layout: [center_x, center_y, w, h, objectness, class scores...]
            scores = obj[5:]
            class_index = np.argmax(scores)
            class_score = scores[class_index]
            if class_score > 0:
                center_x = int(obj[0] * width)
                center_y = int(obj[1] * height)
                obj_width = int(obj[2] * width)
                obj_height = int(obj[3] * height)
                box = [center_x - obj_width // 2, center_y - obj_height // 2,
                       obj_width, obj_height]
                boxes.append(box)
                class_indexes.append(class_index)
                class_scores.append(float(class_score))

    # Non-maximum suppression: drop overlapping boxes for the same object.
    chosen_boxes = cv2.dnn.NMSBoxes(boxes, class_scores, 0.0, 0.4)
    # Depending on the OpenCV version, NMSBoxes returns either a flat array of
    # indices or an array of 1-element arrays; flatten() handles both
    # (the original dead statement `box_index = box_index` handled neither).
    for box_index in np.array(chosen_boxes).flatten():
        box = boxes[box_index]
        class_index = class_indexes[box_index]

        # For debugging, we draw objects included in the desired classes
        if classes[class_index] in classes_to_look_for:
            objects_count += 1
            image_to_process = draw_object_bounding_box(image_to_process,
                                                        class_index, box)

    final_image = draw_object_count(image_to_process, objects_count)
    return final_image


def draw_object_bounding_box(image_to_process, index, box):
    """
    Drawing object borders with captions.

    :param image_to_process: original image
    :param index: index of object class defined with YOLO
    :param box: coordinates of the area around the object as (x, y, w, h)
    :return: image with marked objects
    """

    x, y, w, h = box
    start = (x, y)
    end = (x + w, y + h)
    color = (0, 255, 0)
    width = 2
    final_image = cv2.rectangle(image_to_process, start, end, color, width)

    # Caption placed just above the top-left corner of the box
    start = (x, y - 10)
    font_size = 1
    font = cv2.FONT_HERSHEY_SIMPLEX
    width = 2
    text = classes[index]
    final_image = cv2.putText(final_image, text, start, font,
                              font_size, color, width, cv2.LINE_AA)

    return final_image


def draw_object_count(image_to_process, objects_count):
    """
    Signature of the number of found objects in the image.

    :param image_to_process: original image
    :param objects_count: the number of objects of the desired class
    :return: image with labeled number of found objects
    """

    start = (10, 120)
    font_size = 1.5
    font = cv2.FONT_HERSHEY_SIMPLEX
    width = 3
    text = "Objects found: " + str(objects_count)

    # Text output with a stroke
    # (so that it can be seen in different lighting conditions of the picture)
    white_color = (255, 255, 255)
    black_outline_color = (0, 0, 0)
    final_image = cv2.putText(image_to_process, text, start, font, font_size,
                              black_outline_color, width * 3, cv2.LINE_AA)
    final_image = cv2.putText(final_image, text, start, font, font_size,
                              white_color, width, cv2.LINE_AA)

    return final_image


def start_image_object_detection(img_path):
    """
    Image analysis: run YOLO detection on the image at img_path and
    display the annotated result until a key is pressed.

    :param img_path: path to the input image file
    """

    try:
        # Applying Object Recognition Techniques in an Image by YOLO
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals a missing/unreadable file by returning None
            # instead of raising, which would otherwise surface later as an
            # opaque AttributeError on image.shape.
            print("Could not read image: " + img_path)
            return
        image = apply_yolo_object_detection(image)

        # Displaying the processed image on the screen
        cv2.imshow("Image", image)
        # waitKey returns a keycode (possibly 0), so the window teardown
        # must not be conditioned on its truthiness.
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    except KeyboardInterrupt:
        pass


if __name__ == '__main__':

    # Logo
    tprint("Object detection")
    tprint("by")
    tprint("paveldat")

    # Loading YOLO weights from files and setting up the network
    net = cv2.dnn.readNetFromDarknet("Resources/yolov4-tiny.cfg",
                                     "Resources/yolov4-tiny.weights")
    layer_names = net.getLayerNames()
    out_layers_indexes = net.getUnconnectedOutLayers()
    out_layers = [layer_names[index - 1] for index in out_layers_indexes]

    # Loading from a file of object classes that YOLO can detect
    with open("Resources/coco.names.txt") as file:
        classes = file.read().split("\n")

    # Determining classes that will be prioritized for search in an image
    # The names are in the file coco.names.txt

    image = input("Path to image(recapcha): ")
    look_for = input("What we are looking for: ").split(',')

    # Delete spaces around each requested class name
    classes_to_look_for = [look.strip() for look in look_for]

    start_image_object_detection(image)