├── .gitignore
├── Readme.md
├── Resources
├── coco.names.txt
├── yolov4-tiny.cfg
└── yolov4-tiny.weights
├── Result
├── input
│ ├── bus1.png
│ ├── cars.png
│ ├── city.png
│ ├── traffic_light.png
│ └── truck.jpg
└── output
│ ├── bus1.png
│ ├── cars.png
│ ├── city.png
│ ├── traffic_light.png
│ └── truck.png
├── libraries.bat
├── main.py
└── requirements.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | Readme.md
2 | Result
--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
1 | # Finding objects on the image
2 | In this project, I'll show you how to find objects in an image, highlight them, and count them. This program can also be used to solve reCAPTCHA-style image challenges.
3 |
4 | ## Features
5 | * Works with image
6 | * Distinguishes 80 objects
7 | * The user can specify which object to look for on the image
8 |
9 | ## How to install
10 | 1. Clone this repository on your computer
11 | `https://github.com/paveldat/objects_on_image.git`
12 | 2. Install all the requirements
13 | Run `libraries.bat` or
14 | `pip install -r requirements.txt`
15 | 3. Run the program
16 | `python main.py`
17 |
18 | ## Help
19 | When you start the program, you will be prompted to enter the path to an image and the name of the object(s) you want to find and count.
20 | If you need to find several objects in the image, write them separated by commas.
21 | Names of possible objects:
22 | ```
23 | 'person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
24 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
25 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
26 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
27 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
28 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa',
29 | 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard',
30 | 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
31 | 'teddy bear', 'hair drier', 'toothbrush'
32 | ```
33 |
34 | ## Result
35 | ```
36 | Path to image(recapcha): Result\input\bus1.png
37 | What we are looking for: bus
38 | ```
39 | 
40 | 
41 |
42 | ```
43 | Path to image(recapcha): Result\input\truck.jpg
44 | What we are looking for: truck
45 | ```
46 | 
47 |
48 | ```
49 | Path to image(recapcha): Result\input\city.png
50 | What we are looking for: car, person, traffic light
51 | ```
52 | 
53 |
--------------------------------------------------------------------------------
/Resources/coco.names.txt:
--------------------------------------------------------------------------------
1 | person
2 | bicycle
3 | car
4 | motorbike
5 | aeroplane
6 | bus
7 | train
8 | truck
9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | sofa
59 | pottedplant
60 | bed
61 | diningtable
62 | toilet
63 | tvmonitor
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
--------------------------------------------------------------------------------
/Resources/yolov4-tiny.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | # Testing
3 | #batch=1
4 | #subdivisions=1
5 | # Training
6 | batch=64
7 | subdivisions=1
8 | width=416
9 | height=416
10 | channels=3
11 | momentum=0.9
12 | decay=0.0005
13 | angle=0
14 | saturation = 1.5
15 | exposure = 1.5
16 | hue=.1
17 |
18 | learning_rate=0.00261
19 | burn_in=1000
20 |
21 | max_batches = 2000200
22 | policy=steps
23 | steps=1600000,1800000
24 | scales=.1,.1
25 |
26 |
27 | #weights_reject_freq=1001
28 | #ema_alpha=0.9998
29 | #equidistant_point=1000
30 | #num_sigmas_reject_badlabels=3
31 | #badlabels_rejection_percentage=0.2
32 |
33 |
34 | [convolutional]
35 | batch_normalize=1
36 | filters=32
37 | size=3
38 | stride=2
39 | pad=1
40 | activation=leaky
41 |
42 | [convolutional]
43 | batch_normalize=1
44 | filters=64
45 | size=3
46 | stride=2
47 | pad=1
48 | activation=leaky
49 |
50 | [convolutional]
51 | batch_normalize=1
52 | filters=64
53 | size=3
54 | stride=1
55 | pad=1
56 | activation=leaky
57 |
58 | [route]
59 | layers=-1
60 | groups=2
61 | group_id=1
62 |
63 | [convolutional]
64 | batch_normalize=1
65 | filters=32
66 | size=3
67 | stride=1
68 | pad=1
69 | activation=leaky
70 |
71 | [convolutional]
72 | batch_normalize=1
73 | filters=32
74 | size=3
75 | stride=1
76 | pad=1
77 | activation=leaky
78 |
79 | [route]
80 | layers = -1,-2
81 |
82 | [convolutional]
83 | batch_normalize=1
84 | filters=64
85 | size=1
86 | stride=1
87 | pad=1
88 | activation=leaky
89 |
90 | [route]
91 | layers = -6,-1
92 |
93 | [maxpool]
94 | size=2
95 | stride=2
96 |
97 | [convolutional]
98 | batch_normalize=1
99 | filters=128
100 | size=3
101 | stride=1
102 | pad=1
103 | activation=leaky
104 |
105 | [route]
106 | layers=-1
107 | groups=2
108 | group_id=1
109 |
110 | [convolutional]
111 | batch_normalize=1
112 | filters=64
113 | size=3
114 | stride=1
115 | pad=1
116 | activation=leaky
117 |
118 | [convolutional]
119 | batch_normalize=1
120 | filters=64
121 | size=3
122 | stride=1
123 | pad=1
124 | activation=leaky
125 |
126 | [route]
127 | layers = -1,-2
128 |
129 | [convolutional]
130 | batch_normalize=1
131 | filters=128
132 | size=1
133 | stride=1
134 | pad=1
135 | activation=leaky
136 |
137 | [route]
138 | layers = -6,-1
139 |
140 | [maxpool]
141 | size=2
142 | stride=2
143 |
144 | [convolutional]
145 | batch_normalize=1
146 | filters=256
147 | size=3
148 | stride=1
149 | pad=1
150 | activation=leaky
151 |
152 | [route]
153 | layers=-1
154 | groups=2
155 | group_id=1
156 |
157 | [convolutional]
158 | batch_normalize=1
159 | filters=128
160 | size=3
161 | stride=1
162 | pad=1
163 | activation=leaky
164 |
165 | [convolutional]
166 | batch_normalize=1
167 | filters=128
168 | size=3
169 | stride=1
170 | pad=1
171 | activation=leaky
172 |
173 | [route]
174 | layers = -1,-2
175 |
176 | [convolutional]
177 | batch_normalize=1
178 | filters=256
179 | size=1
180 | stride=1
181 | pad=1
182 | activation=leaky
183 |
184 | [route]
185 | layers = -6,-1
186 |
187 | [maxpool]
188 | size=2
189 | stride=2
190 |
191 | [convolutional]
192 | batch_normalize=1
193 | filters=512
194 | size=3
195 | stride=1
196 | pad=1
197 | activation=leaky
198 |
199 | ##################################
200 |
201 | [convolutional]
202 | batch_normalize=1
203 | filters=256
204 | size=1
205 | stride=1
206 | pad=1
207 | activation=leaky
208 |
209 | [convolutional]
210 | batch_normalize=1
211 | filters=512
212 | size=3
213 | stride=1
214 | pad=1
215 | activation=leaky
216 |
217 | [convolutional]
218 | size=1
219 | stride=1
220 | pad=1
221 | filters=255
222 | activation=linear
223 |
224 |
225 |
226 | [yolo]
227 | mask = 3,4,5
228 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
229 | classes=80
230 | num=6
231 | jitter=.3
232 | scale_x_y = 1.05
233 | cls_normalizer=1.0
234 | iou_normalizer=0.07
235 | iou_loss=ciou
236 | ignore_thresh = .7
237 | truth_thresh = 1
238 | random=0
239 | resize=1.5
240 | nms_kind=greedynms
241 | beta_nms=0.6
242 | #new_coords=1
243 | #scale_x_y = 2.0
244 |
245 | [route]
246 | layers = -4
247 |
248 | [convolutional]
249 | batch_normalize=1
250 | filters=128
251 | size=1
252 | stride=1
253 | pad=1
254 | activation=leaky
255 |
256 | [upsample]
257 | stride=2
258 |
259 | [route]
260 | layers = -1, 23
261 |
262 | [convolutional]
263 | batch_normalize=1
264 | filters=256
265 | size=3
266 | stride=1
267 | pad=1
268 | activation=leaky
269 |
270 | [convolutional]
271 | size=1
272 | stride=1
273 | pad=1
274 | filters=255
275 | activation=linear
276 |
277 | [yolo]
278 | mask = 1,2,3
279 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
280 | classes=80
281 | num=6
282 | jitter=.3
283 | scale_x_y = 1.05
284 | cls_normalizer=1.0
285 | iou_normalizer=0.07
286 | iou_loss=ciou
287 | ignore_thresh = .7
288 | truth_thresh = 1
289 | random=0
290 | resize=1.5
291 | nms_kind=greedynms
292 | beta_nms=0.6
293 | #new_coords=1
294 | #scale_x_y = 2.0
--------------------------------------------------------------------------------
/Resources/yolov4-tiny.weights:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/paveldat/objects_detection_on_image/8d4f3f7673d54679d4e2957d38f3dc344c6ab5ee/Resources/yolov4-tiny.weights
--------------------------------------------------------------------------------
/Result/input/bus1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/paveldat/objects_detection_on_image/8d4f3f7673d54679d4e2957d38f3dc344c6ab5ee/Result/input/bus1.png
--------------------------------------------------------------------------------
/Result/input/cars.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/paveldat/objects_detection_on_image/8d4f3f7673d54679d4e2957d38f3dc344c6ab5ee/Result/input/cars.png
--------------------------------------------------------------------------------
/Result/input/city.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/paveldat/objects_detection_on_image/8d4f3f7673d54679d4e2957d38f3dc344c6ab5ee/Result/input/city.png
--------------------------------------------------------------------------------
/Result/input/traffic_light.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/paveldat/objects_detection_on_image/8d4f3f7673d54679d4e2957d38f3dc344c6ab5ee/Result/input/traffic_light.png
--------------------------------------------------------------------------------
/Result/input/truck.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/paveldat/objects_detection_on_image/8d4f3f7673d54679d4e2957d38f3dc344c6ab5ee/Result/input/truck.jpg
--------------------------------------------------------------------------------
/Result/output/bus1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/paveldat/objects_detection_on_image/8d4f3f7673d54679d4e2957d38f3dc344c6ab5ee/Result/output/bus1.png
--------------------------------------------------------------------------------
/Result/output/cars.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/paveldat/objects_detection_on_image/8d4f3f7673d54679d4e2957d38f3dc344c6ab5ee/Result/output/cars.png
--------------------------------------------------------------------------------
/Result/output/city.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/paveldat/objects_detection_on_image/8d4f3f7673d54679d4e2957d38f3dc344c6ab5ee/Result/output/city.png
--------------------------------------------------------------------------------
/Result/output/traffic_light.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/paveldat/objects_detection_on_image/8d4f3f7673d54679d4e2957d38f3dc344c6ab5ee/Result/output/traffic_light.png
--------------------------------------------------------------------------------
/Result/output/truck.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/paveldat/objects_detection_on_image/8d4f3f7673d54679d4e2957d38f3dc344c6ab5ee/Result/output/truck.png
--------------------------------------------------------------------------------
/libraries.bat:
--------------------------------------------------------------------------------
1 | pip install opencv-python
2 | pip install numpy
3 | pip install art
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | from art import tprint
4 |
5 |
def apply_yolo_object_detection(image_to_process):
    """
    Recognition and determination of the coordinates of objects on the image.

    Uses the module-level ``net``, ``out_layers``, ``classes`` and
    ``classes_to_look_for`` globals set up in ``__main__``.

    :param image_to_process: original image (BGR, as loaded by cv2.imread)
    :return: image with marked objects and captions to them
    """

    height, width, _ = image_to_process.shape
    # Scale pixels to [0, 1] and resize; swapRB because YOLO expects RGB
    # while cv2.imread yields BGR.
    blob = cv2.dnn.blobFromImage(image_to_process, 1 / 255, (608, 608),
                                 (0, 0, 0), swapRB=True, crop=False)
    net.setInput(blob)
    outs = net.forward(out_layers)
    class_indexes, class_scores, boxes = ([] for i in range(3))
    objects_count = 0

    # Starting a search for objects in an image
    for out in outs:
        for obj in out:
            scores = obj[5:]
            class_index = np.argmax(scores)
            class_score = scores[class_index]
            if class_score > 0:
                # YOLO outputs relative (center, size); convert to a
                # pixel-space [x, y, w, h] box with a top-left corner.
                center_x = int(obj[0] * width)
                center_y = int(obj[1] * height)
                obj_width = int(obj[2] * width)
                obj_height = int(obj[3] * height)
                box = [center_x - obj_width // 2, center_y - obj_height // 2,
                       obj_width, obj_height]
                boxes.append(box)
                class_indexes.append(class_index)
                class_scores.append(float(class_score))

    # Non-maximum suppression: drop overlapping detections (IoU > 0.4)
    chosen_boxes = cv2.dnn.NMSBoxes(boxes, class_scores, 0.0, 0.4)
    for box_index in chosen_boxes:
        # NMSBoxes returns flat ints on OpenCV >= 4.5.4 but [i]-shaped
        # arrays on earlier versions; flatten so both forms index lists.
        box_index = int(np.ravel(box_index)[0])
        box = boxes[box_index]
        class_index = class_indexes[box_index]

        # Draw only the objects included in the desired classes
        if classes[class_index] in classes_to_look_for:
            objects_count += 1
            image_to_process = draw_object_bounding_box(image_to_process,
                                                        class_index, box)

    final_image = draw_object_count(image_to_process, objects_count)
    return final_image
53 |
54 |
def draw_object_bounding_box(image_to_process, index, box):
    """
    Drawing object borders with captions
    :param image_to_process: original image
    :param index: index of object class defined with YOLO
    :param box: coordinates of the area around the object
    :return: image with marked objects
    """

    x, y, w, h = box
    green = (0, 255, 0)
    line_thickness = 2

    # Rectangle around the detected object
    annotated = cv2.rectangle(image_to_process, (x, y), (x + w, y + h),
                              green, line_thickness)

    # Class-name caption just above the top-left corner of the box
    label = classes[index]
    annotated = cv2.putText(annotated, label, (x, y - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, green,
                            line_thickness, cv2.LINE_AA)

    return annotated
80 |
81 |
def draw_object_count(image_to_process, objects_count):
    """
    Signature of the number of found objects in the image
    :param image_to_process: original image
    :param objects_count: the number of objects of the desired class
    :return: image with labeled number of found objects
    """

    anchor = (10, 120)
    scale = 1.5
    typeface = cv2.FONT_HERSHEY_SIMPLEX
    thickness = 3
    caption = "Objects found: " + str(objects_count)

    # Draw the caption twice: a thick black pass first, then a thinner
    # white pass on top, so the text stays readable on any background.
    outlined = cv2.putText(image_to_process, caption, anchor, typeface,
                           scale, (0, 0, 0), thickness * 3, cv2.LINE_AA)
    outlined = cv2.putText(outlined, caption, anchor, typeface,
                           scale, (255, 255, 255), thickness, cv2.LINE_AA)

    return outlined
106 |
107 |
def start_image_object_detection(img_path):
    """
    Image analysis: run YOLO detection on the image at ``img_path`` and
    display the annotated result in a window until a key is pressed.

    :param img_path: path to the image file to analyse
    """

    try:
        # cv2.imread returns None instead of raising on a missing or
        # unreadable file; fail early with a clear message rather than
        # crashing inside the detector with a cryptic error.
        image = cv2.imread(img_path)
        if image is None:
            print("Could not read image: " + img_path)
            return

        # Applying Object Recognition Techniques in an Image by YOLO
        image = apply_yolo_object_detection(image)

        # Displaying the processed image on the screen.
        # waitKey's return value may legitimately be 0, so don't gate
        # the window cleanup on its truthiness.
        cv2.imshow("Image", image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    except KeyboardInterrupt:
        # Allow Ctrl-C to close the program quietly
        pass
125 |
126 |
if __name__ == '__main__':

    # Logo
    tprint("Object detection")
    tprint("by")
    tprint("paveldat")

    # Loading YOLO weights and config from files and setting up the network
    net = cv2.dnn.readNetFromDarknet("Resources/yolov4-tiny.cfg",
                                     "Resources/yolov4-tiny.weights")
    layer_names = net.getLayerNames()
    out_layers_indexes = net.getUnconnectedOutLayers()
    out_layers = [layer_names[index - 1] for index in out_layers_indexes]

    # Loading from a file of object classes that YOLO can detect.
    # strip() drops the trailing newline so the list gets no empty entry;
    # class indexes from the network map positionally onto this list.
    with open("Resources/coco.names.txt") as file:
        classes = file.read().strip().split("\n")

    # Determining classes that will be prioritized for search in an image
    # The names are in the file coco.names.txt
    image = input("Path to image(recapcha): ")
    look_for = input("What we are looking for: ").split(',')

    # Delete surrounding spaces so "car, person" matches the class names
    classes_to_look_for = [look.strip() for look in look_for]

    start_image_object_detection(image)
159 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | opencv-python
2 | numpy
3 | art
--------------------------------------------------------------------------------