├── README.md ├── [part 1]YOLOv3_with_OpenCV │   ├── OD.py │   ├── coco.names │   └── yolov3.cfg ├── [Part 3] Get Images │   └── get_images.py ├── [part 4]OpenLabelling │   ├── Author's_GitHub.txt │   ├── LICENSE │   ├── bbox_txt │   │   └── 1.txt │   ├── class_list.txt │   ├── images │   │   ├── 000001.jpg │   │   ├── 000002.jpg │   │   └── 000003.jpg │   ├── move_images.py │   └── run.py └── [part 5]Start Training YOLOv3     └── train_folder.py /README.md: -------------------------------------------------------------------------------- 1 | # YOLOv3-Series 2 | Hey guys! Here I'll be sharing everything I've learned while playing with YOLOv3. Hope you enjoy! 3 | 4 | # Part 1 - Implement YOLOv3 with OpenCV in Python 5 | https://www.youtube.com/watch?v=R0hipZXJjlI&t=277s 6 | 7 | # Part 2 - Install YOLOv3 with Darknet and Compile it with OpenCV and CUDA 8 | https://www.youtube.com/watch?v=-HtiYHpqnBs 9 | 10 | # Part 3 - Train YOLOv3 to Detect Custom Objects pt.1: Collect Images 11 | https://www.youtube.com/watch?v=yXD5_W0JPuw 12 | 13 | # Part 4 - Label Training Images for YOLOv3 14 | https://youtu.be/kmgocZpidU8 15 | 16 | # Part 5 - Start Training YOLOv3 17 | https://youtu.be/TP67icLSt1Y 18 | -------------------------------------------------------------------------------- /[Part 3] Get Images/get_images.py: -------------------------------------------------------------------------------- 1 | # Imports 2 | import os 3 | import urllib.request as ulib 4 | from bs4 import BeautifulSoup as Soup 5 | # ast.literal_eval turns the dict-like metadata strings we scrape from Google into Python dicts 6 | import ast 7 | 8 | from selenium import webdriver 9 | 10 | chromePath = r'C:\Windows.old\Users\Ivan\MyPythonScripts\Drivers\chromedriver.exe' 11 | 12 | driver = webdriver.Chrome(chromePath) 13 | 14 | URL = 'https://www.google.ru/search?q=bus&num=100&newwindow=1&safe=off&source=lnms&tbm=isch&sa=X&ved=0ahUKEwiGueO-uN7eAhXCFiwKHTiYDlUQ_AUIDigB&biw=1440&bih=789' 15 | directory = 'BeautifulBusesYo' 16 | 17 | 18 | def getURLs(URL): 19 | 20 | driver.get(URL) 21 | input('Scroll the results page to load more images, then press Enter...') 22 | page = driver.page_source 23 | # print(page)  # uncomment to inspect the raw page source 24 | 25 | soup = Soup(page, 'lxml') 26 | 27 | desiredURLs = soup.findAll('div', {'class': 'rg_meta notranslate'})  # each result's metadata lives in one of these divs 28 | 29 | ourURLs = [] 30 | 31 | for url in desiredURLs: 32 | theURL = url.text 33 | theURL = ast.literal_eval(theURL)['ou']  # 'ou' holds the original image URL 34 | 35 | ourURLs.append(theURL) 36 | 37 | return ourURLs 38 | 39 | 40 | 41 | 42 | def save_images(URLs, directory): 43 | 44 | if not os.path.isdir(directory): 45 | os.mkdir(directory) 46 | 47 | for i, url in enumerate(URLs): 48 | savePath = os.path.join(directory, '{:06}.jpg'.format(i)) 49 | 50 | try: 51 | ulib.urlretrieve(url, savePath) 52 | 53 | except Exception as e: 54 | print('Failed to download', url, '-', e) 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | URLs = getURLs(URL) 65 | 66 | 67 | for url in URLs: 68 | print(url) 69 | 70 | save_images(URLs, directory) 71 | 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /[part 1]YOLOv3_with_OpenCV/OD.py: -------------------------------------------------------------------------------- 1 | import cv2 as cv 2 | import numpy as np 3 | 4 | 5 | 6 | # Confidence threshold, NMS threshold and network input width/height 7 | confThreshold = 0.25 8 | nmsThreshold = 0.40 9 | inpWidth = 416 10 | inpHeight = 416 11 | 12 | 13 | # Load the class names into a list 14 | classesFile = "coco.names" 15 | classes = None 16 | 17 | with open(classesFile, 'rt') as f: 18 | classes = f.read().rstrip('\n').split('\n') 19 | 20 | # Model configuration 21 |
modelConf = 'yolov3.cfg' 22 | modelWeights = 'yolov3.weights' 23 | 24 | def postprocess(frame, outs): 25 | frameHeight = frame.shape[0] 26 | frameWidth = frame.shape[1] 27 | 28 | classIDs = [] 29 | confidences = [] 30 | boxes = [] 31 | 32 | 33 | 34 | 35 | for out in outs: 36 | for detection in out: 37 | 38 | scores = detection[5:] 39 | classID = np.argmax(scores) 40 | confidence = scores[classID] 41 | 42 | if confidence > confThreshold: 43 | centerX = int(detection[0] * frameWidth) 44 | centerY = int(detection[1] * frameHeight) 45 | 46 | width = int(detection[2] * frameWidth) 47 | height = int(detection[3] * frameHeight) 48 | 49 | left = int(centerX - width/2) 50 | top = int(centerY - height/2) 51 | 52 | classIDs.append(classID) 53 | confidences.append(float(confidence)) 54 | boxes.append([left, top, width, height]) 55 | 56 | # Non-maximum suppression keeps only the most confident of heavily overlapping boxes 57 | 58 | indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold) 59 | for i in indices: 60 | i = i[0] if isinstance(i, (list, tuple, np.ndarray)) else i  # older OpenCV versions return nested indices 61 | box = boxes[i] 62 | left = box[0] 63 | top = box[1] 64 | width = box[2] 65 | height = box[3] 66 | 67 | drawPred(classIDs[i], confidences[i], left, top, left + width, top + height) 68 | 69 | 70 | def drawPred(classId, conf, left, top, right, bottom): 71 | # Draw a bounding box. 72 | cv.rectangle(frame, (left, top), (right, bottom), (255, 178, 50), 3) 73 | 74 | label = '%.2f' % conf 75 | 76 | # Get the label for the class name and its confidence 77 | if classes: 78 | assert (classId < len(classes)) 79 | label = '%s:%s' % (classes[classId], label) 80 | 81 | # A fancier display of the label from learnopencv.com 82 | # Display the label at the top of the bounding box 83 | #labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1) 84 | #top = max(top, labelSize[1]) 85 | #cv.rectangle(frame, (left, top - round(1.5 * labelSize[1])), (left + round(1.5 * labelSize[0]), top + baseLine), 86 | #(255, 255, 255), cv.FILLED) 87 | # cv.rectangle(frame, (left,top),(right,bottom), (255,255,255), 1 ) 88 | #cv.putText(frame, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 0), 1) 89 | cv.putText(frame, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 3) 90 | 91 | def getOutputsNames(net): 92 | # Get the names of all the layers in the network 93 | layersNames = net.getLayerNames() 94 | 95 | # Get the names of the output layers, i.e.
the layers with unconnected outputs 96 | return [layersNames[i - 1] for i in net.getUnconnectedOutLayers().flatten()]  # flatten() copes with both old and new OpenCV index shapes 97 | 98 | 99 | # Set up the net 100 | 101 | net = cv.dnn.readNetFromDarknet(modelConf, modelWeights) 102 | net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV) 103 | net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU) 104 | 105 | 106 | # Process inputs 107 | winName = 'DL OD with OpenCV' 108 | cv.namedWindow(winName, cv.WINDOW_NORMAL) 109 | cv.resizeWindow(winName, 1000, 1000) 110 | 111 | 112 | 113 | 114 | 115 | cap = cv.VideoCapture(0) 116 | 117 | while cv.waitKey(1) < 0: 118 | 119 | # Get a frame from the video stream 120 | hasFrame, frame = cap.read() 121 | if not hasFrame: break  # stop when the stream ends 122 | # Create a 4D blob from a frame 123 | 124 | blob = cv.dnn.blobFromImage(frame, 1/255, (inpWidth, inpHeight), [0, 0, 0], 1, crop=False) 125 | 126 | # Set the input to the net 127 | net.setInput(blob) 128 | outs = net.forward(getOutputsNames(net)) 129 | 130 | 131 | postprocess(frame, outs) 132 | 133 | # Show the image 134 | cv.imshow(winName, frame) 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | -------------------------------------------------------------------------------- /[part 1]YOLOv3_with_OpenCV/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /[part 1]YOLOv3_with_OpenCV/yolov3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 |
[convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 
| [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | 
from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=255 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=80 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .7 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=255 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=80 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .7 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 
722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=255 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=80 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .7 787 | truth_thresh = 1 788 | random=1 789 | 790 | -------------------------------------------------------------------------------- /[part 4]OpenLabelling/Author's_GitHub.txt: -------------------------------------------------------------------------------- 1 | Joao Cartucho 2 | https://github.com/Cartucho/OpenLabeling -------------------------------------------------------------------------------- /[part 4]OpenLabelling/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /[part 4]OpenLabelling/bbox_txt/1.txt: -------------------------------------------------------------------------------- 1 | 1 -------------------------------------------------------------------------------- /[part 4]OpenLabelling/class_list.txt: -------------------------------------------------------------------------------- 1 | bus_doors 2 | bus_wheels 3 | people_faces 4 | rabbit -------------------------------------------------------------------------------- /[part 4]OpenLabelling/images/000001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivangrov/YOLOv3-Series/2c5d3d941a79514db3b4494a47fb8e5d35ea31aa/[part 4]OpenLabelling/images/000001.jpg -------------------------------------------------------------------------------- /[part 4]OpenLabelling/images/000002.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivangrov/YOLOv3-Series/2c5d3d941a79514db3b4494a47fb8e5d35ea31aa/[part 4]OpenLabelling/images/000002.jpg -------------------------------------------------------------------------------- /[part 4]OpenLabelling/images/000003.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivangrov/YOLOv3-Series/2c5d3d941a79514db3b4494a47fb8e5d35ea31aa/[part 4]OpenLabelling/images/000003.jpg -------------------------------------------------------------------------------- /[part 4]OpenLabelling/move_images.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | folders = ['AlsoBuses', 'BeautifulBusesYo'] 4 | path = r'C:\Users\user\Desktop\OpenLabelling\images' 5 | 6 | 7 | n = 0 8 | for folder in folders: 9 | for image in os.scandir(folder): 10 | n+=1 11 | os.rename(image.path, os.path.join(path, '{:06}.jpg'.format(n))) 12 | -------------------------------------------------------------------------------- /[part 4]OpenLabelling/run.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import textwrap 3 | import glob 4 | import os 5 | 6 | import numpy as np 7 | import cv2 8 | 9 | 10 | WITH_QT = True 11 | try: 12 | cv2.namedWindow("Test") 13 | cv2.displayOverlay("Test", "Test QT", 1000) 14 | except: 15 | WITH_QT = False 16 | cv2.destroyAllWindows() 17 | 18 | bbox_thickness = 2 19 | 20 | parser = argparse.ArgumentParser(description='YOLO v2 Bounding Box Tool') 21 | parser.add_argument('--format', default='yolo', type=str, choices=['yolo', 'voc'], help="Bounding box format") 22 | parser.add_argument('--sort', action='store_true', help="If true, shows images in order.") 23 | parser.add_argument('--cross-thickness', default='1', type=int, help="Cross thickness") 24 | parser.add_argument('--bbox-thickness', default=bbox_thickness, type=int, help="Bounding box thickness") 25 | args = parser.parse_args() 26 | 27 | class_index = 0 28 | img_index = 0 29 | img = None 30 | img_objects = [] 31 | bb_dir = "bbox_txt/" 32 | 33 | # selected bounding box 34 | prev_was_double_click = False 35 | is_bbox_selected = False 36 | selected_bbox = -1 37 | 38 | mouse_x = 0 39 | mouse_y = 0 40 | point_1 = (-1, -1) 41 | point_2 = (-1, -1) 42 | 43 | def change_img_index(x): 44 | global img_index, img 45 | img_index = x 46 | img_path = image_list[img_index] 47 | img = cv2.imread(img_path) 48 | if WITH_QT: 49 | 
cv2.displayOverlay(WINDOW_NAME, "Showing image " 50 | "" + str(img_index) + "/" 51 | "" + str(last_img_index), 1000) 52 | else: 53 | print("Showing image " 54 | "" + str(img_index) + "/" 55 | "" + str(last_img_index) + " path:" + img_path) 56 | 57 | def change_class_index(x): 58 | global class_index 59 | class_index = x 60 | if WITH_QT: 61 | cv2.displayOverlay(WINDOW_NAME, "Selected class " 62 | "" + str(class_index) + "/" 63 | "" + str(last_class_index) + "" 64 | "\n " + class_list[class_index], 3000) 65 | else: 66 | print("Selected class :" + class_list[class_index]) 67 | 68 | 69 | def draw_edges(tmp_img): 70 | blur = cv2.bilateralFilter(tmp_img, 3, 75, 75) 71 | edges = cv2.Canny(blur, 150, 250, 3) 72 | edges = cv2.cvtColor(edges, cv2.COLOR_GRAY2RGB) 73 | # Overlap image and edges together 74 | tmp_img = np.bitwise_or(tmp_img, edges) 75 | #tmp_img = cv2.addWeighted(tmp_img, 1 - edges_val, edges, edges_val, 0) 76 | return tmp_img 77 | 78 | 79 | def decrease_index(current_index, last_index): 80 | current_index -= 1 81 | if current_index < 0: 82 | current_index = last_index 83 | return current_index 84 | 85 | 86 | def increase_index(current_index, last_index): 87 | current_index += 1 88 | if current_index > last_index: 89 | current_index = 0 90 | return current_index 91 | 92 | 93 | def draw_line(img, x, y, height, width, color): 94 | cv2.line(img, (x, 0), (x, height), color, thickness=args.cross_thickness) 95 | cv2.line(img, (0, y), (width, y), color, thickness=args.cross_thickness) 96 | 97 | 98 | def yolo_format(class_index, point_1, point_2, width, height): 99 | # YOLO wants everything normalized 100 | # Order: class x_center y_center x_width y_height 101 | x_center = (point_1[0] + point_2[0]) / float(2.0 * width) 102 | y_center = (point_1[1] + point_2[1]) / float(2.0 * height) 103 | x_width = float(abs(point_2[0] - point_1[0])) / width 104 | y_height = float(abs(point_2[1] - point_1[1])) / height 105 | return str(class_index) + " " + str(x_center) \ 106 | + " " + str(y_center) + " " + str(x_width) + " " + str(y_height) 107 | 108 | 109 | def voc_format(class_index, point_1, point_2): 110 | # Order: xmin ymin xmax ymax class 111 | # Top left pixel is (1, 1) in VOC 112 | xmin, ymin = min(point_1[0], point_2[0]) + 1, min(point_1[1], point_2[1]) + 1 113 | xmax, ymax = max(point_1[0], point_2[0]) + 1, max(point_1[1], point_2[1]) + 1 114 | items = map(str, [xmin, ymin, xmax, ymax, class_index]) 115 | return ' '.join(items) 116 | 117 | 118 | def get_txt_path(img_path): 119 | img_name = os.path.basename(os.path.normpath(img_path)) 120 | img_type = img_path.split('.')[-1] 121 | return bb_dir + img_name.replace(img_type, 'txt') 122 | 123 | 124 | def save_bb(txt_path, line): 125 | with open(txt_path, 'a') as myfile: 126 | myfile.write(line + "\n") # append line 127 | 128 | 129 | def delete_bb(txt_path, line_index): 130 | with open(txt_path, "r") as old_file: 131 | lines = old_file.readlines() 132 | 133 | with open(txt_path, "w") as new_file: 134 | counter = 0 135 | for line in lines: 136 | if counter != line_index:  # compare values, not identity 137 | new_file.write(line) 138 | counter += 1 139 | 140 | 141 | def yolo_to_x_y(x_center, y_center, x_width, y_height, width, height): 142 | x_center *= width 143 | y_center *= height 144 | x_width *= width 145 | y_height *= height 146 | x_width /= 2.0 147 | y_height /= 2.0 148 | return int(x_center - x_width), int(y_center - y_height), int(x_center + x_width), int(y_center + y_height) 149 | 150 | 151 | def draw_text(tmp_img, text, center, color, size): 152 | font = cv2.FONT_HERSHEY_SIMPLEX 153 | cv2.putText(tmp_img, text, center, font, 0.7, color, size, cv2.LINE_AA)  # the last argument is lineType, not a second font 154 | return tmp_img 155 | 156 | def draw_bboxes_from_file(tmp_img, txt_path, width, height): 157 | global img_objects 158 | img_objects = [] 159 | if os.path.isfile(txt_path): 160 | with open(txt_path) as f: 161 | content = f.readlines() 162 | for line in content: 163 | values_str = line.split() 164 | if args.format == 'yolo': 165 | class_index, x_center, y_center, x_width, y_height = map(float, values_str) 166 | class_index = int(class_index) 167 | # convert yolo to points 168 | x1, y1, x2, y2 = yolo_to_x_y(x_center, y_center, x_width, y_height, width, height) 169 | if x_center == int(x_center): 170 | error = ("You selected the 'yolo' format but your labels " 171 | "seem to be in a different format. Consider " 172 | "removing your old label files.") 173 | raise Exception(textwrap.fill(error, 70)) 174 | elif args.format == 'voc': 175 | try: 176 | x1, y1, x2, y2, class_index = map(int, values_str) 177 | except ValueError: 178 | error = ("You selected the 'voc' format but your labels " 179 | "seem to be in a different format. Consider " 180 | "removing your old label files.") 181 | raise Exception(textwrap.fill(error, 70)) 182 | x1, y1, x2, y2 = x1-1, y1-1, x2-1, y2-1 183 | img_objects.append([class_index, x1, y1, x2, y2]) 184 | color = class_rgb[class_index].tolist() 185 | cv2.rectangle(tmp_img, (x1, y1), (x2, y2), color, thickness=args.bbox_thickness) 186 | tmp_img = draw_text(tmp_img, class_list[class_index], (x1, y1 - 5), color, args.bbox_thickness) 187 | return tmp_img 188 | 189 | 190 | def get_bbox_area(x1, y1, x2, y2): 191 | width = abs(x2 - x1) 192 | height = abs(y2 - y1) 193 | return width*height 194 | 195 | 196 | def set_selected_bbox(): 197 | global is_bbox_selected, selected_bbox 198 | smallest_area = -1 199 | # if clicked inside multiple bboxes selects the smallest one 200 | for idx, obj in enumerate(img_objects): 201 | ind, x1, y1, x2, y2 = obj 202 | if is_mouse_inside_points(x1, y1, x2, y2): 203 | is_bbox_selected = True 204 | tmp_area = get_bbox_area(x1, y1, x2, y2) 205 | if tmp_area < smallest_area or smallest_area == -1: 206 | smallest_area = tmp_area 207 | selected_bbox = idx 208 | 209 | 210 | def mouse_inside_delete_button(): 211 | for idx, obj in enumerate(img_objects): 212 | if idx == selected_bbox: 213 | ind, x1, y1, x2, y2 = obj 214 | x1_c, y1_c, x2_c, y2_c = get_close_icon(x1, y1, x2, y2) 215 | if is_mouse_inside_points(x1_c, y1_c, x2_c, y2_c): 216 | return True 217 | return False 218 | 219 | def delete_selected_bbox(): 220 | global is_bbox_selected  # without this, the reset below would only rebind a local name 221 | img_path = image_list[img_index] 222 | txt_path = get_txt_path(img_path) 223 | is_bbox_selected = False 224 | with open(txt_path, "r") as old_file: 225 | lines = old_file.readlines() 226 | 227 | with open(txt_path, "w") as new_file: 228 | counter = 0 229 | for line in lines: 230 | if counter != selected_bbox:  # compare values, not identity 231 | new_file.write(line) 232 | counter += 1 233 | 234 | # mouse callback function 235 | def mouse_listener(event, x, y, flags, param): 236 | global is_bbox_selected, prev_was_double_click, mouse_x, mouse_y, point_1, point_2 237 | 238 | if event == cv2.EVENT_MOUSEMOVE: 239 | mouse_x = x 240 | mouse_y = y 241 | elif event == cv2.EVENT_LBUTTONDBLCLK: 242 | prev_was_double_click = True 243 | #print("Double click") 244 | point_1 = (-1, -1) 245 | # if clicked inside a bounding box 246 | set_selected_bbox() 247 | # AlexeyGy change: delete via right-click 248 | elif event == cv2.EVENT_RBUTTONDOWN: 249 |
set_selected_bbox() 250 | if is_bbox_selected: 251 | delete_selected_bbox() 252 | elif event == cv2.EVENT_LBUTTONDOWN: 253 | if prev_was_double_click: 254 | #print("Finish double click") 255 | prev_was_double_click = False 256 | 257 | #print("Normal left click") 258 | is_mouse_inside_delete_button = mouse_inside_delete_button() 259 | if point_1[0] == -1: 260 | if is_bbox_selected and is_mouse_inside_delete_button: 261 | # the user wants to delete the bbox 262 | #print("Delete bbox") 263 | delete_selected_bbox() 264 | else: 265 | is_bbox_selected = False 266 | # first click (start drawing a bounding box or delete an item) 267 | point_1 = (x, y) 268 | else: 269 | # minimal size for bounding box to avoid errors 270 | # boxes smaller than the threshold are treated as accidental clicks 271 | threshold = 5 272 | if abs(x - point_1[0]) > threshold or abs(y - point_1[1]) > threshold: 273 | # second click 274 | point_2 = (x, y) 275 | 276 | 277 | def is_mouse_inside_points(x1, y1, x2, y2): 278 | return x1 < mouse_x < x2 and y1 < mouse_y < y2 279 | 280 | 281 | def get_close_icon(x1, y1, x2, y2): 282 | percentage = 0.05 283 | height = -1 284 | while height < 15 and percentage < 1.0: 285 | height = int((y2 - y1) * percentage) 286 | percentage += 0.1 287 | return (x2 - height), y1, x2, (y1 + height) 288 | 289 | 290 | def draw_close_icon(tmp_img, x1_c, y1_c, x2_c, y2_c): 291 | red = (0, 0, 255) 292 | cv2.rectangle(tmp_img, (x1_c + 1, y1_c - 1), (x2_c, y2_c), red, -1) 293 | white = (255, 255, 255) 294 | cv2.line(tmp_img, (x1_c, y1_c), (x2_c, y2_c), white, 2) 295 | cv2.line(tmp_img, (x1_c, y2_c), (x2_c, y1_c), white, 2) 296 | return tmp_img 297 | 298 | 299 | def draw_info_bb_selected(tmp_img): 300 | for idx, obj in enumerate(img_objects): 301 | ind, x1, y1, x2, y2 = obj 302 | if idx == selected_bbox: 303 | x1_c, y1_c, x2_c, y2_c = get_close_icon(x1, y1, x2, y2) 304 | draw_close_icon(tmp_img, x1_c, y1_c, x2_c, y2_c) 305 | return tmp_img 306 | 307 | def remove_bad_data(img_path, img_path_txt): 308 | os.remove(img_path) 309 | os.remove(img_path_txt) 310 | 311 | 312 | # load all images (with multiple extensions) from a directory using OpenCV 313 | img_dir = "images/" 314 | image_list = [] 315 | for f in os.listdir(img_dir): 316 | f_path = os.path.join(img_dir, f) 317 | test_img = cv2.imread(f_path) 318 | if test_img is not None: 319 | image_list.append(f_path) 320 | 321 | #print(image_list) 322 | 323 | # Sort the image list, or shuffle it?
324 | #image_list.sort() 325 | #if not args.sort: 326 | #np.random.seed(123) # Keep random img order consistent 327 | #np.random.shuffle(image_list) 328 | 329 | last_img_index = len(image_list) - 1 330 | print(image_list) 331 | 332 | if not os.path.exists(bb_dir): 333 | os.makedirs(bb_dir) 334 | 335 | # create empty .txt file for each of the images if it doesn't exist already 336 | for img_path in image_list: 337 | txt_path = get_txt_path(img_path) 338 | if not os.path.isfile(txt_path): 339 | open(txt_path, 'a').close() 340 | 341 | # load class list 342 | with open('class_list.txt') as f: 343 | class_list = f.read().splitlines() 344 | #print(class_list) 345 | last_class_index = len(class_list) - 1 346 | 347 | # Make the class colors the same each session 348 | # The colors are in BGR order because we're using OpenCV 349 | class_rgb = [ 350 | (0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 255, 0), (0, 255, 255), 351 | (255, 0, 255), (192, 192, 192), (128, 128, 128), (128, 0, 0), 352 | (128, 128, 0), (0, 128, 0), (128, 0, 128), (0, 128, 128), (0, 0, 128)] 353 | class_rgb = np.array(class_rgb) 354 | # If there are still more classes, add new colors randomly 355 | num_colors_missing = len(class_list) - len(class_rgb) 356 | if num_colors_missing > 0: 357 | more_colors = np.random.randint(0, 255+1, size=(num_colors_missing, 3)) 358 | class_rgb = np.vstack([class_rgb, more_colors]) 359 | 360 | # create window 361 | WINDOW_NAME = 'Bounding Box Labeler' 362 | cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_KEEPRATIO) 363 | #cv2.resizeWindow(WINDOW_NAME, 1000, 700) 364 | cv2.resizeWindow(WINDOW_NAME, 500, 500) 365 | cv2.setMouseCallback(WINDOW_NAME, mouse_listener) 366 | 367 | # selected image 368 | TRACKBAR_IMG = 'Image' 369 | cv2.createTrackbar(TRACKBAR_IMG, WINDOW_NAME, 0, last_img_index, change_img_index) 370 | 371 | # selected class 372 | TRACKBAR_CLASS = 'Class' 373 | if last_class_index != 0: 374 | cv2.createTrackbar(TRACKBAR_CLASS, WINDOW_NAME, 0, last_class_index, change_class_index) 375 | 376 | # initialize 377 | change_img_index(0) 378 | edges_on = False 379 | 380 | if WITH_QT: 381 | cv2.displayOverlay(WINDOW_NAME, "Welcome!\n Press [h] for help.", 4000) 382 | print(" Welcome!\n Select the window and press [h] for help.") 383 | 384 | color = class_rgb[class_index].tolist() 385 | # loop 386 | while True: 387 | # clone the img 388 | tmp_img = img.copy() 389 | height, width = tmp_img.shape[:2] 390 | if edges_on: 391 | # draw edges 392 | tmp_img = draw_edges(tmp_img) 393 | 394 | 395 | #print('MOUSE',mouse_x, mouse_y) 396 | #print('POINTS', point_1, point_2) 397 | 398 | 399 | img_path = image_list[img_index] 400 | txt_path = get_txt_path(img_path) 401 | 402 | # draw already done bounding boxes 403 | tmp_img = draw_bboxes_from_file(tmp_img, txt_path, width, height) 404 | # if bounding box is selected add extra info 405 | if is_bbox_selected: 406 | tmp_img = draw_info_bb_selected(tmp_img) 407 | # if first click 408 | if point_1[0] != -1: 409 | color = class_rgb[class_index].tolist() 410 | # draw partial bbox 411 | cv2.rectangle(tmp_img, point_1, (mouse_x, mouse_y), color, thickness=args.bbox_thickness) 412 | # if second click 413 | if point_2[0] != -1: 414 | # save the bounding box 415 | if args.format == 'yolo': 416 | line = yolo_format(class_index, point_1, point_2, width, height) 417 | elif args.format == 'voc': 418 | line = voc_format(class_index, point_1, point_2) 419 | save_bb(txt_path, line) 420 | # reset the points 421 | point_1 = (-1, -1) 422 | point_2 = (-1, -1) 423 | else: 424 |
if WITH_QT: 425 | cv2.displayOverlay(WINDOW_NAME, "Selected label: " + class_list[class_index] + "" 426 | "\nPress [w] or [s] to change.", 120) 427 | 428 | cv2.imshow(WINDOW_NAME, tmp_img) 429 | pressed_key = cv2.waitKey(50) 430 | 431 | """ Key Listeners START """ 432 | if pressed_key == ord('a') or pressed_key == ord('d'): 433 | # show previous image key listener 434 | if pressed_key == ord('a'): 435 | img_index = decrease_index(img_index, last_img_index) 436 | # show next image key listener 437 | elif pressed_key == ord('d'): 438 | img_index = increase_index(img_index, last_img_index) 439 | cv2.setTrackbarPos(TRACKBAR_IMG, WINDOW_NAME, img_index) 440 | 441 | elif pressed_key == ord('s') or pressed_key == ord('w'): 442 | # change down current class key listener 443 | if pressed_key == ord('s'): 444 | class_index = decrease_index(class_index, last_class_index) 445 | # change up current class key listener 446 | elif pressed_key == ord('w'): 447 | class_index = increase_index(class_index, last_class_index) 448 | color = class_rgb[class_index].tolist() 449 | draw_line(tmp_img, mouse_x, mouse_y, height, width, color) 450 | cv2.setTrackbarPos(TRACKBAR_CLASS, WINDOW_NAME, class_index) 451 | 452 | # remove bad data: delete the current image together with its label file 453 | elif pressed_key == ord('r'): 454 | 455 | bad_path = img_path 456 | bad_text = txt_path 457 | 458 | img_index = increase_index(img_index, last_img_index) 459 | cv2.setTrackbarPos(TRACKBAR_IMG, WINDOW_NAME, img_index) 460 | 461 | # if the index wrapped around to 0, the removed image was the last one in the list 462 | if img_index == 0: 463 | del image_list[last_img_index] 464 | last_img_index = len(image_list) - 1 465 | 466 | remove_bad_data(bad_path, bad_text) 467 | 468 | else: 469 | del image_list[img_index - 1] 470 | last_img_index = len(image_list) - 1 471 | 472 | remove_bad_data(bad_path, bad_text) 473 | 474 | img_index -= 1 475 | 476 | cv2.setTrackbarPos(TRACKBAR_IMG, WINDOW_NAME, img_index) 477 | 478 | # switch class directly with the number keys 0-9 479 | elif ord('0') <= pressed_key <= ord('9'): 480 | new_class_index = pressed_key - ord('0') 481 | if new_class_index < len(class_list): 482 | class_index = new_class_index 483 | color = class_rgb[class_index].tolist() 484 | draw_line(tmp_img, mouse_x, mouse_y, height, width, color) 485 | cv2.setTrackbarPos(TRACKBAR_CLASS, WINDOW_NAME, class_index) 486 | 487 | # help key listener 488 | elif pressed_key == ord('h'): 489 | if WITH_QT: 490 | cv2.displayOverlay(WINDOW_NAME, "[e] to show edges;\n" 491 | "[q] to quit;\n" 492 | "[a] or [d] to change Image;\n" 493 | "[w] or [s] to change Class.\n" 494 | "%s" % img_path, 6000) 495 | else: 496 | print("[e] to show edges;\n" 497 | "[q] to quit;\n" 498 | "[a] or [d] to change Image;\n" 499 | "[w] or [s] to change Class.\n" 500 | "%s" % img_path) 501 | # show edges key listener 502 | elif pressed_key == ord('e'): 503 | if edges_on: 504 | edges_on = False 505 | if WITH_QT: 506 | cv2.displayOverlay(WINDOW_NAME, "Edges turned OFF!", 1000) 507 | else: 508 | print("Edges turned OFF!") 509 | else: 510 | edges_on = True 511 | if WITH_QT: 512 | cv2.displayOverlay(WINDOW_NAME, "Edges turned ON!", 1000) 513 | else: 514 | print("Edges turned ON!") 515 | 516 | # quit key listener 517 | elif pressed_key == ord('q'): 518 | break 519 | """ Key Listeners END """ 520 | 521 | if WITH_QT: 522 | # if window gets closed then quit 523 | if cv2.getWindowProperty(WINDOW_NAME, cv2.WND_PROP_VISIBLE) < 1: 524 | break 525 | 526 | cv2.destroyAllWindows() 527 | -------------------------------------------------------------------------------- /[part 5]Start Training YOLOv3/train_folder.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # darknet reads train.txt and expects each image path to start with this prefix 4 | path = 'data/obj/' 5 | 6 | 7 | imgList = os.listdir('images') 8 | 9 | print(imgList) 10 | 11 | # write one image path per line; the with-block closes the file for us 12 | with open('train.txt', 'w') as textFile: 13 | 14 | for img in imgList: 15 | imgPath = path + img + '\n' 16 | textFile.write(imgPath) 17 | --------------------------------------------------------------------------------
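A note on train_folder.py: darknet's obj.data usually also wants a validation list (the `valid=` entry). Below is a minimal sketch of a train/validation split built on the same assumptions as train_folder.py (images in `images/`, paths prefixed with `data/obj/`); the 90/10 ratio, the `test.txt` file name, and the extension filter are conventional choices I've added, not part of the original script.

    import os
    import random

    path = 'data/obj/'  # same prefix darknet expects (see train_folder.py)

    # keep only actual image files; extensions listed here are an assumption
    imgList = [f for f in os.listdir('images')
               if f.lower().endswith(('.jpg', '.jpeg', '.png'))]

    random.seed(42)        # make the split reproducible across runs
    random.shuffle(imgList)
    split = int(0.9 * len(imgList))  # 90% train / 10% validation, a common default

    with open('train.txt', 'w') as f:
        f.writelines(path + img + '\n' for img in imgList[:split])

    with open('test.txt', 'w') as f:
        f.writelines(path + img + '\n' for img in imgList[split:])

Point obj.data's `train=` at train.txt and `valid=` at test.txt; the validation set lets darknet report mAP on images it never trained on.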