├── .devcontainer
│   └── devcontainer.json
├── .gitignore
├── 2007_train.txt
├── 2007_val.txt
├── Pipfile
├── README.md
├── VOCdevkit
│   └── VOC2007
│       ├── Annotations
│       │   ├── 1.xml
│       │   ├── 2.xml
│       │   ├── 3.xml
│       │   ├── 4.xml
│       │   ├── 5.xml
│       │   └── README.md
│       ├── ImageSets
│       │   └── Main
│       │       ├── README.md
│       │       ├── test.txt
│       │       ├── train.txt
│       │       ├── trainval.txt
│       │       └── val.txt
│       └── JPEGImages
│           ├── 1.jpg
│           ├── 2.jpg
│           ├── 3.jpg
│           ├── 4.jpg
│           └── 5.jpg
├── YOLOv4-study学习资料md
├── detect.py
├── gen_annotation.py
├── gesture.streamlit.py
├── get_map.py
├── get_yaml.py
├── img
│   ├── anticlockwise.jpg
│   ├── back.jpg
│   ├── clockwise.jpg
│   ├── down.jpg
│   ├── front.jpg
│   ├── left.jpg
│   ├── right.jpg
│   └── up.jpg
├── instructions.md
├── kmeans_for_anchors.jpg
├── kmeans_for_anchors.py
├── logs
│   ├── README.md
│   ├── gesture_loss_2021_11_14_22_04_00
│   │   ├── epoch_loss_2021_11_14_22_04_00.png
│   │   ├── epoch_loss_2021_11_14_22_04_00.txt
│   │   └── epoch_val_loss_2021_11_14_22_04_00.txt
│   ├── loss_2022_04_27_08_48_16
│   │   ├── epoch_loss.png
│   │   ├── epoch_loss.txt
│   │   ├── epoch_val_loss.txt
│   │   └── events.out.tfevents.1651049298.fef10e9dbba1.425.0
│   ├── loss_2022_04_27_10_38_48
│   │   ├── epoch_loss.png
│   │   ├── epoch_loss.txt
│   │   ├── epoch_val_loss.txt
│   │   └── events.out.tfevents.1651055931.9b45dd4991ae.367.0
│   ├── loss_2022_04_27_12_50_47
│   │   ├── epoch_loss.png
│   │   ├── epoch_loss.txt
│   │   ├── epoch_val_loss.txt
│   │   └── events.out.tfevents.1651063849.274e119c63fb.1015.0
│   ├── loss_2022_04_28_00_40_54
│   │   ├── epoch_loss.png
│   │   ├── epoch_loss.txt
│   │   ├── epoch_val_loss.txt
│   │   └── events.out.tfevents.1651106457.117e69507361.564.0
│   ├── loss_2022_04_28_14_54_17
│   │   ├── epoch_loss.png
│   │   ├── epoch_loss.txt
│   │   ├── epoch_val_loss.txt
│   │   └── events.out.tfevents.1651128857.LAPTOP-IE5MVR15.24536.0
│   └── loss_2022_05_02_14_57_57
│       ├── epoch_loss.png
│       ├── epoch_loss.txt
│       ├── epoch_val_loss.txt
│       └── events.out.tfevents.1651503480.437fb01f4bb0.370.0
├── model_data
│   ├── .gitattributes
│   ├── gesture.yaml
│   ├── gesture_classes.txt
│   ├── simhei.ttf
│   ├── yolo_anchors.txt
│   └── yolotiny_anchors.txt
├── nets
│   ├── CSPdarknet.py
│   ├── CSPdarknet53_tiny.py
│   ├── __init__.py
│   ├── attention.py
│   ├── yolo.py
│   ├── yolo_tiny.py
│   ├── yolo_training.py
│   └── yolotiny_training.py
├── packages.txt
├── predict.py
├── requirements.txt
├── summary.py
├── train.py
├── utils
│   ├── __init__.py
│   ├── callbacks.py
│   ├── dataloader.py
│   ├── utils.py
│   ├── utils_bbox.py
│   ├── utils_fit.py
│   └── utils_map.py
├── utils_coco
│   ├── coco_annotation.py
│   └── get_map_coco.py
├── voc_annotation.py
├── yolo.py
├── yolo_anchors.txt
└── yolov4-gesture-tutorial.ipynb

/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
{
  "name": "Python 3",
  // Or use a Dockerfile or Docker Compose file.
  // More info: https://containers.dev/guide/dockerfile
  "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
  "customizations": {
    "codespaces": {
      "openFiles": [
        "README.md",
        "gesture_streamlit.py"
      ]
    },
    "vscode": {
      "settings": {},
      "extensions": [
        "ms-python.python",
        "ms-python.vscode-pylance"
      ]
    }
  },
  "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y
--------------------------------------------------------------------------------
/VOCdevkit/VOC2007/Annotations/1.xml:
--------------------------------------------------------------------------------
<annotation>
    <folder>JPEGImages</folder>
    <filename>1.jpg</filename>
    <path>E:\handpose_x_gesture_v2\JPEGImages\1.jpg</path>
    <source>
        <database>Unknown</database>
    </source>
    <size>
        <width>175</width>
        <height>223</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>down</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>21</xmin>
            <ymin>7</ymin>
            <xmax>174</xmax>
            <ymax>210</ymax>
        </bndbox>
    </object>
</annotation>
--------------------------------------------------------------------------------
/VOCdevkit/VOC2007/Annotations/2.xml:
--------------------------------------------------------------------------------
<annotation>
    <folder>JPEGImages</folder>
    <filename>2.jpg</filename>
    <path>E:\handpose_x_gesture_v2\JPEGImages\2.jpg</path>
    <source>
        <database>Unknown</database>
    </source>
    <size>
        <width>274</width>
        <height>295</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>down</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>44</xmin>
            <ymin>20</ymin>
            <xmax>259</xmax>
            <ymax>264</ymax>
        </bndbox>
    </object>
</annotation>
--------------------------------------------------------------------------------
/VOCdevkit/VOC2007/Annotations/3.xml:
--------------------------------------------------------------------------------
<annotation>
    <folder>JPEGImages</folder>
    <filename>3.jpg</filename>
    <path>E:\handpose_x_gesture_v2\JPEGImages\3.jpg</path>
    <source>
        <database>Unknown</database>
    </source>
    <size>
        <width>325</width>
        <height>363</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>down</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>30</xmin>
            <ymin>59</ymin>
            <xmax>261</xmax>
            <ymax>297</ymax>
        </bndbox>
    </object>
</annotation>
--------------------------------------------------------------------------------
/VOCdevkit/VOC2007/Annotations/4.xml:
--------------------------------------------------------------------------------
<annotation>
    <folder>JPEGImages</folder>
    <filename>4.jpg</filename>
    <path>E:\handpose_x_gesture_v2\JPEGImages\4.jpg</path>
    <source>
        <database>Unknown</database>
    </source>
    <size>
        <width>306</width>
        <height>299</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>down</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>44</xmin>
            <ymin>45</ymin>
            <xmax>264</xmax>
            <ymax>256</ymax>
        </bndbox>
    </object>
</annotation>
--------------------------------------------------------------------------------
/VOCdevkit/VOC2007/Annotations/5.xml:
--------------------------------------------------------------------------------
<annotation>
    <folder>JPEGImages</folder>
    <filename>5.jpg</filename>
    <path>E:\handpose_x_gesture_v2\JPEGImages\5.jpg</path>
    <source>
        <database>Unknown</database>
    </source>
    <size>
        <width>191</width>
        <height>211</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>down</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>31</xmin>
            <ymin>19</ymin>
            <xmax>152</xmax>
            <ymax>167</ymax>
        </bndbox>
    </object>
</annotation>
--------------------------------------------------------------------------------
/VOCdevkit/VOC2007/Annotations/README.md:
--------------------------------------------------------------------------------
Stores the annotation (label) files.
--------------------------------------------------------------------------------
/VOCdevkit/VOC2007/ImageSets/Main/README.md:
--------------------------------------------------------------------------------
Stores the training index files.
--------------------------------------------------------------------------------
/VOCdevkit/VOC2007/ImageSets/Main/test.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/VOCdevkit/VOC2007/ImageSets/Main/test.txt
--------------------------------------------------------------------------------
/VOCdevkit/VOC2007/ImageSets/Main/train.txt:
--------------------------------------------------------------------------------
1 | 10 2 | 100 3 | 1000 4 | 1001 5 | 1002 6 | 1003 7 | 1005 8 | 1006
9 | 1007 10 | 1008 11 | 1009 12 | 101 13 | 1010 14 | 1011 15 | 1012 16 | 1013 17 | 1014 18 | 1015 19 | 1016 20 | 1017 21 | 1019 22 | 102 23 | 1020 24 | 1021 25 | 1022 26 | 1023 27 | 1024 28 | 1025 29 | 1026 30 | 1028 31 | 1029 32 | 103 33 | 1030 34 | 1031 35 | 1032 36 | 1033 37 | 1034 38 | 1035 39 | 1036 40 | 1037 41 | 1038 42 | 1039 43 | 104 44 | 1040 45 | 1041 46 | 1042 47 | 1043 48 | 1044 49 | 1045 50 | 1046 51 | 1047 52 | 1048 53 | 105 54 | 1050 55 | 1051 56 | 1052 57 | 1053 58 | 1054 59 | 1056 60 | 1057 61 | 1058 62 | 1059 63 | 106 64 | 1060 65 | 1061 66 | 1062 67 | 1063 68 | 1064 69 | 1065 70 | 1066 71 | 1067 72 | 1068 73 | 1069 74 | 107 75 | 1070 76 | 1071 77 | 1072 78 | 1074 79 | 1075 80 | 1076 81 | 1077 82 | 1078 83 | 1079 84 | 108 85 | 1080 86 | 1081 87 | 1083 88 | 1084 89 | 1085 90 | 1086 91 | 1088 92 | 1089 93 | 109 94 | 1090 95 | 1091 96 | 1094 97 | 1096 98 | 1098 99 | 11 100 | 110 101 | 1100 102 | 1101 103 | 1102 104 | 1103 105 | 1104 106 | 1105 107 | 1106 108 | 1107 109 | 1108 110 | 1109 111 | 111 112 | 1111 113 | 1112 114 | 1113 115 | 1114 116 | 1115 117 | 1116 118 | 1117 119 | 1118 120 | 1119 121 | 112 122 | 1120 123 | 1121 124 | 1122 125 | 1123 126 | 1124 127 | 1125 128 | 1126 129 | 1127 130 | 1130 131 | 1131 132 | 1132 133 | 1133 134 | 1134 135 | 1135 136 | 1136 137 | 1137 138 | 1138 139 | 1139 140 | 114 141 | 1140 142 | 1141 143 | 1142 144 | 1144 145 | 1145 146 | 1146 147 | 1147 148 | 1148 149 | 1149 150 | 1150 151 | 1151 152 | 1152 153 | 1153 154 | 1155 155 | 1156 156 | 1157 157 | 1158 158 | 116 159 | 1160 160 | 1161 161 | 1162 162 | 1163 163 | 1164 164 | 1165 165 | 1166 166 | 1167 167 | 1168 168 | 1169 169 | 117 170 | 1170 171 | 1171 172 | 1172 173 | 1173 174 | 1174 175 | 1175 176 | 1176 177 | 1178 178 | 1179 179 | 118 180 | 1180 181 | 1181 182 | 1182 183 | 1183 184 | 1184 185 | 1185 186 | 1186 187 | 1187 188 | 1188 189 | 1189 190 | 119 191 | 1190 192 | 1191 193 | 1192 194 | 1193 195 | 1195 196 | 1196 197 | 1197 198 | 1198 199 | 1199 200 | 12 201 | 120 202 | 1202 203 | 1203 204 | 1204 205 | 1205 206 | 1206 207 | 1207 208 | 1208 209 | 1209 210 | 121 211 | 1210 212 | 1211 213 | 1213 214 | 1214 215 | 1216 216 | 1217 217 | 1218 218 | 1219 219 | 122 220 | 1220 221 | 1221 222 | 1222 223 | 1223 224 | 1224 225 | 1225 226 | 1226 227 | 1227 228 | 1228 229 | 1229 230 | 123 231 | 1230 232 | 1231 233 | 1232 234 | 1234 235 | 1235 236 | 1236 237 | 1237 238 | 1238 239 | 1239 240 | 124 241 | 1240 242 | 1241 243 | 1242 244 | 1243 245 | 1244 246 | 1247 247 | 1248 248 | 1249 249 | 1250 250 | 1251 251 | 1252 252 | 1253 253 | 1254 254 | 1255 255 | 1256 256 | 1257 257 | 1259 258 | 126 259 | 1260 260 | 1261 261 | 1263 262 | 1264 263 | 1265 264 | 1266 265 | 1267 266 | 1268 267 | 1269 268 | 127 269 | 1270 270 | 1271 271 | 1272 272 | 1273 273 | 1274 274 | 1275 275 | 1276 276 | 1277 277 | 1278 278 | 1279 279 | 128 280 | 1280 281 | 1281 282 | 1282 283 | 1284 284 | 1285 285 | 1286 286 | 1287 287 | 1288 288 | 1289 289 | 129 290 | 1290 291 | 1291 292 | 1292 293 | 1293 294 | 1295 295 | 1296 296 | 1297 297 | 1298 298 | 1299 299 | 13 300 | 130 301 | 1300 302 | 1301 303 | 1302 304 | 1303 305 | 1304 306 | 1305 307 | 1306 308 | 1307 309 | 1308 310 | 131 311 | 1310 312 | 1311 313 | 1312 314 | 1313 315 | 1314 316 | 1316 317 | 1317 318 | 1318 319 | 1320 320 | 1321 321 | 1322 322 | 1323 323 | 1324 324 | 1325 325 | 1326 326 | 1327 327 | 1328 328 | 1329 329 | 133 330 | 1330 331 | 1331 332 | 1332 333 | 1333 334 | 1334 335 | 1335 336 | 1336 337 | 1338 338 | 134 339 | 1340 340 | 1342 341 | 1343 342 | 1344 343 | 
1345 344 | 1346 345 | 1347 346 | 1348 347 | 1349 348 | 135 349 | 1350 350 | 1352 351 | 1353 352 | 1354 353 | 1355 354 | 1356 355 | 1357 356 | 1358 357 | 1359 358 | 136 359 | 1360 360 | 1361 361 | 1362 362 | 1363 363 | 1364 364 | 1365 365 | 1366 366 | 1367 367 | 1369 368 | 137 369 | 1370 370 | 1372 371 | 1373 372 | 1374 373 | 1375 374 | 1376 375 | 1377 376 | 1378 377 | 1379 378 | 138 379 | 1380 380 | 1381 381 | 1382 382 | 1383 383 | 1384 384 | 1385 385 | 1386 386 | 1387 387 | 1388 388 | 1389 389 | 139 390 | 1390 391 | 1391 392 | 1392 393 | 1394 394 | 1395 395 | 1396 396 | 1397 397 | 1398 398 | 1399 399 | 14 400 | 1400 401 | 1401 402 | 1402 403 | 1403 404 | 1404 405 | 1405 406 | 1406 407 | 1407 408 | 1409 409 | 141 410 | 1410 411 | 1411 412 | 1412 413 | 1413 414 | 1414 415 | 1416 416 | 1417 417 | 1418 418 | 142 419 | 1420 420 | 1421 421 | 1422 422 | 1424 423 | 1425 424 | 1426 425 | 1427 426 | 1428 427 | 1429 428 | 143 429 | 1430 430 | 1431 431 | 1432 432 | 1433 433 | 1434 434 | 1435 435 | 1436 436 | 1437 437 | 1439 438 | 1440 439 | 1441 440 | 1442 441 | 1443 442 | 1444 443 | 1445 444 | 1446 445 | 1448 446 | 1449 447 | 145 448 | 1450 449 | 1452 450 | 1454 451 | 1456 452 | 1458 453 | 1459 454 | 146 455 | 1460 456 | 1461 457 | 1462 458 | 1463 459 | 1464 460 | 1465 461 | 1466 462 | 1467 463 | 1468 464 | 147 465 | 1470 466 | 1471 467 | 1472 468 | 1473 469 | 1474 470 | 1475 471 | 1476 472 | 1477 473 | 1478 474 | 1479 475 | 148 476 | 1480 477 | 1482 478 | 1483 479 | 1484 480 | 1485 481 | 1487 482 | 1488 483 | 1489 484 | 149 485 | 1490 486 | 1491 487 | 1492 488 | 1494 489 | 1495 490 | 1496 491 | 1497 492 | 1498 493 | 1499 494 | 15 495 | 150 496 | 1500 497 | 1501 498 | 1502 499 | 1503 500 | 1504 501 | 1506 502 | 1507 503 | 1508 504 | 1509 505 | 151 506 | 1510 507 | 1511 508 | 1512 509 | 1513 510 | 1515 511 | 1516 512 | 1517 513 | 1518 514 | 1519 515 | 152 516 | 1520 517 | 1521 518 | 1522 519 | 1523 520 | 1524 521 | 1525 522 | 1526 523 | 1527 524 | 1528 525 | 1529 526 | 1530 527 | 1532 528 | 1533 529 | 1534 530 | 1535 531 | 1536 532 | 1537 533 | 1539 534 | 154 535 | 1540 536 | 1541 537 | 1542 538 | 1543 539 | 1544 540 | 1545 541 | 1546 542 | 1547 543 | 1548 544 | 1549 545 | 1550 546 | 1551 547 | 1552 548 | 1554 549 | 1555 550 | 1556 551 | 1557 552 | 1558 553 | 1559 554 | 156 555 | 1560 556 | 1561 557 | 1562 558 | 1563 559 | 1564 560 | 1565 561 | 1566 562 | 1567 563 | 1568 564 | 1569 565 | 157 566 | 1571 567 | 1572 568 | 1573 569 | 1574 570 | 1575 571 | 1576 572 | 1577 573 | 1578 574 | 1579 575 | 158 576 | 1580 577 | 1581 578 | 1582 579 | 1583 580 | 1584 581 | 1585 582 | 1586 583 | 1587 584 | 1588 585 | 1589 586 | 159 587 | 1591 588 | 1592 589 | 1593 590 | 1594 591 | 1595 592 | 1596 593 | 1597 594 | 1598 595 | 1599 596 | 16 597 | 160 598 | 1600 599 | 161 600 | 162 601 | 163 602 | 164 603 | 165 604 | 166 605 | 168 606 | 169 607 | 17 608 | 170 609 | 171 610 | 172 611 | 173 612 | 174 613 | 175 614 | 176 615 | 177 616 | 178 617 | 18 618 | 182 619 | 183 620 | 184 621 | 185 622 | 186 623 | 187 624 | 189 625 | 19 626 | 190 627 | 193 628 | 194 629 | 195 630 | 196 631 | 197 632 | 198 633 | 199 634 | 2 635 | 200 636 | 201 637 | 202 638 | 203 639 | 204 640 | 205 641 | 206 642 | 207 643 | 208 644 | 209 645 | 21 646 | 210 647 | 212 648 | 213 649 | 214 650 | 215 651 | 216 652 | 217 653 | 218 654 | 219 655 | 22 656 | 220 657 | 221 658 | 222 659 | 223 660 | 224 661 | 225 662 | 226 663 | 227 664 | 228 665 | 230 666 | 231 667 | 232 668 | 233 669 | 234 670 | 235 671 | 236 672 | 237 673 | 238 674 | 239 675 | 24 676 | 240 
677 | 241 678 | 242 679 | 243 680 | 244 681 | 245 682 | 246 683 | 247 684 | 25 685 | 250 686 | 251 687 | 252 688 | 253 689 | 254 690 | 255 691 | 256 692 | 257 693 | 258 694 | 259 695 | 26 696 | 260 697 | 261 698 | 262 699 | 263 700 | 265 701 | 266 702 | 267 703 | 268 704 | 269 705 | 27 706 | 270 707 | 271 708 | 272 709 | 273 710 | 274 711 | 275 712 | 276 713 | 277 714 | 278 715 | 279 716 | 28 717 | 280 718 | 281 719 | 284 720 | 285 721 | 286 722 | 287 723 | 288 724 | 291 725 | 294 726 | 295 727 | 296 728 | 297 729 | 298 730 | 299 731 | 3 732 | 30 733 | 300 734 | 301 735 | 302 736 | 303 737 | 304 738 | 305 739 | 306 740 | 307 741 | 308 742 | 309 743 | 31 744 | 310 745 | 311 746 | 312 747 | 313 748 | 314 749 | 315 750 | 316 751 | 317 752 | 318 753 | 319 754 | 32 755 | 320 756 | 321 757 | 322 758 | 323 759 | 324 760 | 325 761 | 326 762 | 327 763 | 328 764 | 329 765 | 33 766 | 330 767 | 331 768 | 332 769 | 334 770 | 335 771 | 337 772 | 338 773 | 339 774 | 34 775 | 340 776 | 341 777 | 342 778 | 343 779 | 344 780 | 345 781 | 346 782 | 347 783 | 348 784 | 349 785 | 35 786 | 350 787 | 351 788 | 353 789 | 354 790 | 355 791 | 356 792 | 357 793 | 358 794 | 36 795 | 360 796 | 361 797 | 362 798 | 363 799 | 364 800 | 365 801 | 366 802 | 367 803 | 368 804 | 369 805 | 37 806 | 370 807 | 371 808 | 372 809 | 373 810 | 374 811 | 375 812 | 376 813 | 377 814 | 378 815 | 379 816 | 38 817 | 380 818 | 382 819 | 383 820 | 384 821 | 385 822 | 386 823 | 387 824 | 388 825 | 389 826 | 39 827 | 390 828 | 391 829 | 392 830 | 393 831 | 394 832 | 395 833 | 396 834 | 397 835 | 398 836 | 399 837 | 4 838 | 40 839 | 400 840 | 401 841 | 402 842 | 404 843 | 405 844 | 406 845 | 407 846 | 408 847 | 409 848 | 41 849 | 410 850 | 411 851 | 412 852 | 413 853 | 414 854 | 415 855 | 416 856 | 417 857 | 418 858 | 419 859 | 42 860 | 420 861 | 421 862 | 422 863 | 423 864 | 425 865 | 426 866 | 427 867 | 428 868 | 429 869 | 43 870 | 430 871 | 431 872 | 432 873 | 433 874 | 434 875 | 436 876 | 437 877 | 438 878 | 439 879 | 44 880 | 440 881 | 441 882 | 442 883 | 443 884 | 444 885 | 445 886 | 446 887 | 447 888 | 448 889 | 449 890 | 45 891 | 450 892 | 451 893 | 452 894 | 453 895 | 454 896 | 455 897 | 457 898 | 458 899 | 459 900 | 46 901 | 461 902 | 462 903 | 464 904 | 465 905 | 466 906 | 467 907 | 468 908 | 47 909 | 470 910 | 471 911 | 472 912 | 473 913 | 474 914 | 475 915 | 476 916 | 477 917 | 478 918 | 479 919 | 48 920 | 480 921 | 481 922 | 482 923 | 483 924 | 484 925 | 485 926 | 486 927 | 487 928 | 488 929 | 489 930 | 49 931 | 490 932 | 491 933 | 492 934 | 493 935 | 494 936 | 495 937 | 496 938 | 497 939 | 498 940 | 499 941 | 5 942 | 50 943 | 500 944 | 501 945 | 502 946 | 503 947 | 504 948 | 505 949 | 506 950 | 507 951 | 508 952 | 509 953 | 51 954 | 512 955 | 513 956 | 514 957 | 515 958 | 516 959 | 517 960 | 518 961 | 519 962 | 520 963 | 521 964 | 522 965 | 523 966 | 524 967 | 526 968 | 527 969 | 528 970 | 529 971 | 53 972 | 530 973 | 532 974 | 533 975 | 534 976 | 536 977 | 537 978 | 539 979 | 54 980 | 540 981 | 541 982 | 542 983 | 543 984 | 544 985 | 545 986 | 546 987 | 547 988 | 549 989 | 55 990 | 551 991 | 552 992 | 553 993 | 554 994 | 556 995 | 557 996 | 559 997 | 56 998 | 561 999 | 562 1000 | 563 1001 | 564 1002 | 565 1003 | 566 1004 | 567 1005 | 568 1006 | 569 1007 | 57 1008 | 570 1009 | 571 1010 | 572 1011 | 573 1012 | 575 1013 | 577 1014 | 578 1015 | 579 1016 | 58 1017 | 580 1018 | 581 1019 | 582 1020 | 583 1021 | 585 1022 | 586 1023 | 587 1024 | 588 1025 | 589 1026 | 59 1027 | 590 1028 | 591 1029 | 592 1030 | 593 1031 | 594 1032 | 596 
1033 | 597 1034 | 598 1035 | 599 1036 | 6 1037 | 600 1038 | 601 1039 | 602 1040 | 603 1041 | 604 1042 | 605 1043 | 606 1044 | 608 1045 | 609 1046 | 61 1047 | 610 1048 | 611 1049 | 612 1050 | 613 1051 | 614 1052 | 615 1053 | 616 1054 | 617 1055 | 618 1056 | 619 1057 | 62 1058 | 620 1059 | 621 1060 | 622 1061 | 623 1062 | 625 1063 | 626 1064 | 627 1065 | 628 1066 | 629 1067 | 63 1068 | 630 1069 | 631 1070 | 632 1071 | 633 1072 | 634 1073 | 635 1074 | 636 1075 | 637 1076 | 638 1077 | 639 1078 | 64 1079 | 640 1080 | 641 1081 | 642 1082 | 643 1083 | 644 1084 | 645 1085 | 646 1086 | 647 1087 | 648 1088 | 649 1089 | 65 1090 | 650 1091 | 651 1092 | 652 1093 | 653 1094 | 654 1095 | 655 1096 | 656 1097 | 657 1098 | 658 1099 | 66 1100 | 660 1101 | 662 1102 | 663 1103 | 664 1104 | 665 1105 | 666 1106 | 667 1107 | 668 1108 | 669 1109 | 67 1110 | 670 1111 | 671 1112 | 672 1113 | 673 1114 | 675 1115 | 676 1116 | 677 1117 | 678 1118 | 679 1119 | 68 1120 | 680 1121 | 681 1122 | 682 1123 | 683 1124 | 684 1125 | 685 1126 | 686 1127 | 687 1128 | 688 1129 | 689 1130 | 69 1131 | 690 1132 | 691 1133 | 692 1134 | 693 1135 | 694 1136 | 695 1137 | 696 1138 | 698 1139 | 699 1140 | 70 1141 | 700 1142 | 701 1143 | 702 1144 | 703 1145 | 704 1146 | 705 1147 | 706 1148 | 708 1149 | 709 1150 | 71 1151 | 710 1152 | 711 1153 | 713 1154 | 714 1155 | 715 1156 | 716 1157 | 717 1158 | 718 1159 | 719 1160 | 72 1161 | 720 1162 | 721 1163 | 722 1164 | 723 1165 | 724 1166 | 725 1167 | 726 1168 | 727 1169 | 729 1170 | 730 1171 | 731 1172 | 732 1173 | 733 1174 | 734 1175 | 735 1176 | 736 1177 | 737 1178 | 738 1179 | 739 1180 | 740 1181 | 741 1182 | 742 1183 | 743 1184 | 744 1185 | 745 1186 | 746 1187 | 747 1188 | 748 1189 | 749 1190 | 75 1191 | 750 1192 | 751 1193 | 752 1194 | 753 1195 | 755 1196 | 756 1197 | 757 1198 | 759 1199 | 76 1200 | 760 1201 | 761 1202 | 762 1203 | 763 1204 | 764 1205 | 766 1206 | 767 1207 | 769 1208 | 77 1209 | 770 1210 | 771 1211 | 772 1212 | 773 1213 | 774 1214 | 775 1215 | 776 1216 | 777 1217 | 778 1218 | 779 1219 | 78 1220 | 780 1221 | 781 1222 | 783 1223 | 784 1224 | 785 1225 | 786 1226 | 787 1227 | 788 1228 | 789 1229 | 79 1230 | 790 1231 | 791 1232 | 792 1233 | 793 1234 | 794 1235 | 795 1236 | 796 1237 | 797 1238 | 798 1239 | 799 1240 | 8 1241 | 80 1242 | 800 1243 | 801 1244 | 802 1245 | 803 1246 | 804 1247 | 805 1248 | 806 1249 | 807 1250 | 808 1251 | 809 1252 | 81 1253 | 810 1254 | 812 1255 | 813 1256 | 814 1257 | 815 1258 | 816 1259 | 819 1260 | 82 1261 | 820 1262 | 821 1263 | 822 1264 | 823 1265 | 824 1266 | 825 1267 | 826 1268 | 827 1269 | 828 1270 | 829 1271 | 83 1272 | 830 1273 | 831 1274 | 832 1275 | 833 1276 | 834 1277 | 836 1278 | 837 1279 | 838 1280 | 839 1281 | 84 1282 | 840 1283 | 841 1284 | 842 1285 | 843 1286 | 844 1287 | 845 1288 | 846 1289 | 847 1290 | 848 1291 | 849 1292 | 85 1293 | 850 1294 | 851 1295 | 852 1296 | 853 1297 | 854 1298 | 855 1299 | 856 1300 | 857 1301 | 858 1302 | 859 1303 | 86 1304 | 860 1305 | 861 1306 | 862 1307 | 863 1308 | 864 1309 | 866 1310 | 867 1311 | 868 1312 | 869 1313 | 87 1314 | 870 1315 | 872 1316 | 873 1317 | 874 1318 | 876 1319 | 877 1320 | 878 1321 | 879 1322 | 88 1323 | 880 1324 | 882 1325 | 883 1326 | 884 1327 | 885 1328 | 886 1329 | 887 1330 | 888 1331 | 889 1332 | 89 1333 | 890 1334 | 891 1335 | 892 1336 | 893 1337 | 895 1338 | 896 1339 | 897 1340 | 899 1341 | 9 1342 | 900 1343 | 901 1344 | 902 1345 | 903 1346 | 904 1347 | 905 1348 | 906 1349 | 908 1350 | 909 1351 | 91 1352 | 910 1353 | 911 1354 | 912 1355 | 913 1356 | 914 1357 | 915 1358 | 916 
1359 | 917 1360 | 918 1361 | 919 1362 | 92 1363 | 920 1364 | 921 1365 | 923 1366 | 924 1367 | 925 1368 | 926 1369 | 927 1370 | 928 1371 | 929 1372 | 93 1373 | 930 1374 | 931 1375 | 932 1376 | 933 1377 | 934 1378 | 935 1379 | 936 1380 | 938 1381 | 94 1382 | 940 1383 | 941 1384 | 943 1385 | 944 1386 | 945 1387 | 946 1388 | 947 1389 | 948 1390 | 949 1391 | 95 1392 | 950 1393 | 951 1394 | 952 1395 | 953 1396 | 954 1397 | 955 1398 | 956 1399 | 957 1400 | 958 1401 | 959 1402 | 96 1403 | 960 1404 | 961 1405 | 963 1406 | 965 1407 | 966 1408 | 967 1409 | 968 1410 | 969 1411 | 97 1412 | 970 1413 | 971 1414 | 972 1415 | 973 1416 | 974 1417 | 975 1418 | 976 1419 | 977 1420 | 978 1421 | 979 1422 | 98 1423 | 981 1424 | 982 1425 | 983 1426 | 984 1427 | 985 1428 | 986 1429 | 987 1430 | 988 1431 | 989 1432 | 99 1433 | 990 1434 | 991 1435 | 992 1436 | 994 1437 | 995 1438 | 996 1439 | 997 1440 | 998 1441 | -------------------------------------------------------------------------------- /VOCdevkit/VOC2007/ImageSets/Main/trainval.txt: -------------------------------------------------------------------------------- 1 | 1 2 | 10 3 | 100 4 | 1000 5 | 1001 6 | 1002 7 | 1003 8 | 1004 9 | 1005 10 | 1006 11 | 1007 12 | 1008 13 | 1009 14 | 101 15 | 1010 16 | 1011 17 | 1012 18 | 1013 19 | 1014 20 | 1015 21 | 1016 22 | 1017 23 | 1018 24 | 1019 25 | 102 26 | 1020 27 | 1021 28 | 1022 29 | 1023 30 | 1024 31 | 1025 32 | 1026 33 | 1027 34 | 1028 35 | 1029 36 | 103 37 | 1030 38 | 1031 39 | 1032 40 | 1033 41 | 1034 42 | 1035 43 | 1036 44 | 1037 45 | 1038 46 | 1039 47 | 104 48 | 1040 49 | 1041 50 | 1042 51 | 1043 52 | 1044 53 | 1045 54 | 1046 55 | 1047 56 | 1048 57 | 1049 58 | 105 59 | 1050 60 | 1051 61 | 1052 62 | 1053 63 | 1054 64 | 1055 65 | 1056 66 | 1057 67 | 1058 68 | 1059 69 | 106 70 | 1060 71 | 1061 72 | 1062 73 | 1063 74 | 1064 75 | 1065 76 | 1066 77 | 1067 78 | 1068 79 | 1069 80 | 107 81 | 1070 82 | 1071 83 | 1072 84 | 1073 85 | 1074 86 | 1075 87 | 1076 88 | 1077 89 | 1078 90 | 1079 91 | 108 92 | 1080 93 | 1081 94 | 1082 95 | 1083 96 | 1084 97 | 1085 98 | 1086 99 | 1087 100 | 1088 101 | 1089 102 | 109 103 | 1090 104 | 1091 105 | 1092 106 | 1093 107 | 1094 108 | 1095 109 | 1096 110 | 1097 111 | 1098 112 | 1099 113 | 11 114 | 110 115 | 1100 116 | 1101 117 | 1102 118 | 1103 119 | 1104 120 | 1105 121 | 1106 122 | 1107 123 | 1108 124 | 1109 125 | 111 126 | 1110 127 | 1111 128 | 1112 129 | 1113 130 | 1114 131 | 1115 132 | 1116 133 | 1117 134 | 1118 135 | 1119 136 | 112 137 | 1120 138 | 1121 139 | 1122 140 | 1123 141 | 1124 142 | 1125 143 | 1126 144 | 1127 145 | 1128 146 | 1129 147 | 113 148 | 1130 149 | 1131 150 | 1132 151 | 1133 152 | 1134 153 | 1135 154 | 1136 155 | 1137 156 | 1138 157 | 1139 158 | 114 159 | 1140 160 | 1141 161 | 1142 162 | 1143 163 | 1144 164 | 1145 165 | 1146 166 | 1147 167 | 1148 168 | 1149 169 | 115 170 | 1150 171 | 1151 172 | 1152 173 | 1153 174 | 1154 175 | 1155 176 | 1156 177 | 1157 178 | 1158 179 | 1159 180 | 116 181 | 1160 182 | 1161 183 | 1162 184 | 1163 185 | 1164 186 | 1165 187 | 1166 188 | 1167 189 | 1168 190 | 1169 191 | 117 192 | 1170 193 | 1171 194 | 1172 195 | 1173 196 | 1174 197 | 1175 198 | 1176 199 | 1177 200 | 1178 201 | 1179 202 | 118 203 | 1180 204 | 1181 205 | 1182 206 | 1183 207 | 1184 208 | 1185 209 | 1186 210 | 1187 211 | 1188 212 | 1189 213 | 119 214 | 1190 215 | 1191 216 | 1192 217 | 1193 218 | 1194 219 | 1195 220 | 1196 221 | 1197 222 | 1198 223 | 1199 224 | 12 225 | 120 226 | 1200 227 | 1201 228 | 1202 229 | 1203 230 | 1204 231 | 1205 232 | 1206 233 | 1207 234 | 1208 235 | 
1209 236 | 121 237 | 1210 238 | 1211 239 | 1212 240 | 1213 241 | 1214 242 | 1215 243 | 1216 244 | 1217 245 | 1218 246 | 1219 247 | 122 248 | 1220 249 | 1221 250 | 1222 251 | 1223 252 | 1224 253 | 1225 254 | 1226 255 | 1227 256 | 1228 257 | 1229 258 | 123 259 | 1230 260 | 1231 261 | 1232 262 | 1233 263 | 1234 264 | 1235 265 | 1236 266 | 1237 267 | 1238 268 | 1239 269 | 124 270 | 1240 271 | 1241 272 | 1242 273 | 1243 274 | 1244 275 | 1245 276 | 1246 277 | 1247 278 | 1248 279 | 1249 280 | 125 281 | 1250 282 | 1251 283 | 1252 284 | 1253 285 | 1254 286 | 1255 287 | 1256 288 | 1257 289 | 1258 290 | 1259 291 | 126 292 | 1260 293 | 1261 294 | 1262 295 | 1263 296 | 1264 297 | 1265 298 | 1266 299 | 1267 300 | 1268 301 | 1269 302 | 127 303 | 1270 304 | 1271 305 | 1272 306 | 1273 307 | 1274 308 | 1275 309 | 1276 310 | 1277 311 | 1278 312 | 1279 313 | 128 314 | 1280 315 | 1281 316 | 1282 317 | 1283 318 | 1284 319 | 1285 320 | 1286 321 | 1287 322 | 1288 323 | 1289 324 | 129 325 | 1290 326 | 1291 327 | 1292 328 | 1293 329 | 1294 330 | 1295 331 | 1296 332 | 1297 333 | 1298 334 | 1299 335 | 13 336 | 130 337 | 1300 338 | 1301 339 | 1302 340 | 1303 341 | 1304 342 | 1305 343 | 1306 344 | 1307 345 | 1308 346 | 1309 347 | 131 348 | 1310 349 | 1311 350 | 1312 351 | 1313 352 | 1314 353 | 1315 354 | 1316 355 | 1317 356 | 1318 357 | 1319 358 | 132 359 | 1320 360 | 1321 361 | 1322 362 | 1323 363 | 1324 364 | 1325 365 | 1326 366 | 1327 367 | 1328 368 | 1329 369 | 133 370 | 1330 371 | 1331 372 | 1332 373 | 1333 374 | 1334 375 | 1335 376 | 1336 377 | 1337 378 | 1338 379 | 1339 380 | 134 381 | 1340 382 | 1341 383 | 1342 384 | 1343 385 | 1344 386 | 1345 387 | 1346 388 | 1347 389 | 1348 390 | 1349 391 | 135 392 | 1350 393 | 1351 394 | 1352 395 | 1353 396 | 1354 397 | 1355 398 | 1356 399 | 1357 400 | 1358 401 | 1359 402 | 136 403 | 1360 404 | 1361 405 | 1362 406 | 1363 407 | 1364 408 | 1365 409 | 1366 410 | 1367 411 | 1368 412 | 1369 413 | 137 414 | 1370 415 | 1371 416 | 1372 417 | 1373 418 | 1374 419 | 1375 420 | 1376 421 | 1377 422 | 1378 423 | 1379 424 | 138 425 | 1380 426 | 1381 427 | 1382 428 | 1383 429 | 1384 430 | 1385 431 | 1386 432 | 1387 433 | 1388 434 | 1389 435 | 139 436 | 1390 437 | 1391 438 | 1392 439 | 1393 440 | 1394 441 | 1395 442 | 1396 443 | 1397 444 | 1398 445 | 1399 446 | 14 447 | 140 448 | 1400 449 | 1401 450 | 1402 451 | 1403 452 | 1404 453 | 1405 454 | 1406 455 | 1407 456 | 1408 457 | 1409 458 | 141 459 | 1410 460 | 1411 461 | 1412 462 | 1413 463 | 1414 464 | 1415 465 | 1416 466 | 1417 467 | 1418 468 | 1419 469 | 142 470 | 1420 471 | 1421 472 | 1422 473 | 1423 474 | 1424 475 | 1425 476 | 1426 477 | 1427 478 | 1428 479 | 1429 480 | 143 481 | 1430 482 | 1431 483 | 1432 484 | 1433 485 | 1434 486 | 1435 487 | 1436 488 | 1437 489 | 1438 490 | 1439 491 | 144 492 | 1440 493 | 1441 494 | 1442 495 | 1443 496 | 1444 497 | 1445 498 | 1446 499 | 1447 500 | 1448 501 | 1449 502 | 145 503 | 1450 504 | 1451 505 | 1452 506 | 1453 507 | 1454 508 | 1455 509 | 1456 510 | 1457 511 | 1458 512 | 1459 513 | 146 514 | 1460 515 | 1461 516 | 1462 517 | 1463 518 | 1464 519 | 1465 520 | 1466 521 | 1467 522 | 1468 523 | 1469 524 | 147 525 | 1470 526 | 1471 527 | 1472 528 | 1473 529 | 1474 530 | 1475 531 | 1476 532 | 1477 533 | 1478 534 | 1479 535 | 148 536 | 1480 537 | 1481 538 | 1482 539 | 1483 540 | 1484 541 | 1485 542 | 1486 543 | 1487 544 | 1488 545 | 1489 546 | 149 547 | 1490 548 | 1491 549 | 1492 550 | 1493 551 | 1494 552 | 1495 553 | 1496 554 | 1497 555 | 1498 556 | 1499 557 | 15 558 | 150 559 | 1500 560 | 1501 561 | 1502 
562 | 1503 563 | 1504 564 | 1505 565 | 1506 566 | 1507 567 | 1508 568 | 1509 569 | 151 570 | 1510 571 | 1511 572 | 1512 573 | 1513 574 | 1514 575 | 1515 576 | 1516 577 | 1517 578 | 1518 579 | 1519 580 | 152 581 | 1520 582 | 1521 583 | 1522 584 | 1523 585 | 1524 586 | 1525 587 | 1526 588 | 1527 589 | 1528 590 | 1529 591 | 153 592 | 1530 593 | 1531 594 | 1532 595 | 1533 596 | 1534 597 | 1535 598 | 1536 599 | 1537 600 | 1538 601 | 1539 602 | 154 603 | 1540 604 | 1541 605 | 1542 606 | 1543 607 | 1544 608 | 1545 609 | 1546 610 | 1547 611 | 1548 612 | 1549 613 | 155 614 | 1550 615 | 1551 616 | 1552 617 | 1553 618 | 1554 619 | 1555 620 | 1556 621 | 1557 622 | 1558 623 | 1559 624 | 156 625 | 1560 626 | 1561 627 | 1562 628 | 1563 629 | 1564 630 | 1565 631 | 1566 632 | 1567 633 | 1568 634 | 1569 635 | 157 636 | 1570 637 | 1571 638 | 1572 639 | 1573 640 | 1574 641 | 1575 642 | 1576 643 | 1577 644 | 1578 645 | 1579 646 | 158 647 | 1580 648 | 1581 649 | 1582 650 | 1583 651 | 1584 652 | 1585 653 | 1586 654 | 1587 655 | 1588 656 | 1589 657 | 159 658 | 1590 659 | 1591 660 | 1592 661 | 1593 662 | 1594 663 | 1595 664 | 1596 665 | 1597 666 | 1598 667 | 1599 668 | 16 669 | 160 670 | 1600 671 | 161 672 | 162 673 | 163 674 | 164 675 | 165 676 | 166 677 | 167 678 | 168 679 | 169 680 | 17 681 | 170 682 | 171 683 | 172 684 | 173 685 | 174 686 | 175 687 | 176 688 | 177 689 | 178 690 | 179 691 | 18 692 | 180 693 | 181 694 | 182 695 | 183 696 | 184 697 | 185 698 | 186 699 | 187 700 | 188 701 | 189 702 | 19 703 | 190 704 | 191 705 | 192 706 | 193 707 | 194 708 | 195 709 | 196 710 | 197 711 | 198 712 | 199 713 | 2 714 | 20 715 | 200 716 | 201 717 | 202 718 | 203 719 | 204 720 | 205 721 | 206 722 | 207 723 | 208 724 | 209 725 | 21 726 | 210 727 | 211 728 | 212 729 | 213 730 | 214 731 | 215 732 | 216 733 | 217 734 | 218 735 | 219 736 | 22 737 | 220 738 | 221 739 | 222 740 | 223 741 | 224 742 | 225 743 | 226 744 | 227 745 | 228 746 | 229 747 | 23 748 | 230 749 | 231 750 | 232 751 | 233 752 | 234 753 | 235 754 | 236 755 | 237 756 | 238 757 | 239 758 | 24 759 | 240 760 | 241 761 | 242 762 | 243 763 | 244 764 | 245 765 | 246 766 | 247 767 | 248 768 | 249 769 | 25 770 | 250 771 | 251 772 | 252 773 | 253 774 | 254 775 | 255 776 | 256 777 | 257 778 | 258 779 | 259 780 | 26 781 | 260 782 | 261 783 | 262 784 | 263 785 | 264 786 | 265 787 | 266 788 | 267 789 | 268 790 | 269 791 | 27 792 | 270 793 | 271 794 | 272 795 | 273 796 | 274 797 | 275 798 | 276 799 | 277 800 | 278 801 | 279 802 | 28 803 | 280 804 | 281 805 | 282 806 | 283 807 | 284 808 | 285 809 | 286 810 | 287 811 | 288 812 | 289 813 | 29 814 | 290 815 | 291 816 | 292 817 | 293 818 | 294 819 | 295 820 | 296 821 | 297 822 | 298 823 | 299 824 | 3 825 | 30 826 | 300 827 | 301 828 | 302 829 | 303 830 | 304 831 | 305 832 | 306 833 | 307 834 | 308 835 | 309 836 | 31 837 | 310 838 | 311 839 | 312 840 | 313 841 | 314 842 | 315 843 | 316 844 | 317 845 | 318 846 | 319 847 | 32 848 | 320 849 | 321 850 | 322 851 | 323 852 | 324 853 | 325 854 | 326 855 | 327 856 | 328 857 | 329 858 | 33 859 | 330 860 | 331 861 | 332 862 | 333 863 | 333 864 | 334 865 | 335 866 | 336 867 | 337 868 | 338 869 | 339 870 | 34 871 | 340 872 | 341 873 | 342 874 | 343 875 | 344 876 | 345 877 | 346 878 | 347 879 | 348 880 | 349 881 | 35 882 | 350 883 | 351 884 | 352 885 | 353 886 | 354 887 | 355 888 | 356 889 | 357 890 | 358 891 | 359 892 | 36 893 | 360 894 | 361 895 | 362 896 | 363 897 | 364 898 | 365 899 | 366 900 | 367 901 | 368 902 | 369 903 | 37 904 | 370 905 | 371 906 | 372 907 | 373 908 | 374 909 | 375 
910 | 376 911 | 377 912 | 378 913 | 379 914 | 38 915 | 380 916 | 381 917 | 382 918 | 383 919 | 384 920 | 385 921 | 386 922 | 387 923 | 388 924 | 389 925 | 39 926 | 390 927 | 391 928 | 392 929 | 393 930 | 394 931 | 395 932 | 396 933 | 397 934 | 398 935 | 399 936 | 4 937 | 40 938 | 400 939 | 401 940 | 402 941 | 403 942 | 404 943 | 405 944 | 406 945 | 407 946 | 408 947 | 409 948 | 41 949 | 410 950 | 411 951 | 412 952 | 413 953 | 414 954 | 415 955 | 416 956 | 417 957 | 418 958 | 419 959 | 42 960 | 420 961 | 421 962 | 422 963 | 423 964 | 424 965 | 425 966 | 426 967 | 427 968 | 428 969 | 429 970 | 43 971 | 430 972 | 431 973 | 432 974 | 433 975 | 434 976 | 435 977 | 436 978 | 437 979 | 438 980 | 439 981 | 44 982 | 440 983 | 441 984 | 442 985 | 443 986 | 444 987 | 445 988 | 446 989 | 447 990 | 448 991 | 449 992 | 45 993 | 450 994 | 451 995 | 452 996 | 453 997 | 454 998 | 455 999 | 456 1000 | 457 1001 | 458 1002 | 459 1003 | 46 1004 | 460 1005 | 461 1006 | 462 1007 | 463 1008 | 464 1009 | 465 1010 | 466 1011 | 467 1012 | 468 1013 | 469 1014 | 47 1015 | 470 1016 | 471 1017 | 472 1018 | 473 1019 | 474 1020 | 475 1021 | 476 1022 | 477 1023 | 478 1024 | 479 1025 | 48 1026 | 480 1027 | 481 1028 | 482 1029 | 483 1030 | 484 1031 | 485 1032 | 486 1033 | 487 1034 | 488 1035 | 489 1036 | 49 1037 | 490 1038 | 491 1039 | 492 1040 | 493 1041 | 494 1042 | 495 1043 | 496 1044 | 497 1045 | 498 1046 | 499 1047 | 5 1048 | 50 1049 | 500 1050 | 501 1051 | 502 1052 | 503 1053 | 504 1054 | 505 1055 | 506 1056 | 507 1057 | 508 1058 | 509 1059 | 51 1060 | 510 1061 | 511 1062 | 512 1063 | 513 1064 | 514 1065 | 515 1066 | 516 1067 | 517 1068 | 518 1069 | 519 1070 | 52 1071 | 520 1072 | 521 1073 | 522 1074 | 523 1075 | 524 1076 | 525 1077 | 526 1078 | 527 1079 | 528 1080 | 529 1081 | 53 1082 | 530 1083 | 531 1084 | 532 1085 | 533 1086 | 534 1087 | 535 1088 | 536 1089 | 537 1090 | 538 1091 | 539 1092 | 54 1093 | 540 1094 | 541 1095 | 542 1096 | 543 1097 | 544 1098 | 545 1099 | 546 1100 | 547 1101 | 548 1102 | 549 1103 | 55 1104 | 550 1105 | 551 1106 | 552 1107 | 553 1108 | 554 1109 | 555 1110 | 556 1111 | 557 1112 | 558 1113 | 559 1114 | 56 1115 | 560 1116 | 561 1117 | 562 1118 | 563 1119 | 564 1120 | 565 1121 | 566 1122 | 567 1123 | 568 1124 | 569 1125 | 57 1126 | 570 1127 | 571 1128 | 572 1129 | 573 1130 | 574 1131 | 575 1132 | 576 1133 | 577 1134 | 578 1135 | 579 1136 | 58 1137 | 580 1138 | 581 1139 | 582 1140 | 583 1141 | 584 1142 | 585 1143 | 586 1144 | 587 1145 | 588 1146 | 589 1147 | 59 1148 | 590 1149 | 591 1150 | 592 1151 | 593 1152 | 594 1153 | 595 1154 | 596 1155 | 597 1156 | 598 1157 | 599 1158 | 6 1159 | 60 1160 | 600 1161 | 601 1162 | 602 1163 | 603 1164 | 604 1165 | 605 1166 | 606 1167 | 607 1168 | 608 1169 | 609 1170 | 61 1171 | 610 1172 | 611 1173 | 612 1174 | 613 1175 | 614 1176 | 615 1177 | 616 1178 | 617 1179 | 618 1180 | 619 1181 | 62 1182 | 620 1183 | 621 1184 | 622 1185 | 623 1186 | 624 1187 | 625 1188 | 626 1189 | 627 1190 | 628 1191 | 629 1192 | 63 1193 | 630 1194 | 631 1195 | 632 1196 | 633 1197 | 634 1198 | 635 1199 | 636 1200 | 637 1201 | 638 1202 | 639 1203 | 64 1204 | 640 1205 | 641 1206 | 642 1207 | 643 1208 | 644 1209 | 645 1210 | 646 1211 | 647 1212 | 648 1213 | 649 1214 | 65 1215 | 650 1216 | 651 1217 | 652 1218 | 653 1219 | 654 1220 | 655 1221 | 656 1222 | 657 1223 | 658 1224 | 659 1225 | 66 1226 | 660 1227 | 661 1228 | 662 1229 | 663 1230 | 664 1231 | 665 1232 | 666 1233 | 667 1234 | 668 1235 | 669 1236 | 67 1237 | 670 1238 | 671 1239 | 672 1240 | 673 1241 | 674 1242 | 675 1243 | 676 1244 | 
677 1245 | 678 1246 | 679 1247 | 68 1248 | 680 1249 | 681 1250 | 682 1251 | 683 1252 | 684 1253 | 685 1254 | 686 1255 | 687 1256 | 688 1257 | 689 1258 | 69 1259 | 690 1260 | 691 1261 | 692 1262 | 693 1263 | 694 1264 | 695 1265 | 696 1266 | 697 1267 | 698 1268 | 699 1269 | 7 1270 | 70 1271 | 700 1272 | 701 1273 | 702 1274 | 703 1275 | 704 1276 | 705 1277 | 706 1278 | 707 1279 | 708 1280 | 709 1281 | 71 1282 | 710 1283 | 711 1284 | 712 1285 | 713 1286 | 714 1287 | 715 1288 | 716 1289 | 717 1290 | 718 1291 | 719 1292 | 72 1293 | 720 1294 | 721 1295 | 722 1296 | 723 1297 | 724 1298 | 725 1299 | 726 1300 | 727 1301 | 728 1302 | 729 1303 | 73 1304 | 730 1305 | 731 1306 | 732 1307 | 733 1308 | 734 1309 | 735 1310 | 736 1311 | 737 1312 | 738 1313 | 739 1314 | 74 1315 | 740 1316 | 741 1317 | 742 1318 | 743 1319 | 744 1320 | 745 1321 | 746 1322 | 747 1323 | 748 1324 | 749 1325 | 75 1326 | 750 1327 | 751 1328 | 752 1329 | 753 1330 | 754 1331 | 755 1332 | 756 1333 | 757 1334 | 758 1335 | 759 1336 | 76 1337 | 760 1338 | 761 1339 | 762 1340 | 763 1341 | 764 1342 | 765 1343 | 766 1344 | 767 1345 | 768 1346 | 769 1347 | 77 1348 | 770 1349 | 771 1350 | 772 1351 | 773 1352 | 774 1353 | 775 1354 | 776 1355 | 777 1356 | 778 1357 | 779 1358 | 78 1359 | 780 1360 | 781 1361 | 782 1362 | 783 1363 | 784 1364 | 785 1365 | 786 1366 | 787 1367 | 788 1368 | 789 1369 | 79 1370 | 790 1371 | 791 1372 | 792 1373 | 793 1374 | 794 1375 | 795 1376 | 796 1377 | 797 1378 | 798 1379 | 799 1380 | 8 1381 | 80 1382 | 800 1383 | 801 1384 | 802 1385 | 803 1386 | 804 1387 | 805 1388 | 806 1389 | 807 1390 | 808 1391 | 809 1392 | 81 1393 | 810 1394 | 811 1395 | 812 1396 | 813 1397 | 814 1398 | 815 1399 | 816 1400 | 817 1401 | 818 1402 | 819 1403 | 82 1404 | 820 1405 | 821 1406 | 822 1407 | 823 1408 | 824 1409 | 825 1410 | 826 1411 | 827 1412 | 828 1413 | 829 1414 | 83 1415 | 830 1416 | 831 1417 | 832 1418 | 833 1419 | 834 1420 | 835 1421 | 836 1422 | 837 1423 | 838 1424 | 839 1425 | 84 1426 | 840 1427 | 841 1428 | 842 1429 | 843 1430 | 844 1431 | 845 1432 | 846 1433 | 847 1434 | 848 1435 | 849 1436 | 85 1437 | 850 1438 | 851 1439 | 852 1440 | 853 1441 | 854 1442 | 855 1443 | 856 1444 | 857 1445 | 858 1446 | 859 1447 | 86 1448 | 860 1449 | 861 1450 | 862 1451 | 863 1452 | 864 1453 | 865 1454 | 866 1455 | 867 1456 | 868 1457 | 869 1458 | 87 1459 | 870 1460 | 871 1461 | 872 1462 | 873 1463 | 874 1464 | 875 1465 | 876 1466 | 877 1467 | 878 1468 | 879 1469 | 88 1470 | 880 1471 | 881 1472 | 882 1473 | 883 1474 | 884 1475 | 885 1476 | 886 1477 | 887 1478 | 888 1479 | 889 1480 | 89 1481 | 890 1482 | 891 1483 | 892 1484 | 893 1485 | 894 1486 | 895 1487 | 896 1488 | 897 1489 | 898 1490 | 899 1491 | 9 1492 | 90 1493 | 900 1494 | 901 1495 | 902 1496 | 903 1497 | 904 1498 | 905 1499 | 906 1500 | 907 1501 | 908 1502 | 909 1503 | 91 1504 | 910 1505 | 911 1506 | 912 1507 | 913 1508 | 914 1509 | 915 1510 | 916 1511 | 917 1512 | 918 1513 | 919 1514 | 92 1515 | 920 1516 | 921 1517 | 922 1518 | 923 1519 | 924 1520 | 925 1521 | 926 1522 | 927 1523 | 928 1524 | 929 1525 | 93 1526 | 930 1527 | 931 1528 | 932 1529 | 933 1530 | 934 1531 | 935 1532 | 936 1533 | 937 1534 | 938 1535 | 939 1536 | 94 1537 | 940 1538 | 941 1539 | 942 1540 | 943 1541 | 944 1542 | 945 1543 | 946 1544 | 947 1545 | 948 1546 | 949 1547 | 95 1548 | 950 1549 | 951 1550 | 952 1551 | 953 1552 | 954 1553 | 955 1554 | 956 1555 | 957 1556 | 958 1557 | 959 1558 | 96 1559 | 960 1560 | 961 1561 | 962 1562 | 963 1563 | 964 1564 | 965 1565 | 966 1566 | 967 1567 | 968 1568 | 969 1569 | 97 1570 | 970 
1571 | 971 1572 | 972 1573 | 973 1574 | 974 1575 | 975 1576 | 976 1577 | 977 1578 | 978 1579 | 979 1580 | 98 1581 | 980 1582 | 981 1583 | 982 1584 | 983 1585 | 984 1586 | 985 1587 | 986 1588 | 987 1589 | 988 1590 | 989 1591 | 99 1592 | 990 1593 | 991 1594 | 992 1595 | 993 1596 | 994 1597 | 995 1598 | 996 1599 | 997 1600 | 998 1601 | 999 1602 | -------------------------------------------------------------------------------- /VOCdevkit/VOC2007/ImageSets/Main/val.txt: -------------------------------------------------------------------------------- 1 | 1 2 | 1004 3 | 1018 4 | 1027 5 | 1049 6 | 1055 7 | 1073 8 | 1082 9 | 1087 10 | 1092 11 | 1093 12 | 1095 13 | 1097 14 | 1099 15 | 1110 16 | 1128 17 | 1129 18 | 113 19 | 1143 20 | 115 21 | 1154 22 | 1159 23 | 1177 24 | 1194 25 | 1200 26 | 1201 27 | 1212 28 | 1215 29 | 1233 30 | 1245 31 | 1246 32 | 125 33 | 1258 34 | 1262 35 | 1283 36 | 1294 37 | 1309 38 | 1315 39 | 1319 40 | 132 41 | 1337 42 | 1339 43 | 1341 44 | 1351 45 | 1368 46 | 1371 47 | 1393 48 | 140 49 | 1408 50 | 1415 51 | 1419 52 | 1423 53 | 1438 54 | 144 55 | 1447 56 | 1451 57 | 1453 58 | 1455 59 | 1457 60 | 1469 61 | 1481 62 | 1486 63 | 1493 64 | 1505 65 | 1514 66 | 153 67 | 1531 68 | 1538 69 | 155 70 | 1553 71 | 1570 72 | 1590 73 | 167 74 | 179 75 | 180 76 | 181 77 | 188 78 | 191 79 | 192 80 | 20 81 | 211 82 | 229 83 | 23 84 | 248 85 | 249 86 | 264 87 | 282 88 | 283 89 | 289 90 | 29 91 | 290 92 | 292 93 | 293 94 | 333 95 | 333 96 | 336 97 | 352 98 | 359 99 | 381 100 | 403 101 | 424 102 | 435 103 | 456 104 | 460 105 | 463 106 | 469 107 | 510 108 | 511 109 | 52 110 | 525 111 | 531 112 | 535 113 | 538 114 | 548 115 | 550 116 | 555 117 | 558 118 | 560 119 | 574 120 | 576 121 | 584 122 | 595 123 | 60 124 | 607 125 | 624 126 | 659 127 | 661 128 | 674 129 | 697 130 | 7 131 | 707 132 | 712 133 | 728 134 | 73 135 | 74 136 | 754 137 | 758 138 | 765 139 | 768 140 | 782 141 | 811 142 | 817 143 | 818 144 | 835 145 | 865 146 | 871 147 | 875 148 | 881 149 | 894 150 | 898 151 | 90 152 | 907 153 | 922 154 | 937 155 | 939 156 | 942 157 | 962 158 | 964 159 | 980 160 | 993 161 | 999 162 | -------------------------------------------------------------------------------- /VOCdevkit/VOC2007/JPEGImages/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/VOCdevkit/VOC2007/JPEGImages/1.jpg -------------------------------------------------------------------------------- /VOCdevkit/VOC2007/JPEGImages/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/VOCdevkit/VOC2007/JPEGImages/2.jpg -------------------------------------------------------------------------------- /VOCdevkit/VOC2007/JPEGImages/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/VOCdevkit/VOC2007/JPEGImages/3.jpg -------------------------------------------------------------------------------- /VOCdevkit/VOC2007/JPEGImages/4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/VOCdevkit/VOC2007/JPEGImages/4.jpg -------------------------------------------------------------------------------- /VOCdevkit/VOC2007/JPEGImages/5.jpg: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/VOCdevkit/VOC2007/JPEGImages/5.jpg
--------------------------------------------------------------------------------
/YOLOv4-study学习资料md:
--------------------------------------------------------------------------------
# YOLOv4 Study Materials

![在这里插入图片描述](https://img-blog.csdnimg.cn/e3551e344873465d8ad884f856d652ed.png)

[Tianxiaomo](https://github.com/Tianxiaomo)/**[pytorch-YOLOv4](https://github.com/Tianxiaomo/pytorch-YOLOv4)** (3.5k stars)

PyTorch, ONNX and TensorRT implementation of *YOLOv4*

[WongKinYiu](https://github.com/WongKinYiu)/**[PyTorch_YOLOv4](https://github.com/WongKinYiu/PyTorch_YOLOv4)** (1.5k stars)

PyTorch implementation of *YOLOv4*

[argusswift](https://github.com/argusswift)/**[YOLOv4-pytorch](https://github.com/argusswift/YOLOv4-pytorch)** (1.4k stars)

A PyTorch repository of *YOLOv4*, attentive *YOLOv4* and MobileNet *YOLOv4*, with PASCAL VOC and COCO

[bubbliiiing/*yolov4*-pytorch](https://github.com/bubbliiiing/yolov4-pytorch) (1.2k stars)

A *YoloV4*-PyTorch codebase that can be used to train your own models.

## Extensions

[Bil369](https://github.com/Bil369)/**[MaskDetect-YOLOv4-PyTorch](https://github.com/Bil369/MaskDetect-YOLOv4-PyTorch)**

Mask-wearing detection based on *PyTorch* & *YOLOv4* ⭐ with a self-built face-mask dataset

[bobo0810](https://github.com/bobo0810)/**[PytorchNetHub](https://github.com/bobo0810/PytorchNetHub)**

Project annotations + paper reproductions + algorithm competitions + a PyTorch guide

[Bil369](https://github.com/Bil369)/**[YOLOv4-PyTorch-Simple-Implementation](https://github.com/Bil369/YOLOv4-PyTorch-Simple-Implementation)**

*YOLOv4* *PyTorch* Simple Implementation
--------------------------------------------------------------------------------
/detect.py:
--------------------------------------------------------------------------------
#-----------------------------------------------------------------------#
#   detect.py uses a small trained model to semi-automatically
#   annotate (pre-label) a folder of images.
#-----------------------------------------------------------------------#
import numpy as np
from PIL import Image
from get_yaml import get_config

from yolo import YOLO
from gen_annotation import GEN_Annotations

if __name__ == "__main__":
    # configuration file
    config = get_config()
    yolo = YOLO()

    dir_detect_path = config['dir_detect_path']
    detect_save_path = config['detect_save_path']

    import os
    from tqdm import tqdm

    img_names = os.listdir(dir_detect_path)
    for img_name in tqdm(img_names):

        if img_name.lower().endswith(('.bmp', '.dib', '.png', '.jpg', '.jpeg', '.pbm', '.pgm', '.ppm', '.tif', '.tiff')):
            # if int(img_name.split('.')[0][-4:]) < 355:
            #     continue
            image_path = os.path.join(dir_detect_path, img_name)
            image = Image.open(image_path)
            boxes = yolo.get_box(image)
            if not os.path.exists(detect_save_path):
                os.makedirs(detect_save_path)

            annotation = GEN_Annotations(img_name)
            h, w = np.array(np.shape(image)[0:2])  # np.shape returns (height, width, channels)
            annotation.set_size(w, h, 3)
            if boxes:
                for box in boxes:
                    label, ymin, xmin, ymax, xmax = box
                    annotation.add_pic_attr(label, xmin, ymin, xmax, ymax)
            annotation_path = os.path.join(detect_save_path, img_name.split('.')[0])
            annotation.savefile("{}.xml".format(annotation_path))
            # print(img_name, 'has been semi-automatically annotated')
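# --- Example (not part of detect.py) -----------------------------------#
# A minimal sketch of the same semi-automatic labelling flow applied to a
# single image. It assumes the YAML file loaded by get_config() supplies the
# 'dir_detect_path' (input folder) and 'detect_save_path' (output folder)
# keys used above; the literal paths below are placeholders, not values taken
# from model_data/gesture.yaml.
from PIL import Image
from yolo import YOLO
from gen_annotation import GEN_Annotations

yolo = YOLO()
image = Image.open("VOCdevkit/VOC2007/JPEGImages/1.jpg")  # any input image

annotation = GEN_Annotations("1.jpg")
w, h = image.size                      # PIL reports (width, height)
annotation.set_size(w, h, 3)
# yolo.get_box() returns boxes as (label, ymin, xmin, ymax, xmax) tuples,
# matching the unpacking used in detect.py above.
for label, ymin, xmin, ymax, xmax in (yolo.get_box(image) or []):
    annotation.add_pic_attr(label, xmin, ymin, xmax, ymax)
annotation.savefile("detections/1.xml")  # placeholder output path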
-------------------------------------------------------------------------------- /gen_annotation.py: -------------------------------------------------------------------------------- 1 | from lxml import etree 2 | 3 | class GEN_Annotations: 4 | def __init__(self, filename): 5 | self.root = etree.Element("annotation") 6 | 7 | child1 = etree.SubElement(self.root, "folder") 8 | child1.text = "VOC2007" 9 | 10 | child2 = etree.SubElement(self.root, "filename") 11 | child2.text = filename 12 | 13 | child3 = etree.SubElement(self.root, "source") 14 | 15 | child4 = etree.SubElement(child3, "annotation") 16 | child4.text = "PASCAL VOC2007" 17 | child5 = etree.SubElement(child3, "database") 18 | child5.text = "Unknown" 19 | 20 | ## child6 = etree.SubElement(child3, "image") 21 | ## child6.text = "flickr" 22 | ## child7 = etree.SubElement(child3, "flickrid") 23 | ## child7.text = "35435" 24 | 25 | 26 | def set_size(self,witdh,height,channel): 27 | size = etree.SubElement(self.root, "size") 28 | widthn = etree.SubElement(size, "width") 29 | widthn.text = str(witdh) 30 | heightn = etree.SubElement(size, "height") 31 | heightn.text = str(height) 32 | channeln = etree.SubElement(size, "depth") 33 | channeln.text = str(channel) 34 | def savefile(self,filename): 35 | tree = etree.ElementTree(self.root) 36 | tree.write(filename, pretty_print=True, xml_declaration=False, encoding='utf-8') 37 | def add_pic_attr(self,label,xmin,ymin,xmax,ymax): 38 | object = etree.SubElement(self.root, "object") 39 | namen = etree.SubElement(object, "name") 40 | namen.text = label 41 | bndbox = etree.SubElement(object, "bndbox") 42 | xminn = etree.SubElement(bndbox, "xmin") 43 | xminn.text = str(xmin) 44 | yminn = etree.SubElement(bndbox, "ymin") 45 | yminn.text = str(ymin) 46 | xmaxn = etree.SubElement(bndbox, "xmax") 47 | xmaxn.text = str(xmax) 48 | ymaxn = etree.SubElement(bndbox, "ymax") 49 | ymaxn.text = str(ymax) 50 | 51 | 52 | if __name__ == '__main__': 53 | filename="000001.jpg" 54 | anno= GEN_Annotations(filename) 55 | anno.set_size(1280,720,3) 56 | for i in range(3): 57 | xmin=i+1 58 | ymin=i+10 59 | xmax=i+100 60 | ymax=i+100 61 | anno.add_pic_attr("pikachu",xmin,ymin,xmax,ymax) 62 | anno.savefile("00001.xml") 63 | -------------------------------------------------------------------------------- /gesture.streamlit.py: -------------------------------------------------------------------------------- 1 | """Create an Object Detection Web App using PyTorch and Streamlit.""" 2 | # import libraries 3 | from PIL import Image 4 | from torchvision import models, transforms 5 | import torch 6 | import streamlit as st 7 | from yolo import YOLO 8 | import os 9 | import urllib 10 | import numpy as np 11 | from streamlit_webrtc import webrtc_streamer, WebRtcMode, RTCConfiguration 12 | import av 13 | # 设置网页的icon 14 | st.set_page_config(page_title='Gesture Detector', page_icon='✌', 15 | layout='centered', initial_sidebar_state='expanded') 16 | 17 | RTC_CONFIGURATION = RTCConfiguration( 18 | { 19 | "RTCIceServer": [{ 20 | "urls": ["stun:stun.l.google.com:19302"], 21 | "username": "pikachu", 22 | "credential": "1234", 23 | }] 24 | } 25 | ) 26 | def main(): 27 | # Render the readme as markdown using st.markdown. 28 | readme_text = st.markdown(open("instructions.md",encoding='utf-8').read()) 29 | 30 | 31 | # Once we have the dependencies, add a selector for the app mode on the sidebar. 
32 | st.sidebar.title("What to do") 33 | app_mode = st.sidebar.selectbox("Choose the app mode", 34 | ["Show instructions", "Run the app", "Show the source code"]) 35 | if app_mode == "Show instructions": 36 | st.sidebar.success('To continue select "Run the app".') 37 | elif app_mode == "Show the source code": 38 | readme_text.empty() 39 | st.code(open("gesture.streamlit.py",encoding='utf-8').read()) 40 | elif app_mode == "Run the app": 41 | # Download external dependencies. 42 | for filename in EXTERNAL_DEPENDENCIES.keys(): 43 | download_file(filename) 44 | 45 | readme_text.empty() 46 | run_the_app() 47 | 48 | # External files to download. 49 | EXTERNAL_DEPENDENCIES = { 50 | "yolov4_tiny.pth": { 51 | "url": "https://github.com/Kedreamix/YoloGesture/releases/download/v1.0/yolov4_tiny.pth", 52 | "size": 23631189 53 | }, 54 | "yolov4_SE.pth": { 55 | "url": "https://github.com/Kedreamix/YoloGesture/releases/download/v1.0/yolov4_SE.pth", 56 | "size": 23806027 57 | }, 58 | "yolov4_CBAM.pth":{ 59 | "url": "https://github.com/Kedreamix/YoloGesture/releases/download/v1.0/yolov4_CBAM.pth", 60 | "size": 23981478 61 | }, 62 | "yolov4_ECA.pth":{ 63 | "url": "https://github.com/Kedreamix/YoloGesture/releases/download/v1.0/yolov4_ECA.pth", 64 | "size": 23632688 65 | }, 66 | "yolov4_weights_ep150_608.pth":{ 67 | "url": "https://github.com/Kedreamix/YoloGesture/releases/download/v1.0/yolov4_weights_ep150_608.pth", 68 | "size": 256423031 69 | }, 70 | "yolov4_weights_ep150_416.pth":{ 71 | "url": "https://github.com/Kedreamix/YoloGesture/releases/download/v1.0/yolov4_weights_ep150_416.pth", 72 | "size": 256423031 73 | }, 74 | } 75 | 76 | 77 | # This file downloader demonstrates Streamlit animation. 78 | def download_file(file_path): 79 | # Don't download the file twice. (If possible, verify the download using the file length.) 80 | if os.path.exists(file_path): 81 | if "size" not in EXTERNAL_DEPENDENCIES[file_path]: 82 | return 83 | elif os.path.getsize(file_path) == EXTERNAL_DEPENDENCIES[file_path]["size"]: 84 | return 85 | # print(os.path.getsize(file_path)) 86 | # These are handles to two visual elements to animate. 87 | weights_warning, progress_bar = None, None 88 | try: 89 | weights_warning = st.warning("Downloading %s..." % file_path) 90 | progress_bar = st.progress(0) 91 | with open(file_path, "wb") as output_file: 92 | with urllib.request.urlopen(EXTERNAL_DEPENDENCIES[file_path]["url"]) as response: 93 | length = int(response.info()["Content-Length"]) 94 | counter = 0.0 95 | MEGABYTES = 2.0 ** 20.0 96 | while True: 97 | data = response.read(8192) 98 | if not data: 99 | break 100 | counter += len(data) 101 | output_file.write(data) 102 | 103 | # We perform animation by overwriting the elements. 104 | weights_warning.warning("Downloading %s... (%6.2f/%6.2f MB)" % 105 | (file_path, counter / MEGABYTES, length / MEGABYTES)) 106 | progress_bar.progress(min(counter / length, 1.0)) 107 | except Exception as e: 108 | print(e) 109 | # Finally, we remove these visual elements by calling .empty(). 110 | finally: 111 | if weights_warning is not None: 112 | weights_warning.empty() 113 | if progress_bar is not None: 114 | progress_bar.empty() 115 | 116 | # This is the main app app itself, which appears when the user selects "Run the app". 
117 | def run_the_app(): 118 | class Config(): 119 | def __init__(self, weights = 'yolov4_tiny.pth', tiny = True, phi = 0, shape = 416,nms_iou = 0.3, confidence = 0.5): 120 | self.weights = weights 121 | self.tiny = tiny 122 | self.phi = phi 123 | self.cuda = False 124 | self.shape = shape 125 | self.confidence = confidence 126 | self.nms_iou = nms_iou 127 | # set title of app 128 | st.markdown('

✌ Gesture Detection

', 129 | unsafe_allow_html=True) 130 | st.sidebar.markdown("# Gesture Detection on?") 131 | activities = ["Example","Image", "Camera", "FPS", "Heatmap","Real Time", "Video"] 132 | choice = st.sidebar.selectbox("Choose among the given options:", activities) 133 | phi = st.sidebar.selectbox("yolov4-tiny 使用的自注意力模式:",('0tiny','1SE','2CABM','3ECA')) 134 | print("") 135 | 136 | tiny = st.sidebar.checkbox('是否使用 yolov4 tiny 模型') 137 | if not tiny: 138 | shape = st.sidebar.selectbox("Choose shape to Input:", [416,608]) 139 | conf,nms = object_detector_ui() 140 | @st.cache 141 | def get_yolo(tiny,phi,conf,nms,shape=416): 142 | weights = 'yolov4_tiny.pth' 143 | if tiny: 144 | if phi == '0tiny': 145 | weights = 'yolov4_tiny.pth' 146 | elif phi == '1SE': 147 | weights = 'yolov4_SE.pth' 148 | elif phi == '2CABM': 149 | weights = 'yolov4_CBAM.pth' 150 | elif phi == '3ECA': 151 | weights = 'yolov4_ECA.pth' 152 | else: 153 | if shape == 608: 154 | weights = 'yolov4_weights_ep150_608.pth' 155 | elif shape == 416: 156 | weights = 'yolov4_weights_ep150_416.pth' 157 | opt = Config(weights = weights, tiny = tiny , phi = int(phi[0]), shape = shape,nms_iou = nms, confidence = conf) 158 | yolo = YOLO(opt) 159 | return yolo 160 | 161 | if tiny: 162 | yolo = get_yolo(tiny, phi, conf, nms) 163 | st.write("YOLOV4 tiny 模型加载完毕") 164 | else: 165 | yolo = get_yolo(tiny, phi, conf, nms, shape) 166 | st.write("YOLOV4 模型加载完毕") 167 | 168 | if choice == 'Image': 169 | detect_image(yolo) 170 | elif choice =='Camera': 171 | detect_camera(yolo) 172 | elif choice == 'FPS': 173 | detect_fps(yolo) 174 | elif choice == "Heatmap": 175 | detect_heatmap(yolo) 176 | elif choice == "Example": 177 | detect_example(yolo) 178 | elif choice == "Real Time": 179 | detect_realtime(yolo) 180 | elif choice == "Video": 181 | detect_video(yolo) 182 | 183 | 184 | 185 | # This sidebar UI lets the user select parameters for the YOLO object detector. 186 | def object_detector_ui(): 187 | st.sidebar.markdown("# Model") 188 | confidence_threshold = st.sidebar.slider("Confidence threshold", 0.0, 1.0, 0.5, 0.01) 189 | overlap_threshold = st.sidebar.slider("Overlap threshold", 0.0, 1.0, 0.3, 0.01) 190 | return confidence_threshold, overlap_threshold 191 | 192 | def predict(image,yolo): 193 | """Return predictions. 
194 | 195 | Parameters 196 | ---------- 197 | :param image: uploaded image 198 | :type image: jpg 199 | :rtype: list 200 | :return: none 201 | """ 202 | crop = False 203 | count = False 204 | try: 205 | # image = Image.open(image) 206 | r_image = yolo.detect_image(image, crop = crop, count=count) 207 | transform = transforms.Compose([transforms.ToTensor()]) 208 | result = transform(r_image) 209 | st.image(result.permute(1,2,0).numpy(), caption = 'Processed Image.', use_column_width = True) 210 | except Exception as e: 211 | print(e) 212 | 213 | def fps(image,yolo): 214 | test_interval = 50 215 | tact_time = yolo.get_FPS(image, test_interval) 216 | st.write(str(tact_time) + ' seconds, ', str(1/tact_time),'FPS, @batch_size 1') 217 | return tact_time 218 | # print(str(tact_time) + ' seconds, ' + str(1/tact_time) + 'FPS, @batch_size 1') 219 | 220 | 221 | def detect_image(yolo): 222 | # enable users to upload images for the model to make predictions 223 | file_up = st.file_uploader("Upload an image", type = ["jpg","png","jpeg"]) 224 | classes = ["up","down","left","right","front","back","clockwise","anticlockwise"] 225 | class_to_idx = {cls: idx for (idx, cls) in enumerate(classes)} 226 | st.sidebar.markdown("See the model preformance and play with it") 227 | if file_up is not None: 228 | with st.spinner(text='Preparing Image'): 229 | # display image that user uploaded 230 | image = Image.open(file_up) 231 | st.image(image, caption = 'Uploaded Image.', use_column_width = True) 232 | st.balloons() 233 | detect = st.button("开始检测Image") 234 | if detect: 235 | st.write("") 236 | st.write("Just a second ...") 237 | predict(image,yolo) 238 | st.balloons() 239 | 240 | 241 | 242 | def detect_camera(yolo): 243 | picture = st.camera_input("Take a picture") 244 | if picture: 245 | filters_to_funcs = { 246 | "No filter": predict, 247 | "Heatmap": heatmap, 248 | "FPS": fps, 249 | } 250 | filters = st.selectbox("...and now, apply a filter!", filters_to_funcs.keys()) 251 | image = Image.open(picture) 252 | with st.spinner(text='Preparing Image'): 253 | filters_to_funcs[filters](image,yolo) 254 | st.balloons() 255 | 256 | def detect_fps(yolo): 257 | file_up = st.file_uploader("Upload an image", type = ["jpg","png","jpeg"]) 258 | classes = ["up","down","left","right","front","back","clockwise","anticlockwise"] 259 | class_to_idx = {cls: idx for (idx, cls) in enumerate(classes)} 260 | st.sidebar.markdown("See the model preformance and play with it") 261 | if file_up is not None: 262 | # display image that user uploaded 263 | image = Image.open(file_up) 264 | st.image(image, caption = 'Uploaded Image.', use_column_width = True) 265 | st.balloons() 266 | detect = st.button("开始检测 FPS") 267 | if detect: 268 | with st.spinner(text='Preparing Image'): 269 | st.write("") 270 | st.write("Just a second ...") 271 | tact_time = fps(image,yolo) 272 | # st.write(str(tact_time) + ' seconds, ', str(1/tact_time),'FPS, @batch_size 1') 273 | st.balloons() 274 | 275 | def heatmap(image,yolo): 276 | heatmap_save_path = "heatmap_vision.png" 277 | yolo.detect_heatmap(image, heatmap_save_path) 278 | img = Image.open(heatmap_save_path) 279 | transform = transforms.Compose([transforms.ToTensor()]) 280 | result = transform(img) 281 | st.image(result.permute(1,2,0).numpy(), caption = 'Processed Image.', use_column_width = True) 282 | 283 | def detect_heatmap(yolo): 284 | file_up = st.file_uploader("Upload an image", type = ["jpg","png","jpeg"]) 285 | classes = ["up","down","left","right","front","back","clockwise","anticlockwise"] 286 | 
class_to_idx = {cls: idx for (idx, cls) in enumerate(classes)} 287 | st.sidebar.markdown("See the model preformance and play with it") 288 | if file_up is not None: 289 | # display image that user uploaded 290 | image = Image.open(file_up) 291 | st.image(image, caption = 'Uploaded Image.', use_column_width = True) 292 | st.balloons() 293 | detect = st.button("开始检测 heatmap") 294 | if detect: 295 | with st.spinner(text='Preparing Heatmap'): 296 | st.write("") 297 | st.write("Just a second ...") 298 | heatmap(image,yolo) 299 | st.balloons() 300 | 301 | def detect_example(yolo): 302 | st.sidebar.title("Choose an Image as a example") 303 | images = os.listdir('./img') 304 | images.sort() 305 | image = st.sidebar.selectbox("Image Name", images) 306 | st.sidebar.markdown("See the model preformance and play with it") 307 | image = Image.open(os.path.join('img',image)) 308 | st.image(image, caption = 'Choose Image.', use_column_width = True) 309 | st.balloons() 310 | detect = st.button("开始检测Image") 311 | if detect: 312 | st.write("") 313 | st.write("Just a second ...") 314 | predict(image,yolo) 315 | st.balloons() 316 | 317 | def detect_realtime(yolo): 318 | 319 | class VideoProcessor: 320 | def recv(self, frame): 321 | img = frame.to_ndarray(format="bgr24") 322 | img = Image.fromarray(img) 323 | crop = False 324 | count = False 325 | r_image = yolo.detect_image(img, crop = crop, count=count) 326 | transform = transforms.Compose([transforms.ToTensor()]) 327 | result = transform(r_image) 328 | result = result.permute(1,2,0).numpy() 329 | result = (result * 255).astype(np.uint8) 330 | return av.VideoFrame.from_ndarray(result, format="bgr24") 331 | 332 | webrtc_ctx = webrtc_streamer( 333 | key="example", 334 | mode=WebRtcMode.SENDRECV, 335 | rtc_configuration=RTC_CONFIGURATION, 336 | media_stream_constraints={"video": True, "audio": False}, 337 | async_processing=False, 338 | video_processor_factory=VideoProcessor 339 | ) 340 | 341 | import cv2 342 | import time 343 | def detect_video(yolo): 344 | file_up = st.file_uploader("Upload a video", type = ["mp4"]) 345 | print(file_up) 346 | classes = ["up","down","left","right","front","back","clockwise","anticlockwise"] 347 | 348 | if file_up is not None: 349 | video_path = 'video.mp4' 350 | st.video(file_up) 351 | with open(video_path, 'wb') as f: 352 | f.write(file_up.read()) 353 | detect = st.button("开始检测 Video") 354 | 355 | if detect: 356 | video_save_path = 'video2.mp4' 357 | # display image that user uploaded 358 | capture = cv2.VideoCapture(video_path) 359 | 360 | video_fps = st.slider("Video FPS", 5, 30, int(capture.get(cv2.CAP_PROP_FPS)), 1) 361 | fourcc = cv2.VideoWriter_fourcc(*'XVID') 362 | size = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)), int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))) 363 | out = cv2.VideoWriter(video_save_path, fourcc, video_fps, size) 364 | 365 | 366 | 367 | while(True): 368 | # 读取某一帧 369 | ref, frame = capture.read() 370 | if not ref: 371 | break 372 | # 转变成Image 373 | # frame = Image.fromarray(np.uint8(frame)) 374 | # 格式转变,BGRtoRGB 375 | frame = cv2.cvtColor(frame,cv2.COLOR_BGR2RGB) 376 | # 转变成Image 377 | frame = Image.fromarray(np.uint8(frame)) 378 | # 进行检测 379 | frame = np.array(yolo.detect_image(frame)) 380 | # RGBtoBGR满足opencv显示格式 381 | frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR) 382 | 383 | # print("fps= %.2f"%(fps)) 384 | # frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) 385 | out.write(frame) 386 | 387 | out.release() 388 | capture.release() 389 | print("Save 
processed video to the path :" + video_save_path) 390 | 391 | with open(video_save_path, "rb") as file: 392 | btn = st.download_button( 393 | label="Download Video", 394 | data=file, 395 | file_name="video.mp4", 396 | ) 397 | st.balloons() 398 | 399 | if __name__ == "__main__": 400 | main() -------------------------------------------------------------------------------- /get_map.py: -------------------------------------------------------------------------------- 1 | import os 2 | import xml.etree.ElementTree as ET 3 | 4 | from PIL import Image 5 | from tqdm import tqdm 6 | import yaml 7 | from utils.utils import get_classes 8 | from utils.utils_map import get_coco_map, get_map 9 | from yolo import YOLO 10 | from get_yaml import get_config 11 | import argparse 12 | if __name__ == "__main__": 13 | ''' 14 | Recall和Precision不像AP是一个面积的概念,在门限值不同时,网络的Recall和Precision值是不同的。 15 | map计算结果中的Recall和Precision代表的是当预测时,门限置信度为0.5时,所对应的Recall和Precision值。 16 | 17 | 此处获得的./map_out/detection-results/里面的txt的框的数量会比直接predict多一些,这是因为这里的门限低, 18 | 目的是为了计算不同门限条件下的Recall和Precision值,从而实现map的计算。 19 | ''' 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument('--weights',type=str,default='model_data/yolotiny_SE_ep100.pth',help='initial weights path') 22 | parser.add_argument('--tiny',action='store_true',help='使用yolotiny模型') 23 | parser.add_argument('--phi',type=int,default=1,help='yolov4tiny注意力机制类型') 24 | parser.add_argument('--mode',type=int,default=0,help='get map的模式') 25 | parser.add_argument('--cuda',action='store_true',help='表示是否使用GPU') 26 | parser.add_argument('--shape',type=int,default=416,help='输入图像的shape') 27 | parser.add_argument('--confidence',type=float,default=0.5,help='只有得分大于置信度的预测框会被保留下来') 28 | parser.add_argument('--nms_iou',type=float,default=0.3,help='非极大抑制所用到的nms_iou大小') 29 | opt = parser.parse_args() 30 | print(opt) 31 | # 配置文件 32 | config = get_config() 33 | 34 | #------------------------------------------------------------------------------------------------------------------# 35 | # map_mode用于指定该文件运行时计算的内容 36 | # map_mode为0代表整个map计算流程,包括获得预测结果、获得真实框、计算VOC_map。 37 | # map_mode为1代表仅仅获得预测结果。 38 | # map_mode为2代表仅仅获得真实框。 39 | # map_mode为3代表仅仅计算VOC_map。 40 | # map_mode为4代表利用COCO工具箱计算当前数据集的0.50:0.95map。需要获得预测结果、获得真实框后并安装pycocotools才行 41 | #-------------------------------------------------------------------------------------------------------------------# 42 | map_mode = opt.mode 43 | #-------------------------------------------------------# 44 | # MINOVERLAP用于指定想要获得的mAP0.x 45 | # 比如计算mAP0.75,可以设定MINOVERLAP = 0.75。 46 | #-------------------------------------------------------# 47 | MINOVERLAP = 0.5 48 | #-------------------------------------------------------# 49 | # map_vis用于指定是否开启VOC_map计算的可视化 50 | #-------------------------------------------------------# 51 | map_vis = False 52 | #-------------------------------------------------------# 53 | # 指向VOC数据集所在的文件夹 54 | # 默认指向根目录下的VOC数据集 55 | #-------------------------------------------------------# 56 | VOCdevkit_path = 'VOCdevkit' 57 | #-------------------------------------------------------# 58 | # 结果输出的文件夹,默认为map_out 59 | #-------------------------------------------------------# 60 | map_out_path = 'map_out' 61 | 62 | image_ids = open(os.path.join(VOCdevkit_path, "VOC2007/ImageSets/Main/val.txt")).read().strip().split() 63 | 64 | if not os.path.exists(map_out_path): 65 | os.makedirs(map_out_path) 66 | if not os.path.exists(os.path.join(map_out_path, 'ground-truth')): 67 | os.makedirs(os.path.join(map_out_path, 'ground-truth')) 68 | if not 
os.path.exists(os.path.join(map_out_path, 'detection-results')): 69 | os.makedirs(os.path.join(map_out_path, 'detection-results')) 70 | if not os.path.exists(os.path.join(map_out_path, 'images-optional')): 71 | os.makedirs(os.path.join(map_out_path, 'images-optional')) 72 | 73 | class_names = config['classes'] 74 | 75 | if map_mode == 0 or map_mode == 1: 76 | print("Load model.") 77 | yolo = YOLO(opt, confidence = 0.001, nms_iou = 0.5) 78 | print("Load model done.") 79 | 80 | print("Get predict result.") 81 | for image_id in tqdm(image_ids): 82 | image_path = os.path.join(VOCdevkit_path, "VOC2007/JPEGImages/"+image_id+".jpg") 83 | image = Image.open(image_path) 84 | if map_vis: 85 | image.save(os.path.join(map_out_path, "images-optional/" + image_id + ".jpg")) 86 | yolo.get_map_txt(image_id, image, class_names, map_out_path) 87 | print("Get predict result done.") 88 | 89 | if map_mode == 0 or map_mode == 2: 90 | print("Get ground truth result.") 91 | for image_id in tqdm(image_ids): 92 | with open(os.path.join(map_out_path, "ground-truth/"+image_id+".txt"), "w") as new_f: 93 | root = ET.parse(os.path.join(VOCdevkit_path, "VOC2007/Annotations/"+image_id+".xml")).getroot() 94 | for obj in root.findall('object'): 95 | difficult_flag = False 96 | if obj.find('difficult')!=None: 97 | difficult = obj.find('difficult').text 98 | if int(difficult)==1: 99 | difficult_flag = True 100 | obj_name = obj.find('name').text 101 | if obj_name not in class_names: 102 | continue 103 | bndbox = obj.find('bndbox') 104 | left = bndbox.find('xmin').text 105 | top = bndbox.find('ymin').text 106 | right = bndbox.find('xmax').text 107 | bottom = bndbox.find('ymax').text 108 | 109 | if difficult_flag: 110 | new_f.write("%s %s %s %s %s difficult\n" % (obj_name, left, top, right, bottom)) 111 | else: 112 | new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom)) 113 | print("Get ground truth result done.") 114 | 115 | if map_mode == 0 or map_mode == 3: 116 | print("Get map.") 117 | get_map(MINOVERLAP, True, path = map_out_path) 118 | print("Get map done.") 119 | 120 | if map_mode == 4: 121 | print("Get map.") 122 | get_coco_map(class_names = class_names, path = map_out_path) 123 | print("Get map done.") 124 | -------------------------------------------------------------------------------- /get_yaml.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import yaml 4 | 5 | def get_config(): 6 | yaml_path = 'model_data/gesture.yaml' 7 | f = open(yaml_path,'r',encoding='utf-8') 8 | config = yaml.load(f,Loader =yaml.FullLoader) 9 | f.close() 10 | return config 11 | 12 | if __name__ == "__main__": 13 | config = get_config() 14 | print(config) -------------------------------------------------------------------------------- /img/anticlockwise.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/img/anticlockwise.jpg -------------------------------------------------------------------------------- /img/back.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/img/back.jpg -------------------------------------------------------------------------------- /img/clockwise.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/img/clockwise.jpg -------------------------------------------------------------------------------- /img/down.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/img/down.jpg -------------------------------------------------------------------------------- /img/front.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/img/front.jpg -------------------------------------------------------------------------------- /img/left.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/img/left.jpg -------------------------------------------------------------------------------- /img/right.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/img/right.jpg -------------------------------------------------------------------------------- /img/up.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/img/up.jpg -------------------------------------------------------------------------------- /instructions.md: -------------------------------------------------------------------------------- 1 | # ✌ Gesture Detection 2 | 3 | 4 | 这是一个基于无人机视觉图像手势识别控制系统,选择了YOLOv4模型进行训练 5 | 6 | **YOLOv4 = CSPDarknet53(主干) + SPP** **附加模块(颈** **) +** **PANet** **路径聚合(颈** **) + YOLOv3(头部)** 7 | 8 | ![img](https://pdf.cdn.readpaper.com/parsed/fetch_target/699143cdb334ecfc63caf8192472490c_0_Figure_1.png) 9 | 10 | -------------------------------------------------------------------------------- /kmeans_for_anchors.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/kmeans_for_anchors.jpg -------------------------------------------------------------------------------- /kmeans_for_anchors.py: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------------------------------------------------------# 2 | # kmeans虽然会对数据集中的框进行聚类,但是很多数据集由于框的大小相近,聚类出来的9个框相差不大, 3 | # 这样的框反而不利于模型的训练。因为不同的特征层适合不同大小的先验框,shape越小的特征层适合越大的先验框 4 | # 原始网络的先验框已经按大中小比例分配好了,不进行聚类也会有非常好的效果。 5 | #-------------------------------------------------------------------------------------------------------# 6 | import glob 7 | import xml.etree.ElementTree as ET 8 | 9 | import matplotlib.pyplot as plt 10 | import numpy as np 11 | from tqdm import tqdm 12 | 13 | 14 | def cas_iou(box, cluster): 15 | x = np.minimum(cluster[:, 0], box[0]) 16 | y = np.minimum(cluster[:, 1], box[1]) 17 | 18 | intersection = x * y 19 | area1 = box[0] * box[1] 20 | 21 | area2 = cluster[:,0] * cluster[:,1] 22 | iou = intersection / (area1 + area2 - intersection) 23 | 24 | return iou 25 | 26 | def avg_iou(box, cluster): 27 | return np.mean([np.max(cas_iou(box[i], cluster)) for i in range(box.shape[0])]) 28 | 29 | def kmeans(box, k): 30 | 
#-------------------------------------------------------------# 31 | # 取出一共有多少框 32 | #-------------------------------------------------------------# 33 | row = box.shape[0] 34 | 35 | #-------------------------------------------------------------# 36 | # 每个框各个点的位置 37 | #-------------------------------------------------------------# 38 | distance = np.empty((row, k)) 39 | 40 | #-------------------------------------------------------------# 41 | # 最后的聚类位置 42 | #-------------------------------------------------------------# 43 | last_clu = np.zeros((row, )) 44 | 45 | np.random.seed() 46 | 47 | #-------------------------------------------------------------# 48 | # 随机选5个当聚类中心 49 | #-------------------------------------------------------------# 50 | cluster = box[np.random.choice(row, k, replace = False)] 51 | 52 | iter = 0 53 | while True: 54 | #-------------------------------------------------------------# 55 | # 计算当前框和先验框的宽高比例 56 | #-------------------------------------------------------------# 57 | for i in range(row): 58 | distance[i] = 1 - cas_iou(box[i], cluster) 59 | 60 | #-------------------------------------------------------------# 61 | # 取出最小点 62 | #-------------------------------------------------------------# 63 | near = np.argmin(distance, axis=1) 64 | 65 | if (last_clu == near).all(): 66 | break 67 | 68 | #-------------------------------------------------------------# 69 | # 求每一个类的中位点 70 | #-------------------------------------------------------------# 71 | for j in range(k): 72 | cluster[j] = np.median( 73 | box[near == j],axis=0) 74 | 75 | last_clu = near 76 | if iter % 5 == 0: 77 | print('iter: {:d}. avg_iou:{:.2f}'.format(iter, avg_iou(box, cluster))) 78 | iter += 1 79 | 80 | return cluster, near 81 | 82 | def load_data(path): 83 | data = [] 84 | #-------------------------------------------------------------# 85 | # 对于每一个xml都寻找box 86 | #-------------------------------------------------------------# 87 | for xml_file in tqdm(glob.glob('{}/*xml'.format(path))): 88 | tree = ET.parse(xml_file) 89 | height = int(tree.findtext('./size/height')) 90 | width = int(tree.findtext('./size/width')) 91 | if height<=0 or width<=0: 92 | continue 93 | 94 | #-------------------------------------------------------------# 95 | # 对于每一个目标都获得它的宽高 96 | #-------------------------------------------------------------# 97 | for obj in tree.iter('object'): 98 | xmin = int(float(obj.findtext('bndbox/xmin'))) / width 99 | ymin = int(float(obj.findtext('bndbox/ymin'))) / height 100 | xmax = int(float(obj.findtext('bndbox/xmax'))) / width 101 | ymax = int(float(obj.findtext('bndbox/ymax'))) / height 102 | 103 | xmin = np.float64(xmin) 104 | ymin = np.float64(ymin) 105 | xmax = np.float64(xmax) 106 | ymax = np.float64(ymax) 107 | # 得到宽高 108 | data.append([xmax - xmin, ymax - ymin]) 109 | return np.array(data) 110 | 111 | if __name__ == '__main__': 112 | np.random.seed(0) 113 | #-------------------------------------------------------------# 114 | # 运行该程序会计算'./VOCdevkit/VOC2007/Annotations'的xml 115 | # 会生成yolo_anchors.txt 116 | #-------------------------------------------------------------# 117 | input_shape = [224, 224] 118 | anchors_num = 9 119 | #-------------------------------------------------------------# 120 | # 载入数据集,可以使用VOC的xml 121 | #-------------------------------------------------------------# 122 | path = 'VOCdevkit/VOC2007/Annotations' 123 | 124 | #-------------------------------------------------------------# 125 | # 载入所有的xml 126 | # 存储格式为转化为比例后的width,height 127 | 
#-------------------------------------------------------------# 128 | print('Load xmls.') 129 | data = load_data(path) 130 | print('Load xmls done.') 131 | 132 | #-------------------------------------------------------------# 133 | # 使用k聚类算法 134 | #-------------------------------------------------------------# 135 | print('K-means boxes.') 136 | cluster, near = kmeans(data, anchors_num) 137 | print('K-means boxes done.') 138 | data = data * np.array([input_shape[1], input_shape[0]]) 139 | cluster = cluster * np.array([input_shape[1], input_shape[0]]) 140 | 141 | #-------------------------------------------------------------# 142 | # 绘图 143 | #-------------------------------------------------------------# 144 | for j in range(anchors_num): 145 | plt.scatter(data[near == j][:,0], data[near == j][:,1]) 146 | plt.scatter(cluster[j][0], cluster[j][1], marker='x', c='black') 147 | plt.savefig("kmeans_for_anchors.jpg") 148 | plt.show() 149 | print('Save kmeans_for_anchors.jpg in root dir.') 150 | 151 | cluster = cluster[np.argsort(cluster[:, 0] * cluster[:, 1])] 152 | print('avg_ratio:{:.2f}'.format(avg_iou(data, cluster))) 153 | print(cluster) 154 | 155 | f = open("yolo_anchors.txt", 'w') 156 | row = np.shape(cluster)[0] 157 | for i in range(row): 158 | if i == 0: 159 | x_y = "%d,%d" % (cluster[i][0], cluster[i][1]) 160 | else: 161 | x_y = ", %d,%d" % (cluster[i][0], cluster[i][1]) 162 | f.write(x_y) 163 | f.close() 164 | -------------------------------------------------------------------------------- /logs/README.md: -------------------------------------------------------------------------------- 1 | 用于存放训练好的文件 -------------------------------------------------------------------------------- /logs/gesture_loss_2021_11_14_22_04_00/epoch_loss_2021_11_14_22_04_00.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/logs/gesture_loss_2021_11_14_22_04_00/epoch_loss_2021_11_14_22_04_00.png -------------------------------------------------------------------------------- /logs/gesture_loss_2021_11_14_22_04_00/epoch_loss_2021_11_14_22_04_00.txt: -------------------------------------------------------------------------------- 1 | 390.34399642473386 2 | 21.87092101721116 3 | 14.030741856421953 4 | 11.276778338867942 5 | 9.814540598127577 6 | 8.89100271978496 7 | 8.609104168267898 8 | 7.924442773983802 9 | 7.723959027984996 10 | 7.2670195367601185 11 | 7.255199196897907 12 | 6.893556188654016 13 | 6.661026071619104 14 | 6.5294443972316785 15 | 6.535371827490536 16 | 6.529178083678822 17 | 6.403998654565694 18 | 6.439444012112087 19 | 6.092924733220795 20 | 5.926193254965323 21 | 5.9576384785734575 22 | 5.8119951972255 23 | 5.6878520206168846 24 | 5.819804650765878 25 | 5.707105348139633 26 | 5.458082881974585 27 | 5.665041320117903 28 | 5.317585485952872 29 | 5.349038653903538 30 | 5.283199619363855 31 | 5.064980445084749 32 | 5.070186079284291 33 | 4.9681971073150635 34 | 4.793072164794545 35 | 4.973145805759194 36 | 4.918124354915855 37 | 4.663256362632469 38 | 4.837633197690233 39 | 4.743683688434554 40 | 4.616998254516979 41 | 4.524823586146037 42 | 4.4209345593864535 43 | 4.558955289699413 44 | 4.333138801433422 45 | 4.426347941528132 46 | 4.412103137852233 47 | 4.295655697952082 48 | 4.364107617625484 49 | 4.211893027211413 50 | 4.084111590444306 51 | 4.018801480163763 52 | 3.8647101366961443 53 | 3.734398615581018 54 | 3.6937082122873375 55 | 3.793811333032302 56 | 
3.429193678093545 57 | 3.6194330038111886 58 | 3.4087822738988898 59 | 3.331124112193967 60 | 3.3782305434162234 61 | 3.3561593158009613 62 | 3.25705443598606 63 | 3.2106575075490973 64 | 3.0107549484129303 65 | 3.0536143231539077 66 | 2.9674469438599953 67 | 3.1300665189822516 68 | 2.909559675204901 69 | 2.9446099194479576 70 | 2.8209132660686236 71 | 2.8917798992292383 72 | 2.815192371238897 73 | 2.861111617566627 74 | 2.9016677490722986 75 | 2.8193857658792427 76 | 2.8216423440126723 77 | 2.777715330874478 78 | 2.725730179820532 79 | 2.589312877184079 80 | 2.670389473438263 81 | 2.626439411331106 82 | 2.57100960759469 83 | 2.6649326178026786 84 | 2.449705180930503 85 | 2.6089335954115715 86 | 2.666015229291386 87 | 2.5139025822281837 88 | 2.4510488511971484 89 | 2.60918134248551 90 | 2.615589211384455 91 | 2.4221341083815067 92 | 2.5034887735490448 93 | 2.3411180855315408 94 | 2.3742799654970934 95 | 2.4252039420383946 96 | 2.5134657593788923 97 | 2.5887757239886273 98 | 2.5031773506859203 99 | 2.3927585335425388 100 | 2.4924555529414874 101 | 2.3816184005987497 102 | 2.3525361067351 103 | 2.35756847280779 104 | 2.4606370890030154 105 | 2.262793848084079 106 | 2.283497501026701 107 | 2.2522216586419095 108 | 2.3806339068177307 109 | 2.345363718767961 110 | 2.305632569723659 111 | 2.1932848855669116 112 | 2.332635486199532 113 | 2.2705356725204138 114 | 2.233249652992796 115 | 2.4728508678115446 116 | 2.3142452859952125 117 | 2.3585592800820314 118 | 2.335805359078042 119 | 2.337391757118849 120 | 2.391327069129473 121 | 2.3404054016242792 122 | 2.3145943543425314 123 | 2.196398460570677 124 | 2.2358641638248056 125 | 2.3038836038774915 126 | 2.2790947368851415 127 | 2.2812541202630525 128 | 2.2533860233278924 129 | 2.3108025224488458 130 | 2.2092323683110284 131 | 2.308551702050515 132 | 2.2422945557369127 133 | 2.1741022714126257 134 | 2.44105933992951 135 | 2.3797168718811905 136 | 2.231722431326354 137 | 2.3973163276174922 138 | 2.1568032256615015 139 | 2.239097781571341 140 | 2.2258979082107544 141 | 2.1682290563612807 142 | 2.2031694714117935 143 | 2.2706658139272973 144 | 2.329095835854978 145 | 2.255610410262037 146 | 2.2977319957665454 147 | 2.3046101513836117 148 | 2.249893919369321 149 | 2.2964354607242123 150 | 2.315463280696192 151 | -------------------------------------------------------------------------------- /logs/gesture_loss_2021_11_14_22_04_00/epoch_val_loss_2021_11_14_22_04_00.txt: -------------------------------------------------------------------------------- 1 | 28.558996200561523 2 | 15.032766554090712 3 | 11.545120133293999 4 | 9.72215329276191 5 | 8.58862935172187 6 | 8.486469162835014 7 | 7.804132832421197 8 | 7.238262918260363 9 | 6.890773402320014 10 | 6.530833350287543 11 | 6.475247330135769 12 | 6.4751937124464245 13 | 6.239521026611328 14 | 6.0489738782246905 15 | 6.12673372692532 16 | 5.641317420535618 17 | 6.040707217322455 18 | 5.724527147081163 19 | 5.265863656997681 20 | 5.316834555731879 21 | 5.4665877024332685 22 | 5.622564209832086 23 | 5.04600026872423 24 | 5.060362259546916 25 | 5.527375910017225 26 | 5.435662375556098 27 | 5.021538707945082 28 | 5.028834872775608 29 | 4.896508720186022 30 | 4.989696582158406 31 | 5.161070320341322 32 | 5.098267449273004 33 | 4.707995070351495 34 | 4.600137048297459 35 | 4.426739745669895 36 | 4.481476042005751 37 | 4.555791060129802 38 | 4.693203316794501 39 | 4.515556865268284 40 | 4.371145274904039 41 | 4.138098372353448 42 | 4.548380348417494 43 | 4.3106510109371605 44 | 4.320602138837178 45 | 
4.131023804346721 46 | 4.0555612511105 47 | 4.217087030410767 48 | 4.128190358479817 49 | 4.032541698879665 50 | 3.99964001443651 51 | 3.741890834437476 52 | 3.749820719162623 53 | 3.6366468982564077 54 | 3.5657983157369824 55 | 3.9311270780033536 56 | 3.6530382368299694 57 | 4.012030104796092 58 | 3.8975768751568265 59 | 3.764561494191488 60 | 3.4476174149248333 61 | 3.535598119099935 62 | 3.998010264502631 63 | 3.88807831870185 64 | 3.810675323009491 65 | 3.8832875225279064 66 | 3.532531124022272 67 | 3.9232571257485285 68 | 3.58525949716568 69 | 3.7238865759637623 70 | 3.7168162133958607 71 | 3.503431843386756 72 | 3.5310314959949918 73 | 3.7993387116326227 74 | 3.5516341394848294 75 | 3.6795931648876934 76 | 3.564246873060862 77 | 3.484692699379391 78 | 3.7236365245448217 79 | 3.7466657956441245 80 | 3.66163033246994 81 | 3.751209259033203 82 | 3.6696145402060614 83 | 3.5883768465783863 84 | 3.853155712286631 85 | 3.4928252498308816 86 | 3.602889382176929 87 | 3.7287648055288525 88 | 3.6207654832137957 89 | 3.610999337500996 90 | 3.8127831634547977 91 | 3.6820534533924527 92 | 3.716387847231494 93 | 3.6561857561270394 94 | 3.703249845239851 95 | 3.686804783013132 96 | 3.687538597318861 97 | 3.8072550859716205 98 | 3.6593143989642463 99 | 3.707283900843726 100 | 3.7246316257450314 101 | 3.8617856800556183 102 | 3.573318580786387 103 | 3.531035871969329 104 | 3.6177483134799533 105 | 3.6122085054715476 106 | 3.5437003208531275 107 | 3.5555910716454187 108 | 3.6909723381201425 109 | 3.5987775524457297 110 | 3.646808198756642 111 | 3.6476809779802957 112 | 3.615621048543188 113 | 3.8375576469633312 114 | 3.7161912678016558 115 | 3.694040416015519 116 | 3.677286409669452 117 | 3.6777902278635235 118 | 3.7830483151806726 119 | 3.707444575097826 120 | 3.7904206779268055 121 | 3.5872142712275186 122 | 3.6864367392328052 123 | 3.7757607218292026 124 | 3.835707320107354 125 | 3.6799587168627315 126 | 3.8233094347847834 127 | 3.6921923756599426 128 | 3.7244893974728055 129 | 3.6797771288288965 130 | 3.711515542533663 131 | 3.8481360466943846 132 | 3.8577410876750946 133 | 3.710074722766876 134 | 3.8249045742882624 135 | 3.7864705423514047 136 | 3.6575047771135965 137 | 3.8352384832170276 138 | 3.7801570263173847 139 | 3.7013448344336615 140 | 3.6655967930952706 141 | 3.657223959763845 142 | 3.722360614273283 143 | 3.772919843594233 144 | 3.7007322708765664 145 | 3.7042017413510218 146 | 3.8934470083978443 147 | 3.8964318566852145 148 | 3.6877589921156564 149 | 3.713595751259062 150 | 3.597744878795412 151 | -------------------------------------------------------------------------------- /logs/loss_2022_04_27_08_48_16/epoch_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/logs/loss_2022_04_27_08_48_16/epoch_loss.png -------------------------------------------------------------------------------- /logs/loss_2022_04_27_08_48_16/epoch_loss.txt: -------------------------------------------------------------------------------- 1 | 4.311199968511408 2 | 2.641528855670582 3 | 1.0470811074430293 4 | 0.3173784383318641 5 | 0.1660231321372769 6 | 0.12659757448868317 7 | 0.11646865105087106 8 | 0.1186594499105757 9 | 0.11129742149602283 10 | 0.09524408660151741 11 | 0.09781679036942395 12 | 0.09211275726556778 13 | 0.08542741784317927 14 | 0.08707698925652287 15 | 0.08003932000561194 16 | 0.09124952453103932 17 | 0.07743281058289787 18 | 0.07542280463332479 19 | 
0.062316759235479614 20 | 0.07161653380502354 21 | 0.06821535866368901 22 | 0.07083209519359199 23 | 0.07460641437633471 24 | 0.07450477220118046 25 | 0.06487809985198757 26 | 0.050884095443920654 27 | 0.07091375355693427 28 | 0.06433163752610033 29 | 0.0656029749661684 30 | 0.05935167453505776 31 | 0.06459851512177424 32 | 0.06376675008372827 33 | 0.05718133259903301 34 | 0.05716039274226536 35 | 0.05911739483814348 36 | 0.05761603875593706 37 | 0.051265862939709965 38 | 0.047803171148354355 39 | 0.0480937244031917 40 | 0.05439905263483524 41 | 0.058482232080264526 42 | 0.05515999550169164 43 | 0.049258994361893696 44 | 0.050817748277702114 45 | 0.05204320927573876 46 | 0.04787483066320419 47 | 0.050909879194064575 48 | 0.04848571375689723 49 | 0.050943345593457874 50 | 0.04928677469830622 51 | 0.05230807525416215 52 | 0.054047910206847724 53 | 0.04724785503413942 54 | 0.04339685816731718 55 | 0.04393725813262993 56 | 0.04542147194345792 57 | 0.046219487115740775 58 | 0.04159199959701962 59 | 0.0356766721026765 60 | 0.0347878428383006 61 | 0.0335447210404608 62 | 0.03512532735864322 63 | 0.032664823532104495 64 | 0.035281008275018795 65 | 0.027731664727131525 66 | 0.03222298233045472 67 | 0.03146794889536169 68 | 0.02836602210170693 69 | 0.028307923198574118 70 | 0.027572717414134078 71 | 0.026898101448184913 72 | 0.029324432876374987 73 | 0.02880634083929989 74 | 0.024556251760158274 75 | 0.027897736864785354 76 | 0.024288477210534943 77 | 0.022848848750193915 78 | 0.023355903372996385 79 | 0.02707639779481623 80 | 0.022250585506359735 81 | 0.025191593791047732 82 | 0.022139282586673897 83 | 0.02378465121404992 84 | 0.02341305265824 85 | 0.02176100810368856 86 | 0.025529090170231132 87 | 0.023221762292087077 88 | 0.02107305938584937 89 | 0.019723483237127463 90 | 0.027768902087377176 91 | 0.023790666233334277 92 | 0.02183559000906017 93 | 0.019348353561427858 94 | 0.021541342077155908 95 | 0.020851219362682766 96 | 0.01955224501176013 97 | 0.02228688634932041 98 | 0.018856989074912338 99 | 0.01816959279692835 100 | 0.024754421909650166 101 | -------------------------------------------------------------------------------- /logs/loss_2022_04_27_08_48_16/epoch_val_loss.txt: -------------------------------------------------------------------------------- 1 | 3.5736865997314453 2 | 1.7812694907188416 3 | 0.5147329270839691 4 | 0.15201690793037415 5 | 0.10024188458919525 6 | 0.08380990475416183 7 | 0.07576803863048553 8 | 0.06853799521923065 9 | 0.06467496231198311 10 | 0.060902709141373634 11 | 0.05481202341616154 12 | 0.05164487101137638 13 | 0.046625690534710884 14 | 0.046081338077783585 15 | 0.04508414678275585 16 | 0.046726442873477936 17 | 0.041066285222768784 18 | 0.039722129702568054 19 | 0.0392248947173357 20 | 0.04033488966524601 21 | 0.03738676756620407 22 | 0.0356711745262146 23 | 0.03774934820830822 24 | 0.035463595762848854 25 | 0.03278419189155102 26 | 0.03250573016703129 27 | 0.03182028792798519 28 | 0.031694755889475346 29 | 0.03182463627308607 30 | 0.028715165331959724 31 | 0.03064714837819338 32 | 0.028574727475643158 33 | 0.031066023744642735 34 | 0.028762156143784523 35 | 0.027465523220598698 36 | 0.02787941414862871 37 | 0.02755015157163143 38 | 0.02802269347012043 39 | 0.028581750579178333 40 | 0.026334763504564762 41 | 0.026825452223420143 42 | 0.02670316770672798 43 | 0.02603335492312908 44 | 0.025488858111202717 45 | 0.027477828785777092 46 | 0.02550355065613985 47 | 0.026508965529501438 48 | 0.02424653246998787 49 | 0.02420251350849867 50 | 0.024741491302847862 51 | 
0.03815543949604035 52 | 0.024845311418175697 53 | 0.024306144565343857 54 | 0.02493119016289711 55 | 0.024438758194446564 56 | 0.021836227178573607 57 | 0.022118838876485823 58 | 0.02276018038392067 59 | 0.019801595807075502 60 | 0.018804560229182244 61 | 0.01913141254335642 62 | 0.018066196143627165 63 | 0.018252668902277946 64 | 0.017480477318167688 65 | 0.016695075295865537 66 | 0.018235534615814685 67 | 0.016669700480997564 68 | 0.01745656579732895 69 | 0.01661595106124878 70 | 0.014982381090521812 71 | 0.014259136654436589 72 | 0.01617119237780571 73 | 0.01583776492625475 74 | 0.015838896110653877 75 | 0.015466723032295704 76 | 0.014705226197838784 77 | 0.014486565068364144 78 | 0.0142423365265131 79 | 0.013639062829315662 80 | 0.013229098543524742 81 | 0.013664134219288826 82 | 0.014067459665238858 83 | 0.014119291864335536 84 | 0.014162952080368996 85 | 0.014096969552338124 86 | 0.014010479114949704 87 | 0.013855390436947345 88 | 0.01369147039949894 89 | 0.013611100800335407 90 | 0.013387569226324558 91 | 0.013233654387295245 92 | 0.013060701824724675 93 | 0.01311743687838316 94 | 0.013459368608891964 95 | 0.013417618162930012 96 | 0.013188641518354416 97 | 0.013131854124367237 98 | 0.013138605654239655 99 | 0.013040048442780972 100 | 0.013191545940935611 101 | -------------------------------------------------------------------------------- /logs/loss_2022_04_27_08_48_16/events.out.tfevents.1651049298.fef10e9dbba1.425.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/logs/loss_2022_04_27_08_48_16/events.out.tfevents.1651049298.fef10e9dbba1.425.0 -------------------------------------------------------------------------------- /logs/loss_2022_04_27_10_38_48/epoch_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/logs/loss_2022_04_27_10_38_48/epoch_loss.png -------------------------------------------------------------------------------- /logs/loss_2022_04_27_10_38_48/epoch_loss.txt: -------------------------------------------------------------------------------- 1 | 4.417048931121826 2 | 2.7174118811433967 3 | 1.0889532132582231 4 | 0.3425311154939912 5 | 0.17422378638928587 6 | 0.13641497018662366 7 | 0.11632075736468489 8 | 0.11424875665794719 9 | 0.10951222343878313 10 | 0.11042191968722777 11 | 0.0965666960586201 12 | 0.09156128205358982 13 | 0.09250037236647173 14 | 0.09282402846623551 15 | 0.08625757846642625 16 | 0.07673129354688255 17 | 0.07389622215520252 18 | 0.07624811069531874 19 | 0.08134209279986945 20 | 0.08268712799657475 21 | 0.06569299051030116 22 | 0.06593379310586235 23 | 0.07313475605439056 24 | 0.06932794980027458 25 | 0.07105197571218014 26 | 0.05761696923185478 27 | 0.05699523843147538 28 | 0.05502087775279175 29 | 0.056425975635647774 30 | 0.060862130570140754 31 | 0.05275308594784953 32 | 0.05468131161548875 33 | 0.06639060936868191 34 | 0.0586402067406611 35 | 0.05531726946884936 36 | 0.05826686415821314 37 | 0.05614634239199487 38 | 0.060194396329197014 39 | 0.056169633330269295 40 | 0.05521787144243717 41 | 0.05759791826659983 42 | 0.06400778830390084 43 | 0.048669698648154736 44 | 0.05138815820894458 45 | 0.05391152406280691 46 | 0.048903680660507896 47 | 0.05098097136413509 48 | 0.046242827380245384 49 | 0.05179907051338391 50 | 0.0525860372422771 51 | 0.05424936364094416 52 | 
0.049993348659740554 53 | 0.04597619854741626 54 | 0.04917745155592759 55 | 0.05255601741373539 56 | 0.04698830768465996 57 | 0.041387100517749784 58 | 0.04129959721532133 59 | 0.04556649559073978 60 | 0.036499715513653226 61 | 0.03981801929573218 62 | 0.04143420826229784 63 | 0.03435336612164974 64 | 0.03496221779949135 65 | 0.03109016865491867 66 | 0.03035914318429099 67 | 0.029583082410196464 68 | 0.03257722655932108 69 | 0.030363482443822754 70 | 0.027382713970210817 71 | 0.03354052487346861 72 | 0.02999182954016659 73 | 0.027540474219454658 74 | 0.03399232141673565 75 | 0.027007617097761897 76 | 0.025914737520118556 77 | 0.0295799125606815 78 | 0.02715012611200412 79 | 0.025495433765980933 80 | 0.0296443536463711 81 | 0.023164296481344434 82 | 0.025637096497747633 83 | 0.024675296164221233 84 | 0.02778547273741828 85 | 0.021970178662902778 86 | 0.023107113461527558 87 | 0.024780070698923535 88 | 0.022441018600430754 89 | 0.023930547055270937 90 | 0.0282184108470877 91 | 0.023034340888261794 92 | 0.024948879559006956 93 | 0.021047428602145778 94 | 0.019247366736332577 95 | 0.019984866658018696 96 | 0.02513700392511156 97 | 0.02460642974409792 98 | 0.0241888129669759 99 | 0.024461371141175428 100 | 0.023433364638023906 101 | -------------------------------------------------------------------------------- /logs/loss_2022_04_27_10_38_48/epoch_val_loss.txt: -------------------------------------------------------------------------------- 1 | 3.682404637336731 2 | 1.8932517766952515 3 | 0.5478550791740417 4 | 0.1596439927816391 5 | 0.1100359559059143 6 | 0.0877840518951416 7 | 0.07812783867120743 8 | 0.07114855200052261 9 | 0.06861080229282379 10 | 0.059281766414642334 11 | 0.057694293558597565 12 | 0.051728978753089905 13 | 0.052549805492162704 14 | 0.04606110043823719 15 | 0.04738330654799938 16 | 0.04431380145251751 17 | 0.04233948327600956 18 | 0.04040302708745003 19 | 0.038821205496788025 20 | 0.0383895430713892 21 | 0.03584542125463486 22 | 0.03636615164577961 23 | 0.03440128639340401 24 | 0.031500913202762604 25 | 0.03160226531326771 26 | 0.03259335644543171 27 | 0.03182834479957819 28 | 0.03255347441881895 29 | 0.03205320052802563 30 | 0.03115831222385168 31 | 0.030962957069277763 32 | 0.03099967911839485 33 | 0.028362704440951347 34 | 0.029792566783726215 35 | 0.029385950416326523 36 | 0.028081808239221573 37 | 0.02900168113410473 38 | 0.028213596902787685 39 | 0.026003092527389526 40 | 0.029015707783401012 41 | 0.027079648338258266 42 | 0.02746042888611555 43 | 0.026224803179502487 44 | 0.02623423095792532 45 | 0.026428623124957085 46 | 0.025775899179279804 47 | 0.025982394814491272 48 | 0.02434847690165043 49 | 0.027825096622109413 50 | 0.026163294911384583 51 | 0.029283170774579047 52 | 0.025315795838832856 53 | 0.027043038606643678 54 | 0.028298694640398026 55 | 0.024901207908987998 56 | 0.021958087757229804 57 | 0.02251458093523979 58 | 0.022333519905805586 59 | 0.021478286758065224 60 | 0.021176514402031898 61 | 0.018941503018140793 62 | 0.019572099670767784 63 | 0.018108497187495232 64 | 0.018086655251681804 65 | 0.017889507673680784 66 | 0.01727491766214371 67 | 0.01810304317623377 68 | 0.020134907588362692 69 | 0.018655003793537617 70 | 0.018117578141391276 71 | 0.017840097844600677 72 | 0.01779591590166092 73 | 0.016621771082282067 74 | 0.017149972915649413 75 | 0.016952383518218993 76 | 0.015586855821311474 77 | 0.01567951999604702 78 | 0.0161365307867527 79 | 0.01567267570644617 80 | 0.01678410042077303 81 | 0.015898118540644646 82 | 0.01655469797551632 83 | 
0.015443072095513344 84 | 0.015269587188959122 85 | 0.015318373404443263 86 | 0.015480193309485912 87 | 0.015252745896577834 88 | 0.015485197678208351 89 | 0.01524040475487709 90 | 0.015235877968370915 91 | 0.015190575830638408 92 | 0.01506870575249195 93 | 0.015268886275589467 94 | 0.015318392775952816 95 | 0.015248116478323937 96 | 0.01509730275720358 97 | 0.015357919968664646 98 | 0.015471475012600423 99 | 0.015338210947811603 100 | 0.015286244638264179 101 | -------------------------------------------------------------------------------- /logs/loss_2022_04_27_10_38_48/events.out.tfevents.1651055931.9b45dd4991ae.367.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/logs/loss_2022_04_27_10_38_48/events.out.tfevents.1651055931.9b45dd4991ae.367.0 -------------------------------------------------------------------------------- /logs/loss_2022_04_27_12_50_47/epoch_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/logs/loss_2022_04_27_12_50_47/epoch_loss.png -------------------------------------------------------------------------------- /logs/loss_2022_04_27_12_50_47/epoch_loss.txt: -------------------------------------------------------------------------------- 1 | 4.458093025467613 2 | 2.7262558070096103 3 | 1.0888537033037706 4 | 0.3306311368942261 5 | 0.1712129498747262 6 | 0.12332972951910713 7 | 0.1077601161192764 8 | 0.10889687660065564 9 | 0.10751076347448608 10 | 0.09971555254676125 11 | 0.09748913144523447 12 | 0.09051749330352653 13 | 0.08674890751188452 14 | 0.09196238592267036 15 | 0.0813636336136948 16 | 0.08286366950381886 17 | 0.07791051878170534 18 | 0.0753517130559141 19 | 0.07469043592837724 20 | 0.07069844498553059 21 | 0.06863954527811571 22 | 0.05802192301912741 23 | 0.07001199353147637 24 | 0.0646351370960474 25 | 0.0635682385076176 26 | 0.06396392174065113 27 | 0.062142887406728485 28 | 0.0702532638203014 29 | 0.056375787339427254 30 | 0.06388939967886968 31 | 0.05778990279544483 32 | 0.06408696647056124 33 | 0.06048921140080148 34 | 0.046278277158059856 35 | 0.05944571127607064 36 | 0.05725045552985235 37 | 0.05380251800472086 38 | 0.053617957894775 39 | 0.053481346842917526 40 | 0.05578712136908011 41 | 0.05615681384436109 42 | 0.0525641811334274 43 | 0.04595534486526793 44 | 0.04221054826947776 45 | 0.0491331076588143 46 | 0.04645225058563731 47 | 0.047417608005079354 48 | 0.045993872325528755 49 | 0.04980102206834338 50 | 0.05388529971241951 51 | 0.04780796766281128 52 | 0.051682502610815896 53 | 0.05296175873114003 54 | 0.04763079182141357 55 | 0.03715274184942245 56 | 0.038538362830877304 57 | 0.03803896543880304 58 | 0.04017537732919057 59 | 0.03992160202728377 60 | 0.03339115016990238 61 | 0.03391021318319771 62 | 0.03317808165318436 63 | 0.033503353450861244 64 | 0.034213335605131255 65 | 0.037453227241834 66 | 0.033429956477549344 67 | 0.032547304261889724 68 | 0.03456145400802294 69 | 0.026851379209094577 70 | 0.029029812270568476 71 | 0.02536299385958248 72 | 0.02381322646720542 73 | 0.02601998903685146 74 | 0.020065840913189782 75 | 0.02312256395816803 76 | 0.028637176213992966 77 | 0.023025286176966295 78 | 0.023644178753925695 79 | 0.024718130793836383 80 | 0.02247788065837489 81 | 0.023494062303668923 82 | 0.025069689253966014 83 | 0.02251974062787162 84 | 0.024839345862468085 85 | 
0.021578845319648585 86 | 0.022635220984617867 87 | 0.022249876335263253 88 | 0.01972206729567713 89 | 0.018786311563518312 90 | 0.02083740762124459 91 | 0.02136736027896404 92 | 0.019557259066237342 93 | 0.018951669645806152 94 | 0.020326226308114 95 | 0.021592341653174824 96 | 0.019481366727915075 97 | 0.018176950762669244 98 | 0.02213383706079589 99 | 0.019981356461842854 100 | 0.020978835970163347 101 | -------------------------------------------------------------------------------- /logs/loss_2022_04_27_12_50_47/epoch_val_loss.txt: -------------------------------------------------------------------------------- 1 | 3.7051011323928833 2 | 1.8262890577316284 3 | 0.5144035518169403 4 | 0.16302762925624847 5 | 0.10760901868343353 6 | 0.09057768434286118 7 | 0.07540924847126007 8 | 0.07146378979086876 9 | 0.06520375981926918 10 | 0.05898746848106384 11 | 0.054325105622410774 12 | 0.05058479495346546 13 | 0.0504811592400074 14 | 0.046029604971408844 15 | 0.04258855804800987 16 | 0.042371716350317 17 | 0.040247365832328796 18 | 0.04038912057876587 19 | 0.03568720445036888 20 | 0.038001520559191704 21 | 0.03973718546330929 22 | 0.035464052110910416 23 | 0.03202499449253082 24 | 0.02998754195868969 25 | 0.032502518966794014 26 | 0.03302299045026302 27 | 0.03285937011241913 28 | 0.029083450324833393 29 | 0.029631994664669037 30 | 0.03396240994334221 31 | 0.029673300683498383 32 | 0.028280221857130527 33 | 0.027639511972665787 34 | 0.028393579646945 35 | 0.027291471138596535 36 | 0.026989608071744442 37 | 0.02653918694704771 38 | 0.027808908373117447 39 | 0.027841621078550816 40 | 0.02570505067706108 41 | 0.025745649822056293 42 | 0.026372630149126053 43 | 0.024600804783403873 44 | 0.026447951793670654 45 | 0.02569119818508625 46 | 0.026840184815227985 47 | 0.024051610380411148 48 | 0.02362955827265978 49 | 0.024365886114537716 50 | 0.024577765725553036 51 | 0.031041909381747244 52 | 0.02641780823469162 53 | 0.02472583018243313 54 | 0.02326701581478119 55 | 0.019615407288074493 56 | 0.021174174174666403 57 | 0.019675580970942973 58 | 0.01869105324149132 59 | 0.018909885734319686 60 | 0.019662134535610675 61 | 0.01899590715765953 62 | 0.016179793514311314 63 | 0.01545619908720255 64 | 0.015423668920993805 65 | 0.018800214119255542 66 | 0.0158102760091424 67 | 0.0158376544713974 68 | 0.01783675402402878 69 | 0.015972125343978405 70 | 0.01454415861517191 71 | 0.014743064902722836 72 | 0.013825051300227643 73 | 0.01407058835029602 74 | 0.013598379865288734 75 | 0.013919505663216114 76 | 0.013623752258718013 77 | 0.014403878897428512 78 | 0.014411385357379913 79 | 0.01337964329868555 80 | 0.013076365552842617 81 | 0.013368507660925389 82 | 0.013667609356343747 83 | 0.013365321420133114 84 | 0.013264597952365875 85 | 0.013465055078268052 86 | 0.01281917616724968 87 | 0.01263135802000761 88 | 0.012750985845923424 89 | 0.01290153805166483 90 | 0.01281326413154602 91 | 0.012850469164550304 92 | 0.012885735556483268 93 | 0.013168741390109063 94 | 0.013198709674179554 95 | 0.0126633545383811 96 | 0.012886124104261399 97 | 0.012797533720731735 98 | 0.012569484673440457 99 | 0.012130422703921794 100 | 0.012647346407175065 101 | -------------------------------------------------------------------------------- /logs/loss_2022_04_27_12_50_47/events.out.tfevents.1651063849.274e119c63fb.1015.0: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/logs/loss_2022_04_27_12_50_47/events.out.tfevents.1651063849.274e119c63fb.1015.0 -------------------------------------------------------------------------------- /logs/loss_2022_04_28_00_40_54/epoch_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/logs/loss_2022_04_28_00_40_54/epoch_loss.png -------------------------------------------------------------------------------- /logs/loss_2022_04_28_00_40_54/epoch_loss.txt: -------------------------------------------------------------------------------- 1 | 4.65520715713501 2 | 3.142860672690652 3 | 1.5020794109864668 4 | 0.5057930661873384 5 | 0.231415910476988 6 | 0.1739024357362227 7 | 0.1501499665054408 8 | 0.13435510004108603 9 | 0.12552000412886793 10 | 0.1170116358182647 11 | 0.1097346202216365 12 | 0.10218094119971449 13 | 0.09653170305219563 14 | 0.09267877211624925 15 | 0.08959556709636342 16 | 0.08778026801618663 17 | 0.0813840397379615 18 | 0.08208547498692166 19 | 0.07795694809068333 20 | 0.0774568762968887 21 | 0.07742892002517526 22 | 0.07316952097144994 23 | 0.0717044398188591 24 | 0.07023497687822039 25 | 0.07019331865012646 26 | 0.06709351390600204 27 | 0.06731417910619215 28 | 0.06743009134449741 29 | 0.06635952317579226 30 | 0.06368578191507947 31 | 0.06163112514398315 32 | 0.06230247410183603 33 | 0.0609466726468368 34 | 0.059141877869313415 35 | 0.059421493925831535 36 | 0.05991599742661823 37 | 0.05664417435499755 38 | 0.05543165823275393 39 | 0.055084149945865975 40 | 0.05501931634816257 41 | 0.05503683621910485 42 | 0.05480257303199985 43 | 0.05537006275897676 44 | 0.05448474125428633 45 | 0.05232419649308378 46 | 0.05311859653077342 47 | 0.05284474231302738 48 | 0.051879515532742844 49 | 0.052160846746780655 50 | 0.048417276787486946 51 | 0.07137971396247546 52 | 0.06579171708888477 53 | 0.06337685022089216 54 | 0.058213022185696496 55 | 0.06011202625102467 56 | 0.05577432778146532 57 | 0.05307989873819881 58 | 0.05232232163349788 59 | 0.047045067904724014 60 | 0.045659234002232554 61 | 0.046541030332446096 62 | 0.041184055474069385 63 | 0.04066362182299296 64 | 0.041569982427689764 65 | 0.03817177605297831 66 | 0.0390163982907931 67 | 0.041840214654803275 68 | 0.038884344117509 69 | 0.03724856765733825 70 | 0.03528667270309395 71 | 0.03439781483676699 72 | 0.03381528837813271 73 | 0.03448933532668485 74 | 0.03202489465475082 75 | 0.03492107921176486 76 | 0.029904662817716598 77 | 0.03170571397576067 78 | 0.03179397972093688 79 | 0.0303279221471813 80 | 0.029197406230701342 81 | 0.02931012755466832 82 | 0.029168612303005326 83 | 0.027595289217101204 84 | 0.02744665356973807 85 | 0.026995969439546266 86 | 0.027659311725033654 87 | 0.02661879969139894 88 | 0.027540806722309855 89 | 0.025905532100134427 90 | 0.0255900744555725 91 | 0.026152818650007247 92 | 0.025521984696388243 93 | 0.025769058614969254 94 | 0.02644038177612755 95 | 0.02754443759719531 96 | 0.024427745077345107 97 | 0.025285613785187403 98 | 0.026757355800105465 99 | 0.02632749622894658 100 | 0.026431108307507303 101 | -------------------------------------------------------------------------------- /logs/loss_2022_04_28_00_40_54/epoch_val_loss.txt: -------------------------------------------------------------------------------- 1 | 3.979103207588196 2 | 2.2379150390625 3 | 0.7213477790355682 4 | 0.20374882966279984 5 | 
0.13149111717939377 6 | 0.10669583082199097 7 | 0.08946957811713219 8 | 0.07844944670796394 9 | 0.07209542766213417 10 | 0.06465885788202286 11 | 0.060964012518525124 12 | 0.05698745884001255 13 | 0.053726550191640854 14 | 0.053231727331876755 15 | 0.05091492086648941 16 | 0.04869535565376282 17 | 0.045929690822958946 18 | 0.043502215296030045 19 | 0.04109686613082886 20 | 0.042073581367731094 21 | 0.03760443814098835 22 | 0.036989014595746994 23 | 0.0369559321552515 24 | 0.03501574695110321 25 | 0.03553796745836735 26 | 0.03463827446103096 27 | 0.03613190911710262 28 | 0.03488997742533684 29 | 0.03165611159056425 30 | 0.03400527499616146 31 | 0.03399870544672012 32 | 0.03354485519230366 33 | 0.030975072644650936 34 | 0.0297493115067482 35 | 0.029600737616419792 36 | 0.02729297336190939 37 | 0.027453931979835033 38 | 0.028598678298294544 39 | 0.027731974609196186 40 | 0.030310326255857944 41 | 0.026450641453266144 42 | 0.027599090710282326 43 | 0.027010041289031506 44 | 0.026624951511621475 45 | 0.027538660913705826 46 | 0.026772234588861465 47 | 0.026853609830141068 48 | 0.027332110330462456 49 | 0.026638195849955082 50 | 0.026076992973685265 51 | 0.029674236476421357 52 | 0.03184238411486149 53 | 0.02579696960747242 54 | 0.026541008800268173 55 | 0.028798045963048934 56 | 0.02365291155874729 57 | 0.024432314187288286 58 | 0.024038903787732123 59 | 0.022221024334430694 60 | 0.022891897335648538 61 | 0.01906990371644497 62 | 0.021012770757079125 63 | 0.020605479553341865 64 | 0.020398029685020448 65 | 0.019171418249607088 66 | 0.01934974603354931 67 | 0.020316287130117416 68 | 0.019410957768559455 69 | 0.018952558375895025 70 | 0.017280998453497887 71 | 0.0177790354937315 72 | 0.018064785189926623 73 | 0.01828454677015543 74 | 0.01720294840633869 75 | 0.01639395747333765 76 | 0.016722467541694642 77 | 0.016642549820244313 78 | 0.01656894329935312 79 | 0.015701821073889732 80 | 0.015975065901875495 81 | 0.016035530529916287 82 | 0.015547602623701095 83 | 0.01571439057588577 84 | 0.01621132455766201 85 | 0.015737788379192354 86 | 0.01545789260417223 87 | 0.015475354716181755 88 | 0.015286277420818806 89 | 0.015320570766925811 90 | 0.015739747881889345 91 | 0.015467294491827488 92 | 0.015462711267173291 93 | 0.015299991890788078 94 | 0.014891423098742963 95 | 0.014959413185715675 96 | 0.015149685740470886 97 | 0.015103902481496335 98 | 0.014999320358037948 99 | 0.015079839341342448 100 | 0.0150094548240304 101 | -------------------------------------------------------------------------------- /logs/loss_2022_04_28_00_40_54/events.out.tfevents.1651106457.117e69507361.564.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/logs/loss_2022_04_28_00_40_54/events.out.tfevents.1651106457.117e69507361.564.0 -------------------------------------------------------------------------------- /logs/loss_2022_04_28_14_54_17/epoch_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/logs/loss_2022_04_28_14_54_17/epoch_loss.png -------------------------------------------------------------------------------- /logs/loss_2022_04_28_14_54_17/epoch_loss.txt: -------------------------------------------------------------------------------- 1 | 3.3427013629012636 2 | 0.590641807185279 3 | 0.20623346173928844 4 | 0.13935681179993684 5 | 0.11779505432479911 6 | 
0.10669546342558331 7 | 0.0995730339239041 8 | 0.09289641034685903 9 | 0.08960233483877447 10 | 0.08865145291719172 11 | 0.08199652650703987 12 | 0.08332964736554357 13 | 0.08082385785463783 14 | 0.07951261059691508 15 | 0.07187494143015809 16 | 0.07693152552884486 17 | 0.07002928235257665 18 | 0.06805908863122265 19 | 0.06391975372615788 20 | 0.06560571603477001 21 | 0.06688064705166552 22 | 0.062423851289269 23 | 0.06189305805083778 24 | 0.06095021272905999 25 | 0.05913820943484704 26 | 0.05766822151425812 27 | 0.05601171863575776 28 | 0.050846687311099634 29 | 0.0500038359210723 30 | 0.05070198744845887 31 | 0.04995435054620935 32 | 0.04775367355905473 33 | 0.04747431728368004 34 | 0.05075365285803046 35 | 0.049145943324805964 36 | 0.04660840377522012 37 | 0.04236363642849028 38 | 0.04308449916231136 39 | 0.04134128590942257 40 | 0.04134896128024492 41 | 0.040451034003247816 42 | 0.040809157501078316 43 | 0.04189636699027485 44 | 0.03930564734877812 45 | 0.04004836426013046 46 | 0.03825837828529378 47 | 0.03547370240299238 48 | 0.03609677294476165 49 | 0.035196643017439376 50 | 0.03430712522628407 51 | 0.04613391875237641 52 | 0.05915206435084757 53 | 0.045893035898916426 54 | 0.04116026466298434 55 | 0.0429476417420018 56 | 0.03999344222247601 57 | 0.034763063090698175 58 | 0.03578517514720766 59 | 0.03375119598996308 60 | 0.03283411696967151 61 | 0.03579546554893669 62 | 0.03182236654813298 63 | 0.03289871994768166 64 | 0.03093694845964718 65 | 0.028104687105709066 66 | 0.0279214970392382 67 | 0.02814181201522135 68 | 0.026209147684534806 69 | 0.024499411086758807 70 | 0.02420818345660033 71 | 0.02401729004470528 72 | 0.02229024926847261 73 | 0.021894857381832684 74 | 0.021454263018677013 75 | 0.020758730825683518 76 | 0.02169692176976241 77 | 0.019593946940343207 78 | 0.019191343562367062 79 | 0.0194984604876178 80 | 0.02022809916266447 81 | 0.017767922341590747 82 | 0.01808840037944416 83 | 0.018055611812613077 84 | 0.017147960676164885 85 | 0.015863009145121194 86 | 0.015711418758534514 87 | 0.016356725540633003 88 | 0.016216116898512052 89 | 0.015499612758867442 90 | 0.015379458964647104 91 | 0.016735805649982973 92 | 0.014799573211025239 93 | 0.015743958410651734 94 | 0.014708074144113601 95 | 0.014328512709148021 96 | 0.015710317682371373 97 | 0.01542505334622951 98 | 0.014101080921439765 99 | 0.014700241691510503 100 | 0.014981216627832812 101 | -------------------------------------------------------------------------------- /logs/loss_2022_04_28_14_54_17/epoch_val_loss.txt: -------------------------------------------------------------------------------- 1 | 1.1948505997657777 2 | 0.2769960485398769 3 | 0.1309874437749386 4 | 0.10720247365534305 5 | 0.0823921812698245 6 | 0.06992402952164412 7 | 0.0779087346047163 8 | 0.06684023551642895 9 | 0.06127838855609298 10 | 0.06253754440695047 11 | 0.06560290511697531 12 | 0.05028826054185629 13 | 0.05307867294177413 14 | 0.046788199059665206 15 | 0.05016098273918033 16 | 0.041087670251727104 17 | 0.049103803001344204 18 | 0.04360529286786914 19 | 0.04554138630628586 20 | 0.03290841649286449 21 | 0.04053358295932412 22 | 0.038861811719834806 23 | 0.040706123877316716 24 | 0.03609397481195629 25 | 0.03557254578918219 26 | 0.03464236315339804 27 | 0.03329266821965575 28 | 0.03151600556448102 29 | 0.030487440805882216 30 | 0.03179679936729372 31 | 0.030378894181922078 32 | 0.03546885224059224 33 | 0.028008161624893547 34 | 0.030146837001666427 35 | 0.028426590701565148 36 | 0.030748564330860973 37 | 0.028618200030177832 38 | 
0.03007163112051785 39 | 0.02537959101609886 40 | 0.028373095905408263 41 | 0.025091598788276315 42 | 0.027431158255785702 43 | 0.0274854336399585 44 | 0.0238998107612133 45 | 0.024188394332304596 46 | 0.025603410461917518 47 | 0.022463220916688443 48 | 0.021122918161563576 49 | 0.023449525656178593 50 | 0.02241856213659048 51 | 0.030004368303343652 52 | 0.03465683250688016 53 | 0.025661695492453875 54 | 0.025751420808956028 55 | 0.0250759432092309 56 | 0.024298161384649575 57 | 0.023818821809254587 58 | 0.02544179279357195 59 | 0.02248522681184113 60 | 0.02272053265478462 61 | 0.021450468467082828 62 | 0.022059163730591535 63 | 0.01965688676573336 64 | 0.019216149824205785 65 | 0.020135902601759882 66 | 0.02419198288116604 67 | 0.017368705407716335 68 | 0.01844585470389575 69 | 0.015960348234511913 70 | 0.017440078582149 71 | 0.015858469036174938 72 | 0.01589310457929969 73 | 0.01708033775212243 74 | 0.030576034029945732 75 | 0.014990652166306972 76 | 0.020580469502601773 77 | 0.01814356680260971 78 | 0.016363495017867536 79 | 0.016028978914255275 80 | 0.015470803889911622 81 | 0.017227034358074888 82 | 0.016705141763668507 83 | 0.01754759649047628 84 | 0.02099468276137486 85 | 0.02627454571193084 86 | 0.016601535107474773 87 | 0.019520913722226398 88 | 0.016074266715440898 89 | 0.015431905922014266 90 | 0.015508590545505286 91 | 0.013960553548531606 92 | 0.015237966080894694 93 | 0.015095379657577724 94 | 0.01584624971728772 95 | 0.015998882468556984 96 | 0.01559915920952335 97 | 0.01576072332682088 98 | 0.016472871112637223 99 | 0.014691755402600393 100 | 0.014136423316085712 101 | -------------------------------------------------------------------------------- /logs/loss_2022_04_28_14_54_17/events.out.tfevents.1651128857.LAPTOP-IE5MVR15.24536.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/logs/loss_2022_04_28_14_54_17/events.out.tfevents.1651128857.LAPTOP-IE5MVR15.24536.0 -------------------------------------------------------------------------------- /logs/loss_2022_05_02_14_57_57/epoch_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/logs/loss_2022_05_02_14_57_57/epoch_loss.png -------------------------------------------------------------------------------- /logs/loss_2022_05_02_14_57_57/epoch_loss.txt: -------------------------------------------------------------------------------- 1 | 17.101406224568684 2 | 10.8318008740743 3 | 4.240671507517496 4 | 1.0019958794116974 5 | 0.37954812149206796 6 | 0.2687491794427236 7 | 0.22754189471403757 8 | 0.19753684798876445 9 | 0.1771739900112152 10 | 0.16613257378339769 11 | 0.14869885842005412 12 | 0.13755213419596354 13 | 0.13448657716313997 14 | 0.12195368086298307 15 | 0.1128251701593399 16 | 0.10961388771732648 17 | 0.10665635019540787 18 | 0.10061115821202596 19 | 0.0969288428624471 20 | 0.09855932394663493 21 | 0.0889915977915128 22 | 0.08737521395087242 23 | 0.08142138893405597 24 | 0.081571697195371 25 | 0.08513322671254477 26 | 0.0799174178391695 27 | 0.07576848641037941 28 | 0.07407469501097998 29 | 0.07028314856191477 30 | 0.07057048715651035 31 | 0.0709464654326439 32 | 0.07267625791331132 33 | 0.06727536929150423 34 | 0.0662232073644797 35 | 0.06310114165147146 36 | 0.06374188972016176 37 | 0.06626531345148881 38 | 0.05850081816315651 39 | 0.056352414563298224 
40 | 0.05607227062185605 41 | 0.057017019018530846 42 | 0.05952403930326303 43 | 0.057178026810288426 44 | 0.051601182545224826 45 | 0.051208433136343955 46 | 0.051774655406673746 47 | 0.050313881536324816 48 | 0.04995381236076355 49 | 0.048258970181147255 50 | 0.04914092607796192 51 | 0.06768884502040844 52 | 0.06370118060149252 53 | 0.05913636611464123 54 | 0.05405666360942026 55 | 0.052676150932287176 56 | 0.04658079737176498 57 | 0.0453374430614834 58 | 0.04464669832183669 59 | 0.04386587947762261 60 | 0.038802354324919484 61 | 0.038647202278176945 62 | 0.03676449179959794 63 | 0.03481319181931516 64 | 0.0347878224371622 65 | 0.03463629183825105 66 | 0.03564592384112378 67 | 0.03169099524772415 68 | 0.03046195216011256 69 | 0.029932656922998527 70 | 0.02693921811878681 71 | 0.02624520653237899 72 | 0.02643638541145871 73 | 0.024267646336617568 74 | 0.02276813123996059 75 | 0.022201836206174146 76 | 0.025956252019386738 77 | 0.022044219623785465 78 | 0.01913531731891756 79 | 0.018665816611610354 80 | 0.020095466733134042 81 | 0.019377306945777186 82 | 0.019703271872519204 83 | 0.017145425283039608 84 | 0.017283631632259735 85 | 0.015655260040269545 86 | 0.017102580536932994 87 | 0.01568767197119693 88 | 0.015433585511830945 89 | 0.01649760961299762 90 | 0.01480112192220986 91 | 0.01458095806495597 92 | 0.01634620662080124 93 | 0.014586444144758086 94 | 0.01412225275610884 95 | 0.014443966598870853 96 | 0.014422304722635696 97 | 0.014611958689056338 98 | 0.01421121487316365 99 | 0.014518235716968775 100 | 0.01446291058867549 101 | -------------------------------------------------------------------------------- /logs/loss_2022_05_02_14_57_57/epoch_val_loss.txt: -------------------------------------------------------------------------------- 1 | 14.182828585306803 2 | 6.964454015096028 3 | 1.7364161411921184 4 | 0.4160226086775462 5 | 0.23061403135458627 6 | 0.18009933829307556 7 | 0.15316933890183768 8 | 0.12558546662330627 9 | 0.11013514300187428 10 | 0.10292657961448033 11 | 0.09011622269948323 12 | 0.0910362775127093 13 | 0.07362671693166097 14 | 0.06496318926413854 15 | 0.06620646268129349 16 | 0.05724670241276423 17 | 0.05412605529030164 18 | 0.05476600428422292 19 | 0.04998553295930227 20 | 0.04453219473361969 21 | 0.046111090729633965 22 | 0.03964699556430181 23 | 0.04128604009747505 24 | 0.0385576585928599 25 | 0.040300281097491585 26 | 0.036520869781573616 27 | 0.03233897313475609 28 | 0.03402836248278618 29 | 0.029543195540706318 30 | 0.03613479311267535 31 | 0.030847225338220596 32 | 0.03196833903590838 33 | 0.030614140133063 34 | 0.027615018809835117 35 | 0.029661099116007488 36 | 0.028920121490955353 37 | 0.031096385171016056 38 | 0.026975831637779873 39 | 0.02437760556737582 40 | 0.024089227120081585 41 | 0.024140140662590664 42 | 0.02602989909549554 43 | 0.023526831219593685 44 | 0.023234928647677105 45 | 0.02490025262037913 46 | 0.024476055055856705 47 | 0.02195119174818198 48 | 0.02400912468632062 49 | 0.021773086860775948 50 | 0.021737251430749893 51 | 0.03704084885808138 52 | 0.027747553415023364 53 | 0.02609148549918945 54 | 0.027060106253394715 55 | 0.02310138403509672 56 | 0.02209098207262846 57 | 0.019444907299027994 58 | 0.01728303673175665 59 | 0.022116302154385127 60 | 0.017028711091440458 61 | 0.018385969388943452 62 | 0.020397630233604174 63 | 0.017034396529197693 64 | 0.0161269146662492 65 | 0.014033435915525142 66 | 0.015593188958099255 67 | 0.015342251899150701 68 | 0.015232413147504512 69 | 0.01195777920432962 70 | 0.013383755532021705 71 | 
0.01376453500527602 72 | 0.012433087345785819 73 | 0.010423123764877137 74 | 0.011021508405414911 75 | 0.010145062186683599 76 | 0.011127662809135823 77 | 0.009687475251177182 78 | 0.010067089210049463 79 | 0.008900713497916093 80 | 0.009318945392106589 81 | 0.008838421199470758 82 | 0.008917749107170563 83 | 0.008874757430301262 84 | 0.00834214468844808 85 | 0.009231974191677112 86 | 0.00839424731496435 87 | 0.00878818673439897 88 | 0.008268425169472512 89 | 0.008394974642075025 90 | 0.008387481507200461 91 | 0.008073390604784856 92 | 0.008447423434028259 93 | 0.007967768595195733 94 | 0.008031251589552714 95 | 0.007093459976693759 96 | 0.0077013208960684445 97 | 0.008188612150171628 98 | 0.008229664276139094 99 | 0.008362234892466893 100 | 0.0081037561624096 101 | -------------------------------------------------------------------------------- /logs/loss_2022_05_02_14_57_57/events.out.tfevents.1651503480.437fb01f4bb0.370.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/logs/loss_2022_05_02_14_57_57/events.out.tfevents.1651503480.437fb01f4bb0.370.0 -------------------------------------------------------------------------------- /model_data/.gitattributes: -------------------------------------------------------------------------------- 1 | *.pth filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /model_data/gesture.yaml: -------------------------------------------------------------------------------- 1 | #------------------------------detect.py--------------------------------# 2 | # 这一部分是为了半自动标注数据,可以减轻负担,需要提前训练一个权重,以Labelme格式保存 3 | # dir_origin_path 图片存放位置 4 | # dir_save_path Annotation保存位置 5 | # ----------------------------------------------------------------------# 6 | dir_detect_path: ./JPEGImages 7 | detect_save_path: ./Annotation 8 | 9 | # ----------------------------- train.py -------------------------------# 10 | nc: 8 # 类别的数量 11 | classes: ["up","down","left","right","front","back","clockwise","anticlockwise"] # 类别 12 | confidence: 0.5 # 置信度 13 | nms_iou: 0.3 14 | letterbox_image: False 15 | 16 | lr_decay_type: cos # 使用到的学习率下降方式,可选的有step、cos 17 | # 用于设置是否使用多线程读取数据 18 | # 开启后会加快数据读取速度,但是会占用更多内存 19 | # 内存较小的电脑可以设置为2或者0,win建议设为0 20 | num_workers: 4 -------------------------------------------------------------------------------- /model_data/gesture_classes.txt: -------------------------------------------------------------------------------- 1 | up 2 | down 3 | left 4 | right 5 | front 6 | back 7 | clockwise 8 | anticlockwise -------------------------------------------------------------------------------- /model_data/simhei.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/model_data/simhei.ttf -------------------------------------------------------------------------------- /model_data/yolo_anchors.txt: -------------------------------------------------------------------------------- 1 | 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 -------------------------------------------------------------------------------- /model_data/yolotiny_anchors.txt: -------------------------------------------------------------------------------- 1 | 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 
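Note on the model_data files above: the class list, anchor file and gesture.yaml drive both training and inference. Below is a minimal, self-contained sketch of how they are consumed at runtime. It mirrors the parsing done by get_classes()/get_anchors() in utils/utils.py further down, but uses illustrative function names (load_classes/load_anchors) and reads gesture.yaml directly with yaml.safe_load() instead of the repo's get_yaml.get_config() helper, whose exact interface is not shown in this section — treat those substitutions as assumptions, not the project's canonical API.

import numpy as np
import yaml

def load_classes(path="model_data/gesture_classes.txt"):
    # One class name per line: up / down / left / right / front / back / clockwise / anticlockwise
    with open(path, encoding="utf-8") as f:
        names = [line.strip() for line in f if line.strip()]
    return names, len(names)

def load_anchors(path="model_data/yolotiny_anchors.txt"):
    # A single comma-separated line of width,height pairs -> (N, 2) array of anchor boxes
    with open(path, encoding="utf-8") as f:
        values = [float(x) for x in f.readline().split(",")]
    anchors = np.array(values).reshape(-1, 2)
    return anchors, len(anchors)

if __name__ == "__main__":
    class_names, num_classes = load_classes()
    anchors, num_anchors = load_anchors()
    with open("model_data/gesture.yaml", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)
    # gesture.yaml keeps nc and classes in sync with gesture_classes.txt (8 gesture categories)
    assert num_classes == cfg["nc"] == len(cfg["classes"])
    print(num_classes, "classes;", num_anchors, "anchors;", "confidence =", cfg["confidence"])

For the full-size YOLOv4 model, the same sketch applies with model_data/yolo_anchors.txt (9 anchor pairs) in place of the tiny variant's 6.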
-------------------------------------------------------------------------------- /nets/CSPdarknet.py: -------------------------------------------------------------------------------- 1 | import math 2 | from collections import OrderedDict 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | 9 | #-------------------------------------------------# 10 | # MISH激活函数 11 | #-------------------------------------------------# 12 | class Mish(nn.Module): 13 | def __init__(self): 14 | super(Mish, self).__init__() 15 | 16 | def forward(self, x): 17 | return x * torch.tanh(F.softplus(x)) 18 | 19 | #---------------------------------------------------# 20 | # 卷积块 -> 卷积 + 标准化 + 激活函数 21 | # Conv2d + BatchNormalization + Mish 22 | #---------------------------------------------------# 23 | class BasicConv(nn.Module): 24 | def __init__(self, in_channels, out_channels, kernel_size, stride=1): 25 | super(BasicConv, self).__init__() 26 | 27 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, kernel_size//2, bias=False) 28 | self.bn = nn.BatchNorm2d(out_channels) 29 | self.activation = Mish() 30 | 31 | def forward(self, x): 32 | x = self.conv(x) 33 | x = self.bn(x) 34 | x = self.activation(x) 35 | return x 36 | 37 | #---------------------------------------------------# 38 | # CSPdarknet的结构块的组成部分 39 | # 内部堆叠的残差块 40 | #---------------------------------------------------# 41 | class Resblock(nn.Module): 42 | def __init__(self, channels, hidden_channels=None): 43 | super(Resblock, self).__init__() 44 | 45 | if hidden_channels is None: 46 | hidden_channels = channels 47 | 48 | self.block = nn.Sequential( 49 | BasicConv(channels, hidden_channels, 1), 50 | BasicConv(hidden_channels, channels, 3) 51 | ) 52 | 53 | def forward(self, x): 54 | return x + self.block(x) 55 | 56 | #--------------------------------------------------------------------# 57 | # CSPdarknet的结构块 58 | # 首先利用ZeroPadding2D和一个步长为2x2的卷积块进行高和宽的压缩 59 | # 然后建立一个大的残差边shortconv、这个大残差边绕过了很多的残差结构 60 | # 主干部分会对num_blocks进行循环,循环内部是残差结构。 61 | # 对于整个CSPdarknet的结构块,就是一个大残差块+内部多个小残差块 62 | #--------------------------------------------------------------------# 63 | class Resblock_body(nn.Module): 64 | def __init__(self, in_channels, out_channels, num_blocks, first): 65 | super(Resblock_body, self).__init__() 66 | #----------------------------------------------------------------# 67 | # 利用一个步长为2x2的卷积块进行高和宽的压缩 68 | #----------------------------------------------------------------# 69 | self.downsample_conv = BasicConv(in_channels, out_channels, 3, stride=2) 70 | 71 | if first: 72 | #--------------------------------------------------------------------------# 73 | # 然后建立一个大的残差边self.split_conv0、这个大残差边绕过了很多的残差结构 74 | #--------------------------------------------------------------------------# 75 | self.split_conv0 = BasicConv(out_channels, out_channels, 1) 76 | 77 | #----------------------------------------------------------------# 78 | # 主干部分会对num_blocks进行循环,循环内部是残差结构。 79 | #----------------------------------------------------------------# 80 | self.split_conv1 = BasicConv(out_channels, out_channels, 1) 81 | self.blocks_conv = nn.Sequential( 82 | Resblock(channels=out_channels, hidden_channels=out_channels//2), 83 | BasicConv(out_channels, out_channels, 1) 84 | ) 85 | 86 | self.concat_conv = BasicConv(out_channels*2, out_channels, 1) 87 | else: 88 | #--------------------------------------------------------------------------# 89 | # 然后建立一个大的残差边self.split_conv0、这个大残差边绕过了很多的残差结构 90 | 
#--------------------------------------------------------------------------# 91 | self.split_conv0 = BasicConv(out_channels, out_channels//2, 1) 92 | 93 | #----------------------------------------------------------------# 94 | # 主干部分会对num_blocks进行循环,循环内部是残差结构。 95 | #----------------------------------------------------------------# 96 | self.split_conv1 = BasicConv(out_channels, out_channels//2, 1) 97 | self.blocks_conv = nn.Sequential( 98 | *[Resblock(out_channels//2) for _ in range(num_blocks)], 99 | BasicConv(out_channels//2, out_channels//2, 1) 100 | ) 101 | 102 | self.concat_conv = BasicConv(out_channels, out_channels, 1) 103 | 104 | def forward(self, x): 105 | x = self.downsample_conv(x) 106 | 107 | x0 = self.split_conv0(x) 108 | 109 | x1 = self.split_conv1(x) 110 | x1 = self.blocks_conv(x1) 111 | 112 | #------------------------------------# 113 | # 将大残差边再堆叠回来 114 | #------------------------------------# 115 | x = torch.cat([x1, x0], dim=1) 116 | #------------------------------------# 117 | # 最后对通道数进行整合 118 | #------------------------------------# 119 | x = self.concat_conv(x) 120 | 121 | return x 122 | 123 | #---------------------------------------------------# 124 | # CSPdarknet53 的主体部分 125 | # 输入为一张416x416x3的图片 126 | # 输出为三个有效特征层 127 | #---------------------------------------------------# 128 | class CSPDarkNet(nn.Module): 129 | def __init__(self, layers): 130 | super(CSPDarkNet, self).__init__() 131 | self.inplanes = 32 132 | # 416,416,3 -> 416,416,32 133 | self.conv1 = BasicConv(3, self.inplanes, kernel_size=3, stride=1) 134 | self.feature_channels = [64, 128, 256, 512, 1024] 135 | 136 | self.stages = nn.ModuleList([ 137 | # 416,416,32 -> 208,208,64 138 | Resblock_body(self.inplanes, self.feature_channels[0], layers[0], first=True), 139 | # 208,208,64 -> 104,104,128 140 | Resblock_body(self.feature_channels[0], self.feature_channels[1], layers[1], first=False), 141 | # 104,104,128 -> 52,52,256 142 | Resblock_body(self.feature_channels[1], self.feature_channels[2], layers[2], first=False), 143 | # 52,52,256 -> 26,26,512 144 | Resblock_body(self.feature_channels[2], self.feature_channels[3], layers[3], first=False), 145 | # 26,26,512 -> 13,13,1024 146 | Resblock_body(self.feature_channels[3], self.feature_channels[4], layers[4], first=False) 147 | ]) 148 | 149 | self.num_features = 1 150 | for m in self.modules(): 151 | if isinstance(m, nn.Conv2d): 152 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 153 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 154 | elif isinstance(m, nn.BatchNorm2d): 155 | m.weight.data.fill_(1) 156 | m.bias.data.zero_() 157 | 158 | 159 | def forward(self, x): 160 | x = self.conv1(x) 161 | 162 | x = self.stages[0](x) 163 | x = self.stages[1](x) 164 | out3 = self.stages[2](x) 165 | out4 = self.stages[3](out3) 166 | out5 = self.stages[4](out4) 167 | 168 | return out3, out4, out5 169 | 170 | def darknet53(pretrained): 171 | model = CSPDarkNet([1, 2, 8, 8, 4]) 172 | if pretrained: 173 | model.load_state_dict(torch.load("model_data/CSPdarknet53_backbone_weights.pth")) 174 | return model 175 | -------------------------------------------------------------------------------- /nets/CSPdarknet53_tiny.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | #-------------------------------------------------# 8 | # 卷积块 9 | # Conv2d + BatchNorm2d + LeakyReLU 10 | #-------------------------------------------------# 11 | class BasicConv(nn.Module): 12 | def __init__(self, in_channels, out_channels, kernel_size, stride=1): 13 | super(BasicConv, self).__init__() 14 | 15 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, kernel_size//2, bias=False) 16 | self.bn = nn.BatchNorm2d(out_channels) 17 | self.activation = nn.LeakyReLU(0.1) 18 | 19 | def forward(self, x): 20 | x = self.conv(x) 21 | x = self.bn(x) 22 | x = self.activation(x) 23 | return x 24 | 25 | 26 | ''' 27 | input 28 | | 29 | BasicConv 30 | ----------------------- 31 | | | 32 | route_group route 33 | | | 34 | BasicConv | 35 | | | 36 | ------------------- | 37 | | | | 38 | route_1 BasicConv | 39 | | | | 40 | -----------------cat | 41 | | | 42 | ---- BasicConv | 43 | | | | 44 | feat cat--------------------- 45 | | 46 | MaxPooling2D 47 | ''' 48 | #---------------------------------------------------# 49 | # CSPdarknet53-tiny的结构块 50 | # 存在一个大残差边 51 | # 这个大残差边绕过了很多的残差结构 52 | #---------------------------------------------------# 53 | class Resblock_body(nn.Module): 54 | def __init__(self, in_channels, out_channels): 55 | super(Resblock_body, self).__init__() 56 | self.out_channels = out_channels 57 | 58 | self.conv1 = BasicConv(in_channels, out_channels, 3) 59 | 60 | self.conv2 = BasicConv(out_channels//2, out_channels//2, 3) 61 | self.conv3 = BasicConv(out_channels//2, out_channels//2, 3) 62 | 63 | self.conv4 = BasicConv(out_channels, out_channels, 1) 64 | self.maxpool = nn.MaxPool2d([2,2],[2,2]) 65 | 66 | def forward(self, x): 67 | # 利用一个3x3卷积进行特征整合 68 | x = self.conv1(x) 69 | # 引出一个大的残差边route 70 | route = x 71 | 72 | c = self.out_channels 73 | # 对特征层的通道进行分割,取第二部分作为主干部分。 74 | x = torch.split(x, c//2, dim = 1)[1] 75 | # 对主干部分进行3x3卷积 76 | x = self.conv2(x) 77 | # 引出一个小的残差边route_1 78 | route1 = x 79 | # 对第主干部分进行3x3卷积 80 | x = self.conv3(x) 81 | # 主干部分与残差部分进行相接 82 | x = torch.cat([x,route1], dim = 1) 83 | 84 | # 对相接后的结果进行1x1卷积 85 | x = self.conv4(x) 86 | feat = x 87 | x = torch.cat([route, x], dim = 1) 88 | 89 | # 利用最大池化进行高和宽的压缩 90 | x = self.maxpool(x) 91 | return x,feat 92 | 93 | class CSPDarkNet(nn.Module): 94 | def __init__(self): 95 | super(CSPDarkNet, self).__init__() 96 | # 首先利用两次步长为2x2的3x3卷积进行高和宽的压缩 97 | # 416,416,3 -> 208,208,32 -> 104,104,64 98 | self.conv1 = BasicConv(3, 32, kernel_size=3, stride=2) 99 | self.conv2 = BasicConv(32, 64, kernel_size=3, stride=2) 100 | 101 | # 104,104,64 -> 52,52,128 102 | self.resblock_body1 = Resblock_body(64, 64) 103 | # 52,52,128 -> 26,26,256 104 | self.resblock_body2 = Resblock_body(128, 128) 105 | # 
26,26,256 -> 13,13,512 106 | self.resblock_body3 = Resblock_body(256, 256) 107 | # 13,13,512 -> 13,13,512 108 | self.conv3 = BasicConv(512, 512, kernel_size=3) 109 | 110 | self.num_features = 1 111 | # 进行权值初始化 112 | for m in self.modules(): 113 | if isinstance(m, nn.Conv2d): 114 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 115 | m.weight.data.normal_(0, math.sqrt(2. / n)) 116 | elif isinstance(m, nn.BatchNorm2d): 117 | m.weight.data.fill_(1) 118 | m.bias.data.zero_() 119 | 120 | 121 | def forward(self, x): 122 | # 416,416,3 -> 208,208,32 -> 104,104,64 123 | x = self.conv1(x) 124 | x = self.conv2(x) 125 | 126 | # 104,104,64 -> 52,52,128 127 | x, _ = self.resblock_body1(x) 128 | # 52,52,128 -> 26,26,256 129 | x, _ = self.resblock_body2(x) 130 | # 26,26,256 -> x为13,13,512 131 | # -> feat1为26,26,256 132 | x, feat1 = self.resblock_body3(x) 133 | 134 | # 13,13,512 -> 13,13,512 135 | x = self.conv3(x) 136 | feat2 = x 137 | return feat1,feat2 138 | 139 | def darknet53_tiny(pretrained, **kwargs): 140 | model = CSPDarkNet() 141 | if pretrained: 142 | model.load_state_dict(torch.load("model_data/CSPdarknet53_tiny_backbone_weights.pth")) 143 | return model 144 | -------------------------------------------------------------------------------- /nets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------- /nets/attention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | 5 | class se_block(nn.Module): 6 | def __init__(self, channel, ratio=16): 7 | super(se_block, self).__init__() 8 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 9 | self.fc = nn.Sequential( 10 | nn.Linear(channel, channel // ratio, bias=False), 11 | nn.ReLU(inplace=True), 12 | nn.Linear(channel // ratio, channel, bias=False), 13 | nn.Sigmoid() 14 | ) 15 | 16 | def forward(self, x): 17 | b, c, _, _ = x.size() 18 | y = self.avg_pool(x).view(b, c) 19 | y = self.fc(y).view(b, c, 1, 1) 20 | return x * y 21 | 22 | class ChannelAttention(nn.Module): 23 | def __init__(self, in_planes, ratio=8): 24 | super(ChannelAttention, self).__init__() 25 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 26 | self.max_pool = nn.AdaptiveMaxPool2d(1) 27 | 28 | # 利用1x1卷积代替全连接 29 | self.fc1 = nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False) 30 | self.relu1 = nn.ReLU() 31 | self.fc2 = nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False) 32 | 33 | self.sigmoid = nn.Sigmoid() 34 | 35 | def forward(self, x): 36 | avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x)))) 37 | max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x)))) 38 | out = avg_out + max_out 39 | return self.sigmoid(out) 40 | 41 | class SpatialAttention(nn.Module): 42 | def __init__(self, kernel_size=7): 43 | super(SpatialAttention, self).__init__() 44 | 45 | assert kernel_size in (3, 7), 'kernel size must be 3 or 7' 46 | padding = 3 if kernel_size == 7 else 1 47 | self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False) 48 | self.sigmoid = nn.Sigmoid() 49 | 50 | def forward(self, x): 51 | avg_out = torch.mean(x, dim=1, keepdim=True) 52 | max_out, _ = torch.max(x, dim=1, keepdim=True) 53 | x = torch.cat([avg_out, max_out], dim=1) 54 | x = self.conv1(x) 55 | return self.sigmoid(x) 56 | 57 | class cbam_block(nn.Module): 58 | def __init__(self, channel, ratio=8, kernel_size=7): 59 | super(cbam_block, self).__init__() 60 | self.channelattention = 
ChannelAttention(channel, ratio=ratio) 61 | self.spatialattention = SpatialAttention(kernel_size=kernel_size) 62 | 63 | def forward(self, x): 64 | x = x*self.channelattention(x) 65 | x = x*self.spatialattention(x) 66 | return x 67 | 68 | class eca_block(nn.Module): 69 | def __init__(self, channel, b=1, gamma=2): 70 | super(eca_block, self).__init__() 71 | kernel_size = int(abs((math.log(channel, 2) + b) / gamma)) 72 | kernel_size = kernel_size if kernel_size % 2 else kernel_size + 1 73 | 74 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 75 | self.conv = nn.Conv1d(1, 1, kernel_size=kernel_size, padding=(kernel_size - 1) // 2, bias=False) 76 | self.sigmoid = nn.Sigmoid() 77 | 78 | def forward(self, x): 79 | y = self.avg_pool(x) 80 | y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) 81 | y = self.sigmoid(y) 82 | return x * y.expand_as(x) 83 | 84 | class CA_Block(nn.Module): 85 | def __init__(self, channel, reduction=16): 86 | super(CA_Block, self).__init__() 87 | 88 | self.conv_1x1 = nn.Conv2d(in_channels=channel, out_channels=channel//reduction, kernel_size=1, stride=1, bias=False) 89 | 90 | self.relu = nn.ReLU() 91 | self.bn = nn.BatchNorm2d(channel//reduction) 92 | 93 | self.F_h = nn.Conv2d(in_channels=channel//reduction, out_channels=channel, kernel_size=1, stride=1, bias=False) 94 | self.F_w = nn.Conv2d(in_channels=channel//reduction, out_channels=channel, kernel_size=1, stride=1, bias=False) 95 | 96 | self.sigmoid_h = nn.Sigmoid() 97 | self.sigmoid_w = nn.Sigmoid() 98 | 99 | def forward(self, x): 100 | _, _, h, w = x.size() 101 | 102 | x_h = torch.mean(x, dim = 3, keepdim = True).permute(0, 1, 3, 2) 103 | x_w = torch.mean(x, dim = 2, keepdim = True) 104 | 105 | x_cat_conv_relu = self.relu(self.bn(self.conv_1x1(torch.cat((x_h, x_w), 3)))) 106 | 107 | x_cat_conv_split_h, x_cat_conv_split_w = x_cat_conv_relu.split([h, w], 3) 108 | 109 | s_h = self.sigmoid_h(self.F_h(x_cat_conv_split_h.permute(0, 1, 3, 2))) 110 | s_w = self.sigmoid_w(self.F_w(x_cat_conv_split_w)) 111 | 112 | out = x * s_h.expand_as(x) * s_w.expand_as(x) 113 | return out 114 | -------------------------------------------------------------------------------- /nets/yolo.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from nets.CSPdarknet import darknet53 7 | 8 | 9 | def conv2d(filter_in, filter_out, kernel_size, stride=1): 10 | pad = (kernel_size - 1) // 2 if kernel_size else 0 11 | return nn.Sequential(OrderedDict([ 12 | ("conv", nn.Conv2d(filter_in, filter_out, kernel_size=kernel_size, stride=stride, padding=pad, bias=False)), 13 | ("bn", nn.BatchNorm2d(filter_out)), 14 | ("relu", nn.LeakyReLU(0.1)), 15 | ])) 16 | 17 | #---------------------------------------------------# 18 | # SPP结构,利用不同大小的池化核进行池化 19 | # 池化后堆叠 20 | #---------------------------------------------------# 21 | class SpatialPyramidPooling(nn.Module): 22 | def __init__(self, pool_sizes=[5, 9, 13]): 23 | super(SpatialPyramidPooling, self).__init__() 24 | 25 | self.maxpools = nn.ModuleList([nn.MaxPool2d(pool_size, 1, pool_size//2) for pool_size in pool_sizes]) 26 | 27 | def forward(self, x): 28 | features = [maxpool(x) for maxpool in self.maxpools[::-1]] 29 | features = torch.cat(features + [x], dim=1) 30 | 31 | return features 32 | 33 | #---------------------------------------------------# 34 | # 卷积 + 上采样 35 | #---------------------------------------------------# 36 | class Upsample(nn.Module): 37 | def 
__init__(self, in_channels, out_channels): 38 | super(Upsample, self).__init__() 39 | 40 | self.upsample = nn.Sequential( 41 | conv2d(in_channels, out_channels, 1), 42 | nn.Upsample(scale_factor=2, mode='nearest') 43 | ) 44 | 45 | def forward(self, x,): 46 | x = self.upsample(x) 47 | return x 48 | 49 | #---------------------------------------------------# 50 | # 三次卷积块 51 | #---------------------------------------------------# 52 | def make_three_conv(filters_list, in_filters): 53 | m = nn.Sequential( 54 | conv2d(in_filters, filters_list[0], 1), 55 | conv2d(filters_list[0], filters_list[1], 3), 56 | conv2d(filters_list[1], filters_list[0], 1), 57 | ) 58 | return m 59 | 60 | #---------------------------------------------------# 61 | # 五次卷积块 62 | #---------------------------------------------------# 63 | def make_five_conv(filters_list, in_filters): 64 | m = nn.Sequential( 65 | conv2d(in_filters, filters_list[0], 1), 66 | conv2d(filters_list[0], filters_list[1], 3), 67 | conv2d(filters_list[1], filters_list[0], 1), 68 | conv2d(filters_list[0], filters_list[1], 3), 69 | conv2d(filters_list[1], filters_list[0], 1), 70 | ) 71 | return m 72 | 73 | #---------------------------------------------------# 74 | # 最后获得yolov4的输出 75 | #---------------------------------------------------# 76 | def yolo_head(filters_list, in_filters): 77 | m = nn.Sequential( 78 | conv2d(in_filters, filters_list[0], 3), 79 | nn.Conv2d(filters_list[0], filters_list[1], 1), 80 | ) 81 | return m 82 | 83 | #---------------------------------------------------# 84 | # yolo_body 85 | #---------------------------------------------------# 86 | class YoloBody(nn.Module): 87 | def __init__(self, anchors_mask, num_classes, pretrained = False): 88 | super(YoloBody, self).__init__() 89 | #---------------------------------------------------# 90 | # 生成CSPdarknet53的主干模型 91 | # 获得三个有效特征层,他们的shape分别是: 92 | # 52,52,256 93 | # 26,26,512 94 | # 13,13,1024 95 | #---------------------------------------------------# 96 | self.backbone = darknet53(pretrained) 97 | 98 | self.conv1 = make_three_conv([512,1024],1024) 99 | self.SPP = SpatialPyramidPooling() 100 | self.conv2 = make_three_conv([512,1024],2048) 101 | 102 | self.upsample1 = Upsample(512,256) 103 | self.conv_for_P4 = conv2d(512,256,1) 104 | self.make_five_conv1 = make_five_conv([256, 512],512) 105 | 106 | self.upsample2 = Upsample(256,128) 107 | self.conv_for_P3 = conv2d(256,128,1) 108 | self.make_five_conv2 = make_five_conv([128, 256],256) 109 | 110 | # 3*(5+num_classes) = 3*(5+20) = 3*(4+1+20)=75 111 | self.yolo_head3 = yolo_head([256, len(anchors_mask[0]) * (5 + num_classes)],128) 112 | 113 | self.down_sample1 = conv2d(128,256,3,stride=2) 114 | self.make_five_conv3 = make_five_conv([256, 512],512) 115 | 116 | # 3*(5+num_classes) = 3*(5+20) = 3*(4+1+20)=75 117 | self.yolo_head2 = yolo_head([512, len(anchors_mask[1]) * (5 + num_classes)],256) 118 | 119 | self.down_sample2 = conv2d(256,512,3,stride=2) 120 | self.make_five_conv4 = make_five_conv([512, 1024],1024) 121 | 122 | # 3*(5+num_classes)=3*(5+20)=3*(4+1+20)=75 123 | self.yolo_head1 = yolo_head([1024, len(anchors_mask[2]) * (5 + num_classes)],512) 124 | 125 | 126 | def forward(self, x): 127 | # backbone 128 | x2, x1, x0 = self.backbone(x) 129 | 130 | # 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 -> 13,13,2048 131 | P5 = self.conv1(x0) 132 | P5 = self.SPP(P5) 133 | # 13,13,2048 -> 13,13,512 -> 13,13,1024 -> 13,13,512 134 | P5 = self.conv2(P5) 135 | 136 | # 13,13,512 -> 13,13,256 -> 26,26,256 137 | P5_upsample = self.upsample1(P5) 
138 | # 26,26,512 -> 26,26,256 139 | P4 = self.conv_for_P4(x1) 140 | # 26,26,256 + 26,26,256 -> 26,26,512 141 | P4 = torch.cat([P4,P5_upsample],axis=1) 142 | # 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256 143 | P4 = self.make_five_conv1(P4) 144 | 145 | # 26,26,256 -> 26,26,128 -> 52,52,128 146 | P4_upsample = self.upsample2(P4) 147 | # 52,52,256 -> 52,52,128 148 | P3 = self.conv_for_P3(x2) 149 | # 52,52,128 + 52,52,128 -> 52,52,256 150 | P3 = torch.cat([P3,P4_upsample],axis=1) 151 | # 52,52,256 -> 52,52,128 -> 52,52,256 -> 52,52,128 -> 52,52,256 -> 52,52,128 152 | P3 = self.make_five_conv2(P3) 153 | 154 | # 52,52,128 -> 26,26,256 155 | P3_downsample = self.down_sample1(P3) 156 | # 26,26,256 + 26,26,256 -> 26,26,512 157 | P4 = torch.cat([P3_downsample,P4],axis=1) 158 | # 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256 159 | P4 = self.make_five_conv3(P4) 160 | 161 | # 26,26,256 -> 13,13,512 162 | P4_downsample = self.down_sample2(P4) 163 | # 13,13,512 + 13,13,512 -> 13,13,1024 164 | P5 = torch.cat([P4_downsample,P5],axis=1) 165 | # 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 166 | P5 = self.make_five_conv4(P5) 167 | 168 | #---------------------------------------------------# 169 | # 第三个特征层 170 | # y3=(batch_size,75,52,52) 171 | #---------------------------------------------------# 172 | out2 = self.yolo_head3(P3) 173 | #---------------------------------------------------# 174 | # 第二个特征层 175 | # y2=(batch_size,75,26,26) 176 | #---------------------------------------------------# 177 | out1 = self.yolo_head2(P4) 178 | #---------------------------------------------------# 179 | # 第一个特征层 180 | # y1=(batch_size,75,13,13) 181 | #---------------------------------------------------# 182 | out0 = self.yolo_head1(P5) 183 | 184 | return out0, out1, out2 185 | 186 | -------------------------------------------------------------------------------- /nets/yolo_tiny.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from nets.CSPdarknet53_tiny import darknet53_tiny 5 | from nets.attention import cbam_block, eca_block, se_block, CA_Block 6 | 7 | attention_block = [se_block, cbam_block, eca_block, CA_Block] 8 | 9 | #-------------------------------------------------# 10 | # 卷积块 -> 卷积 + 标准化 + 激活函数 11 | # Conv2d + BatchNormalization + LeakyReLU 12 | #-------------------------------------------------# 13 | class BasicConv(nn.Module): 14 | def __init__(self, in_channels, out_channels, kernel_size, stride=1): 15 | super(BasicConv, self).__init__() 16 | 17 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, kernel_size//2, bias=False) 18 | self.bn = nn.BatchNorm2d(out_channels) 19 | self.activation = nn.LeakyReLU(0.1) 20 | 21 | def forward(self, x): 22 | x = self.conv(x) 23 | x = self.bn(x) 24 | x = self.activation(x) 25 | return x 26 | 27 | #---------------------------------------------------# 28 | # 卷积 + 上采样 29 | #---------------------------------------------------# 30 | class Upsample(nn.Module): 31 | def __init__(self, in_channels, out_channels): 32 | super(Upsample, self).__init__() 33 | 34 | self.upsample = nn.Sequential( 35 | BasicConv(in_channels, out_channels, 1), 36 | nn.Upsample(scale_factor=2, mode='nearest') 37 | ) 38 | 39 | def forward(self, x,): 40 | x = self.upsample(x) 41 | return x 42 | 43 | #---------------------------------------------------# 44 | # 最后获得yolov4的输出 45 | 
#---------------------------------------------------# 46 | def yolo_head(filters_list, in_filters): 47 | m = nn.Sequential( 48 | BasicConv(in_filters, filters_list[0], 3), 49 | nn.Conv2d(filters_list[0], filters_list[1], 1), 50 | ) 51 | return m 52 | #---------------------------------------------------# 53 | # yolo_body 54 | #---------------------------------------------------# 55 | class YoloBodytiny(nn.Module): 56 | def __init__(self, anchors_mask, num_classes, phi=0, pretrained=False): 57 | super(YoloBodytiny, self).__init__() 58 | self.phi = phi 59 | self.backbone = darknet53_tiny(pretrained) 60 | 61 | self.conv_for_P5 = BasicConv(512,256,1) 62 | self.yolo_headP5 = yolo_head([512, len(anchors_mask[0]) * (5 + num_classes)],256) 63 | 64 | self.upsample = Upsample(256,128) 65 | self.yolo_headP4 = yolo_head([256, len(anchors_mask[1]) * (5 + num_classes)],384) 66 | 67 | if 1 <= self.phi and self.phi <= 4: 68 | self.feat1_att = attention_block[self.phi - 1](256) 69 | self.feat2_att = attention_block[self.phi - 1](512) 70 | self.upsample_att = attention_block[self.phi - 1](128) 71 | 72 | def forward(self, x): 73 | #---------------------------------------------------# 74 | # 生成CSPdarknet53_tiny的主干模型 75 | # feat1的shape为26,26,256 76 | # feat2的shape为13,13,512 77 | #---------------------------------------------------# 78 | feat1, feat2 = self.backbone(x) 79 | if 1 <= self.phi and self.phi <= 4: 80 | feat1 = self.feat1_att(feat1) 81 | feat2 = self.feat2_att(feat2) 82 | 83 | # 13,13,512 -> 13,13,256 84 | P5 = self.conv_for_P5(feat2) 85 | # 13,13,256 -> 13,13,512 -> 13,13,255 86 | out0 = self.yolo_headP5(P5) 87 | 88 | # 13,13,256 -> 13,13,128 -> 26,26,128 89 | P5_Upsample = self.upsample(P5) 90 | # 26,26,256 + 26,26,128 -> 26,26,384 91 | if 1 <= self.phi and self.phi <= 4: 92 | P5_Upsample = self.upsample_att(P5_Upsample) 93 | P4 = torch.cat([P5_Upsample,feat1],axis=1) 94 | 95 | # 26,26,384 -> 26,26,256 -> 26,26,255 96 | out1 = self.yolo_headP4(P4) 97 | 98 | return out0, out1 99 | 100 | -------------------------------------------------------------------------------- /packages.txt: -------------------------------------------------------------------------------- 1 | freeglut3-dev 2 | libgtk2.0-dev -------------------------------------------------------------------------------- /predict.py: -------------------------------------------------------------------------------- 1 | #-----------------------------------------------------------------------# 2 | # predict.py将单张图片预测、摄像头检测、FPS测试和目录遍历检测等功能 3 | # 整合到了一个py文件中,通过指定mode进行模式的修改。 4 | #-----------------------------------------------------------------------# 5 | import time 6 | import yaml 7 | import cv2 8 | import numpy as np 9 | from PIL import Image 10 | from get_yaml import get_config 11 | from yolo import YOLO 12 | import argparse 13 | if __name__ == "__main__": 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--weights',type=str,default='model_data/yolotiny_SE_ep100.pth',help='initial weights path') 16 | parser.add_argument('--tiny',action='store_true',help='使用yolotiny模型') 17 | parser.add_argument('--phi',type=int,default=1,help='yolov4tiny注意力机制类型') 18 | parser.add_argument('--mode',type=str,choices=['dir_predict', 'video', 'fps','predict','heatmap','export_onnx'],default="dir_predict",help='预测的模式') 19 | parser.add_argument('--cuda',action='store_true',help='表示是否使用GPU') 20 | parser.add_argument('--shape',type=int,default=416,help='输入图像的shape') 21 | parser.add_argument('--video',type=str,default='',help='需要检测的视频文件') 22 | 
parser.add_argument('--save-video',type=str,default='',help='保存视频的位置') 23 | parser.add_argument('--confidence',type=float,default=0.5,help='只有得分大于置信度的预测框会被保留下来') 24 | parser.add_argument('--nms_iou',type=float,default=0.3,help='非极大抑制所用到的nms_iou大小') 25 | opt = parser.parse_args() 26 | print(opt) 27 | 28 | # 配置文件 29 | config = get_config() 30 | yolo = YOLO(opt) 31 | 32 | #----------------------------------------------------------------------------------------------------------# 33 | # mode用于指定测试的模式: 34 | # 'predict' 表示单张图片预测,如果想对预测过程进行修改,如保存图片,截取对象等,可以先看下方详细的注释 35 | # 'video' 表示视频检测,可调用摄像头或者视频进行检测,详情查看下方注释。 36 | # 'fps' 表示测试fps,使用的图片是img里面的street.jpg,详情查看下方注释。 37 | # 'dir_predict' 表示遍历文件夹进行检测并保存。默认遍历img文件夹,保存img_out文件夹,详情查看下方注释。 38 | # 'heatmap' 表示进行预测结果的热力图可视化,详情查看下方注释。 39 | # 'export_onnx' 表示将模型导出为onnx,需要pytorch1.7.1以上。 40 | #----------------------------------------------------------------------------------------------------------# 41 | mode = opt.mode 42 | #-------------------------------------------------------------------------# 43 | # crop 指定了是否在单张图片预测后对目标进行截取 44 | # count 指定了是否进行目标的计数 45 | # crop、count仅在mode='predict'时有效 46 | #-------------------------------------------------------------------------# 47 | crop = False 48 | count = False 49 | #----------------------------------------------------------------------------------------------------------# 50 | # video_path 用于指定视频的路径,当video_path=0时表示检测摄像头 51 | # 想要检测视频,则设置如video_path = "xxx.mp4"即可,代表读取出根目录下的xxx.mp4文件。 52 | # video_save_path 表示视频保存的路径,当video_save_path=""时表示不保存 53 | # 想要保存视频,则设置如video_save_path = "yyy.mp4"即可,代表保存为根目录下的yyy.mp4文件。 54 | # video_fps 用于保存的视频的fps 55 | # 56 | # video_path、video_save_path和video_fps仅在mode='video'时有效 57 | # 保存视频时需要ctrl+c退出或者运行到最后一帧才会完成完整的保存步骤。 58 | #----------------------------------------------------------------------------------------------------------# 59 | video_path = 0 if opt.video == '' else opt.video 60 | video_save_path = opt.save_video 61 | video_fps = 25.0 62 | #----------------------------------------------------------------------------------------------------------# 63 | # test_interval 用于指定测量fps的时候,图片检测的次数。理论上test_interval越大,fps越准确。 64 | # fps_image_path 用于指定测试的fps图片 65 | # 66 | # test_interval和fps_image_path仅在mode='fps'有效 67 | #----------------------------------------------------------------------------------------------------------# 68 | test_interval = 100 69 | fps_image_path = "img/up.jpg" 70 | #-------------------------------------------------------------------------# 71 | # dir_origin_path 指定了用于检测的图片的文件夹路径 72 | # dir_save_path 指定了检测完图片的保存路径 73 | # 74 | # dir_origin_path和dir_save_path仅在mode='dir_predict'时有效 75 | #-------------------------------------------------------------------------# 76 | dir_origin_path = "img/" 77 | dir_save_path = "img_out/" 78 | #-------------------------------------------------------------------------# 79 | # heatmap_save_path 热力图的保存路径,默认保存在model_data下 80 | # 81 | # heatmap_save_path仅在mode='heatmap'有效 82 | #-------------------------------------------------------------------------# 83 | heatmap_save_path = "model_data/heatmap_vision.png" 84 | #-------------------------------------------------------------------------# 85 | # simplify 使用Simplify onnx 86 | # onnx_save_path 指定了onnx的保存路径 87 | #-------------------------------------------------------------------------# 88 | simplify = True 89 | onnx_save_path = "model_data/models.onnx" 90 | 91 | if mode == "predict": 92 | ''' 93 | 1、如果想要进行检测完的图片的保存,利用r_image.save("img.jpg")即可保存,直接在predict.py里进行修改即可。 94 | 
2、如果想要获得预测框的坐标,可以进入yolo.detect_image函数,在绘图部分读取top,left,bottom,right这四个值。 95 | 3、如果想要利用预测框截取下目标,可以进入yolo.detect_image函数,在绘图部分利用获取到的top,left,bottom,right这四个值 96 | 在原图上利用矩阵的方式进行截取。 97 | 4、如果想要在预测图上写额外的字,比如检测到的特定目标的数量,可以进入yolo.detect_image函数,在绘图部分对predicted_class进行判断, 98 | 比如判断if predicted_class == 'car': 即可判断当前目标是否为车,然后记录数量即可。利用draw.text即可写字。 99 | ''' 100 | while True: 101 | img = input('Input image filename:') 102 | try: 103 | image = Image.open(img) 104 | except: 105 | print('Open Error! Try again!') 106 | continue 107 | else: 108 | r_image = yolo.detect_image(image, crop = crop, count=count) 109 | r_image.show() 110 | r_image.save(dir_save_path + 'img_result.jpg') 111 | 112 | elif mode == "video": 113 | capture = cv2.VideoCapture(video_path) 114 | if video_save_path != '': 115 | fourcc = cv2.VideoWriter_fourcc(*'XVID') 116 | size = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)), int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))) 117 | out = cv2.VideoWriter(video_save_path, fourcc, video_fps, size) 118 | 119 | ref, frame = capture.read() 120 | if not ref: 121 | raise ValueError("未能正确读取摄像头(视频),请注意是否正确安装摄像头(是否正确填写视频路径)。") 122 | 123 | fps = 0.0 124 | while(True): 125 | t1 = time.time() 126 | # 读取某一帧 127 | ref, frame = capture.read() 128 | if not ref: 129 | break 130 | # 格式转变,BGRtoRGB 131 | frame = cv2.cvtColor(frame,cv2.COLOR_BGR2RGB) 132 | # 转变成Image 133 | frame = Image.fromarray(np.uint8(frame)) 134 | # 进行检测 135 | frame = np.array(yolo.detect_image(frame)) 136 | # RGBtoBGR满足opencv显示格式 137 | frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR) 138 | 139 | fps = ( fps + (1./(time.time()-t1)) ) / 2 140 | print("fps= %.2f"%(fps)) 141 | frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) 142 | 143 | cv2.imshow("video",frame) 144 | c= cv2.waitKey(1) & 0xff 145 | if video_save_path != '': 146 | out.write(frame) 147 | 148 | if c==27: 149 | capture.release() 150 | break 151 | 152 | print("Video Detection Done!") 153 | capture.release() 154 | if video_save_path != '': 155 | print("Save processed video to the path :" + video_save_path) 156 | out.release() 157 | cv2.destroyAllWindows() 158 | 159 | elif mode == "fps": 160 | img = Image.open(fps_image_path) 161 | tact_time = yolo.get_FPS(img, test_interval) 162 | print(str(tact_time) + ' seconds, ' + str(1/tact_time) + 'FPS, @batch_size 1') 163 | 164 | elif mode == "dir_predict": 165 | import os 166 | 167 | from tqdm import tqdm 168 | 169 | img_names = os.listdir(dir_origin_path) 170 | for img_name in tqdm(img_names): 171 | if img_name.lower().endswith(('.bmp', '.dib', '.png', '.jpg', '.jpeg', '.pbm', '.pgm', '.ppm', '.tif', '.tiff')): 172 | image_path = os.path.join(dir_origin_path, img_name) 173 | image = Image.open(image_path) 174 | r_image = yolo.detect_image(image) 175 | if not os.path.exists(dir_save_path): 176 | os.makedirs(dir_save_path) 177 | r_image.save(os.path.join(dir_save_path, img_name.replace(".jpg", ".png")), quality=95, subsampling=0) 178 | 179 | elif mode == "heatmap": 180 | while True: 181 | img = input('Input image filename:') 182 | try: 183 | image = Image.open(img) 184 | except: 185 | print('Open Error! 
Try again!') 186 | continue 187 | else: 188 | yolo.detect_heatmap(image, heatmap_save_path) 189 | 190 | elif mode == "export_onnx": 191 | yolo.convert_to_onnx(simplify, onnx_save_path) 192 | 193 | else: 194 | raise AssertionError("Please specify the correct mode: 'predict', 'video', 'fps', 'heatmap', 'export_onnx', 'dir_predict'.") 195 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | scipy 2 | numpy 3 | matplotlib==3.7.0 4 | opencv_python 5 | torch==1.8.1 6 | torchvision==0.9.1 7 | tqdm==4.60.0 8 | Pillow==8.2.0 9 | h5py==2.10.0 10 | tensorboard 11 | pyyaml==6.0 12 | torchinfo 13 | labelimg==1.8.6 14 | streamlit==1.8.1 15 | opencv-python-headless==4.5.2.52 16 | streamlit<=1.11.* 17 | -------------------------------------------------------------------------------- /summary.py: -------------------------------------------------------------------------------- 1 | #--------------------------------------------# 2 | # 该部分代码用于看网络结构 3 | #--------------------------------------------# 4 | import torch 5 | from torchinfo import summary 6 | 7 | from nets.yolo import YoloBody 8 | from nets.yolo_tiny import YoloBodytiny 9 | if __name__ == "__main__": 10 | # 需要使用device来指定网络在GPU还是CPU运行 11 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 12 | m = YoloBody([[6, 7, 8], [3, 4, 5], [0, 1, 2]], 80).to(device) 13 | summary(m, input_size=(1,3, 416, 416)) 14 | 15 | m = YoloBodytiny([[3, 4, 5], [1, 2, 3]], 80, phi = 1).to(device) 16 | summary(m, input_size=(1,3, 416, 416)) 17 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------- /utils/callbacks.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | 4 | import torch 5 | import matplotlib 6 | matplotlib.use('Agg') 7 | import scipy.signal 8 | from matplotlib import pyplot as plt 9 | from torch.utils.tensorboard import SummaryWriter 10 | 11 | 12 | class LossHistory(): 13 | def __init__(self, log_dir, model, input_shape): 14 | time_str = datetime.datetime.strftime(datetime.datetime.now(),'%Y_%m_%d_%H_%M_%S') 15 | self.log_dir = os.path.join(log_dir, "loss_" + str(time_str)) 16 | self.losses = [] 17 | self.val_loss = [] 18 | 19 | os.makedirs(self.log_dir) 20 | self.writer = SummaryWriter(self.log_dir) 21 | try: 22 | dummy_input = torch.randn(2, 3, input_shape[0], input_shape[1]) 23 | self.writer.add_graph(model, dummy_input) 24 | except: 25 | pass 26 | 27 | 28 | def append_loss(self, epoch, loss, val_loss): 29 | if not os.path.exists(self.log_dir): 30 | os.makedirs(self.log_dir) 31 | 32 | self.losses.append(loss) 33 | self.val_loss.append(val_loss) 34 | 35 | with open(os.path.join(self.log_dir, "epoch_loss.txt"), 'a') as f: 36 | f.write(str(loss)) 37 | f.write("\n") 38 | with open(os.path.join(self.log_dir, "epoch_val_loss.txt"), 'a') as f: 39 | f.write(str(val_loss)) 40 | f.write("\n") 41 | 42 | self.writer.add_scalar('loss', loss, epoch) 43 | self.writer.add_scalar('val_loss', val_loss, epoch) 44 | self.loss_plot() 45 | 46 | def loss_plot(self): 47 | iters = range(len(self.losses)) 48 | 49 | plt.figure() 50 | plt.plot(iters, self.losses, 'red', linewidth = 2, label='train loss') 51 | plt.plot(iters, self.val_loss, 'coral', 
linewidth = 2, label='val loss') 52 | try: 53 | if len(self.losses) < 25: 54 | num = 5 55 | else: 56 | num = 15 57 | 58 | plt.plot(iters, scipy.signal.savgol_filter(self.losses, num, 3), 'green', linestyle = '--', linewidth = 2, label='smooth train loss') 59 | plt.plot(iters, scipy.signal.savgol_filter(self.val_loss, num, 3), '#8B4513', linestyle = '--', linewidth = 2, label='smooth val loss') 60 | except: 61 | pass 62 | 63 | plt.grid(True) 64 | plt.xlabel('Epoch') 65 | plt.ylabel('Loss') 66 | plt.legend(loc="upper right") 67 | 68 | plt.savefig(os.path.join(self.log_dir, "epoch_loss.png")) 69 | 70 | plt.cla() 71 | plt.close("all") 72 | -------------------------------------------------------------------------------- /utils/dataloader.py: -------------------------------------------------------------------------------- 1 | from random import sample, shuffle 2 | 3 | import cv2 4 | import numpy as np 5 | import torch 6 | from PIL import Image 7 | from torch.utils.data.dataset import Dataset 8 | 9 | from utils.utils import cvtColor, preprocess_input 10 | 11 | 12 | class YoloDataset(Dataset): 13 | def __init__(self, annotation_lines, input_shape, num_classes, epoch_length, mosaic, train, mosaic_ratio = 0.7): 14 | super(YoloDataset, self).__init__() 15 | self.annotation_lines = annotation_lines 16 | self.input_shape = input_shape 17 | self.num_classes = num_classes 18 | self.epoch_length = epoch_length 19 | self.mosaic = mosaic 20 | self.train = train 21 | self.mosaic_ratio = mosaic_ratio 22 | 23 | self.epoch_now = -1 24 | self.length = len(self.annotation_lines) 25 | 26 | def __len__(self): 27 | return self.length 28 | 29 | def __getitem__(self, index): 30 | index = index % self.length 31 | 32 | #---------------------------------------------------# 33 | # 训练时进行数据的随机增强 34 | # 验证时不进行数据的随机增强 35 | #---------------------------------------------------# 36 | if self.mosaic: 37 | if self.rand() < 0.5 and self.epoch_now < self.epoch_length * self.mosaic_ratio: 38 | lines = sample(self.annotation_lines, 3) 39 | lines.append(self.annotation_lines[index]) 40 | shuffle(lines) 41 | image, box = self.get_random_data_with_Mosaic(lines, self.input_shape) 42 | else: 43 | image, box = self.get_random_data(self.annotation_lines[index], self.input_shape, random = self.train) 44 | else: 45 | image, box = self.get_random_data(self.annotation_lines[index], self.input_shape, random = self.train) 46 | image = np.transpose(preprocess_input(np.array(image, dtype=np.float32)), (2, 0, 1)) 47 | box = np.array(box, dtype=np.float32) 48 | if len(box) != 0: 49 | box[:, [0, 2]] = box[:, [0, 2]] / self.input_shape[1] 50 | box[:, [1, 3]] = box[:, [1, 3]] / self.input_shape[0] 51 | 52 | box[:, 2:4] = box[:, 2:4] - box[:, 0:2] 53 | box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2 54 | return image, box 55 | 56 | def rand(self, a=0, b=1): 57 | return np.random.rand()*(b-a) + a 58 | 59 | def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True): 60 | line = annotation_line.split() 61 | #------------------------------# 62 | # 读取图像并转换成RGB图像 63 | #------------------------------# 64 | image = Image.open(line[0]) 65 | image = cvtColor(image) 66 | #------------------------------# 67 | # 获得图像的高宽与目标高宽 68 | #------------------------------# 69 | iw, ih = image.size 70 | h, w = input_shape 71 | #------------------------------# 72 | # 获得预测框 73 | #------------------------------# 74 | box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]]) 75 | 76 | if not random: 77 | scale = min(w/iw, 
h/ih) 78 | nw = int(iw*scale) 79 | nh = int(ih*scale) 80 | dx = (w-nw)//2 81 | dy = (h-nh)//2 82 | 83 | #---------------------------------# 84 | # 将图像多余的部分加上灰条 85 | #---------------------------------# 86 | image = image.resize((nw,nh), Image.BICUBIC) 87 | new_image = Image.new('RGB', (w,h), (128,128,128)) 88 | new_image.paste(image, (dx, dy)) 89 | image_data = np.array(new_image, np.float32) 90 | 91 | #---------------------------------# 92 | # 对真实框进行调整 93 | #---------------------------------# 94 | if len(box)>0: 95 | np.random.shuffle(box) 96 | box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx 97 | box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy 98 | box[:, 0:2][box[:, 0:2]<0] = 0 99 | box[:, 2][box[:, 2]>w] = w 100 | box[:, 3][box[:, 3]>h] = h 101 | box_w = box[:, 2] - box[:, 0] 102 | box_h = box[:, 3] - box[:, 1] 103 | box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box 104 | 105 | return image_data, box 106 | 107 | #------------------------------------------# 108 | # 对图像进行缩放并且进行长和宽的扭曲 109 | #------------------------------------------# 110 | new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter) 111 | scale = self.rand(.25, 2) 112 | if new_ar < 1: 113 | nh = int(scale*h) 114 | nw = int(nh*new_ar) 115 | else: 116 | nw = int(scale*w) 117 | nh = int(nw/new_ar) 118 | image = image.resize((nw,nh), Image.BICUBIC) 119 | 120 | #------------------------------------------# 121 | # 将图像多余的部分加上灰条 122 | #------------------------------------------# 123 | dx = int(self.rand(0, w-nw)) 124 | dy = int(self.rand(0, h-nh)) 125 | new_image = Image.new('RGB', (w,h), (128,128,128)) 126 | new_image.paste(image, (dx, dy)) 127 | image = new_image 128 | 129 | #------------------------------------------# 130 | # 翻转图像 131 | #------------------------------------------# 132 | flip = self.rand()<.5 133 | if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT) 134 | 135 | image_data = np.array(image, np.uint8) 136 | #---------------------------------# 137 | # 对图像进行色域变换 138 | # 计算色域变换的参数 139 | #---------------------------------# 140 | r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1 141 | #---------------------------------# 142 | # 将图像转到HSV上 143 | #---------------------------------# 144 | hue, sat, val = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV)) 145 | dtype = image_data.dtype 146 | #---------------------------------# 147 | # 应用变换 148 | #---------------------------------# 149 | x = np.arange(0, 256, dtype=r.dtype) 150 | lut_hue = ((x * r[0]) % 180).astype(dtype) 151 | lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) 152 | lut_val = np.clip(x * r[2], 0, 255).astype(dtype) 153 | 154 | image_data = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))) 155 | image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB) 156 | 157 | #---------------------------------# 158 | # 对真实框进行调整 159 | #---------------------------------# 160 | if len(box)>0: 161 | np.random.shuffle(box) 162 | box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx 163 | box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy 164 | if flip: box[:, [0,2]] = w - box[:, [2,0]] 165 | box[:, 0:2][box[:, 0:2]<0] = 0 166 | box[:, 2][box[:, 2]>w] = w 167 | box[:, 3][box[:, 3]>h] = h 168 | box_w = box[:, 2] - box[:, 0] 169 | box_h = box[:, 3] - box[:, 1] 170 | box = box[np.logical_and(box_w>1, box_h>1)] 171 | 172 | return image_data, box 173 | 174 | def merge_bboxes(self, bboxes, cutx, cuty): 175 | merge_bbox = [] 176 | for i in range(len(bboxes)): 177 | for box in bboxes[i]: 178 | tmp_box = [] 179 | x1, y1, x2, y2 = box[0], box[1], 
box[2], box[3] 180 | 181 | if i == 0: 182 | if y1 > cuty or x1 > cutx: 183 | continue 184 | if y2 >= cuty and y1 <= cuty: 185 | y2 = cuty 186 | if x2 >= cutx and x1 <= cutx: 187 | x2 = cutx 188 | 189 | if i == 1: 190 | if y2 < cuty or x1 > cutx: 191 | continue 192 | if y2 >= cuty and y1 <= cuty: 193 | y1 = cuty 194 | if x2 >= cutx and x1 <= cutx: 195 | x2 = cutx 196 | 197 | if i == 2: 198 | if y2 < cuty or x2 < cutx: 199 | continue 200 | if y2 >= cuty and y1 <= cuty: 201 | y1 = cuty 202 | if x2 >= cutx and x1 <= cutx: 203 | x1 = cutx 204 | 205 | if i == 3: 206 | if y1 > cuty or x2 < cutx: 207 | continue 208 | if y2 >= cuty and y1 <= cuty: 209 | y2 = cuty 210 | if x2 >= cutx and x1 <= cutx: 211 | x1 = cutx 212 | tmp_box.append(x1) 213 | tmp_box.append(y1) 214 | tmp_box.append(x2) 215 | tmp_box.append(y2) 216 | tmp_box.append(box[-1]) 217 | merge_bbox.append(tmp_box) 218 | return merge_bbox 219 | 220 | def get_random_data_with_Mosaic(self, annotation_line, input_shape, jitter=0.3, hue=.1, sat=0.7, val=0.4): 221 | h, w = input_shape 222 | min_offset_x = self.rand(0.3, 0.7) 223 | min_offset_y = self.rand(0.3, 0.7) 224 | 225 | image_datas = [] 226 | box_datas = [] 227 | index = 0 228 | for line in annotation_line: 229 | #---------------------------------# 230 | # 每一行进行分割 231 | #---------------------------------# 232 | line_content = line.split() 233 | #---------------------------------# 234 | # 打开图片 235 | #---------------------------------# 236 | image = Image.open(line_content[0]) 237 | image = cvtColor(image) 238 | 239 | #---------------------------------# 240 | # 图片的大小 241 | #---------------------------------# 242 | iw, ih = image.size 243 | #---------------------------------# 244 | # 保存框的位置 245 | #---------------------------------# 246 | box = np.array([np.array(list(map(int,box.split(',')))) for box in line_content[1:]]) 247 | 248 | #---------------------------------# 249 | # 是否翻转图片 250 | #---------------------------------# 251 | flip = self.rand()<.5 252 | if flip and len(box)>0: 253 | image = image.transpose(Image.FLIP_LEFT_RIGHT) 254 | box[:, [0,2]] = iw - box[:, [2,0]] 255 | 256 | #------------------------------------------# 257 | # 对图像进行缩放并且进行长和宽的扭曲 258 | #------------------------------------------# 259 | new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter) 260 | scale = self.rand(.4, 1) 261 | if new_ar < 1: 262 | nh = int(scale*h) 263 | nw = int(nh*new_ar) 264 | else: 265 | nw = int(scale*w) 266 | nh = int(nw/new_ar) 267 | image = image.resize((nw, nh), Image.BICUBIC) 268 | 269 | #-----------------------------------------------# 270 | # 将图片进行放置,分别对应四张分割图片的位置 271 | #-----------------------------------------------# 272 | if index == 0: 273 | dx = int(w*min_offset_x) - nw 274 | dy = int(h*min_offset_y) - nh 275 | elif index == 1: 276 | dx = int(w*min_offset_x) - nw 277 | dy = int(h*min_offset_y) 278 | elif index == 2: 279 | dx = int(w*min_offset_x) 280 | dy = int(h*min_offset_y) 281 | elif index == 3: 282 | dx = int(w*min_offset_x) 283 | dy = int(h*min_offset_y) - nh 284 | 285 | new_image = Image.new('RGB', (w,h), (128,128,128)) 286 | new_image.paste(image, (dx, dy)) 287 | image_data = np.array(new_image) 288 | 289 | index = index + 1 290 | box_data = [] 291 | #---------------------------------# 292 | # 对box进行重新处理 293 | #---------------------------------# 294 | if len(box)>0: 295 | np.random.shuffle(box) 296 | box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx 297 | box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy 298 | box[:, 0:2][box[:, 0:2]<0] = 0 299 | box[:, 2][box[:, 2]>w] = 
w 300 | box[:, 3][box[:, 3]>h] = h 301 | box_w = box[:, 2] - box[:, 0] 302 | box_h = box[:, 3] - box[:, 1] 303 | box = box[np.logical_and(box_w>1, box_h>1)] 304 | box_data = np.zeros((len(box),5)) 305 | box_data[:len(box)] = box 306 | 307 | image_datas.append(image_data) 308 | box_datas.append(box_data) 309 | 310 | #---------------------------------# 311 | # 将图片分割,放在一起 312 | #---------------------------------# 313 | cutx = int(w * min_offset_x) 314 | cuty = int(h * min_offset_y) 315 | 316 | new_image = np.zeros([h, w, 3]) 317 | new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :] 318 | new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :] 319 | new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :] 320 | new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :] 321 | 322 | new_image = np.array(new_image, np.uint8) 323 | #---------------------------------# 324 | # 对图像进行色域变换 325 | # 计算色域变换的参数 326 | #---------------------------------# 327 | r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1 328 | #---------------------------------# 329 | # 将图像转到HSV上 330 | #---------------------------------# 331 | hue, sat, val = cv2.split(cv2.cvtColor(new_image, cv2.COLOR_RGB2HSV)) 332 | dtype = new_image.dtype 333 | #---------------------------------# 334 | # 应用变换 335 | #---------------------------------# 336 | x = np.arange(0, 256, dtype=r.dtype) 337 | lut_hue = ((x * r[0]) % 180).astype(dtype) 338 | lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) 339 | lut_val = np.clip(x * r[2], 0, 255).astype(dtype) 340 | 341 | new_image = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))) 342 | new_image = cv2.cvtColor(new_image, cv2.COLOR_HSV2RGB) 343 | 344 | #---------------------------------# 345 | # 对框进行进一步的处理 346 | #---------------------------------# 347 | new_boxes = self.merge_bboxes(box_datas, cutx, cuty) 348 | 349 | return new_image, new_boxes 350 | 351 | # DataLoader中collate_fn使用 352 | def yolo_dataset_collate(batch): 353 | images = [] 354 | bboxes = [] 355 | for img, box in batch: 356 | images.append(img) 357 | bboxes.append(box) 358 | images = torch.from_numpy(np.array(images)).type(torch.FloatTensor) 359 | bboxes = [torch.from_numpy(ann).type(torch.FloatTensor) for ann in bboxes] 360 | return images, bboxes 361 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | 4 | #---------------------------------------------------------# 5 | # 将图像转换成RGB图像,防止灰度图在预测时报错。 6 | # 代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB 7 | #---------------------------------------------------------# 8 | def cvtColor(image): 9 | if len(np.shape(image)) == 3 and np.shape(image)[2] == 3: 10 | return image 11 | else: 12 | image = image.convert('RGB') 13 | return image 14 | 15 | #---------------------------------------------------# 16 | # 对输入图像进行resize 17 | #---------------------------------------------------# 18 | def resize_image(image, size, letterbox_image): 19 | iw, ih = image.size 20 | w, h = size 21 | if letterbox_image: 22 | scale = min(w/iw, h/ih) 23 | nw = int(iw*scale) 24 | nh = int(ih*scale) 25 | 26 | image = image.resize((nw,nh), Image.BICUBIC) 27 | new_image = Image.new('RGB', size, (128,128,128)) 28 | new_image.paste(image, ((w-nw)//2, (h-nh)//2)) 29 | else: 30 | new_image = image.resize((w, h), Image.BICUBIC) 31 | return new_image 32 | 33 | #---------------------------------------------------# 34 | # 
获得类 35 | #---------------------------------------------------# 36 | def get_classes(classes_path): 37 | with open(classes_path, encoding='utf-8') as f: 38 | class_names = f.readlines() 39 | class_names = [c.strip() for c in class_names] 40 | return class_names, len(class_names) 41 | 42 | #---------------------------------------------------# 43 | # 获得先验框 44 | #---------------------------------------------------# 45 | def get_anchors(anchors_path): 46 | '''loads the anchors from a file''' 47 | with open(anchors_path, encoding='utf-8') as f: 48 | anchors = f.readline() 49 | anchors = [float(x) for x in anchors.split(',')] 50 | anchors = np.array(anchors).reshape(-1, 2) 51 | return anchors, len(anchors) 52 | 53 | #---------------------------------------------------# 54 | # 获得学习率 55 | #---------------------------------------------------# 56 | def get_lr(optimizer): 57 | for param_group in optimizer.param_groups: 58 | return param_group['lr'] 59 | 60 | def preprocess_input(image): 61 | image /= 255.0 62 | return image 63 | -------------------------------------------------------------------------------- /utils/utils_bbox.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torchvision.ops import nms 4 | import numpy as np 5 | 6 | class DecodeBox(): 7 | def __init__(self, anchors, num_classes, input_shape, anchors_mask = [[6,7,8], [3,4,5], [0,1,2]]): 8 | super(DecodeBox, self).__init__() 9 | self.anchors = anchors 10 | self.num_classes = num_classes 11 | self.bbox_attrs = 5 + num_classes 12 | self.input_shape = input_shape 13 | #-----------------------------------------------------------# 14 | # 13x13的特征层对应的anchor是[142, 110],[192, 243],[459, 401] 15 | # 26x26的特征层对应的anchor是[36, 75],[76, 55],[72, 146] 16 | # 52x52的特征层对应的anchor是[12, 16],[19, 36],[40, 28] 17 | #-----------------------------------------------------------# 18 | self.anchors_mask = anchors_mask 19 | 20 | def decode_box(self, inputs): 21 | outputs = [] 22 | for i, input in enumerate(inputs): 23 | #-----------------------------------------------# 24 | # 输入的input一共有三个,他们的shape分别是 25 | # batch_size, 255, 13, 13 26 | # batch_size, 255, 26, 26 27 | # batch_size, 255, 52, 52 28 | #-----------------------------------------------# 29 | batch_size = input.size(0) 30 | input_height = input.size(2) 31 | input_width = input.size(3) 32 | 33 | #-----------------------------------------------# 34 | # 输入为416x416时 35 | # stride_h = stride_w = 32、16、8 36 | #-----------------------------------------------# 37 | stride_h = self.input_shape[0] / input_height 38 | stride_w = self.input_shape[1] / input_width 39 | #-------------------------------------------------# 40 | # 此时获得的scaled_anchors大小是相对于特征层的 41 | #-------------------------------------------------# 42 | scaled_anchors = [(anchor_width / stride_w, anchor_height / stride_h) for anchor_width, anchor_height in self.anchors[self.anchors_mask[i]]] 43 | 44 | #-----------------------------------------------# 45 | # 输入的input一共有三个,他们的shape分别是 46 | # batch_size, 3, 13, 13, 85 47 | # batch_size, 3, 26, 26, 85 48 | # batch_size, 3, 52, 52, 85 49 | #-----------------------------------------------# 50 | prediction = input.view(batch_size, len(self.anchors_mask[i]), 51 | self.bbox_attrs, input_height, input_width).permute(0, 1, 3, 4, 2).contiguous() 52 | 53 | #-----------------------------------------------# 54 | # 先验框的中心位置的调整参数 55 | #-----------------------------------------------# 56 | x = torch.sigmoid(prediction[..., 0]) 57 | y = 
torch.sigmoid(prediction[..., 1]) 58 | #-----------------------------------------------# 59 | # 先验框的宽高调整参数 60 | #-----------------------------------------------# 61 | w = prediction[..., 2] 62 | h = prediction[..., 3] 63 | #-----------------------------------------------# 64 | # 获得置信度,是否有物体 65 | #-----------------------------------------------# 66 | conf = torch.sigmoid(prediction[..., 4]) 67 | #-----------------------------------------------# 68 | # 种类置信度 69 | #-----------------------------------------------# 70 | pred_cls = torch.sigmoid(prediction[..., 5:]) 71 | 72 | FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor 73 | LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor 74 | 75 | #----------------------------------------------------------# 76 | # 生成网格,先验框中心,网格左上角 77 | # batch_size,3,13,13 78 | #----------------------------------------------------------# 79 | grid_x = torch.linspace(0, input_width - 1, input_width).repeat(input_height, 1).repeat( 80 | batch_size * len(self.anchors_mask[i]), 1, 1).view(x.shape).type(FloatTensor) 81 | grid_y = torch.linspace(0, input_height - 1, input_height).repeat(input_width, 1).t().repeat( 82 | batch_size * len(self.anchors_mask[i]), 1, 1).view(y.shape).type(FloatTensor) 83 | 84 | #----------------------------------------------------------# 85 | # 按照网格格式生成先验框的宽高 86 | # batch_size,3,13,13 87 | #----------------------------------------------------------# 88 | anchor_w = FloatTensor(scaled_anchors).index_select(1, LongTensor([0])) 89 | anchor_h = FloatTensor(scaled_anchors).index_select(1, LongTensor([1])) 90 | anchor_w = anchor_w.repeat(batch_size, 1).repeat(1, 1, input_height * input_width).view(w.shape) 91 | anchor_h = anchor_h.repeat(batch_size, 1).repeat(1, 1, input_height * input_width).view(h.shape) 92 | 93 | #----------------------------------------------------------# 94 | # 利用预测结果对先验框进行调整 95 | # 首先调整先验框的中心,从先验框中心向右下角偏移 96 | # 再调整先验框的宽高。 97 | #----------------------------------------------------------# 98 | pred_boxes = FloatTensor(prediction[..., :4].shape) 99 | pred_boxes[..., 0] = x.data + grid_x 100 | pred_boxes[..., 1] = y.data + grid_y 101 | pred_boxes[..., 2] = torch.exp(w.data) * anchor_w 102 | pred_boxes[..., 3] = torch.exp(h.data) * anchor_h 103 | 104 | #----------------------------------------------------------# 105 | # 将输出结果归一化成小数的形式 106 | #----------------------------------------------------------# 107 | _scale = torch.Tensor([input_width, input_height, input_width, input_height]).type(FloatTensor) 108 | output = torch.cat((pred_boxes.view(batch_size, -1, 4) / _scale, 109 | conf.view(batch_size, -1, 1), pred_cls.view(batch_size, -1, self.num_classes)), -1) 110 | outputs.append(output.data) 111 | return outputs 112 | 113 | def yolo_correct_boxes(self, box_xy, box_wh, input_shape, image_shape, letterbox_image): 114 | #-----------------------------------------------------------------# 115 | # 把y轴放前面是因为方便预测框和图像的宽高进行相乘 116 | #-----------------------------------------------------------------# 117 | box_yx = box_xy[..., ::-1] 118 | box_hw = box_wh[..., ::-1] 119 | input_shape = np.array(input_shape) 120 | image_shape = np.array(image_shape) 121 | 122 | if letterbox_image: 123 | #-----------------------------------------------------------------# 124 | # 这里求出来的offset是图像有效区域相对于图像左上角的偏移情况 125 | # new_shape指的是宽高缩放情况 126 | #-----------------------------------------------------------------# 127 | new_shape = np.round(image_shape * np.min(input_shape/image_shape)) 128 | offset = (input_shape - 
new_shape)/2./input_shape 129 | scale = input_shape/new_shape 130 | 131 | box_yx = (box_yx - offset) * scale 132 | box_hw *= scale 133 | 134 | box_mins = box_yx - (box_hw / 2.) 135 | box_maxes = box_yx + (box_hw / 2.) 136 | boxes = np.concatenate([box_mins[..., 0:1], box_mins[..., 1:2], box_maxes[..., 0:1], box_maxes[..., 1:2]], axis=-1) 137 | boxes *= np.concatenate([image_shape, image_shape], axis=-1) 138 | return boxes 139 | 140 | def non_max_suppression(self, prediction, num_classes, input_shape, image_shape, letterbox_image, conf_thres=0.5, nms_thres=0.4): 141 | #----------------------------------------------------------# 142 | # 将预测结果的格式转换成左上角右下角的格式。 143 | # prediction [batch_size, num_anchors, 85] 144 | #----------------------------------------------------------# 145 | box_corner = prediction.new(prediction.shape) 146 | box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2 147 | box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2 148 | box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2 149 | box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2 150 | prediction[:, :, :4] = box_corner[:, :, :4] 151 | 152 | output = [None for _ in range(len(prediction))] 153 | for i, image_pred in enumerate(prediction): 154 | #----------------------------------------------------------# 155 | # 对种类预测部分取max。 156 | # class_conf [num_anchors, 1] 种类置信度 157 | # class_pred [num_anchors, 1] 种类 158 | #----------------------------------------------------------# 159 | class_conf, class_pred = torch.max(image_pred[:, 5:5 + num_classes], 1, keepdim=True) 160 | 161 | #----------------------------------------------------------# 162 | # 利用置信度进行第一轮筛选 163 | #----------------------------------------------------------# 164 | conf_mask = (image_pred[:, 4] * class_conf[:, 0] >= conf_thres).squeeze() 165 | 166 | #----------------------------------------------------------# 167 | # 根据置信度进行预测结果的筛选 168 | #----------------------------------------------------------# 169 | image_pred = image_pred[conf_mask] 170 | class_conf = class_conf[conf_mask] 171 | class_pred = class_pred[conf_mask] 172 | if not image_pred.size(0): 173 | continue 174 | #-------------------------------------------------------------------------# 175 | # detections [num_anchors, 7] 176 | # 7的内容为:x1, y1, x2, y2, obj_conf, class_conf, class_pred 177 | #-------------------------------------------------------------------------# 178 | detections = torch.cat((image_pred[:, :5], class_conf.float(), class_pred.float()), 1) 179 | 180 | #------------------------------------------# 181 | # 获得预测结果中包含的所有种类 182 | #------------------------------------------# 183 | unique_labels = detections[:, -1].cpu().unique() 184 | 185 | if prediction.is_cuda: 186 | unique_labels = unique_labels.cuda() 187 | detections = detections.cuda() 188 | 189 | for c in unique_labels: 190 | #------------------------------------------# 191 | # 获得某一类得分筛选后全部的预测结果 192 | #------------------------------------------# 193 | detections_class = detections[detections[:, -1] == c] 194 | 195 | #------------------------------------------# 196 | # 使用官方自带的非极大抑制会速度更快一些! 
197 | #------------------------------------------# 198 | keep = nms( 199 | detections_class[:, :4], 200 | detections_class[:, 4] * detections_class[:, 5], 201 | nms_thres 202 | ) 203 | max_detections = detections_class[keep] 204 | 205 | # # 按照存在物体的置信度排序 206 | # _, conf_sort_index = torch.sort(detections_class[:, 4]*detections_class[:, 5], descending=True) 207 | # detections_class = detections_class[conf_sort_index] 208 | # # 进行非极大抑制 209 | # max_detections = [] 210 | # while detections_class.size(0): 211 | # # 取出这一类置信度最高的,一步一步往下判断,判断重合程度是否大于nms_thres,如果是则去除掉 212 | # max_detections.append(detections_class[0].unsqueeze(0)) 213 | # if len(detections_class) == 1: 214 | # break 215 | # ious = bbox_iou(max_detections[-1], detections_class[1:]) 216 | # detections_class = detections_class[1:][ious < nms_thres] 217 | # # 堆叠 218 | # max_detections = torch.cat(max_detections).data 219 | 220 | # Add max detections to outputs 221 | output[i] = max_detections if output[i] is None else torch.cat((output[i], max_detections)) 222 | 223 | if output[i] is not None: 224 | output[i] = output[i].cpu().numpy() 225 | box_xy, box_wh = (output[i][:, 0:2] + output[i][:, 2:4])/2, output[i][:, 2:4] - output[i][:, 0:2] 226 | output[i][:, :4] = self.yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape, letterbox_image) 227 | return output 228 | -------------------------------------------------------------------------------- /utils/utils_fit.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | from tqdm import tqdm 5 | 6 | from utils.utils import get_lr 7 | 8 | 9 | def fit_one_epoch(model_train, model, yolo_loss, loss_history, optimizer, epoch, epoch_step, epoch_step_val, gen, gen_val, Epoch, cuda, fp16, scaler, save_period, save_dir, local_rank=0): 10 | loss = 0 11 | val_loss = 0 12 | 13 | if local_rank == 0: 14 | print('Start Train') 15 | pbar = tqdm(total=epoch_step,desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) 16 | model_train.train() 17 | for iteration, batch in enumerate(gen): 18 | if iteration >= epoch_step: 19 | break 20 | 21 | images, targets = batch[0], batch[1] 22 | with torch.no_grad(): 23 | if cuda: 24 | images = images.cuda() 25 | targets = [ann.cuda() for ann in targets] 26 | #----------------------# 27 | # 清零梯度 28 | #----------------------# 29 | optimizer.zero_grad() 30 | if not fp16: 31 | #----------------------# 32 | # 前向传播 33 | #----------------------# 34 | outputs = model_train(images) 35 | 36 | loss_value_all = 0 37 | #----------------------# 38 | # 计算损失 39 | #----------------------# 40 | for l in range(len(outputs)): 41 | loss_item = yolo_loss(l, outputs[l], targets) 42 | loss_value_all += loss_item 43 | loss_value = loss_value_all 44 | 45 | #----------------------# 46 | # 反向传播 47 | #----------------------# 48 | loss_value.backward() 49 | optimizer.step() 50 | else: 51 | from torch.cuda.amp import autocast 52 | with autocast(): 53 | #----------------------# 54 | # 前向传播 55 | #----------------------# 56 | outputs = model_train(images) 57 | 58 | loss_value_all = 0 59 | #----------------------# 60 | # 计算损失 61 | #----------------------# 62 | for l in range(len(outputs)): 63 | loss_item = yolo_loss(l, outputs[l], targets) 64 | loss_value_all += loss_item 65 | loss_value = loss_value_all 66 | 67 | #----------------------# 68 | # 反向传播 69 | #----------------------# 70 | scaler.scale(loss_value).backward() 71 | scaler.step(optimizer) 72 | scaler.update() 73 | 74 | loss += loss_value.item() 75 | 76 | if local_rank == 
0: 77 | pbar.set_postfix(**{'loss' : loss / (iteration + 1), 78 | 'lr' : get_lr(optimizer)}) 79 | pbar.update(1) 80 | 81 | if local_rank == 0: 82 | pbar.close() 83 | print('Finish Train') 84 | print('Start Validation') 85 | pbar = tqdm(total=epoch_step_val, desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) 86 | 87 | model_train.eval() 88 | for iteration, batch in enumerate(gen_val): 89 | if iteration >= epoch_step_val: 90 | break 91 | images, targets = batch[0], batch[1] 92 | with torch.no_grad(): 93 | if cuda: 94 | images = images.cuda() 95 | targets = [ann.cuda() for ann in targets] 96 | #----------------------# 97 | # 清零梯度 98 | #----------------------# 99 | optimizer.zero_grad() 100 | #----------------------# 101 | # 前向传播 102 | #----------------------# 103 | outputs = model_train(images) 104 | 105 | loss_value_all = 0 106 | #----------------------# 107 | # 计算损失 108 | #----------------------# 109 | for l in range(len(outputs)): 110 | loss_item = yolo_loss(l, outputs[l], targets) 111 | loss_value_all += loss_item 112 | loss_value = loss_value_all 113 | 114 | val_loss += loss_value.item() 115 | if local_rank == 0: 116 | pbar.set_postfix(**{'val_loss': val_loss / (iteration + 1)}) 117 | pbar.update(1) 118 | 119 | if local_rank == 0: 120 | pbar.close() 121 | print('Finish Validation') 122 | loss_history.append_loss(epoch + 1, loss / epoch_step, val_loss / epoch_step_val) 123 | print('Epoch:'+ str(epoch + 1) + '/' + str(Epoch)) 124 | print('Total Loss: %.3f || Val Loss: %.3f ' % (loss / epoch_step, val_loss / epoch_step_val)) 125 | if (epoch + 1) % save_period == 0 or epoch + 1 == Epoch: 126 | torch.save(model.state_dict(), os.path.join(save_dir, "ep%03d-loss%.3f-val_loss%.3f.pth" % (epoch + 1, loss / epoch_step, val_loss / epoch_step_val))) 127 | # 每次保存最后一个权重 128 | torch.save(model.state_dict(), os.path.join(save_dir, "last.pth" )) -------------------------------------------------------------------------------- /utils_coco/coco_annotation.py: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------# 2 | # 用于处理COCO数据集,根据json文件生成txt文件用于训练 3 | #-------------------------------------------------------# 4 | import json 5 | import os 6 | from collections import defaultdict 7 | 8 | #-------------------------------------------------------# 9 | # 指向了COCO训练集与验证集图片的路径 10 | #-------------------------------------------------------# 11 | train_datasets_path = "coco_dataset/train2017" 12 | val_datasets_path = "coco_dataset/val2017" 13 | 14 | #-------------------------------------------------------# 15 | # 指向了COCO训练集与验证集标签的路径 16 | #-------------------------------------------------------# 17 | train_annotation_path = "coco_dataset/annotations/instances_train2017.json" 18 | val_annotation_path = "coco_dataset/annotations/instances_val2017.json" 19 | 20 | #-------------------------------------------------------# 21 | # 生成的txt文件路径 22 | #-------------------------------------------------------# 23 | train_output_path = "coco_train.txt" 24 | val_output_path = "coco_val.txt" 25 | 26 | if __name__ == "__main__": 27 | name_box_id = defaultdict(list) 28 | id_name = dict() 29 | f = open(train_annotation_path, encoding='utf-8') 30 | data = json.load(f) 31 | 32 | annotations = data['annotations'] 33 | for ant in annotations: 34 | id = ant['image_id'] 35 | name = os.path.join(train_datasets_path, '%012d.jpg' % id) 36 | cat = ant['category_id'] 37 | if cat >= 1 and cat <= 11: 38 | cat = cat - 1 39 | elif cat >= 13 and cat <= 
25: 40 | cat = cat - 2 41 | elif cat >= 27 and cat <= 28: 42 | cat = cat - 3 43 | elif cat >= 31 and cat <= 44: 44 | cat = cat - 5 45 | elif cat >= 46 and cat <= 65: 46 | cat = cat - 6 47 | elif cat == 67: 48 | cat = cat - 7 49 | elif cat == 70: 50 | cat = cat - 9 51 | elif cat >= 72 and cat <= 82: 52 | cat = cat - 10 53 | elif cat >= 84 and cat <= 90: 54 | cat = cat - 11 55 | name_box_id[name].append([ant['bbox'], cat]) 56 | 57 | f = open(train_output_path, 'w') 58 | for key in name_box_id.keys(): 59 | f.write(key) 60 | box_infos = name_box_id[key] 61 | for info in box_infos: 62 | x_min = int(info[0][0]) 63 | y_min = int(info[0][1]) 64 | x_max = x_min + int(info[0][2]) 65 | y_max = y_min + int(info[0][3]) 66 | 67 | box_info = " %d,%d,%d,%d,%d" % ( 68 | x_min, y_min, x_max, y_max, int(info[1])) 69 | f.write(box_info) 70 | f.write('\n') 71 | f.close() 72 | 73 | name_box_id = defaultdict(list) 74 | id_name = dict() 75 | f = open(val_annotation_path, encoding='utf-8') 76 | data = json.load(f) 77 | 78 | annotations = data['annotations'] 79 | for ant in annotations: 80 | id = ant['image_id'] 81 | name = os.path.join(val_datasets_path, '%012d.jpg' % id) 82 | cat = ant['category_id'] 83 | if cat >= 1 and cat <= 11: 84 | cat = cat - 1 85 | elif cat >= 13 and cat <= 25: 86 | cat = cat - 2 87 | elif cat >= 27 and cat <= 28: 88 | cat = cat - 3 89 | elif cat >= 31 and cat <= 44: 90 | cat = cat - 5 91 | elif cat >= 46 and cat <= 65: 92 | cat = cat - 6 93 | elif cat == 67: 94 | cat = cat - 7 95 | elif cat == 70: 96 | cat = cat - 9 97 | elif cat >= 72 and cat <= 82: 98 | cat = cat - 10 99 | elif cat >= 84 and cat <= 90: 100 | cat = cat - 11 101 | name_box_id[name].append([ant['bbox'], cat]) 102 | 103 | f = open(val_output_path, 'w') 104 | for key in name_box_id.keys(): 105 | f.write(key) 106 | box_infos = name_box_id[key] 107 | for info in box_infos: 108 | x_min = int(info[0][0]) 109 | y_min = int(info[0][1]) 110 | x_max = x_min + int(info[0][2]) 111 | y_max = y_min + int(info[0][3]) 112 | 113 | box_info = " %d,%d,%d,%d,%d" % ( 114 | x_min, y_min, x_max, y_max, int(info[1])) 115 | f.write(box_info) 116 | f.write('\n') 117 | f.close() 118 | -------------------------------------------------------------------------------- /utils_coco/get_map_coco.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import numpy as np 5 | import torch 6 | from PIL import Image 7 | from pycocotools.coco import COCO 8 | from pycocotools.cocoeval import COCOeval 9 | from tqdm import tqdm 10 | 11 | from utils.utils import cvtColor, preprocess_input, resize_image 12 | from yolo import YOLO 13 | 14 | #---------------------------------------------------------------------------# 15 | # map_mode用于指定该文件运行时计算的内容 16 | # map_mode为0代表整个map计算流程,包括获得预测结果、计算map。 17 | # map_mode为1代表仅仅获得预测结果。 18 | # map_mode为2代表仅仅获得计算map。 19 | #---------------------------------------------------------------------------# 20 | map_mode = 0 21 | #-------------------------------------------------------# 22 | # 指向了验证集标签与图片路径 23 | #-------------------------------------------------------# 24 | cocoGt_path = 'coco_dataset/annotations/instances_val2017.json' 25 | dataset_img_path = 'coco_dataset/val2017' 26 | #-------------------------------------------------------# 27 | # 结果输出的文件夹,默认为map_out 28 | #-------------------------------------------------------# 29 | temp_save_path = 'map_out/coco_eval' 30 | 31 | class mAP_YOLO(YOLO): 32 | #---------------------------------------------------# 33 | # 检测图片 34 
| #---------------------------------------------------# 35 | def detect_image(self, image_id, image, results): 36 | #---------------------------------------------------# 37 | # 计算输入图片的高和宽 38 | #---------------------------------------------------# 39 | image_shape = np.array(np.shape(image)[0:2]) 40 | #---------------------------------------------------------# 41 | # 在这里将图像转换成RGB图像,防止灰度图在预测时报错。 42 | # 代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB 43 | #---------------------------------------------------------# 44 | image = cvtColor(image) 45 | #---------------------------------------------------------# 46 | # 给图像增加灰条,实现不失真的resize 47 | # 也可以直接resize进行识别 48 | #---------------------------------------------------------# 49 | image_data = resize_image(image, (self.input_shape[1],self.input_shape[0]), self.letterbox_image) 50 | #---------------------------------------------------------# 51 | # 添加上batch_size维度 52 | #---------------------------------------------------------# 53 | image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0) 54 | 55 | with torch.no_grad(): 56 | images = torch.from_numpy(image_data) 57 | if self.cuda: 58 | images = images.cuda() 59 | #---------------------------------------------------------# 60 | # 将图像输入网络当中进行预测! 61 | #---------------------------------------------------------# 62 | outputs = self.net(images) 63 | outputs = self.bbox_util.decode_box(outputs) 64 | #---------------------------------------------------------# 65 | # 将预测框进行堆叠,然后进行非极大抑制 66 | #---------------------------------------------------------# 67 | outputs = self.bbox_util.non_max_suppression(torch.cat(outputs, 1), self.num_classes, self.input_shape, 68 | image_shape, self.letterbox_image, conf_thres = self.confidence, nms_thres = self.nms_iou) 69 | 70 | if outputs[0] is None: 71 | return results 72 | 73 | top_label = np.array(outputs[0][:, 6], dtype = 'int32') 74 | top_conf = outputs[0][:, 4] * outputs[0][:, 5] 75 | top_boxes = outputs[0][:, :4] 76 | 77 | for i, c in enumerate(top_label): 78 | result = {} 79 | top, left, bottom, right = top_boxes[i] 80 | 81 | result["image_id"] = int(image_id) 82 | result["category_id"] = clsid2catid[c] 83 | result["bbox"] = [float(left),float(top),float(right-left),float(bottom-top)] 84 | result["score"] = float(top_conf[i]) 85 | results.append(result) 86 | return results 87 | 88 | if __name__ == "__main__": 89 | if not os.path.exists(temp_save_path): 90 | os.makedirs(temp_save_path) 91 | 92 | cocoGt = COCO(cocoGt_path) 93 | ids = list(cocoGt.imgToAnns.keys()) 94 | clsid2catid = cocoGt.getCatIds() 95 | 96 | if map_mode == 0 or map_mode == 1: 97 | yolo = mAP_YOLO(confidence = 0.001, nms_iou = 0.65) 98 | 99 | with open(os.path.join(temp_save_path, 'eval_results.json'),"w") as f: 100 | results = [] 101 | for image_id in tqdm(ids): 102 | image_path = os.path.join(dataset_img_path, cocoGt.loadImgs(image_id)[0]['file_name']) 103 | image = Image.open(image_path) 104 | results = yolo.detect_image(image_id, image, results) 105 | json.dump(results, f) 106 | 107 | if map_mode == 0 or map_mode == 2: 108 | cocoDt = cocoGt.loadRes(os.path.join(temp_save_path, 'eval_results.json')) 109 | cocoEval = COCOeval(cocoGt, cocoDt, 'bbox') 110 | cocoEval.evaluate() 111 | cocoEval.accumulate() 112 | cocoEval.summarize() 113 | print("Get map done.") 114 | -------------------------------------------------------------------------------- /voc_annotation.py: -------------------------------------------------------------------------------- 1 | import os 2 | 
import random 3 | import xml.etree.ElementTree as ET 4 | from get_yaml import get_config 5 | from utils.utils import get_classes 6 | 7 | #--------------------------------------------------------------------------------------------------------------------------------# 8 | # annotation_mode用于指定该文件运行时计算的内容 9 | # annotation_mode为0代表整个标签处理过程,包括获得VOCdevkit/VOC2007/ImageSets里面的txt以及训练用的2007_train.txt、2007_val.txt 10 | # annotation_mode为1代表获得VOCdevkit/VOC2007/ImageSets里面的txt 11 | # annotation_mode为2代表获得训练用的2007_train.txt、2007_val.txt 12 | #--------------------------------------------------------------------------------------------------------------------------------# 13 | annotation_mode = 0 14 | #-------------------------------------------------------------------# 15 | # 必须要修改,用于生成2007_train.txt、2007_val.txt的目标信息 16 | # 与训练和预测所用的classes_path一致即可 17 | # 如果生成的2007_train.txt里面没有目标信息 18 | # 那么就是因为classes没有设定正确 19 | # 仅在annotation_mode为0和2的时候有效 20 | #-------------------------------------------------------------------# 21 | # classes_path = 'model_data/gesture_classes.txt' 22 | #--------------------------------------------------------------------------------------------------------------------------------# 23 | # trainval_percent用于指定(训练集+验证集)与测试集的比例,默认情况下 (训练集+验证集):测试集 = 9:1 24 | # train_percent用于指定(训练集+验证集)中训练集与验证集的比例,默认情况下 训练集:验证集 = 9:1 25 | # 仅在annotation_mode为0和1的时候有效 26 | #--------------------------------------------------------------------------------------------------------------------------------# 27 | trainval_percent = 1 28 | train_percent = 0.9 29 | #-------------------------------------------------------# 30 | # 指向VOC数据集所在的文件夹 31 | # 默认指向根目录下的VOC数据集 32 | #-------------------------------------------------------# 33 | VOCdevkit_path = 'VOCdevkit' 34 | 35 | VOCdevkit_sets = [('2007', 'train'), ('2007', 'val')] 36 | # classes, _ = get_classes(classes_path) 37 | config = get_config() 38 | classes = config['classes'] 39 | def convert_annotation(year, image_id, list_file): 40 | in_file = open(os.path.join(VOCdevkit_path, 'VOC%s/Annotations/%s.xml'%(year, image_id)), encoding='utf-8') 41 | tree=ET.parse(in_file) 42 | root = tree.getroot() 43 | 44 | for obj in root.iter('object'): 45 | difficult = 0 46 | if obj.find('difficult')!=None: 47 | difficult = obj.find('difficult').text 48 | cls = obj.find('name').text 49 | if cls not in classes or int(difficult)==1: 50 | continue 51 | cls_id = classes.index(cls) 52 | xmlbox = obj.find('bndbox') 53 | b = (int(float(xmlbox.find('xmin').text)), int(float(xmlbox.find('ymin').text)), int(float(xmlbox.find('xmax').text)), int(float(xmlbox.find('ymax').text))) 54 | list_file.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id)) 55 | 56 | if __name__ == "__main__": 57 | random.seed(0) 58 | if annotation_mode == 0 or annotation_mode == 1: 59 | print("Generate txt in ImageSets.") 60 | xmlfilepath = os.path.join(VOCdevkit_path, 'VOC2007/Annotations') 61 | saveBasePath = os.path.join(VOCdevkit_path, 'VOC2007/ImageSets/Main') 62 | temp_xml = os.listdir(xmlfilepath) 63 | total_xml = [] 64 | for xml in temp_xml: 65 | if xml.endswith(".xml"): 66 | total_xml.append(xml) 67 | 68 | num = len(total_xml) 69 | list = range(num) 70 | tv = int(num*trainval_percent) 71 | tr = int(tv*train_percent) 72 | trainval= random.sample(list,tv) 73 | train = random.sample(trainval,tr) 74 | 75 | print("train and val size",tv) 76 | print("train size",tr) 77 | ftrainval = open(os.path.join(saveBasePath,'trainval.txt'), 'w') 78 | ftest = open(os.path.join(saveBasePath,'test.txt'), 'w') 
79 | ftrain = open(os.path.join(saveBasePath,'train.txt'), 'w') 80 | fval = open(os.path.join(saveBasePath,'val.txt'), 'w') 81 | 82 | for i in list: 83 | name=total_xml[i][:-4]+'\n' 84 | if i in trainval: 85 | ftrainval.write(name) 86 | if i in train: 87 | ftrain.write(name) 88 | else: 89 | fval.write(name) 90 | else: 91 | ftest.write(name) 92 | 93 | ftrainval.close() 94 | ftrain.close() 95 | fval.close() 96 | ftest.close() 97 | print("Generate txt in ImageSets done.") 98 | 99 | if annotation_mode == 0 or annotation_mode == 2: 100 | print("Generate 2007_train.txt and 2007_val.txt for train.") 101 | for year, image_set in VOCdevkit_sets: 102 | image_ids = open(os.path.join(VOCdevkit_path, 'VOC%s/ImageSets/Main/%s.txt'%(year, image_set)), encoding='utf-8').read().strip().split() 103 | list_file = open('%s_%s.txt'%(year, image_set), 'w', encoding='utf-8') 104 | for image_id in image_ids: 105 | list_file.write('%s/VOC%s/JPEGImages/%s.jpg'%(os.path.abspath(VOCdevkit_path), year, image_id)) 106 | 107 | convert_annotation(year, image_id, list_file) 108 | list_file.write('\n') 109 | list_file.close() 110 | print("Generate 2007_train.txt and 2007_val.txt for train done.") 111 | -------------------------------------------------------------------------------- /yolo_anchors.txt: -------------------------------------------------------------------------------- 1 | 105,107, 118,136, 152,122, 114,165, 139,151, 160,156, 152,185, 181,167, 192,197 --------------------------------------------------------------------------------
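For reference, the nine width,height pairs in yolo_anchors.txt above (presumably produced by kmeans_for_anchors.py on the gesture data) are read by get_anchors in utils/utils.py and then grouped per feature map through the anchors_mask used by DecodeBox. A minimal sketch of that grouping follows; the 416x416 input size and the choice of the root-level anchors file are assumptions for illustration.

import numpy as np
from utils.utils import get_anchors

# get_anchors parses one comma-separated line into an (N, 2) array of widths/heights.
anchors, num_anchors = get_anchors("yolo_anchors.txt")      # shape (9, 2)
anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]             # same default as DecodeBox

# The largest three anchors go to the coarsest 13x13 map (stride 32 for a
# 416x416 input), the smallest three to the 52x52 map (stride 8).
for mask, grid in zip(anchors_mask, (13, 26, 52)):
    print(grid, anchors[mask])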
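A sketch of how the decoding utilities in utils/utils_bbox.py are typically wired together at inference time, in the spirit of mAP_YOLO.detect_image in utils_coco/get_map_coco.py. The variable name net (an already-built model in eval mode), the 416x416 input shape, and the use of letterbox resizing are assumptions for illustration, not the repo's only configuration.

import torch

from utils.utils import get_anchors, get_classes
from utils.utils_bbox import DecodeBox

anchors, _ = get_anchors("model_data/yolo_anchors.txt")
class_names, num_classes = get_classes("model_data/gesture_classes.txt")
input_shape = (416, 416)
bbox_util = DecodeBox(anchors, num_classes, input_shape)

def postprocess(outputs, image_shape, letterbox_image=True):
    # outputs: the three raw feature maps produced by net(images).
    # Decode the sigmoid/exp offsets against the anchors, concatenate the
    # three scales, then apply confidence filtering and per-class NMS.
    outputs = bbox_util.decode_box(outputs)
    results = bbox_util.non_max_suppression(
        torch.cat(outputs, 1), num_classes, input_shape,
        image_shape, letterbox_image, conf_thres=0.5, nms_thres=0.4)
    # results[0] is None if nothing survives the thresholds, otherwise an
    # (n, 7) array: top, left, bottom, right, obj_conf, class_conf, class_index.
    return results[0]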
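yolo_dataset_collate in utils/dataloader.py stacks the images into one FloatTensor but keeps the boxes as a list of per-image tensors, since each image can contain a different number of objects. A self-contained sketch of feeding it to a DataLoader; DummyDataset is a hypothetical stand-in for the dataset class defined earlier in utils/dataloader.py, and the shapes are only examples.

import numpy as np
from torch.utils.data import DataLoader, Dataset

from utils.dataloader import yolo_dataset_collate

class DummyDataset(Dataset):
    # Stand-in: each item is a CHW float image array and an (n, 5) array of
    # x1, y1, x2, y2, class_id boxes, mirroring what the collate function expects.
    def __len__(self):
        return 4

    def __getitem__(self, idx):
        image = np.zeros((3, 416, 416), dtype=np.float32)
        boxes = np.array([[10, 10, 50, 50, 0]], dtype=np.float32)
        return image, boxes

loader = DataLoader(DummyDataset(), batch_size=2, collate_fn=yolo_dataset_collate)
for images, targets in loader:
    print(images.shape)                  # torch.Size([2, 3, 416, 416])
    print([t.shape for t in targets])    # one (n, 5) FloatTensor per image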
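The long elif ladder in utils_coco/coco_annotation.py compresses COCO's sparse category ids (1 to 90 with gaps) into the contiguous 0 to 79 indices used for training; get_map_coco.py inverts it via cocoGt.getCatIds(). A hedged equivalent that builds the mapping from the ids actually present in the annotation file, shown only to make the intent of the ladder visible; the annotation path is an example, and for the standard 2017 annotation files this enumeration should yield the same indices as the ladder.

from pycocotools.coco import COCO

coco = COCO("coco_dataset/annotations/instances_val2017.json")
cat_ids = coco.getCatIds()                              # 80 sparse ids: 1, 2, ..., 90
catid2clsid = {cid: i for i, cid in enumerate(cat_ids)} # sparse id -> contiguous index
# e.g. catid2clsid[1] == 0, catid2clsid[13] == 11, catid2clsid[90] == 79,
# matching cat - 1, cat - 2 and cat - 11 in the corresponding branches above.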