├── .devcontainer
    └── devcontainer.json
├── .gitignore
├── 2007_train.txt
├── 2007_val.txt
├── Pipfile
├── README.md
├── VOCdevkit
    └── VOC2007
    │   ├── Annotations
    │       ├── 1.xml
    │       ├── 2.xml
    │       ├── 3.xml
    │       ├── 4.xml
    │       ├── 5.xml
    │       └── README.md
    │   ├── ImageSets
    │       └── Main
    │       │   ├── README.md
    │       │   ├── test.txt
    │       │   ├── train.txt
    │       │   ├── trainval.txt
    │       │   └── val.txt
    │   └── JPEGImages
    │       ├── 1.jpg
    │       ├── 2.jpg
    │       ├── 3.jpg
    │       ├── 4.jpg
    │       └── 5.jpg
├── YOLOv4-study学习资料md
├── detect.py
├── gen_annotation.py
├── gesture.streamlit.py
├── get_map.py
├── get_yaml.py
├── img
    ├── anticlockwise.jpg
    ├── back.jpg
    ├── clockwise.jpg
    ├── down.jpg
    ├── front.jpg
    ├── left.jpg
    ├── right.jpg
    └── up.jpg
├── instructions.md
├── kmeans_for_anchors.jpg
├── kmeans_for_anchors.py
├── logs
    ├── README.md
    ├── gesture_loss_2021_11_14_22_04_00
    │   ├── epoch_loss_2021_11_14_22_04_00.png
    │   ├── epoch_loss_2021_11_14_22_04_00.txt
    │   └── epoch_val_loss_2021_11_14_22_04_00.txt
    ├── loss_2022_04_27_08_48_16
    │   ├── epoch_loss.png
    │   ├── epoch_loss.txt
    │   ├── epoch_val_loss.txt
    │   └── events.out.tfevents.1651049298.fef10e9dbba1.425.0
    ├── loss_2022_04_27_10_38_48
    │   ├── epoch_loss.png
    │   ├── epoch_loss.txt
    │   ├── epoch_val_loss.txt
    │   └── events.out.tfevents.1651055931.9b45dd4991ae.367.0
    ├── loss_2022_04_27_12_50_47
    │   ├── epoch_loss.png
    │   ├── epoch_loss.txt
    │   ├── epoch_val_loss.txt
    │   └── events.out.tfevents.1651063849.274e119c63fb.1015.0
    ├── loss_2022_04_28_00_40_54
    │   ├── epoch_loss.png
    │   ├── epoch_loss.txt
    │   ├── epoch_val_loss.txt
    │   └── events.out.tfevents.1651106457.117e69507361.564.0
    ├── loss_2022_04_28_14_54_17
    │   ├── epoch_loss.png
    │   ├── epoch_loss.txt
    │   ├── epoch_val_loss.txt
    │   └── events.out.tfevents.1651128857.LAPTOP-IE5MVR15.24536.0
    └── loss_2022_05_02_14_57_57
    │   ├── epoch_loss.png
    │   ├── epoch_loss.txt
    │   ├── epoch_val_loss.txt
    │   └── events.out.tfevents.1651503480.437fb01f4bb0.370.0
├── model_data
    ├── .gitattributes
    ├── gesture.yaml
    ├── gesture_classes.txt
    ├── simhei.ttf
    ├── yolo_anchors.txt
    └── yolotiny_anchors.txt
├── nets
    ├── CSPdarknet.py
    ├── CSPdarknet53_tiny.py
    ├── __init__.py
    ├── attention.py
    ├── yolo.py
    ├── yolo_tiny.py
    ├── yolo_training.py
    └── yolotiny_training.py
├── packages.txt
├── predict.py
├── requirements.txt
├── summary.py
├── train.py
├── utils
    ├── __init__.py
    ├── callbacks.py
    ├── dataloader.py
    ├── utils.py
    ├── utils_bbox.py
    ├── utils_fit.py
    └── utils_map.py
├── utils_coco
    ├── coco_annotation.py
    └── get_map_coco.py
├── voc_annotation.py
├── yolo.py
├── yolo_anchors.txt
└── yolov4-gesture-tutorial.ipynb


/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "Python 3",
 3 |   // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
 4 |   "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
 5 |   "customizations": {
 6 |     "codespaces": {
 7 |       "openFiles": [
 8 |         "README.md",
 9 |         "gesture_streamlit.py"
10 |       ]
11 |     },
12 |     "vscode": {
13 |       "settings": {},
14 |       "extensions": [
15 |         "ms-python.python",
16 |         "ms-python.vscode-pylance"
17 |       ]
18 |     }
19 |   },
20 |   "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; [ -f requirements.txt ] && pip3 install --user -r requirements.txt; pip3 install --user streamlit; echo '✅ Packages installed and Requirements met'",
21 |   "postAttachCommand": {
22 |     "server": "streamlit run gesture_streamlit.py --server.enableCORS false --server.enableXsrfProtection false"
23 |   },
24 |   "portsAttributes": {
25 |     "8501": {
26 |       "label": "Application",
27 |       "onAutoForward": "openPreview"
28 |     }
29 |   },
30 |   "forwardPorts": [
31 |     8501
32 |   ]
33 | }


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.DS_Store


--------------------------------------------------------------------------------
/2007_val.txt:
--------------------------------------------------------------------------------
  1 | VOCdevkit/VOC2007/JPEGImages/1.jpg 21,7,174,210,1
  2 | VOCdevkit/VOC2007/JPEGImages/1004.jpg 23,24,120,144,2
  3 | VOCdevkit/VOC2007/JPEGImages/1018.jpg 22,6,138,142,2
  4 | VOCdevkit/VOC2007/JPEGImages/1027.jpg 33,16,99,98,0
  5 | VOCdevkit/VOC2007/JPEGImages/1049.jpg 53,45,152,256,0
  6 | VOCdevkit/VOC2007/JPEGImages/1055.jpg 48,22,134,177,0
  7 | VOCdevkit/VOC2007/JPEGImages/1073.jpg 12,9,59,83,0
  8 | VOCdevkit/VOC2007/JPEGImages/1082.jpg 21,50,210,139,0
  9 | VOCdevkit/VOC2007/JPEGImages/1087.jpg 32,22,102,88,0
 10 | VOCdevkit/VOC2007/JPEGImages/1092.jpg 20,10,208,249,0
 11 | VOCdevkit/VOC2007/JPEGImages/1093.jpg 57,41,212,239,0
 12 | VOCdevkit/VOC2007/JPEGImages/1095.jpg 46,43,131,136,0
 13 | VOCdevkit/VOC2007/JPEGImages/1097.jpg 30,37,135,246,0
 14 | VOCdevkit/VOC2007/JPEGImages/1099.jpg 33,46,181,147,0
 15 | VOCdevkit/VOC2007/JPEGImages/1110.jpg 38,48,169,273,0
 16 | VOCdevkit/VOC2007/JPEGImages/1128.jpg 638,1292,1922,3136,3
 17 | VOCdevkit/VOC2007/JPEGImages/1129.jpg 606,896,1986,3352,3
 18 | VOCdevkit/VOC2007/JPEGImages/113.jpg 23,19,103,120,3
 19 | VOCdevkit/VOC2007/JPEGImages/1143.jpg 558,612,2258,3508,3
 20 | VOCdevkit/VOC2007/JPEGImages/115.jpg 34,29,325,382,3
 21 | VOCdevkit/VOC2007/JPEGImages/1154.jpg 346,832,2058,2272,3
 22 | VOCdevkit/VOC2007/JPEGImages/1159.jpg 218,716,2350,3308,3
 23 | VOCdevkit/VOC2007/JPEGImages/1177.jpg 27,538,1138,1402,3
 24 | VOCdevkit/VOC2007/JPEGImages/1194.jpg 87,113,1196,1609,3
 25 | VOCdevkit/VOC2007/JPEGImages/1200.jpg
 26 | VOCdevkit/VOC2007/JPEGImages/1201.jpg 398,609,962,1667,7
 27 | VOCdevkit/VOC2007/JPEGImages/1212.jpg 93,96,1105,1627,7
 28 | VOCdevkit/VOC2007/JPEGImages/1215.jpg 40,389,780,1538,7
 29 | VOCdevkit/VOC2007/JPEGImages/1233.jpg 693,239,1419,772,6
 30 | VOCdevkit/VOC2007/JPEGImages/1245.jpg 38,34,114,129,1
 31 | VOCdevkit/VOC2007/JPEGImages/1246.jpg 4,6,114,111,1
 32 | VOCdevkit/VOC2007/JPEGImages/125.jpg 15,3,95,113,3
 33 | VOCdevkit/VOC2007/JPEGImages/1258.jpg 51,37,170,209,1
 34 | VOCdevkit/VOC2007/JPEGImages/1262.jpg 57,66,309,354,1
 35 | VOCdevkit/VOC2007/JPEGImages/1283.jpg 19,16,52,58,1
 36 | VOCdevkit/VOC2007/JPEGImages/1294.jpg 41,36,129,219,1
 37 | VOCdevkit/VOC2007/JPEGImages/1309.jpg 25,55,287,273,1
 38 | VOCdevkit/VOC2007/JPEGImages/1315.jpg 16,2,180,191,1
 39 | VOCdevkit/VOC2007/JPEGImages/1319.jpg 29,9,98,72,1
 40 | VOCdevkit/VOC2007/JPEGImages/132.jpg 28,7,134,138,3
 41 | VOCdevkit/VOC2007/JPEGImages/1337.jpg 16,23,210,238,5
 42 | VOCdevkit/VOC2007/JPEGImages/1339.jpg 13,15,209,196,5
 43 | VOCdevkit/VOC2007/JPEGImages/1341.jpg 44,20,323,294,5
 44 | VOCdevkit/VOC2007/JPEGImages/1351.jpg 76,19,512,432,5
 45 | VOCdevkit/VOC2007/JPEGImages/1368.jpg 24,23,135,144,5
 46 | VOCdevkit/VOC2007/JPEGImages/1371.jpg 21,48,245,301,5
 47 | VOCdevkit/VOC2007/JPEGImages/1393.jpg 23,50,307,296,5
 48 | VOCdevkit/VOC2007/JPEGImages/140.jpg 12,1,76,95,3
 49 | VOCdevkit/VOC2007/JPEGImages/1408.jpg 42,38,283,291,5
 50 | VOCdevkit/VOC2007/JPEGImages/1415.jpg 30,38,180,242,7
 51 | VOCdevkit/VOC2007/JPEGImages/1419.jpg 24,28,133,177,7
 52 | VOCdevkit/VOC2007/JPEGImages/1423.jpg 34,31,152,200,7
 53 | VOCdevkit/VOC2007/JPEGImages/1438.jpg 21,11,63,79,7
 54 | VOCdevkit/VOC2007/JPEGImages/144.jpg 24,34,218,342,3
 55 | VOCdevkit/VOC2007/JPEGImages/1447.jpg 33,116,322,500,7
 56 | VOCdevkit/VOC2007/JPEGImages/1451.jpg 27,36,237,219,6
 57 | VOCdevkit/VOC2007/JPEGImages/1453.jpg 24,9,99,67,6
 58 | VOCdevkit/VOC2007/JPEGImages/1455.jpg 34,36,124,90,6
 59 | VOCdevkit/VOC2007/JPEGImages/1457.jpg 5,18,102,102,6
 60 | VOCdevkit/VOC2007/JPEGImages/1469.jpg 29,57,239,178,6
 61 | VOCdevkit/VOC2007/JPEGImages/1481.jpg 17,22,83,56,6
 62 | VOCdevkit/VOC2007/JPEGImages/1486.jpg 562,1620,2470,2908,6
 63 | VOCdevkit/VOC2007/JPEGImages/1493.jpg 462,1176,2102,3120,6
 64 | VOCdevkit/VOC2007/JPEGImages/1505.jpg 526,800,2494,2940,6
 65 | VOCdevkit/VOC2007/JPEGImages/1514.jpg 346,1440,2466,3064,6
 66 | VOCdevkit/VOC2007/JPEGImages/153.jpg 11,5,56,79,3
 67 | VOCdevkit/VOC2007/JPEGImages/1531.jpg 154,580,2430,2876,6
 68 | VOCdevkit/VOC2007/JPEGImages/1538.jpg 367,466,949,1216,5
 69 | VOCdevkit/VOC2007/JPEGImages/155.jpg 10,7,127,172,3
 70 | VOCdevkit/VOC2007/JPEGImages/1553.jpg 31,293,1240,1427,5
 71 | VOCdevkit/VOC2007/JPEGImages/1570.jpg 325,714,925,1644,7
 72 | VOCdevkit/VOC2007/JPEGImages/1590.jpg 183,380,963,1544,7
 73 | VOCdevkit/VOC2007/JPEGImages/167.jpg 10,5,66,79,3
 74 | VOCdevkit/VOC2007/JPEGImages/179.jpg 4,7,112,176,3
 75 | VOCdevkit/VOC2007/JPEGImages/180.jpg 10,23,141,150,3
 76 | VOCdevkit/VOC2007/JPEGImages/181.jpg 8,5,60,84,3
 77 | VOCdevkit/VOC2007/JPEGImages/188.jpg 14,1,170,224,3
 78 | VOCdevkit/VOC2007/JPEGImages/191.jpg 15,23,98,140,3
 79 | VOCdevkit/VOC2007/JPEGImages/192.jpg 16,5,60,82,3
 80 | VOCdevkit/VOC2007/JPEGImages/20.jpg 9,3,174,216,1
 81 | VOCdevkit/VOC2007/JPEGImages/211.jpg 40,36,230,310,2
 82 | VOCdevkit/VOC2007/JPEGImages/229.jpg 18,10,186,195,2
 83 | VOCdevkit/VOC2007/JPEGImages/23.jpg 47,56,212,252,1
 84 | VOCdevkit/VOC2007/JPEGImages/248.jpg 53,41,222,271,2
 85 | VOCdevkit/VOC2007/JPEGImages/249.jpg 45,14,282,349,2
 86 | VOCdevkit/VOC2007/JPEGImages/264.jpg 20,29,170,211,2
 87 | VOCdevkit/VOC2007/JPEGImages/282.jpg 31,21,121,149,2
 88 | VOCdevkit/VOC2007/JPEGImages/283.jpg 55,41,257,315,2
 89 | VOCdevkit/VOC2007/JPEGImages/289.jpg 60,30,198,241,2
 90 | VOCdevkit/VOC2007/JPEGImages/29.jpg 9,2,183,240,1
 91 | VOCdevkit/VOC2007/JPEGImages/290.jpg 28,24,91,124,2
 92 | VOCdevkit/VOC2007/JPEGImages/292.jpg 23,28,133,172,2
 93 | VOCdevkit/VOC2007/JPEGImages/293.jpg 31,7,185,299,2
 94 | VOCdevkit/VOC2007/JPEGImages/333.jpg 123,1,697,567,0
 95 | VOCdevkit/VOC2007/JPEGImages/333.jpg 123,1,697,567,0
 96 | VOCdevkit/VOC2007/JPEGImages/336.jpg 58,45,164,216,0
 97 | VOCdevkit/VOC2007/JPEGImages/352.jpg 55,59,190,146,0
 98 | VOCdevkit/VOC2007/JPEGImages/359.jpg 104,44,257,208,0
 99 | VOCdevkit/VOC2007/JPEGImages/381.jpg 62,41,199,277,0
100 | VOCdevkit/VOC2007/JPEGImages/403.jpg 57,56,196,202,4
101 | VOCdevkit/VOC2007/JPEGImages/424.jpg 36,30,107,126,4
102 | VOCdevkit/VOC2007/JPEGImages/435.jpg 66,55,199,230,4
103 | VOCdevkit/VOC2007/JPEGImages/456.jpg 40,23,114,111,4
104 | VOCdevkit/VOC2007/JPEGImages/460.jpg 25,15,90,104,4
105 | VOCdevkit/VOC2007/JPEGImages/463.jpg 15,12,65,76,4
106 | VOCdevkit/VOC2007/JPEGImages/469.jpg 48,51,217,230,4
107 | VOCdevkit/VOC2007/JPEGImages/510.jpg 55,75,289,309,5
108 | VOCdevkit/VOC2007/JPEGImages/511.jpg 15,6,110,93,5
109 | VOCdevkit/VOC2007/JPEGImages/52.jpg 47,32,187,196,1
110 | VOCdevkit/VOC2007/JPEGImages/525.jpg 8,1,269,230,5
111 | VOCdevkit/VOC2007/JPEGImages/531.jpg 31,32,257,248,5
112 | VOCdevkit/VOC2007/JPEGImages/535.jpg 17,6,256,222,5
113 | VOCdevkit/VOC2007/JPEGImages/538.jpg 15,14,234,195,5
114 | VOCdevkit/VOC2007/JPEGImages/548.jpg 9,7,71,89,5
115 | VOCdevkit/VOC2007/JPEGImages/550.jpg 25,7,214,218,5
116 | VOCdevkit/VOC2007/JPEGImages/555.jpg 18,25,196,258,5
117 | VOCdevkit/VOC2007/JPEGImages/558.jpg 33,20,305,298,5
118 | VOCdevkit/VOC2007/JPEGImages/560.jpg 22,1,234,226,5
119 | VOCdevkit/VOC2007/JPEGImages/574.jpg 18,8,82,76,5
120 | VOCdevkit/VOC2007/JPEGImages/576.jpg 21,37,260,260,5
121 | VOCdevkit/VOC2007/JPEGImages/584.jpg 27,36,253,262,5
122 | VOCdevkit/VOC2007/JPEGImages/595.jpg 70,52,549,579,5
123 | VOCdevkit/VOC2007/JPEGImages/60.jpg 5,9,89,96,1
124 | VOCdevkit/VOC2007/JPEGImages/607.jpg 29,37,159,219,7
125 | VOCdevkit/VOC2007/JPEGImages/624.jpg 29,59,189,224,7
126 | VOCdevkit/VOC2007/JPEGImages/659.jpg 15,7,60,88,7
127 | VOCdevkit/VOC2007/JPEGImages/661.jpg 8,5,50,53,7
128 | VOCdevkit/VOC2007/JPEGImages/674.jpg 75,44,339,443,7
129 | VOCdevkit/VOC2007/JPEGImages/697.jpg 50,52,275,373,7
130 | VOCdevkit/VOC2007/JPEGImages/7.jpg 45,66,172,236,1
131 | VOCdevkit/VOC2007/JPEGImages/707.jpg 50,22,506,531,6
132 | VOCdevkit/VOC2007/JPEGImages/712.jpg 18,29,345,268,6
133 | VOCdevkit/VOC2007/JPEGImages/728.jpg 16,13,108,96,6
134 | VOCdevkit/VOC2007/JPEGImages/73.jpg 52,27,176,194,1
135 | VOCdevkit/VOC2007/JPEGImages/74.jpg 26,26,162,140,1
136 | VOCdevkit/VOC2007/JPEGImages/754.jpg 40,67,265,275,6
137 | VOCdevkit/VOC2007/JPEGImages/758.jpg 9,14,110,70,6
138 | VOCdevkit/VOC2007/JPEGImages/765.jpg 13,10,51,52,6
139 | VOCdevkit/VOC2007/JPEGImages/768.jpg 4,9,120,86,6
140 | VOCdevkit/VOC2007/JPEGImages/782.jpg 62,23,295,257,6
141 | VOCdevkit/VOC2007/JPEGImages/811.jpg
142 | VOCdevkit/VOC2007/JPEGImages/817.jpg
143 | VOCdevkit/VOC2007/JPEGImages/818.jpg
144 | VOCdevkit/VOC2007/JPEGImages/835.jpg
145 | VOCdevkit/VOC2007/JPEGImages/865.jpg
146 | VOCdevkit/VOC2007/JPEGImages/871.jpg
147 | VOCdevkit/VOC2007/JPEGImages/875.jpg
148 | VOCdevkit/VOC2007/JPEGImages/881.jpg
149 | VOCdevkit/VOC2007/JPEGImages/894.jpg
150 | VOCdevkit/VOC2007/JPEGImages/898.jpg 13,12,118,170,3
151 | VOCdevkit/VOC2007/JPEGImages/90.jpg 15,16,48,60,1
152 | VOCdevkit/VOC2007/JPEGImages/907.jpg 21,14,132,167,3
153 | VOCdevkit/VOC2007/JPEGImages/922.jpg 23,1,160,200,2
154 | VOCdevkit/VOC2007/JPEGImages/937.jpg 56,30,268,365,2
155 | VOCdevkit/VOC2007/JPEGImages/939.jpg 41,5,322,419,2
156 | VOCdevkit/VOC2007/JPEGImages/942.jpg 39,34,173,209,2
157 | VOCdevkit/VOC2007/JPEGImages/962.jpg 27,7,101,125,2
158 | VOCdevkit/VOC2007/JPEGImages/964.jpg 18,26,128,155,2
159 | VOCdevkit/VOC2007/JPEGImages/980.jpg 26,21,106,129,2
160 | VOCdevkit/VOC2007/JPEGImages/993.jpg 36,29,163,183,2
161 | VOCdevkit/VOC2007/JPEGImages/999.jpg 28,23,270,316,2
162 | 


--------------------------------------------------------------------------------
/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | name = "pypi"
 3 | url = "https://pypi.org/simple"
 4 | verify_ssl = true
 5 | 
 6 | [dev-packages]
 7 | 
 8 | [packages]
 9 | streamlit = "<1.11.*"
10 | opencv-python = "4.5.2.52"
11 | numpy = "*"
12 | torchvision = "0.9.1"
13 | torch = "1.8.1"
14 | Pillow = "8.2.0"
15 | pyyaml = "6.0"
16 | matplotlib = "*"
17 | opencv-python-headless = "4.5.2.52"
18 | av = "*"
19 | streamlit-webrtc = "0.36.1"
20 | altair = "4.2.2"
21 | 


--------------------------------------------------------------------------------
/VOCdevkit/VOC2007/Annotations/1.xml:
--------------------------------------------------------------------------------
 1 | <annotation>
 2 | 	<folder>JPEGImages</folder>
 3 | 	<filename>1.jpg</filename>
 4 | 	<path>E:\handpose_x_gesture_v2\JPEGImages\1.jpg</path>
 5 | 	<source>
 6 | 		<database>Unknown</database>
 7 | 	</source>
 8 | 	<size>
 9 | 		<width>175</width>
10 | 		<height>223</height>
11 | 		<depth>3</depth>
12 | 	</size>
13 | 	<segmented>0</segmented>
14 | 	<object>
15 | 		<name>down</name>
16 | 		<pose>Unspecified</pose>
17 | 		<truncated>0</truncated>
18 | 		<difficult>0</difficult>
19 | 		<bndbox>
20 | 			<xmin>21</xmin>
21 | 			<ymin>7</ymin>
22 | 			<xmax>174</xmax>
23 | 			<ymax>210</ymax>
24 | 		</bndbox>
25 | 	</object>
26 | </annotation>
27 | 


--------------------------------------------------------------------------------
/VOCdevkit/VOC2007/Annotations/2.xml:
--------------------------------------------------------------------------------
 1 | <annotation>
 2 | 	<folder>JPEGImages</folder>
 3 | 	<filename>2.jpg</filename>
 4 | 	<path>E:\handpose_x_gesture_v2\JPEGImages\2.jpg</path>
 5 | 	<source>
 6 | 		<database>Unknown</database>
 7 | 	</source>
 8 | 	<size>
 9 | 		<width>274</width>
10 | 		<height>295</height>
11 | 		<depth>3</depth>
12 | 	</size>
13 | 	<segmented>0</segmented>
14 | 	<object>
15 | 		<name>down</name>
16 | 		<pose>Unspecified</pose>
17 | 		<truncated>0</truncated>
18 | 		<difficult>0</difficult>
19 | 		<bndbox>
20 | 			<xmin>44</xmin>
21 | 			<ymin>20</ymin>
22 | 			<xmax>259</xmax>
23 | 			<ymax>264</ymax>
24 | 		</bndbox>
25 | 	</object>
26 | </annotation>
27 | 


--------------------------------------------------------------------------------
/VOCdevkit/VOC2007/Annotations/3.xml:
--------------------------------------------------------------------------------
 1 | <annotation>
 2 | 	<folder>JPEGImages</folder>
 3 | 	<filename>3.jpg</filename>
 4 | 	<path>E:\handpose_x_gesture_v2\JPEGImages\3.jpg</path>
 5 | 	<source>
 6 | 		<database>Unknown</database>
 7 | 	</source>
 8 | 	<size>
 9 | 		<width>325</width>
10 | 		<height>363</height>
11 | 		<depth>3</depth>
12 | 	</size>
13 | 	<segmented>0</segmented>
14 | 	<object>
15 | 		<name>down</name>
16 | 		<pose>Unspecified</pose>
17 | 		<truncated>0</truncated>
18 | 		<difficult>0</difficult>
19 | 		<bndbox>
20 | 			<xmin>30</xmin>
21 | 			<ymin>59</ymin>
22 | 			<xmax>261</xmax>
23 | 			<ymax>297</ymax>
24 | 		</bndbox>
25 | 	</object>
26 | </annotation>
27 | 


--------------------------------------------------------------------------------
/VOCdevkit/VOC2007/Annotations/4.xml:
--------------------------------------------------------------------------------
 1 | <annotation>
 2 | 	<folder>JPEGImages</folder>
 3 | 	<filename>4.jpg</filename>
 4 | 	<path>E:\handpose_x_gesture_v2\JPEGImages\4.jpg</path>
 5 | 	<source>
 6 | 		<database>Unknown</database>
 7 | 	</source>
 8 | 	<size>
 9 | 		<width>306</width>
10 | 		<height>299</height>
11 | 		<depth>3</depth>
12 | 	</size>
13 | 	<segmented>0</segmented>
14 | 	<object>
15 | 		<name>down</name>
16 | 		<pose>Unspecified</pose>
17 | 		<truncated>0</truncated>
18 | 		<difficult>0</difficult>
19 | 		<bndbox>
20 | 			<xmin>44</xmin>
21 | 			<ymin>45</ymin>
22 | 			<xmax>264</xmax>
23 | 			<ymax>256</ymax>
24 | 		</bndbox>
25 | 	</object>
26 | </annotation>
27 | 


--------------------------------------------------------------------------------
/VOCdevkit/VOC2007/Annotations/5.xml:
--------------------------------------------------------------------------------
 1 | <annotation>
 2 | 	<folder>JPEGImages</folder>
 3 | 	<filename>5.jpg</filename>
 4 | 	<path>E:\handpose_x_gesture_v2\JPEGImages\5.jpg</path>
 5 | 	<source>
 6 | 		<database>Unknown</database>
 7 | 	</source>
 8 | 	<size>
 9 | 		<width>191</width>
10 | 		<height>211</height>
11 | 		<depth>3</depth>
12 | 	</size>
13 | 	<segmented>0</segmented>
14 | 	<object>
15 | 		<name>down</name>
16 | 		<pose>Unspecified</pose>
17 | 		<truncated>0</truncated>
18 | 		<difficult>0</difficult>
19 | 		<bndbox>
20 | 			<xmin>31</xmin>
21 | 			<ymin>19</ymin>
22 | 			<xmax>152</xmax>
23 | 			<ymax>167</ymax>
24 | 		</bndbox>
25 | 	</object>
26 | </annotation>
27 | 


--------------------------------------------------------------------------------
/VOCdevkit/VOC2007/Annotations/README.md:
--------------------------------------------------------------------------------
1 | 存放标签文件


--------------------------------------------------------------------------------
/VOCdevkit/VOC2007/ImageSets/Main/README.md:
--------------------------------------------------------------------------------
1 | 存放训练索引文件


--------------------------------------------------------------------------------
/VOCdevkit/VOC2007/ImageSets/Main/test.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/VOCdevkit/VOC2007/ImageSets/Main/test.txt


--------------------------------------------------------------------------------
/VOCdevkit/VOC2007/ImageSets/Main/train.txt:
--------------------------------------------------------------------------------
   1 | 10
   2 | 100
   3 | 1000
   4 | 1001
   5 | 1002
   6 | 1003
   7 | 1005
   8 | 1006
   9 | 1007
  10 | 1008
  11 | 1009
  12 | 101
  13 | 1010
  14 | 1011
  15 | 1012
  16 | 1013
  17 | 1014
  18 | 1015
  19 | 1016
  20 | 1017
  21 | 1019
  22 | 102
  23 | 1020
  24 | 1021
  25 | 1022
  26 | 1023
  27 | 1024
  28 | 1025
  29 | 1026
  30 | 1028
  31 | 1029
  32 | 103
  33 | 1030
  34 | 1031
  35 | 1032
  36 | 1033
  37 | 1034
  38 | 1035
  39 | 1036
  40 | 1037
  41 | 1038
  42 | 1039
  43 | 104
  44 | 1040
  45 | 1041
  46 | 1042
  47 | 1043
  48 | 1044
  49 | 1045
  50 | 1046
  51 | 1047
  52 | 1048
  53 | 105
  54 | 1050
  55 | 1051
  56 | 1052
  57 | 1053
  58 | 1054
  59 | 1056
  60 | 1057
  61 | 1058
  62 | 1059
  63 | 106
  64 | 1060
  65 | 1061
  66 | 1062
  67 | 1063
  68 | 1064
  69 | 1065
  70 | 1066
  71 | 1067
  72 | 1068
  73 | 1069
  74 | 107
  75 | 1070
  76 | 1071
  77 | 1072
  78 | 1074
  79 | 1075
  80 | 1076
  81 | 1077
  82 | 1078
  83 | 1079
  84 | 108
  85 | 1080
  86 | 1081
  87 | 1083
  88 | 1084
  89 | 1085
  90 | 1086
  91 | 1088
  92 | 1089
  93 | 109
  94 | 1090
  95 | 1091
  96 | 1094
  97 | 1096
  98 | 1098
  99 | 11
 100 | 110
 101 | 1100
 102 | 1101
 103 | 1102
 104 | 1103
 105 | 1104
 106 | 1105
 107 | 1106
 108 | 1107
 109 | 1108
 110 | 1109
 111 | 111
 112 | 1111
 113 | 1112
 114 | 1113
 115 | 1114
 116 | 1115
 117 | 1116
 118 | 1117
 119 | 1118
 120 | 1119
 121 | 112
 122 | 1120
 123 | 1121
 124 | 1122
 125 | 1123
 126 | 1124
 127 | 1125
 128 | 1126
 129 | 1127
 130 | 1130
 131 | 1131
 132 | 1132
 133 | 1133
 134 | 1134
 135 | 1135
 136 | 1136
 137 | 1137
 138 | 1138
 139 | 1139
 140 | 114
 141 | 1140
 142 | 1141
 143 | 1142
 144 | 1144
 145 | 1145
 146 | 1146
 147 | 1147
 148 | 1148
 149 | 1149
 150 | 1150
 151 | 1151
 152 | 1152
 153 | 1153
 154 | 1155
 155 | 1156
 156 | 1157
 157 | 1158
 158 | 116
 159 | 1160
 160 | 1161
 161 | 1162
 162 | 1163
 163 | 1164
 164 | 1165
 165 | 1166
 166 | 1167
 167 | 1168
 168 | 1169
 169 | 117
 170 | 1170
 171 | 1171
 172 | 1172
 173 | 1173
 174 | 1174
 175 | 1175
 176 | 1176
 177 | 1178
 178 | 1179
 179 | 118
 180 | 1180
 181 | 1181
 182 | 1182
 183 | 1183
 184 | 1184
 185 | 1185
 186 | 1186
 187 | 1187
 188 | 1188
 189 | 1189
 190 | 119
 191 | 1190
 192 | 1191
 193 | 1192
 194 | 1193
 195 | 1195
 196 | 1196
 197 | 1197
 198 | 1198
 199 | 1199
 200 | 12
 201 | 120
 202 | 1202
 203 | 1203
 204 | 1204
 205 | 1205
 206 | 1206
 207 | 1207
 208 | 1208
 209 | 1209
 210 | 121
 211 | 1210
 212 | 1211
 213 | 1213
 214 | 1214
 215 | 1216
 216 | 1217
 217 | 1218
 218 | 1219
 219 | 122
 220 | 1220
 221 | 1221
 222 | 1222
 223 | 1223
 224 | 1224
 225 | 1225
 226 | 1226
 227 | 1227
 228 | 1228
 229 | 1229
 230 | 123
 231 | 1230
 232 | 1231
 233 | 1232
 234 | 1234
 235 | 1235
 236 | 1236
 237 | 1237
 238 | 1238
 239 | 1239
 240 | 124
 241 | 1240
 242 | 1241
 243 | 1242
 244 | 1243
 245 | 1244
 246 | 1247
 247 | 1248
 248 | 1249
 249 | 1250
 250 | 1251
 251 | 1252
 252 | 1253
 253 | 1254
 254 | 1255
 255 | 1256
 256 | 1257
 257 | 1259
 258 | 126
 259 | 1260
 260 | 1261
 261 | 1263
 262 | 1264
 263 | 1265
 264 | 1266
 265 | 1267
 266 | 1268
 267 | 1269
 268 | 127
 269 | 1270
 270 | 1271
 271 | 1272
 272 | 1273
 273 | 1274
 274 | 1275
 275 | 1276
 276 | 1277
 277 | 1278
 278 | 1279
 279 | 128
 280 | 1280
 281 | 1281
 282 | 1282
 283 | 1284
 284 | 1285
 285 | 1286
 286 | 1287
 287 | 1288
 288 | 1289
 289 | 129
 290 | 1290
 291 | 1291
 292 | 1292
 293 | 1293
 294 | 1295
 295 | 1296
 296 | 1297
 297 | 1298
 298 | 1299
 299 | 13
 300 | 130
 301 | 1300
 302 | 1301
 303 | 1302
 304 | 1303
 305 | 1304
 306 | 1305
 307 | 1306
 308 | 1307
 309 | 1308
 310 | 131
 311 | 1310
 312 | 1311
 313 | 1312
 314 | 1313
 315 | 1314
 316 | 1316
 317 | 1317
 318 | 1318
 319 | 1320
 320 | 1321
 321 | 1322
 322 | 1323
 323 | 1324
 324 | 1325
 325 | 1326
 326 | 1327
 327 | 1328
 328 | 1329
 329 | 133
 330 | 1330
 331 | 1331
 332 | 1332
 333 | 1333
 334 | 1334
 335 | 1335
 336 | 1336
 337 | 1338
 338 | 134
 339 | 1340
 340 | 1342
 341 | 1343
 342 | 1344
 343 | 1345
 344 | 1346
 345 | 1347
 346 | 1348
 347 | 1349
 348 | 135
 349 | 1350
 350 | 1352
 351 | 1353
 352 | 1354
 353 | 1355
 354 | 1356
 355 | 1357
 356 | 1358
 357 | 1359
 358 | 136
 359 | 1360
 360 | 1361
 361 | 1362
 362 | 1363
 363 | 1364
 364 | 1365
 365 | 1366
 366 | 1367
 367 | 1369
 368 | 137
 369 | 1370
 370 | 1372
 371 | 1373
 372 | 1374
 373 | 1375
 374 | 1376
 375 | 1377
 376 | 1378
 377 | 1379
 378 | 138
 379 | 1380
 380 | 1381
 381 | 1382
 382 | 1383
 383 | 1384
 384 | 1385
 385 | 1386
 386 | 1387
 387 | 1388
 388 | 1389
 389 | 139
 390 | 1390
 391 | 1391
 392 | 1392
 393 | 1394
 394 | 1395
 395 | 1396
 396 | 1397
 397 | 1398
 398 | 1399
 399 | 14
 400 | 1400
 401 | 1401
 402 | 1402
 403 | 1403
 404 | 1404
 405 | 1405
 406 | 1406
 407 | 1407
 408 | 1409
 409 | 141
 410 | 1410
 411 | 1411
 412 | 1412
 413 | 1413
 414 | 1414
 415 | 1416
 416 | 1417
 417 | 1418
 418 | 142
 419 | 1420
 420 | 1421
 421 | 1422
 422 | 1424
 423 | 1425
 424 | 1426
 425 | 1427
 426 | 1428
 427 | 1429
 428 | 143
 429 | 1430
 430 | 1431
 431 | 1432
 432 | 1433
 433 | 1434
 434 | 1435
 435 | 1436
 436 | 1437
 437 | 1439
 438 | 1440
 439 | 1441
 440 | 1442
 441 | 1443
 442 | 1444
 443 | 1445
 444 | 1446
 445 | 1448
 446 | 1449
 447 | 145
 448 | 1450
 449 | 1452
 450 | 1454
 451 | 1456
 452 | 1458
 453 | 1459
 454 | 146
 455 | 1460
 456 | 1461
 457 | 1462
 458 | 1463
 459 | 1464
 460 | 1465
 461 | 1466
 462 | 1467
 463 | 1468
 464 | 147
 465 | 1470
 466 | 1471
 467 | 1472
 468 | 1473
 469 | 1474
 470 | 1475
 471 | 1476
 472 | 1477
 473 | 1478
 474 | 1479
 475 | 148
 476 | 1480
 477 | 1482
 478 | 1483
 479 | 1484
 480 | 1485
 481 | 1487
 482 | 1488
 483 | 1489
 484 | 149
 485 | 1490
 486 | 1491
 487 | 1492
 488 | 1494
 489 | 1495
 490 | 1496
 491 | 1497
 492 | 1498
 493 | 1499
 494 | 15
 495 | 150
 496 | 1500
 497 | 1501
 498 | 1502
 499 | 1503
 500 | 1504
 501 | 1506
 502 | 1507
 503 | 1508
 504 | 1509
 505 | 151
 506 | 1510
 507 | 1511
 508 | 1512
 509 | 1513
 510 | 1515
 511 | 1516
 512 | 1517
 513 | 1518
 514 | 1519
 515 | 152
 516 | 1520
 517 | 1521
 518 | 1522
 519 | 1523
 520 | 1524
 521 | 1525
 522 | 1526
 523 | 1527
 524 | 1528
 525 | 1529
 526 | 1530
 527 | 1532
 528 | 1533
 529 | 1534
 530 | 1535
 531 | 1536
 532 | 1537
 533 | 1539
 534 | 154
 535 | 1540
 536 | 1541
 537 | 1542
 538 | 1543
 539 | 1544
 540 | 1545
 541 | 1546
 542 | 1547
 543 | 1548
 544 | 1549
 545 | 1550
 546 | 1551
 547 | 1552
 548 | 1554
 549 | 1555
 550 | 1556
 551 | 1557
 552 | 1558
 553 | 1559
 554 | 156
 555 | 1560
 556 | 1561
 557 | 1562
 558 | 1563
 559 | 1564
 560 | 1565
 561 | 1566
 562 | 1567
 563 | 1568
 564 | 1569
 565 | 157
 566 | 1571
 567 | 1572
 568 | 1573
 569 | 1574
 570 | 1575
 571 | 1576
 572 | 1577
 573 | 1578
 574 | 1579
 575 | 158
 576 | 1580
 577 | 1581
 578 | 1582
 579 | 1583
 580 | 1584
 581 | 1585
 582 | 1586
 583 | 1587
 584 | 1588
 585 | 1589
 586 | 159
 587 | 1591
 588 | 1592
 589 | 1593
 590 | 1594
 591 | 1595
 592 | 1596
 593 | 1597
 594 | 1598
 595 | 1599
 596 | 16
 597 | 160
 598 | 1600
 599 | 161
 600 | 162
 601 | 163
 602 | 164
 603 | 165
 604 | 166
 605 | 168
 606 | 169
 607 | 17
 608 | 170
 609 | 171
 610 | 172
 611 | 173
 612 | 174
 613 | 175
 614 | 176
 615 | 177
 616 | 178
 617 | 18
 618 | 182
 619 | 183
 620 | 184
 621 | 185
 622 | 186
 623 | 187
 624 | 189
 625 | 19
 626 | 190
 627 | 193
 628 | 194
 629 | 195
 630 | 196
 631 | 197
 632 | 198
 633 | 199
 634 | 2
 635 | 200
 636 | 201
 637 | 202
 638 | 203
 639 | 204
 640 | 205
 641 | 206
 642 | 207
 643 | 208
 644 | 209
 645 | 21
 646 | 210
 647 | 212
 648 | 213
 649 | 214
 650 | 215
 651 | 216
 652 | 217
 653 | 218
 654 | 219
 655 | 22
 656 | 220
 657 | 221
 658 | 222
 659 | 223
 660 | 224
 661 | 225
 662 | 226
 663 | 227
 664 | 228
 665 | 230
 666 | 231
 667 | 232
 668 | 233
 669 | 234
 670 | 235
 671 | 236
 672 | 237
 673 | 238
 674 | 239
 675 | 24
 676 | 240
 677 | 241
 678 | 242
 679 | 243
 680 | 244
 681 | 245
 682 | 246
 683 | 247
 684 | 25
 685 | 250
 686 | 251
 687 | 252
 688 | 253
 689 | 254
 690 | 255
 691 | 256
 692 | 257
 693 | 258
 694 | 259
 695 | 26
 696 | 260
 697 | 261
 698 | 262
 699 | 263
 700 | 265
 701 | 266
 702 | 267
 703 | 268
 704 | 269
 705 | 27
 706 | 270
 707 | 271
 708 | 272
 709 | 273
 710 | 274
 711 | 275
 712 | 276
 713 | 277
 714 | 278
 715 | 279
 716 | 28
 717 | 280
 718 | 281
 719 | 284
 720 | 285
 721 | 286
 722 | 287
 723 | 288
 724 | 291
 725 | 294
 726 | 295
 727 | 296
 728 | 297
 729 | 298
 730 | 299
 731 | 3
 732 | 30
 733 | 300
 734 | 301
 735 | 302
 736 | 303
 737 | 304
 738 | 305
 739 | 306
 740 | 307
 741 | 308
 742 | 309
 743 | 31
 744 | 310
 745 | 311
 746 | 312
 747 | 313
 748 | 314
 749 | 315
 750 | 316
 751 | 317
 752 | 318
 753 | 319
 754 | 32
 755 | 320
 756 | 321
 757 | 322
 758 | 323
 759 | 324
 760 | 325
 761 | 326
 762 | 327
 763 | 328
 764 | 329
 765 | 33
 766 | 330
 767 | 331
 768 | 332
 769 | 334
 770 | 335
 771 | 337
 772 | 338
 773 | 339
 774 | 34
 775 | 340
 776 | 341
 777 | 342
 778 | 343
 779 | 344
 780 | 345
 781 | 346
 782 | 347
 783 | 348
 784 | 349
 785 | 35
 786 | 350
 787 | 351
 788 | 353
 789 | 354
 790 | 355
 791 | 356
 792 | 357
 793 | 358
 794 | 36
 795 | 360
 796 | 361
 797 | 362
 798 | 363
 799 | 364
 800 | 365
 801 | 366
 802 | 367
 803 | 368
 804 | 369
 805 | 37
 806 | 370
 807 | 371
 808 | 372
 809 | 373
 810 | 374
 811 | 375
 812 | 376
 813 | 377
 814 | 378
 815 | 379
 816 | 38
 817 | 380
 818 | 382
 819 | 383
 820 | 384
 821 | 385
 822 | 386
 823 | 387
 824 | 388
 825 | 389
 826 | 39
 827 | 390
 828 | 391
 829 | 392
 830 | 393
 831 | 394
 832 | 395
 833 | 396
 834 | 397
 835 | 398
 836 | 399
 837 | 4
 838 | 40
 839 | 400
 840 | 401
 841 | 402
 842 | 404
 843 | 405
 844 | 406
 845 | 407
 846 | 408
 847 | 409
 848 | 41
 849 | 410
 850 | 411
 851 | 412
 852 | 413
 853 | 414
 854 | 415
 855 | 416
 856 | 417
 857 | 418
 858 | 419
 859 | 42
 860 | 420
 861 | 421
 862 | 422
 863 | 423
 864 | 425
 865 | 426
 866 | 427
 867 | 428
 868 | 429
 869 | 43
 870 | 430
 871 | 431
 872 | 432
 873 | 433
 874 | 434
 875 | 436
 876 | 437
 877 | 438
 878 | 439
 879 | 44
 880 | 440
 881 | 441
 882 | 442
 883 | 443
 884 | 444
 885 | 445
 886 | 446
 887 | 447
 888 | 448
 889 | 449
 890 | 45
 891 | 450
 892 | 451
 893 | 452
 894 | 453
 895 | 454
 896 | 455
 897 | 457
 898 | 458
 899 | 459
 900 | 46
 901 | 461
 902 | 462
 903 | 464
 904 | 465
 905 | 466
 906 | 467
 907 | 468
 908 | 47
 909 | 470
 910 | 471
 911 | 472
 912 | 473
 913 | 474
 914 | 475
 915 | 476
 916 | 477
 917 | 478
 918 | 479
 919 | 48
 920 | 480
 921 | 481
 922 | 482
 923 | 483
 924 | 484
 925 | 485
 926 | 486
 927 | 487
 928 | 488
 929 | 489
 930 | 49
 931 | 490
 932 | 491
 933 | 492
 934 | 493
 935 | 494
 936 | 495
 937 | 496
 938 | 497
 939 | 498
 940 | 499
 941 | 5
 942 | 50
 943 | 500
 944 | 501
 945 | 502
 946 | 503
 947 | 504
 948 | 505
 949 | 506
 950 | 507
 951 | 508
 952 | 509
 953 | 51
 954 | 512
 955 | 513
 956 | 514
 957 | 515
 958 | 516
 959 | 517
 960 | 518
 961 | 519
 962 | 520
 963 | 521
 964 | 522
 965 | 523
 966 | 524
 967 | 526
 968 | 527
 969 | 528
 970 | 529
 971 | 53
 972 | 530
 973 | 532
 974 | 533
 975 | 534
 976 | 536
 977 | 537
 978 | 539
 979 | 54
 980 | 540
 981 | 541
 982 | 542
 983 | 543
 984 | 544
 985 | 545
 986 | 546
 987 | 547
 988 | 549
 989 | 55
 990 | 551
 991 | 552
 992 | 553
 993 | 554
 994 | 556
 995 | 557
 996 | 559
 997 | 56
 998 | 561
 999 | 562
1000 | 563
1001 | 564
1002 | 565
1003 | 566
1004 | 567
1005 | 568
1006 | 569
1007 | 57
1008 | 570
1009 | 571
1010 | 572
1011 | 573
1012 | 575
1013 | 577
1014 | 578
1015 | 579
1016 | 58
1017 | 580
1018 | 581
1019 | 582
1020 | 583
1021 | 585
1022 | 586
1023 | 587
1024 | 588
1025 | 589
1026 | 59
1027 | 590
1028 | 591
1029 | 592
1030 | 593
1031 | 594
1032 | 596
1033 | 597
1034 | 598
1035 | 599
1036 | 6
1037 | 600
1038 | 601
1039 | 602
1040 | 603
1041 | 604
1042 | 605
1043 | 606
1044 | 608
1045 | 609
1046 | 61
1047 | 610
1048 | 611
1049 | 612
1050 | 613
1051 | 614
1052 | 615
1053 | 616
1054 | 617
1055 | 618
1056 | 619
1057 | 62
1058 | 620
1059 | 621
1060 | 622
1061 | 623
1062 | 625
1063 | 626
1064 | 627
1065 | 628
1066 | 629
1067 | 63
1068 | 630
1069 | 631
1070 | 632
1071 | 633
1072 | 634
1073 | 635
1074 | 636
1075 | 637
1076 | 638
1077 | 639
1078 | 64
1079 | 640
1080 | 641
1081 | 642
1082 | 643
1083 | 644
1084 | 645
1085 | 646
1086 | 647
1087 | 648
1088 | 649
1089 | 65
1090 | 650
1091 | 651
1092 | 652
1093 | 653
1094 | 654
1095 | 655
1096 | 656
1097 | 657
1098 | 658
1099 | 66
1100 | 660
1101 | 662
1102 | 663
1103 | 664
1104 | 665
1105 | 666
1106 | 667
1107 | 668
1108 | 669
1109 | 67
1110 | 670
1111 | 671
1112 | 672
1113 | 673
1114 | 675
1115 | 676
1116 | 677
1117 | 678
1118 | 679
1119 | 68
1120 | 680
1121 | 681
1122 | 682
1123 | 683
1124 | 684
1125 | 685
1126 | 686
1127 | 687
1128 | 688
1129 | 689
1130 | 69
1131 | 690
1132 | 691
1133 | 692
1134 | 693
1135 | 694
1136 | 695
1137 | 696
1138 | 698
1139 | 699
1140 | 70
1141 | 700
1142 | 701
1143 | 702
1144 | 703
1145 | 704
1146 | 705
1147 | 706
1148 | 708
1149 | 709
1150 | 71
1151 | 710
1152 | 711
1153 | 713
1154 | 714
1155 | 715
1156 | 716
1157 | 717
1158 | 718
1159 | 719
1160 | 72
1161 | 720
1162 | 721
1163 | 722
1164 | 723
1165 | 724
1166 | 725
1167 | 726
1168 | 727
1169 | 729
1170 | 730
1171 | 731
1172 | 732
1173 | 733
1174 | 734
1175 | 735
1176 | 736
1177 | 737
1178 | 738
1179 | 739
1180 | 740
1181 | 741
1182 | 742
1183 | 743
1184 | 744
1185 | 745
1186 | 746
1187 | 747
1188 | 748
1189 | 749
1190 | 75
1191 | 750
1192 | 751
1193 | 752
1194 | 753
1195 | 755
1196 | 756
1197 | 757
1198 | 759
1199 | 76
1200 | 760
1201 | 761
1202 | 762
1203 | 763
1204 | 764
1205 | 766
1206 | 767
1207 | 769
1208 | 77
1209 | 770
1210 | 771
1211 | 772
1212 | 773
1213 | 774
1214 | 775
1215 | 776
1216 | 777
1217 | 778
1218 | 779
1219 | 78
1220 | 780
1221 | 781
1222 | 783
1223 | 784
1224 | 785
1225 | 786
1226 | 787
1227 | 788
1228 | 789
1229 | 79
1230 | 790
1231 | 791
1232 | 792
1233 | 793
1234 | 794
1235 | 795
1236 | 796
1237 | 797
1238 | 798
1239 | 799
1240 | 8
1241 | 80
1242 | 800
1243 | 801
1244 | 802
1245 | 803
1246 | 804
1247 | 805
1248 | 806
1249 | 807
1250 | 808
1251 | 809
1252 | 81
1253 | 810
1254 | 812
1255 | 813
1256 | 814
1257 | 815
1258 | 816
1259 | 819
1260 | 82
1261 | 820
1262 | 821
1263 | 822
1264 | 823
1265 | 824
1266 | 825
1267 | 826
1268 | 827
1269 | 828
1270 | 829
1271 | 83
1272 | 830
1273 | 831
1274 | 832
1275 | 833
1276 | 834
1277 | 836
1278 | 837
1279 | 838
1280 | 839
1281 | 84
1282 | 840
1283 | 841
1284 | 842
1285 | 843
1286 | 844
1287 | 845
1288 | 846
1289 | 847
1290 | 848
1291 | 849
1292 | 85
1293 | 850
1294 | 851
1295 | 852
1296 | 853
1297 | 854
1298 | 855
1299 | 856
1300 | 857
1301 | 858
1302 | 859
1303 | 86
1304 | 860
1305 | 861
1306 | 862
1307 | 863
1308 | 864
1309 | 866
1310 | 867
1311 | 868
1312 | 869
1313 | 87
1314 | 870
1315 | 872
1316 | 873
1317 | 874
1318 | 876
1319 | 877
1320 | 878
1321 | 879
1322 | 88
1323 | 880
1324 | 882
1325 | 883
1326 | 884
1327 | 885
1328 | 886
1329 | 887
1330 | 888
1331 | 889
1332 | 89
1333 | 890
1334 | 891
1335 | 892
1336 | 893
1337 | 895
1338 | 896
1339 | 897
1340 | 899
1341 | 9
1342 | 900
1343 | 901
1344 | 902
1345 | 903
1346 | 904
1347 | 905
1348 | 906
1349 | 908
1350 | 909
1351 | 91
1352 | 910
1353 | 911
1354 | 912
1355 | 913
1356 | 914
1357 | 915
1358 | 916
1359 | 917
1360 | 918
1361 | 919
1362 | 92
1363 | 920
1364 | 921
1365 | 923
1366 | 924
1367 | 925
1368 | 926
1369 | 927
1370 | 928
1371 | 929
1372 | 93
1373 | 930
1374 | 931
1375 | 932
1376 | 933
1377 | 934
1378 | 935
1379 | 936
1380 | 938
1381 | 94
1382 | 940
1383 | 941
1384 | 943
1385 | 944
1386 | 945
1387 | 946
1388 | 947
1389 | 948
1390 | 949
1391 | 95
1392 | 950
1393 | 951
1394 | 952
1395 | 953
1396 | 954
1397 | 955
1398 | 956
1399 | 957
1400 | 958
1401 | 959
1402 | 96
1403 | 960
1404 | 961
1405 | 963
1406 | 965
1407 | 966
1408 | 967
1409 | 968
1410 | 969
1411 | 97
1412 | 970
1413 | 971
1414 | 972
1415 | 973
1416 | 974
1417 | 975
1418 | 976
1419 | 977
1420 | 978
1421 | 979
1422 | 98
1423 | 981
1424 | 982
1425 | 983
1426 | 984
1427 | 985
1428 | 986
1429 | 987
1430 | 988
1431 | 989
1432 | 99
1433 | 990
1434 | 991
1435 | 992
1436 | 994
1437 | 995
1438 | 996
1439 | 997
1440 | 998
1441 | 


--------------------------------------------------------------------------------
/VOCdevkit/VOC2007/ImageSets/Main/trainval.txt:
--------------------------------------------------------------------------------
   1 | 1
   2 | 10
   3 | 100
   4 | 1000
   5 | 1001
   6 | 1002
   7 | 1003
   8 | 1004
   9 | 1005
  10 | 1006
  11 | 1007
  12 | 1008
  13 | 1009
  14 | 101
  15 | 1010
  16 | 1011
  17 | 1012
  18 | 1013
  19 | 1014
  20 | 1015
  21 | 1016
  22 | 1017
  23 | 1018
  24 | 1019
  25 | 102
  26 | 1020
  27 | 1021
  28 | 1022
  29 | 1023
  30 | 1024
  31 | 1025
  32 | 1026
  33 | 1027
  34 | 1028
  35 | 1029
  36 | 103
  37 | 1030
  38 | 1031
  39 | 1032
  40 | 1033
  41 | 1034
  42 | 1035
  43 | 1036
  44 | 1037
  45 | 1038
  46 | 1039
  47 | 104
  48 | 1040
  49 | 1041
  50 | 1042
  51 | 1043
  52 | 1044
  53 | 1045
  54 | 1046
  55 | 1047
  56 | 1048
  57 | 1049
  58 | 105
  59 | 1050
  60 | 1051
  61 | 1052
  62 | 1053
  63 | 1054
  64 | 1055
  65 | 1056
  66 | 1057
  67 | 1058
  68 | 1059
  69 | 106
  70 | 1060
  71 | 1061
  72 | 1062
  73 | 1063
  74 | 1064
  75 | 1065
  76 | 1066
  77 | 1067
  78 | 1068
  79 | 1069
  80 | 107
  81 | 1070
  82 | 1071
  83 | 1072
  84 | 1073
  85 | 1074
  86 | 1075
  87 | 1076
  88 | 1077
  89 | 1078
  90 | 1079
  91 | 108
  92 | 1080
  93 | 1081
  94 | 1082
  95 | 1083
  96 | 1084
  97 | 1085
  98 | 1086
  99 | 1087
 100 | 1088
 101 | 1089
 102 | 109
 103 | 1090
 104 | 1091
 105 | 1092
 106 | 1093
 107 | 1094
 108 | 1095
 109 | 1096
 110 | 1097
 111 | 1098
 112 | 1099
 113 | 11
 114 | 110
 115 | 1100
 116 | 1101
 117 | 1102
 118 | 1103
 119 | 1104
 120 | 1105
 121 | 1106
 122 | 1107
 123 | 1108
 124 | 1109
 125 | 111
 126 | 1110
 127 | 1111
 128 | 1112
 129 | 1113
 130 | 1114
 131 | 1115
 132 | 1116
 133 | 1117
 134 | 1118
 135 | 1119
 136 | 112
 137 | 1120
 138 | 1121
 139 | 1122
 140 | 1123
 141 | 1124
 142 | 1125
 143 | 1126
 144 | 1127
 145 | 1128
 146 | 1129
 147 | 113
 148 | 1130
 149 | 1131
 150 | 1132
 151 | 1133
 152 | 1134
 153 | 1135
 154 | 1136
 155 | 1137
 156 | 1138
 157 | 1139
 158 | 114
 159 | 1140
 160 | 1141
 161 | 1142
 162 | 1143
 163 | 1144
 164 | 1145
 165 | 1146
 166 | 1147
 167 | 1148
 168 | 1149
 169 | 115
 170 | 1150
 171 | 1151
 172 | 1152
 173 | 1153
 174 | 1154
 175 | 1155
 176 | 1156
 177 | 1157
 178 | 1158
 179 | 1159
 180 | 116
 181 | 1160
 182 | 1161
 183 | 1162
 184 | 1163
 185 | 1164
 186 | 1165
 187 | 1166
 188 | 1167
 189 | 1168
 190 | 1169
 191 | 117
 192 | 1170
 193 | 1171
 194 | 1172
 195 | 1173
 196 | 1174
 197 | 1175
 198 | 1176
 199 | 1177
 200 | 1178
 201 | 1179
 202 | 118
 203 | 1180
 204 | 1181
 205 | 1182
 206 | 1183
 207 | 1184
 208 | 1185
 209 | 1186
 210 | 1187
 211 | 1188
 212 | 1189
 213 | 119
 214 | 1190
 215 | 1191
 216 | 1192
 217 | 1193
 218 | 1194
 219 | 1195
 220 | 1196
 221 | 1197
 222 | 1198
 223 | 1199
 224 | 12
 225 | 120
 226 | 1200
 227 | 1201
 228 | 1202
 229 | 1203
 230 | 1204
 231 | 1205
 232 | 1206
 233 | 1207
 234 | 1208
 235 | 1209
 236 | 121
 237 | 1210
 238 | 1211
 239 | 1212
 240 | 1213
 241 | 1214
 242 | 1215
 243 | 1216
 244 | 1217
 245 | 1218
 246 | 1219
 247 | 122
 248 | 1220
 249 | 1221
 250 | 1222
 251 | 1223
 252 | 1224
 253 | 1225
 254 | 1226
 255 | 1227
 256 | 1228
 257 | 1229
 258 | 123
 259 | 1230
 260 | 1231
 261 | 1232
 262 | 1233
 263 | 1234
 264 | 1235
 265 | 1236
 266 | 1237
 267 | 1238
 268 | 1239
 269 | 124
 270 | 1240
 271 | 1241
 272 | 1242
 273 | 1243
 274 | 1244
 275 | 1245
 276 | 1246
 277 | 1247
 278 | 1248
 279 | 1249
 280 | 125
 281 | 1250
 282 | 1251
 283 | 1252
 284 | 1253
 285 | 1254
 286 | 1255
 287 | 1256
 288 | 1257
 289 | 1258
 290 | 1259
 291 | 126
 292 | 1260
 293 | 1261
 294 | 1262
 295 | 1263
 296 | 1264
 297 | 1265
 298 | 1266
 299 | 1267
 300 | 1268
 301 | 1269
 302 | 127
 303 | 1270
 304 | 1271
 305 | 1272
 306 | 1273
 307 | 1274
 308 | 1275
 309 | 1276
 310 | 1277
 311 | 1278
 312 | 1279
 313 | 128
 314 | 1280
 315 | 1281
 316 | 1282
 317 | 1283
 318 | 1284
 319 | 1285
 320 | 1286
 321 | 1287
 322 | 1288
 323 | 1289
 324 | 129
 325 | 1290
 326 | 1291
 327 | 1292
 328 | 1293
 329 | 1294
 330 | 1295
 331 | 1296
 332 | 1297
 333 | 1298
 334 | 1299
 335 | 13
 336 | 130
 337 | 1300
 338 | 1301
 339 | 1302
 340 | 1303
 341 | 1304
 342 | 1305
 343 | 1306
 344 | 1307
 345 | 1308
 346 | 1309
 347 | 131
 348 | 1310
 349 | 1311
 350 | 1312
 351 | 1313
 352 | 1314
 353 | 1315
 354 | 1316
 355 | 1317
 356 | 1318
 357 | 1319
 358 | 132
 359 | 1320
 360 | 1321
 361 | 1322
 362 | 1323
 363 | 1324
 364 | 1325
 365 | 1326
 366 | 1327
 367 | 1328
 368 | 1329
 369 | 133
 370 | 1330
 371 | 1331
 372 | 1332
 373 | 1333
 374 | 1334
 375 | 1335
 376 | 1336
 377 | 1337
 378 | 1338
 379 | 1339
 380 | 134
 381 | 1340
 382 | 1341
 383 | 1342
 384 | 1343
 385 | 1344
 386 | 1345
 387 | 1346
 388 | 1347
 389 | 1348
 390 | 1349
 391 | 135
 392 | 1350
 393 | 1351
 394 | 1352
 395 | 1353
 396 | 1354
 397 | 1355
 398 | 1356
 399 | 1357
 400 | 1358
 401 | 1359
 402 | 136
 403 | 1360
 404 | 1361
 405 | 1362
 406 | 1363
 407 | 1364
 408 | 1365
 409 | 1366
 410 | 1367
 411 | 1368
 412 | 1369
 413 | 137
 414 | 1370
 415 | 1371
 416 | 1372
 417 | 1373
 418 | 1374
 419 | 1375
 420 | 1376
 421 | 1377
 422 | 1378
 423 | 1379
 424 | 138
 425 | 1380
 426 | 1381
 427 | 1382
 428 | 1383
 429 | 1384
 430 | 1385
 431 | 1386
 432 | 1387
 433 | 1388
 434 | 1389
 435 | 139
 436 | 1390
 437 | 1391
 438 | 1392
 439 | 1393
 440 | 1394
 441 | 1395
 442 | 1396
 443 | 1397
 444 | 1398
 445 | 1399
 446 | 14
 447 | 140
 448 | 1400
 449 | 1401
 450 | 1402
 451 | 1403
 452 | 1404
 453 | 1405
 454 | 1406
 455 | 1407
 456 | 1408
 457 | 1409
 458 | 141
 459 | 1410
 460 | 1411
 461 | 1412
 462 | 1413
 463 | 1414
 464 | 1415
 465 | 1416
 466 | 1417
 467 | 1418
 468 | 1419
 469 | 142
 470 | 1420
 471 | 1421
 472 | 1422
 473 | 1423
 474 | 1424
 475 | 1425
 476 | 1426
 477 | 1427
 478 | 1428
 479 | 1429
 480 | 143
 481 | 1430
 482 | 1431
 483 | 1432
 484 | 1433
 485 | 1434
 486 | 1435
 487 | 1436
 488 | 1437
 489 | 1438
 490 | 1439
 491 | 144
 492 | 1440
 493 | 1441
 494 | 1442
 495 | 1443
 496 | 1444
 497 | 1445
 498 | 1446
 499 | 1447
 500 | 1448
 501 | 1449
 502 | 145
 503 | 1450
 504 | 1451
 505 | 1452
 506 | 1453
 507 | 1454
 508 | 1455
 509 | 1456
 510 | 1457
 511 | 1458
 512 | 1459
 513 | 146
 514 | 1460
 515 | 1461
 516 | 1462
 517 | 1463
 518 | 1464
 519 | 1465
 520 | 1466
 521 | 1467
 522 | 1468
 523 | 1469
 524 | 147
 525 | 1470
 526 | 1471
 527 | 1472
 528 | 1473
 529 | 1474
 530 | 1475
 531 | 1476
 532 | 1477
 533 | 1478
 534 | 1479
 535 | 148
 536 | 1480
 537 | 1481
 538 | 1482
 539 | 1483
 540 | 1484
 541 | 1485
 542 | 1486
 543 | 1487
 544 | 1488
 545 | 1489
 546 | 149
 547 | 1490
 548 | 1491
 549 | 1492
 550 | 1493
 551 | 1494
 552 | 1495
 553 | 1496
 554 | 1497
 555 | 1498
 556 | 1499
 557 | 15
 558 | 150
 559 | 1500
 560 | 1501
 561 | 1502
 562 | 1503
 563 | 1504
 564 | 1505
 565 | 1506
 566 | 1507
 567 | 1508
 568 | 1509
 569 | 151
 570 | 1510
 571 | 1511
 572 | 1512
 573 | 1513
 574 | 1514
 575 | 1515
 576 | 1516
 577 | 1517
 578 | 1518
 579 | 1519
 580 | 152
 581 | 1520
 582 | 1521
 583 | 1522
 584 | 1523
 585 | 1524
 586 | 1525
 587 | 1526
 588 | 1527
 589 | 1528
 590 | 1529
 591 | 153
 592 | 1530
 593 | 1531
 594 | 1532
 595 | 1533
 596 | 1534
 597 | 1535
 598 | 1536
 599 | 1537
 600 | 1538
 601 | 1539
 602 | 154
 603 | 1540
 604 | 1541
 605 | 1542
 606 | 1543
 607 | 1544
 608 | 1545
 609 | 1546
 610 | 1547
 611 | 1548
 612 | 1549
 613 | 155
 614 | 1550
 615 | 1551
 616 | 1552
 617 | 1553
 618 | 1554
 619 | 1555
 620 | 1556
 621 | 1557
 622 | 1558
 623 | 1559
 624 | 156
 625 | 1560
 626 | 1561
 627 | 1562
 628 | 1563
 629 | 1564
 630 | 1565
 631 | 1566
 632 | 1567
 633 | 1568
 634 | 1569
 635 | 157
 636 | 1570
 637 | 1571
 638 | 1572
 639 | 1573
 640 | 1574
 641 | 1575
 642 | 1576
 643 | 1577
 644 | 1578
 645 | 1579
 646 | 158
 647 | 1580
 648 | 1581
 649 | 1582
 650 | 1583
 651 | 1584
 652 | 1585
 653 | 1586
 654 | 1587
 655 | 1588
 656 | 1589
 657 | 159
 658 | 1590
 659 | 1591
 660 | 1592
 661 | 1593
 662 | 1594
 663 | 1595
 664 | 1596
 665 | 1597
 666 | 1598
 667 | 1599
 668 | 16
 669 | 160
 670 | 1600
 671 | 161
 672 | 162
 673 | 163
 674 | 164
 675 | 165
 676 | 166
 677 | 167
 678 | 168
 679 | 169
 680 | 17
 681 | 170
 682 | 171
 683 | 172
 684 | 173
 685 | 174
 686 | 175
 687 | 176
 688 | 177
 689 | 178
 690 | 179
 691 | 18
 692 | 180
 693 | 181
 694 | 182
 695 | 183
 696 | 184
 697 | 185
 698 | 186
 699 | 187
 700 | 188
 701 | 189
 702 | 19
 703 | 190
 704 | 191
 705 | 192
 706 | 193
 707 | 194
 708 | 195
 709 | 196
 710 | 197
 711 | 198
 712 | 199
 713 | 2
 714 | 20
 715 | 200
 716 | 201
 717 | 202
 718 | 203
 719 | 204
 720 | 205
 721 | 206
 722 | 207
 723 | 208
 724 | 209
 725 | 21
 726 | 210
 727 | 211
 728 | 212
 729 | 213
 730 | 214
 731 | 215
 732 | 216
 733 | 217
 734 | 218
 735 | 219
 736 | 22
 737 | 220
 738 | 221
 739 | 222
 740 | 223
 741 | 224
 742 | 225
 743 | 226
 744 | 227
 745 | 228
 746 | 229
 747 | 23
 748 | 230
 749 | 231
 750 | 232
 751 | 233
 752 | 234
 753 | 235
 754 | 236
 755 | 237
 756 | 238
 757 | 239
 758 | 24
 759 | 240
 760 | 241
 761 | 242
 762 | 243
 763 | 244
 764 | 245
 765 | 246
 766 | 247
 767 | 248
 768 | 249
 769 | 25
 770 | 250
 771 | 251
 772 | 252
 773 | 253
 774 | 254
 775 | 255
 776 | 256
 777 | 257
 778 | 258
 779 | 259
 780 | 26
 781 | 260
 782 | 261
 783 | 262
 784 | 263
 785 | 264
 786 | 265
 787 | 266
 788 | 267
 789 | 268
 790 | 269
 791 | 27
 792 | 270
 793 | 271
 794 | 272
 795 | 273
 796 | 274
 797 | 275
 798 | 276
 799 | 277
 800 | 278
 801 | 279
 802 | 28
 803 | 280
 804 | 281
 805 | 282
 806 | 283
 807 | 284
 808 | 285
 809 | 286
 810 | 287
 811 | 288
 812 | 289
 813 | 29
 814 | 290
 815 | 291
 816 | 292
 817 | 293
 818 | 294
 819 | 295
 820 | 296
 821 | 297
 822 | 298
 823 | 299
 824 | 3
 825 | 30
 826 | 300
 827 | 301
 828 | 302
 829 | 303
 830 | 304
 831 | 305
 832 | 306
 833 | 307
 834 | 308
 835 | 309
 836 | 31
 837 | 310
 838 | 311
 839 | 312
 840 | 313
 841 | 314
 842 | 315
 843 | 316
 844 | 317
 845 | 318
 846 | 319
 847 | 32
 848 | 320
 849 | 321
 850 | 322
 851 | 323
 852 | 324
 853 | 325
 854 | 326
 855 | 327
 856 | 328
 857 | 329
 858 | 33
 859 | 330
 860 | 331
 861 | 332
 862 | 333 
 863 | 333
 864 | 334
 865 | 335
 866 | 336
 867 | 337
 868 | 338
 869 | 339
 870 | 34
 871 | 340
 872 | 341
 873 | 342
 874 | 343
 875 | 344
 876 | 345
 877 | 346
 878 | 347
 879 | 348
 880 | 349
 881 | 35
 882 | 350
 883 | 351
 884 | 352
 885 | 353
 886 | 354
 887 | 355
 888 | 356
 889 | 357
 890 | 358
 891 | 359
 892 | 36
 893 | 360
 894 | 361
 895 | 362
 896 | 363
 897 | 364
 898 | 365
 899 | 366
 900 | 367
 901 | 368
 902 | 369
 903 | 37
 904 | 370
 905 | 371
 906 | 372
 907 | 373
 908 | 374
 909 | 375
 910 | 376
 911 | 377
 912 | 378
 913 | 379
 914 | 38
 915 | 380
 916 | 381
 917 | 382
 918 | 383
 919 | 384
 920 | 385
 921 | 386
 922 | 387
 923 | 388
 924 | 389
 925 | 39
 926 | 390
 927 | 391
 928 | 392
 929 | 393
 930 | 394
 931 | 395
 932 | 396
 933 | 397
 934 | 398
 935 | 399
 936 | 4
 937 | 40
 938 | 400
 939 | 401
 940 | 402
 941 | 403
 942 | 404
 943 | 405
 944 | 406
 945 | 407
 946 | 408
 947 | 409
 948 | 41
 949 | 410
 950 | 411
 951 | 412
 952 | 413
 953 | 414
 954 | 415
 955 | 416
 956 | 417
 957 | 418
 958 | 419
 959 | 42
 960 | 420
 961 | 421
 962 | 422
 963 | 423
 964 | 424
 965 | 425
 966 | 426
 967 | 427
 968 | 428
 969 | 429
 970 | 43
 971 | 430
 972 | 431
 973 | 432
 974 | 433
 975 | 434
 976 | 435
 977 | 436
 978 | 437
 979 | 438
 980 | 439
 981 | 44
 982 | 440
 983 | 441
 984 | 442
 985 | 443
 986 | 444
 987 | 445
 988 | 446
 989 | 447
 990 | 448
 991 | 449
 992 | 45
 993 | 450
 994 | 451
 995 | 452
 996 | 453
 997 | 454
 998 | 455
 999 | 456
1000 | 457
1001 | 458
1002 | 459
1003 | 46
1004 | 460
1005 | 461
1006 | 462
1007 | 463
1008 | 464
1009 | 465
1010 | 466
1011 | 467
1012 | 468
1013 | 469
1014 | 47
1015 | 470
1016 | 471
1017 | 472
1018 | 473
1019 | 474
1020 | 475
1021 | 476
1022 | 477
1023 | 478
1024 | 479
1025 | 48
1026 | 480
1027 | 481
1028 | 482
1029 | 483
1030 | 484
1031 | 485
1032 | 486
1033 | 487
1034 | 488
1035 | 489
1036 | 49
1037 | 490
1038 | 491
1039 | 492
1040 | 493
1041 | 494
1042 | 495
1043 | 496
1044 | 497
1045 | 498
1046 | 499
1047 | 5
1048 | 50
1049 | 500
1050 | 501
1051 | 502
1052 | 503
1053 | 504
1054 | 505
1055 | 506
1056 | 507
1057 | 508
1058 | 509
1059 | 51
1060 | 510
1061 | 511
1062 | 512
1063 | 513
1064 | 514
1065 | 515
1066 | 516
1067 | 517
1068 | 518
1069 | 519
1070 | 52
1071 | 520
1072 | 521
1073 | 522
1074 | 523
1075 | 524
1076 | 525
1077 | 526
1078 | 527
1079 | 528
1080 | 529
1081 | 53
1082 | 530
1083 | 531
1084 | 532
1085 | 533
1086 | 534
1087 | 535
1088 | 536
1089 | 537
1090 | 538
1091 | 539
1092 | 54
1093 | 540
1094 | 541
1095 | 542
1096 | 543
1097 | 544
1098 | 545
1099 | 546
1100 | 547
1101 | 548
1102 | 549
1103 | 55
1104 | 550
1105 | 551
1106 | 552
1107 | 553
1108 | 554
1109 | 555
1110 | 556
1111 | 557
1112 | 558
1113 | 559
1114 | 56
1115 | 560
1116 | 561
1117 | 562
1118 | 563
1119 | 564
1120 | 565
1121 | 566
1122 | 567
1123 | 568
1124 | 569
1125 | 57
1126 | 570
1127 | 571
1128 | 572
1129 | 573
1130 | 574
1131 | 575
1132 | 576
1133 | 577
1134 | 578
1135 | 579
1136 | 58
1137 | 580
1138 | 581
1139 | 582
1140 | 583
1141 | 584
1142 | 585
1143 | 586
1144 | 587
1145 | 588
1146 | 589
1147 | 59
1148 | 590
1149 | 591
1150 | 592
1151 | 593
1152 | 594
1153 | 595
1154 | 596
1155 | 597
1156 | 598
1157 | 599
1158 | 6
1159 | 60
1160 | 600
1161 | 601
1162 | 602
1163 | 603
1164 | 604
1165 | 605
1166 | 606
1167 | 607
1168 | 608
1169 | 609
1170 | 61
1171 | 610
1172 | 611
1173 | 612
1174 | 613
1175 | 614
1176 | 615
1177 | 616
1178 | 617
1179 | 618
1180 | 619
1181 | 62
1182 | 620
1183 | 621
1184 | 622
1185 | 623
1186 | 624
1187 | 625
1188 | 626
1189 | 627
1190 | 628
1191 | 629
1192 | 63
1193 | 630
1194 | 631
1195 | 632
1196 | 633
1197 | 634
1198 | 635
1199 | 636
1200 | 637
1201 | 638
1202 | 639
1203 | 64
1204 | 640
1205 | 641
1206 | 642
1207 | 643
1208 | 644
1209 | 645
1210 | 646
1211 | 647
1212 | 648
1213 | 649
1214 | 65
1215 | 650
1216 | 651
1217 | 652
1218 | 653
1219 | 654
1220 | 655
1221 | 656
1222 | 657
1223 | 658
1224 | 659
1225 | 66
1226 | 660
1227 | 661
1228 | 662
1229 | 663
1230 | 664
1231 | 665
1232 | 666
1233 | 667
1234 | 668
1235 | 669
1236 | 67
1237 | 670
1238 | 671
1239 | 672
1240 | 673
1241 | 674
1242 | 675
1243 | 676
1244 | 677
1245 | 678
1246 | 679
1247 | 68
1248 | 680
1249 | 681
1250 | 682
1251 | 683
1252 | 684
1253 | 685
1254 | 686
1255 | 687
1256 | 688
1257 | 689
1258 | 69
1259 | 690
1260 | 691
1261 | 692
1262 | 693
1263 | 694
1264 | 695
1265 | 696
1266 | 697
1267 | 698
1268 | 699
1269 | 7
1270 | 70
1271 | 700
1272 | 701
1273 | 702
1274 | 703
1275 | 704
1276 | 705
1277 | 706
1278 | 707
1279 | 708
1280 | 709
1281 | 71
1282 | 710
1283 | 711
1284 | 712
1285 | 713
1286 | 714
1287 | 715
1288 | 716
1289 | 717
1290 | 718
1291 | 719
1292 | 72
1293 | 720
1294 | 721
1295 | 722
1296 | 723
1297 | 724
1298 | 725
1299 | 726
1300 | 727
1301 | 728
1302 | 729
1303 | 73
1304 | 730
1305 | 731
1306 | 732
1307 | 733
1308 | 734
1309 | 735
1310 | 736
1311 | 737
1312 | 738
1313 | 739
1314 | 74
1315 | 740
1316 | 741
1317 | 742
1318 | 743
1319 | 744
1320 | 745
1321 | 746
1322 | 747
1323 | 748
1324 | 749
1325 | 75
1326 | 750
1327 | 751
1328 | 752
1329 | 753
1330 | 754
1331 | 755
1332 | 756
1333 | 757
1334 | 758
1335 | 759
1336 | 76
1337 | 760
1338 | 761
1339 | 762
1340 | 763
1341 | 764
1342 | 765
1343 | 766
1344 | 767
1345 | 768
1346 | 769
1347 | 77
1348 | 770
1349 | 771
1350 | 772
1351 | 773
1352 | 774
1353 | 775
1354 | 776
1355 | 777
1356 | 778
1357 | 779
1358 | 78
1359 | 780
1360 | 781
1361 | 782
1362 | 783
1363 | 784
1364 | 785
1365 | 786
1366 | 787
1367 | 788
1368 | 789
1369 | 79
1370 | 790
1371 | 791
1372 | 792
1373 | 793
1374 | 794
1375 | 795
1376 | 796
1377 | 797
1378 | 798
1379 | 799
1380 | 8
1381 | 80
1382 | 800
1383 | 801
1384 | 802
1385 | 803
1386 | 804
1387 | 805
1388 | 806
1389 | 807
1390 | 808
1391 | 809
1392 | 81
1393 | 810
1394 | 811
1395 | 812
1396 | 813
1397 | 814
1398 | 815
1399 | 816
1400 | 817
1401 | 818
1402 | 819
1403 | 82
1404 | 820
1405 | 821
1406 | 822
1407 | 823
1408 | 824
1409 | 825
1410 | 826
1411 | 827
1412 | 828
1413 | 829
1414 | 83
1415 | 830
1416 | 831
1417 | 832
1418 | 833
1419 | 834
1420 | 835
1421 | 836
1422 | 837
1423 | 838
1424 | 839
1425 | 84
1426 | 840
1427 | 841
1428 | 842
1429 | 843
1430 | 844
1431 | 845
1432 | 846
1433 | 847
1434 | 848
1435 | 849
1436 | 85
1437 | 850
1438 | 851
1439 | 852
1440 | 853
1441 | 854
1442 | 855
1443 | 856
1444 | 857
1445 | 858
1446 | 859
1447 | 86
1448 | 860
1449 | 861
1450 | 862
1451 | 863
1452 | 864
1453 | 865
1454 | 866
1455 | 867
1456 | 868
1457 | 869
1458 | 87
1459 | 870
1460 | 871
1461 | 872
1462 | 873
1463 | 874
1464 | 875
1465 | 876
1466 | 877
1467 | 878
1468 | 879
1469 | 88
1470 | 880
1471 | 881
1472 | 882
1473 | 883
1474 | 884
1475 | 885
1476 | 886
1477 | 887
1478 | 888
1479 | 889
1480 | 89
1481 | 890
1482 | 891
1483 | 892
1484 | 893
1485 | 894
1486 | 895
1487 | 896
1488 | 897
1489 | 898
1490 | 899
1491 | 9
1492 | 90
1493 | 900
1494 | 901
1495 | 902
1496 | 903
1497 | 904
1498 | 905
1499 | 906
1500 | 907
1501 | 908
1502 | 909
1503 | 91
1504 | 910
1505 | 911
1506 | 912
1507 | 913
1508 | 914
1509 | 915
1510 | 916
1511 | 917
1512 | 918
1513 | 919
1514 | 92
1515 | 920
1516 | 921
1517 | 922
1518 | 923
1519 | 924
1520 | 925
1521 | 926
1522 | 927
1523 | 928
1524 | 929
1525 | 93
1526 | 930
1527 | 931
1528 | 932
1529 | 933
1530 | 934
1531 | 935
1532 | 936
1533 | 937
1534 | 938
1535 | 939
1536 | 94
1537 | 940
1538 | 941
1539 | 942
1540 | 943
1541 | 944
1542 | 945
1543 | 946
1544 | 947
1545 | 948
1546 | 949
1547 | 95
1548 | 950
1549 | 951
1550 | 952
1551 | 953
1552 | 954
1553 | 955
1554 | 956
1555 | 957
1556 | 958
1557 | 959
1558 | 96
1559 | 960
1560 | 961
1561 | 962
1562 | 963
1563 | 964
1564 | 965
1565 | 966
1566 | 967
1567 | 968
1568 | 969
1569 | 97
1570 | 970
1571 | 971
1572 | 972
1573 | 973
1574 | 974
1575 | 975
1576 | 976
1577 | 977
1578 | 978
1579 | 979
1580 | 98
1581 | 980
1582 | 981
1583 | 982
1584 | 983
1585 | 984
1586 | 985
1587 | 986
1588 | 987
1589 | 988
1590 | 989
1591 | 99
1592 | 990
1593 | 991
1594 | 992
1595 | 993
1596 | 994
1597 | 995
1598 | 996
1599 | 997
1600 | 998
1601 | 999
1602 | 


--------------------------------------------------------------------------------
/VOCdevkit/VOC2007/ImageSets/Main/val.txt:
--------------------------------------------------------------------------------
  1 | 1
  2 | 1004
  3 | 1018
  4 | 1027
  5 | 1049
  6 | 1055
  7 | 1073
  8 | 1082
  9 | 1087
 10 | 1092
 11 | 1093
 12 | 1095
 13 | 1097
 14 | 1099
 15 | 1110
 16 | 1128
 17 | 1129
 18 | 113
 19 | 1143
 20 | 115
 21 | 1154
 22 | 1159
 23 | 1177
 24 | 1194
 25 | 1200
 26 | 1201
 27 | 1212
 28 | 1215
 29 | 1233
 30 | 1245
 31 | 1246
 32 | 125
 33 | 1258
 34 | 1262
 35 | 1283
 36 | 1294
 37 | 1309
 38 | 1315
 39 | 1319
 40 | 132
 41 | 1337
 42 | 1339
 43 | 1341
 44 | 1351
 45 | 1368
 46 | 1371
 47 | 1393
 48 | 140
 49 | 1408
 50 | 1415
 51 | 1419
 52 | 1423
 53 | 1438
 54 | 144
 55 | 1447
 56 | 1451
 57 | 1453
 58 | 1455
 59 | 1457
 60 | 1469
 61 | 1481
 62 | 1486
 63 | 1493
 64 | 1505
 65 | 1514
 66 | 153
 67 | 1531
 68 | 1538
 69 | 155
 70 | 1553
 71 | 1570
 72 | 1590
 73 | 167
 74 | 179
 75 | 180
 76 | 181
 77 | 188
 78 | 191
 79 | 192
 80 | 20
 81 | 211
 82 | 229
 83 | 23
 84 | 248
 85 | 249
 86 | 264
 87 | 282
 88 | 283
 89 | 289
 90 | 29
 91 | 290
 92 | 292
 93 | 293
 94 | 333 
 95 | 333
 96 | 336
 97 | 352
 98 | 359
 99 | 381
100 | 403
101 | 424
102 | 435
103 | 456
104 | 460
105 | 463
106 | 469
107 | 510
108 | 511
109 | 52
110 | 525
111 | 531
112 | 535
113 | 538
114 | 548
115 | 550
116 | 555
117 | 558
118 | 560
119 | 574
120 | 576
121 | 584
122 | 595
123 | 60
124 | 607
125 | 624
126 | 659
127 | 661
128 | 674
129 | 697
130 | 7
131 | 707
132 | 712
133 | 728
134 | 73
135 | 74
136 | 754
137 | 758
138 | 765
139 | 768
140 | 782
141 | 811
142 | 817
143 | 818
144 | 835
145 | 865
146 | 871
147 | 875
148 | 881
149 | 894
150 | 898
151 | 90
152 | 907
153 | 922
154 | 937
155 | 939
156 | 942
157 | 962
158 | 964
159 | 980
160 | 993
161 | 999
162 | 


--------------------------------------------------------------------------------
/VOCdevkit/VOC2007/JPEGImages/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/VOCdevkit/VOC2007/JPEGImages/1.jpg


--------------------------------------------------------------------------------
/VOCdevkit/VOC2007/JPEGImages/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/VOCdevkit/VOC2007/JPEGImages/2.jpg


--------------------------------------------------------------------------------
/VOCdevkit/VOC2007/JPEGImages/3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/VOCdevkit/VOC2007/JPEGImages/3.jpg


--------------------------------------------------------------------------------
/VOCdevkit/VOC2007/JPEGImages/4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/VOCdevkit/VOC2007/JPEGImages/4.jpg


--------------------------------------------------------------------------------
/VOCdevkit/VOC2007/JPEGImages/5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/VOCdevkit/VOC2007/JPEGImages/5.jpg


--------------------------------------------------------------------------------
/YOLOv4-study学习资料md:
--------------------------------------------------------------------------------
 1 | # YOLOv4 学习资料
 2 | 
 3 | ![在这里插入图片描述](https://img-blog.csdnimg.cn/e3551e344873465d8ad884f856d652ed.png)
 4 | 
 5 | [Tianxiaomo](https://github.com/Tianxiaomo)/**[pytorch-YOLOv4](https://github.com/Tianxiaomo/pytorch-YOLOv4)**   star 3.5k 
 6 | 
 7 | PyTorch ,ONNX and TensorRT implementation of *YOLOv4*
 8 | 
 9 | [WongKinYiu](https://github.com/WongKinYiu)/**[PyTorch_YOLOv4](https://github.com/WongKinYiu/PyTorch_YOLOv4)**   star 1.5k 
10 | 
11 | PyTorch implementation of *YOLOv4*
12 | 
13 | [argusswift](https://github.com/argusswift)/**[YOLOv4-pytorch ](https://github.com/argusswift/YOLOv4-pytorch)** star 1.4k 
14 | 
15 | This is a pytorch repository of *YOLOv4*, attentive *YOLOv4* and mobilenet *YOLOv4* with PASCAL VOC and COCO
16 | 
17 |  [bubbliiiing/*yolov4*-pytorch ](https://github.com/bubbliiiing/yolov4-pytorch) star 1.2k
18 | 
19 | 这是一个*YoloV4*-pytorch的源码，可以用于训练自己的模型。
20 | 
21 | 
22 | 
23 | 
24 | 
25 | ## 扩展
26 | 
27 |  [Bil369](https://github.com/Bil369)/**[MaskDetect-YOLOv4-PyTorch](https://github.com/Bil369/MaskDetect-YOLOv4-PyTorch)**
28 | 
29 | 基于*PyTorch*&*YOLOv4*实现的口罩佩戴检测 ⭐ 自建口罩数据集分享
30 | 
31 | [bobo0810](https://github.com/bobo0810)/**[PytorchNetHub](https://github.com/bobo0810/PytorchNetHub)**
32 | 
33 | 项目注释+论文复现+算法竞赛+Pytorch指北
34 | 
35 | [Bil369](https://github.com/Bil369)/**[YOLOv4-PyTorch-Simple-Implementation](https://github.com/Bil369/YOLOv4-PyTorch-Simple-Implementation)**
36 | 
37 | *YOLOv4* *PyTorch* Simple Implementation


--------------------------------------------------------------------------------
/detect.py:
--------------------------------------------------------------------------------
 1 | #-----------------------------------------------------------------------#
 2 | #   detect.py 是用来尝试利用小模型半自动化进行标注数据
 3 | #-----------------------------------------------------------------------#
 4 | import numpy as np
 5 | from PIL import Image
 6 | from get_yaml import get_config
 7 | 
 8 | from yolo import YOLO
 9 | from gen_annotation import GEN_Annotations
10 | 
11 | if __name__ == "__main__":# 配置文件
12 |     # 配置文件
13 |     config = get_config()
14 |     yolo = YOLO()
15 | 
16 |     dir_detect_path = config['dir_detect_path']
17 |     detect_save_path   = config['detect_save_path']
18 |     
19 |     import os
20 |     from tqdm import tqdm
21 |     
22 |     img_names = os.listdir(dir_detect_path)
23 |     for img_name in tqdm(img_names):
24 |         
25 |         if img_name.lower().endswith(('.bmp', '.dib', '.png', '.jpg', '.jpeg', '.pbm', '.pgm', '.ppm', '.tif', '.tiff')):
26 |             # if int(img_name.split('.')[0][-4:]) < 355:
27 |             #     continue
28 |             image_path  = os.path.join(dir_detect_path, img_name)
29 |             image       = Image.open(image_path)
30 |             boxes       = yolo.get_box(image)
31 |             if not os.path.exists(detect_save_path):
32 |                 os.makedirs(detect_save_path)
33 | 
34 |             annotation        = GEN_Annotations(img_name)
35 |             w,h = np.array(np.shape(image)[0:2])
36 |             annotation.set_size(w,h,3)
37 |             if boxes:
38 |                 for box in boxes:
39 |                     label,ymin,xmin,ymax,xmax = box
40 |                     annotation.add_pic_attr(label,xmin,ymin,xmax,ymax)
41 |                 annotation_path  = os.path.join(detect_save_path, img_name.split('.')[0])
42 |                 annotation.savefile("{}.xml".format(annotation_path ))
43 |             # print(img_name,'已经半自动标注')


--------------------------------------------------------------------------------
/gen_annotation.py:
--------------------------------------------------------------------------------
 1 | from lxml import etree
 2 |  
 3 | class GEN_Annotations:
 4 |     def __init__(self, filename):
 5 |         self.root = etree.Element("annotation")
 6 |  
 7 |         child1 = etree.SubElement(self.root, "folder")
 8 |         child1.text = "VOC2007"
 9 |  
10 |         child2 = etree.SubElement(self.root, "filename")
11 |         child2.text = filename
12 |  
13 |         child3 = etree.SubElement(self.root, "source")
14 |  
15 |         child4 = etree.SubElement(child3, "annotation")
16 |         child4.text = "PASCAL VOC2007"
17 |         child5 = etree.SubElement(child3, "database")
18 |         child5.text = "Unknown"
19 |  
20 | ##        child6 = etree.SubElement(child3, "image")
21 | ##        child6.text = "flickr"
22 | ##        child7 = etree.SubElement(child3, "flickrid")
23 | ##        child7.text = "35435"
24 |  
25 |  
26 |     def set_size(self,witdh,height,channel):
27 |         size = etree.SubElement(self.root, "size")
28 |         widthn = etree.SubElement(size, "width")
29 |         widthn.text = str(witdh)
30 |         heightn = etree.SubElement(size, "height")
31 |         heightn.text = str(height)
32 |         channeln = etree.SubElement(size, "depth")
33 |         channeln.text = str(channel)
34 |     def savefile(self,filename):
35 |         tree = etree.ElementTree(self.root)
36 |         tree.write(filename, pretty_print=True, xml_declaration=False, encoding='utf-8')
37 |     def add_pic_attr(self,label,xmin,ymin,xmax,ymax):
38 |         object = etree.SubElement(self.root, "object")
39 |         namen = etree.SubElement(object, "name")
40 |         namen.text = label
41 |         bndbox = etree.SubElement(object, "bndbox")
42 |         xminn = etree.SubElement(bndbox, "xmin")
43 |         xminn.text = str(xmin)
44 |         yminn = etree.SubElement(bndbox, "ymin")
45 |         yminn.text = str(ymin)
46 |         xmaxn = etree.SubElement(bndbox, "xmax")
47 |         xmaxn.text = str(xmax)
48 |         ymaxn = etree.SubElement(bndbox, "ymax")
49 |         ymaxn.text = str(ymax)
50 |  
51 |  
52 | if __name__ == '__main__':
53 |     filename="000001.jpg"
54 |     anno= GEN_Annotations(filename)
55 |     anno.set_size(1280,720,3)
56 |     for i in range(3):
57 |         xmin=i+1
58 |         ymin=i+10
59 |         xmax=i+100
60 |         ymax=i+100
61 |         anno.add_pic_attr("pikachu",xmin,ymin,xmax,ymax)
62 |     anno.savefile("00001.xml")
63 | 


--------------------------------------------------------------------------------
/gesture.streamlit.py:
--------------------------------------------------------------------------------
  1 | """Create an Object Detection Web App using PyTorch and Streamlit."""
  2 | # import libraries
  3 | from PIL import Image
  4 | from torchvision import models, transforms
  5 | import torch
  6 | import streamlit as st
  7 | from yolo import YOLO
  8 | import os
  9 | import urllib
 10 | import numpy as np
 11 | from streamlit_webrtc import webrtc_streamer, WebRtcMode, RTCConfiguration
 12 | import av
 13 | # 设置网页的icon
 14 | st.set_page_config(page_title='Gesture Detector', page_icon='✌',
 15 |                    layout='centered', initial_sidebar_state='expanded')
 16 | 
 17 | RTC_CONFIGURATION = RTCConfiguration(
 18 |     {
 19 |       "RTCIceServer": [{
 20 |         "urls": ["stun:stun.l.google.com:19302"],
 21 |         "username": "pikachu",
 22 |         "credential": "1234",
 23 |       }]
 24 |     }
 25 | )
 26 | def main():
 27 |     # Render the readme as markdown using st.markdown.
 28 |     readme_text = st.markdown(open("instructions.md",encoding='utf-8').read())
 29 | 
 30 |     
 31 |     # Once we have the dependencies, add a selector for the app mode on the sidebar.
 32 |     st.sidebar.title("What to do")
 33 |     app_mode = st.sidebar.selectbox("Choose the app mode",
 34 |         ["Show instructions", "Run the app", "Show the source code"])
 35 |     if app_mode == "Show instructions":
 36 |         st.sidebar.success('To continue select "Run the app".')
 37 |     elif app_mode == "Show the source code":
 38 |         readme_text.empty()
 39 |         st.code(open("gesture.streamlit.py",encoding='utf-8').read())
 40 |     elif app_mode == "Run the app":
 41 |         # Download external dependencies.
 42 |         for filename in EXTERNAL_DEPENDENCIES.keys():
 43 |             download_file(filename)
 44 | 
 45 |         readme_text.empty()
 46 |         run_the_app()
 47 | 
 48 | # External files to download.
 49 | EXTERNAL_DEPENDENCIES = {
 50 |     "yolov4_tiny.pth": {
 51 |         "url": "https://github.com/Kedreamix/YoloGesture/releases/download/v1.0/yolov4_tiny.pth",
 52 |         "size": 23631189 
 53 |     },
 54 |     "yolov4_SE.pth": {
 55 |         "url": "https://github.com/Kedreamix/YoloGesture/releases/download/v1.0/yolov4_SE.pth",
 56 |         "size": 23806027
 57 |     },
 58 |     "yolov4_CBAM.pth":{
 59 |         "url": "https://github.com/Kedreamix/YoloGesture/releases/download/v1.0/yolov4_CBAM.pth",
 60 |         "size": 23981478
 61 |     },
 62 |     "yolov4_ECA.pth":{
 63 |         "url": "https://github.com/Kedreamix/YoloGesture/releases/download/v1.0/yolov4_ECA.pth",
 64 |         "size": 23632688
 65 |     },
 66 |     "yolov4_weights_ep150_608.pth":{
 67 |         "url": "https://github.com/Kedreamix/YoloGesture/releases/download/v1.0/yolov4_weights_ep150_608.pth",
 68 |         "size": 256423031
 69 |     },
 70 |     "yolov4_weights_ep150_416.pth":{
 71 |         "url": "https://github.com/Kedreamix/YoloGesture/releases/download/v1.0/yolov4_weights_ep150_416.pth",
 72 |         "size": 256423031
 73 |     },
 74 | }
 75 | 
 76 | 
 77 | # This file downloader demonstrates Streamlit animation.
 78 | def download_file(file_path):
 79 |     # Don't download the file twice. (If possible, verify the download using the file length.)
 80 |     if os.path.exists(file_path):
 81 |         if "size" not in EXTERNAL_DEPENDENCIES[file_path]:
 82 |             return
 83 |         elif os.path.getsize(file_path) == EXTERNAL_DEPENDENCIES[file_path]["size"]:
 84 |             return
 85 |     # print(os.path.getsize(file_path))
 86 |     # These are handles to two visual elements to animate.
 87 |     weights_warning, progress_bar = None, None
 88 |     try:
 89 |         weights_warning = st.warning("Downloading %s..." % file_path)
 90 |         progress_bar = st.progress(0)
 91 |         with open(file_path, "wb") as output_file:
 92 |             with urllib.request.urlopen(EXTERNAL_DEPENDENCIES[file_path]["url"]) as response:
 93 |                 length = int(response.info()["Content-Length"])
 94 |                 counter = 0.0
 95 |                 MEGABYTES = 2.0 ** 20.0
 96 |                 while True:
 97 |                     data = response.read(8192)
 98 |                     if not data:
 99 |                         break
100 |                     counter += len(data)
101 |                     output_file.write(data)
102 | 
103 |                     # We perform animation by overwriting the elements.
104 |                     weights_warning.warning("Downloading %s... (%6.2f/%6.2f MB)" %
105 |                         (file_path, counter / MEGABYTES, length / MEGABYTES))
106 |                     progress_bar.progress(min(counter / length, 1.0))
107 |     except Exception as e:
108 |         print(e)
109 |     # Finally, we remove these visual elements by calling .empty().
110 |     finally:
111 |         if weights_warning is not None:
112 |             weights_warning.empty()
113 |         if progress_bar is not None:
114 |             progress_bar.empty()
115 | 
116 | # This is the main app app itself, which appears when the user selects "Run the app".
117 | def run_the_app():    
118 |     class Config():
119 |         def __init__(self, weights = 'yolov4_tiny.pth', tiny = True, phi = 0, shape = 416,nms_iou = 0.3, confidence = 0.5):
120 |             self.weights = weights
121 |             self.tiny = tiny
122 |             self.phi = phi
123 |             self.cuda = False
124 |             self.shape = shape
125 |             self.confidence = confidence
126 |             self.nms_iou = nms_iou
127 |     # set title of app
128 |     st.markdown('<h1 align="center">✌ Gesture Detection</h1>',
129 |                 unsafe_allow_html=True)
130 |     st.sidebar.markdown("# Gesture Detection on?")
131 |     activities = ["Example","Image", "Camera", "FPS", "Heatmap","Real Time", "Video"]
132 |     choice = st.sidebar.selectbox("Choose among the given options:", activities)
133 |     phi = st.sidebar.selectbox("yolov4-tiny 使用的自注意力模式:",('0tiny','1SE','2CABM','3ECA'))
134 |     print("")
135 | 
136 |     tiny = st.sidebar.checkbox('是否使用 yolov4 tiny 模型')
137 |     if not tiny:
138 |         shape = st.sidebar.selectbox("Choose shape to Input:", [416,608])
139 |     conf,nms = object_detector_ui()
140 |     @st.cache
141 |     def get_yolo(tiny,phi,conf,nms,shape=416):
142 |         weights = 'yolov4_tiny.pth'
143 |         if tiny:
144 |             if phi == '0tiny':
145 |                 weights = 'yolov4_tiny.pth'
146 |             elif phi == '1SE':
147 |                 weights = 'yolov4_SE.pth'
148 |             elif phi == '2CABM':
149 |                 weights = 'yolov4_CBAM.pth'
150 |             elif phi == '3ECA':
151 |                 weights = 'yolov4_ECA.pth'
152 |         else:
153 |             if shape == 608:
154 |                 weights = 'yolov4_weights_ep150_608.pth'
155 |             elif shape == 416:
156 |                 weights = 'yolov4_weights_ep150_416.pth'
157 |         opt = Config(weights = weights, tiny = tiny , phi = int(phi[0]), shape = shape,nms_iou = nms, confidence = conf)
158 |         yolo = YOLO(opt)
159 |         return yolo
160 |     
161 |     if tiny:
162 |         yolo = get_yolo(tiny, phi, conf, nms)
163 |         st.write("YOLOV4 tiny 模型加载完毕")
164 |     else:
165 |         yolo = get_yolo(tiny, phi, conf, nms, shape)
166 |         st.write("YOLOV4 模型加载完毕")
167 |     
168 |     if choice == 'Image':
169 |         detect_image(yolo)
170 |     elif choice =='Camera':
171 |         detect_camera(yolo)
172 |     elif choice == 'FPS':
173 |         detect_fps(yolo)
174 |     elif choice == "Heatmap":
175 |         detect_heatmap(yolo)
176 |     elif choice == "Example":
177 |         detect_example(yolo)
178 |     elif choice == "Real Time":
179 |         detect_realtime(yolo)
180 |     elif choice == "Video":
181 |         detect_video(yolo)
182 |         
183 | 
184 | 
185 | # This sidebar UI lets the user select parameters for the YOLO object detector.
186 | def object_detector_ui():
187 |     st.sidebar.markdown("# Model")
188 |     confidence_threshold = st.sidebar.slider("Confidence threshold", 0.0, 1.0, 0.5, 0.01)
189 |     overlap_threshold = st.sidebar.slider("Overlap threshold", 0.0, 1.0, 0.3, 0.01)
190 |     return confidence_threshold, overlap_threshold
191 | 
192 | def predict(image,yolo):
193 |     """Return predictions.
194 | 
195 |     Parameters
196 |     ----------
197 |     :param image: uploaded image
198 |     :type image: jpg
199 |     :rtype: list
200 |     :return: none
201 |     """
202 |     crop            = False
203 |     count           = False
204 |     try:
205 |         # image = Image.open(image)
206 |         r_image = yolo.detect_image(image, crop = crop, count=count)
207 |         transform = transforms.Compose([transforms.ToTensor()])        
208 |         result = transform(r_image)
209 |         st.image(result.permute(1,2,0).numpy(), caption = 'Processed Image.', use_column_width = True)
210 |     except Exception as e:
211 |         print(e)
212 | 
213 | def fps(image,yolo):
214 |     test_interval = 50
215 |     tact_time = yolo.get_FPS(image, test_interval)
216 |     st.write(str(tact_time) + ' seconds, ', str(1/tact_time),'FPS, @batch_size 1')
217 |     return tact_time
218 |     # print(str(tact_time) + ' seconds, ' + str(1/tact_time) + 'FPS, @batch_size 1')
219 | 
220 | 
221 | def detect_image(yolo):
222 |     # enable users to upload images for the model to make predictions
223 |     file_up = st.file_uploader("Upload an image", type = ["jpg","png","jpeg"])
224 |     classes = ["up","down","left","right","front","back","clockwise","anticlockwise"]
225 |     class_to_idx = {cls: idx for (idx, cls) in enumerate(classes)}
226 |     st.sidebar.markdown("See the model preformance and play with it")
227 |     if file_up is not None:
228 |         with st.spinner(text='Preparing Image'):
229 |             # display image that user uploaded
230 |             image = Image.open(file_up)
231 |             st.image(image, caption = 'Uploaded Image.', use_column_width = True)
232 |             st.balloons()
233 |             detect = st.button("开始检测Image")
234 |             if detect:
235 |                 st.write("")
236 |                 st.write("Just a second ...")
237 |                 predict(image,yolo)
238 |                 st.balloons()
239 | 
240 | 
241 | 
242 | def detect_camera(yolo):
243 |     picture = st.camera_input("Take a picture")
244 |     if picture:
245 |         filters_to_funcs = {
246 |             "No filter": predict,
247 |             "Heatmap": heatmap,
248 |             "FPS": fps,
249 |         }
250 |         filters = st.selectbox("...and now, apply a filter!", filters_to_funcs.keys())
251 |         image = Image.open(picture)
252 |         with st.spinner(text='Preparing Image'):
253 |             filters_to_funcs[filters](image,yolo)
254 |             st.balloons()
255 | 
256 | def detect_fps(yolo):
257 |     file_up = st.file_uploader("Upload an image", type = ["jpg","png","jpeg"])
258 |     classes = ["up","down","left","right","front","back","clockwise","anticlockwise"]
259 |     class_to_idx = {cls: idx for (idx, cls) in enumerate(classes)}
260 |     st.sidebar.markdown("See the model preformance and play with it")
261 |     if file_up is not None:
262 |         # display image that user uploaded
263 |         image = Image.open(file_up)
264 |         st.image(image, caption = 'Uploaded Image.', use_column_width = True)
265 |         st.balloons()
266 |         detect = st.button("开始检测 FPS")
267 |         if detect:
268 |             with st.spinner(text='Preparing Image'):
269 |                 st.write("")
270 |                 st.write("Just a second ...")
271 |                 tact_time = fps(image,yolo)
272 |                 # st.write(str(tact_time) + ' seconds, ', str(1/tact_time),'FPS, @batch_size 1')
273 |                 st.balloons()
274 | 
275 | def heatmap(image,yolo):
276 |     heatmap_save_path = "heatmap_vision.png"
277 |     yolo.detect_heatmap(image, heatmap_save_path)
278 |     img = Image.open(heatmap_save_path)
279 |     transform = transforms.Compose([transforms.ToTensor()])        
280 |     result = transform(img)
281 |     st.image(result.permute(1,2,0).numpy(), caption = 'Processed Image.', use_column_width = True)
282 | 
283 | def detect_heatmap(yolo):
284 |     file_up = st.file_uploader("Upload an image", type = ["jpg","png","jpeg"])
285 |     classes = ["up","down","left","right","front","back","clockwise","anticlockwise"]
286 |     class_to_idx = {cls: idx for (idx, cls) in enumerate(classes)}
287 |     st.sidebar.markdown("See the model preformance and play with it")
288 |     if file_up is not None:
289 |         # display image that user uploaded
290 |         image = Image.open(file_up)
291 |         st.image(image, caption = 'Uploaded Image.', use_column_width = True)
292 |         st.balloons()
293 |         detect = st.button("开始检测 heatmap")
294 |         if detect:
295 |             with st.spinner(text='Preparing Heatmap'):
296 |                 st.write("")
297 |                 st.write("Just a second ...")
298 |                 heatmap(image,yolo)
299 |                 st.balloons()
300 | 
301 | def detect_example(yolo):
302 |     st.sidebar.title("Choose an Image as a example")
303 |     images = os.listdir('./img')
304 |     images.sort()
305 |     image = st.sidebar.selectbox("Image Name", images)
306 |     st.sidebar.markdown("See the model preformance and play with it")
307 |     image = Image.open(os.path.join('img',image))
308 |     st.image(image, caption = 'Choose Image.', use_column_width = True)
309 |     st.balloons()
310 |     detect = st.button("开始检测Image")
311 |     if detect:
312 |         st.write("")
313 |         st.write("Just a second ...")
314 |         predict(image,yolo)
315 |         st.balloons()
316 | 
317 | def detect_realtime(yolo):
318 | 
319 |     class VideoProcessor:
320 |         def recv(self, frame):
321 |             img = frame.to_ndarray(format="bgr24")
322 |             img = Image.fromarray(img)
323 |             crop            = False
324 |             count           = False
325 |             r_image = yolo.detect_image(img, crop = crop, count=count)
326 |             transform = transforms.Compose([transforms.ToTensor()])        
327 |             result = transform(r_image)
328 |             result = result.permute(1,2,0).numpy()
329 |             result = (result * 255).astype(np.uint8)
330 |             return av.VideoFrame.from_ndarray(result, format="bgr24")
331 |        
332 |     webrtc_ctx = webrtc_streamer(
333 |         key="example",
334 |         mode=WebRtcMode.SENDRECV,
335 |         rtc_configuration=RTC_CONFIGURATION,
336 |         media_stream_constraints={"video": True, "audio": False},
337 |         async_processing=False,
338 |         video_processor_factory=VideoProcessor
339 |     )
340 | 
341 | import cv2
342 | import time
343 | def detect_video(yolo):
344 |     file_up = st.file_uploader("Upload a video", type = ["mp4"])
345 |     print(file_up)
346 |     classes = ["up","down","left","right","front","back","clockwise","anticlockwise"]
347 |     
348 |     if file_up is not None:
349 |         video_path = 'video.mp4'
350 |         st.video(file_up)
351 |         with open(video_path, 'wb') as f:
352 |             f.write(file_up.read())       
353 |         detect = st.button("开始检测 Video")
354 |         
355 |         if detect: 
356 |             video_save_path = 'video2.mp4'
357 |             # display image that user uploaded
358 |             capture = cv2.VideoCapture(video_path)
359 |             
360 |             video_fps = st.slider("Video FPS", 5, 30, int(capture.get(cv2.CAP_PROP_FPS)), 1)
361 |             fourcc  = cv2.VideoWriter_fourcc(*'XVID')
362 |             size    = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)), int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
363 |             out     = cv2.VideoWriter(video_save_path, fourcc, video_fps, size)
364 | 
365 | 
366 |             
367 |             while(True):
368 |                 # 读取某一帧
369 |                 ref, frame = capture.read()
370 |                 if not ref:
371 |                     break
372 |                 # 转变成Image
373 |                 # frame = Image.fromarray(np.uint8(frame))
374 |                 # 格式转变，BGRtoRGB
375 |                 frame = cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
376 |                 # 转变成Image
377 |                 frame = Image.fromarray(np.uint8(frame))
378 |                 # 进行检测
379 |                 frame = np.array(yolo.detect_image(frame))
380 |                 # RGBtoBGR满足opencv显示格式
381 |                 frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR)
382 | 
383 |                 # print("fps= %.2f"%(fps))
384 |                 # frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
385 |                 out.write(frame)
386 |                 
387 |             out.release()
388 |             capture.release()
389 |             print("Save processed video to the path :" + video_save_path)
390 |             
391 |             with open(video_save_path, "rb") as file:
392 |                 btn = st.download_button(
393 |                         label="Download Video",
394 |                         data=file,
395 |                         file_name="video.mp4",
396 |                     )
397 |             st.balloons()
398 | 
399 | if __name__ == "__main__":
400 |     main()


--------------------------------------------------------------------------------
/get_map.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import xml.etree.ElementTree as ET
  3 | 
  4 | from PIL import Image
  5 | from tqdm import tqdm
  6 | import yaml
  7 | from utils.utils import get_classes
  8 | from utils.utils_map import get_coco_map, get_map
  9 | from yolo import YOLO
 10 | from get_yaml import get_config
 11 | import argparse
 12 | if __name__ == "__main__":
 13 |     '''
 14 |     Recall和Precision不像AP是一个面积的概念，在门限值不同时，网络的Recall和Precision值是不同的。
 15 |     map计算结果中的Recall和Precision代表的是当预测时，门限置信度为0.5时，所对应的Recall和Precision值。
 16 | 
 17 |     此处获得的./map_out/detection-results/里面的txt的框的数量会比直接predict多一些，这是因为这里的门限低，
 18 |     目的是为了计算不同门限条件下的Recall和Precision值，从而实现map的计算。
 19 |     '''
 20 |     parser = argparse.ArgumentParser()
 21 |     parser.add_argument('--weights',type=str,default='model_data/yolotiny_SE_ep100.pth',help='initial weights path')
 22 |     parser.add_argument('--tiny',action='store_true',help='使用yolotiny模型')
 23 |     parser.add_argument('--phi',type=int,default=1,help='yolov4tiny注意力机制类型')
 24 |     parser.add_argument('--mode',type=int,default=0,help='get map的模式')
 25 |     parser.add_argument('--cuda',action='store_true',help='表示是否使用GPU')
 26 |     parser.add_argument('--shape',type=int,default=416,help='输入图像的shape')
 27 |     parser.add_argument('--confidence',type=float,default=0.5,help='只有得分大于置信度的预测框会被保留下来')
 28 |     parser.add_argument('--nms_iou',type=float,default=0.3,help='非极大抑制所用到的nms_iou大小')
 29 |     opt = parser.parse_args()
 30 |     print(opt)
 31 |     # 配置文件
 32 |     config = get_config()
 33 | 
 34 |     #------------------------------------------------------------------------------------------------------------------#
 35 |     #   map_mode用于指定该文件运行时计算的内容
 36 |     #   map_mode为0代表整个map计算流程，包括获得预测结果、获得真实框、计算VOC_map。
 37 |     #   map_mode为1代表仅仅获得预测结果。
 38 |     #   map_mode为2代表仅仅获得真实框。
 39 |     #   map_mode为3代表仅仅计算VOC_map。
 40 |     #   map_mode为4代表利用COCO工具箱计算当前数据集的0.50:0.95map。需要获得预测结果、获得真实框后并安装pycocotools才行
 41 |     #-------------------------------------------------------------------------------------------------------------------#
 42 |     map_mode        = opt.mode
 43 |     #-------------------------------------------------------#
 44 |     #   MINOVERLAP用于指定想要获得的mAP0.x
 45 |     #   比如计算mAP0.75，可以设定MINOVERLAP = 0.75。
 46 |     #-------------------------------------------------------#
 47 |     MINOVERLAP      = 0.5
 48 |     #-------------------------------------------------------#
 49 |     #   map_vis用于指定是否开启VOC_map计算的可视化
 50 |     #-------------------------------------------------------#
 51 |     map_vis         = False
 52 |     #-------------------------------------------------------#
 53 |     #   指向VOC数据集所在的文件夹
 54 |     #   默认指向根目录下的VOC数据集
 55 |     #-------------------------------------------------------#
 56 |     VOCdevkit_path  = 'VOCdevkit'
 57 |     #-------------------------------------------------------#
 58 |     #   结果输出的文件夹，默认为map_out
 59 |     #-------------------------------------------------------#
 60 |     map_out_path    = 'map_out'
 61 | 
 62 |     image_ids = open(os.path.join(VOCdevkit_path, "VOC2007/ImageSets/Main/val.txt")).read().strip().split()
 63 | 
 64 |     if not os.path.exists(map_out_path):
 65 |         os.makedirs(map_out_path)
 66 |     if not os.path.exists(os.path.join(map_out_path, 'ground-truth')):
 67 |         os.makedirs(os.path.join(map_out_path, 'ground-truth'))
 68 |     if not os.path.exists(os.path.join(map_out_path, 'detection-results')):
 69 |         os.makedirs(os.path.join(map_out_path, 'detection-results'))
 70 |     if not os.path.exists(os.path.join(map_out_path, 'images-optional')):
 71 |         os.makedirs(os.path.join(map_out_path, 'images-optional'))
 72 | 
 73 |     class_names  = config['classes']
 74 | 
 75 |     if map_mode == 0 or map_mode == 1:
 76 |         print("Load model.")
 77 |         yolo = YOLO(opt, confidence = 0.001, nms_iou = 0.5)
 78 |         print("Load model done.")
 79 | 
 80 |         print("Get predict result.")
 81 |         for image_id in tqdm(image_ids):
 82 |             image_path  = os.path.join(VOCdevkit_path, "VOC2007/JPEGImages/"+image_id+".jpg")
 83 |             image       = Image.open(image_path)
 84 |             if map_vis:
 85 |                 image.save(os.path.join(map_out_path, "images-optional/" + image_id + ".jpg"))
 86 |             yolo.get_map_txt(image_id, image, class_names, map_out_path)
 87 |         print("Get predict result done.")
 88 |         
 89 |     if map_mode == 0 or map_mode == 2:
 90 |         print("Get ground truth result.")
 91 |         for image_id in tqdm(image_ids):
 92 |             with open(os.path.join(map_out_path, "ground-truth/"+image_id+".txt"), "w") as new_f:
 93 |                 root = ET.parse(os.path.join(VOCdevkit_path, "VOC2007/Annotations/"+image_id+".xml")).getroot()
 94 |                 for obj in root.findall('object'):
 95 |                     difficult_flag = False
 96 |                     if obj.find('difficult')!=None:
 97 |                         difficult = obj.find('difficult').text
 98 |                         if int(difficult)==1:
 99 |                             difficult_flag = True
100 |                     obj_name = obj.find('name').text
101 |                     if obj_name not in class_names:
102 |                         continue
103 |                     bndbox  = obj.find('bndbox')
104 |                     left    = bndbox.find('xmin').text
105 |                     top     = bndbox.find('ymin').text
106 |                     right   = bndbox.find('xmax').text
107 |                     bottom  = bndbox.find('ymax').text
108 | 
109 |                     if difficult_flag:
110 |                         new_f.write("%s %s %s %s %s difficult\n" % (obj_name, left, top, right, bottom))
111 |                     else:
112 |                         new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom))
113 |         print("Get ground truth result done.")
114 | 
115 |     if map_mode == 0 or map_mode == 3:
116 |         print("Get map.")
117 |         get_map(MINOVERLAP, True, path = map_out_path)
118 |         print("Get map done.")
119 | 
120 |     if map_mode == 4:
121 |         print("Get map.")
122 |         get_coco_map(class_names = class_names, path = map_out_path)
123 |         print("Get map done.")
124 | 


--------------------------------------------------------------------------------
/get_yaml.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | import yaml
 4 | 
 5 | def get_config():
 6 |     yaml_path = 'model_data/gesture.yaml'
 7 |     f = open(yaml_path,'r',encoding='utf-8')
 8 |     config = yaml.load(f,Loader =yaml.FullLoader)
 9 |     f.close()
10 |     return config
11 | 
12 | if __name__ == "__main__":
13 |     config = get_config()
14 |     print(config)


--------------------------------------------------------------------------------
/img/anticlockwise.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/img/anticlockwise.jpg


--------------------------------------------------------------------------------
/img/back.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/img/back.jpg


--------------------------------------------------------------------------------
/img/clockwise.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/img/clockwise.jpg


--------------------------------------------------------------------------------
/img/down.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/img/down.jpg


--------------------------------------------------------------------------------
/img/front.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/img/front.jpg


--------------------------------------------------------------------------------
/img/left.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/img/left.jpg


--------------------------------------------------------------------------------
/img/right.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/img/right.jpg


--------------------------------------------------------------------------------
/img/up.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/img/up.jpg


--------------------------------------------------------------------------------
/instructions.md:
--------------------------------------------------------------------------------
 1 | # ✌ Gesture Detection
 2 | 
 3 | 
 4 | 这是一个基于无人机视觉图像手势识别控制系统，选择了YOLOv4模型进行训练
 5 | 
 6 |  **YOLOv4 = CSPDarknet53（主干） + SPP** **附加模块（颈** **） +** **PANet** **路径聚合（颈** **） + YOLOv3（头部）**
 7 | 
 8 | ![img](https://pdf.cdn.readpaper.com/parsed/fetch_target/699143cdb334ecfc63caf8192472490c_0_Figure_1.png)
 9 | 
10 | 


--------------------------------------------------------------------------------
/kmeans_for_anchors.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/kmeans_for_anchors.jpg


--------------------------------------------------------------------------------
/kmeans_for_anchors.py:
--------------------------------------------------------------------------------
  1 | #-------------------------------------------------------------------------------------------------------#
  2 | #   kmeans虽然会对数据集中的框进行聚类，但是很多数据集由于框的大小相近，聚类出来的9个框相差不大，
  3 | #   这样的框反而不利于模型的训练。因为不同的特征层适合不同大小的先验框，shape越小的特征层适合越大的先验框
  4 | #   原始网络的先验框已经按大中小比例分配好了，不进行聚类也会有非常好的效果。
  5 | #-------------------------------------------------------------------------------------------------------#
  6 | import glob
  7 | import xml.etree.ElementTree as ET
  8 | 
  9 | import matplotlib.pyplot as plt
 10 | import numpy as np
 11 | from tqdm import tqdm
 12 | 
 13 | 
 14 | def cas_iou(box, cluster):
 15 |     x = np.minimum(cluster[:, 0], box[0])
 16 |     y = np.minimum(cluster[:, 1], box[1])
 17 | 
 18 |     intersection = x * y
 19 |     area1 = box[0] * box[1]
 20 | 
 21 |     area2 = cluster[:,0] * cluster[:,1]
 22 |     iou = intersection / (area1 + area2 - intersection)
 23 | 
 24 |     return iou
 25 | 
 26 | def avg_iou(box, cluster):
 27 |     return np.mean([np.max(cas_iou(box[i], cluster)) for i in range(box.shape[0])])
 28 | 
 29 | def kmeans(box, k):
 30 |     #-------------------------------------------------------------#
 31 |     #   取出一共有多少框
 32 |     #-------------------------------------------------------------#
 33 |     row = box.shape[0]
 34 |     
 35 |     #-------------------------------------------------------------#
 36 |     #   每个框各个点的位置
 37 |     #-------------------------------------------------------------#
 38 |     distance = np.empty((row, k))
 39 |     
 40 |     #-------------------------------------------------------------#
 41 |     #   最后的聚类位置
 42 |     #-------------------------------------------------------------#
 43 |     last_clu = np.zeros((row, ))
 44 | 
 45 |     np.random.seed()
 46 | 
 47 |     #-------------------------------------------------------------#
 48 |     #   随机选5个当聚类中心
 49 |     #-------------------------------------------------------------#
 50 |     cluster = box[np.random.choice(row, k, replace = False)]
 51 | 
 52 |     iter = 0
 53 |     while True:
 54 |         #-------------------------------------------------------------#
 55 |         #   计算当前框和先验框的宽高比例
 56 |         #-------------------------------------------------------------#
 57 |         for i in range(row):
 58 |             distance[i] = 1 - cas_iou(box[i], cluster)
 59 |         
 60 |         #-------------------------------------------------------------#
 61 |         #   取出最小点
 62 |         #-------------------------------------------------------------#
 63 |         near = np.argmin(distance, axis=1)
 64 | 
 65 |         if (last_clu == near).all():
 66 |             break
 67 |         
 68 |         #-------------------------------------------------------------#
 69 |         #   求每一个类的中位点
 70 |         #-------------------------------------------------------------#
 71 |         for j in range(k):
 72 |             cluster[j] = np.median(
 73 |                 box[near == j],axis=0)
 74 | 
 75 |         last_clu = near
 76 |         if iter % 5 == 0:
 77 |             print('iter: {:d}. avg_iou:{:.2f}'.format(iter, avg_iou(box, cluster)))
 78 |         iter += 1
 79 | 
 80 |     return cluster, near
 81 | 
 82 | def load_data(path):
 83 |     data = []
 84 |     #-------------------------------------------------------------#
 85 |     #   对于每一个xml都寻找box
 86 |     #-------------------------------------------------------------#
 87 |     for xml_file in tqdm(glob.glob('{}/*xml'.format(path))):
 88 |         tree    = ET.parse(xml_file)
 89 |         height  = int(tree.findtext('./size/height'))
 90 |         width   = int(tree.findtext('./size/width'))
 91 |         if height<=0 or width<=0:
 92 |             continue
 93 |         
 94 |         #-------------------------------------------------------------#
 95 |         #   对于每一个目标都获得它的宽高
 96 |         #-------------------------------------------------------------#
 97 |         for obj in tree.iter('object'):
 98 |             xmin = int(float(obj.findtext('bndbox/xmin'))) / width
 99 |             ymin = int(float(obj.findtext('bndbox/ymin'))) / height
100 |             xmax = int(float(obj.findtext('bndbox/xmax'))) / width
101 |             ymax = int(float(obj.findtext('bndbox/ymax'))) / height
102 | 
103 |             xmin = np.float64(xmin)
104 |             ymin = np.float64(ymin)
105 |             xmax = np.float64(xmax)
106 |             ymax = np.float64(ymax)
107 |             # 得到宽高
108 |             data.append([xmax - xmin, ymax - ymin])
109 |     return np.array(data)
110 | 
111 | if __name__ == '__main__':
112 |     np.random.seed(0)
113 |     #-------------------------------------------------------------#
114 |     #   运行该程序会计算'./VOCdevkit/VOC2007/Annotations'的xml
115 |     #   会生成yolo_anchors.txt
116 |     #-------------------------------------------------------------#
117 |     input_shape = [224, 224]
118 |     anchors_num = 9
119 |     #-------------------------------------------------------------#
120 |     #   载入数据集，可以使用VOC的xml
121 |     #-------------------------------------------------------------#
122 |     path        = 'VOCdevkit/VOC2007/Annotations'
123 |     
124 |     #-------------------------------------------------------------#
125 |     #   载入所有的xml
126 |     #   存储格式为转化为比例后的width,height
127 |     #-------------------------------------------------------------#
128 |     print('Load xmls.')
129 |     data = load_data(path)
130 |     print('Load xmls done.')
131 |     
132 |     #-------------------------------------------------------------#
133 |     #   使用k聚类算法
134 |     #-------------------------------------------------------------#
135 |     print('K-means boxes.')
136 |     cluster, near   = kmeans(data, anchors_num)
137 |     print('K-means boxes done.')
138 |     data            = data * np.array([input_shape[1], input_shape[0]])
139 |     cluster         = cluster * np.array([input_shape[1], input_shape[0]])
140 | 
141 |     #-------------------------------------------------------------#
142 |     #   绘图
143 |     #-------------------------------------------------------------#
144 |     for j in range(anchors_num):
145 |         plt.scatter(data[near == j][:,0], data[near == j][:,1])
146 |         plt.scatter(cluster[j][0], cluster[j][1], marker='x', c='black')
147 |     plt.savefig("kmeans_for_anchors.jpg")
148 |     plt.show()
149 |     print('Save kmeans_for_anchors.jpg in root dir.')
150 | 
151 |     cluster = cluster[np.argsort(cluster[:, 0] * cluster[:, 1])]
152 |     print('avg_ratio:{:.2f}'.format(avg_iou(data, cluster)))
153 |     print(cluster)
154 | 
155 |     f = open("yolo_anchors.txt", 'w')
156 |     row = np.shape(cluster)[0]
157 |     for i in range(row):
158 |         if i == 0:
159 |             x_y = "%d,%d" % (cluster[i][0], cluster[i][1])
160 |         else:
161 |             x_y = ", %d,%d" % (cluster[i][0], cluster[i][1])
162 |         f.write(x_y)
163 |     f.close()
164 | 


--------------------------------------------------------------------------------
/logs/README.md:
--------------------------------------------------------------------------------
1 | 用于存放训练好的文件


--------------------------------------------------------------------------------
/logs/gesture_loss_2021_11_14_22_04_00/epoch_loss_2021_11_14_22_04_00.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/logs/gesture_loss_2021_11_14_22_04_00/epoch_loss_2021_11_14_22_04_00.png


--------------------------------------------------------------------------------
/logs/gesture_loss_2021_11_14_22_04_00/epoch_loss_2021_11_14_22_04_00.txt:
--------------------------------------------------------------------------------
  1 | 390.34399642473386
  2 | 21.87092101721116
  3 | 14.030741856421953
  4 | 11.276778338867942
  5 | 9.814540598127577
  6 | 8.89100271978496
  7 | 8.609104168267898
  8 | 7.924442773983802
  9 | 7.723959027984996
 10 | 7.2670195367601185
 11 | 7.255199196897907
 12 | 6.893556188654016
 13 | 6.661026071619104
 14 | 6.5294443972316785
 15 | 6.535371827490536
 16 | 6.529178083678822
 17 | 6.403998654565694
 18 | 6.439444012112087
 19 | 6.092924733220795
 20 | 5.926193254965323
 21 | 5.9576384785734575
 22 | 5.8119951972255
 23 | 5.6878520206168846
 24 | 5.819804650765878
 25 | 5.707105348139633
 26 | 5.458082881974585
 27 | 5.665041320117903
 28 | 5.317585485952872
 29 | 5.349038653903538
 30 | 5.283199619363855
 31 | 5.064980445084749
 32 | 5.070186079284291
 33 | 4.9681971073150635
 34 | 4.793072164794545
 35 | 4.973145805759194
 36 | 4.918124354915855
 37 | 4.663256362632469
 38 | 4.837633197690233
 39 | 4.743683688434554
 40 | 4.616998254516979
 41 | 4.524823586146037
 42 | 4.4209345593864535
 43 | 4.558955289699413
 44 | 4.333138801433422
 45 | 4.426347941528132
 46 | 4.412103137852233
 47 | 4.295655697952082
 48 | 4.364107617625484
 49 | 4.211893027211413
 50 | 4.084111590444306
 51 | 4.018801480163763
 52 | 3.8647101366961443
 53 | 3.734398615581018
 54 | 3.6937082122873375
 55 | 3.793811333032302
 56 | 3.429193678093545
 57 | 3.6194330038111886
 58 | 3.4087822738988898
 59 | 3.331124112193967
 60 | 3.3782305434162234
 61 | 3.3561593158009613
 62 | 3.25705443598606
 63 | 3.2106575075490973
 64 | 3.0107549484129303
 65 | 3.0536143231539077
 66 | 2.9674469438599953
 67 | 3.1300665189822516
 68 | 2.909559675204901
 69 | 2.9446099194479576
 70 | 2.8209132660686236
 71 | 2.8917798992292383
 72 | 2.815192371238897
 73 | 2.861111617566627
 74 | 2.9016677490722986
 75 | 2.8193857658792427
 76 | 2.8216423440126723
 77 | 2.777715330874478
 78 | 2.725730179820532
 79 | 2.589312877184079
 80 | 2.670389473438263
 81 | 2.626439411331106
 82 | 2.57100960759469
 83 | 2.6649326178026786
 84 | 2.449705180930503
 85 | 2.6089335954115715
 86 | 2.666015229291386
 87 | 2.5139025822281837
 88 | 2.4510488511971484
 89 | 2.60918134248551
 90 | 2.615589211384455
 91 | 2.4221341083815067
 92 | 2.5034887735490448
 93 | 2.3411180855315408
 94 | 2.3742799654970934
 95 | 2.4252039420383946
 96 | 2.5134657593788923
 97 | 2.5887757239886273
 98 | 2.5031773506859203
 99 | 2.3927585335425388
100 | 2.4924555529414874
101 | 2.3816184005987497
102 | 2.3525361067351
103 | 2.35756847280779
104 | 2.4606370890030154
105 | 2.262793848084079
106 | 2.283497501026701
107 | 2.2522216586419095
108 | 2.3806339068177307
109 | 2.345363718767961
110 | 2.305632569723659
111 | 2.1932848855669116
112 | 2.332635486199532
113 | 2.2705356725204138
114 | 2.233249652992796
115 | 2.4728508678115446
116 | 2.3142452859952125
117 | 2.3585592800820314
118 | 2.335805359078042
119 | 2.337391757118849
120 | 2.391327069129473
121 | 2.3404054016242792
122 | 2.3145943543425314
123 | 2.196398460570677
124 | 2.2358641638248056
125 | 2.3038836038774915
126 | 2.2790947368851415
127 | 2.2812541202630525
128 | 2.2533860233278924
129 | 2.3108025224488458
130 | 2.2092323683110284
131 | 2.308551702050515
132 | 2.2422945557369127
133 | 2.1741022714126257
134 | 2.44105933992951
135 | 2.3797168718811905
136 | 2.231722431326354
137 | 2.3973163276174922
138 | 2.1568032256615015
139 | 2.239097781571341
140 | 2.2258979082107544
141 | 2.1682290563612807
142 | 2.2031694714117935
143 | 2.2706658139272973
144 | 2.329095835854978
145 | 2.255610410262037
146 | 2.2977319957665454
147 | 2.3046101513836117
148 | 2.249893919369321
149 | 2.2964354607242123
150 | 2.315463280696192
151 | 


--------------------------------------------------------------------------------
/logs/gesture_loss_2021_11_14_22_04_00/epoch_val_loss_2021_11_14_22_04_00.txt:
--------------------------------------------------------------------------------
  1 | 28.558996200561523
  2 | 15.032766554090712
  3 | 11.545120133293999
  4 | 9.72215329276191
  5 | 8.58862935172187
  6 | 8.486469162835014
  7 | 7.804132832421197
  8 | 7.238262918260363
  9 | 6.890773402320014
 10 | 6.530833350287543
 11 | 6.475247330135769
 12 | 6.4751937124464245
 13 | 6.239521026611328
 14 | 6.0489738782246905
 15 | 6.12673372692532
 16 | 5.641317420535618
 17 | 6.040707217322455
 18 | 5.724527147081163
 19 | 5.265863656997681
 20 | 5.316834555731879
 21 | 5.4665877024332685
 22 | 5.622564209832086
 23 | 5.04600026872423
 24 | 5.060362259546916
 25 | 5.527375910017225
 26 | 5.435662375556098
 27 | 5.021538707945082
 28 | 5.028834872775608
 29 | 4.896508720186022
 30 | 4.989696582158406
 31 | 5.161070320341322
 32 | 5.098267449273004
 33 | 4.707995070351495
 34 | 4.600137048297459
 35 | 4.426739745669895
 36 | 4.481476042005751
 37 | 4.555791060129802
 38 | 4.693203316794501
 39 | 4.515556865268284
 40 | 4.371145274904039
 41 | 4.138098372353448
 42 | 4.548380348417494
 43 | 4.3106510109371605
 44 | 4.320602138837178
 45 | 4.131023804346721
 46 | 4.0555612511105
 47 | 4.217087030410767
 48 | 4.128190358479817
 49 | 4.032541698879665
 50 | 3.99964001443651
 51 | 3.741890834437476
 52 | 3.749820719162623
 53 | 3.6366468982564077
 54 | 3.5657983157369824
 55 | 3.9311270780033536
 56 | 3.6530382368299694
 57 | 4.012030104796092
 58 | 3.8975768751568265
 59 | 3.764561494191488
 60 | 3.4476174149248333
 61 | 3.535598119099935
 62 | 3.998010264502631
 63 | 3.88807831870185
 64 | 3.810675323009491
 65 | 3.8832875225279064
 66 | 3.532531124022272
 67 | 3.9232571257485285
 68 | 3.58525949716568
 69 | 3.7238865759637623
 70 | 3.7168162133958607
 71 | 3.503431843386756
 72 | 3.5310314959949918
 73 | 3.7993387116326227
 74 | 3.5516341394848294
 75 | 3.6795931648876934
 76 | 3.564246873060862
 77 | 3.484692699379391
 78 | 3.7236365245448217
 79 | 3.7466657956441245
 80 | 3.66163033246994
 81 | 3.751209259033203
 82 | 3.6696145402060614
 83 | 3.5883768465783863
 84 | 3.853155712286631
 85 | 3.4928252498308816
 86 | 3.602889382176929
 87 | 3.7287648055288525
 88 | 3.6207654832137957
 89 | 3.610999337500996
 90 | 3.8127831634547977
 91 | 3.6820534533924527
 92 | 3.716387847231494
 93 | 3.6561857561270394
 94 | 3.703249845239851
 95 | 3.686804783013132
 96 | 3.687538597318861
 97 | 3.8072550859716205
 98 | 3.6593143989642463
 99 | 3.707283900843726
100 | 3.7246316257450314
101 | 3.8617856800556183
102 | 3.573318580786387
103 | 3.531035871969329
104 | 3.6177483134799533
105 | 3.6122085054715476
106 | 3.5437003208531275
107 | 3.5555910716454187
108 | 3.6909723381201425
109 | 3.5987775524457297
110 | 3.646808198756642
111 | 3.6476809779802957
112 | 3.615621048543188
113 | 3.8375576469633312
114 | 3.7161912678016558
115 | 3.694040416015519
116 | 3.677286409669452
117 | 3.6777902278635235
118 | 3.7830483151806726
119 | 3.707444575097826
120 | 3.7904206779268055
121 | 3.5872142712275186
122 | 3.6864367392328052
123 | 3.7757607218292026
124 | 3.835707320107354
125 | 3.6799587168627315
126 | 3.8233094347847834
127 | 3.6921923756599426
128 | 3.7244893974728055
129 | 3.6797771288288965
130 | 3.711515542533663
131 | 3.8481360466943846
132 | 3.8577410876750946
133 | 3.710074722766876
134 | 3.8249045742882624
135 | 3.7864705423514047
136 | 3.6575047771135965
137 | 3.8352384832170276
138 | 3.7801570263173847
139 | 3.7013448344336615
140 | 3.6655967930952706
141 | 3.657223959763845
142 | 3.722360614273283
143 | 3.772919843594233
144 | 3.7007322708765664
145 | 3.7042017413510218
146 | 3.8934470083978443
147 | 3.8964318566852145
148 | 3.6877589921156564
149 | 3.713595751259062
150 | 3.597744878795412
151 | 


--------------------------------------------------------------------------------
/logs/loss_2022_04_27_08_48_16/epoch_loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/logs/loss_2022_04_27_08_48_16/epoch_loss.png


--------------------------------------------------------------------------------
/logs/loss_2022_04_27_08_48_16/epoch_loss.txt:
--------------------------------------------------------------------------------
  1 | 4.311199968511408
  2 | 2.641528855670582
  3 | 1.0470811074430293
  4 | 0.3173784383318641
  5 | 0.1660231321372769
  6 | 0.12659757448868317
  7 | 0.11646865105087106
  8 | 0.1186594499105757
  9 | 0.11129742149602283
 10 | 0.09524408660151741
 11 | 0.09781679036942395
 12 | 0.09211275726556778
 13 | 0.08542741784317927
 14 | 0.08707698925652287
 15 | 0.08003932000561194
 16 | 0.09124952453103932
 17 | 0.07743281058289787
 18 | 0.07542280463332479
 19 | 0.062316759235479614
 20 | 0.07161653380502354
 21 | 0.06821535866368901
 22 | 0.07083209519359199
 23 | 0.07460641437633471
 24 | 0.07450477220118046
 25 | 0.06487809985198757
 26 | 0.050884095443920654
 27 | 0.07091375355693427
 28 | 0.06433163752610033
 29 | 0.0656029749661684
 30 | 0.05935167453505776
 31 | 0.06459851512177424
 32 | 0.06376675008372827
 33 | 0.05718133259903301
 34 | 0.05716039274226536
 35 | 0.05911739483814348
 36 | 0.05761603875593706
 37 | 0.051265862939709965
 38 | 0.047803171148354355
 39 | 0.0480937244031917
 40 | 0.05439905263483524
 41 | 0.058482232080264526
 42 | 0.05515999550169164
 43 | 0.049258994361893696
 44 | 0.050817748277702114
 45 | 0.05204320927573876
 46 | 0.04787483066320419
 47 | 0.050909879194064575
 48 | 0.04848571375689723
 49 | 0.050943345593457874
 50 | 0.04928677469830622
 51 | 0.05230807525416215
 52 | 0.054047910206847724
 53 | 0.04724785503413942
 54 | 0.04339685816731718
 55 | 0.04393725813262993
 56 | 0.04542147194345792
 57 | 0.046219487115740775
 58 | 0.04159199959701962
 59 | 0.0356766721026765
 60 | 0.0347878428383006
 61 | 0.0335447210404608
 62 | 0.03512532735864322
 63 | 0.032664823532104495
 64 | 0.035281008275018795
 65 | 0.027731664727131525
 66 | 0.03222298233045472
 67 | 0.03146794889536169
 68 | 0.02836602210170693
 69 | 0.028307923198574118
 70 | 0.027572717414134078
 71 | 0.026898101448184913
 72 | 0.029324432876374987
 73 | 0.02880634083929989
 74 | 0.024556251760158274
 75 | 0.027897736864785354
 76 | 0.024288477210534943
 77 | 0.022848848750193915
 78 | 0.023355903372996385
 79 | 0.02707639779481623
 80 | 0.022250585506359735
 81 | 0.025191593791047732
 82 | 0.022139282586673897
 83 | 0.02378465121404992
 84 | 0.02341305265824
 85 | 0.02176100810368856
 86 | 0.025529090170231132
 87 | 0.023221762292087077
 88 | 0.02107305938584937
 89 | 0.019723483237127463
 90 | 0.027768902087377176
 91 | 0.023790666233334277
 92 | 0.02183559000906017
 93 | 0.019348353561427858
 94 | 0.021541342077155908
 95 | 0.020851219362682766
 96 | 0.01955224501176013
 97 | 0.02228688634932041
 98 | 0.018856989074912338
 99 | 0.01816959279692835
100 | 0.024754421909650166
101 | 


--------------------------------------------------------------------------------
/logs/loss_2022_04_27_08_48_16/epoch_val_loss.txt:
--------------------------------------------------------------------------------
  1 | 3.5736865997314453
  2 | 1.7812694907188416
  3 | 0.5147329270839691
  4 | 0.15201690793037415
  5 | 0.10024188458919525
  6 | 0.08380990475416183
  7 | 0.07576803863048553
  8 | 0.06853799521923065
  9 | 0.06467496231198311
 10 | 0.060902709141373634
 11 | 0.05481202341616154
 12 | 0.05164487101137638
 13 | 0.046625690534710884
 14 | 0.046081338077783585
 15 | 0.04508414678275585
 16 | 0.046726442873477936
 17 | 0.041066285222768784
 18 | 0.039722129702568054
 19 | 0.0392248947173357
 20 | 0.04033488966524601
 21 | 0.03738676756620407
 22 | 0.0356711745262146
 23 | 0.03774934820830822
 24 | 0.035463595762848854
 25 | 0.03278419189155102
 26 | 0.03250573016703129
 27 | 0.03182028792798519
 28 | 0.031694755889475346
 29 | 0.03182463627308607
 30 | 0.028715165331959724
 31 | 0.03064714837819338
 32 | 0.028574727475643158
 33 | 0.031066023744642735
 34 | 0.028762156143784523
 35 | 0.027465523220598698
 36 | 0.02787941414862871
 37 | 0.02755015157163143
 38 | 0.02802269347012043
 39 | 0.028581750579178333
 40 | 0.026334763504564762
 41 | 0.026825452223420143
 42 | 0.02670316770672798
 43 | 0.02603335492312908
 44 | 0.025488858111202717
 45 | 0.027477828785777092
 46 | 0.02550355065613985
 47 | 0.026508965529501438
 48 | 0.02424653246998787
 49 | 0.02420251350849867
 50 | 0.024741491302847862
 51 | 0.03815543949604035
 52 | 0.024845311418175697
 53 | 0.024306144565343857
 54 | 0.02493119016289711
 55 | 0.024438758194446564
 56 | 0.021836227178573607
 57 | 0.022118838876485823
 58 | 0.02276018038392067
 59 | 0.019801595807075502
 60 | 0.018804560229182244
 61 | 0.01913141254335642
 62 | 0.018066196143627165
 63 | 0.018252668902277946
 64 | 0.017480477318167688
 65 | 0.016695075295865537
 66 | 0.018235534615814685
 67 | 0.016669700480997564
 68 | 0.01745656579732895
 69 | 0.01661595106124878
 70 | 0.014982381090521812
 71 | 0.014259136654436589
 72 | 0.01617119237780571
 73 | 0.01583776492625475
 74 | 0.015838896110653877
 75 | 0.015466723032295704
 76 | 0.014705226197838784
 77 | 0.014486565068364144
 78 | 0.0142423365265131
 79 | 0.013639062829315662
 80 | 0.013229098543524742
 81 | 0.013664134219288826
 82 | 0.014067459665238858
 83 | 0.014119291864335536
 84 | 0.014162952080368996
 85 | 0.014096969552338124
 86 | 0.014010479114949704
 87 | 0.013855390436947345
 88 | 0.01369147039949894
 89 | 0.013611100800335407
 90 | 0.013387569226324558
 91 | 0.013233654387295245
 92 | 0.013060701824724675
 93 | 0.01311743687838316
 94 | 0.013459368608891964
 95 | 0.013417618162930012
 96 | 0.013188641518354416
 97 | 0.013131854124367237
 98 | 0.013138605654239655
 99 | 0.013040048442780972
100 | 0.013191545940935611
101 | 


--------------------------------------------------------------------------------
/logs/loss_2022_04_27_08_48_16/events.out.tfevents.1651049298.fef10e9dbba1.425.0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/logs/loss_2022_04_27_08_48_16/events.out.tfevents.1651049298.fef10e9dbba1.425.0


--------------------------------------------------------------------------------
/logs/loss_2022_04_27_10_38_48/epoch_loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/logs/loss_2022_04_27_10_38_48/epoch_loss.png


--------------------------------------------------------------------------------
/logs/loss_2022_04_27_10_38_48/epoch_loss.txt:
--------------------------------------------------------------------------------
  1 | 4.417048931121826
  2 | 2.7174118811433967
  3 | 1.0889532132582231
  4 | 0.3425311154939912
  5 | 0.17422378638928587
  6 | 0.13641497018662366
  7 | 0.11632075736468489
  8 | 0.11424875665794719
  9 | 0.10951222343878313
 10 | 0.11042191968722777
 11 | 0.0965666960586201
 12 | 0.09156128205358982
 13 | 0.09250037236647173
 14 | 0.09282402846623551
 15 | 0.08625757846642625
 16 | 0.07673129354688255
 17 | 0.07389622215520252
 18 | 0.07624811069531874
 19 | 0.08134209279986945
 20 | 0.08268712799657475
 21 | 0.06569299051030116
 22 | 0.06593379310586235
 23 | 0.07313475605439056
 24 | 0.06932794980027458
 25 | 0.07105197571218014
 26 | 0.05761696923185478
 27 | 0.05699523843147538
 28 | 0.05502087775279175
 29 | 0.056425975635647774
 30 | 0.060862130570140754
 31 | 0.05275308594784953
 32 | 0.05468131161548875
 33 | 0.06639060936868191
 34 | 0.0586402067406611
 35 | 0.05531726946884936
 36 | 0.05826686415821314
 37 | 0.05614634239199487
 38 | 0.060194396329197014
 39 | 0.056169633330269295
 40 | 0.05521787144243717
 41 | 0.05759791826659983
 42 | 0.06400778830390084
 43 | 0.048669698648154736
 44 | 0.05138815820894458
 45 | 0.05391152406280691
 46 | 0.048903680660507896
 47 | 0.05098097136413509
 48 | 0.046242827380245384
 49 | 0.05179907051338391
 50 | 0.0525860372422771
 51 | 0.05424936364094416
 52 | 0.049993348659740554
 53 | 0.04597619854741626
 54 | 0.04917745155592759
 55 | 0.05255601741373539
 56 | 0.04698830768465996
 57 | 0.041387100517749784
 58 | 0.04129959721532133
 59 | 0.04556649559073978
 60 | 0.036499715513653226
 61 | 0.03981801929573218
 62 | 0.04143420826229784
 63 | 0.03435336612164974
 64 | 0.03496221779949135
 65 | 0.03109016865491867
 66 | 0.03035914318429099
 67 | 0.029583082410196464
 68 | 0.03257722655932108
 69 | 0.030363482443822754
 70 | 0.027382713970210817
 71 | 0.03354052487346861
 72 | 0.02999182954016659
 73 | 0.027540474219454658
 74 | 0.03399232141673565
 75 | 0.027007617097761897
 76 | 0.025914737520118556
 77 | 0.0295799125606815
 78 | 0.02715012611200412
 79 | 0.025495433765980933
 80 | 0.0296443536463711
 81 | 0.023164296481344434
 82 | 0.025637096497747633
 83 | 0.024675296164221233
 84 | 0.02778547273741828
 85 | 0.021970178662902778
 86 | 0.023107113461527558
 87 | 0.024780070698923535
 88 | 0.022441018600430754
 89 | 0.023930547055270937
 90 | 0.0282184108470877
 91 | 0.023034340888261794
 92 | 0.024948879559006956
 93 | 0.021047428602145778
 94 | 0.019247366736332577
 95 | 0.019984866658018696
 96 | 0.02513700392511156
 97 | 0.02460642974409792
 98 | 0.0241888129669759
 99 | 0.024461371141175428
100 | 0.023433364638023906
101 | 


--------------------------------------------------------------------------------
/logs/loss_2022_04_27_10_38_48/epoch_val_loss.txt:
--------------------------------------------------------------------------------
  1 | 3.682404637336731
  2 | 1.8932517766952515
  3 | 0.5478550791740417
  4 | 0.1596439927816391
  5 | 0.1100359559059143
  6 | 0.0877840518951416
  7 | 0.07812783867120743
  8 | 0.07114855200052261
  9 | 0.06861080229282379
 10 | 0.059281766414642334
 11 | 0.057694293558597565
 12 | 0.051728978753089905
 13 | 0.052549805492162704
 14 | 0.04606110043823719
 15 | 0.04738330654799938
 16 | 0.04431380145251751
 17 | 0.04233948327600956
 18 | 0.04040302708745003
 19 | 0.038821205496788025
 20 | 0.0383895430713892
 21 | 0.03584542125463486
 22 | 0.03636615164577961
 23 | 0.03440128639340401
 24 | 0.031500913202762604
 25 | 0.03160226531326771
 26 | 0.03259335644543171
 27 | 0.03182834479957819
 28 | 0.03255347441881895
 29 | 0.03205320052802563
 30 | 0.03115831222385168
 31 | 0.030962957069277763
 32 | 0.03099967911839485
 33 | 0.028362704440951347
 34 | 0.029792566783726215
 35 | 0.029385950416326523
 36 | 0.028081808239221573
 37 | 0.02900168113410473
 38 | 0.028213596902787685
 39 | 0.026003092527389526
 40 | 0.029015707783401012
 41 | 0.027079648338258266
 42 | 0.02746042888611555
 43 | 0.026224803179502487
 44 | 0.02623423095792532
 45 | 0.026428623124957085
 46 | 0.025775899179279804
 47 | 0.025982394814491272
 48 | 0.02434847690165043
 49 | 0.027825096622109413
 50 | 0.026163294911384583
 51 | 0.029283170774579047
 52 | 0.025315795838832856
 53 | 0.027043038606643678
 54 | 0.028298694640398026
 55 | 0.024901207908987998
 56 | 0.021958087757229804
 57 | 0.02251458093523979
 58 | 0.022333519905805586
 59 | 0.021478286758065224
 60 | 0.021176514402031898
 61 | 0.018941503018140793
 62 | 0.019572099670767784
 63 | 0.018108497187495232
 64 | 0.018086655251681804
 65 | 0.017889507673680784
 66 | 0.01727491766214371
 67 | 0.01810304317623377
 68 | 0.020134907588362692
 69 | 0.018655003793537617
 70 | 0.018117578141391276
 71 | 0.017840097844600677
 72 | 0.01779591590166092
 73 | 0.016621771082282067
 74 | 0.017149972915649413
 75 | 0.016952383518218993
 76 | 0.015586855821311474
 77 | 0.01567951999604702
 78 | 0.0161365307867527
 79 | 0.01567267570644617
 80 | 0.01678410042077303
 81 | 0.015898118540644646
 82 | 0.01655469797551632
 83 | 0.015443072095513344
 84 | 0.015269587188959122
 85 | 0.015318373404443263
 86 | 0.015480193309485912
 87 | 0.015252745896577834
 88 | 0.015485197678208351
 89 | 0.01524040475487709
 90 | 0.015235877968370915
 91 | 0.015190575830638408
 92 | 0.01506870575249195
 93 | 0.015268886275589467
 94 | 0.015318392775952816
 95 | 0.015248116478323937
 96 | 0.01509730275720358
 97 | 0.015357919968664646
 98 | 0.015471475012600423
 99 | 0.015338210947811603
100 | 0.015286244638264179
101 | 


--------------------------------------------------------------------------------
/logs/loss_2022_04_27_10_38_48/events.out.tfevents.1651055931.9b45dd4991ae.367.0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/logs/loss_2022_04_27_10_38_48/events.out.tfevents.1651055931.9b45dd4991ae.367.0


--------------------------------------------------------------------------------
/logs/loss_2022_04_27_12_50_47/epoch_loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/logs/loss_2022_04_27_12_50_47/epoch_loss.png


--------------------------------------------------------------------------------
/logs/loss_2022_04_27_12_50_47/epoch_loss.txt:
--------------------------------------------------------------------------------
  1 | 4.458093025467613
  2 | 2.7262558070096103
  3 | 1.0888537033037706
  4 | 0.3306311368942261
  5 | 0.1712129498747262
  6 | 0.12332972951910713
  7 | 0.1077601161192764
  8 | 0.10889687660065564
  9 | 0.10751076347448608
 10 | 0.09971555254676125
 11 | 0.09748913144523447
 12 | 0.09051749330352653
 13 | 0.08674890751188452
 14 | 0.09196238592267036
 15 | 0.0813636336136948
 16 | 0.08286366950381886
 17 | 0.07791051878170534
 18 | 0.0753517130559141
 19 | 0.07469043592837724
 20 | 0.07069844498553059
 21 | 0.06863954527811571
 22 | 0.05802192301912741
 23 | 0.07001199353147637
 24 | 0.0646351370960474
 25 | 0.0635682385076176
 26 | 0.06396392174065113
 27 | 0.062142887406728485
 28 | 0.0702532638203014
 29 | 0.056375787339427254
 30 | 0.06388939967886968
 31 | 0.05778990279544483
 32 | 0.06408696647056124
 33 | 0.06048921140080148
 34 | 0.046278277158059856
 35 | 0.05944571127607064
 36 | 0.05725045552985235
 37 | 0.05380251800472086
 38 | 0.053617957894775
 39 | 0.053481346842917526
 40 | 0.05578712136908011
 41 | 0.05615681384436109
 42 | 0.0525641811334274
 43 | 0.04595534486526793
 44 | 0.04221054826947776
 45 | 0.0491331076588143
 46 | 0.04645225058563731
 47 | 0.047417608005079354
 48 | 0.045993872325528755
 49 | 0.04980102206834338
 50 | 0.05388529971241951
 51 | 0.04780796766281128
 52 | 0.051682502610815896
 53 | 0.05296175873114003
 54 | 0.04763079182141357
 55 | 0.03715274184942245
 56 | 0.038538362830877304
 57 | 0.03803896543880304
 58 | 0.04017537732919057
 59 | 0.03992160202728377
 60 | 0.03339115016990238
 61 | 0.03391021318319771
 62 | 0.03317808165318436
 63 | 0.033503353450861244
 64 | 0.034213335605131255
 65 | 0.037453227241834
 66 | 0.033429956477549344
 67 | 0.032547304261889724
 68 | 0.03456145400802294
 69 | 0.026851379209094577
 70 | 0.029029812270568476
 71 | 0.02536299385958248
 72 | 0.02381322646720542
 73 | 0.02601998903685146
 74 | 0.020065840913189782
 75 | 0.02312256395816803
 76 | 0.028637176213992966
 77 | 0.023025286176966295
 78 | 0.023644178753925695
 79 | 0.024718130793836383
 80 | 0.02247788065837489
 81 | 0.023494062303668923
 82 | 0.025069689253966014
 83 | 0.02251974062787162
 84 | 0.024839345862468085
 85 | 0.021578845319648585
 86 | 0.022635220984617867
 87 | 0.022249876335263253
 88 | 0.01972206729567713
 89 | 0.018786311563518312
 90 | 0.02083740762124459
 91 | 0.02136736027896404
 92 | 0.019557259066237342
 93 | 0.018951669645806152
 94 | 0.020326226308114
 95 | 0.021592341653174824
 96 | 0.019481366727915075
 97 | 0.018176950762669244
 98 | 0.02213383706079589
 99 | 0.019981356461842854
100 | 0.020978835970163347
101 | 


--------------------------------------------------------------------------------
/logs/loss_2022_04_27_12_50_47/epoch_val_loss.txt:
--------------------------------------------------------------------------------
  1 | 3.7051011323928833
  2 | 1.8262890577316284
  3 | 0.5144035518169403
  4 | 0.16302762925624847
  5 | 0.10760901868343353
  6 | 0.09057768434286118
  7 | 0.07540924847126007
  8 | 0.07146378979086876
  9 | 0.06520375981926918
 10 | 0.05898746848106384
 11 | 0.054325105622410774
 12 | 0.05058479495346546
 13 | 0.0504811592400074
 14 | 0.046029604971408844
 15 | 0.04258855804800987
 16 | 0.042371716350317
 17 | 0.040247365832328796
 18 | 0.04038912057876587
 19 | 0.03568720445036888
 20 | 0.038001520559191704
 21 | 0.03973718546330929
 22 | 0.035464052110910416
 23 | 0.03202499449253082
 24 | 0.02998754195868969
 25 | 0.032502518966794014
 26 | 0.03302299045026302
 27 | 0.03285937011241913
 28 | 0.029083450324833393
 29 | 0.029631994664669037
 30 | 0.03396240994334221
 31 | 0.029673300683498383
 32 | 0.028280221857130527
 33 | 0.027639511972665787
 34 | 0.028393579646945
 35 | 0.027291471138596535
 36 | 0.026989608071744442
 37 | 0.02653918694704771
 38 | 0.027808908373117447
 39 | 0.027841621078550816
 40 | 0.02570505067706108
 41 | 0.025745649822056293
 42 | 0.026372630149126053
 43 | 0.024600804783403873
 44 | 0.026447951793670654
 45 | 0.02569119818508625
 46 | 0.026840184815227985
 47 | 0.024051610380411148
 48 | 0.02362955827265978
 49 | 0.024365886114537716
 50 | 0.024577765725553036
 51 | 0.031041909381747244
 52 | 0.02641780823469162
 53 | 0.02472583018243313
 54 | 0.02326701581478119
 55 | 0.019615407288074493
 56 | 0.021174174174666403
 57 | 0.019675580970942973
 58 | 0.01869105324149132
 59 | 0.018909885734319686
 60 | 0.019662134535610675
 61 | 0.01899590715765953
 62 | 0.016179793514311314
 63 | 0.01545619908720255
 64 | 0.015423668920993805
 65 | 0.018800214119255542
 66 | 0.0158102760091424
 67 | 0.0158376544713974
 68 | 0.01783675402402878
 69 | 0.015972125343978405
 70 | 0.01454415861517191
 71 | 0.014743064902722836
 72 | 0.013825051300227643
 73 | 0.01407058835029602
 74 | 0.013598379865288734
 75 | 0.013919505663216114
 76 | 0.013623752258718013
 77 | 0.014403878897428512
 78 | 0.014411385357379913
 79 | 0.01337964329868555
 80 | 0.013076365552842617
 81 | 0.013368507660925389
 82 | 0.013667609356343747
 83 | 0.013365321420133114
 84 | 0.013264597952365875
 85 | 0.013465055078268052
 86 | 0.01281917616724968
 87 | 0.01263135802000761
 88 | 0.012750985845923424
 89 | 0.01290153805166483
 90 | 0.01281326413154602
 91 | 0.012850469164550304
 92 | 0.012885735556483268
 93 | 0.013168741390109063
 94 | 0.013198709674179554
 95 | 0.0126633545383811
 96 | 0.012886124104261399
 97 | 0.012797533720731735
 98 | 0.012569484673440457
 99 | 0.012130422703921794
100 | 0.012647346407175065
101 | 


--------------------------------------------------------------------------------
/logs/loss_2022_04_27_12_50_47/events.out.tfevents.1651063849.274e119c63fb.1015.0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/logs/loss_2022_04_27_12_50_47/events.out.tfevents.1651063849.274e119c63fb.1015.0


--------------------------------------------------------------------------------
/logs/loss_2022_04_28_00_40_54/epoch_loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/logs/loss_2022_04_28_00_40_54/epoch_loss.png


--------------------------------------------------------------------------------
/logs/loss_2022_04_28_00_40_54/epoch_loss.txt:
--------------------------------------------------------------------------------
  1 | 4.65520715713501
  2 | 3.142860672690652
  3 | 1.5020794109864668
  4 | 0.5057930661873384
  5 | 0.231415910476988
  6 | 0.1739024357362227
  7 | 0.1501499665054408
  8 | 0.13435510004108603
  9 | 0.12552000412886793
 10 | 0.1170116358182647
 11 | 0.1097346202216365
 12 | 0.10218094119971449
 13 | 0.09653170305219563
 14 | 0.09267877211624925
 15 | 0.08959556709636342
 16 | 0.08778026801618663
 17 | 0.0813840397379615
 18 | 0.08208547498692166
 19 | 0.07795694809068333
 20 | 0.0774568762968887
 21 | 0.07742892002517526
 22 | 0.07316952097144994
 23 | 0.0717044398188591
 24 | 0.07023497687822039
 25 | 0.07019331865012646
 26 | 0.06709351390600204
 27 | 0.06731417910619215
 28 | 0.06743009134449741
 29 | 0.06635952317579226
 30 | 0.06368578191507947
 31 | 0.06163112514398315
 32 | 0.06230247410183603
 33 | 0.0609466726468368
 34 | 0.059141877869313415
 35 | 0.059421493925831535
 36 | 0.05991599742661823
 37 | 0.05664417435499755
 38 | 0.05543165823275393
 39 | 0.055084149945865975
 40 | 0.05501931634816257
 41 | 0.05503683621910485
 42 | 0.05480257303199985
 43 | 0.05537006275897676
 44 | 0.05448474125428633
 45 | 0.05232419649308378
 46 | 0.05311859653077342
 47 | 0.05284474231302738
 48 | 0.051879515532742844
 49 | 0.052160846746780655
 50 | 0.048417276787486946
 51 | 0.07137971396247546
 52 | 0.06579171708888477
 53 | 0.06337685022089216
 54 | 0.058213022185696496
 55 | 0.06011202625102467
 56 | 0.05577432778146532
 57 | 0.05307989873819881
 58 | 0.05232232163349788
 59 | 0.047045067904724014
 60 | 0.045659234002232554
 61 | 0.046541030332446096
 62 | 0.041184055474069385
 63 | 0.04066362182299296
 64 | 0.041569982427689764
 65 | 0.03817177605297831
 66 | 0.0390163982907931
 67 | 0.041840214654803275
 68 | 0.038884344117509
 69 | 0.03724856765733825
 70 | 0.03528667270309395
 71 | 0.03439781483676699
 72 | 0.03381528837813271
 73 | 0.03448933532668485
 74 | 0.03202489465475082
 75 | 0.03492107921176486
 76 | 0.029904662817716598
 77 | 0.03170571397576067
 78 | 0.03179397972093688
 79 | 0.0303279221471813
 80 | 0.029197406230701342
 81 | 0.02931012755466832
 82 | 0.029168612303005326
 83 | 0.027595289217101204
 84 | 0.02744665356973807
 85 | 0.026995969439546266
 86 | 0.027659311725033654
 87 | 0.02661879969139894
 88 | 0.027540806722309855
 89 | 0.025905532100134427
 90 | 0.0255900744555725
 91 | 0.026152818650007247
 92 | 0.025521984696388243
 93 | 0.025769058614969254
 94 | 0.02644038177612755
 95 | 0.02754443759719531
 96 | 0.024427745077345107
 97 | 0.025285613785187403
 98 | 0.026757355800105465
 99 | 0.02632749622894658
100 | 0.026431108307507303
101 | 


--------------------------------------------------------------------------------
/logs/loss_2022_04_28_00_40_54/epoch_val_loss.txt:
--------------------------------------------------------------------------------
  1 | 3.979103207588196
  2 | 2.2379150390625
  3 | 0.7213477790355682
  4 | 0.20374882966279984
  5 | 0.13149111717939377
  6 | 0.10669583082199097
  7 | 0.08946957811713219
  8 | 0.07844944670796394
  9 | 0.07209542766213417
 10 | 0.06465885788202286
 11 | 0.060964012518525124
 12 | 0.05698745884001255
 13 | 0.053726550191640854
 14 | 0.053231727331876755
 15 | 0.05091492086648941
 16 | 0.04869535565376282
 17 | 0.045929690822958946
 18 | 0.043502215296030045
 19 | 0.04109686613082886
 20 | 0.042073581367731094
 21 | 0.03760443814098835
 22 | 0.036989014595746994
 23 | 0.0369559321552515
 24 | 0.03501574695110321
 25 | 0.03553796745836735
 26 | 0.03463827446103096
 27 | 0.03613190911710262
 28 | 0.03488997742533684
 29 | 0.03165611159056425
 30 | 0.03400527499616146
 31 | 0.03399870544672012
 32 | 0.03354485519230366
 33 | 0.030975072644650936
 34 | 0.0297493115067482
 35 | 0.029600737616419792
 36 | 0.02729297336190939
 37 | 0.027453931979835033
 38 | 0.028598678298294544
 39 | 0.027731974609196186
 40 | 0.030310326255857944
 41 | 0.026450641453266144
 42 | 0.027599090710282326
 43 | 0.027010041289031506
 44 | 0.026624951511621475
 45 | 0.027538660913705826
 46 | 0.026772234588861465
 47 | 0.026853609830141068
 48 | 0.027332110330462456
 49 | 0.026638195849955082
 50 | 0.026076992973685265
 51 | 0.029674236476421357
 52 | 0.03184238411486149
 53 | 0.02579696960747242
 54 | 0.026541008800268173
 55 | 0.028798045963048934
 56 | 0.02365291155874729
 57 | 0.024432314187288286
 58 | 0.024038903787732123
 59 | 0.022221024334430694
 60 | 0.022891897335648538
 61 | 0.01906990371644497
 62 | 0.021012770757079125
 63 | 0.020605479553341865
 64 | 0.020398029685020448
 65 | 0.019171418249607088
 66 | 0.01934974603354931
 67 | 0.020316287130117416
 68 | 0.019410957768559455
 69 | 0.018952558375895025
 70 | 0.017280998453497887
 71 | 0.0177790354937315
 72 | 0.018064785189926623
 73 | 0.01828454677015543
 74 | 0.01720294840633869
 75 | 0.01639395747333765
 76 | 0.016722467541694642
 77 | 0.016642549820244313
 78 | 0.01656894329935312
 79 | 0.015701821073889732
 80 | 0.015975065901875495
 81 | 0.016035530529916287
 82 | 0.015547602623701095
 83 | 0.01571439057588577
 84 | 0.01621132455766201
 85 | 0.015737788379192354
 86 | 0.01545789260417223
 87 | 0.015475354716181755
 88 | 0.015286277420818806
 89 | 0.015320570766925811
 90 | 0.015739747881889345
 91 | 0.015467294491827488
 92 | 0.015462711267173291
 93 | 0.015299991890788078
 94 | 0.014891423098742963
 95 | 0.014959413185715675
 96 | 0.015149685740470886
 97 | 0.015103902481496335
 98 | 0.014999320358037948
 99 | 0.015079839341342448
100 | 0.0150094548240304
101 | 


--------------------------------------------------------------------------------
/logs/loss_2022_04_28_00_40_54/events.out.tfevents.1651106457.117e69507361.564.0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/logs/loss_2022_04_28_00_40_54/events.out.tfevents.1651106457.117e69507361.564.0


--------------------------------------------------------------------------------
/logs/loss_2022_04_28_14_54_17/epoch_loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/logs/loss_2022_04_28_14_54_17/epoch_loss.png


--------------------------------------------------------------------------------
/logs/loss_2022_04_28_14_54_17/epoch_loss.txt:
--------------------------------------------------------------------------------
  1 | 3.3427013629012636
  2 | 0.590641807185279
  3 | 0.20623346173928844
  4 | 0.13935681179993684
  5 | 0.11779505432479911
  6 | 0.10669546342558331
  7 | 0.0995730339239041
  8 | 0.09289641034685903
  9 | 0.08960233483877447
 10 | 0.08865145291719172
 11 | 0.08199652650703987
 12 | 0.08332964736554357
 13 | 0.08082385785463783
 14 | 0.07951261059691508
 15 | 0.07187494143015809
 16 | 0.07693152552884486
 17 | 0.07002928235257665
 18 | 0.06805908863122265
 19 | 0.06391975372615788
 20 | 0.06560571603477001
 21 | 0.06688064705166552
 22 | 0.062423851289269
 23 | 0.06189305805083778
 24 | 0.06095021272905999
 25 | 0.05913820943484704
 26 | 0.05766822151425812
 27 | 0.05601171863575776
 28 | 0.050846687311099634
 29 | 0.0500038359210723
 30 | 0.05070198744845887
 31 | 0.04995435054620935
 32 | 0.04775367355905473
 33 | 0.04747431728368004
 34 | 0.05075365285803046
 35 | 0.049145943324805964
 36 | 0.04660840377522012
 37 | 0.04236363642849028
 38 | 0.04308449916231136
 39 | 0.04134128590942257
 40 | 0.04134896128024492
 41 | 0.040451034003247816
 42 | 0.040809157501078316
 43 | 0.04189636699027485
 44 | 0.03930564734877812
 45 | 0.04004836426013046
 46 | 0.03825837828529378
 47 | 0.03547370240299238
 48 | 0.03609677294476165
 49 | 0.035196643017439376
 50 | 0.03430712522628407
 51 | 0.04613391875237641
 52 | 0.05915206435084757
 53 | 0.045893035898916426
 54 | 0.04116026466298434
 55 | 0.0429476417420018
 56 | 0.03999344222247601
 57 | 0.034763063090698175
 58 | 0.03578517514720766
 59 | 0.03375119598996308
 60 | 0.03283411696967151
 61 | 0.03579546554893669
 62 | 0.03182236654813298
 63 | 0.03289871994768166
 64 | 0.03093694845964718
 65 | 0.028104687105709066
 66 | 0.0279214970392382
 67 | 0.02814181201522135
 68 | 0.026209147684534806
 69 | 0.024499411086758807
 70 | 0.02420818345660033
 71 | 0.02401729004470528
 72 | 0.02229024926847261
 73 | 0.021894857381832684
 74 | 0.021454263018677013
 75 | 0.020758730825683518
 76 | 0.02169692176976241
 77 | 0.019593946940343207
 78 | 0.019191343562367062
 79 | 0.0194984604876178
 80 | 0.02022809916266447
 81 | 0.017767922341590747
 82 | 0.01808840037944416
 83 | 0.018055611812613077
 84 | 0.017147960676164885
 85 | 0.015863009145121194
 86 | 0.015711418758534514
 87 | 0.016356725540633003
 88 | 0.016216116898512052
 89 | 0.015499612758867442
 90 | 0.015379458964647104
 91 | 0.016735805649982973
 92 | 0.014799573211025239
 93 | 0.015743958410651734
 94 | 0.014708074144113601
 95 | 0.014328512709148021
 96 | 0.015710317682371373
 97 | 0.01542505334622951
 98 | 0.014101080921439765
 99 | 0.014700241691510503
100 | 0.014981216627832812
101 | 


--------------------------------------------------------------------------------
/logs/loss_2022_04_28_14_54_17/epoch_val_loss.txt:
--------------------------------------------------------------------------------
  1 | 1.1948505997657777
  2 | 0.2769960485398769
  3 | 0.1309874437749386
  4 | 0.10720247365534305
  5 | 0.0823921812698245
  6 | 0.06992402952164412
  7 | 0.0779087346047163
  8 | 0.06684023551642895
  9 | 0.06127838855609298
 10 | 0.06253754440695047
 11 | 0.06560290511697531
 12 | 0.05028826054185629
 13 | 0.05307867294177413
 14 | 0.046788199059665206
 15 | 0.05016098273918033
 16 | 0.041087670251727104
 17 | 0.049103803001344204
 18 | 0.04360529286786914
 19 | 0.04554138630628586
 20 | 0.03290841649286449
 21 | 0.04053358295932412
 22 | 0.038861811719834806
 23 | 0.040706123877316716
 24 | 0.03609397481195629
 25 | 0.03557254578918219
 26 | 0.03464236315339804
 27 | 0.03329266821965575
 28 | 0.03151600556448102
 29 | 0.030487440805882216
 30 | 0.03179679936729372
 31 | 0.030378894181922078
 32 | 0.03546885224059224
 33 | 0.028008161624893547
 34 | 0.030146837001666427
 35 | 0.028426590701565148
 36 | 0.030748564330860973
 37 | 0.028618200030177832
 38 | 0.03007163112051785
 39 | 0.02537959101609886
 40 | 0.028373095905408263
 41 | 0.025091598788276315
 42 | 0.027431158255785702
 43 | 0.0274854336399585
 44 | 0.0238998107612133
 45 | 0.024188394332304596
 46 | 0.025603410461917518
 47 | 0.022463220916688443
 48 | 0.021122918161563576
 49 | 0.023449525656178593
 50 | 0.02241856213659048
 51 | 0.030004368303343652
 52 | 0.03465683250688016
 53 | 0.025661695492453875
 54 | 0.025751420808956028
 55 | 0.0250759432092309
 56 | 0.024298161384649575
 57 | 0.023818821809254587
 58 | 0.02544179279357195
 59 | 0.02248522681184113
 60 | 0.02272053265478462
 61 | 0.021450468467082828
 62 | 0.022059163730591535
 63 | 0.01965688676573336
 64 | 0.019216149824205785
 65 | 0.020135902601759882
 66 | 0.02419198288116604
 67 | 0.017368705407716335
 68 | 0.01844585470389575
 69 | 0.015960348234511913
 70 | 0.017440078582149
 71 | 0.015858469036174938
 72 | 0.01589310457929969
 73 | 0.01708033775212243
 74 | 0.030576034029945732
 75 | 0.014990652166306972
 76 | 0.020580469502601773
 77 | 0.01814356680260971
 78 | 0.016363495017867536
 79 | 0.016028978914255275
 80 | 0.015470803889911622
 81 | 0.017227034358074888
 82 | 0.016705141763668507
 83 | 0.01754759649047628
 84 | 0.02099468276137486
 85 | 0.02627454571193084
 86 | 0.016601535107474773
 87 | 0.019520913722226398
 88 | 0.016074266715440898
 89 | 0.015431905922014266
 90 | 0.015508590545505286
 91 | 0.013960553548531606
 92 | 0.015237966080894694
 93 | 0.015095379657577724
 94 | 0.01584624971728772
 95 | 0.015998882468556984
 96 | 0.01559915920952335
 97 | 0.01576072332682088
 98 | 0.016472871112637223
 99 | 0.014691755402600393
100 | 0.014136423316085712
101 | 


--------------------------------------------------------------------------------
/logs/loss_2022_04_28_14_54_17/events.out.tfevents.1651128857.LAPTOP-IE5MVR15.24536.0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/logs/loss_2022_04_28_14_54_17/events.out.tfevents.1651128857.LAPTOP-IE5MVR15.24536.0


--------------------------------------------------------------------------------
/logs/loss_2022_05_02_14_57_57/epoch_loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/logs/loss_2022_05_02_14_57_57/epoch_loss.png


--------------------------------------------------------------------------------
/logs/loss_2022_05_02_14_57_57/epoch_loss.txt:
--------------------------------------------------------------------------------
  1 | 17.101406224568684
  2 | 10.8318008740743
  3 | 4.240671507517496
  4 | 1.0019958794116974
  5 | 0.37954812149206796
  6 | 0.2687491794427236
  7 | 0.22754189471403757
  8 | 0.19753684798876445
  9 | 0.1771739900112152
 10 | 0.16613257378339769
 11 | 0.14869885842005412
 12 | 0.13755213419596354
 13 | 0.13448657716313997
 14 | 0.12195368086298307
 15 | 0.1128251701593399
 16 | 0.10961388771732648
 17 | 0.10665635019540787
 18 | 0.10061115821202596
 19 | 0.0969288428624471
 20 | 0.09855932394663493
 21 | 0.0889915977915128
 22 | 0.08737521395087242
 23 | 0.08142138893405597
 24 | 0.081571697195371
 25 | 0.08513322671254477
 26 | 0.0799174178391695
 27 | 0.07576848641037941
 28 | 0.07407469501097998
 29 | 0.07028314856191477
 30 | 0.07057048715651035
 31 | 0.0709464654326439
 32 | 0.07267625791331132
 33 | 0.06727536929150423
 34 | 0.0662232073644797
 35 | 0.06310114165147146
 36 | 0.06374188972016176
 37 | 0.06626531345148881
 38 | 0.05850081816315651
 39 | 0.056352414563298224
 40 | 0.05607227062185605
 41 | 0.057017019018530846
 42 | 0.05952403930326303
 43 | 0.057178026810288426
 44 | 0.051601182545224826
 45 | 0.051208433136343955
 46 | 0.051774655406673746
 47 | 0.050313881536324816
 48 | 0.04995381236076355
 49 | 0.048258970181147255
 50 | 0.04914092607796192
 51 | 0.06768884502040844
 52 | 0.06370118060149252
 53 | 0.05913636611464123
 54 | 0.05405666360942026
 55 | 0.052676150932287176
 56 | 0.04658079737176498
 57 | 0.0453374430614834
 58 | 0.04464669832183669
 59 | 0.04386587947762261
 60 | 0.038802354324919484
 61 | 0.038647202278176945
 62 | 0.03676449179959794
 63 | 0.03481319181931516
 64 | 0.0347878224371622
 65 | 0.03463629183825105
 66 | 0.03564592384112378
 67 | 0.03169099524772415
 68 | 0.03046195216011256
 69 | 0.029932656922998527
 70 | 0.02693921811878681
 71 | 0.02624520653237899
 72 | 0.02643638541145871
 73 | 0.024267646336617568
 74 | 0.02276813123996059
 75 | 0.022201836206174146
 76 | 0.025956252019386738
 77 | 0.022044219623785465
 78 | 0.01913531731891756
 79 | 0.018665816611610354
 80 | 0.020095466733134042
 81 | 0.019377306945777186
 82 | 0.019703271872519204
 83 | 0.017145425283039608
 84 | 0.017283631632259735
 85 | 0.015655260040269545
 86 | 0.017102580536932994
 87 | 0.01568767197119693
 88 | 0.015433585511830945
 89 | 0.01649760961299762
 90 | 0.01480112192220986
 91 | 0.01458095806495597
 92 | 0.01634620662080124
 93 | 0.014586444144758086
 94 | 0.01412225275610884
 95 | 0.014443966598870853
 96 | 0.014422304722635696
 97 | 0.014611958689056338
 98 | 0.01421121487316365
 99 | 0.014518235716968775
100 | 0.01446291058867549
101 | 


--------------------------------------------------------------------------------
/logs/loss_2022_05_02_14_57_57/epoch_val_loss.txt:
--------------------------------------------------------------------------------
  1 | 14.182828585306803
  2 | 6.964454015096028
  3 | 1.7364161411921184
  4 | 0.4160226086775462
  5 | 0.23061403135458627
  6 | 0.18009933829307556
  7 | 0.15316933890183768
  8 | 0.12558546662330627
  9 | 0.11013514300187428
 10 | 0.10292657961448033
 11 | 0.09011622269948323
 12 | 0.0910362775127093
 13 | 0.07362671693166097
 14 | 0.06496318926413854
 15 | 0.06620646268129349
 16 | 0.05724670241276423
 17 | 0.05412605529030164
 18 | 0.05476600428422292
 19 | 0.04998553295930227
 20 | 0.04453219473361969
 21 | 0.046111090729633965
 22 | 0.03964699556430181
 23 | 0.04128604009747505
 24 | 0.0385576585928599
 25 | 0.040300281097491585
 26 | 0.036520869781573616
 27 | 0.03233897313475609
 28 | 0.03402836248278618
 29 | 0.029543195540706318
 30 | 0.03613479311267535
 31 | 0.030847225338220596
 32 | 0.03196833903590838
 33 | 0.030614140133063
 34 | 0.027615018809835117
 35 | 0.029661099116007488
 36 | 0.028920121490955353
 37 | 0.031096385171016056
 38 | 0.026975831637779873
 39 | 0.02437760556737582
 40 | 0.024089227120081585
 41 | 0.024140140662590664
 42 | 0.02602989909549554
 43 | 0.023526831219593685
 44 | 0.023234928647677105
 45 | 0.02490025262037913
 46 | 0.024476055055856705
 47 | 0.02195119174818198
 48 | 0.02400912468632062
 49 | 0.021773086860775948
 50 | 0.021737251430749893
 51 | 0.03704084885808138
 52 | 0.027747553415023364
 53 | 0.02609148549918945
 54 | 0.027060106253394715
 55 | 0.02310138403509672
 56 | 0.02209098207262846
 57 | 0.019444907299027994
 58 | 0.01728303673175665
 59 | 0.022116302154385127
 60 | 0.017028711091440458
 61 | 0.018385969388943452
 62 | 0.020397630233604174
 63 | 0.017034396529197693
 64 | 0.0161269146662492
 65 | 0.014033435915525142
 66 | 0.015593188958099255
 67 | 0.015342251899150701
 68 | 0.015232413147504512
 69 | 0.01195777920432962
 70 | 0.013383755532021705
 71 | 0.01376453500527602
 72 | 0.012433087345785819
 73 | 0.010423123764877137
 74 | 0.011021508405414911
 75 | 0.010145062186683599
 76 | 0.011127662809135823
 77 | 0.009687475251177182
 78 | 0.010067089210049463
 79 | 0.008900713497916093
 80 | 0.009318945392106589
 81 | 0.008838421199470758
 82 | 0.008917749107170563
 83 | 0.008874757430301262
 84 | 0.00834214468844808
 85 | 0.009231974191677112
 86 | 0.00839424731496435
 87 | 0.00878818673439897
 88 | 0.008268425169472512
 89 | 0.008394974642075025
 90 | 0.008387481507200461
 91 | 0.008073390604784856
 92 | 0.008447423434028259
 93 | 0.007967768595195733
 94 | 0.008031251589552714
 95 | 0.007093459976693759
 96 | 0.0077013208960684445
 97 | 0.008188612150171628
 98 | 0.008229664276139094
 99 | 0.008362234892466893
100 | 0.0081037561624096
101 | 


--------------------------------------------------------------------------------
/logs/loss_2022_05_02_14_57_57/events.out.tfevents.1651503480.437fb01f4bb0.370.0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/logs/loss_2022_05_02_14_57_57/events.out.tfevents.1651503480.437fb01f4bb0.370.0


--------------------------------------------------------------------------------
/model_data/.gitattributes:
--------------------------------------------------------------------------------
1 | *.pth filter=lfs diff=lfs merge=lfs -text
2 | 


--------------------------------------------------------------------------------
/model_data/gesture.yaml:
--------------------------------------------------------------------------------
 1 | #------------------------------detect.py--------------------------------#
 2 | # 这一部分是为了半自动标注数据，可以减轻负担，需要提前训练一个权重，以Labelme格式保存
 3 | # dir_origin_path 图片存放位置
 4 | # dir_save_path Annotation保存位置
 5 | # ----------------------------------------------------------------------#
 6 | dir_detect_path: ./JPEGImages 
 7 | detect_save_path: ./Annotation
 8 | 
 9 | # ----------------------------- train.py -------------------------------#
10 | nc: 8 # 类别的数量
11 | classes: ["up","down","left","right","front","back","clockwise","anticlockwise"] # 类别
12 | confidence: 0.5 # 置信度
13 | nms_iou: 0.3
14 | letterbox_image: False
15 | 
16 | lr_decay_type: cos # 使用到的学习率下降方式，可选的有step、cos
17 | # 用于设置是否使用多线程读取数据
18 | # 开启后会加快数据读取速度，但是会占用更多内存
19 | # 内存较小的电脑可以设置为2或者0，win建议设为0
20 | num_workers: 4


--------------------------------------------------------------------------------
/model_data/gesture_classes.txt:
--------------------------------------------------------------------------------
1 | up
2 | down
3 | left
4 | right
5 | front
6 | back
7 | clockwise
8 | anticlockwise


--------------------------------------------------------------------------------
/model_data/simhei.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/YoloGesture/f4e9ddb5451067c68d0eaec7e4dc63faa044b566/model_data/simhei.ttf


--------------------------------------------------------------------------------
/model_data/yolo_anchors.txt:
--------------------------------------------------------------------------------
1 | 12, 16,  19, 36,  40, 28,  36, 75,  76, 55,  72, 146,  142, 110,  192, 243,  459, 401


--------------------------------------------------------------------------------
/model_data/yolotiny_anchors.txt:
--------------------------------------------------------------------------------
1 | 10,14,  23,27,  37,58,  81,82,  135,169,  344,319


--------------------------------------------------------------------------------
/nets/CSPdarknet.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | from collections import OrderedDict
  3 | 
  4 | import torch
  5 | import torch.nn as nn
  6 | import torch.nn.functional as F
  7 | 
  8 | 
  9 | #-------------------------------------------------#
 10 | #   MISH激活函数
 11 | #-------------------------------------------------#
 12 | class Mish(nn.Module):
 13 |     def __init__(self):
 14 |         super(Mish, self).__init__()
 15 | 
 16 |     def forward(self, x):
 17 |         return x * torch.tanh(F.softplus(x))
 18 | 
 19 | #---------------------------------------------------#
 20 | #   卷积块 -> 卷积 + 标准化 + 激活函数
 21 | #   Conv2d + BatchNormalization + Mish
 22 | #---------------------------------------------------#
 23 | class BasicConv(nn.Module):
 24 |     def __init__(self, in_channels, out_channels, kernel_size, stride=1):
 25 |         super(BasicConv, self).__init__()
 26 | 
 27 |         self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, kernel_size//2, bias=False)
 28 |         self.bn = nn.BatchNorm2d(out_channels)
 29 |         self.activation = Mish()
 30 | 
 31 |     def forward(self, x):
 32 |         x = self.conv(x)
 33 |         x = self.bn(x)
 34 |         x = self.activation(x)
 35 |         return x
 36 | 
 37 | #---------------------------------------------------#
 38 | #   CSPdarknet的结构块的组成部分
 39 | #   内部堆叠的残差块
 40 | #---------------------------------------------------#
 41 | class Resblock(nn.Module):
 42 |     def __init__(self, channels, hidden_channels=None):
 43 |         super(Resblock, self).__init__()
 44 | 
 45 |         if hidden_channels is None:
 46 |             hidden_channels = channels
 47 | 
 48 |         self.block = nn.Sequential(
 49 |             BasicConv(channels, hidden_channels, 1),
 50 |             BasicConv(hidden_channels, channels, 3)
 51 |         )
 52 | 
 53 |     def forward(self, x):
 54 |         return x + self.block(x)
 55 | 
 56 | #--------------------------------------------------------------------#
 57 | #   CSPdarknet的结构块
 58 | #   首先利用ZeroPadding2D和一个步长为2x2的卷积块进行高和宽的压缩
 59 | #   然后建立一个大的残差边shortconv、这个大残差边绕过了很多的残差结构
 60 | #   主干部分会对num_blocks进行循环，循环内部是残差结构。
 61 | #   对于整个CSPdarknet的结构块，就是一个大残差块+内部多个小残差块
 62 | #--------------------------------------------------------------------#
 63 | class Resblock_body(nn.Module):
 64 |     def __init__(self, in_channels, out_channels, num_blocks, first):
 65 |         super(Resblock_body, self).__init__()
 66 |         #----------------------------------------------------------------#
 67 |         #   利用一个步长为2x2的卷积块进行高和宽的压缩
 68 |         #----------------------------------------------------------------#
 69 |         self.downsample_conv = BasicConv(in_channels, out_channels, 3, stride=2)
 70 | 
 71 |         if first:
 72 |             #--------------------------------------------------------------------------#
 73 |             #   然后建立一个大的残差边self.split_conv0、这个大残差边绕过了很多的残差结构
 74 |             #--------------------------------------------------------------------------#
 75 |             self.split_conv0 = BasicConv(out_channels, out_channels, 1)
 76 | 
 77 |             #----------------------------------------------------------------#
 78 |             #   主干部分会对num_blocks进行循环，循环内部是残差结构。
 79 |             #----------------------------------------------------------------#
 80 |             self.split_conv1 = BasicConv(out_channels, out_channels, 1)  
 81 |             self.blocks_conv = nn.Sequential(
 82 |                 Resblock(channels=out_channels, hidden_channels=out_channels//2),
 83 |                 BasicConv(out_channels, out_channels, 1)
 84 |             )
 85 | 
 86 |             self.concat_conv = BasicConv(out_channels*2, out_channels, 1)
 87 |         else:
 88 |             #--------------------------------------------------------------------------#
 89 |             #   然后建立一个大的残差边self.split_conv0、这个大残差边绕过了很多的残差结构
 90 |             #--------------------------------------------------------------------------#
 91 |             self.split_conv0 = BasicConv(out_channels, out_channels//2, 1)
 92 | 
 93 |             #----------------------------------------------------------------#
 94 |             #   主干部分会对num_blocks进行循环，循环内部是残差结构。
 95 |             #----------------------------------------------------------------#
 96 |             self.split_conv1 = BasicConv(out_channels, out_channels//2, 1)
 97 |             self.blocks_conv = nn.Sequential(
 98 |                 *[Resblock(out_channels//2) for _ in range(num_blocks)],
 99 |                 BasicConv(out_channels//2, out_channels//2, 1)
100 |             )
101 | 
102 |             self.concat_conv = BasicConv(out_channels, out_channels, 1)
103 | 
104 |     def forward(self, x):
105 |         x = self.downsample_conv(x)
106 | 
107 |         x0 = self.split_conv0(x)
108 | 
109 |         x1 = self.split_conv1(x)
110 |         x1 = self.blocks_conv(x1)
111 | 
112 |         #------------------------------------#
113 |         #   将大残差边再堆叠回来
114 |         #------------------------------------#
115 |         x = torch.cat([x1, x0], dim=1)
116 |         #------------------------------------#
117 |         #   最后对通道数进行整合
118 |         #------------------------------------#
119 |         x = self.concat_conv(x)
120 | 
121 |         return x
122 | 
123 | #---------------------------------------------------#
124 | #   CSPdarknet53 的主体部分
125 | #   输入为一张416x416x3的图片
126 | #   输出为三个有效特征层
127 | #---------------------------------------------------#
128 | class CSPDarkNet(nn.Module):
129 |     def __init__(self, layers):
130 |         super(CSPDarkNet, self).__init__()
131 |         self.inplanes = 32
132 |         # 416,416,3 -> 416,416,32
133 |         self.conv1 = BasicConv(3, self.inplanes, kernel_size=3, stride=1)
134 |         self.feature_channels = [64, 128, 256, 512, 1024]
135 | 
136 |         self.stages = nn.ModuleList([
137 |             # 416,416,32 -> 208,208,64
138 |             Resblock_body(self.inplanes, self.feature_channels[0], layers[0], first=True),
139 |             # 208,208,64 -> 104,104,128
140 |             Resblock_body(self.feature_channels[0], self.feature_channels[1], layers[1], first=False),
141 |             # 104,104,128 -> 52,52,256
142 |             Resblock_body(self.feature_channels[1], self.feature_channels[2], layers[2], first=False),
143 |             # 52,52,256 -> 26,26,512
144 |             Resblock_body(self.feature_channels[2], self.feature_channels[3], layers[3], first=False),
145 |             # 26,26,512 -> 13,13,1024
146 |             Resblock_body(self.feature_channels[3], self.feature_channels[4], layers[4], first=False)
147 |         ])
148 | 
149 |         self.num_features = 1
150 |         for m in self.modules():
151 |             if isinstance(m, nn.Conv2d):
152 |                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
153 |                 m.weight.data.normal_(0, math.sqrt(2. / n))
154 |             elif isinstance(m, nn.BatchNorm2d):
155 |                 m.weight.data.fill_(1)
156 |                 m.bias.data.zero_()
157 | 
158 | 
159 |     def forward(self, x):
160 |         x = self.conv1(x)
161 | 
162 |         x = self.stages[0](x)
163 |         x = self.stages[1](x)
164 |         out3 = self.stages[2](x)
165 |         out4 = self.stages[3](out3)
166 |         out5 = self.stages[4](out4)
167 | 
168 |         return out3, out4, out5
169 |     
170 | def darknet53(pretrained):
171 |     model = CSPDarkNet([1, 2, 8, 8, 4])
172 |     if pretrained:
173 |         model.load_state_dict(torch.load("model_data/CSPdarknet53_backbone_weights.pth"))
174 |     return model
175 | 


--------------------------------------------------------------------------------
/nets/CSPdarknet53_tiny.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | 
  3 | import torch
  4 | import torch.nn as nn
  5 | 
  6 | 
  7 | #-------------------------------------------------#
  8 | #   卷积块
  9 | #   Conv2d + BatchNorm2d + LeakyReLU
 10 | #-------------------------------------------------#
 11 | class BasicConv(nn.Module):
 12 |     def __init__(self, in_channels, out_channels, kernel_size, stride=1):
 13 |         super(BasicConv, self).__init__()
 14 | 
 15 |         self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, kernel_size//2, bias=False)
 16 |         self.bn = nn.BatchNorm2d(out_channels)
 17 |         self.activation = nn.LeakyReLU(0.1)
 18 | 
 19 |     def forward(self, x):
 20 |         x = self.conv(x)
 21 |         x = self.bn(x)
 22 |         x = self.activation(x)
 23 |         return x
 24 | 
 25 |          
 26 | '''
 27 |                     input
 28 |                       |
 29 |                   BasicConv
 30 |                       -----------------------
 31 |                       |                     |
 32 |                  route_group              route
 33 |                       |                     |
 34 |                   BasicConv                 |
 35 |                       |                     |
 36 |     -------------------                     |
 37 |     |                 |                     |
 38 |  route_1          BasicConv                 |
 39 |     |                 |                     |
 40 |     -----------------cat                    |
 41 |                       |                     |
 42 |         ----      BasicConv                 |
 43 |         |             |                     |
 44 |       feat           cat---------------------
 45 |                       |
 46 |                  MaxPooling2D
 47 | '''
 48 | #---------------------------------------------------#
 49 | #   CSPdarknet53-tiny的结构块
 50 | #   存在一个大残差边
 51 | #   这个大残差边绕过了很多的残差结构
 52 | #---------------------------------------------------#
 53 | class Resblock_body(nn.Module):
 54 |     def __init__(self, in_channels, out_channels):
 55 |         super(Resblock_body, self).__init__()
 56 |         self.out_channels = out_channels
 57 | 
 58 |         self.conv1 = BasicConv(in_channels, out_channels, 3)
 59 | 
 60 |         self.conv2 = BasicConv(out_channels//2, out_channels//2, 3)
 61 |         self.conv3 = BasicConv(out_channels//2, out_channels//2, 3)
 62 | 
 63 |         self.conv4 = BasicConv(out_channels, out_channels, 1)
 64 |         self.maxpool = nn.MaxPool2d([2,2],[2,2])
 65 | 
 66 |     def forward(self, x):
 67 |         # 利用一个3x3卷积进行特征整合
 68 |         x = self.conv1(x)
 69 |         # 引出一个大的残差边route
 70 |         route = x
 71 |         
 72 |         c = self.out_channels
 73 |         # 对特征层的通道进行分割，取第二部分作为主干部分。
 74 |         x = torch.split(x, c//2, dim = 1)[1]
 75 |         # 对主干部分进行3x3卷积
 76 |         x = self.conv2(x)
 77 |         # 引出一个小的残差边route_1
 78 |         route1 = x
 79 |         # 对第主干部分进行3x3卷积
 80 |         x = self.conv3(x)
 81 |         # 主干部分与残差部分进行相接
 82 |         x = torch.cat([x,route1], dim = 1) 
 83 | 
 84 |         # 对相接后的结果进行1x1卷积
 85 |         x = self.conv4(x)
 86 |         feat = x
 87 |         x = torch.cat([route, x], dim = 1)
 88 |         
 89 |         # 利用最大池化进行高和宽的压缩
 90 |         x = self.maxpool(x)
 91 |         return x,feat
 92 | 
 93 | class CSPDarkNet(nn.Module):
 94 |     def __init__(self):
 95 |         super(CSPDarkNet, self).__init__()
 96 |         # 首先利用两次步长为2x2的3x3卷积进行高和宽的压缩
 97 |         # 416,416,3 -> 208,208,32 -> 104,104,64
 98 |         self.conv1 = BasicConv(3, 32, kernel_size=3, stride=2)
 99 |         self.conv2 = BasicConv(32, 64, kernel_size=3, stride=2)
100 | 
101 |         # 104,104,64 -> 52,52,128
102 |         self.resblock_body1 =  Resblock_body(64, 64)
103 |         # 52,52,128 -> 26,26,256
104 |         self.resblock_body2 =  Resblock_body(128, 128)
105 |         # 26,26,256 -> 13,13,512
106 |         self.resblock_body3 =  Resblock_body(256, 256)
107 |         # 13,13,512 -> 13,13,512
108 |         self.conv3 = BasicConv(512, 512, kernel_size=3)
109 | 
110 |         self.num_features = 1
111 |         # 进行权值初始化
112 |         for m in self.modules():
113 |             if isinstance(m, nn.Conv2d):
114 |                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
115 |                 m.weight.data.normal_(0, math.sqrt(2. / n))
116 |             elif isinstance(m, nn.BatchNorm2d):
117 |                 m.weight.data.fill_(1)
118 |                 m.bias.data.zero_()
119 | 
120 | 
121 |     def forward(self, x):
122 |         # 416,416,3 -> 208,208,32 -> 104,104,64
123 |         x = self.conv1(x)
124 |         x = self.conv2(x)
125 | 
126 |         # 104,104,64 -> 52,52,128
127 |         x, _    = self.resblock_body1(x)
128 |         # 52,52,128 -> 26,26,256
129 |         x, _    = self.resblock_body2(x)
130 |         # 26,26,256 -> x为13,13,512
131 |         #           -> feat1为26,26,256
132 |         x, feat1    = self.resblock_body3(x)
133 | 
134 |         # 13,13,512 -> 13,13,512
135 |         x = self.conv3(x)
136 |         feat2 = x
137 |         return feat1,feat2
138 | 
139 | def darknet53_tiny(pretrained, **kwargs):
140 |     model = CSPDarkNet()
141 |     if pretrained:
142 |         model.load_state_dict(torch.load("model_data/CSPdarknet53_tiny_backbone_weights.pth"))
143 |     return model
144 | 


--------------------------------------------------------------------------------
/nets/__init__.py:
--------------------------------------------------------------------------------
1 | #


--------------------------------------------------------------------------------
/nets/attention.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import math
  4 | 
  5 | class se_block(nn.Module):
  6 |     def __init__(self, channel, ratio=16):
  7 |         super(se_block, self).__init__()
  8 |         self.avg_pool = nn.AdaptiveAvgPool2d(1)
  9 |         self.fc = nn.Sequential(
 10 |                 nn.Linear(channel, channel // ratio, bias=False),
 11 |                 nn.ReLU(inplace=True),
 12 |                 nn.Linear(channel // ratio, channel, bias=False),
 13 |                 nn.Sigmoid()
 14 |         )
 15 | 
 16 |     def forward(self, x):
 17 |         b, c, _, _ = x.size()
 18 |         y = self.avg_pool(x).view(b, c)
 19 |         y = self.fc(y).view(b, c, 1, 1)
 20 |         return x * y
 21 | 
 22 | class ChannelAttention(nn.Module):
 23 |     def __init__(self, in_planes, ratio=8):
 24 |         super(ChannelAttention, self).__init__()
 25 |         self.avg_pool = nn.AdaptiveAvgPool2d(1)
 26 |         self.max_pool = nn.AdaptiveMaxPool2d(1)
 27 | 
 28 |         # 利用1x1卷积代替全连接
 29 |         self.fc1   = nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False)
 30 |         self.relu1 = nn.ReLU()
 31 |         self.fc2   = nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False)
 32 | 
 33 |         self.sigmoid = nn.Sigmoid()
 34 | 
 35 |     def forward(self, x):
 36 |         avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x))))
 37 |         max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x))))
 38 |         out = avg_out + max_out
 39 |         return self.sigmoid(out)
 40 | 
 41 | class SpatialAttention(nn.Module):
 42 |     def __init__(self, kernel_size=7):
 43 |         super(SpatialAttention, self).__init__()
 44 | 
 45 |         assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
 46 |         padding = 3 if kernel_size == 7 else 1
 47 |         self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
 48 |         self.sigmoid = nn.Sigmoid()
 49 | 
 50 |     def forward(self, x):
 51 |         avg_out = torch.mean(x, dim=1, keepdim=True)
 52 |         max_out, _ = torch.max(x, dim=1, keepdim=True)
 53 |         x = torch.cat([avg_out, max_out], dim=1)
 54 |         x = self.conv1(x)
 55 |         return self.sigmoid(x)
 56 | 
 57 | class cbam_block(nn.Module):
 58 |     def __init__(self, channel, ratio=8, kernel_size=7):
 59 |         super(cbam_block, self).__init__()
 60 |         self.channelattention = ChannelAttention(channel, ratio=ratio)
 61 |         self.spatialattention = SpatialAttention(kernel_size=kernel_size)
 62 | 
 63 |     def forward(self, x):
 64 |         x = x*self.channelattention(x)
 65 |         x = x*self.spatialattention(x)
 66 |         return x
 67 | 
 68 | class eca_block(nn.Module):
 69 |     def __init__(self, channel, b=1, gamma=2):
 70 |         super(eca_block, self).__init__()
 71 |         kernel_size = int(abs((math.log(channel, 2) + b) / gamma))
 72 |         kernel_size = kernel_size if kernel_size % 2 else kernel_size + 1
 73 |         
 74 |         self.avg_pool = nn.AdaptiveAvgPool2d(1)
 75 |         self.conv = nn.Conv1d(1, 1, kernel_size=kernel_size, padding=(kernel_size - 1) // 2, bias=False) 
 76 |         self.sigmoid = nn.Sigmoid()
 77 | 
 78 |     def forward(self, x):
 79 |         y = self.avg_pool(x)
 80 |         y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1)
 81 |         y = self.sigmoid(y)
 82 |         return x * y.expand_as(x)
 83 | 
 84 | class CA_Block(nn.Module):
 85 |     def __init__(self, channel, reduction=16):
 86 |         super(CA_Block, self).__init__()
 87 |         
 88 |         self.conv_1x1 = nn.Conv2d(in_channels=channel, out_channels=channel//reduction, kernel_size=1, stride=1, bias=False)
 89 |  
 90 |         self.relu   = nn.ReLU()
 91 |         self.bn     = nn.BatchNorm2d(channel//reduction)
 92 |  
 93 |         self.F_h = nn.Conv2d(in_channels=channel//reduction, out_channels=channel, kernel_size=1, stride=1, bias=False)
 94 |         self.F_w = nn.Conv2d(in_channels=channel//reduction, out_channels=channel, kernel_size=1, stride=1, bias=False)
 95 |  
 96 |         self.sigmoid_h = nn.Sigmoid()
 97 |         self.sigmoid_w = nn.Sigmoid()
 98 |  
 99 |     def forward(self, x):
100 |         _, _, h, w = x.size()
101 |         
102 |         x_h = torch.mean(x, dim = 3, keepdim = True).permute(0, 1, 3, 2)
103 |         x_w = torch.mean(x, dim = 2, keepdim = True)
104 |  
105 |         x_cat_conv_relu = self.relu(self.bn(self.conv_1x1(torch.cat((x_h, x_w), 3))))
106 |  
107 |         x_cat_conv_split_h, x_cat_conv_split_w = x_cat_conv_relu.split([h, w], 3)
108 |  
109 |         s_h = self.sigmoid_h(self.F_h(x_cat_conv_split_h.permute(0, 1, 3, 2)))
110 |         s_w = self.sigmoid_w(self.F_w(x_cat_conv_split_w))
111 |  
112 |         out = x * s_h.expand_as(x) * s_w.expand_as(x)
113 |         return out
114 |  


--------------------------------------------------------------------------------
/nets/yolo.py:
--------------------------------------------------------------------------------
  1 | from collections import OrderedDict
  2 | 
  3 | import torch
  4 | import torch.nn as nn
  5 | 
  6 | from nets.CSPdarknet import darknet53
  7 | 
  8 | 
  9 | def conv2d(filter_in, filter_out, kernel_size, stride=1):
 10 |     pad = (kernel_size - 1) // 2 if kernel_size else 0
 11 |     return nn.Sequential(OrderedDict([
 12 |         ("conv", nn.Conv2d(filter_in, filter_out, kernel_size=kernel_size, stride=stride, padding=pad, bias=False)),
 13 |         ("bn", nn.BatchNorm2d(filter_out)),
 14 |         ("relu", nn.LeakyReLU(0.1)),
 15 |     ]))
 16 | 
 17 | #---------------------------------------------------#
 18 | #   SPP结构，利用不同大小的池化核进行池化
 19 | #   池化后堆叠
 20 | #---------------------------------------------------#
 21 | class SpatialPyramidPooling(nn.Module):
 22 |     def __init__(self, pool_sizes=[5, 9, 13]):
 23 |         super(SpatialPyramidPooling, self).__init__()
 24 | 
 25 |         self.maxpools = nn.ModuleList([nn.MaxPool2d(pool_size, 1, pool_size//2) for pool_size in pool_sizes])
 26 | 
 27 |     def forward(self, x):
 28 |         features = [maxpool(x) for maxpool in self.maxpools[::-1]]
 29 |         features = torch.cat(features + [x], dim=1)
 30 | 
 31 |         return features
 32 | 
 33 | #---------------------------------------------------#
 34 | #   卷积 + 上采样
 35 | #---------------------------------------------------#
 36 | class Upsample(nn.Module):
 37 |     def __init__(self, in_channels, out_channels):
 38 |         super(Upsample, self).__init__()
 39 | 
 40 |         self.upsample = nn.Sequential(
 41 |             conv2d(in_channels, out_channels, 1),
 42 |             nn.Upsample(scale_factor=2, mode='nearest')
 43 |         )
 44 | 
 45 |     def forward(self, x,):
 46 |         x = self.upsample(x)
 47 |         return x
 48 | 
 49 | #---------------------------------------------------#
 50 | #   三次卷积块
 51 | #---------------------------------------------------#
 52 | def make_three_conv(filters_list, in_filters):
 53 |     m = nn.Sequential(
 54 |         conv2d(in_filters, filters_list[0], 1),
 55 |         conv2d(filters_list[0], filters_list[1], 3),
 56 |         conv2d(filters_list[1], filters_list[0], 1),
 57 |     )
 58 |     return m
 59 | 
 60 | #---------------------------------------------------#
 61 | #   五次卷积块
 62 | #---------------------------------------------------#
 63 | def make_five_conv(filters_list, in_filters):
 64 |     m = nn.Sequential(
 65 |         conv2d(in_filters, filters_list[0], 1),
 66 |         conv2d(filters_list[0], filters_list[1], 3),
 67 |         conv2d(filters_list[1], filters_list[0], 1),
 68 |         conv2d(filters_list[0], filters_list[1], 3),
 69 |         conv2d(filters_list[1], filters_list[0], 1),
 70 |     )
 71 |     return m
 72 | 
 73 | #---------------------------------------------------#
 74 | #   最后获得yolov4的输出
 75 | #---------------------------------------------------#
 76 | def yolo_head(filters_list, in_filters):
 77 |     m = nn.Sequential(
 78 |         conv2d(in_filters, filters_list[0], 3),
 79 |         nn.Conv2d(filters_list[0], filters_list[1], 1),
 80 |     )
 81 |     return m
 82 | 
 83 | #---------------------------------------------------#
 84 | #   yolo_body
 85 | #---------------------------------------------------#
 86 | class YoloBody(nn.Module):
 87 |     def __init__(self, anchors_mask, num_classes, pretrained = False):
 88 |         super(YoloBody, self).__init__()
 89 |         #---------------------------------------------------#   
 90 |         #   生成CSPdarknet53的主干模型
 91 |         #   获得三个有效特征层，他们的shape分别是：
 92 |         #   52,52,256
 93 |         #   26,26,512
 94 |         #   13,13,1024
 95 |         #---------------------------------------------------#
 96 |         self.backbone   = darknet53(pretrained)
 97 | 
 98 |         self.conv1      = make_three_conv([512,1024],1024)
 99 |         self.SPP        = SpatialPyramidPooling()
100 |         self.conv2      = make_three_conv([512,1024],2048)
101 | 
102 |         self.upsample1          = Upsample(512,256)
103 |         self.conv_for_P4        = conv2d(512,256,1)
104 |         self.make_five_conv1    = make_five_conv([256, 512],512)
105 | 
106 |         self.upsample2          = Upsample(256,128)
107 |         self.conv_for_P3        = conv2d(256,128,1)
108 |         self.make_five_conv2    = make_five_conv([128, 256],256)
109 | 
110 |         # 3*(5+num_classes) = 3*(5+20) = 3*(4+1+20)=75
111 |         self.yolo_head3         = yolo_head([256, len(anchors_mask[0]) * (5 + num_classes)],128)
112 | 
113 |         self.down_sample1       = conv2d(128,256,3,stride=2)
114 |         self.make_five_conv3    = make_five_conv([256, 512],512)
115 | 
116 |         # 3*(5+num_classes) = 3*(5+20) = 3*(4+1+20)=75
117 |         self.yolo_head2         = yolo_head([512, len(anchors_mask[1]) * (5 + num_classes)],256)
118 | 
119 |         self.down_sample2       = conv2d(256,512,3,stride=2)
120 |         self.make_five_conv4    = make_five_conv([512, 1024],1024)
121 | 
122 |         # 3*(5+num_classes)=3*(5+20)=3*(4+1+20)=75
123 |         self.yolo_head1         = yolo_head([1024, len(anchors_mask[2]) * (5 + num_classes)],512)
124 | 
125 | 
126 |     def forward(self, x):
127 |         #  backbone
128 |         x2, x1, x0 = self.backbone(x)
129 | 
130 |         # 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 -> 13,13,2048 
131 |         P5 = self.conv1(x0)
132 |         P5 = self.SPP(P5)
133 |         # 13,13,2048 -> 13,13,512 -> 13,13,1024 -> 13,13,512
134 |         P5 = self.conv2(P5)
135 | 
136 |         # 13,13,512 -> 13,13,256 -> 26,26,256
137 |         P5_upsample = self.upsample1(P5)
138 |         # 26,26,512 -> 26,26,256
139 |         P4 = self.conv_for_P4(x1)
140 |         # 26,26,256 + 26,26,256 -> 26,26,512
141 |         P4 = torch.cat([P4,P5_upsample],axis=1)
142 |         # 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256
143 |         P4 = self.make_five_conv1(P4)
144 | 
145 |         # 26,26,256 -> 26,26,128 -> 52,52,128
146 |         P4_upsample = self.upsample2(P4)
147 |         # 52,52,256 -> 52,52,128
148 |         P3 = self.conv_for_P3(x2)
149 |         # 52,52,128 + 52,52,128 -> 52,52,256
150 |         P3 = torch.cat([P3,P4_upsample],axis=1)
151 |         # 52,52,256 -> 52,52,128 -> 52,52,256 -> 52,52,128 -> 52,52,256 -> 52,52,128
152 |         P3 = self.make_five_conv2(P3)
153 | 
154 |         # 52,52,128 -> 26,26,256
155 |         P3_downsample = self.down_sample1(P3)
156 |         # 26,26,256 + 26,26,256 -> 26,26,512
157 |         P4 = torch.cat([P3_downsample,P4],axis=1)
158 |         # 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256
159 |         P4 = self.make_five_conv3(P4)
160 | 
161 |         # 26,26,256 -> 13,13,512
162 |         P4_downsample = self.down_sample2(P4)
163 |         # 13,13,512 + 13,13,512 -> 13,13,1024
164 |         P5 = torch.cat([P4_downsample,P5],axis=1)
165 |         # 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512
166 |         P5 = self.make_five_conv4(P5)
167 | 
168 |         #---------------------------------------------------#
169 |         #   第三个特征层
170 |         #   y3=(batch_size,75,52,52)
171 |         #---------------------------------------------------#
172 |         out2 = self.yolo_head3(P3)
173 |         #---------------------------------------------------#
174 |         #   第二个特征层
175 |         #   y2=(batch_size,75,26,26)
176 |         #---------------------------------------------------#
177 |         out1 = self.yolo_head2(P4)
178 |         #---------------------------------------------------#
179 |         #   第一个特征层
180 |         #   y1=(batch_size,75,13,13)
181 |         #---------------------------------------------------#
182 |         out0 = self.yolo_head1(P5)
183 | 
184 |         return out0, out1, out2
185 | 
186 | 


--------------------------------------------------------------------------------
/nets/yolo_tiny.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | 
  4 | from nets.CSPdarknet53_tiny import darknet53_tiny
  5 | from nets.attention import cbam_block, eca_block, se_block, CA_Block
  6 | 
  7 | attention_block = [se_block, cbam_block, eca_block, CA_Block]
  8 | 
  9 | #-------------------------------------------------#
 10 | #   卷积块 -> 卷积 + 标准化 + 激活函数
 11 | #   Conv2d + BatchNormalization + LeakyReLU
 12 | #-------------------------------------------------#
 13 | class BasicConv(nn.Module):
 14 |     def __init__(self, in_channels, out_channels, kernel_size, stride=1):
 15 |         super(BasicConv, self).__init__()
 16 | 
 17 |         self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, kernel_size//2, bias=False)
 18 |         self.bn = nn.BatchNorm2d(out_channels)
 19 |         self.activation = nn.LeakyReLU(0.1)
 20 | 
 21 |     def forward(self, x):
 22 |         x = self.conv(x)
 23 |         x = self.bn(x)
 24 |         x = self.activation(x)
 25 |         return x
 26 | 
 27 | #---------------------------------------------------#
 28 | #   卷积 + 上采样
 29 | #---------------------------------------------------#
 30 | class Upsample(nn.Module):
 31 |     def __init__(self, in_channels, out_channels):
 32 |         super(Upsample, self).__init__()
 33 | 
 34 |         self.upsample = nn.Sequential(
 35 |             BasicConv(in_channels, out_channels, 1),
 36 |             nn.Upsample(scale_factor=2, mode='nearest')
 37 |         )
 38 | 
 39 |     def forward(self, x,):
 40 |         x = self.upsample(x)
 41 |         return x
 42 | 
 43 | #---------------------------------------------------#
 44 | #   最后获得yolov4的输出
 45 | #---------------------------------------------------#
 46 | def yolo_head(filters_list, in_filters):
 47 |     m = nn.Sequential(
 48 |         BasicConv(in_filters, filters_list[0], 3),
 49 |         nn.Conv2d(filters_list[0], filters_list[1], 1),
 50 |     )
 51 |     return m
 52 | #---------------------------------------------------#
 53 | #   yolo_body
 54 | #---------------------------------------------------#
 55 | class YoloBodytiny(nn.Module):
 56 |     def __init__(self, anchors_mask, num_classes, phi=0, pretrained=False):
 57 |         super(YoloBodytiny, self).__init__()
 58 |         self.phi            = phi
 59 |         self.backbone       = darknet53_tiny(pretrained)
 60 | 
 61 |         self.conv_for_P5    = BasicConv(512,256,1)
 62 |         self.yolo_headP5    = yolo_head([512, len(anchors_mask[0]) * (5 + num_classes)],256)
 63 | 
 64 |         self.upsample       = Upsample(256,128)
 65 |         self.yolo_headP4    = yolo_head([256, len(anchors_mask[1]) * (5 + num_classes)],384)
 66 | 
 67 |         if 1 <= self.phi and self.phi <= 4:
 68 |             self.feat1_att      = attention_block[self.phi - 1](256)
 69 |             self.feat2_att      = attention_block[self.phi - 1](512)
 70 |             self.upsample_att   = attention_block[self.phi - 1](128)
 71 | 
 72 |     def forward(self, x):
 73 |         #---------------------------------------------------#
 74 |         #   生成CSPdarknet53_tiny的主干模型
 75 |         #   feat1的shape为26,26,256
 76 |         #   feat2的shape为13,13,512
 77 |         #---------------------------------------------------#
 78 |         feat1, feat2 = self.backbone(x)
 79 |         if 1 <= self.phi and self.phi <= 4:
 80 |             feat1 = self.feat1_att(feat1)
 81 |             feat2 = self.feat2_att(feat2)
 82 | 
 83 |         # 13,13,512 -> 13,13,256
 84 |         P5 = self.conv_for_P5(feat2)
 85 |         # 13,13,256 -> 13,13,512 -> 13,13,255
 86 |         out0 = self.yolo_headP5(P5) 
 87 | 
 88 |         # 13,13,256 -> 13,13,128 -> 26,26,128
 89 |         P5_Upsample = self.upsample(P5)
 90 |         # 26,26,256 + 26,26,128 -> 26,26,384
 91 |         if 1 <= self.phi and self.phi <= 4:
 92 |             P5_Upsample = self.upsample_att(P5_Upsample)
 93 |         P4 = torch.cat([P5_Upsample,feat1],axis=1)
 94 | 
 95 |         # 26,26,384 -> 26,26,256 -> 26,26,255
 96 |         out1 = self.yolo_headP4(P4)
 97 |         
 98 |         return out0, out1
 99 | 
100 | 


--------------------------------------------------------------------------------
/packages.txt:
--------------------------------------------------------------------------------
1 | freeglut3-dev
2 | libgtk2.0-dev


--------------------------------------------------------------------------------
/predict.py:
--------------------------------------------------------------------------------
  1 | #-----------------------------------------------------------------------#
  2 | #   predict.py将单张图片预测、摄像头检测、FPS测试和目录遍历检测等功能
  3 | #   整合到了一个py文件中，通过指定mode进行模式的修改。
  4 | #-----------------------------------------------------------------------#
  5 | import time
  6 | import yaml
  7 | import cv2
  8 | import numpy as np
  9 | from PIL import Image
 10 | from get_yaml import get_config
 11 | from yolo import YOLO
 12 | import argparse
 13 | if __name__ == "__main__":
 14 |     parser = argparse.ArgumentParser()
 15 |     parser.add_argument('--weights',type=str,default='model_data/yolotiny_SE_ep100.pth',help='initial weights path')
 16 |     parser.add_argument('--tiny',action='store_true',help='使用yolotiny模型')
 17 |     parser.add_argument('--phi',type=int,default=1,help='yolov4tiny注意力机制类型')
 18 |     parser.add_argument('--mode',type=str,choices=['dir_predict', 'video', 'fps','predict','heatmap','export_onnx'],default="dir_predict",help='预测的模式')
 19 |     parser.add_argument('--cuda',action='store_true',help='表示是否使用GPU')
 20 |     parser.add_argument('--shape',type=int,default=416,help='输入图像的shape')
 21 |     parser.add_argument('--video',type=str,default='',help='需要检测的视频文件')
 22 |     parser.add_argument('--save-video',type=str,default='',help='保存视频的位置')
 23 |     parser.add_argument('--confidence',type=float,default=0.5,help='只有得分大于置信度的预测框会被保留下来')
 24 |     parser.add_argument('--nms_iou',type=float,default=0.3,help='非极大抑制所用到的nms_iou大小')
 25 |     opt = parser.parse_args()
 26 |     print(opt)
 27 | 
 28 |     # 配置文件
 29 |     config = get_config()
 30 |     yolo = YOLO(opt)
 31 | 
 32 |     #----------------------------------------------------------------------------------------------------------#
 33 |     #   mode用于指定测试的模式：
 34 |     #   'predict'           表示单张图片预测，如果想对预测过程进行修改，如保存图片，截取对象等，可以先看下方详细的注释
 35 |     #   'video'             表示视频检测，可调用摄像头或者视频进行检测，详情查看下方注释。
 36 |     #   'fps'               表示测试fps，使用的图片是img里面的street.jpg，详情查看下方注释。
 37 |     #   'dir_predict'       表示遍历文件夹进行检测并保存。默认遍历img文件夹，保存img_out文件夹，详情查看下方注释。
 38 |     #   'heatmap'           表示进行预测结果的热力图可视化，详情查看下方注释。
 39 |     #   'export_onnx'       表示将模型导出为onnx，需要pytorch1.7.1以上。
 40 |     #----------------------------------------------------------------------------------------------------------#
 41 |     mode = opt.mode
 42 |     #-------------------------------------------------------------------------#
 43 |     #   crop                指定了是否在单张图片预测后对目标进行截取
 44 |     #   count               指定了是否进行目标的计数
 45 |     #   crop、count仅在mode='predict'时有效
 46 |     #-------------------------------------------------------------------------#
 47 |     crop            = False
 48 |     count           = False
 49 |     #----------------------------------------------------------------------------------------------------------#
 50 |     #   video_path          用于指定视频的路径，当video_path=0时表示检测摄像头
 51 |     #                       想要检测视频，则设置如video_path = "xxx.mp4"即可，代表读取出根目录下的xxx.mp4文件。
 52 |     #   video_save_path     表示视频保存的路径，当video_save_path=""时表示不保存
 53 |     #                       想要保存视频，则设置如video_save_path = "yyy.mp4"即可，代表保存为根目录下的yyy.mp4文件。
 54 |     #   video_fps           用于保存的视频的fps
 55 |     #
 56 |     #   video_path、video_save_path和video_fps仅在mode='video'时有效
 57 |     #   保存视频时需要ctrl+c退出或者运行到最后一帧才会完成完整的保存步骤。
 58 |     #----------------------------------------------------------------------------------------------------------#
 59 |     video_path      = 0 if opt.video == '' else opt.video
 60 |     video_save_path = opt.save_video
 61 |     video_fps       = 25.0
 62 |     #----------------------------------------------------------------------------------------------------------#
 63 |     #   test_interval       用于指定测量fps的时候，图片检测的次数。理论上test_interval越大，fps越准确。
 64 |     #   fps_image_path      用于指定测试的fps图片
 65 |     #   
 66 |     #   test_interval和fps_image_path仅在mode='fps'有效
 67 |     #----------------------------------------------------------------------------------------------------------#
 68 |     test_interval   = 100
 69 |     fps_image_path  = "img/up.jpg"
 70 |     #-------------------------------------------------------------------------#
 71 |     #   dir_origin_path     指定了用于检测的图片的文件夹路径
 72 |     #   dir_save_path       指定了检测完图片的保存路径
 73 |     #   
 74 |     #   dir_origin_path和dir_save_path仅在mode='dir_predict'时有效
 75 |     #-------------------------------------------------------------------------#
 76 |     dir_origin_path = "img/"
 77 |     dir_save_path   = "img_out/"
 78 |     #-------------------------------------------------------------------------#
 79 |     #   heatmap_save_path   热力图的保存路径，默认保存在model_data下
 80 |     #   
 81 |     #   heatmap_save_path仅在mode='heatmap'有效
 82 |     #-------------------------------------------------------------------------#
 83 |     heatmap_save_path = "model_data/heatmap_vision.png"
 84 |     #-------------------------------------------------------------------------#
 85 |     #   simplify            使用Simplify onnx
 86 |     #   onnx_save_path      指定了onnx的保存路径
 87 |     #-------------------------------------------------------------------------#
 88 |     simplify        = True
 89 |     onnx_save_path  = "model_data/models.onnx"
 90 | 
 91 |     if mode == "predict":
 92 |         '''
 93 |         1、如果想要进行检测完的图片的保存，利用r_image.save("img.jpg")即可保存，直接在predict.py里进行修改即可。 
 94 |         2、如果想要获得预测框的坐标，可以进入yolo.detect_image函数，在绘图部分读取top，left，bottom，right这四个值。
 95 |         3、如果想要利用预测框截取下目标，可以进入yolo.detect_image函数，在绘图部分利用获取到的top，left，bottom，right这四个值
 96 |         在原图上利用矩阵的方式进行截取。
 97 |         4、如果想要在预测图上写额外的字，比如检测到的特定目标的数量，可以进入yolo.detect_image函数，在绘图部分对predicted_class进行判断，
 98 |         比如判断if predicted_class == 'car': 即可判断当前目标是否为车，然后记录数量即可。利用draw.text即可写字。
 99 |         '''
100 |         while True:
101 |             img = input('Input image filename:')
102 |             try:
103 |                 image = Image.open(img)
104 |             except:
105 |                 print('Open Error! Try again!')
106 |                 continue
107 |             else:
108 |                 r_image = yolo.detect_image(image, crop = crop, count=count)
109 |                 r_image.show()
110 |                 r_image.save(dir_save_path + 'img_result.jpg')
111 | 
112 |     elif mode == "video":
113 |         capture = cv2.VideoCapture(video_path)
114 |         if video_save_path != '':
115 |             fourcc  = cv2.VideoWriter_fourcc(*'XVID')
116 |             size    = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)), int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
117 |             out     = cv2.VideoWriter(video_save_path, fourcc, video_fps, size)
118 | 
119 |         ref, frame = capture.read()
120 |         if not ref:
121 |             raise ValueError("未能正确读取摄像头（视频），请注意是否正确安装摄像头（是否正确填写视频路径）。")
122 | 
123 |         fps = 0.0
124 |         while(True):
125 |             t1 = time.time()
126 |             # 读取某一帧
127 |             ref, frame = capture.read()
128 |             if not ref:
129 |                 break
130 |             # 格式转变，BGRtoRGB
131 |             frame = cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
132 |             # 转变成Image
133 |             frame = Image.fromarray(np.uint8(frame))
134 |             # 进行检测
135 |             frame = np.array(yolo.detect_image(frame))
136 |             # RGBtoBGR满足opencv显示格式
137 |             frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR)
138 |             
139 |             fps  = ( fps + (1./(time.time()-t1)) ) / 2
140 |             print("fps= %.2f"%(fps))
141 |             frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
142 |             
143 |             cv2.imshow("video",frame)
144 |             c= cv2.waitKey(1) & 0xff 
145 |             if video_save_path != '':
146 |                 out.write(frame)
147 | 
148 |             if c==27:
149 |                 capture.release()
150 |                 break
151 | 
152 |         print("Video Detection Done!")
153 |         capture.release()
154 |         if video_save_path != '':
155 |             print("Save processed video to the path :" + video_save_path)
156 |             out.release()
157 |         cv2.destroyAllWindows()
158 |         
159 |     elif mode == "fps":
160 |         img = Image.open(fps_image_path)
161 |         tact_time = yolo.get_FPS(img, test_interval)
162 |         print(str(tact_time) + ' seconds, ' + str(1/tact_time) + 'FPS, @batch_size 1')
163 | 
164 |     elif mode == "dir_predict":
165 |         import os
166 | 
167 |         from tqdm import tqdm
168 | 
169 |         img_names = os.listdir(dir_origin_path)
170 |         for img_name in tqdm(img_names):
171 |             if img_name.lower().endswith(('.bmp', '.dib', '.png', '.jpg', '.jpeg', '.pbm', '.pgm', '.ppm', '.tif', '.tiff')):
172 |                 image_path  = os.path.join(dir_origin_path, img_name)
173 |                 image       = Image.open(image_path)
174 |                 r_image     = yolo.detect_image(image)
175 |                 if not os.path.exists(dir_save_path):
176 |                     os.makedirs(dir_save_path)
177 |                 r_image.save(os.path.join(dir_save_path, img_name.replace(".jpg", ".png")), quality=95, subsampling=0)
178 | 
179 |     elif mode == "heatmap":
180 |         while True:
181 |             img = input('Input image filename:')
182 |             try:
183 |                 image = Image.open(img)
184 |             except:
185 |                 print('Open Error! Try again!')
186 |                 continue
187 |             else:
188 |                 yolo.detect_heatmap(image, heatmap_save_path)
189 |                 
190 |     elif mode == "export_onnx":
191 |         yolo.convert_to_onnx(simplify, onnx_save_path)
192 |         
193 |     else:
194 |         raise AssertionError("Please specify the correct mode: 'predict', 'video', 'fps', 'heatmap', 'export_onnx', 'dir_predict'.")
195 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | scipy
 2 | numpy
 3 | matplotlib==3.7.0
 4 | opencv_python
 5 | torch==1.8.1
 6 | torchvision==0.9.1
 7 | tqdm==4.60.0
 8 | Pillow==8.2.0
 9 | h5py==2.10.0
10 | tensorboard
11 | pyyaml==6.0
12 | torchinfo
13 | labelimg==1.8.6
14 | streamlit==1.8.1
15 | opencv-python-headless==4.5.2.52
16 | streamlit<=1.11.*
17 | 


--------------------------------------------------------------------------------
/summary.py:
--------------------------------------------------------------------------------
 1 | #--------------------------------------------#
 2 | #   该部分代码用于看网络结构
 3 | #--------------------------------------------#
 4 | import torch
 5 | from torchinfo import summary
 6 | 
 7 | from nets.yolo import YoloBody
 8 | from nets.yolo_tiny import YoloBodytiny
 9 | if __name__ == "__main__":
10 |     # 需要使用device来指定网络在GPU还是CPU运行
11 |     device  = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
12 |     m       = YoloBody([[6, 7, 8], [3, 4, 5], [0, 1, 2]], 80).to(device)
13 |     summary(m, input_size=(1,3, 416, 416))
14 | 
15 |     m       = YoloBodytiny([[3, 4, 5], [1, 2, 3]], 80, phi = 1).to(device)
16 |     summary(m, input_size=(1,3, 416, 416))
17 | 


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | #


--------------------------------------------------------------------------------
/utils/callbacks.py:
--------------------------------------------------------------------------------
 1 | import datetime
 2 | import os
 3 | 
 4 | import torch
 5 | import matplotlib
 6 | matplotlib.use('Agg')
 7 | import scipy.signal
 8 | from matplotlib import pyplot as plt
 9 | from torch.utils.tensorboard import SummaryWriter
10 | 
11 | 
12 | class LossHistory():
13 |     def __init__(self, log_dir, model, input_shape):
14 |         time_str        = datetime.datetime.strftime(datetime.datetime.now(),'%Y_%m_%d_%H_%M_%S')
15 |         self.log_dir    = os.path.join(log_dir, "loss_" + str(time_str))
16 |         self.losses     = []
17 |         self.val_loss   = []
18 |         
19 |         os.makedirs(self.log_dir)
20 |         self.writer     = SummaryWriter(self.log_dir)
21 |         try:
22 |             dummy_input     = torch.randn(2, 3, input_shape[0], input_shape[1])
23 |             self.writer.add_graph(model, dummy_input)
24 |         except:
25 |             pass
26 | 
27 | 
28 |     def append_loss(self, epoch, loss, val_loss):
29 |         if not os.path.exists(self.log_dir):
30 |             os.makedirs(self.log_dir)
31 | 
32 |         self.losses.append(loss)
33 |         self.val_loss.append(val_loss)
34 | 
35 |         with open(os.path.join(self.log_dir, "epoch_loss.txt"), 'a') as f:
36 |             f.write(str(loss))
37 |             f.write("\n")
38 |         with open(os.path.join(self.log_dir, "epoch_val_loss.txt"), 'a') as f:
39 |             f.write(str(val_loss))
40 |             f.write("\n")
41 | 
42 |         self.writer.add_scalar('loss', loss, epoch)
43 |         self.writer.add_scalar('val_loss', val_loss, epoch)
44 |         self.loss_plot()
45 | 
46 |     def loss_plot(self):
47 |         iters = range(len(self.losses))
48 | 
49 |         plt.figure()
50 |         plt.plot(iters, self.losses, 'red', linewidth = 2, label='train loss')
51 |         plt.plot(iters, self.val_loss, 'coral', linewidth = 2, label='val loss')
52 |         try:
53 |             if len(self.losses) < 25:
54 |                 num = 5
55 |             else:
56 |                 num = 15
57 |             
58 |             plt.plot(iters, scipy.signal.savgol_filter(self.losses, num, 3), 'green', linestyle = '--', linewidth = 2, label='smooth train loss')
59 |             plt.plot(iters, scipy.signal.savgol_filter(self.val_loss, num, 3), '#8B4513', linestyle = '--', linewidth = 2, label='smooth val loss')
60 |         except:
61 |             pass
62 | 
63 |         plt.grid(True)
64 |         plt.xlabel('Epoch')
65 |         plt.ylabel('Loss')
66 |         plt.legend(loc="upper right")
67 | 
68 |         plt.savefig(os.path.join(self.log_dir, "epoch_loss.png"))
69 | 
70 |         plt.cla()
71 |         plt.close("all")
72 | 


--------------------------------------------------------------------------------
/utils/dataloader.py:
--------------------------------------------------------------------------------
  1 | from random import sample, shuffle
  2 | 
  3 | import cv2
  4 | import numpy as np
  5 | import torch
  6 | from PIL import Image
  7 | from torch.utils.data.dataset import Dataset
  8 | 
  9 | from utils.utils import cvtColor, preprocess_input
 10 | 
 11 | 
 12 | class YoloDataset(Dataset):
 13 |     def __init__(self, annotation_lines, input_shape, num_classes, epoch_length, mosaic, train, mosaic_ratio = 0.7):
 14 |         super(YoloDataset, self).__init__()
 15 |         self.annotation_lines   = annotation_lines
 16 |         self.input_shape        = input_shape
 17 |         self.num_classes        = num_classes
 18 |         self.epoch_length       = epoch_length
 19 |         self.mosaic             = mosaic
 20 |         self.train              = train
 21 |         self.mosaic_ratio       = mosaic_ratio
 22 | 
 23 |         self.epoch_now          = -1
 24 |         self.length             = len(self.annotation_lines)
 25 | 
 26 |     def __len__(self):
 27 |         return self.length
 28 | 
 29 |     def __getitem__(self, index):
 30 |         index       = index % self.length
 31 | 
 32 |         #---------------------------------------------------#
 33 |         #   训练时进行数据的随机增强
 34 |         #   验证时不进行数据的随机增强
 35 |         #---------------------------------------------------#
 36 |         if self.mosaic:
 37 |             if self.rand() < 0.5 and self.epoch_now < self.epoch_length * self.mosaic_ratio:
 38 |                 lines = sample(self.annotation_lines, 3)
 39 |                 lines.append(self.annotation_lines[index])
 40 |                 shuffle(lines)
 41 |                 image, box  = self.get_random_data_with_Mosaic(lines, self.input_shape)
 42 |             else:
 43 |                 image, box  = self.get_random_data(self.annotation_lines[index], self.input_shape, random = self.train)
 44 |         else:
 45 |             image, box      = self.get_random_data(self.annotation_lines[index], self.input_shape, random = self.train)
 46 |         image       = np.transpose(preprocess_input(np.array(image, dtype=np.float32)), (2, 0, 1))
 47 |         box         = np.array(box, dtype=np.float32)
 48 |         if len(box) != 0:
 49 |             box[:, [0, 2]] = box[:, [0, 2]] / self.input_shape[1]
 50 |             box[:, [1, 3]] = box[:, [1, 3]] / self.input_shape[0]
 51 | 
 52 |             box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
 53 |             box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
 54 |         return image, box
 55 | 
 56 |     def rand(self, a=0, b=1):
 57 |         return np.random.rand()*(b-a) + a
 58 | 
 59 |     def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True):
 60 |         line    = annotation_line.split()
 61 |         #------------------------------#
 62 |         #   读取图像并转换成RGB图像
 63 |         #------------------------------#
 64 |         image   = Image.open(line[0])
 65 |         image   = cvtColor(image)
 66 |         #------------------------------#
 67 |         #   获得图像的高宽与目标高宽
 68 |         #------------------------------#
 69 |         iw, ih  = image.size
 70 |         h, w    = input_shape
 71 |         #------------------------------#
 72 |         #   获得预测框
 73 |         #------------------------------#
 74 |         box     = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])
 75 | 
 76 |         if not random:
 77 |             scale = min(w/iw, h/ih)
 78 |             nw = int(iw*scale)
 79 |             nh = int(ih*scale)
 80 |             dx = (w-nw)//2
 81 |             dy = (h-nh)//2
 82 | 
 83 |             #---------------------------------#
 84 |             #   将图像多余的部分加上灰条
 85 |             #---------------------------------#
 86 |             image       = image.resize((nw,nh), Image.BICUBIC)
 87 |             new_image   = Image.new('RGB', (w,h), (128,128,128))
 88 |             new_image.paste(image, (dx, dy))
 89 |             image_data  = np.array(new_image, np.float32)
 90 | 
 91 |             #---------------------------------#
 92 |             #   对真实框进行调整
 93 |             #---------------------------------#
 94 |             if len(box)>0:
 95 |                 np.random.shuffle(box)
 96 |                 box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
 97 |                 box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
 98 |                 box[:, 0:2][box[:, 0:2]<0] = 0
 99 |                 box[:, 2][box[:, 2]>w] = w
100 |                 box[:, 3][box[:, 3]>h] = h
101 |                 box_w = box[:, 2] - box[:, 0]
102 |                 box_h = box[:, 3] - box[:, 1]
103 |                 box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box
104 | 
105 |             return image_data, box
106 |                 
107 |         #------------------------------------------#
108 |         #   对图像进行缩放并且进行长和宽的扭曲
109 |         #------------------------------------------#
110 |         new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter)
111 |         scale = self.rand(.25, 2)
112 |         if new_ar < 1:
113 |             nh = int(scale*h)
114 |             nw = int(nh*new_ar)
115 |         else:
116 |             nw = int(scale*w)
117 |             nh = int(nw/new_ar)
118 |         image = image.resize((nw,nh), Image.BICUBIC)
119 | 
120 |         #------------------------------------------#
121 |         #   将图像多余的部分加上灰条
122 |         #------------------------------------------#
123 |         dx = int(self.rand(0, w-nw))
124 |         dy = int(self.rand(0, h-nh))
125 |         new_image = Image.new('RGB', (w,h), (128,128,128))
126 |         new_image.paste(image, (dx, dy))
127 |         image = new_image
128 | 
129 |         #------------------------------------------#
130 |         #   翻转图像
131 |         #------------------------------------------#
132 |         flip = self.rand()<.5
133 |         if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)
134 | 
135 |         image_data      = np.array(image, np.uint8)
136 |         #---------------------------------#
137 |         #   对图像进行色域变换
138 |         #   计算色域变换的参数
139 |         #---------------------------------#
140 |         r               = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
141 |         #---------------------------------#
142 |         #   将图像转到HSV上
143 |         #---------------------------------#
144 |         hue, sat, val   = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV))
145 |         dtype           = image_data.dtype
146 |         #---------------------------------#
147 |         #   应用变换
148 |         #---------------------------------#
149 |         x       = np.arange(0, 256, dtype=r.dtype)
150 |         lut_hue = ((x * r[0]) % 180).astype(dtype)
151 |         lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
152 |         lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
153 | 
154 |         image_data = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
155 |         image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB)
156 | 
157 |         #---------------------------------#
158 |         #   对真实框进行调整
159 |         #---------------------------------#
160 |         if len(box)>0:
161 |             np.random.shuffle(box)
162 |             box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
163 |             box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
164 |             if flip: box[:, [0,2]] = w - box[:, [2,0]]
165 |             box[:, 0:2][box[:, 0:2]<0] = 0
166 |             box[:, 2][box[:, 2]>w] = w
167 |             box[:, 3][box[:, 3]>h] = h
168 |             box_w = box[:, 2] - box[:, 0]
169 |             box_h = box[:, 3] - box[:, 1]
170 |             box = box[np.logical_and(box_w>1, box_h>1)] 
171 |         
172 |         return image_data, box
173 |     
174 |     def merge_bboxes(self, bboxes, cutx, cuty):
175 |         merge_bbox = []
176 |         for i in range(len(bboxes)):
177 |             for box in bboxes[i]:
178 |                 tmp_box = []
179 |                 x1, y1, x2, y2 = box[0], box[1], box[2], box[3]
180 | 
181 |                 if i == 0:
182 |                     if y1 > cuty or x1 > cutx:
183 |                         continue
184 |                     if y2 >= cuty and y1 <= cuty:
185 |                         y2 = cuty
186 |                     if x2 >= cutx and x1 <= cutx:
187 |                         x2 = cutx
188 | 
189 |                 if i == 1:
190 |                     if y2 < cuty or x1 > cutx:
191 |                         continue
192 |                     if y2 >= cuty and y1 <= cuty:
193 |                         y1 = cuty
194 |                     if x2 >= cutx and x1 <= cutx:
195 |                         x2 = cutx
196 | 
197 |                 if i == 2:
198 |                     if y2 < cuty or x2 < cutx:
199 |                         continue
200 |                     if y2 >= cuty and y1 <= cuty:
201 |                         y1 = cuty
202 |                     if x2 >= cutx and x1 <= cutx:
203 |                         x1 = cutx
204 | 
205 |                 if i == 3:
206 |                     if y1 > cuty or x2 < cutx:
207 |                         continue
208 |                     if y2 >= cuty and y1 <= cuty:
209 |                         y2 = cuty
210 |                     if x2 >= cutx and x1 <= cutx:
211 |                         x1 = cutx
212 |                 tmp_box.append(x1)
213 |                 tmp_box.append(y1)
214 |                 tmp_box.append(x2)
215 |                 tmp_box.append(y2)
216 |                 tmp_box.append(box[-1])
217 |                 merge_bbox.append(tmp_box)
218 |         return merge_bbox
219 | 
220 |     def get_random_data_with_Mosaic(self, annotation_line, input_shape, jitter=0.3, hue=.1, sat=0.7, val=0.4):
221 |         h, w = input_shape
222 |         min_offset_x = self.rand(0.3, 0.7)
223 |         min_offset_y = self.rand(0.3, 0.7)
224 | 
225 |         image_datas = [] 
226 |         box_datas   = []
227 |         index       = 0
228 |         for line in annotation_line:
229 |             #---------------------------------#
230 |             #   每一行进行分割
231 |             #---------------------------------#
232 |             line_content = line.split()
233 |             #---------------------------------#
234 |             #   打开图片
235 |             #---------------------------------#
236 |             image = Image.open(line_content[0])
237 |             image = cvtColor(image)
238 |             
239 |             #---------------------------------#
240 |             #   图片的大小
241 |             #---------------------------------#
242 |             iw, ih = image.size
243 |             #---------------------------------#
244 |             #   保存框的位置
245 |             #---------------------------------#
246 |             box = np.array([np.array(list(map(int,box.split(',')))) for box in line_content[1:]])
247 |             
248 |             #---------------------------------#
249 |             #   是否翻转图片
250 |             #---------------------------------#
251 |             flip = self.rand()<.5
252 |             if flip and len(box)>0:
253 |                 image = image.transpose(Image.FLIP_LEFT_RIGHT)
254 |                 box[:, [0,2]] = iw - box[:, [2,0]]
255 | 
256 |             #------------------------------------------#
257 |             #   对图像进行缩放并且进行长和宽的扭曲
258 |             #------------------------------------------#
259 |             new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter)
260 |             scale = self.rand(.4, 1)
261 |             if new_ar < 1:
262 |                 nh = int(scale*h)
263 |                 nw = int(nh*new_ar)
264 |             else:
265 |                 nw = int(scale*w)
266 |                 nh = int(nw/new_ar)
267 |             image = image.resize((nw, nh), Image.BICUBIC)
268 | 
269 |             #-----------------------------------------------#
270 |             #   将图片进行放置，分别对应四张分割图片的位置
271 |             #-----------------------------------------------#
272 |             if index == 0:
273 |                 dx = int(w*min_offset_x) - nw
274 |                 dy = int(h*min_offset_y) - nh
275 |             elif index == 1:
276 |                 dx = int(w*min_offset_x) - nw
277 |                 dy = int(h*min_offset_y)
278 |             elif index == 2:
279 |                 dx = int(w*min_offset_x)
280 |                 dy = int(h*min_offset_y)
281 |             elif index == 3:
282 |                 dx = int(w*min_offset_x)
283 |                 dy = int(h*min_offset_y) - nh
284 |             
285 |             new_image = Image.new('RGB', (w,h), (128,128,128))
286 |             new_image.paste(image, (dx, dy))
287 |             image_data = np.array(new_image)
288 | 
289 |             index = index + 1
290 |             box_data = []
291 |             #---------------------------------#
292 |             #   对box进行重新处理
293 |             #---------------------------------#
294 |             if len(box)>0:
295 |                 np.random.shuffle(box)
296 |                 box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
297 |                 box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
298 |                 box[:, 0:2][box[:, 0:2]<0] = 0
299 |                 box[:, 2][box[:, 2]>w] = w
300 |                 box[:, 3][box[:, 3]>h] = h
301 |                 box_w = box[:, 2] - box[:, 0]
302 |                 box_h = box[:, 3] - box[:, 1]
303 |                 box = box[np.logical_and(box_w>1, box_h>1)]
304 |                 box_data = np.zeros((len(box),5))
305 |                 box_data[:len(box)] = box
306 |             
307 |             image_datas.append(image_data)
308 |             box_datas.append(box_data)
309 | 
310 |         #---------------------------------#
311 |         #   将图片分割，放在一起
312 |         #---------------------------------#
313 |         cutx = int(w * min_offset_x)
314 |         cuty = int(h * min_offset_y)
315 | 
316 |         new_image = np.zeros([h, w, 3])
317 |         new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
318 |         new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
319 |         new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
320 |         new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]
321 | 
322 |         new_image       = np.array(new_image, np.uint8)
323 |         #---------------------------------#
324 |         #   对图像进行色域变换
325 |         #   计算色域变换的参数
326 |         #---------------------------------#
327 |         r               = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
328 |         #---------------------------------#
329 |         #   将图像转到HSV上
330 |         #---------------------------------#
331 |         hue, sat, val   = cv2.split(cv2.cvtColor(new_image, cv2.COLOR_RGB2HSV))
332 |         dtype           = new_image.dtype
333 |         #---------------------------------#
334 |         #   应用变换
335 |         #---------------------------------#
336 |         x       = np.arange(0, 256, dtype=r.dtype)
337 |         lut_hue = ((x * r[0]) % 180).astype(dtype)
338 |         lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
339 |         lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
340 | 
341 |         new_image = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
342 |         new_image = cv2.cvtColor(new_image, cv2.COLOR_HSV2RGB)
343 | 
344 |         #---------------------------------#
345 |         #   对框进行进一步的处理
346 |         #---------------------------------#
347 |         new_boxes = self.merge_bboxes(box_datas, cutx, cuty)
348 | 
349 |         return new_image, new_boxes
350 | 
351 | # DataLoader中collate_fn使用
352 | def yolo_dataset_collate(batch):
353 |     images = []
354 |     bboxes = []
355 |     for img, box in batch:
356 |         images.append(img)
357 |         bboxes.append(box)
358 |     images = torch.from_numpy(np.array(images)).type(torch.FloatTensor)
359 |     bboxes = [torch.from_numpy(ann).type(torch.FloatTensor) for ann in bboxes]
360 |     return images, bboxes
361 | 


--------------------------------------------------------------------------------
/utils/utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from PIL import Image
 3 | 
 4 | #---------------------------------------------------------#
 5 | #   将图像转换成RGB图像，防止灰度图在预测时报错。
 6 | #   代码仅仅支持RGB图像的预测，所有其它类型的图像都会转化成RGB
 7 | #---------------------------------------------------------#
 8 | def cvtColor(image):
 9 |     if len(np.shape(image)) == 3 and np.shape(image)[2] == 3:
10 |         return image 
11 |     else:
12 |         image = image.convert('RGB')
13 |         return image 
14 |     
15 | #---------------------------------------------------#
16 | #   对输入图像进行resize
17 | #---------------------------------------------------#
18 | def resize_image(image, size, letterbox_image):
19 |     iw, ih  = image.size
20 |     w, h    = size
21 |     if letterbox_image:
22 |         scale   = min(w/iw, h/ih)
23 |         nw      = int(iw*scale)
24 |         nh      = int(ih*scale)
25 | 
26 |         image   = image.resize((nw,nh), Image.BICUBIC)
27 |         new_image = Image.new('RGB', size, (128,128,128))
28 |         new_image.paste(image, ((w-nw)//2, (h-nh)//2))
29 |     else:
30 |         new_image = image.resize((w, h), Image.BICUBIC)
31 |     return new_image
32 | 
33 | #---------------------------------------------------#
34 | #   获得类
35 | #---------------------------------------------------#
36 | def get_classes(classes_path):
37 |     with open(classes_path, encoding='utf-8') as f:
38 |         class_names = f.readlines()
39 |     class_names = [c.strip() for c in class_names]
40 |     return class_names, len(class_names)
41 | 
42 | #---------------------------------------------------#
43 | #   获得先验框
44 | #---------------------------------------------------#
45 | def get_anchors(anchors_path):
46 |     '''loads the anchors from a file'''
47 |     with open(anchors_path, encoding='utf-8') as f:
48 |         anchors = f.readline()
49 |     anchors = [float(x) for x in anchors.split(',')]
50 |     anchors = np.array(anchors).reshape(-1, 2)
51 |     return anchors, len(anchors)
52 | 
53 | #---------------------------------------------------#
54 | #   获得学习率
55 | #---------------------------------------------------#
56 | def get_lr(optimizer):
57 |     for param_group in optimizer.param_groups:
58 |         return param_group['lr']
59 | 
60 | def preprocess_input(image):
61 |     image /= 255.0
62 |     return image
63 | 


--------------------------------------------------------------------------------
/utils/utils_bbox.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from torchvision.ops import nms
  4 | import numpy as np
  5 | 
  6 | class DecodeBox():
  7 |     def __init__(self, anchors, num_classes, input_shape, anchors_mask = [[6,7,8], [3,4,5], [0,1,2]]):
  8 |         super(DecodeBox, self).__init__()
  9 |         self.anchors        = anchors
 10 |         self.num_classes    = num_classes
 11 |         self.bbox_attrs     = 5 + num_classes
 12 |         self.input_shape    = input_shape
 13 |         #-----------------------------------------------------------#
 14 |         #   13x13的特征层对应的anchor是[142, 110],[192, 243],[459, 401]
 15 |         #   26x26的特征层对应的anchor是[36, 75],[76, 55],[72, 146]
 16 |         #   52x52的特征层对应的anchor是[12, 16],[19, 36],[40, 28]
 17 |         #-----------------------------------------------------------#
 18 |         self.anchors_mask   = anchors_mask
 19 | 
 20 |     def decode_box(self, inputs):
 21 |         outputs = []
 22 |         for i, input in enumerate(inputs):
 23 |             #-----------------------------------------------#
 24 |             #   输入的input一共有三个，他们的shape分别是
 25 |             #   batch_size, 255, 13, 13
 26 |             #   batch_size, 255, 26, 26
 27 |             #   batch_size, 255, 52, 52
 28 |             #-----------------------------------------------#
 29 |             batch_size      = input.size(0)
 30 |             input_height    = input.size(2)
 31 |             input_width     = input.size(3)
 32 | 
 33 |             #-----------------------------------------------#
 34 |             #   输入为416x416时
 35 |             #   stride_h = stride_w = 32、16、8
 36 |             #-----------------------------------------------#
 37 |             stride_h = self.input_shape[0] / input_height
 38 |             stride_w = self.input_shape[1] / input_width
 39 |             #-------------------------------------------------#
 40 |             #   此时获得的scaled_anchors大小是相对于特征层的
 41 |             #-------------------------------------------------#
 42 |             scaled_anchors = [(anchor_width / stride_w, anchor_height / stride_h) for anchor_width, anchor_height in self.anchors[self.anchors_mask[i]]]
 43 | 
 44 |             #-----------------------------------------------#
 45 |             #   输入的input一共有三个，他们的shape分别是
 46 |             #   batch_size, 3, 13, 13, 85
 47 |             #   batch_size, 3, 26, 26, 85
 48 |             #   batch_size, 3, 52, 52, 85
 49 |             #-----------------------------------------------#
 50 |             prediction = input.view(batch_size, len(self.anchors_mask[i]),
 51 |                                     self.bbox_attrs, input_height, input_width).permute(0, 1, 3, 4, 2).contiguous()
 52 | 
 53 |             #-----------------------------------------------#
 54 |             #   先验框的中心位置的调整参数
 55 |             #-----------------------------------------------#
 56 |             x = torch.sigmoid(prediction[..., 0])  
 57 |             y = torch.sigmoid(prediction[..., 1])
 58 |             #-----------------------------------------------#
 59 |             #   先验框的宽高调整参数
 60 |             #-----------------------------------------------#
 61 |             w = prediction[..., 2]
 62 |             h = prediction[..., 3]
 63 |             #-----------------------------------------------#
 64 |             #   获得置信度，是否有物体
 65 |             #-----------------------------------------------#
 66 |             conf        = torch.sigmoid(prediction[..., 4])
 67 |             #-----------------------------------------------#
 68 |             #   种类置信度
 69 |             #-----------------------------------------------#
 70 |             pred_cls    = torch.sigmoid(prediction[..., 5:])
 71 | 
 72 |             FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
 73 |             LongTensor  = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
 74 | 
 75 |             #----------------------------------------------------------#
 76 |             #   生成网格，先验框中心，网格左上角 
 77 |             #   batch_size,3,13,13
 78 |             #----------------------------------------------------------#
 79 |             grid_x = torch.linspace(0, input_width - 1, input_width).repeat(input_height, 1).repeat(
 80 |                 batch_size * len(self.anchors_mask[i]), 1, 1).view(x.shape).type(FloatTensor)
 81 |             grid_y = torch.linspace(0, input_height - 1, input_height).repeat(input_width, 1).t().repeat(
 82 |                 batch_size * len(self.anchors_mask[i]), 1, 1).view(y.shape).type(FloatTensor)
 83 | 
 84 |             #----------------------------------------------------------#
 85 |             #   按照网格格式生成先验框的宽高
 86 |             #   batch_size,3,13,13
 87 |             #----------------------------------------------------------#
 88 |             anchor_w = FloatTensor(scaled_anchors).index_select(1, LongTensor([0]))
 89 |             anchor_h = FloatTensor(scaled_anchors).index_select(1, LongTensor([1]))
 90 |             anchor_w = anchor_w.repeat(batch_size, 1).repeat(1, 1, input_height * input_width).view(w.shape)
 91 |             anchor_h = anchor_h.repeat(batch_size, 1).repeat(1, 1, input_height * input_width).view(h.shape)
 92 | 
 93 |             #----------------------------------------------------------#
 94 |             #   利用预测结果对先验框进行调整
 95 |             #   首先调整先验框的中心，从先验框中心向右下角偏移
 96 |             #   再调整先验框的宽高。
 97 |             #----------------------------------------------------------#
 98 |             pred_boxes          = FloatTensor(prediction[..., :4].shape)
 99 |             pred_boxes[..., 0]  = x.data + grid_x
100 |             pred_boxes[..., 1]  = y.data + grid_y
101 |             pred_boxes[..., 2]  = torch.exp(w.data) * anchor_w
102 |             pred_boxes[..., 3]  = torch.exp(h.data) * anchor_h
103 | 
104 |             #----------------------------------------------------------#
105 |             #   将输出结果归一化成小数的形式
106 |             #----------------------------------------------------------#
107 |             _scale = torch.Tensor([input_width, input_height, input_width, input_height]).type(FloatTensor)
108 |             output = torch.cat((pred_boxes.view(batch_size, -1, 4) / _scale,
109 |                                 conf.view(batch_size, -1, 1), pred_cls.view(batch_size, -1, self.num_classes)), -1)
110 |             outputs.append(output.data)
111 |         return outputs
112 | 
113 |     def yolo_correct_boxes(self, box_xy, box_wh, input_shape, image_shape, letterbox_image):
114 |         #-----------------------------------------------------------------#
115 |         #   把y轴放前面是因为方便预测框和图像的宽高进行相乘
116 |         #-----------------------------------------------------------------#
117 |         box_yx = box_xy[..., ::-1]
118 |         box_hw = box_wh[..., ::-1]
119 |         input_shape = np.array(input_shape)
120 |         image_shape = np.array(image_shape)
121 | 
122 |         if letterbox_image:
123 |             #-----------------------------------------------------------------#
124 |             #   这里求出来的offset是图像有效区域相对于图像左上角的偏移情况
125 |             #   new_shape指的是宽高缩放情况
126 |             #-----------------------------------------------------------------#
127 |             new_shape = np.round(image_shape * np.min(input_shape/image_shape))
128 |             offset  = (input_shape - new_shape)/2./input_shape
129 |             scale   = input_shape/new_shape
130 | 
131 |             box_yx  = (box_yx - offset) * scale
132 |             box_hw *= scale
133 | 
134 |         box_mins    = box_yx - (box_hw / 2.)
135 |         box_maxes   = box_yx + (box_hw / 2.)
136 |         boxes  = np.concatenate([box_mins[..., 0:1], box_mins[..., 1:2], box_maxes[..., 0:1], box_maxes[..., 1:2]], axis=-1)
137 |         boxes *= np.concatenate([image_shape, image_shape], axis=-1)
138 |         return boxes
139 | 
140 |     def non_max_suppression(self, prediction, num_classes, input_shape, image_shape, letterbox_image, conf_thres=0.5, nms_thres=0.4):
141 |         #----------------------------------------------------------#
142 |         #   将预测结果的格式转换成左上角右下角的格式。
143 |         #   prediction  [batch_size, num_anchors, 85]
144 |         #----------------------------------------------------------#
145 |         box_corner          = prediction.new(prediction.shape)
146 |         box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
147 |         box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
148 |         box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
149 |         box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
150 |         prediction[:, :, :4] = box_corner[:, :, :4]
151 | 
152 |         output = [None for _ in range(len(prediction))]
153 |         for i, image_pred in enumerate(prediction):
154 |             #----------------------------------------------------------#
155 |             #   对种类预测部分取max。
156 |             #   class_conf  [num_anchors, 1]    种类置信度
157 |             #   class_pred  [num_anchors, 1]    种类
158 |             #----------------------------------------------------------#
159 |             class_conf, class_pred = torch.max(image_pred[:, 5:5 + num_classes], 1, keepdim=True)
160 | 
161 |             #----------------------------------------------------------#
162 |             #   利用置信度进行第一轮筛选
163 |             #----------------------------------------------------------#
164 |             conf_mask = (image_pred[:, 4] * class_conf[:, 0] >= conf_thres).squeeze()
165 | 
166 |             #----------------------------------------------------------#
167 |             #   根据置信度进行预测结果的筛选
168 |             #----------------------------------------------------------#
169 |             image_pred = image_pred[conf_mask]
170 |             class_conf = class_conf[conf_mask]
171 |             class_pred = class_pred[conf_mask]
172 |             if not image_pred.size(0):
173 |                 continue
174 |             #-------------------------------------------------------------------------#
175 |             #   detections  [num_anchors, 7]
176 |             #   7的内容为：x1, y1, x2, y2, obj_conf, class_conf, class_pred
177 |             #-------------------------------------------------------------------------#
178 |             detections = torch.cat((image_pred[:, :5], class_conf.float(), class_pred.float()), 1)
179 | 
180 |             #------------------------------------------#
181 |             #   获得预测结果中包含的所有种类
182 |             #------------------------------------------#
183 |             unique_labels = detections[:, -1].cpu().unique()
184 | 
185 |             if prediction.is_cuda:
186 |                 unique_labels = unique_labels.cuda()
187 |                 detections = detections.cuda()
188 | 
189 |             for c in unique_labels:
190 |                 #------------------------------------------#
191 |                 #   获得某一类得分筛选后全部的预测结果
192 |                 #------------------------------------------#
193 |                 detections_class = detections[detections[:, -1] == c]
194 | 
195 |                 #------------------------------------------#
196 |                 #   使用官方自带的非极大抑制会速度更快一些！
197 |                 #------------------------------------------#
198 |                 keep = nms(
199 |                     detections_class[:, :4],
200 |                     detections_class[:, 4] * detections_class[:, 5],
201 |                     nms_thres
202 |                 )
203 |                 max_detections = detections_class[keep]
204 |                 
205 |                 # # 按照存在物体的置信度排序
206 |                 # _, conf_sort_index = torch.sort(detections_class[:, 4]*detections_class[:, 5], descending=True)
207 |                 # detections_class = detections_class[conf_sort_index]
208 |                 # # 进行非极大抑制
209 |                 # max_detections = []
210 |                 # while detections_class.size(0):
211 |                 #     # 取出这一类置信度最高的，一步一步往下判断，判断重合程度是否大于nms_thres，如果是则去除掉
212 |                 #     max_detections.append(detections_class[0].unsqueeze(0))
213 |                 #     if len(detections_class) == 1:
214 |                 #         break
215 |                 #     ious = bbox_iou(max_detections[-1], detections_class[1:])
216 |                 #     detections_class = detections_class[1:][ious < nms_thres]
217 |                 # # 堆叠
218 |                 # max_detections = torch.cat(max_detections).data
219 |                 
220 |                 # Add max detections to outputs
221 |                 output[i] = max_detections if output[i] is None else torch.cat((output[i], max_detections))
222 |             
223 |             if output[i] is not None:
224 |                 output[i]           = output[i].cpu().numpy()
225 |                 box_xy, box_wh      = (output[i][:, 0:2] + output[i][:, 2:4])/2, output[i][:, 2:4] - output[i][:, 0:2]
226 |                 output[i][:, :4]    = self.yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape, letterbox_image)
227 |         return output
228 | 


--------------------------------------------------------------------------------
/utils/utils_fit.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | 
  3 | import torch
  4 | from tqdm import tqdm
  5 | 
  6 | from utils.utils import get_lr
  7 | 
  8 | 
  9 | def fit_one_epoch(model_train, model, yolo_loss, loss_history, optimizer, epoch, epoch_step, epoch_step_val, gen, gen_val, Epoch, cuda, fp16, scaler, save_period, save_dir, local_rank=0):
 10 |     loss        = 0
 11 |     val_loss    = 0
 12 | 
 13 |     if local_rank == 0:
 14 |         print('Start Train')
 15 |         pbar = tqdm(total=epoch_step,desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3)
 16 |     model_train.train()
 17 |     for iteration, batch in enumerate(gen):
 18 |         if iteration >= epoch_step:
 19 |             break
 20 | 
 21 |         images, targets = batch[0], batch[1]
 22 |         with torch.no_grad():
 23 |             if cuda:
 24 |                 images  = images.cuda()
 25 |                 targets = [ann.cuda() for ann in targets]
 26 |         #----------------------#
 27 |         #   清零梯度
 28 |         #----------------------#
 29 |         optimizer.zero_grad()
 30 |         if not fp16:
 31 |             #----------------------#
 32 |             #   前向传播
 33 |             #----------------------#
 34 |             outputs         = model_train(images)
 35 | 
 36 |             loss_value_all  = 0
 37 |             #----------------------#
 38 |             #   计算损失
 39 |             #----------------------#
 40 |             for l in range(len(outputs)):
 41 |                 loss_item = yolo_loss(l, outputs[l], targets)
 42 |                 loss_value_all  += loss_item
 43 |             loss_value = loss_value_all
 44 | 
 45 |             #----------------------#
 46 |             #   反向传播
 47 |             #----------------------#
 48 |             loss_value.backward()
 49 |             optimizer.step()
 50 |         else:
 51 |             from torch.cuda.amp import autocast
 52 |             with autocast():
 53 |                 #----------------------#
 54 |                 #   前向传播
 55 |                 #----------------------#
 56 |                 outputs         = model_train(images)
 57 | 
 58 |                 loss_value_all  = 0
 59 |                 #----------------------#
 60 |                 #   计算损失
 61 |                 #----------------------#
 62 |                 for l in range(len(outputs)):
 63 |                     loss_item = yolo_loss(l, outputs[l], targets)
 64 |                     loss_value_all  += loss_item
 65 |                 loss_value = loss_value_all
 66 | 
 67 |             #----------------------#
 68 |             #   反向传播
 69 |             #----------------------#
 70 |             scaler.scale(loss_value).backward()
 71 |             scaler.step(optimizer)
 72 |             scaler.update()
 73 | 
 74 |         loss += loss_value.item()
 75 |         
 76 |         if local_rank == 0:
 77 |             pbar.set_postfix(**{'loss'  : loss / (iteration + 1), 
 78 |                                 'lr'    : get_lr(optimizer)})
 79 |             pbar.update(1)
 80 | 
 81 |     if local_rank == 0:
 82 |         pbar.close()
 83 |         print('Finish Train')
 84 |         print('Start Validation')
 85 |         pbar = tqdm(total=epoch_step_val, desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3)
 86 | 
 87 |     model_train.eval()
 88 |     for iteration, batch in enumerate(gen_val):
 89 |         if iteration >= epoch_step_val:
 90 |             break
 91 |         images, targets = batch[0], batch[1]
 92 |         with torch.no_grad():
 93 |             if cuda:
 94 |                 images  = images.cuda()
 95 |                 targets = [ann.cuda() for ann in targets]
 96 |             #----------------------#
 97 |             #   清零梯度
 98 |             #----------------------#
 99 |             optimizer.zero_grad()
100 |             #----------------------#
101 |             #   前向传播
102 |             #----------------------#
103 |             outputs         = model_train(images)
104 | 
105 |             loss_value_all  = 0
106 |             #----------------------#
107 |             #   计算损失
108 |             #----------------------#
109 |             for l in range(len(outputs)):
110 |                 loss_item = yolo_loss(l, outputs[l], targets)
111 |                 loss_value_all  += loss_item
112 |             loss_value  = loss_value_all
113 | 
114 |         val_loss += loss_value.item()
115 |         if local_rank == 0:
116 |             pbar.set_postfix(**{'val_loss': val_loss / (iteration + 1)})
117 |             pbar.update(1)
118 | 
119 |     if local_rank == 0:
120 |         pbar.close()
121 |         print('Finish Validation')
122 |         loss_history.append_loss(epoch + 1, loss / epoch_step, val_loss / epoch_step_val)
123 |         print('Epoch:'+ str(epoch + 1) + '/' + str(Epoch))
124 |         print('Total Loss: %.3f || Val Loss: %.3f ' % (loss / epoch_step, val_loss / epoch_step_val))
125 |         if (epoch + 1) % save_period == 0 or epoch + 1 == Epoch:
126 |             torch.save(model.state_dict(), os.path.join(save_dir, "ep%03d-loss%.3f-val_loss%.3f.pth" % (epoch + 1, loss / epoch_step, val_loss / epoch_step_val)))
127 |         # 每次保存最后一个权重
128 |         torch.save(model.state_dict(), os.path.join(save_dir, "last.pth" ))


--------------------------------------------------------------------------------
/utils_coco/coco_annotation.py:
--------------------------------------------------------------------------------
  1 | #-------------------------------------------------------#
  2 | #   用于处理COCO数据集，根据json文件生成txt文件用于训练
  3 | #-------------------------------------------------------#
  4 | import json
  5 | import os
  6 | from collections import defaultdict
  7 | 
  8 | #-------------------------------------------------------#
  9 | #   指向了COCO训练集与验证集图片的路径
 10 | #-------------------------------------------------------#
 11 | train_datasets_path     = "coco_dataset/train2017"
 12 | val_datasets_path       = "coco_dataset/val2017"
 13 | 
 14 | #-------------------------------------------------------#
 15 | #   指向了COCO训练集与验证集标签的路径
 16 | #-------------------------------------------------------#
 17 | train_annotation_path   = "coco_dataset/annotations/instances_train2017.json"
 18 | val_annotation_path     = "coco_dataset/annotations/instances_val2017.json"
 19 | 
 20 | #-------------------------------------------------------#
 21 | #   生成的txt文件路径
 22 | #-------------------------------------------------------#
 23 | train_output_path       = "coco_train.txt"
 24 | val_output_path         = "coco_val.txt"
 25 | 
 26 | if __name__ == "__main__":
 27 |     name_box_id = defaultdict(list)
 28 |     id_name     = dict()
 29 |     f           = open(train_annotation_path, encoding='utf-8')
 30 |     data        = json.load(f)
 31 | 
 32 |     annotations = data['annotations']
 33 |     for ant in annotations:
 34 |         id = ant['image_id']
 35 |         name = os.path.join(train_datasets_path, '%012d.jpg' % id)
 36 |         cat = ant['category_id']
 37 |         if cat >= 1 and cat <= 11:
 38 |             cat = cat - 1
 39 |         elif cat >= 13 and cat <= 25:
 40 |             cat = cat - 2
 41 |         elif cat >= 27 and cat <= 28:
 42 |             cat = cat - 3
 43 |         elif cat >= 31 and cat <= 44:
 44 |             cat = cat - 5
 45 |         elif cat >= 46 and cat <= 65:
 46 |             cat = cat - 6
 47 |         elif cat == 67:
 48 |             cat = cat - 7
 49 |         elif cat == 70:
 50 |             cat = cat - 9
 51 |         elif cat >= 72 and cat <= 82:
 52 |             cat = cat - 10
 53 |         elif cat >= 84 and cat <= 90:
 54 |             cat = cat - 11
 55 |         name_box_id[name].append([ant['bbox'], cat])
 56 | 
 57 |     f = open(train_output_path, 'w')
 58 |     for key in name_box_id.keys():
 59 |         f.write(key)
 60 |         box_infos = name_box_id[key]
 61 |         for info in box_infos:
 62 |             x_min = int(info[0][0])
 63 |             y_min = int(info[0][1])
 64 |             x_max = x_min + int(info[0][2])
 65 |             y_max = y_min + int(info[0][3])
 66 | 
 67 |             box_info = " %d,%d,%d,%d,%d" % (
 68 |                 x_min, y_min, x_max, y_max, int(info[1]))
 69 |             f.write(box_info)
 70 |         f.write('\n')
 71 |     f.close()
 72 | 
 73 |     name_box_id = defaultdict(list)
 74 |     id_name     = dict()
 75 |     f           = open(val_annotation_path, encoding='utf-8')
 76 |     data        = json.load(f)
 77 | 
 78 |     annotations = data['annotations']
 79 |     for ant in annotations:
 80 |         id = ant['image_id']
 81 |         name = os.path.join(val_datasets_path, '%012d.jpg' % id)
 82 |         cat = ant['category_id']
 83 |         if cat >= 1 and cat <= 11:
 84 |             cat = cat - 1
 85 |         elif cat >= 13 and cat <= 25:
 86 |             cat = cat - 2
 87 |         elif cat >= 27 and cat <= 28:
 88 |             cat = cat - 3
 89 |         elif cat >= 31 and cat <= 44:
 90 |             cat = cat - 5
 91 |         elif cat >= 46 and cat <= 65:
 92 |             cat = cat - 6
 93 |         elif cat == 67:
 94 |             cat = cat - 7
 95 |         elif cat == 70:
 96 |             cat = cat - 9
 97 |         elif cat >= 72 and cat <= 82:
 98 |             cat = cat - 10
 99 |         elif cat >= 84 and cat <= 90:
100 |             cat = cat - 11
101 |         name_box_id[name].append([ant['bbox'], cat])
102 | 
103 |     f = open(val_output_path, 'w')
104 |     for key in name_box_id.keys():
105 |         f.write(key)
106 |         box_infos = name_box_id[key]
107 |         for info in box_infos:
108 |             x_min = int(info[0][0])
109 |             y_min = int(info[0][1])
110 |             x_max = x_min + int(info[0][2])
111 |             y_max = y_min + int(info[0][3])
112 | 
113 |             box_info = " %d,%d,%d,%d,%d" % (
114 |                 x_min, y_min, x_max, y_max, int(info[1]))
115 |             f.write(box_info)
116 |         f.write('\n')
117 |     f.close()
118 | 


--------------------------------------------------------------------------------
/utils_coco/get_map_coco.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | 
  4 | import numpy as np
  5 | import torch
  6 | from PIL import Image
  7 | from pycocotools.coco import COCO
  8 | from pycocotools.cocoeval import COCOeval
  9 | from tqdm import tqdm
 10 | 
 11 | from utils.utils import cvtColor, preprocess_input, resize_image
 12 | from yolo import YOLO
 13 | 
 14 | #---------------------------------------------------------------------------#
 15 | #   map_mode用于指定该文件运行时计算的内容
 16 | #   map_mode为0代表整个map计算流程，包括获得预测结果、计算map。
 17 | #   map_mode为1代表仅仅获得预测结果。
 18 | #   map_mode为2代表仅仅获得计算map。
 19 | #---------------------------------------------------------------------------#
 20 | map_mode            = 0
 21 | #-------------------------------------------------------#
 22 | #   指向了验证集标签与图片路径
 23 | #-------------------------------------------------------#
 24 | cocoGt_path         = 'coco_dataset/annotations/instances_val2017.json'
 25 | dataset_img_path    = 'coco_dataset/val2017'
 26 | #-------------------------------------------------------#
 27 | #   结果输出的文件夹，默认为map_out
 28 | #-------------------------------------------------------#
 29 | temp_save_path      = 'map_out/coco_eval'
 30 | 
 31 | class mAP_YOLO(YOLO):
 32 |     #---------------------------------------------------#
 33 |     #   检测图片
 34 |     #---------------------------------------------------#
 35 |     def detect_image(self, image_id, image, results):
 36 |         #---------------------------------------------------#
 37 |         #   计算输入图片的高和宽
 38 |         #---------------------------------------------------#
 39 |         image_shape = np.array(np.shape(image)[0:2])
 40 |         #---------------------------------------------------------#
 41 |         #   在这里将图像转换成RGB图像，防止灰度图在预测时报错。
 42 |         #   代码仅仅支持RGB图像的预测，所有其它类型的图像都会转化成RGB
 43 |         #---------------------------------------------------------#
 44 |         image       = cvtColor(image)
 45 |         #---------------------------------------------------------#
 46 |         #   给图像增加灰条，实现不失真的resize
 47 |         #   也可以直接resize进行识别
 48 |         #---------------------------------------------------------#
 49 |         image_data  = resize_image(image, (self.input_shape[1],self.input_shape[0]), self.letterbox_image)
 50 |         #---------------------------------------------------------#
 51 |         #   添加上batch_size维度
 52 |         #---------------------------------------------------------#
 53 |         image_data  = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)
 54 | 
 55 |         with torch.no_grad():
 56 |             images = torch.from_numpy(image_data)
 57 |             if self.cuda:
 58 |                 images = images.cuda()
 59 |             #---------------------------------------------------------#
 60 |             #   将图像输入网络当中进行预测！
 61 |             #---------------------------------------------------------#
 62 |             outputs = self.net(images)
 63 |             outputs = self.bbox_util.decode_box(outputs)
 64 |             #---------------------------------------------------------#
 65 |             #   将预测框进行堆叠，然后进行非极大抑制
 66 |             #---------------------------------------------------------#
 67 |             outputs = self.bbox_util.non_max_suppression(torch.cat(outputs, 1), self.num_classes, self.input_shape, 
 68 |                         image_shape, self.letterbox_image, conf_thres = self.confidence, nms_thres = self.nms_iou)
 69 |                                                     
 70 |             if outputs[0] is None: 
 71 |                 return results
 72 | 
 73 |             top_label   = np.array(outputs[0][:, 6], dtype = 'int32')
 74 |             top_conf    = outputs[0][:, 4] * outputs[0][:, 5]
 75 |             top_boxes   = outputs[0][:, :4]
 76 | 
 77 |         for i, c in enumerate(top_label):
 78 |             result                      = {}
 79 |             top, left, bottom, right    = top_boxes[i]
 80 | 
 81 |             result["image_id"]      = int(image_id)
 82 |             result["category_id"]   = clsid2catid[c]
 83 |             result["bbox"]          = [float(left),float(top),float(right-left),float(bottom-top)]
 84 |             result["score"]         = float(top_conf[i])
 85 |             results.append(result)
 86 |         return results
 87 | 
 88 | if __name__ == "__main__":
 89 |     if not os.path.exists(temp_save_path):
 90 |         os.makedirs(temp_save_path)
 91 | 
 92 |     cocoGt      = COCO(cocoGt_path)
 93 |     ids         = list(cocoGt.imgToAnns.keys())
 94 |     clsid2catid = cocoGt.getCatIds()
 95 | 
 96 |     if map_mode == 0 or map_mode == 1:
 97 |         yolo = mAP_YOLO(confidence = 0.001, nms_iou = 0.65)
 98 | 
 99 |         with open(os.path.join(temp_save_path, 'eval_results.json'),"w") as f:
100 |             results = []
101 |             for image_id in tqdm(ids):
102 |                 image_path  = os.path.join(dataset_img_path, cocoGt.loadImgs(image_id)[0]['file_name'])
103 |                 image       = Image.open(image_path)
104 |                 results     = yolo.detect_image(image_id, image, results)
105 |             json.dump(results, f)
106 | 
107 |     if map_mode == 0 or map_mode == 2:
108 |         cocoDt      = cocoGt.loadRes(os.path.join(temp_save_path, 'eval_results.json'))
109 |         cocoEval    = COCOeval(cocoGt, cocoDt, 'bbox') 
110 |         cocoEval.evaluate()
111 |         cocoEval.accumulate()
112 |         cocoEval.summarize()
113 |         print("Get map done.")
114 | 


--------------------------------------------------------------------------------
/voc_annotation.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import random
  3 | import xml.etree.ElementTree as ET
  4 | from get_yaml import get_config
  5 | from utils.utils import get_classes
  6 | 
  7 | #--------------------------------------------------------------------------------------------------------------------------------#
  8 | #   annotation_mode用于指定该文件运行时计算的内容
  9 | #   annotation_mode为0代表整个标签处理过程，包括获得VOCdevkit/VOC2007/ImageSets里面的txt以及训练用的2007_train.txt、2007_val.txt
 10 | #   annotation_mode为1代表获得VOCdevkit/VOC2007/ImageSets里面的txt
 11 | #   annotation_mode为2代表获得训练用的2007_train.txt、2007_val.txt
 12 | #--------------------------------------------------------------------------------------------------------------------------------#
 13 | annotation_mode     = 0
 14 | #-------------------------------------------------------------------#
 15 | #   必须要修改，用于生成2007_train.txt、2007_val.txt的目标信息
 16 | #   与训练和预测所用的classes_path一致即可
 17 | #   如果生成的2007_train.txt里面没有目标信息
 18 | #   那么就是因为classes没有设定正确
 19 | #   仅在annotation_mode为0和2的时候有效
 20 | #-------------------------------------------------------------------#
 21 | # classes_path        = 'model_data/gesture_classes.txt'
 22 | #--------------------------------------------------------------------------------------------------------------------------------#
 23 | #   trainval_percent用于指定(训练集+验证集)与测试集的比例，默认情况下 (训练集+验证集):测试集 = 9:1
 24 | #   train_percent用于指定(训练集+验证集)中训练集与验证集的比例，默认情况下 训练集:验证集 = 9:1  
 25 | #   仅在annotation_mode为0和1的时候有效
 26 | #--------------------------------------------------------------------------------------------------------------------------------#
 27 | trainval_percent    = 1
 28 | train_percent       = 0.9
 29 | #-------------------------------------------------------#
 30 | #   指向VOC数据集所在的文件夹
 31 | #   默认指向根目录下的VOC数据集
 32 | #-------------------------------------------------------#
 33 | VOCdevkit_path  = 'VOCdevkit'
 34 | 
 35 | VOCdevkit_sets  = [('2007', 'train'), ('2007', 'val')]
 36 | # classes, _      = get_classes(classes_path)
 37 | config = get_config()
 38 | classes = config['classes']
 39 | def convert_annotation(year, image_id, list_file):
 40 |     in_file = open(os.path.join(VOCdevkit_path, 'VOC%s/Annotations/%s.xml'%(year, image_id)), encoding='utf-8')
 41 |     tree=ET.parse(in_file)
 42 |     root = tree.getroot()
 43 | 
 44 |     for obj in root.iter('object'):
 45 |         difficult = 0 
 46 |         if obj.find('difficult')!=None:
 47 |             difficult = obj.find('difficult').text
 48 |         cls = obj.find('name').text
 49 |         if cls not in classes or int(difficult)==1:
 50 |             continue
 51 |         cls_id = classes.index(cls)
 52 |         xmlbox = obj.find('bndbox')
 53 |         b = (int(float(xmlbox.find('xmin').text)), int(float(xmlbox.find('ymin').text)), int(float(xmlbox.find('xmax').text)), int(float(xmlbox.find('ymax').text)))
 54 |         list_file.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id))
 55 |         
 56 | if __name__ == "__main__":
 57 |     random.seed(0)
 58 |     if annotation_mode == 0 or annotation_mode == 1:
 59 |         print("Generate txt in ImageSets.")
 60 |         xmlfilepath     = os.path.join(VOCdevkit_path, 'VOC2007/Annotations')
 61 |         saveBasePath    = os.path.join(VOCdevkit_path, 'VOC2007/ImageSets/Main')
 62 |         temp_xml        = os.listdir(xmlfilepath)
 63 |         total_xml       = []
 64 |         for xml in temp_xml:
 65 |             if xml.endswith(".xml"):
 66 |                 total_xml.append(xml)
 67 | 
 68 |         num     = len(total_xml)  
 69 |         list    = range(num)  
 70 |         tv      = int(num*trainval_percent)  
 71 |         tr      = int(tv*train_percent)  
 72 |         trainval= random.sample(list,tv)  
 73 |         train   = random.sample(trainval,tr)  
 74 |         
 75 |         print("train and val size",tv)
 76 |         print("train size",tr)
 77 |         ftrainval   = open(os.path.join(saveBasePath,'trainval.txt'), 'w')  
 78 |         ftest       = open(os.path.join(saveBasePath,'test.txt'), 'w')  
 79 |         ftrain      = open(os.path.join(saveBasePath,'train.txt'), 'w')  
 80 |         fval        = open(os.path.join(saveBasePath,'val.txt'), 'w')  
 81 |         
 82 |         for i in list:  
 83 |             name=total_xml[i][:-4]+'\n'  
 84 |             if i in trainval:  
 85 |                 ftrainval.write(name)  
 86 |                 if i in train:  
 87 |                     ftrain.write(name)  
 88 |                 else:  
 89 |                     fval.write(name)  
 90 |             else:  
 91 |                 ftest.write(name)  
 92 |         
 93 |         ftrainval.close()  
 94 |         ftrain.close()  
 95 |         fval.close()  
 96 |         ftest.close()
 97 |         print("Generate txt in ImageSets done.")
 98 | 
 99 |     if annotation_mode == 0 or annotation_mode == 2:
100 |         print("Generate gesture_train.txt and 2007_val.txt for train.")
101 |         for year, image_set in VOCdevkit_sets:
102 |             image_ids = open(os.path.join(VOCdevkit_path, 'VOC%s/ImageSets/Main/%s.txt'%(year, image_set)), encoding='utf-8').read().strip().split()
103 |             list_file = open('%s_%s.txt'%(year, image_set), 'w', encoding='utf-8')
104 |             for image_id in image_ids:
105 |                 list_file.write('%s/VOC%s/JPEGImages/%s.jpg'%(os.path.abspath(VOCdevkit_path), year, image_id))
106 | 
107 |                 convert_annotation(year, image_id, list_file)
108 |                 list_file.write('\n')
109 |             list_file.close()
110 |         print("Generate gesture_train.txt and gesture_val.txt for train done.")
111 | 


--------------------------------------------------------------------------------
/yolo_anchors.txt:
--------------------------------------------------------------------------------
1 | 105,107, 118,136, 152,122, 114,165, 139,151, 160,156, 152,185, 181,167, 192,197


--------------------------------------------------------------------------------