├── darknet53 ├── .idea │ ├── .WeDrive │ ├── .gitignore │ ├── inspectionProfiles │ │ ├── .WeDrive │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ └── yolov3.iml ├── __pycache__ │ ├── .WeDrive │ ├── cfg.cpython-38.pyc │ ├── dataset.cpython-38.pyc │ ├── utils.cpython-38.pyc │ ├── net_block.cpython-38.pyc │ ├── darbnet53_module.cpython-38.pyc │ └── mobilenet_v2_module.cpython-38.pyc ├── data │ ├── person_label.txt │ ├── images │ │ ├── 000017.jpg │ │ ├── 000022.jpg │ │ ├── 000261.jpg │ │ ├── 000455.jpg │ │ ├── 000812.jpg │ │ ├── 000930.jpg │ │ ├── 001183.jpg │ │ ├── 001370.jpg │ │ ├── 001579.jpg │ │ └── 001988.jpg │ ├── data.txt │ ├── test.py │ ├── image_voc │ │ ├── 000017.xml │ │ ├── 000022.xml │ │ ├── 000455.xml │ │ ├── 001988.xml │ │ ├── 000261.xml │ │ ├── 000930.xml │ │ ├── 001183.xml │ │ ├── 001579.xml │ │ ├── 000812.xml │ │ └── 001370.xml │ └── make_data_txt.py ├── images │ ├── 000017.jpg │ ├── 000022.jpg │ ├── 000261.jpg │ ├── 000455.jpg │ ├── 000812.jpg │ ├── 000930.jpg │ ├── 001084.jpg │ ├── 001183.jpg │ ├── 001370.jpg │ ├── 001579.jpg │ └── 001988.jpg ├── test.py ├── cfg.py ├── trainer.py ├── dataset.py ├── utils.py ├── detector.py ├── darbnet53_module.py └── net_block.py ├── mobilenetv2 ├── .idea │ ├── .WeDrive │ ├── .gitignore │ ├── inspectionProfiles │ │ ├── .WeDrive │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ └── yolov3.iml ├── __pycache__ │ ├── cfg.cpython-38.pyc │ ├── utils.cpython-38.pyc │ ├── dataset.cpython-38.pyc │ ├── net_block.cpython-38.pyc │ └── mobilenet_v2_module.cpython-38.pyc ├── cfg.py ├── data │ ├── test.py │ └── make_data_txt.py ├── trainer.py ├── dataset.py ├── utils.py ├── detector.py ├── mobilenet_v2_module.py └── net_block.py └── README.md /darknet53/.idea/.WeDrive: -------------------------------------------------------------------------------- 1 | D:\课程代码\20210823_YOLOv3_01\yolov3\.idea -------------------------------------------------------------------------------- /mobilenetv2/.idea/.WeDrive: 
-------------------------------------------------------------------------------- 1 | D:\课程代码\20210823_YOLOv3_01\yolov3\.idea -------------------------------------------------------------------------------- /darknet53/__pycache__/.WeDrive: -------------------------------------------------------------------------------- 1 | D:\课程代码\20210823_YOLOv3_01\yolov3\__pycache__ -------------------------------------------------------------------------------- /darknet53/.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | -------------------------------------------------------------------------------- /mobilenetv2/.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | -------------------------------------------------------------------------------- /darknet53/.idea/inspectionProfiles/.WeDrive: -------------------------------------------------------------------------------- 1 | D:\课程代码\20210823_YOLOv3_01\yolov3\.idea\inspectionProfiles -------------------------------------------------------------------------------- /mobilenetv2/.idea/inspectionProfiles/.WeDrive: -------------------------------------------------------------------------------- 1 | D:\课程代码\20210823_YOLOv3_01\yolov3\.idea\inspectionProfiles -------------------------------------------------------------------------------- /darknet53/data/person_label.txt: -------------------------------------------------------------------------------- 1 | images/1.jpg 1 12 13 51 18 2 22 38 55 98 2 44 33 62 62 2 | images/2.jpg 3 82 46 57 19 3 | -------------------------------------------------------------------------------- /darknet53/images/000017.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/000017.jpg -------------------------------------------------------------------------------- /darknet53/images/000022.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/000022.jpg -------------------------------------------------------------------------------- /darknet53/images/000261.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/000261.jpg -------------------------------------------------------------------------------- /darknet53/images/000455.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/000455.jpg -------------------------------------------------------------------------------- /darknet53/images/000812.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/000812.jpg -------------------------------------------------------------------------------- /darknet53/images/000930.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/000930.jpg -------------------------------------------------------------------------------- /darknet53/images/001084.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/001084.jpg 
-------------------------------------------------------------------------------- /darknet53/images/001183.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/001183.jpg -------------------------------------------------------------------------------- /darknet53/images/001370.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/001370.jpg -------------------------------------------------------------------------------- /darknet53/images/001579.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/001579.jpg -------------------------------------------------------------------------------- /darknet53/images/001988.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/001988.jpg -------------------------------------------------------------------------------- /darknet53/data/images/000017.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/000017.jpg -------------------------------------------------------------------------------- /darknet53/data/images/000022.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/000022.jpg -------------------------------------------------------------------------------- /darknet53/data/images/000261.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/000261.jpg -------------------------------------------------------------------------------- /darknet53/data/images/000455.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/000455.jpg -------------------------------------------------------------------------------- /darknet53/data/images/000812.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/000812.jpg -------------------------------------------------------------------------------- /darknet53/data/images/000930.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/000930.jpg -------------------------------------------------------------------------------- /darknet53/data/images/001183.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/001183.jpg -------------------------------------------------------------------------------- /darknet53/data/images/001370.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/001370.jpg -------------------------------------------------------------------------------- /darknet53/data/images/001579.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/001579.jpg -------------------------------------------------------------------------------- /darknet53/data/images/001988.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/001988.jpg -------------------------------------------------------------------------------- /darknet53/__pycache__/cfg.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/__pycache__/cfg.cpython-38.pyc -------------------------------------------------------------------------------- /darknet53/__pycache__/dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/__pycache__/dataset.cpython-38.pyc -------------------------------------------------------------------------------- /darknet53/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /mobilenetv2/__pycache__/cfg.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/mobilenetv2/__pycache__/cfg.cpython-38.pyc -------------------------------------------------------------------------------- /mobilenetv2/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/mobilenetv2/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /darknet53/__pycache__/net_block.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/__pycache__/net_block.cpython-38.pyc -------------------------------------------------------------------------------- /mobilenetv2/__pycache__/dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/mobilenetv2/__pycache__/dataset.cpython-38.pyc -------------------------------------------------------------------------------- /mobilenetv2/__pycache__/net_block.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/mobilenetv2/__pycache__/net_block.cpython-38.pyc -------------------------------------------------------------------------------- /darknet53/__pycache__/darbnet53_module.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/__pycache__/darbnet53_module.cpython-38.pyc -------------------------------------------------------------------------------- /darknet53/__pycache__/mobilenet_v2_module.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/__pycache__/mobilenet_v2_module.cpython-38.pyc -------------------------------------------------------------------------------- 
/mobilenetv2/__pycache__/mobilenet_v2_module.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/mobilenetv2/__pycache__/mobilenet_v2_module.cpython-38.pyc -------------------------------------------------------------------------------- /darknet53/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /mobilenetv2/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # yolo-v3-DarkNet53-MobileNet-V2 2 | 分别基于DarkNet53和MobileNet-V2两个网络实现yolo-v3 3 | 4 | 5 | DarkNet53的B站视频地址:https://www.bilibili.com/video/BV1Rf4y1n7mG?spm_id_from=333.999.0.0 6 | 7 | 其中MobileNet-V2网络实现,是因为他很轻量级,这个网络训练比较快,虽然准确率不如DarkNet53 8 | -------------------------------------------------------------------------------- /darknet53/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /mobilenetv2/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /darknet53/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /mobilenetv2/.idea/modules.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /darknet53/test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | 4 | # a = torch.Tensor([1, 2, 3, 4]) 5 | # b = a < 3 # mask 6 | # print(b) 7 | # print(a[b]) 8 | # print(b.nonzero()) 9 | # print(a[b.nonzero()]) 10 | 11 | a = torch.Tensor([[1, 2], [5, 6], [3, 1], [2, 8]]) 12 | # b = a < 3 13 | # print(b) 14 | # print(a[b]) 15 | b = a[:, 1] > 5 16 | print(b) 17 | print(a[b]) 18 | print(b.nonzero()) 19 | 20 | print(math.modf(3.4)) 21 | 22 | 23 | print(400/32) -------------------------------------------------------------------------------- /mobilenetv2/cfg.py: -------------------------------------------------------------------------------- 1 | 2 | IMG_HEIGHT = 416 3 | IMG_WIDTH = 416 4 | 5 | CLASS_NUM = 3 6 | 7 | ANCHORS_GROUP = { 8 | 13: [[311, 247], [159, 232], [200, 117]], 9 | 26: [[89, 159], [91, 74], [47, 97]], 10 | 52: [[48, 34], [25, 55], [15, 21]] 11 | } 12 | 13 | ANCHORS_GROUP_AREA = { 14 | 13: [x * y for x, y in ANCHORS_GROUP[13]], 15 | 26: [x * y for x, y in ANCHORS_GROUP[26]], 16 | 52: [x * y for x, y in ANCHORS_GROUP[52]], 17 | } 18 | -------------------------------------------------------------------------------- /darknet53/.idea/yolov3.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | -------------------------------------------------------------------------------- /darknet53/cfg.py: -------------------------------------------------------------------------------- 1 | 2 | IMG_HEIGHT = 416 3 | IMG_WIDTH = 416 4 | 5 | CLASS_NUM = 3 6 | 7 | ANCHORS_GROUP = { 8 | 13: [[270, 254], [291, 179], [162, 304]], 9 | 26: [[175, 222], [112, 235], [175, 140]], 10 | 52: [[81, 118], [53, 142], [44, 28]] 11 | } 12 | 13 | 
ANCHORS_GROUP_AREA = { 14 | 13: [x * y for x, y in ANCHORS_GROUP[13]], 15 | 26: [x * y for x, y in ANCHORS_GROUP[26]], 16 | 52: [x * y for x, y in ANCHORS_GROUP[52]], 17 | } 18 | -------------------------------------------------------------------------------- /mobilenetv2/.idea/yolov3.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | -------------------------------------------------------------------------------- /darknet53/data/data.txt: -------------------------------------------------------------------------------- 1 | 000017.jpg 0 201 112 81 118 1 213 179 271 223 2 | 000022.jpg 1 181 160 249 149 0 183 113 57 154 3 | 000261.jpg 2 168 167 336 208 2 193 155 291 179 4 | 000455.jpg 0 213 102 74 147 1 259 159 312 188 5 | 000812.jpg 2 197 209 230 134 0 211 152 175 249 0 332 33 51 36 0 99 10 36 19 6 | 000930.jpg 0 210 133 132 146 2 201 180 171 107 7 | 001183.jpg 1 110 184 89 98 1 202 198 175 195 0 212 138 49 129 8 | 001370.jpg 2 97 297 121 235 2 208 273 84 223 0 100 208 165 337 0 218 198 112 302 9 | 001579.jpg 2 90 333 179 164 2 149 288 239 155 0 158 224 128 225 0 53 264 105 267 10 | 001988.jpg 2 142 268 268 286 0 143 190 158 271 11 | -------------------------------------------------------------------------------- /darknet53/data/test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image,ImageDraw 3 | import os 4 | f=open('data.txt','r') 5 | datas=f.readlines() 6 | for data in datas: 7 | data=data.strip().split() 8 | img_path=os.path.join('images',data[0]) 9 | img=Image.open(img_path) 10 | w,h=img.size 11 | case=416/max(w,h) 12 | _boxes=np.array([float(x) for x in data[1:]]) 13 | boxes=np.split(_boxes,len(_boxes)//5) 14 | draw=ImageDraw.Draw(img) 15 | for box in boxes: 16 | cls,cx,cy,w,h=box 17 | x1,y1,x2,y2=cx/case-0.5*w/case,cy/case-0.5*h/case,cx/case+0.5*w/case,cy/case+0.5*h/case 18 | 
draw.rectangle((x1,y1,x2,y2),outline='red',width=2) 19 | 20 | img.show() -------------------------------------------------------------------------------- /mobilenetv2/data/test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image,ImageDraw 3 | import os 4 | f=open('data.txt','r') 5 | datas=f.readlines() 6 | for data in datas: 7 | data=data.strip().split() 8 | img_path=os.path.join('images',data[0]) 9 | img=Image.open(img_path) 10 | w,h=img.size 11 | case=416/max(w,h) 12 | _boxes=np.array([float(x) for x in data[1:]]) 13 | boxes=np.split(_boxes,len(_boxes)//5) 14 | draw=ImageDraw.Draw(img) 15 | for box in boxes: 16 | cls,cx,cy,w,h=box 17 | x1,y1,x2,y2=cx/case-0.5*w/case,cy/case-0.5*h/case,cx/case+0.5*w/case,cy/case+0.5*h/case 18 | draw.rectangle((x1,y1,x2,y2),outline='red',width=2) 19 | 20 | img.show() -------------------------------------------------------------------------------- /darknet53/data/image_voc/000017.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2007 3 | 000017.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 228217974 9 | 10 | 11 | genewolf 12 | whiskey kitten 13 | 14 | 15 | 480 16 | 364 17 | 3 18 | 19 | 0 20 | 21 | person 22 | Left 23 | 0 24 | 0 25 | 26 | 185 27 | 62 28 | 279 29 | 199 30 | 31 | 32 | 33 | horse 34 | Left 35 | 0 36 | 0 37 | 38 | 90 39 | 78 40 | 403 41 | 336 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /darknet53/data/image_voc/000022.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2007 3 | 000022.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 336380018 9 | 10 | 11 | Lothar Lenz 12 | Lothar Lenz 13 | 14 | 15 | 500 16 | 332 17 | 3 18 | 19 | 0 20 | 21 | horse 22 | Right 23 | 0 24 | 0 25 | 26 | 68 27 | 103 28 | 368 29 | 283 30 | 31 | 32 | 33 | person 34 | Right 35 | 0 
36 | 0 37 | 38 | 186 39 | 44 40 | 255 41 | 230 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /darknet53/data/image_voc/000455.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2007 3 | 000455.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 323858157 9 | 10 | 11 | Lothar Lenz 12 | Lothar Lenz 13 | 14 | 15 | 500 16 | 332 17 | 3 18 | 19 | 0 20 | 21 | person 22 | Left 23 | 0 24 | 0 25 | 26 | 213 27 | 35 28 | 302 29 | 212 30 | 31 | 32 | 33 | horse 34 | Left 35 | 0 36 | 0 37 | 38 | 125 39 | 79 40 | 500 41 | 306 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /darknet53/data/image_voc/001988.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2007 3 | 001988.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 272556816 9 | 10 | 11 | .s.e.a.n. 12 | Sean Scott 13 | 14 | 15 | 333 16 | 500 17 | 3 18 | 19 | 0 20 | 21 | bicycle 22 | Left 23 | 0 24 | 0 25 | 26 | 10 27 | 151 28 | 333 29 | 495 30 | 31 | 32 | 33 | person 34 | Unspecified 35 | 0 36 | 0 37 | 38 | 78 39 | 66 40 | 269 41 | 392 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /darknet53/data/image_voc/000261.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2007 3 | 000261.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 336032008 9 | 10 | 11 | MIKEBECKHAM DOT NET 12 | Mike Beckham 13 | 14 | 15 | 500 16 | 375 17 | 3 18 | 19 | 0 20 | 21 | bicycle 22 | Left 23 | 0 24 | 0 25 | 26 | 1 27 | 76 28 | 405 29 | 326 30 | 31 | 32 | 33 | bicycle 34 | Right 35 | 0 36 | 0 37 | 38 | 58 39 | 79 40 | 408 41 | 295 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /darknet53/data/image_voc/000930.xml: 
-------------------------------------------------------------------------------- 1 | 2 | VOC2007 3 | 000930.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 200825654 9 | 10 | 11 | bikeride 12 | Brent Soderberg 13 | 14 | 15 | 500 16 | 375 17 | 3 18 | 19 | 0 20 | 21 | person 22 | Unspecified 23 | 0 24 | 0 25 | 26 | 174 27 | 72 28 | 333 29 | 248 30 | 31 | 32 | 33 | bicycle 34 | Left 35 | 0 36 | 0 37 | 38 | 139 39 | 153 40 | 345 41 | 282 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /darknet53/data/image_voc/001183.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2007 3 | 001183.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 308220821 9 | 10 | 11 | lxt 12 | Laura Thomson 13 | 14 | 15 | 500 16 | 375 17 | 3 18 | 19 | 0 20 | 21 | horse 22 | Left 23 | 1 24 | 0 25 | 26 | 79 27 | 163 28 | 187 29 | 281 30 | 31 | 32 | 33 | horse 34 | Left 35 | 0 36 | 0 37 | 38 | 138 39 | 121 40 | 349 41 | 356 42 | 43 | 44 | 45 | person 46 | Unspecified 47 | 0 48 | 0 49 | 50 | 227 51 | 89 52 | 286 53 | 245 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /darknet53/data/make_data_txt.py: -------------------------------------------------------------------------------- 1 | import math 2 | import xml.etree.ElementTree as ET 3 | import os 4 | from PIL import Image 5 | class_dict={ 6 | 'person':0, 7 | 'horse':1, 8 | 'bicycle':2, 9 | } 10 | xml_files=os.listdir('image_voc') 11 | with open('data.txt','a') as f: 12 | for xml_file in xml_files: 13 | tree=ET.parse(os.path.join('image_voc',xml_file)) 14 | root=tree.getroot() 15 | image_name=root.find('filename') 16 | class_name=root.findall('object/name') 17 | boxes=root.findall('object/bndbox') 18 | filename=image_name.text 19 | temp=max(Image.open(os.path.join('images',filename)).size) 20 | print(416/temp) 21 | data=[] 22 | data.append(filename) 23 | for cls,box 
in zip(class_name,boxes): 24 | cls=class_dict[cls.text] 25 | cx,cy=math.floor((int(box[0].text)+int(box[2].text))/2),math.floor((int(box[1].text)+int(box[3].text))/2) 26 | w,h=(int(box[2].text)-int(box[0].text)),(int(box[3].text)-int(box[1].text)) 27 | obj=f"{cls},{math.floor(cx*416/temp)},{math.floor(cy*416/temp)},{math.floor(w*416/temp)},{math.floor(h*416/temp)}" 28 | data.append(obj) 29 | str='' 30 | for i in data: 31 | str=str+i+',' 32 | str=str.replace(',',' ').strip() 33 | f.write(str+'\n') 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /darknet53/data/image_voc/001579.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2007 3 | 001579.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 250936479 9 | 10 | 11 | selimski 12 | ? 13 | 14 | 15 | 375 16 | 500 17 | 3 18 | 19 | 0 20 | 21 | bicycle 22 | Right 23 | 1 24 | 0 25 | 26 | 1 27 | 302 28 | 217 29 | 500 30 | 31 | 32 | 33 | bicycle 34 | Right 35 | 1 36 | 0 37 | 38 | 36 39 | 254 40 | 324 41 | 441 42 | 43 | 44 | 45 | person 46 | Right 47 | 0 48 | 0 49 | 50 | 114 51 | 135 52 | 269 53 | 406 54 | 55 | 56 | 57 | person 58 | Right 59 | 1 60 | 0 61 | 62 | 1 63 | 157 64 | 128 65 | 479 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /darknet53/data/image_voc/000812.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2007 3 | 000812.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 329837865 9 | 10 | 11 | .s.e.a.n. 
12 | Sean Scott 13 | 14 | 15 | 500 16 | 333 17 | 3 18 | 19 | 0 20 | 21 | bicycle 22 | Left 23 | 1 24 | 0 25 | 26 | 99 27 | 171 28 | 376 29 | 333 30 | 31 | 32 | 33 | person 34 | Unspecified 35 | 0 36 | 0 37 | 38 | 149 39 | 33 40 | 360 41 | 333 42 | 43 | 44 | 45 | person 46 | Unspecified 47 | 1 48 | 1 49 | 50 | 369 51 | 18 52 | 431 53 | 62 54 | 55 | 56 | 57 | person 58 | Unspecified 59 | 1 60 | 1 61 | 62 | 97 63 | 1 64 | 141 65 | 25 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /darknet53/data/image_voc/001370.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2007 3 | 001370.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 220172534 9 | 10 | 11 | thevelodrome.com 12 | ? 13 | 14 | 15 | 333 16 | 500 17 | 3 18 | 19 | 0 20 | 21 | bicycle 22 | Frontal 23 | 0 24 | 0 25 | 26 | 44 27 | 217 28 | 190 29 | 500 30 | 31 | 32 | 33 | bicycle 34 | Frontal 35 | 1 36 | 0 37 | 38 | 199 39 | 195 40 | 301 41 | 464 42 | 43 | 44 | 45 | person 46 | Unspecified 47 | 0 48 | 0 49 | 50 | 22 51 | 48 52 | 221 53 | 454 54 | 55 | 56 | 57 | person 58 | Unspecified 59 | 1 60 | 0 61 | 62 | 196 63 | 57 64 | 331 65 | 420 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /mobilenetv2/data/make_data_txt.py: -------------------------------------------------------------------------------- 1 | import math 2 | import xml.etree.ElementTree as ET 3 | import os 4 | from PIL import Image 5 | class_dict={ 6 | 'aeroplane': 0, 7 | 'bicycle': 1, 8 | 'bird': 2, 9 | 'boat': 3, 10 | 'bottle': 4, 11 | 'bus': 5, 12 | 'car': 6, 13 | 'cat': 7, 14 | 'chair': 8, 15 | 'cow': 9, 16 | 'diningtable': 10, 17 | 'dog': 11, 18 | 'horse': 12, 19 | 'motorbike': 13, 20 | 'person': 14, 21 | 'pottedplant': 15, 22 | 'sheep': 16, 23 | 'sofa': 17, 24 | 'train': 18, 25 | 'tvmonitor': 19 26 | } 27 | train_xml_path=r'G:\data\voc\voc_train\VOC2007\Annotations' 28 | 
train_img_path=r'G:\data\voc\voc_train\VOC2007\JPEGImages' 29 | test_xml_path=r'G:\data\voc\voc_test\VOC2007\Annotations' 30 | test_img_path=r'G:\data\voc\voc_test\VOC2007\JPEGImages' 31 | 32 | xml_files=os.listdir(train_xml_path) 33 | with open('train_data.txt','a') as f: 34 | for xml_file in xml_files: 35 | tree=ET.parse(os.path.join(train_xml_path,xml_file)) 36 | root=tree.getroot() 37 | image_name=root.find('filename') 38 | class_name=root.findall('object/name') 39 | boxes=root.findall('object/bndbox') 40 | filename=image_name.text 41 | temp=max(Image.open(os.path.join(train_img_path,filename)).size) 42 | data=[] 43 | data.append(filename) 44 | for cls,box in zip(class_name,boxes): 45 | cls=class_dict[cls.text] 46 | cx,cy=math.floor((int(box[0].text)+int(box[2].text))/2),math.floor((int(box[1].text)+int(box[3].text))/2) 47 | w,h=(int(box[2].text)-int(box[0].text)),(int(box[3].text)-int(box[1].text)) 48 | obj=f"{cls},{math.floor(cx*416/temp)},{math.floor(cy*416/temp)},{math.floor(w*416/temp)},{math.floor(h*416/temp)}" 49 | data.append(obj) 50 | str='' 51 | for i in data: 52 | str=str+i+',' 53 | str=str.replace(',',' ').strip() 54 | f.write(str+'\n') 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /darknet53/trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | 5 | import torch.nn 6 | from mobilenet_v2_module import * 7 | import dataset 8 | from darbnet53_module import * 9 | from torch import nn 10 | 11 | def loss_fn(output, target, alpha): 12 | output = output.permute(0, 2, 3, 1)#N,45,13,13==>N,13,13,45 13 | output = output.reshape(output.size(0), output.size(1), output.size(2), 3, -1)#N,13,13,3,15 14 | # print("output:",output.shape) 15 | mask_obj = target[..., 0] > 0#N,13,13,3 16 | # print("mask_obj:",mask_obj.shape) 17 | mask_noobj = target[..., 0] == 0 18 | # print("mask_noobj:",mask_noobj.shape) 19 | # 
print("output[mask_obj]:",output[mask_obj].shape) 20 | # print("output[mask_noobj]:", output[mask_noobj].shape) 21 | loss_p_fun=nn.BCELoss() 22 | loss_p=loss_p_fun(torch.sigmoid(output[...,0]),target[...,0]) 23 | loss_box_fun=nn.MSELoss() 24 | loss_box=loss_box_fun(output[mask_obj][...,1:5],target[mask_obj][...,1:5]) 25 | loss_cls_box_fun=nn.CrossEntropyLoss() 26 | loss_cls_box=loss_cls_box_fun(output[mask_obj][...,5:],torch.argmax(target[mask_obj][...,5:],dim=1,keepdim=True).squeeze(dim=1)) 27 | loss = alpha * loss_p + (1-alpha)*0.5*loss_box+ (1-alpha)*0.5*loss_cls_box 28 | return loss 29 | 30 | 31 | if __name__ == '__main__': 32 | weight_path= 'darknet_params/net597.pt' 33 | myDataset = dataset.MyDataset() 34 | train_loader = torch.utils.data.DataLoader(myDataset, batch_size=5, shuffle=True) 35 | 36 | net = Darknet53().cuda() 37 | if os.path.exists(weight_path): 38 | net.load_state_dict(torch.load(weight_path)) 39 | net.train() 40 | 41 | opt = torch.optim.Adam(net.parameters()) 42 | epoch = 0 43 | while True: 44 | for target_13, target_26, target_52, img_data in train_loader: 45 | target_13, target_26, target_52, img_data=target_13.cuda(), target_26.cuda(), target_52.cuda(), img_data.cuda() 46 | output_13, output_26, output_52 = net(img_data) 47 | loss_13 = loss_fn(output_13.float(), target_13.float(), 0.6) 48 | loss_26 = loss_fn(output_26.float(), target_26.float(), 0.6) 49 | loss_52 = loss_fn(output_52.float(), target_52.float(), 0.6) 50 | # 51 | loss = loss_13 + loss_26 + loss_52 52 | opt.zero_grad() 53 | loss.backward() 54 | opt.step() 55 | print(epoch,loss.item()) 56 | torch.save(net.state_dict(), f'darknet_params/net{epoch}.pt') 57 | print(f'{epoch}保存成功') 58 | epoch+=1 -------------------------------------------------------------------------------- /mobilenetv2/trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | 5 | import torch.nn 6 | from mobilenet_v2_module import * 7 | 
from torch import nn


def loss_fn(output, target, alpha):
    """Single-scale YOLOv3 loss.

    output: raw network map, N x 45 x H x W (3 anchors x (conf + 4 box + 10 cls),
            per the reshape below).
    target: N x H x W x 3 x 15 label tensor built by the dataset.
    alpha:  weight of the confidence term; box and class terms split (1 - alpha).
    Returns a scalar loss tensor.
    """
    output = output.permute(0, 2, 3, 1)  # N,45,13,13 -> N,13,13,45
    output = output.reshape(output.size(0), output.size(1), output.size(2), 3, -1)  # N,13,13,3,15
    mask_obj = target[..., 0] > 0        # N,13,13,3 — cells that own a target box
    mask_noobj = target[..., 0] == 0     # currently unused; kept for reference
    # BCEWithLogitsLoss fuses the sigmoid into the loss; numerically more
    # stable than BCELoss(torch.sigmoid(x), t) and identical in value.
    loss_p_fun = nn.BCEWithLogitsLoss()
    loss_p = loss_p_fun(output[..., 0], target[..., 0])
    loss_box_fun = nn.MSELoss()
    loss_box = loss_box_fun(output[mask_obj][..., 1:5], target[mask_obj][..., 1:5])
    loss_cls_box_fun = nn.CrossEntropyLoss()
    # one-hot class vector -> class index expected by CrossEntropyLoss
    loss_cls_box = loss_cls_box_fun(output[mask_obj][..., 5:],
                                    torch.argmax(target[mask_obj][..., 5:], dim=1, keepdim=True).squeeze(dim=1))
    loss = alpha * loss_p + (1 - alpha) * 0.5 * loss_box + (1 - alpha) * 0.5 * loss_cls_box
    return loss


if __name__ == '__main__':
    import dataset  # project-local; imported here so loss_fn stays importable on its own

    weight_path = 'mobilenetv2_params/net0-599-5949.pt'
    myDataset = dataset.MyDataset()
    train_loader = torch.utils.data.DataLoader(myDataset, batch_size=3, shuffle=True)

    net = MobileNet_v2(config).cuda()
    if os.path.exists(weight_path):
        net.load_state_dict(torch.load(weight_path))
    net.train()

    opt = torch.optim.Adam(net.parameters())
    epoch = 0
    while True:
        for i, (target_13, target_26, target_52, img_data) in enumerate(train_loader):
            target_13, target_26, target_52, img_data = target_13.cuda(), target_26.cuda(), target_52.cuda(), img_data.cuda()
            output_13, output_26, output_52 = net(img_data)
            loss_13 = loss_fn(output_13.float(), target_13.float(), 0.6)
            loss_26 = loss_fn(output_26.float(), target_26.float(), 0.6)
            loss_52 = loss_fn(output_52.float(), target_52.float(), 0.6)
            loss = loss_13 + loss_26 + loss_52
50 | opt.zero_grad() 51 | loss.backward() 52 | opt.step() 53 | print(f"{epoch}--{i}--{loss.item()}") 54 | # if i%10==0: 55 | # print(f"{epoch}--{i}--{loss.item()}") 56 | # if (i+1)%200==0: 57 | # torch.save(net.state_dict(), f'mobilenetv2_params/net{epoch}-{i}-{i}.pt') 58 | # print(f'{epoch}保存成功') 59 | torch.save(net.state_dict(), f'g_params/net{epoch}.pt') 60 | print(f'{epoch}保存成功') 61 | epoch+=1 -------------------------------------------------------------------------------- /darknet53/dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset 3 | import torchvision 4 | import numpy as np 5 | import cfg 6 | import os 7 | from utils import * 8 | from PIL import Image,ImageDraw 9 | import math 10 | 11 | LABEL_FILE_PATH = "data/data.txt" 12 | IMG_BASE_DIR = "data/images" 13 | 14 | transforms = torchvision.transforms.Compose([ 15 | torchvision.transforms.ToTensor() 16 | ]) 17 | 18 | 19 | def one_hot(cls_num, v): 20 | b = np.zeros(cls_num) 21 | b[v] = 1. 
22 | return b 23 | 24 | 25 | class MyDataset(Dataset): 26 | 27 | def __init__(self): 28 | with open(LABEL_FILE_PATH) as f: 29 | self.dataset = f.readlines() 30 | 31 | def __len__(self): 32 | return len(self.dataset) 33 | 34 | def __getitem__(self, index): 35 | labels = {} 36 | 37 | line = self.dataset[index] 38 | strs = line.split() 39 | _img_data = make_image_data(os.path.join(IMG_BASE_DIR, strs[0])) 40 | w,h=_img_data.size[0],_img_data.size[1] 41 | 42 | _img_data = _img_data.resize((416,416))#此处要等比缩放 43 | img_data = transforms(_img_data) 44 | draw=ImageDraw.Draw(_img_data) 45 | _boxes = np.array([float(x) for x in strs[1:]]) 46 | # print(_boxes[0]) 47 | # _boxes = np.array(list(map(float, strs[1:]))) 48 | boxes = np.split(_boxes, len(_boxes) // 5) 49 | index = 0 50 | for feature_size, anchors in cfg.ANCHORS_GROUP.items(): 51 | labels[feature_size] = np.zeros(shape=(feature_size, feature_size, 3, 5 + cfg.CLASS_NUM)) 52 | 53 | for box in boxes: 54 | cls, cx, cy, w, h = box 55 | draw.rectangle((cx-w*0.5,cy-h*0.5,cx+w*0.5,cy+h*0.5),outline='red',width=1) 56 | _img_data.show() 57 | cx_offset, cx_index = math.modf(cx * feature_size / cfg.IMG_WIDTH) 58 | cy_offset, cy_index = math.modf(cy * feature_size / cfg.IMG_WIDTH) 59 | for i, anchor in enumerate(anchors): 60 | 61 | anchor_area = cfg.ANCHORS_GROUP_AREA[feature_size][i] 62 | p_w, p_h = w / (anchor[0]), h / (anchor[1]) 63 | p_area = w * h 64 | iou = min(p_area, anchor_area) / max(p_area, anchor_area) 65 | index+=1 66 | # print(feature_size, cx_index, cy_index, i) 67 | # print(box) 68 | if labels[feature_size][int(cy_index), int(cx_index), i][0] 0: 66 | _box = sort_boxes[0] 67 | keep_boxes.append(_box) 68 | 69 | if len(sort_boxes) > 1: 70 | _boxes = sort_boxes[1:] 71 | # print(_clses.shape) 72 | # print(_cls.shape) 73 | # print(mask.shape, "-------------------") 74 | # print(_boxes) 75 | # print(_boxes.shape) 76 | 77 | _iou = iou(_box, _boxes, mode) 78 | sort_boxes=_boxes[_iou< thresh] 79 | 80 | else: 81 | break 82 | 
83 | return keep_boxes 84 | 85 | 86 | # def detect(feature_map, thresh): 87 | # masks = feature_map[:, 4, :, :] > thresh 88 | # idxs = torch.nonzero(masks) 89 | 90 | 91 | if __name__ == '__main__': 92 | # box = torch.Tensor([2, 2, 3, 3, 6]) 93 | # boxes = torch.Tensor([[2, 2, 3, 3, 6], [2, 2, 4, 4, 5], [2, 2, 5, 5, 4]]) 94 | # print(iou(box, boxes, mode="inter")) 95 | # print(nms(boxes, 0.1)) 96 | # import numpy as np 97 | # 98 | # a = np.array([[1, 2], [3, 4]]) 99 | # print(a[:, 1]) 100 | make_image_data('images/1.jpg') 101 | -------------------------------------------------------------------------------- /darknet53/detector.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from darbnet53_module import * 4 | import cfg 5 | from PIL import Image,ImageDraw 6 | from darbnet53_module import * 7 | from dataset import * 8 | import os 9 | class_dict={ 10 | 0:'person', 11 | 1:'horse', 12 | 2:'bicycle', 13 | } 14 | class Detector(torch.nn.Module): 15 | 16 | def __init__(self): 17 | super(Detector, self).__init__() 18 | 19 | self.net = Darknet53() 20 | self.net.load_state_dict(torch.load('darknet_params/net597.pt')) 21 | self.net.eval() 22 | 23 | def forward(self, input, thresh, anchors,case): 24 | output_13, output_26, output_52 = self.net(input) 25 | 26 | idxs_13, vecs_13 = self._filter(output_13, thresh) 27 | boxes_13 = self._parse(idxs_13, vecs_13, 32, anchors[13],case) 28 | 29 | idxs_26, vecs_26 = self._filter(output_26, thresh) 30 | boxes_26 = self._parse(idxs_26, vecs_26, 16, anchors[26],case) 31 | 32 | idxs_52, vecs_52 = self._filter(output_52, thresh) 33 | boxes_52 = self._parse(idxs_52, vecs_52, 8, anchors[52],case) 34 | boxes=torch.cat([boxes_13, boxes_26, boxes_52], dim=0) 35 | # rst=[] 36 | # for i in range(3): 37 | # bs=boxes[boxes[...,6]==i] 38 | # for j in bs 39 | # bs = nms(bs, 0.9, mode="inter") 40 | # rst.append(bs) 41 | boxes=nms(boxes, 0.5, mode='inter') 42 | return boxes 43 | 44 | 45 | def 
_filter(self, output, thresh): 46 | output = output.permute(0, 2, 3, 1) 47 | output = output.reshape(output.size(0), output.size(1), output.size(2), 3, -1) 48 | #mask:N,H,W,3,15 49 | 50 | mask = torch.sigmoid(output[..., 0]) > thresh 51 | 52 | idxs = mask.nonzero() 53 | vecs = output[mask] 54 | return idxs, vecs 55 | 56 | def _parse(self, idxs, vecs, t, anchors,case): 57 | anchors = torch.Tensor(anchors) 58 | 59 | n = idxs[:, 0] # 所属的图片 60 | a = idxs[:, 3] # 建议框 61 | 62 | cy = (idxs[:, 1].float() + vecs[:, 2]) * t /case # 原图的中心点y 63 | cx = (idxs[:, 2].float() + vecs[:, 1]) * t /case # 原图的中心点x 64 | 65 | w = anchors[a, 0] * torch.exp(vecs[:, 3])/case 66 | h = anchors[a, 1] * torch.exp(vecs[:, 4])/case 67 | p=vecs[:,0] 68 | cls_p=vecs[:,5:] 69 | cls_p=torch.softmax(cls_p,dim=1) 70 | cls_index = torch.argmax(cls_p, dim=1) 71 | return torch.stack([n.float(), torch.sigmoid(p),cx, cy, w, h,cls_index], dim=1) 72 | 73 | 74 | if __name__ == '__main__': 75 | detector = Detector() 76 | # y = detector(torch.randn(3, 3, 416, 416), 0.3, cfg.ANCHORS_GROUP,0.5) 77 | # print(y.shape) 78 | for i in os.listdir('images'): 79 | img=Image.open('images/'+i) 80 | _img = make_image_data('images/'+i) 81 | w, h = _img.size[0], _img.size[1] 82 | case = 416 / w 83 | # print(case) 84 | _img = _img.resize((416, 416)) # 此处要等比缩放 85 | _img_data = transforms(_img) 86 | _img_data=torch.unsqueeze(_img_data,dim=0) 87 | # print(_img_data.shape) 88 | result=detector(_img_data, 0.2, cfg.ANCHORS_GROUP,case) 89 | draw=ImageDraw.Draw(img) 90 | for rst in result: 91 | if len(rst)==0: 92 | continue 93 | else: 94 | # rst=rst[0] 95 | x1,y1,x2,y2=rst[2]-0.5*rst[4],rst[3]-0.5*rst[5],rst[2]+0.5*rst[4],rst[3]+0.5*rst[5] 96 | print(f'置信度:{str(rst[1].item())[:4]} 坐标点:{x1,y1,x2,y2} 类别:{class_dict[int(rst[6].item())]}') 97 | draw.text((x1,y1),class_dict[int(rst[6].item())]+str(rst[1].item())[:4]) 98 | draw.rectangle((x1,y1,x2,y2),width=1,outline='red') 99 | img.show() 100 | 
# -------------------------------------------------------------------------------- /mobilenetv2/utils.py --------------------------------------------------------------------------------
import numpy as np
import torch


def make_image_data(path):
    """Open *path* and paste it (top-left aligned) onto a black square canvas."""
    from PIL import Image  # lazy import: Pillow is only needed for image loading
    img = Image.open(path)
    w, h = img.size[0], img.size[1]
    side = max(h, w)
    mask = Image.new('RGB', (side, side), (0, 0, 0))
    mask.paste(img, (0, 0))
    return mask


def iou(box, boxes, mode="inter"):
    """IoU of one box against many; rows are (.., conf, cx, cy, w, h, ..).

    mode='inter': intersection over union; mode='min': intersection over the
    smaller of the two areas.
    """
    cx, cy, w, h = box[2], box[3], box[4], box[5]
    cxs, cys, ws, hs = boxes[:, 2], boxes[:, 3], boxes[:, 4], boxes[:, 5]

    box_area = w * h        # area of the single box
    boxes_area = ws * hs    # areas of the candidate boxes

    # center/size -> corner coordinates
    _x1, _x2, _y1, _y2 = cx - w / 2, cx + w / 2, cy - h / 2, cy + h / 2
    _xx1, _xx2, _yy1, _yy2 = cxs - ws / 2, cxs + ws / 2, cys - hs / 2, cys + hs / 2

    xx1 = torch.maximum(_x1, _xx1)  # intersection top-left
    yy1 = torch.maximum(_y1, _yy1)
    xx2 = torch.minimum(_x2, _xx2)  # intersection bottom-right
    yy2 = torch.minimum(_y2, _yy2)

    # clamp to zero so disjoint boxes produce no (negative) overlap
    w = torch.clamp(xx2 - xx1, min=0)
    h = torch.clamp(yy2 - yy1, min=0)

    inter = w * h

    if mode == 'inter':
        return inter / (box_area + boxes_area - inter)
    elif mode == 'min':
        return inter / torch.min(box_area, boxes_area)


def nms(boxes, thresh, mode='inter'):
    """Greedy non-maximum suppression; returns the kept rows as a list."""
    args = boxes[:, 1].argsort(descending=True)  # highest confidence first

    sort_boxes = boxes[args]
    keep_boxes = []

    while len(sort_boxes) > 0:
        _box = sort_boxes[0]
        keep_boxes.append(_box)

        if len(sort_boxes) > 1:
            _boxes = sort_boxes[1:]
            _iou = iou(_box, _boxes, mode)
            sort_boxes = _boxes[_iou < thresh]  # drop boxes overlapping the kept one
        else:
            break

    return keep_boxes


if __name__ == '__main__':
    make_image_data('images/1.jpg')
# -------------------------------------------------------------------------------- /mobilenetv2/detector.py --------------------------------------------------------------------------------
import torch
| 3 | from mobilenet_v2_module import * 4 | import cfg 5 | from PIL import Image, ImageDraw 6 | from mobilenet_v2_module import * 7 | from dataset import * 8 | import os 9 | 10 | # class_dict = { 11 | # 0: 'aeroplane', 12 | # 1: 'bicycle', 13 | # 2: 'bird', 14 | # 3: 'boat', 15 | # 4: 'bottle', 16 | # 5: 'bus', 17 | # 6: 'car', 18 | # 7: 'cat', 19 | # 8: 'chair', 20 | # 9: 'cow', 21 | # 10: 'diningtable', 22 | # 11: 'dog', 23 | # 12: 'horse', 24 | # 13: 'motorbike', 25 | # 14: 'person', 26 | # 15: 'pottedplant', 27 | # 16: 'sheep', 28 | # 17: 'sofa', 29 | # 18: 'train', 30 | # 19: 'tvmonitor' 31 | # } 32 | class_dict = { 33 | 0:'person', 34 | 1:'horse', 35 | 2:'bicycle'} 36 | 37 | class Detector(torch.nn.Module): 38 | 39 | def __init__(self): 40 | super(Detector, self).__init__() 41 | 42 | self.net = MobileNet_v2(config) 43 | self.net.load_state_dict(torch.load('g_params/net237.pt')) 44 | self.net.eval() 45 | 46 | def forward(self, input, thresh, anchors, case): 47 | output_13, output_26, output_52 = self.net(input) 48 | 49 | idxs_13, vecs_13 = self._filter(output_13, thresh) 50 | boxes_13 = self._parse(idxs_13, vecs_13, 32, anchors[13], case) 51 | 52 | idxs_26, vecs_26 = self._filter(output_26, thresh) 53 | boxes_26 = self._parse(idxs_26, vecs_26, 16, anchors[26], case) 54 | 55 | idxs_52, vecs_52 = self._filter(output_52, thresh) 56 | boxes_52 = self._parse(idxs_52, vecs_52, 8, anchors[52], case) 57 | boxes = torch.cat([boxes_13, boxes_26, boxes_52], dim=0) 58 | boxes = nms(boxes, 0.4, mode="inter") 59 | # print(boxes) 60 | return boxes 61 | 62 | def _filter(self, output, thresh): 63 | output = output.permute(0, 2, 3, 1) 64 | output = output.reshape(output.size(0), output.size(1), output.size(2), 3, -1) 65 | # mask:N,H,W,3,15 66 | 67 | mask = torch.sigmoid(output[..., 0]) > thresh 68 | 69 | idxs = mask.nonzero() 70 | vecs = output[mask] 71 | return idxs, vecs 72 | 73 | def _parse(self, idxs, vecs, t, anchors, case): 74 | anchors = torch.Tensor(anchors) 75 | 76 | n 
= idxs[:, 0] # 所属的图片 77 | a = idxs[:, 3] # 建议框 78 | 79 | cy = (idxs[:, 1].float() + vecs[:, 2]) * t / case # 原图的中心点y 80 | cx = (idxs[:, 2].float() + vecs[:, 1]) * t / case # 原图的中心点x 81 | 82 | w = anchors[a, 0] * torch.exp(vecs[:, 3]) / case 83 | h = anchors[a, 1] * torch.exp(vecs[:, 4]) / case 84 | p = vecs[:, 0] 85 | cls_p = vecs[:, 5:] 86 | cls_p = torch.softmax(cls_p, dim=1) 87 | cls_index = torch.argmax(cls_p, dim=1) 88 | return torch.stack([n.float(), torch.sigmoid(p), cx, cy, w, h, cls_index], dim=1) 89 | 90 | 91 | if __name__ == '__main__': 92 | detector = Detector() 93 | # y = detector(torch.randn(3, 3, 416, 416), 0.3, cfg.ANCHORS_GROUP,0.5) 94 | # print(y.shape) 95 | for i in os.listdir('E:/pythonSpace/yolov3/darknet53/data/images'): 96 | img = Image.open('E:/pythonSpace/yolov3/darknet53/data/images/' + i) 97 | _img = make_image_data('E:/pythonSpace/yolov3/darknet53/data/images/' + i) 98 | w, h = _img.size[0], _img.size[1] 99 | case = 416 / w 100 | # print(case) 101 | _img = _img.resize((416, 416)) # 此处要等比缩放 102 | _img_data = transforms(_img) 103 | _img_data = torch.unsqueeze(_img_data, dim=0) 104 | # print(_img_data.shape) 105 | result = detector(_img_data, 0.2, cfg.ANCHORS_GROUP, case) 106 | draw = ImageDraw.Draw(img) 107 | for rst in result: 108 | x1, y1, x2, y2 = rst[2] - 0.5 * rst[4], rst[3] - 0.5 * rst[5], rst[2] + 0.5 * rst[4], rst[3] + 0.5 * rst[5] 109 | print(f'置信度:{str(rst[1].item())[:4]} 坐标点:{x1, y1, x2, y2} 类别:{class_dict[int(rst[6].item())]}') 110 | draw.text((x1, y1), class_dict[int(rst[6].item())] + str(rst[1].item())[:4]) 111 | draw.rectangle((x1, y1, x2, y2), width=1, outline='red') 112 | img.show() 113 | -------------------------------------------------------------------------------- /mobilenetv2/mobilenet_v2_module.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from net_block import * 4 | 5 | config = [ 6 | [-1, 32, 1, 2], 7 | [1, 16, 1, 1], 8 | [6, 24, 
2, 2], 9 | [6, 32, 4, 2], 10 | [6, 64, 4, 2], 11 | [6, 96, 4, 1], 12 | [6, 160, 4, 2], 13 | [6, 320, 2, 1], 14 | ] 15 | 16 | 17 | class Bottleneck(nn.Module): 18 | def __init__(self, c_in, i, t, c, n, s): 19 | super(Bottleneck, self).__init__() 20 | self.i = i 21 | self.n = n 22 | _s = s if i == n - 1 else 1 23 | _c = c if i == n - 1 else c_in 24 | _p_c = c_in * t 25 | 26 | self.sub_module = nn.Sequential( 27 | nn.Conv2d(c_in, _p_c, 1, 1, bias=False), 28 | nn.BatchNorm2d(_p_c), 29 | nn.ReLU6(), 30 | nn.Conv2d(_p_c, _p_c, 3, _s, 1, bias=False), 31 | nn.BatchNorm2d(_p_c), 32 | nn.ReLU6(), 33 | nn.Conv2d(_p_c, _c, 1, 1, bias=False), 34 | nn.BatchNorm2d(_c) 35 | ) 36 | 37 | def forward(self, x): 38 | if self.i == self.n - 1: 39 | return self.sub_module(x) 40 | else: 41 | return self.sub_module(x) + x 42 | 43 | 44 | class MobileNet_v2(nn.Module): 45 | def __init__(self, config): 46 | super(MobileNet_v2, self).__init__() 47 | self.input_layer = nn.Sequential( 48 | nn.Conv2d(3, 32, 3, 2, 1, bias=False), 49 | nn.BatchNorm2d(32), 50 | nn.ReLU6() 51 | ) 52 | self.blocks1 = [] 53 | self.blocks2 = [] 54 | self.blocks3 = [] 55 | c_in = config[0][1] 56 | for t, c, n, s in config[1:4]: 57 | for i in range(n): 58 | self.blocks1.append(Bottleneck(c_in, i, t, c, n, s)) 59 | c_in = c 60 | for t, c, n, s in config[4:5]: 61 | for i in range(n): 62 | self.blocks2.append(Bottleneck(c_in, i, t, c, n, s)) 63 | c_in = c 64 | for t, c, n, s in config[5:]: 65 | for i in range(n): 66 | self.blocks3.append(Bottleneck(c_in, i, t, c, n, s)) 67 | c_in = c 68 | 69 | self.hidden_layers1 = nn.Sequential(*self.blocks1) 70 | self.hidden_layers2 = nn.Sequential(*self.blocks2) 71 | self.hidden_layers3 = nn.Sequential(*self.blocks3) 72 | 73 | self.convset_13 = ConvolutionalSet(320, 64) 74 | self.detetion_13 = nn.Sequential( 75 | ConvolutionalLayer(64, 320, 3, 1, 1), 76 | nn.Conv2d(320, 24, 1, 1, 0) 77 | ) 78 | self.up_13_to_26 = nn.Sequential( 79 | ConvolutionalLayer(64, 32, 3, 1, 1), 80 | UpSampleLayer() 
81 | ) 82 | 83 | self.convset_26 = ConvolutionalSet(96, 32) 84 | self.detetion_26 = nn.Sequential( 85 | ConvolutionalLayer(32, 64, 3, 1, 1), 86 | nn.Conv2d(64, 24, 1, 1, 0) 87 | ) 88 | self.up_26_to_52 = nn.Sequential( 89 | ConvolutionalLayer(32, 16, 3, 1, 1), 90 | UpSampleLayer() 91 | ) 92 | 93 | self.convset_52 = ConvolutionalSet(48, 24) 94 | self.detetion_52 = nn.Sequential( 95 | ConvolutionalLayer(24, 48, 3, 1, 1), 96 | nn.Conv2d(48, 24, 1, 1, 0) 97 | ) 98 | 99 | def forward(self, x): 100 | out_52 = self.hidden_layers1(self.input_layer(x)) 101 | out_26 = self.hidden_layers2(out_52) 102 | out_13 = self.hidden_layers3(out_26) 103 | 104 | convset_out_13 = self.convset_13(out_13) 105 | detetion_out_13 = self.detetion_13(convset_out_13) 106 | up_13_to_26_out = self.up_13_to_26(convset_out_13) 107 | cat_out_26 = torch.cat((up_13_to_26_out, out_26), dim=1) 108 | 109 | convset_26 = self.convset_26(cat_out_26) 110 | detetion_out_26 = self.detetion_26(convset_26) 111 | up_26_to_52_out = self.up_26_to_52(convset_26) 112 | cat_out_52 = torch.cat((up_26_to_52_out, out_52), dim=1) 113 | 114 | convset_52 = self.convset_52(cat_out_52) 115 | detetion_out_52 = self.detetion_52(convset_52) 116 | 117 | return detetion_out_13, detetion_out_26, detetion_out_52 118 | 119 | 120 | if __name__ == '__main__': 121 | x = torch.randn((1, 3, 416, 416)) 122 | net = MobileNet_v2(config) 123 | y = net(x) 124 | print(y[0].shape) 125 | print(y[1].shape) 126 | print(y[2].shape) 127 | -------------------------------------------------------------------------------- /darknet53/darbnet53_module.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class UpsampleLayer(torch.nn.Module): 5 | 6 | def __init__(self): 7 | super(UpsampleLayer, self).__init__() 8 | 9 | def forward(self, x): 10 | return torch.nn.functional.interpolate(x, scale_factor=2, mode='nearest') 11 | 12 | 13 | class ConvolutionalLayer(torch.nn.Module): 14 | 15 | def 
__init__(self, in_channels, out_channels, kernel_size, stride, padding, bias=False): 16 | super(ConvolutionalLayer, self).__init__() 17 | 18 | self.sub_module = torch.nn.Sequential( 19 | torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=bias), 20 | torch.nn.BatchNorm2d(out_channels), 21 | torch.nn.LeakyReLU() 22 | ) 23 | 24 | def forward(self, x): 25 | return self.sub_module(x) 26 | 27 | 28 | class ResidualLayer(torch.nn.Module): 29 | 30 | def __init__(self, in_channels): 31 | super(ResidualLayer, self).__init__() 32 | 33 | self.sub_module = torch.nn.Sequential( 34 | ConvolutionalLayer(in_channels, in_channels // 2, 1, 1, 0), 35 | ConvolutionalLayer(in_channels // 2, in_channels, 3, 1, 1), 36 | ) 37 | 38 | def forward(self, x): 39 | return x + self.sub_module(x) 40 | 41 | 42 | class DownsamplingLayer(torch.nn.Module): 43 | def __init__(self, in_channels, out_channels): 44 | super(DownsamplingLayer, self).__init__() 45 | 46 | self.sub_module = torch.nn.Sequential( 47 | ConvolutionalLayer(in_channels, out_channels, 3, 2, 1) 48 | ) 49 | 50 | def forward(self, x): 51 | return self.sub_module(x) 52 | 53 | 54 | class ConvolutionalSet(torch.nn.Module): 55 | def __init__(self, in_channels, out_channels): 56 | super(ConvolutionalSet, self).__init__() 57 | 58 | self.sub_module = torch.nn.Sequential( 59 | ConvolutionalLayer(in_channels, out_channels, 1, 1, 0), 60 | ConvolutionalLayer(out_channels, in_channels, 3, 1, 1), 61 | 62 | ConvolutionalLayer(in_channels, out_channels, 1, 1, 0), 63 | ConvolutionalLayer(out_channels, in_channels, 3, 1, 1), 64 | 65 | ConvolutionalLayer(in_channels, out_channels, 1, 1, 0), 66 | ) 67 | 68 | def forward(self, x): 69 | return self.sub_module(x) 70 | 71 | 72 | class Darknet53(torch.nn.Module): 73 | 74 | def __init__(self): 75 | super(Darknet53, self).__init__() 76 | 77 | self.trunk_52 = torch.nn.Sequential( 78 | ConvolutionalLayer(3, 32, 3, 1, 1), 79 | ConvolutionalLayer(32, 64, 3, 2, 1), 80 | 81 | 
ResidualLayer(64), 82 | DownsamplingLayer(64, 128), 83 | 84 | ResidualLayer(128), 85 | ResidualLayer(128), 86 | DownsamplingLayer(128, 256), 87 | 88 | ResidualLayer(256), 89 | ResidualLayer(256), 90 | ResidualLayer(256), 91 | ResidualLayer(256), 92 | ResidualLayer(256), 93 | ResidualLayer(256), 94 | ResidualLayer(256), 95 | ResidualLayer(256), 96 | ) 97 | 98 | self.trunk_26 = torch.nn.Sequential( 99 | DownsamplingLayer(256, 512), 100 | ResidualLayer(512), 101 | ResidualLayer(512), 102 | ResidualLayer(512), 103 | ResidualLayer(512), 104 | ResidualLayer(512), 105 | ResidualLayer(512), 106 | ResidualLayer(512), 107 | ResidualLayer(512), 108 | ) 109 | 110 | self.trunk_13 = torch.nn.Sequential( 111 | DownsamplingLayer(512, 1024), 112 | ResidualLayer(1024), 113 | ResidualLayer(1024), 114 | ResidualLayer(1024), 115 | ResidualLayer(1024) 116 | ) 117 | 118 | self.convset_13 = torch.nn.Sequential( 119 | ConvolutionalSet(1024, 512) 120 | ) 121 | 122 | self.detetion_13 = torch.nn.Sequential( 123 | ConvolutionalLayer(512, 1024, 3, 1, 1), 124 | torch.nn.Conv2d(1024, 24, 1, 1, 0) 125 | ) 126 | 127 | self.up_26 = torch.nn.Sequential( 128 | ConvolutionalLayer(512, 256, 3, 1, 1), 129 | UpsampleLayer() 130 | ) 131 | 132 | self.convset_26 = torch.nn.Sequential( 133 | ConvolutionalSet(768, 256) 134 | ) 135 | 136 | self.detetion_26 = torch.nn.Sequential( 137 | ConvolutionalLayer(256, 512, 3, 1, 1), 138 | torch.nn.Conv2d(512, 24, 1, 1, 0) 139 | ) 140 | 141 | self.up_52 = torch.nn.Sequential( 142 | ConvolutionalLayer(256, 128, 3, 1, 1), 143 | UpsampleLayer() 144 | ) 145 | 146 | self.convset_52 = torch.nn.Sequential( 147 | ConvolutionalSet(384, 128) 148 | ) 149 | 150 | self.detetion_52 = torch.nn.Sequential( 151 | ConvolutionalLayer(128, 256, 3, 1, 1), 152 | torch.nn.Conv2d(256, 24, 1, 1, 0) 153 | ) 154 | 155 | def forward(self, x): 156 | h_52 = self.trunk_52(x) 157 | h_26 = self.trunk_26(h_52) 158 | h_13 = self.trunk_13(h_26) 159 | 160 | convset_out_13 = self.convset_13(h_13) 161 | 
detetion_out_13 = self.detetion_13(convset_out_13) 162 | 163 | up_out_26 = self.up_26(convset_out_13) 164 | route_out_26 = torch.cat((up_out_26, h_26), dim=1) 165 | convset_out_26 = self.convset_26(route_out_26) 166 | detetion_out_26 = self.detetion_26(convset_out_26) 167 | 168 | up_out_52 = self.up_52(convset_out_26) 169 | route_out_52 = torch.cat((up_out_52, h_52), dim=1) 170 | convset_out_52 = self.convset_52(route_out_52) 171 | detetion_out_52 = self.detetion_52(convset_out_52) 172 | 173 | return detetion_out_13, detetion_out_26, detetion_out_52 174 | 175 | if __name__ == '__main__': 176 | yolo = Darknet53() 177 | x = torch.randn(1,3,416,416) 178 | y = yolo(x) 179 | print(y[0].shape) 180 | print(y[1].shape) 181 | print(y[2].shape) 182 | -------------------------------------------------------------------------------- /darknet53/net_block.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch 3 | from torch.nn import functional 4 | 5 | 6 | # 卷积块 7 | class ConvolutionalLayer(nn.Module): 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, bias=False): 9 | super(ConvolutionalLayer, self).__init__() 10 | self.sub_module = nn.Sequential( 11 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, 12 | padding=padding, bias=bias), 13 | nn.BatchNorm2d(out_channels), 14 | nn.LeakyReLU() 15 | ) 16 | 17 | def forward(self, x): 18 | return self.sub_module(x) 19 | 20 | 21 | # 残差块 22 | class ResidualLayer(nn.Module): 23 | def __init__(self, in_channels): 24 | super(ResidualLayer, self).__init__() 25 | self.sub_module = nn.Sequential( 26 | ConvolutionalLayer(in_channels=in_channels, out_channels=in_channels // 2, kernel_size=1, stride=1, 27 | padding=0), 28 | ConvolutionalLayer(in_channels=in_channels // 2, out_channels=in_channels, kernel_size=3, stride=1, 29 | padding=1) 30 | ) 31 | 32 | def forward(self, x): 33 | return 
self.sub_module(x) 34 | 35 | 36 | # 下采样 37 | class DownSampleLayer(nn.Module): 38 | def __init__(self, in_channels, out_channels): 39 | super(DownSampleLayer, self).__init__() 40 | self.sub_module = nn.Sequential( 41 | ConvolutionalLayer(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=2, padding=1) 42 | ) 43 | 44 | def forward(self, x): 45 | return self.sub_module(x) 46 | 47 | 48 | # 上采样 49 | class UpSampleLayer(nn.Module): 50 | def __init__(self): 51 | super(UpSampleLayer, self).__init__() 52 | 53 | def forward(self, x): 54 | return functional.interpolate(x, scale_factor=2, mode='nearest') 55 | 56 | 57 | # 卷积集 58 | class ConvolutionalSet(nn.Module): 59 | # 一般输入通道大 输出通道小,因为目的就是为了降低通道进行特征提取 60 | def __init__(self, in_channels, out_channels): 61 | super(ConvolutionalSet, self).__init__() 62 | self.sub_module = nn.Sequential( 63 | ConvolutionalLayer(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0), 64 | ConvolutionalLayer(in_channels=out_channels, out_channels=in_channels, kernel_size=3, stride=1, padding=1), 65 | 66 | ConvolutionalLayer(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0), 67 | ConvolutionalLayer(in_channels=out_channels, out_channels=in_channels, kernel_size=3, stride=1, padding=1), 68 | 69 | ConvolutionalLayer(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0), 70 | ) 71 | 72 | def forward(self, x): 73 | return self.sub_module(x) 74 | 75 | 76 | class YoloNet_V3(nn.Module): 77 | def __init__(self): 78 | super(YoloNet_V3, self).__init__() 79 | self.trunk_52 = nn.Sequential( 80 | ConvolutionalLayer(3, 32, 3, 1, 1), 81 | DownSampleLayer(32, 64), 82 | 83 | ResidualLayer(64), 84 | DownSampleLayer(64, 128), 85 | 86 | ResidualLayer(128), 87 | ResidualLayer(128), 88 | DownSampleLayer(128, 256), 89 | 90 | ResidualLayer(256), 91 | ResidualLayer(256), 92 | ResidualLayer(256), 93 | ResidualLayer(256), 94 | ResidualLayer(256), 
95 | ResidualLayer(256), 96 | ResidualLayer(256), 97 | ResidualLayer(256) 98 | ) 99 | self.trunk_26 = nn.Sequential( 100 | DownSampleLayer(256, 512), 101 | ResidualLayer(512), 102 | ResidualLayer(512), 103 | ResidualLayer(512), 104 | ResidualLayer(512), 105 | ResidualLayer(512), 106 | ResidualLayer(512), 107 | ResidualLayer(512), 108 | ResidualLayer(512) 109 | ) 110 | self.trunk_13 = nn.Sequential( 111 | DownSampleLayer(512, 1024), 112 | ResidualLayer(1024), 113 | ResidualLayer(1024), 114 | ResidualLayer(1024), 115 | ResidualLayer(1024) 116 | ) 117 | 118 | self.convset_13 = nn.Sequential( 119 | ConvolutionalSet(1024, 512) 120 | ) 121 | self.detetion_13 = nn.Sequential( 122 | ConvolutionalLayer(512, 1024, 3, 1, 1), 123 | nn.Conv2d(1024, 45, 1, 1, 0) 124 | ) 125 | self.up_13_to_26 = nn.Sequential( 126 | # 原文为1*1的卷积,使用3*3的卷积是为了做特征提取,因为1*1不能进行特征提取 127 | ConvolutionalLayer(512, 256, 3, 1, 1), 128 | UpSampleLayer() 129 | ) 130 | 131 | self.convset_26 = nn.Sequential( 132 | ConvolutionalSet(768, 256) 133 | ) 134 | self.detetion_26 = nn.Sequential( 135 | ConvolutionalLayer(256, 512, 3, 1, 1), 136 | nn.Conv2d(512, 45, 1, 1, 0) 137 | ) 138 | 139 | self.up_26_to_52 = nn.Sequential( 140 | ConvolutionalLayer(256, 128, 3, 1, 1), 141 | UpSampleLayer() 142 | ) 143 | 144 | self.convset_52 = nn.Sequential( 145 | ConvolutionalSet(384, 128) 146 | ) 147 | self.detetion_52 = nn.Sequential( 148 | ConvolutionalLayer(128, 256, 3, 1, 1), 149 | nn.Conv2d(256, 45, 1, 1, 0) 150 | ) 151 | 152 | def forward(self, x): 153 | h_52 = self.trunk_52(x) 154 | h_26 = self.trunk_26(h_52) 155 | h_13 = self.trunk_13(h_26) 156 | 157 | convset_out_13 = self.convset_13(h_13) 158 | detetion_out_13 = self.detetion_13(convset_out_13) 159 | 160 | up_out_13_to_26 = self.up_13_to_26(convset_out_13) 161 | cat_out_26 = torch.cat((up_out_13_to_26, h_26), dim=1) 162 | convset_out_26 = self.convset_26(cat_out_26) 163 | detetion_out_26 = self.detetion_26(convset_out_26) 164 | 165 | up_out_26_to_52 = 
self.up_26_to_52(convset_out_26) 166 | cat_out_52 = torch.cat((up_out_26_to_52, h_52), dim=1) 167 | convset_out_52 = self.convset_52(cat_out_52) 168 | detetion_out_52 = self.detetion_52(convset_out_52) 169 | 170 | return detetion_out_13, detetion_out_26, detetion_out_52 171 | 172 | 173 | if __name__ == '__main__': 174 | yolo = YoloNet_V3() 175 | x = torch.randn(1, 3, 416, 416) 176 | y = yolo(x) 177 | print(y[0].shape) 178 | print(y[1].shape) 179 | print(y[2].shape) 180 | -------------------------------------------------------------------------------- /mobilenetv2/net_block.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch 3 | from torch.nn import functional 4 | 5 | 6 | # 卷积块 7 | class ConvolutionalLayer(nn.Module): 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, bias=False): 9 | super(ConvolutionalLayer, self).__init__() 10 | self.sub_module = nn.Sequential( 11 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, 12 | padding=padding, bias=bias), 13 | nn.BatchNorm2d(out_channels), 14 | nn.LeakyReLU() 15 | ) 16 | 17 | def forward(self, x): 18 | return self.sub_module(x) 19 | 20 | 21 | # 残差块 22 | class ResidualLayer(nn.Module): 23 | def __init__(self, in_channels): 24 | super(ResidualLayer, self).__init__() 25 | self.sub_module = nn.Sequential( 26 | ConvolutionalLayer(in_channels=in_channels, out_channels=in_channels // 2, kernel_size=1, stride=1, 27 | padding=0), 28 | ConvolutionalLayer(in_channels=in_channels // 2, out_channels=in_channels, kernel_size=3, stride=1, 29 | padding=1) 30 | ) 31 | 32 | def forward(self, x): 33 | return self.sub_module(x) 34 | 35 | 36 | # 下采样 37 | class DownSampleLayer(nn.Module): 38 | def __init__(self, in_channels, out_channels): 39 | super(DownSampleLayer, self).__init__() 40 | self.sub_module = nn.Sequential( 41 | ConvolutionalLayer(in_channels=in_channels, 
out_channels=out_channels, kernel_size=3, stride=2, padding=1) 42 | ) 43 | 44 | def forward(self, x): 45 | return self.sub_module(x) 46 | 47 | 48 | # 上采样 49 | class UpSampleLayer(nn.Module): 50 | def __init__(self): 51 | super(UpSampleLayer, self).__init__() 52 | 53 | def forward(self, x): 54 | return functional.interpolate(x, scale_factor=2, mode='nearest') 55 | 56 | 57 | # 卷积集 58 | class ConvolutionalSet(nn.Module): 59 | # 一般输入通道大 输出通道小,因为目的就是为了降低通道进行特征提取 60 | def __init__(self, in_channels, out_channels): 61 | super(ConvolutionalSet, self).__init__() 62 | self.sub_module = nn.Sequential( 63 | ConvolutionalLayer(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0), 64 | ConvolutionalLayer(in_channels=out_channels, out_channels=in_channels, kernel_size=3, stride=1, padding=1), 65 | 66 | ConvolutionalLayer(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0), 67 | ConvolutionalLayer(in_channels=out_channels, out_channels=in_channels, kernel_size=3, stride=1, padding=1), 68 | 69 | ConvolutionalLayer(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0), 70 | ) 71 | 72 | def forward(self, x): 73 | return self.sub_module(x) 74 | 75 | 76 | class YoloNet_V3(nn.Module): 77 | def __init__(self): 78 | super(YoloNet_V3, self).__init__() 79 | self.trunk_52 = nn.Sequential( 80 | ConvolutionalLayer(3, 32, 3, 1, 1), 81 | DownSampleLayer(32, 64), 82 | 83 | ResidualLayer(64), 84 | DownSampleLayer(64, 128), 85 | 86 | ResidualLayer(128), 87 | ResidualLayer(128), 88 | DownSampleLayer(128, 256), 89 | 90 | ResidualLayer(256), 91 | ResidualLayer(256), 92 | ResidualLayer(256), 93 | ResidualLayer(256), 94 | ResidualLayer(256), 95 | ResidualLayer(256), 96 | ResidualLayer(256), 97 | ResidualLayer(256) 98 | ) 99 | self.trunk_26 = nn.Sequential( 100 | DownSampleLayer(256, 512), 101 | ResidualLayer(512), 102 | ResidualLayer(512), 103 | ResidualLayer(512), 104 | ResidualLayer(512), 105 | 
ResidualLayer(512), 106 | ResidualLayer(512), 107 | ResidualLayer(512), 108 | ResidualLayer(512) 109 | ) 110 | self.trunk_13 = nn.Sequential( 111 | DownSampleLayer(512, 1024), 112 | ResidualLayer(1024), 113 | ResidualLayer(1024), 114 | ResidualLayer(1024), 115 | ResidualLayer(1024) 116 | ) 117 | 118 | self.convset_13 = nn.Sequential( 119 | ConvolutionalSet(1024, 512) 120 | ) 121 | self.detetion_13 = nn.Sequential( 122 | ConvolutionalLayer(512, 1024, 3, 1, 1), 123 | nn.Conv2d(1024, 45, 1, 1, 0) 124 | ) 125 | self.up_13_to_26 = nn.Sequential( 126 | # 原文为1*1的卷积,使用3*3的卷积是为了做特征提取,因为1*1不能进行特征提取 127 | ConvolutionalLayer(512, 256, 3, 1, 1), 128 | UpSampleLayer() 129 | ) 130 | 131 | self.convset_26 = nn.Sequential( 132 | ConvolutionalSet(768, 256) 133 | ) 134 | self.detetion_26 = nn.Sequential( 135 | ConvolutionalLayer(256, 512, 3, 1, 1), 136 | nn.Conv2d(512, 45, 1, 1, 0) 137 | ) 138 | 139 | self.up_26_to_52 = nn.Sequential( 140 | ConvolutionalLayer(256, 128, 3, 1, 1), 141 | UpSampleLayer() 142 | ) 143 | 144 | self.convset_52 = nn.Sequential( 145 | ConvolutionalSet(384, 128) 146 | ) 147 | self.detetion_52 = nn.Sequential( 148 | ConvolutionalLayer(128, 256, 3, 1, 1), 149 | nn.Conv2d(256, 45, 1, 1, 0) 150 | ) 151 | 152 | def forward(self, x): 153 | h_52 = self.trunk_52(x) 154 | h_26 = self.trunk_26(h_52) 155 | h_13 = self.trunk_13(h_26) 156 | 157 | convset_out_13 = self.convset_13(h_13) 158 | detetion_out_13 = self.detetion_13(convset_out_13) 159 | 160 | up_out_13_to_26 = self.up_13_to_26(convset_out_13) 161 | cat_out_26 = torch.cat((up_out_13_to_26, h_26), dim=1) 162 | convset_out_26 = self.convset_26(cat_out_26) 163 | detetion_out_26 = self.detetion_26(convset_out_26) 164 | 165 | up_out_26_to_52 = self.up_26_to_52(convset_out_26) 166 | cat_out_52 = torch.cat((up_out_26_to_52, h_52), dim=1) 167 | convset_out_52 = self.convset_52(cat_out_52) 168 | detetion_out_52 = self.detetion_52(convset_out_52) 169 | 170 | return detetion_out_13, detetion_out_26, detetion_out_52 171 | 172 
| 173 | if __name__ == '__main__': 174 | yolo = YoloNet_V3() 175 | x = torch.randn(1, 3, 416, 416) 176 | y = yolo(x) 177 | print(y[0].shape) 178 | print(y[1].shape) 179 | print(y[2].shape) 180 | --------------------------------------------------------------------------------