├── darknet53
├── .idea
│ ├── .WeDrive
│ ├── .gitignore
│ ├── inspectionProfiles
│ │ ├── .WeDrive
│ │ └── profiles_settings.xml
│ ├── misc.xml
│ ├── modules.xml
│ └── yolov3.iml
├── __pycache__
│ ├── .WeDrive
│ ├── cfg.cpython-38.pyc
│ ├── dataset.cpython-38.pyc
│ ├── utils.cpython-38.pyc
│ ├── net_block.cpython-38.pyc
│ ├── darbnet53_module.cpython-38.pyc
│ └── mobilenet_v2_module.cpython-38.pyc
├── data
│ ├── person_label.txt
│ ├── images
│ │ ├── 000017.jpg
│ │ ├── 000022.jpg
│ │ ├── 000261.jpg
│ │ ├── 000455.jpg
│ │ ├── 000812.jpg
│ │ ├── 000930.jpg
│ │ ├── 001183.jpg
│ │ ├── 001370.jpg
│ │ ├── 001579.jpg
│ │ └── 001988.jpg
│ ├── data.txt
│ ├── test.py
│ ├── image_voc
│ │ ├── 000017.xml
│ │ ├── 000022.xml
│ │ ├── 000455.xml
│ │ ├── 001988.xml
│ │ ├── 000261.xml
│ │ ├── 000930.xml
│ │ ├── 001183.xml
│ │ ├── 001579.xml
│ │ ├── 000812.xml
│ │ └── 001370.xml
│ └── make_data_txt.py
├── images
│ ├── 000017.jpg
│ ├── 000022.jpg
│ ├── 000261.jpg
│ ├── 000455.jpg
│ ├── 000812.jpg
│ ├── 000930.jpg
│ ├── 001084.jpg
│ ├── 001183.jpg
│ ├── 001370.jpg
│ ├── 001579.jpg
│ └── 001988.jpg
├── test.py
├── cfg.py
├── trainer.py
├── dataset.py
├── utils.py
├── detector.py
├── darbnet53_module.py
└── net_block.py
├── mobilenetv2
├── .idea
│ ├── .WeDrive
│ ├── .gitignore
│ ├── inspectionProfiles
│ │ ├── .WeDrive
│ │ └── profiles_settings.xml
│ ├── misc.xml
│ ├── modules.xml
│ └── yolov3.iml
├── __pycache__
│ ├── cfg.cpython-38.pyc
│ ├── utils.cpython-38.pyc
│ ├── dataset.cpython-38.pyc
│ ├── net_block.cpython-38.pyc
│ └── mobilenet_v2_module.cpython-38.pyc
├── cfg.py
├── data
│ ├── test.py
│ └── make_data_txt.py
├── trainer.py
├── dataset.py
├── utils.py
├── detector.py
├── mobilenet_v2_module.py
└── net_block.py
└── README.md
/darknet53/.idea/.WeDrive:
--------------------------------------------------------------------------------
1 | D:\课程代码\20210823_YOLOv3_01\yolov3\.idea
--------------------------------------------------------------------------------
/mobilenetv2/.idea/.WeDrive:
--------------------------------------------------------------------------------
1 | D:\课程代码\20210823_YOLOv3_01\yolov3\.idea
--------------------------------------------------------------------------------
/darknet53/__pycache__/.WeDrive:
--------------------------------------------------------------------------------
1 | D:\课程代码\20210823_YOLOv3_01\yolov3\__pycache__
--------------------------------------------------------------------------------
/darknet53/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 |
--------------------------------------------------------------------------------
/mobilenetv2/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 |
--------------------------------------------------------------------------------
/darknet53/.idea/inspectionProfiles/.WeDrive:
--------------------------------------------------------------------------------
1 | D:\课程代码\20210823_YOLOv3_01\yolov3\.idea\inspectionProfiles
--------------------------------------------------------------------------------
/mobilenetv2/.idea/inspectionProfiles/.WeDrive:
--------------------------------------------------------------------------------
1 | D:\课程代码\20210823_YOLOv3_01\yolov3\.idea\inspectionProfiles
--------------------------------------------------------------------------------
/darknet53/data/person_label.txt:
--------------------------------------------------------------------------------
1 | images/1.jpg 1 12 13 51 18 2 22 38 55 98 2 44 33 62 62
2 | images/2.jpg 3 82 46 57 19
3 |
--------------------------------------------------------------------------------
/darknet53/images/000017.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/000017.jpg
--------------------------------------------------------------------------------
/darknet53/images/000022.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/000022.jpg
--------------------------------------------------------------------------------
/darknet53/images/000261.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/000261.jpg
--------------------------------------------------------------------------------
/darknet53/images/000455.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/000455.jpg
--------------------------------------------------------------------------------
/darknet53/images/000812.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/000812.jpg
--------------------------------------------------------------------------------
/darknet53/images/000930.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/000930.jpg
--------------------------------------------------------------------------------
/darknet53/images/001084.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/001084.jpg
--------------------------------------------------------------------------------
/darknet53/images/001183.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/001183.jpg
--------------------------------------------------------------------------------
/darknet53/images/001370.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/001370.jpg
--------------------------------------------------------------------------------
/darknet53/images/001579.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/001579.jpg
--------------------------------------------------------------------------------
/darknet53/images/001988.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/001988.jpg
--------------------------------------------------------------------------------
/darknet53/data/images/000017.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/000017.jpg
--------------------------------------------------------------------------------
/darknet53/data/images/000022.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/000022.jpg
--------------------------------------------------------------------------------
/darknet53/data/images/000261.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/000261.jpg
--------------------------------------------------------------------------------
/darknet53/data/images/000455.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/000455.jpg
--------------------------------------------------------------------------------
/darknet53/data/images/000812.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/000812.jpg
--------------------------------------------------------------------------------
/darknet53/data/images/000930.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/000930.jpg
--------------------------------------------------------------------------------
/darknet53/data/images/001183.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/001183.jpg
--------------------------------------------------------------------------------
/darknet53/data/images/001370.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/001370.jpg
--------------------------------------------------------------------------------
/darknet53/data/images/001579.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/001579.jpg
--------------------------------------------------------------------------------
/darknet53/data/images/001988.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/001988.jpg
--------------------------------------------------------------------------------
/darknet53/__pycache__/cfg.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/__pycache__/cfg.cpython-38.pyc
--------------------------------------------------------------------------------
/darknet53/__pycache__/dataset.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/__pycache__/dataset.cpython-38.pyc
--------------------------------------------------------------------------------
/darknet53/__pycache__/utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/__pycache__/utils.cpython-38.pyc
--------------------------------------------------------------------------------
/mobilenetv2/__pycache__/cfg.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/mobilenetv2/__pycache__/cfg.cpython-38.pyc
--------------------------------------------------------------------------------
/mobilenetv2/__pycache__/utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/mobilenetv2/__pycache__/utils.cpython-38.pyc
--------------------------------------------------------------------------------
/darknet53/__pycache__/net_block.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/__pycache__/net_block.cpython-38.pyc
--------------------------------------------------------------------------------
/mobilenetv2/__pycache__/dataset.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/mobilenetv2/__pycache__/dataset.cpython-38.pyc
--------------------------------------------------------------------------------
/mobilenetv2/__pycache__/net_block.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/mobilenetv2/__pycache__/net_block.cpython-38.pyc
--------------------------------------------------------------------------------
/darknet53/__pycache__/darbnet53_module.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/__pycache__/darbnet53_module.cpython-38.pyc
--------------------------------------------------------------------------------
/darknet53/__pycache__/mobilenet_v2_module.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/__pycache__/mobilenet_v2_module.cpython-38.pyc
--------------------------------------------------------------------------------
/mobilenetv2/__pycache__/mobilenet_v2_module.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/mobilenetv2/__pycache__/mobilenet_v2_module.cpython-38.pyc
--------------------------------------------------------------------------------
/darknet53/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/mobilenetv2/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # yolo-v3-DarkNet53-MobileNet-V2
2 | 分别基于DarkNet53和MobileNet-V2两个网络实现yolo-v3
3 |
4 |
5 | DarkNet53的B站视频地址:https://www.bilibili.com/video/BV1Rf4y1n7mG?spm_id_from=333.999.0.0
6 |
  7 | 其中MobileNet-V2网络实现,是因为它很轻量级,这个网络训练比较快,虽然准确率不如DarkNet53
8 |
--------------------------------------------------------------------------------
/darknet53/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/mobilenetv2/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/darknet53/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/mobilenetv2/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/darknet53/test.py:
--------------------------------------------------------------------------------
"""Scratch script: experiments with boolean-mask indexing on tensors.

Run directly; it only prints intermediate results and is not imported
anywhere else in the project.
"""
import torch
import math

# Earlier 1-D experiment, kept for reference:
# a = torch.Tensor([1, 2, 3, 4])
# b = a < 3 # mask
# print(b)
# print(a[b])
# print(b.nonzero())
# print(a[b.nonzero()])

a = torch.Tensor([[1, 2], [5, 6], [3, 1], [2, 8]])
# b = a < 3
# print(b)
# print(a[b])
b = a[:, 1] > 5  # boolean mask: rows whose second column exceeds 5
print(b)
print(a[b])  # mask indexing keeps only the selected rows
print(b.nonzero())  # indices of the True entries

print(math.modf(3.4))  # (fractional, integral) parts, both floats

# Presumably input size / stride -> feature-map size (12.5); confirm intent.
print(400/32)
--------------------------------------------------------------------------------
/mobilenetv2/cfg.py:
--------------------------------------------------------------------------------
"""Configuration for the MobileNet-V2 YOLO-v3 variant."""

# Input resolution fed to the network.
IMG_HEIGHT = 416
IMG_WIDTH = 416

# Number of object classes predicted per anchor.
CLASS_NUM = 3

# Anchor boxes (w, h in input pixels) keyed by feature-map size:
# 13x13 handles large objects, 26x26 medium, 52x52 small.
ANCHORS_GROUP = {
    13: [[311, 247], [159, 232], [200, 117]],
    26: [[89, 159], [91, 74], [47, 97]],
    52: [[48, 34], [25, 55], [15, 21]],
}

# Pre-computed anchor areas for each scale, derived from ANCHORS_GROUP.
ANCHORS_GROUP_AREA = {
    scale: [w * h for w, h in anchors]
    for scale, anchors in ANCHORS_GROUP.items()
}
--------------------------------------------------------------------------------
/darknet53/.idea/yolov3.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/darknet53/cfg.py:
--------------------------------------------------------------------------------
"""Configuration for the DarkNet53 YOLO-v3 variant."""

# Input resolution fed to the network.
IMG_HEIGHT = 416
IMG_WIDTH = 416

# Number of object classes predicted per anchor.
CLASS_NUM = 3

# Anchor boxes (w, h in input pixels) keyed by feature-map size:
# 13x13 handles large objects, 26x26 medium, 52x52 small.
ANCHORS_GROUP = {
    13: [[270, 254], [291, 179], [162, 304]],
    26: [[175, 222], [112, 235], [175, 140]],
    52: [[81, 118], [53, 142], [44, 28]],
}

# Pre-computed anchor areas for each scale, derived from ANCHORS_GROUP.
ANCHORS_GROUP_AREA = {
    scale: [w * h for w, h in anchors]
    for scale, anchors in ANCHORS_GROUP.items()
}
--------------------------------------------------------------------------------
/mobilenetv2/.idea/yolov3.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/darknet53/data/data.txt:
--------------------------------------------------------------------------------
1 | 000017.jpg 0 201 112 81 118 1 213 179 271 223
2 | 000022.jpg 1 181 160 249 149 0 183 113 57 154
3 | 000261.jpg 2 168 167 336 208 2 193 155 291 179
4 | 000455.jpg 0 213 102 74 147 1 259 159 312 188
5 | 000812.jpg 2 197 209 230 134 0 211 152 175 249 0 332 33 51 36 0 99 10 36 19
6 | 000930.jpg 0 210 133 132 146 2 201 180 171 107
7 | 001183.jpg 1 110 184 89 98 1 202 198 175 195 0 212 138 49 129
8 | 001370.jpg 2 97 297 121 235 2 208 273 84 223 0 100 208 165 337 0 218 198 112 302
9 | 001579.jpg 2 90 333 179 164 2 149 288 239 155 0 158 224 128 225 0 53 264 105 267
10 | 001988.jpg 2 142 268 268 286 0 143 190 158 271
11 |
--------------------------------------------------------------------------------
/darknet53/data/test.py:
--------------------------------------------------------------------------------
"""Visual sanity check for data.txt: draws each label box on its image.

Each line of data.txt is "<image name>" followed by groups of five numbers
(cls cx cy w h) in 416-longest-side coordinates; boxes are mapped back to
the original image scale before drawing.
"""
import numpy as np
from PIL import Image, ImageDraw
import os

# Fix: the original opened data.txt without ever closing the handle.
with open('data.txt', 'r') as f:
    datas = f.readlines()

for data in datas:
    data = data.strip().split()
    img_path = os.path.join('images', data[0])
    img = Image.open(img_path)
    w, h = img.size
    # Labels were produced with a 416 / longest-side scale; invert it here.
    case = 416 / max(w, h)
    _boxes = np.array([float(x) for x in data[1:]])
    boxes = np.split(_boxes, len(_boxes) // 5)
    draw = ImageDraw.Draw(img)
    for box in boxes:
        # bw/bh instead of w/h: the original clobbered the image size here.
        cls, cx, cy, bw, bh = box
        x1 = (cx - 0.5 * bw) / case
        y1 = (cy - 0.5 * bh) / case
        x2 = (cx + 0.5 * bw) / case
        y2 = (cy + 0.5 * bh) / case
        draw.rectangle((x1, y1, x2, y2), outline='red', width=2)
    # NOTE(review): dump indentation is ambiguous; assuming show-per-image
    # (inside the loop) as in the usual tutorial pattern — confirm.
    img.show()
--------------------------------------------------------------------------------
/mobilenetv2/data/test.py:
--------------------------------------------------------------------------------
"""Visual sanity check for data.txt: draws each label box on its image.

Each line of data.txt is "<image name>" followed by groups of five numbers
(cls cx cy w h) in 416-longest-side coordinates; boxes are mapped back to
the original image scale before drawing.
"""
import numpy as np
from PIL import Image, ImageDraw
import os

# Fix: the original opened data.txt without ever closing the handle.
with open('data.txt', 'r') as f:
    datas = f.readlines()

for data in datas:
    data = data.strip().split()
    img_path = os.path.join('images', data[0])
    img = Image.open(img_path)
    w, h = img.size
    # Labels were produced with a 416 / longest-side scale; invert it here.
    case = 416 / max(w, h)
    _boxes = np.array([float(x) for x in data[1:]])
    boxes = np.split(_boxes, len(_boxes) // 5)
    draw = ImageDraw.Draw(img)
    for box in boxes:
        # bw/bh instead of w/h: the original clobbered the image size here.
        cls, cx, cy, bw, bh = box
        x1 = (cx - 0.5 * bw) / case
        y1 = (cy - 0.5 * bh) / case
        x2 = (cx + 0.5 * bw) / case
        y2 = (cy + 0.5 * bh) / case
        draw.rectangle((x1, y1, x2, y2), outline='red', width=2)
    # NOTE(review): dump indentation is ambiguous; assuming show-per-image
    # (inside the loop) as in the usual tutorial pattern — confirm.
    img.show()
--------------------------------------------------------------------------------
/darknet53/data/image_voc/000017.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2007
3 | 000017.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 | 228217974
9 |
10 |
11 | genewolf
12 | whiskey kitten
13 |
14 |
15 | 480
16 | 364
17 | 3
18 |
19 | 0
20 |
32 |
44 |
45 |
--------------------------------------------------------------------------------
/darknet53/data/image_voc/000022.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2007
3 | 000022.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 | 336380018
9 |
10 |
11 | Lothar Lenz
12 | Lothar Lenz
13 |
14 |
15 | 500
16 | 332
17 | 3
18 |
19 | 0
20 |
32 |
44 |
45 |
--------------------------------------------------------------------------------
/darknet53/data/image_voc/000455.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2007
3 | 000455.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 | 323858157
9 |
10 |
11 | Lothar Lenz
12 | Lothar Lenz
13 |
14 |
15 | 500
16 | 332
17 | 3
18 |
19 | 0
20 |
32 |
44 |
45 |
--------------------------------------------------------------------------------
/darknet53/data/image_voc/001988.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2007
3 | 001988.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 | 272556816
9 |
10 |
11 | .s.e.a.n.
12 | Sean Scott
13 |
14 |
15 | 333
16 | 500
17 | 3
18 |
19 | 0
20 |
32 |
44 |
45 |
--------------------------------------------------------------------------------
/darknet53/data/image_voc/000261.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2007
3 | 000261.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 | 336032008
9 |
10 |
11 | MIKEBECKHAM DOT NET
12 | Mike Beckham
13 |
14 |
15 | 500
16 | 375
17 | 3
18 |
19 | 0
20 |
32 |
44 |
45 |
--------------------------------------------------------------------------------
/darknet53/data/image_voc/000930.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2007
3 | 000930.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 | 200825654
9 |
10 |
11 | bikeride
12 | Brent Soderberg
13 |
14 |
15 | 500
16 | 375
17 | 3
18 |
19 | 0
20 |
32 |
44 |
45 |
--------------------------------------------------------------------------------
/darknet53/data/image_voc/001183.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2007
3 | 001183.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 | 308220821
9 |
10 |
11 | lxt
12 | Laura Thomson
13 |
14 |
15 | 500
16 | 375
17 | 3
18 |
19 | 0
20 |
32 |
44 |
56 |
57 |
--------------------------------------------------------------------------------
/darknet53/data/make_data_txt.py:
--------------------------------------------------------------------------------
"""Build data.txt from the VOC annotation files in image_voc/.

Each output line: "<image name> cls cx cy w h [cls cx cy w h ...]" with
box coordinates rescaled as if the longer image side were 416 px.
"""
import math
import xml.etree.ElementTree as ET
import os
from PIL import Image

# Class-name -> integer label mapping for this 3-class subset.
class_dict = {
    'person': 0,
    'horse': 1,
    'bicycle': 2,
}

xml_files = os.listdir('image_voc')
# NOTE(review): append mode means re-running the script duplicates lines;
# confirm whether 'w' was intended.
with open('data.txt', 'a') as f:
    for xml_file in xml_files:
        root = ET.parse(os.path.join('image_voc', xml_file)).getroot()
        filename = root.find('filename').text
        class_names = root.findall('object/name')
        boxes = root.findall('object/bndbox')
        # Scale that maps original pixels onto a 416-px longest side.
        longest = max(Image.open(os.path.join('images', filename)).size)
        scale = 416 / longest
        fields = [filename]
        for cls_node, box in zip(class_names, boxes):
            cls = class_dict[cls_node.text]
            # Fix: look corners up by tag instead of child position, so the
            # script survives re-ordered <bndbox> children.
            x1 = int(box.find('xmin').text)
            y1 = int(box.find('ymin').text)
            x2 = int(box.find('xmax').text)
            y2 = int(box.find('ymax').text)
            cx = math.floor((x1 + x2) / 2)
            cy = math.floor((y1 + y2) / 2)
            w, h = x2 - x1, y2 - y1
            fields.append(f"{cls} {math.floor(cx * scale)} {math.floor(cy * scale)} "
                          f"{math.floor(w * scale)} {math.floor(h * scale)}")
        # ' '.join replaces the original str-shadowing += / replace() hack.
        f.write(' '.join(fields) + '\n')
--------------------------------------------------------------------------------
/darknet53/data/image_voc/001579.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2007
3 | 001579.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 | 250936479
9 |
10 |
11 | selimski
12 | ?
13 |
14 |
15 | 375
16 | 500
17 | 3
18 |
19 | 0
20 |
32 |
44 |
56 |
68 |
69 |
--------------------------------------------------------------------------------
/darknet53/data/image_voc/000812.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2007
3 | 000812.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 | 329837865
9 |
10 |
11 | .s.e.a.n.
12 | Sean Scott
13 |
14 |
15 | 500
16 | 333
17 | 3
18 |
19 | 0
20 |
32 |
44 |
56 |
68 |
69 |
--------------------------------------------------------------------------------
/darknet53/data/image_voc/001370.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2007
3 | 001370.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 | 220172534
9 |
10 |
11 | thevelodrome.com
12 | ?
13 |
14 |
15 | 333
16 | 500
17 | 3
18 |
19 | 0
20 |
32 |
44 |
56 |
68 |
69 |
--------------------------------------------------------------------------------
/mobilenetv2/data/make_data_txt.py:
--------------------------------------------------------------------------------
"""Build train_data.txt from the full VOC2007 training annotations.

Each output line: "<image name> cls cx cy w h [cls cx cy w h ...]" with
box coordinates rescaled as if the longer image side were 416 px.
"""
import math
import xml.etree.ElementTree as ET
import os
from PIL import Image

# The 20 standard PASCAL VOC class labels.
class_dict = {
    'aeroplane': 0,
    'bicycle': 1,
    'bird': 2,
    'boat': 3,
    'bottle': 4,
    'bus': 5,
    'car': 6,
    'cat': 7,
    'chair': 8,
    'cow': 9,
    'diningtable': 10,
    'dog': 11,
    'horse': 12,
    'motorbike': 13,
    'person': 14,
    'pottedplant': 15,
    'sheep': 16,
    'sofa': 17,
    'train': 18,
    'tvmonitor': 19
}

train_xml_path = r'G:\data\voc\voc_train\VOC2007\Annotations'
train_img_path = r'G:\data\voc\voc_train\VOC2007\JPEGImages'
# NOTE(review): the test-set paths are defined but unused in this script;
# kept for a future test-split pass.
test_xml_path = r'G:\data\voc\voc_test\VOC2007\Annotations'
test_img_path = r'G:\data\voc\voc_test\VOC2007\JPEGImages'

xml_files = os.listdir(train_xml_path)
# NOTE(review): append mode means re-running the script duplicates lines;
# confirm whether 'w' was intended.
with open('train_data.txt', 'a') as f:
    for xml_file in xml_files:
        root = ET.parse(os.path.join(train_xml_path, xml_file)).getroot()
        filename = root.find('filename').text
        class_names = root.findall('object/name')
        boxes = root.findall('object/bndbox')
        # Scale that maps original pixels onto a 416-px longest side.
        longest = max(Image.open(os.path.join(train_img_path, filename)).size)
        scale = 416 / longest
        fields = [filename]
        for cls_node, box in zip(class_names, boxes):
            cls = class_dict[cls_node.text]
            # Fix: look corners up by tag instead of child position, so the
            # script survives re-ordered <bndbox> children.
            x1 = int(box.find('xmin').text)
            y1 = int(box.find('ymin').text)
            x2 = int(box.find('xmax').text)
            y2 = int(box.find('ymax').text)
            cx = math.floor((x1 + x2) / 2)
            cy = math.floor((y1 + y2) / 2)
            w, h = x2 - x1, y2 - y1
            fields.append(f"{cls} {math.floor(cx * scale)} {math.floor(cy * scale)} "
                          f"{math.floor(w * scale)} {math.floor(h * scale)}")
        # ' '.join replaces the original str-shadowing += / replace() hack.
        f.write(' '.join(fields) + '\n')
--------------------------------------------------------------------------------
/darknet53/trainer.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import numpy as np
4 |
5 | import torch.nn
6 | from mobilenet_v2_module import *
7 | import dataset
8 | from darbnet53_module import *
9 | from torch import nn
10 |
11 | def loss_fn(output, target, alpha):
12 | output = output.permute(0, 2, 3, 1)#N,45,13,13==>N,13,13,45
13 | output = output.reshape(output.size(0), output.size(1), output.size(2), 3, -1)#N,13,13,3,15
14 | # print("output:",output.shape)
15 | mask_obj = target[..., 0] > 0#N,13,13,3
16 | # print("mask_obj:",mask_obj.shape)
17 | mask_noobj = target[..., 0] == 0
18 | # print("mask_noobj:",mask_noobj.shape)
19 | # print("output[mask_obj]:",output[mask_obj].shape)
20 | # print("output[mask_noobj]:", output[mask_noobj].shape)
21 | loss_p_fun=nn.BCELoss()
22 | loss_p=loss_p_fun(torch.sigmoid(output[...,0]),target[...,0])
23 | loss_box_fun=nn.MSELoss()
24 | loss_box=loss_box_fun(output[mask_obj][...,1:5],target[mask_obj][...,1:5])
25 | loss_cls_box_fun=nn.CrossEntropyLoss()
26 | loss_cls_box=loss_cls_box_fun(output[mask_obj][...,5:],torch.argmax(target[mask_obj][...,5:],dim=1,keepdim=True).squeeze(dim=1))
27 | loss = alpha * loss_p + (1-alpha)*0.5*loss_box+ (1-alpha)*0.5*loss_cls_box
28 | return loss
29 |
30 |
if __name__ == '__main__':
    weight_path = 'darknet_params/net597.pt'
    save_dir = 'darknet_params'
    # Fix: ensure the checkpoint directory exists before the first save,
    # otherwise torch.save fails on a fresh clone.
    os.makedirs(save_dir, exist_ok=True)

    myDataset = dataset.MyDataset()
    train_loader = torch.utils.data.DataLoader(myDataset, batch_size=5, shuffle=True)

    net = Darknet53().cuda()
    # Resume from the last checkpoint when one is present.
    if os.path.exists(weight_path):
        net.load_state_dict(torch.load(weight_path))
    net.train()

    opt = torch.optim.Adam(net.parameters())
    epoch = 0
    # Train indefinitely, one checkpoint per epoch; stop with Ctrl-C.
    while True:
        for target_13, target_26, target_52, img_data in train_loader:
            target_13, target_26, target_52, img_data = (
                target_13.cuda(), target_26.cuda(), target_52.cuda(), img_data.cuda())
            output_13, output_26, output_52 = net(img_data)
            # Same objectness weight (0.6) at every scale.
            loss_13 = loss_fn(output_13.float(), target_13.float(), 0.6)
            loss_26 = loss_fn(output_26.float(), target_26.float(), 0.6)
            loss_52 = loss_fn(output_52.float(), target_52.float(), 0.6)
            loss = loss_13 + loss_26 + loss_52
            opt.zero_grad()
            loss.backward()
            opt.step()
            print(epoch, loss.item())
        torch.save(net.state_dict(), f'{save_dir}/net{epoch}.pt')
        print(f'{epoch}保存成功')
        epoch += 1
--------------------------------------------------------------------------------
/mobilenetv2/trainer.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import numpy as np
4 |
5 | import torch.nn
6 | from mobilenet_v2_module import *
7 | import dataset
8 | from torch import nn
9 |
10 | def loss_fn(output, target, alpha):
11 | output = output.permute(0, 2, 3, 1)#N,45,13,13==>N,13,13,45
12 | output = output.reshape(output.size(0), output.size(1), output.size(2), 3, -1)#N,13,13,3,15
13 | # print("output:",output.shape)
14 | mask_obj = target[..., 0] > 0#N,13,13,3
15 | # print("mask_obj:",mask_obj.shape)
16 | mask_noobj = target[..., 0] == 0
17 | # print("mask_noobj:",mask_noobj.shape)
18 | # print("output[mask_obj]:",output[mask_obj].shape)
19 | # print("output[mask_noobj]:", output[mask_noobj].shape)
20 | loss_p_fun=nn.BCELoss()
21 | loss_p=loss_p_fun(torch.sigmoid(output[...,0]),target[...,0])
22 | loss_box_fun=nn.MSELoss()
23 | loss_box=loss_box_fun(output[mask_obj][...,1:5],target[mask_obj][...,1:5])
24 | loss_cls_box_fun=nn.CrossEntropyLoss()
25 | loss_cls_box=loss_cls_box_fun(output[mask_obj][...,5:],torch.argmax(target[mask_obj][...,5:],dim=1,keepdim=True).squeeze(dim=1))
26 | loss = alpha * loss_p + (1-alpha)*0.5*loss_box+ (1-alpha)*0.5*loss_cls_box
27 | return loss
28 |
29 |
if __name__ == '__main__':
    weight_path = 'mobilenetv2_params/net0-599-5949.pt'
    save_dir = 'mobilenetv2_params'
    # Fix: save into the same directory we resume from — the original wrote
    # checkpoints to 'g_params/' but loaded from 'mobilenetv2_params/', so a
    # restart never picked up the newest weights. Also ensure the directory
    # exists before the first save.
    os.makedirs(save_dir, exist_ok=True)

    myDataset = dataset.MyDataset()
    train_loader = torch.utils.data.DataLoader(myDataset, batch_size=3, shuffle=True)

    net = MobileNet_v2(config).cuda()
    # Resume from the last checkpoint when one is present.
    if os.path.exists(weight_path):
        net.load_state_dict(torch.load(weight_path))
    net.train()

    opt = torch.optim.Adam(net.parameters())
    epoch = 0
    # Train indefinitely, one checkpoint per epoch; stop with Ctrl-C.
    while True:
        for i, (target_13, target_26, target_52, img_data) in enumerate(train_loader):
            target_13, target_26, target_52, img_data = (
                target_13.cuda(), target_26.cuda(), target_52.cuda(), img_data.cuda())
            output_13, output_26, output_52 = net(img_data)
            # Same objectness weight (0.6) at every scale.
            loss_13 = loss_fn(output_13.float(), target_13.float(), 0.6)
            loss_26 = loss_fn(output_26.float(), target_26.float(), 0.6)
            loss_52 = loss_fn(output_52.float(), target_52.float(), 0.6)
            loss = loss_13 + loss_26 + loss_52
            opt.zero_grad()
            loss.backward()
            opt.step()
            print(f"{epoch}--{i}--{loss.item()}")
        torch.save(net.state_dict(), f'{save_dir}/net{epoch}.pt')
        print(f'{epoch}保存成功')
        epoch += 1
--------------------------------------------------------------------------------
/darknet53/dataset.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.utils.data import Dataset
3 | import torchvision
4 | import numpy as np
5 | import cfg
6 | import os
7 | from utils import *
8 | from PIL import Image,ImageDraw
9 | import math
10 |
# Label file: one line per image, "<image file> <cls cx cy w h> ..."
# (parsed in MyDataset.__getitem__); images live under IMG_BASE_DIR.
LABEL_FILE_PATH = "data/data.txt"
IMG_BASE_DIR = "data/images"

# PIL image -> float tensor in [0, 1], shape (C, H, W).
transforms = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor()
])
17 |
18 |
def one_hot(cls_num, v):
    """Return a float vector of length `cls_num` that is 1.0 at index `v`
    and 0.0 everywhere else."""
    encoding = np.zeros(cls_num)
    encoding[v] = 1.
    return encoding
23 |
24 |
25 | class MyDataset(Dataset):
26 |
    def __init__(self):
        # Load every label line up front; one line per training image
        # ("<image file> <cls cx cy w h> ...", parsed lazily in __getitem__).
        with open(LABEL_FILE_PATH) as f:
            self.dataset = f.readlines()
30 |
31 | def __len__(self):
32 | return len(self.dataset)
33 |
34 | def __getitem__(self, index):
35 | labels = {}
36 |
37 | line = self.dataset[index]
38 | strs = line.split()
39 | _img_data = make_image_data(os.path.join(IMG_BASE_DIR, strs[0]))
40 | w,h=_img_data.size[0],_img_data.size[1]
41 |
42 | _img_data = _img_data.resize((416,416))#此处要等比缩放
43 | img_data = transforms(_img_data)
44 | draw=ImageDraw.Draw(_img_data)
45 | _boxes = np.array([float(x) for x in strs[1:]])
46 | # print(_boxes[0])
47 | # _boxes = np.array(list(map(float, strs[1:])))
48 | boxes = np.split(_boxes, len(_boxes) // 5)
49 | index = 0
50 | for feature_size, anchors in cfg.ANCHORS_GROUP.items():
51 | labels[feature_size] = np.zeros(shape=(feature_size, feature_size, 3, 5 + cfg.CLASS_NUM))
52 |
53 | for box in boxes:
54 | cls, cx, cy, w, h = box
55 | draw.rectangle((cx-w*0.5,cy-h*0.5,cx+w*0.5,cy+h*0.5),outline='red',width=1)
56 | _img_data.show()
57 | cx_offset, cx_index = math.modf(cx * feature_size / cfg.IMG_WIDTH)
58 | cy_offset, cy_index = math.modf(cy * feature_size / cfg.IMG_WIDTH)
59 | for i, anchor in enumerate(anchors):
60 |
61 | anchor_area = cfg.ANCHORS_GROUP_AREA[feature_size][i]
62 | p_w, p_h = w / (anchor[0]), h / (anchor[1])
63 | p_area = w * h
64 | iou = min(p_area, anchor_area) / max(p_area, anchor_area)
65 | index+=1
66 | # print(feature_size, cx_index, cy_index, i)
67 | # print(box)
68 | if labels[feature_size][int(cy_index), int(cx_index), i][0] 0:
66 | _box = sort_boxes[0]
67 | keep_boxes.append(_box)
68 |
69 | if len(sort_boxes) > 1:
70 | _boxes = sort_boxes[1:]
71 | # print(_clses.shape)
72 | # print(_cls.shape)
73 | # print(mask.shape, "-------------------")
74 | # print(_boxes)
75 | # print(_boxes.shape)
76 |
77 | _iou = iou(_box, _boxes, mode)
78 | sort_boxes=_boxes[_iou< thresh]
79 |
80 | else:
81 | break
82 |
83 | return keep_boxes
84 |
85 |
86 | # def detect(feature_map, thresh):
87 | # masks = feature_map[:, 4, :, :] > thresh
88 | # idxs = torch.nonzero(masks)
89 |
90 |
if __name__ == '__main__':
    # box = torch.Tensor([2, 2, 3, 3, 6])
    # boxes = torch.Tensor([[2, 2, 3, 3, 6], [2, 2, 4, 4, 5], [2, 2, 5, 5, 4]])
    # print(iou(box, boxes, mode="inter"))
    # print(nms(boxes, 0.1))
    # import numpy as np
    #
    # a = np.array([[1, 2], [3, 4]])
    # print(a[:, 1])
    # Manual smoke test: pad a sample image to a square canvas.
    make_image_data('images/1.jpg')
101 |
--------------------------------------------------------------------------------
/darknet53/detector.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from darbnet53_module import *
4 | import cfg
5 | from PIL import Image,ImageDraw
6 | from darbnet53_module import *
7 | from dataset import *
8 | import os
# Class index -> human-readable name for the 3 trained classes.
class_dict={
    0:'person',
    1:'horse',
    2:'bicycle',
}
class Detector(torch.nn.Module):
    """Wraps a trained Darknet53 YOLOv3 net and decodes its raw feature
    maps into NMS-filtered boxes in original-image coordinates."""

    def __init__(self, weight_path='darknet_params/net597.pt'):
        # weight_path is now a parameter (previously a hard-coded string);
        # the old path is kept as the default for backward compatibility.
        super(Detector, self).__init__()

        self.net = Darknet53()
        self.net.load_state_dict(torch.load(weight_path))
        self.net.eval()

    def forward(self, input, thresh, anchors, case):
        """Run the net and return NMS-filtered detections.

        thresh: confidence threshold on sigmoid(objectness).
        anchors: dict keyed by feature size (13 / 26 / 52).
        case: resize scale (416 / original width) used to map boxes back
        to the original image.
        """
        output_13, output_26, output_52 = self.net(input)

        # Decode each scale with its stride (32 / 16 / 8) and anchor set.
        idxs_13, vecs_13 = self._filter(output_13, thresh)
        boxes_13 = self._parse(idxs_13, vecs_13, 32, anchors[13], case)

        idxs_26, vecs_26 = self._filter(output_26, thresh)
        boxes_26 = self._parse(idxs_26, vecs_26, 16, anchors[26], case)

        idxs_52, vecs_52 = self._filter(output_52, thresh)
        boxes_52 = self._parse(idxs_52, vecs_52, 8, anchors[52], case)

        boxes = torch.cat([boxes_13, boxes_26, boxes_52], dim=0)
        # NOTE(review): NMS here is class-agnostic — boxes of different
        # classes can suppress each other.
        return nms(boxes, 0.5, mode='inter')

    def _filter(self, output, thresh):
        """Keep cells whose sigmoid(confidence) exceeds thresh.

        Returns (idxs, vecs): idxs is (K, 4) = (batch, h, w, anchor);
        vecs is (K, 5+C) raw predictions for those cells."""
        output = output.permute(0, 2, 3, 1)
        output = output.reshape(output.size(0), output.size(1), output.size(2), 3, -1)

        mask = torch.sigmoid(output[..., 0]) > thresh

        idxs = mask.nonzero()
        vecs = output[mask]
        return idxs, vecs

    def _parse(self, idxs, vecs, t, anchors, case):
        """Map filtered cell predictions to rows of
        (image index, confidence, cx, cy, w, h, class index)."""
        anchors = torch.Tensor(anchors)

        n = idxs[:, 0]  # image index within the batch
        a = idxs[:, 3]  # anchor index

        # cell index + predicted offset, times stride, rescaled to the
        # original image via `case`
        cy = (idxs[:, 1].float() + vecs[:, 2]) * t / case
        cx = (idxs[:, 2].float() + vecs[:, 1]) * t / case

        # anchor size scaled by exp of the predicted log-ratio
        w = anchors[a, 0] * torch.exp(vecs[:, 3]) / case
        h = anchors[a, 1] * torch.exp(vecs[:, 4]) / case
        p = vecs[:, 0]
        cls_p = torch.softmax(vecs[:, 5:], dim=1)
        cls_index = torch.argmax(cls_p, dim=1)
        return torch.stack([n.float(), torch.sigmoid(p), cx, cy, w, h, cls_index], dim=1)
72 |
73 |
if __name__ == '__main__':
    detector = Detector()
    # y = detector(torch.randn(3, 3, 416, 416), 0.3, cfg.ANCHORS_GROUP,0.5)
    # print(y.shape)
    for i in os.listdir('images'):
        img=Image.open('images/'+i)
        # Pad to square, then resize to the 416x416 network input.
        _img = make_image_data('images/'+i)
        w, h = _img.size[0], _img.size[1]
        # Scale factor used to map detections back onto the original image.
        case = 416 / w
        # print(case)
        _img = _img.resize((416, 416))  # should be an aspect-preserving resize here
        _img_data = transforms(_img)
        _img_data=torch.unsqueeze(_img_data,dim=0)  # add a batch dimension
        # print(_img_data.shape)
        result=detector(_img_data, 0.2, cfg.ANCHORS_GROUP,case)
        draw=ImageDraw.Draw(img)
        # Each rst row: (image idx, confidence, cx, cy, w, h, class idx).
        for rst in result:
            if len(rst)==0:
                continue
            else:
                # rst=rst[0]
                # centre/size -> corner coordinates for drawing
                x1,y1,x2,y2=rst[2]-0.5*rst[4],rst[3]-0.5*rst[5],rst[2]+0.5*rst[4],rst[3]+0.5*rst[5]
                print(f'置信度:{str(rst[1].item())[:4]} 坐标点:{x1,y1,x2,y2} 类别:{class_dict[int(rst[6].item())]}')
                draw.text((x1,y1),class_dict[int(rst[6].item())]+str(rst[1].item())[:4])
                draw.rectangle((x1,y1,x2,y2),width=1,outline='red')
        img.show()
100 |
--------------------------------------------------------------------------------
/mobilenetv2/utils.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import torch
4 | from PIL import Image
5 |
def make_image_data(path):
    """Open the image at `path` and paste it onto a black square canvas
    whose side is max(width, height); the image sits at the top-left.
    Returns the padded PIL RGB image."""
    img = Image.open(path)
    side = max(img.size)
    canvas = Image.new('RGB', (side, side), (0, 0, 0))
    canvas.paste(img, (0, 0))
    return canvas
13 |
def iou(box, boxes, mode="inter"):
    """IoU between one box and a batch of boxes.

    Rows hold (n, conf, cx, cy, w, h, ...): columns 2..5 are the centre
    coordinates and size. mode 'inter': intersection over union;
    mode 'min': intersection over the smaller area."""
    cx, cy, w, h = box[2], box[3], box[4], box[5]
    cxs, cys, ws, hs = boxes[:, 2], boxes[:, 3], boxes[:, 4], boxes[:, 5]

    area = w * h
    areas = ws * hs

    # centre/size -> corner coordinates
    x1, x2, y1, y2 = cx - w / 2, cx + w / 2, cy - h / 2, cy + h / 2
    xs1, xs2, ys1, ys2 = cxs - ws / 2, cxs + ws / 2, cys - hs / 2, cys + hs / 2

    # Overlap rectangle, clamped to 0 so disjoint boxes get zero area.
    ov_w = torch.clamp(torch.minimum(x2, xs2) - torch.maximum(x1, xs1), min=0)
    ov_h = torch.clamp(torch.minimum(y2, ys2) - torch.maximum(y1, ys1), min=0)

    inter = ov_w * ov_h

    if mode == 'inter':
        return inter / (area + areas - inter)  # intersection / union
    elif mode == 'min':
        return inter / torch.min(area, areas)
39 | '''
40 | def iou(box, boxes, mode="inter"):
41 | box_area = (box[3] - box[1]) * (box[4] - box[2])
42 | boxes_area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 4] - boxes[:, 2])
43 |
44 | x1 = torch.max(box[1], boxes[:, 1])
45 | y1 = torch.max(box[2], boxes[:, 2])
46 | x2 = torch.min(box[3], boxes[:, 3])
47 | y2 = torch.min(box[4], boxes[:, 4])
48 |
49 | w = torch.clamp(x2 - x1, min=0)
50 | h = torch.clamp(y2 - y1, min=0)
51 |
52 | inter = w * h
53 |
54 | if mode == 'inter':
55 | return inter / (box_area + boxes_area - inter)
56 | elif mode == 'min':
57 | return inter / torch.min(box_area, boxes_area)
58 | '''
59 |
def nms(boxes, thresh, mode='inter'):
    """Greedy non-maximum suppression.

    boxes: tensor whose column 1 is the confidence score. Repeatedly keeps
    the highest-confidence box and discards every remaining box whose IoU
    with it is >= thresh. Returns the kept rows as a list of tensors."""
    order = boxes[:, 1].argsort(descending=True)
    remaining = boxes[order]
    kept = []

    while len(remaining) > 0:
        best = remaining[0]
        kept.append(best)

        if len(remaining) == 1:
            break
        rest = remaining[1:]
        # _boxes=sort[sort_boxes[:,-1]==_box[-1]]
        remaining = rest[iou(best, rest, mode) < thresh]

    return kept
79 |
80 | # def nms(boxes, thresh, mode='inter'):
81 | # args = boxes[:, 1].argsort(descending=True)
82 | # sort_boxes = boxes[args]
83 | # keep_boxes = []
84 | #
85 | # while len(sort_boxes) > 0:
86 | # _box = sort_boxes[0]
87 | # _cls=sort_boxes[6]
88 | # keep_boxes.append(_box)
89 | #
90 | # if len(sort_boxes) > 1:
91 | # _boxes = sort_boxes[1:]
92 | # print(_boxes.shape)
93 | # __cls=_boxes[...,6]
94 | # _cls_boxes=_boxes[_cls ==__cls]
95 | # not_cls_boxes=[_cls!=__cls]
96 | # # _boxes=sort[sort_boxes[:,-1]==_box[-1]]
97 | # _iou = iou(_box, _cls_boxes, mode)
98 | # sort_boxes1 = _boxes[_iou < thresh]
99 | # print(sort_boxes1.shape,not_cls_boxes.shape)
100 | # sort_boxes=torch.cat(sort_boxes1,not_cls_boxes,dim=1)
101 | # else:
102 | # break
103 | #
104 | # return keep_boxes
105 | # def detect(feature_map, thresh):
106 | # masks = feature_map[:, 4, :, :] > thresh
107 | # idxs = torch.nonzero(masks)
108 |
109 |
if __name__ == '__main__':
    # box = torch.Tensor([2, 2, 3, 3, 6])
    # boxes = torch.Tensor([[2, 2, 3, 3, 6], [2, 2, 4, 4, 5], [2, 2, 5, 5, 4]])
    # print(iou(box, boxes, mode="inter"))
    # print(nms(boxes, 0.1))
    # import numpy as np
    #
    # a = np.array([[1, 2], [3, 4]])
    # print(a[:, 1])
    # Manual smoke test: pad a sample image to a square canvas.
    make_image_data('images/1.jpg')
120 |
--------------------------------------------------------------------------------
/mobilenetv2/detector.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from mobilenet_v2_module import *
4 | import cfg
5 | from PIL import Image, ImageDraw
6 | from mobilenet_v2_module import *
7 | from dataset import *
8 | import os
9 |
10 | # class_dict = {
11 | # 0: 'aeroplane',
12 | # 1: 'bicycle',
13 | # 2: 'bird',
14 | # 3: 'boat',
15 | # 4: 'bottle',
16 | # 5: 'bus',
17 | # 6: 'car',
18 | # 7: 'cat',
19 | # 8: 'chair',
20 | # 9: 'cow',
21 | # 10: 'diningtable',
22 | # 11: 'dog',
23 | # 12: 'horse',
24 | # 13: 'motorbike',
25 | # 14: 'person',
26 | # 15: 'pottedplant',
27 | # 16: 'sheep',
28 | # 17: 'sofa',
29 | # 18: 'train',
30 | # 19: 'tvmonitor'
31 | # }
# Class index -> name for the 3 trained classes (reduced from the full
# 20-class VOC mapping commented out above).
class_dict = {
    0:'person',
    1:'horse',
    2:'bicycle'}
36 |
class Detector(torch.nn.Module):
    """Wraps a trained MobileNet_v2 YOLO net and decodes its feature maps
    into NMS-filtered boxes in original-image coordinates."""

    def __init__(self, weight_path='g_params/net237.pt'):
        # weight_path is now a parameter (previously a hard-coded string);
        # the old path is kept as the default for backward compatibility.
        super(Detector, self).__init__()

        self.net = MobileNet_v2(config)
        self.net.load_state_dict(torch.load(weight_path))
        self.net.eval()

    def forward(self, input, thresh, anchors, case):
        """Run the net and return NMS-filtered detections.

        thresh: confidence threshold on sigmoid(objectness).
        anchors: dict keyed by feature size (13 / 26 / 52).
        case: resize scale (416 / original width) used to map boxes back
        to the original image.
        """
        output_13, output_26, output_52 = self.net(input)

        # Decode each scale with its stride (32 / 16 / 8) and anchor set.
        idxs_13, vecs_13 = self._filter(output_13, thresh)
        boxes_13 = self._parse(idxs_13, vecs_13, 32, anchors[13], case)

        idxs_26, vecs_26 = self._filter(output_26, thresh)
        boxes_26 = self._parse(idxs_26, vecs_26, 16, anchors[26], case)

        idxs_52, vecs_52 = self._filter(output_52, thresh)
        boxes_52 = self._parse(idxs_52, vecs_52, 8, anchors[52], case)

        boxes = torch.cat([boxes_13, boxes_26, boxes_52], dim=0)
        # NOTE(review): NMS here is class-agnostic — boxes of different
        # classes can suppress each other.
        return nms(boxes, 0.4, mode="inter")

    def _filter(self, output, thresh):
        """Keep cells whose sigmoid(confidence) exceeds thresh.

        Returns (idxs, vecs): idxs is (K, 4) = (batch, h, w, anchor);
        vecs is (K, 5+C) raw predictions for those cells."""
        output = output.permute(0, 2, 3, 1)
        output = output.reshape(output.size(0), output.size(1), output.size(2), 3, -1)

        mask = torch.sigmoid(output[..., 0]) > thresh

        idxs = mask.nonzero()
        vecs = output[mask]
        return idxs, vecs

    def _parse(self, idxs, vecs, t, anchors, case):
        """Map filtered cell predictions to rows of
        (image index, confidence, cx, cy, w, h, class index)."""
        anchors = torch.Tensor(anchors)

        n = idxs[:, 0]  # image index within the batch
        a = idxs[:, 3]  # anchor index

        # cell index + predicted offset, times stride, rescaled via `case`
        cy = (idxs[:, 1].float() + vecs[:, 2]) * t / case
        cx = (idxs[:, 2].float() + vecs[:, 1]) * t / case

        # anchor size scaled by exp of the predicted log-ratio
        w = anchors[a, 0] * torch.exp(vecs[:, 3]) / case
        h = anchors[a, 1] * torch.exp(vecs[:, 4]) / case
        p = vecs[:, 0]
        cls_p = torch.softmax(vecs[:, 5:], dim=1)
        cls_index = torch.argmax(cls_p, dim=1)
        return torch.stack([n.float(), torch.sigmoid(p), cx, cy, w, h, cls_index], dim=1)
89 |
90 |
if __name__ == '__main__':
    detector = Detector()
    # y = detector(torch.randn(3, 3, 416, 416), 0.3, cfg.ANCHORS_GROUP,0.5)
    # print(y.shape)
    for i in os.listdir('E:/pythonSpace/yolov3/darknet53/data/images'):
        img = Image.open('E:/pythonSpace/yolov3/darknet53/data/images/' + i)
        # Pad to square, then resize to the 416x416 network input.
        _img = make_image_data('E:/pythonSpace/yolov3/darknet53/data/images/' + i)
        w, h = _img.size[0], _img.size[1]
        # Scale factor used to map detections back onto the original image.
        case = 416 / w
        # print(case)
        _img = _img.resize((416, 416))  # should be an aspect-preserving resize here
        _img_data = transforms(_img)
        _img_data = torch.unsqueeze(_img_data, dim=0)  # add a batch dimension
        # print(_img_data.shape)
        result = detector(_img_data, 0.2, cfg.ANCHORS_GROUP, case)
        draw = ImageDraw.Draw(img)
        # Each rst row: (image idx, confidence, cx, cy, w, h, class idx).
        for rst in result:
            # centre/size -> corner coordinates for drawing
            x1, y1, x2, y2 = rst[2] - 0.5 * rst[4], rst[3] - 0.5 * rst[5], rst[2] + 0.5 * rst[4], rst[3] + 0.5 * rst[5]
            print(f'置信度:{str(rst[1].item())[:4]} 坐标点:{x1, y1, x2, y2} 类别:{class_dict[int(rst[6].item())]}')
            draw.text((x1, y1), class_dict[int(rst[6].item())] + str(rst[1].item())[:4])
            draw.rectangle((x1, y1, x2, y2), width=1, outline='red')
        img.show()
113 |
--------------------------------------------------------------------------------
/mobilenetv2/mobilenet_v2_module.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from net_block import *
4 |
# MobileNetV2 stage table, one row per stage: [t, c, n, s] =
# [expansion factor, output channels, repeat count, stride — applied by
# the LAST block of the stage in this implementation (see Bottleneck)].
# Row 0 describes the stem conv: only c=32 (its output width) is used.
config = [
    [-1, 32, 1, 2],
    [1, 16, 1, 1],
    [6, 24, 2, 2],
    [6, 32, 4, 2],
    [6, 64, 4, 2],
    [6, 96, 4, 1],
    [6, 160, 4, 2],
    [6, 320, 2, 1],
]
15 |
16 |
class Bottleneck(nn.Module):
    """One MobileNetV2-style inverted-residual block.

    Within a stage of `n` repeats, only the last block (i == n-1) applies
    the stage stride `s` and switches to `c` output channels; the earlier
    blocks keep `c_in` channels at stride 1 and add a skip connection.
    `t` is the channel expansion factor."""

    def __init__(self, c_in, i, t, c, n, s):
        super(Bottleneck, self).__init__()
        self.i = i
        self.n = n
        last = i == n - 1
        stride = s if last else 1
        c_out = c if last else c_in
        c_mid = c_in * t  # expanded width of the middle conv

        self.sub_module = nn.Sequential(
            # 1x1 expand
            nn.Conv2d(c_in, c_mid, 1, 1, bias=False),
            nn.BatchNorm2d(c_mid),
            nn.ReLU6(),
            # 3x3 spatial conv (a full conv here, not depthwise)
            nn.Conv2d(c_mid, c_mid, 3, stride, 1, bias=False),
            nn.BatchNorm2d(c_mid),
            nn.ReLU6(),
            # 1x1 project (linear: no activation after the last BN)
            nn.Conv2d(c_mid, c_out, 1, 1, bias=False),
            nn.BatchNorm2d(c_out)
        )

    def forward(self, x):
        # Skip connection only when the block preserves shape
        # (i.e. it is not the stage's final, striding block).
        if self.i == self.n - 1:
            return self.sub_module(x)
        return self.sub_module(x) + x
42 |
43 |
class MobileNet_v2(nn.Module):
    """MobileNetV2-style backbone with three YOLO detection heads.

    For a 416x416 input the heads emit 24 channels
    (= 3 anchors * (5 + 3 classes)) at the 13/26/52 feature scales."""

    def __init__(self, config):
        super(MobileNet_v2, self).__init__()
        # Stem conv: stride 2, 3 -> 32 channels.
        self.input_layer = nn.Sequential(
            nn.Conv2d(3, 32, 3, 2, 1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU6()
        )

        def expand_stages(rows, width):
            # Turn config rows into a flat list of Bottleneck blocks,
            # threading the channel width through; returns (blocks, width).
            built = []
            for t, c, n, s in rows:
                built.extend(Bottleneck(width, i, t, c, n, s) for i in range(n))
                width = c
            return built, width

        width = config[0][1]
        self.blocks1, width = expand_stages(config[1:4], width)  # -> 52x52 features
        self.blocks2, width = expand_stages(config[4:5], width)  # -> 26x26 features
        self.blocks3, width = expand_stages(config[5:], width)   # -> 13x13 features

        self.hidden_layers1 = nn.Sequential(*self.blocks1)
        self.hidden_layers2 = nn.Sequential(*self.blocks2)
        self.hidden_layers3 = nn.Sequential(*self.blocks3)

        # 13x13 head and its upsample branch towards the 26x26 scale.
        self.convset_13 = ConvolutionalSet(320, 64)
        self.detetion_13 = nn.Sequential(
            ConvolutionalLayer(64, 320, 3, 1, 1),
            nn.Conv2d(320, 24, 1, 1, 0)
        )
        self.up_13_to_26 = nn.Sequential(
            ConvolutionalLayer(64, 32, 3, 1, 1),
            UpSampleLayer()
        )

        # 26x26 head: concat of upsampled (32) + backbone (96-32=64)... input is 96.
        self.convset_26 = ConvolutionalSet(96, 32)
        self.detetion_26 = nn.Sequential(
            ConvolutionalLayer(32, 64, 3, 1, 1),
            nn.Conv2d(64, 24, 1, 1, 0)
        )
        self.up_26_to_52 = nn.Sequential(
            ConvolutionalLayer(32, 16, 3, 1, 1),
            UpSampleLayer()
        )

        # 52x52 head: concat of upsampled (16) + backbone (32) = 48 channels.
        self.convset_52 = ConvolutionalSet(48, 24)
        self.detetion_52 = nn.Sequential(
            ConvolutionalLayer(24, 48, 3, 1, 1),
            nn.Conv2d(48, 24, 1, 1, 0)
        )

    def forward(self, x):
        feat_52 = self.hidden_layers1(self.input_layer(x))
        feat_26 = self.hidden_layers2(feat_52)
        feat_13 = self.hidden_layers3(feat_26)

        conv_13 = self.convset_13(feat_13)
        det_13 = self.detetion_13(conv_13)

        # FPN-style top-down path: upsample and concatenate with the
        # matching backbone feature map.
        conv_26 = self.convset_26(torch.cat((self.up_13_to_26(conv_13), feat_26), dim=1))
        det_26 = self.detetion_26(conv_26)

        conv_52 = self.convset_52(torch.cat((self.up_26_to_52(conv_26), feat_52), dim=1))
        det_52 = self.detetion_52(conv_52)

        return det_13, det_26, det_52
118 |
119 |
if __name__ == '__main__':
    # Smoke test: expect head shapes (1, 24, 13, 13), (1, 24, 26, 26),
    # (1, 24, 52, 52) for a 416x416 input.
    x = torch.randn((1, 3, 416, 416))
    net = MobileNet_v2(config)
    y = net(x)
    print(y[0].shape)
    print(y[1].shape)
    print(y[2].shape)
127 |
--------------------------------------------------------------------------------
/darknet53/darbnet53_module.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
class UpsampleLayer(torch.nn.Module):
    """Parameter-free 2x nearest-neighbour spatial upsampling."""

    def __init__(self):
        super(UpsampleLayer, self).__init__()

    def forward(self, x):
        # Doubles H and W; batch and channel dimensions are untouched.
        upsampled = torch.nn.functional.interpolate(x, scale_factor=2, mode='nearest')
        return upsampled
11 |
12 |
class ConvolutionalLayer(torch.nn.Module):
    """Conv2d -> BatchNorm -> LeakyReLU: the basic Darknet building block."""

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, bias=False):
        super(ConvolutionalLayer, self).__init__()

        # bias defaults to False because BatchNorm supplies the shift.
        layers = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=bias),
            torch.nn.BatchNorm2d(out_channels),
            torch.nn.LeakyReLU()
        )
        self.sub_module = layers

    def forward(self, x):
        return self.sub_module(x)
26 |
27 |
class ResidualLayer(torch.nn.Module):
    """1x1 squeeze -> 3x3 restore, added back onto the input (skip connection)."""

    def __init__(self, in_channels):
        super(ResidualLayer, self).__init__()

        half = in_channels // 2
        self.sub_module = torch.nn.Sequential(
            ConvolutionalLayer(in_channels, half, 1, 1, 0),
            ConvolutionalLayer(half, in_channels, 3, 1, 1),
        )

    def forward(self, x):
        # Identity shortcut: output shape equals input shape.
        return x + self.sub_module(x)
40 |
41 |
class DownsamplingLayer(torch.nn.Module):
    """Halve the spatial resolution with a stride-2 3x3 convolution
    (Darknet uses strided convs instead of pooling)."""

    def __init__(self, in_channels, out_channels):
        super(DownsamplingLayer, self).__init__()

        self.sub_module = torch.nn.Sequential(
            ConvolutionalLayer(in_channels, out_channels, 3, 2, 1)
        )

    def forward(self, x):
        return self.sub_module(x)
52 |
53 |
class ConvolutionalSet(torch.nn.Module):
    """Five alternating 1x1 / 3x3 convolutions (1-3-1-3-1) that squeeze
    in_channels down to out_channels."""

    def __init__(self, in_channels, out_channels):
        super(ConvolutionalSet, self).__init__()

        layers = []
        for _ in range(2):
            layers.append(ConvolutionalLayer(in_channels, out_channels, 1, 1, 0))
            layers.append(ConvolutionalLayer(out_channels, in_channels, 3, 1, 1))
        # final 1x1 leaves the output at out_channels
        layers.append(ConvolutionalLayer(in_channels, out_channels, 1, 1, 0))
        self.sub_module = torch.nn.Sequential(*layers)

    def forward(self, x):
        return self.sub_module(x)
70 |
71 |
class Darknet53(torch.nn.Module):
    """Darknet-53 backbone with three YOLOv3 detection heads.

    Heads output 24 channels = 3 anchors * (5 + 3 class scores) at
    strides 32 (13x13), 16 (26x26) and 8 (52x52) for a 416x416 input."""

    def __init__(self):
        super(Darknet53, self).__init__()

        # Backbone down to stride 8 -> 52x52 feature map.
        self.trunk_52 = torch.nn.Sequential(
            ConvolutionalLayer(3, 32, 3, 1, 1),
            ConvolutionalLayer(32, 64, 3, 2, 1),
            ResidualLayer(64),
            DownsamplingLayer(64, 128),
            ResidualLayer(128),
            ResidualLayer(128),
            DownsamplingLayer(128, 256),
            *[ResidualLayer(256) for _ in range(8)],
        )

        # Stride 16 -> 26x26.
        self.trunk_26 = torch.nn.Sequential(
            DownsamplingLayer(256, 512),
            *[ResidualLayer(512) for _ in range(8)],
        )

        # Stride 32 -> 13x13.
        self.trunk_13 = torch.nn.Sequential(
            DownsamplingLayer(512, 1024),
            *[ResidualLayer(1024) for _ in range(4)],
        )

        self.convset_13 = torch.nn.Sequential(ConvolutionalSet(1024, 512))
        self.detetion_13 = torch.nn.Sequential(
            ConvolutionalLayer(512, 1024, 3, 1, 1),
            torch.nn.Conv2d(1024, 24, 1, 1, 0)
        )

        # Top-down path 13 -> 26 (256 upsampled + 512 skip = 768).
        self.up_26 = torch.nn.Sequential(
            ConvolutionalLayer(512, 256, 3, 1, 1),
            UpsampleLayer()
        )
        self.convset_26 = torch.nn.Sequential(ConvolutionalSet(768, 256))
        self.detetion_26 = torch.nn.Sequential(
            ConvolutionalLayer(256, 512, 3, 1, 1),
            torch.nn.Conv2d(512, 24, 1, 1, 0)
        )

        # Top-down path 26 -> 52 (128 upsampled + 256 skip = 384).
        self.up_52 = torch.nn.Sequential(
            ConvolutionalLayer(256, 128, 3, 1, 1),
            UpsampleLayer()
        )
        self.convset_52 = torch.nn.Sequential(ConvolutionalSet(384, 128))
        self.detetion_52 = torch.nn.Sequential(
            ConvolutionalLayer(128, 256, 3, 1, 1),
            torch.nn.Conv2d(256, 24, 1, 1, 0)
        )

    def forward(self, x):
        feat_52 = self.trunk_52(x)
        feat_26 = self.trunk_26(feat_52)
        feat_13 = self.trunk_13(feat_26)

        conv_13 = self.convset_13(feat_13)
        det_13 = self.detetion_13(conv_13)

        # Upsample, concatenate with the matching backbone map, detect.
        route_26 = torch.cat((self.up_26(conv_13), feat_26), dim=1)
        conv_26 = self.convset_26(route_26)
        det_26 = self.detetion_26(conv_26)

        route_52 = torch.cat((self.up_52(conv_26), feat_52), dim=1)
        conv_52 = self.convset_52(route_52)
        det_52 = self.detetion_52(conv_52)

        return det_13, det_26, det_52
174 |
if __name__ == '__main__':
    # Smoke test: expect head shapes (1, 24, 13, 13), (1, 24, 26, 26),
    # (1, 24, 52, 52) for a 416x416 input.
    yolo = Darknet53()
    x = torch.randn(1,3,416,416)
    y = yolo(x)
    print(y[0].shape)
    print(y[1].shape)
    print(y[2].shape)
182 |
--------------------------------------------------------------------------------
/darknet53/net_block.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | import torch
3 | from torch.nn import functional
4 |
5 |
# Basic convolution block
class ConvolutionalLayer(nn.Module):
    """Conv2d -> BatchNorm2d -> LeakyReLU."""

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, bias=False):
        super(ConvolutionalLayer, self).__init__()
        # bias defaults to False because BatchNorm supplies the shift.
        self.sub_module = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=bias),
            nn.BatchNorm2d(out_channels),
            nn.LeakyReLU()
        )

    def forward(self, x):
        return self.sub_module(x)
19 |
20 |
# Residual block
class ResidualLayer(nn.Module):
    """1x1 squeeze -> 3x3 restore with an identity skip connection.

    FIX: the original forward returned only sub_module(x) — despite its
    name, the block had no residual connection, unlike the sibling
    ResidualLayer in darknet53/darbnet53_module.py which returns
    x + sub_module(x). The skip is restored here (no parameter changes,
    so state_dicts still load).
    NOTE(review): checkpoints trained with the old, skip-less forward
    will produce different outputs after this fix.
    """
    def __init__(self, in_channels):
        super(ResidualLayer, self).__init__()
        self.sub_module = nn.Sequential(
            ConvolutionalLayer(in_channels=in_channels, out_channels=in_channels // 2, kernel_size=1, stride=1,
                               padding=0),
            ConvolutionalLayer(in_channels=in_channels // 2, out_channels=in_channels, kernel_size=3, stride=1,
                               padding=1)
        )

    def forward(self, x):
        # Identity shortcut; sub_module preserves shape, so the add is valid.
        return x + self.sub_module(x)
34 |
35 |
# Downsampling block
class DownSampleLayer(nn.Module):
    """Halve the spatial resolution with a stride-2 3x3 convolution
    (strided conv instead of pooling)."""

    def __init__(self, in_channels, out_channels):
        super(DownSampleLayer, self).__init__()
        self.sub_module = nn.Sequential(
            ConvolutionalLayer(in_channels, out_channels, 3, 2, 1)
        )

    def forward(self, x):
        return self.sub_module(x)
46 |
47 |
# Upsampling block
class UpSampleLayer(nn.Module):
    """Parameter-free 2x nearest-neighbour spatial upsampling."""

    def __init__(self):
        super(UpSampleLayer, self).__init__()

    def forward(self, x):
        # Doubles H and W; batch and channel dimensions are untouched.
        return functional.interpolate(x, scale_factor=2, mode='nearest')
55 |
56 |
# Convolutional set
class ConvolutionalSet(nn.Module):
    """Five alternating 1x1 / 3x3 convolutions (1-3-1-3-1) squeezing
    in_channels down to out_channels for feature extraction."""

    def __init__(self, in_channels, out_channels):
        super(ConvolutionalSet, self).__init__()
        layers = []
        for _ in range(2):
            layers.append(ConvolutionalLayer(in_channels, out_channels, 1, 1, 0))
            layers.append(ConvolutionalLayer(out_channels, in_channels, 3, 1, 1))
        # final 1x1 leaves the output at out_channels
        layers.append(ConvolutionalLayer(in_channels, out_channels, 1, 1, 0))
        self.sub_module = nn.Sequential(*layers)

    def forward(self, x):
        return self.sub_module(x)
74 |
75 |
class YoloNet_V3(nn.Module):
    """YOLOv3: Darknet-53-style trunk with detection heads at strides
    32/16/8. Each head emits 45 channels = 3 anchors * (5 + 10 class scores)."""

    def __init__(self):
        super(YoloNet_V3, self).__init__()
        # Trunk down to stride 8 -> 52x52 feature map.
        self.trunk_52 = nn.Sequential(
            ConvolutionalLayer(3, 32, 3, 1, 1),
            DownSampleLayer(32, 64),
            ResidualLayer(64),
            DownSampleLayer(64, 128),
            ResidualLayer(128),
            ResidualLayer(128),
            DownSampleLayer(128, 256),
            *[ResidualLayer(256) for _ in range(8)]
        )
        # Stride 16 -> 26x26.
        self.trunk_26 = nn.Sequential(
            DownSampleLayer(256, 512),
            *[ResidualLayer(512) for _ in range(8)]
        )
        # Stride 32 -> 13x13.
        self.trunk_13 = nn.Sequential(
            DownSampleLayer(512, 1024),
            *[ResidualLayer(1024) for _ in range(4)]
        )

        self.convset_13 = nn.Sequential(ConvolutionalSet(1024, 512))
        self.detetion_13 = nn.Sequential(
            ConvolutionalLayer(512, 1024, 3, 1, 1),
            nn.Conv2d(1024, 45, 1, 1, 0)
        )
        self.up_13_to_26 = nn.Sequential(
            # 3x3 instead of the paper's 1x1 so the branch also extracts features
            ConvolutionalLayer(512, 256, 3, 1, 1),
            UpSampleLayer()
        )

        # 256 upsampled + 512 skip = 768 input channels.
        self.convset_26 = nn.Sequential(ConvolutionalSet(768, 256))
        self.detetion_26 = nn.Sequential(
            ConvolutionalLayer(256, 512, 3, 1, 1),
            nn.Conv2d(512, 45, 1, 1, 0)
        )
        self.up_26_to_52 = nn.Sequential(
            ConvolutionalLayer(256, 128, 3, 1, 1),
            UpSampleLayer()
        )

        # 128 upsampled + 256 skip = 384 input channels.
        self.convset_52 = nn.Sequential(ConvolutionalSet(384, 128))
        self.detetion_52 = nn.Sequential(
            ConvolutionalLayer(128, 256, 3, 1, 1),
            nn.Conv2d(256, 45, 1, 1, 0)
        )

    def forward(self, x):
        feat_52 = self.trunk_52(x)
        feat_26 = self.trunk_26(feat_52)
        feat_13 = self.trunk_13(feat_26)

        conv_13 = self.convset_13(feat_13)
        det_13 = self.detetion_13(conv_13)

        # Top-down path: upsample and concatenate with the backbone map.
        conv_26 = self.convset_26(torch.cat((self.up_13_to_26(conv_13), feat_26), dim=1))
        det_26 = self.detetion_26(conv_26)

        conv_52 = self.convset_52(torch.cat((self.up_26_to_52(conv_26), feat_52), dim=1))
        det_52 = self.detetion_52(conv_52)

        return det_13, det_26, det_52
171 |
172 |
if __name__ == '__main__':
    # Smoke test: expect head shapes (1, 45, 13, 13), (1, 45, 26, 26),
    # (1, 45, 52, 52) for a 416x416 input.
    yolo = YoloNet_V3()
    x = torch.randn(1, 3, 416, 416)
    y = yolo(x)
    print(y[0].shape)
    print(y[1].shape)
    print(y[2].shape)
180 |
--------------------------------------------------------------------------------
/mobilenetv2/net_block.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | import torch
3 | from torch.nn import functional
4 |
5 |
# Basic convolution block
class ConvolutionalLayer(nn.Module):
    """Conv2d -> BatchNorm2d -> LeakyReLU."""

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, bias=False):
        super(ConvolutionalLayer, self).__init__()
        # bias defaults to False because BatchNorm supplies the shift.
        conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=bias)
        self.sub_module = nn.Sequential(
            conv,
            nn.BatchNorm2d(out_channels),
            nn.LeakyReLU()
        )

    def forward(self, x):
        return self.sub_module(x)
19 |
20 |
# Residual block
class ResidualLayer(nn.Module):
    """1x1 squeeze -> 3x3 restore with an identity skip connection.

    FIX: the original forward returned only sub_module(x) — despite its
    name, the block had no residual connection, unlike the sibling
    ResidualLayer in darknet53/darbnet53_module.py which returns
    x + sub_module(x). The skip is restored here (no parameter changes,
    so state_dicts still load).
    NOTE(review): checkpoints trained with the old, skip-less forward
    will produce different outputs after this fix.
    """
    def __init__(self, in_channels):
        super(ResidualLayer, self).__init__()
        self.sub_module = nn.Sequential(
            ConvolutionalLayer(in_channels=in_channels, out_channels=in_channels // 2, kernel_size=1, stride=1,
                               padding=0),
            ConvolutionalLayer(in_channels=in_channels // 2, out_channels=in_channels, kernel_size=3, stride=1,
                               padding=1)
        )

    def forward(self, x):
        # Identity shortcut; sub_module preserves shape, so the add is valid.
        return x + self.sub_module(x)
34 |
35 |
# Downsampling block
class DownSampleLayer(nn.Module):
    """Halve the spatial resolution with a stride-2 3x3 convolution
    (strided conv instead of pooling)."""

    def __init__(self, in_channels, out_channels):
        super(DownSampleLayer, self).__init__()
        self.sub_module = nn.Sequential(
            ConvolutionalLayer(in_channels, out_channels, 3, 2, 1)
        )

    def forward(self, x):
        return self.sub_module(x)
46 |
47 |
# Upsampling block
class UpSampleLayer(nn.Module):
    """Parameter-free 2x nearest-neighbour spatial upsampling."""

    def __init__(self):
        super(UpSampleLayer, self).__init__()

    def forward(self, x):
        # Doubles H and W; batch and channel dimensions are untouched.
        return functional.interpolate(x, scale_factor=2, mode='nearest')
55 |
56 |
# Convolution set (detection-head feature refinement)
class ConvolutionalSet(nn.Module):
    """Five-conv set (1x1 / 3x3 alternating, ending on 1x1).

    Typically ``in_channels`` is larger than ``out_channels``: the purpose is
    to squeeze the channel count while extracting features, so the set
    alternates 1x1 channel-reduction convs with 3x3 feature-extraction convs
    and finishes at the reduced width.
    """

    def __init__(self, in_channels, out_channels):
        super(ConvolutionalSet, self).__init__()
        layers = []
        for _ in range(2):
            layers.append(ConvolutionalLayer(in_channels, out_channels, 1, 1, 0))
            layers.append(ConvolutionalLayer(out_channels, in_channels, 3, 1, 1))
        layers.append(ConvolutionalLayer(in_channels, out_channels, 1, 1, 0))
        self.sub_module = nn.Sequential(*layers)

    def forward(self, x):
        return self.sub_module(x)
74 |
75 |
class YoloNet_V3(nn.Module):
    """YOLO v3 network with a Darknet-53 style backbone and three detection heads.

    For a 416x416 input, the heads emit feature maps of 13x13 (stride 32),
    26x26 (stride 16) and 52x52 (stride 8), each ``out_channels`` deep per cell.

    Args:
        out_channels: depth of each detection head's output. Defaults to 45,
            preserving the original hard-coded value (presumably
            3 anchors * (5 + 10 classes) -- confirm against cfg.py).
    """

    def __init__(self, out_channels=45):
        super(YoloNet_V3, self).__init__()
        # Backbone stage 1: input -> 52x52 feature map (stride 8, 256 ch).
        self.trunk_52 = nn.Sequential(
            ConvolutionalLayer(3, 32, 3, 1, 1),
            DownSampleLayer(32, 64),

            ResidualLayer(64),
            DownSampleLayer(64, 128),

            ResidualLayer(128),
            ResidualLayer(128),
            DownSampleLayer(128, 256),

            ResidualLayer(256),
            ResidualLayer(256),
            ResidualLayer(256),
            ResidualLayer(256),
            ResidualLayer(256),
            ResidualLayer(256),
            ResidualLayer(256),
            ResidualLayer(256)
        )
        # Backbone stage 2: 52x52 -> 26x26 feature map (stride 16, 512 ch).
        self.trunk_26 = nn.Sequential(
            DownSampleLayer(256, 512),
            ResidualLayer(512),
            ResidualLayer(512),
            ResidualLayer(512),
            ResidualLayer(512),
            ResidualLayer(512),
            ResidualLayer(512),
            ResidualLayer(512),
            ResidualLayer(512)
        )
        # Backbone stage 3: 26x26 -> 13x13 feature map (stride 32, 1024 ch).
        self.trunk_13 = nn.Sequential(
            DownSampleLayer(512, 1024),
            ResidualLayer(1024),
            ResidualLayer(1024),
            ResidualLayer(1024),
            ResidualLayer(1024)
        )

        self.convset_13 = nn.Sequential(
            ConvolutionalSet(1024, 512)
        )
        # Detection head for the 13x13 scale.
        self.detetion_13 = nn.Sequential(
            ConvolutionalLayer(512, 1024, 3, 1, 1),
            nn.Conv2d(1024, out_channels, 1, 1, 0)
        )
        self.up_13_to_26 = nn.Sequential(
            # The paper uses a 1x1 conv here; a 3x3 conv is used instead to
            # extract spatial features, which a 1x1 conv cannot do.
            ConvolutionalLayer(512, 256, 3, 1, 1),
            UpSampleLayer()
        )

        # 768 = 256 (upsampled from the 13x13 branch) + 512 (trunk_26 output).
        self.convset_26 = nn.Sequential(
            ConvolutionalSet(768, 256)
        )
        # Detection head for the 26x26 scale.
        self.detetion_26 = nn.Sequential(
            ConvolutionalLayer(256, 512, 3, 1, 1),
            nn.Conv2d(512, out_channels, 1, 1, 0)
        )

        self.up_26_to_52 = nn.Sequential(
            ConvolutionalLayer(256, 128, 3, 1, 1),
            UpSampleLayer()
        )

        # 384 = 128 (upsampled from the 26x26 branch) + 256 (trunk_52 output).
        self.convset_52 = nn.Sequential(
            ConvolutionalSet(384, 128)
        )
        # Detection head for the 52x52 scale.
        self.detetion_52 = nn.Sequential(
            ConvolutionalLayer(128, 256, 3, 1, 1),
            nn.Conv2d(256, out_channels, 1, 1, 0)
        )

    def forward(self, x):
        # Run the backbone, keeping the intermediate maps for the FPN merges.
        h_52 = self.trunk_52(x)
        h_26 = self.trunk_26(h_52)
        h_13 = self.trunk_13(h_26)

        # Coarsest scale: detect directly on the 13x13 features.
        convset_out_13 = self.convset_13(h_13)
        detetion_out_13 = self.detetion_13(convset_out_13)

        # Upsample and concatenate with the 26x26 backbone features.
        up_out_13_to_26 = self.up_13_to_26(convset_out_13)
        cat_out_26 = torch.cat((up_out_13_to_26, h_26), dim=1)
        convset_out_26 = self.convset_26(cat_out_26)
        detetion_out_26 = self.detetion_26(convset_out_26)

        # Upsample again and concatenate with the 52x52 backbone features.
        up_out_26_to_52 = self.up_26_to_52(convset_out_26)
        cat_out_52 = torch.cat((up_out_26_to_52, h_52), dim=1)
        convset_out_52 = self.convset_52(cat_out_52)
        detetion_out_52 = self.detetion_52(convset_out_52)

        return detetion_out_13, detetion_out_26, detetion_out_52
171 |
172 |
if __name__ == '__main__':
    # Smoke test: push a dummy 416x416 batch through the network and print
    # the shape of each of the three detection-head outputs.
    net = YoloNet_V3()
    dummy = torch.randn(1, 3, 416, 416)
    outputs = net(dummy)
    for out in outputs:
        print(out.shape)
--------------------------------------------------------------------------------