├── darknet53 ├── .idea │ ├── .WeDrive │ ├── .gitignore │ ├── inspectionProfiles │ │ ├── .WeDrive │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ └── yolov3.iml ├── __pycache__ │ ├── .WeDrive │ ├── cfg.cpython-38.pyc │ ├── dataset.cpython-38.pyc │ ├── utils.cpython-38.pyc │ ├── net_block.cpython-38.pyc │ ├── darbnet53_module.cpython-38.pyc │ └── mobilenet_v2_module.cpython-38.pyc ├── data │ ├── person_label.txt │ ├── images │ │ ├── 000017.jpg │ │ ├── 000022.jpg │ │ ├── 000261.jpg │ │ ├── 000455.jpg │ │ ├── 000812.jpg │ │ ├── 000930.jpg │ │ ├── 001183.jpg │ │ ├── 001370.jpg │ │ ├── 001579.jpg │ │ └── 001988.jpg │ ├── data.txt │ ├── test.py │ ├── image_voc │ │ ├── 000017.xml │ │ ├── 000022.xml │ │ ├── 000455.xml │ │ ├── 001988.xml │ │ ├── 000261.xml │ │ ├── 000930.xml │ │ ├── 001183.xml │ │ ├── 001579.xml │ │ ├── 000812.xml │ │ └── 001370.xml │ └── make_data_txt.py ├── images │ ├── 000017.jpg │ ├── 000022.jpg │ ├── 000261.jpg │ ├── 000455.jpg │ ├── 000812.jpg │ ├── 000930.jpg │ ├── 001084.jpg │ ├── 001183.jpg │ ├── 001370.jpg │ ├── 001579.jpg │ └── 001988.jpg ├── test.py ├── cfg.py ├── trainer.py ├── dataset.py ├── utils.py ├── detector.py ├── darbnet53_module.py └── net_block.py ├── mobilenetv2 ├── .idea │ ├── .WeDrive │ ├── .gitignore │ ├── inspectionProfiles │ │ ├── .WeDrive │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ └── yolov3.iml ├── __pycache__ │ ├── cfg.cpython-38.pyc │ ├── utils.cpython-38.pyc │ ├── dataset.cpython-38.pyc │ ├── net_block.cpython-38.pyc │ └── mobilenet_v2_module.cpython-38.pyc ├── cfg.py ├── data │ ├── test.py │ └── make_data_txt.py ├── trainer.py ├── dataset.py ├── utils.py ├── detector.py ├── mobilenet_v2_module.py └── net_block.py └── README.md /darknet53/.idea/.WeDrive: -------------------------------------------------------------------------------- 1 | D:\课程代码\20210823_YOLOv3_01\yolov3\.idea -------------------------------------------------------------------------------- /mobilenetv2/.idea/.WeDrive: 
-------------------------------------------------------------------------------- 1 | D:\课程代码\20210823_YOLOv3_01\yolov3\.idea -------------------------------------------------------------------------------- /darknet53/__pycache__/.WeDrive: -------------------------------------------------------------------------------- 1 | D:\课程代码\20210823_YOLOv3_01\yolov3\__pycache__ -------------------------------------------------------------------------------- /darknet53/.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | -------------------------------------------------------------------------------- /mobilenetv2/.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | -------------------------------------------------------------------------------- /darknet53/.idea/inspectionProfiles/.WeDrive: -------------------------------------------------------------------------------- 1 | D:\课程代码\20210823_YOLOv3_01\yolov3\.idea\inspectionProfiles -------------------------------------------------------------------------------- /mobilenetv2/.idea/inspectionProfiles/.WeDrive: -------------------------------------------------------------------------------- 1 | D:\课程代码\20210823_YOLOv3_01\yolov3\.idea\inspectionProfiles -------------------------------------------------------------------------------- /darknet53/data/person_label.txt: -------------------------------------------------------------------------------- 1 | images/1.jpg 1 12 13 51 18 2 22 38 55 98 2 44 33 62 62 2 | images/2.jpg 3 82 46 57 19 3 | -------------------------------------------------------------------------------- /darknet53/images/000017.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/000017.jpg -------------------------------------------------------------------------------- /darknet53/images/000022.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/000022.jpg -------------------------------------------------------------------------------- /darknet53/images/000261.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/000261.jpg -------------------------------------------------------------------------------- /darknet53/images/000455.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/000455.jpg -------------------------------------------------------------------------------- /darknet53/images/000812.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/000812.jpg -------------------------------------------------------------------------------- /darknet53/images/000930.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/000930.jpg -------------------------------------------------------------------------------- /darknet53/images/001084.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/001084.jpg 
-------------------------------------------------------------------------------- /darknet53/images/001183.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/001183.jpg -------------------------------------------------------------------------------- /darknet53/images/001370.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/001370.jpg -------------------------------------------------------------------------------- /darknet53/images/001579.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/001579.jpg -------------------------------------------------------------------------------- /darknet53/images/001988.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/images/001988.jpg -------------------------------------------------------------------------------- /darknet53/data/images/000017.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/000017.jpg -------------------------------------------------------------------------------- /darknet53/data/images/000022.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/000022.jpg -------------------------------------------------------------------------------- /darknet53/data/images/000261.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/000261.jpg -------------------------------------------------------------------------------- /darknet53/data/images/000455.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/000455.jpg -------------------------------------------------------------------------------- /darknet53/data/images/000812.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/000812.jpg -------------------------------------------------------------------------------- /darknet53/data/images/000930.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/000930.jpg -------------------------------------------------------------------------------- /darknet53/data/images/001183.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/001183.jpg -------------------------------------------------------------------------------- /darknet53/data/images/001370.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/001370.jpg -------------------------------------------------------------------------------- /darknet53/data/images/001579.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/001579.jpg -------------------------------------------------------------------------------- /darknet53/data/images/001988.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/data/images/001988.jpg -------------------------------------------------------------------------------- /darknet53/__pycache__/cfg.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/__pycache__/cfg.cpython-38.pyc -------------------------------------------------------------------------------- /darknet53/__pycache__/dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/__pycache__/dataset.cpython-38.pyc -------------------------------------------------------------------------------- /darknet53/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /mobilenetv2/__pycache__/cfg.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/mobilenetv2/__pycache__/cfg.cpython-38.pyc -------------------------------------------------------------------------------- /mobilenetv2/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/mobilenetv2/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /darknet53/__pycache__/net_block.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/__pycache__/net_block.cpython-38.pyc -------------------------------------------------------------------------------- /mobilenetv2/__pycache__/dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/mobilenetv2/__pycache__/dataset.cpython-38.pyc -------------------------------------------------------------------------------- /mobilenetv2/__pycache__/net_block.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/mobilenetv2/__pycache__/net_block.cpython-38.pyc -------------------------------------------------------------------------------- /darknet53/__pycache__/darbnet53_module.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/__pycache__/darbnet53_module.cpython-38.pyc -------------------------------------------------------------------------------- /darknet53/__pycache__/mobilenet_v2_module.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/darknet53/__pycache__/mobilenet_v2_module.cpython-38.pyc -------------------------------------------------------------------------------- 
/mobilenetv2/__pycache__/mobilenet_v2_module.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiaofengsheng/yolo-v3-DarkNet53-MobileNet-V2/HEAD/mobilenetv2/__pycache__/mobilenet_v2_module.cpython-38.pyc -------------------------------------------------------------------------------- /darknet53/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /mobilenetv2/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # yolo-v3-DarkNet53-MobileNet-V2 2 | 分别基于DarkNet53和MobileNet-V2两个网络实现yolo-v3 3 | 4 | 5 | DarkNet53的B站视频地址:https://www.bilibili.com/video/BV1Rf4y1n7mG?spm_id_from=333.999.0.0 6 | 7 | 其中MobileNet-V2网络实现,是因为他很轻量级,这个网络训练比较快,虽然准确率不如DarkNet53 8 | -------------------------------------------------------------------------------- /darknet53/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /mobilenetv2/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /darknet53/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /mobilenetv2/.idea/modules.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /darknet53/test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | 4 | # a = torch.Tensor([1, 2, 3, 4]) 5 | # b = a < 3 # mask 6 | # print(b) 7 | # print(a[b]) 8 | # print(b.nonzero()) 9 | # print(a[b.nonzero()]) 10 | 11 | a = torch.Tensor([[1, 2], [5, 6], [3, 1], [2, 8]]) 12 | # b = a < 3 13 | # print(b) 14 | # print(a[b]) 15 | b = a[:, 1] > 5 16 | print(b) 17 | print(a[b]) 18 | print(b.nonzero()) 19 | 20 | print(math.modf(3.4)) 21 | 22 | 23 | print(400/32) -------------------------------------------------------------------------------- /mobilenetv2/cfg.py: -------------------------------------------------------------------------------- 1 | 2 | IMG_HEIGHT = 416 3 | IMG_WIDTH = 416 4 | 5 | CLASS_NUM = 3 6 | 7 | ANCHORS_GROUP = { 8 | 13: [[311, 247], [159, 232], [200, 117]], 9 | 26: [[89, 159], [91, 74], [47, 97]], 10 | 52: [[48, 34], [25, 55], [15, 21]] 11 | } 12 | 13 | ANCHORS_GROUP_AREA = { 14 | 13: [x * y for x, y in ANCHORS_GROUP[13]], 15 | 26: [x * y for x, y in ANCHORS_GROUP[26]], 16 | 52: [x * y for x, y in ANCHORS_GROUP[52]], 17 | } 18 | -------------------------------------------------------------------------------- /darknet53/.idea/yolov3.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | -------------------------------------------------------------------------------- /darknet53/cfg.py: -------------------------------------------------------------------------------- 1 | 2 | IMG_HEIGHT = 416 3 | IMG_WIDTH = 416 4 | 5 | CLASS_NUM = 3 6 | 7 | ANCHORS_GROUP = { 8 | 13: [[270, 254], [291, 179], [162, 304]], 9 | 26: [[175, 222], [112, 235], [175, 140]], 10 | 52: [[81, 118], [53, 142], [44, 28]] 11 | } 12 | 13 | 
ANCHORS_GROUP_AREA = { 14 | 13: [x * y for x, y in ANCHORS_GROUP[13]], 15 | 26: [x * y for x, y in ANCHORS_GROUP[26]], 16 | 52: [x * y for x, y in ANCHORS_GROUP[52]], 17 | } 18 | -------------------------------------------------------------------------------- /mobilenetv2/.idea/yolov3.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | -------------------------------------------------------------------------------- /darknet53/data/data.txt: -------------------------------------------------------------------------------- 1 | 000017.jpg 0 201 112 81 118 1 213 179 271 223 2 | 000022.jpg 1 181 160 249 149 0 183 113 57 154 3 | 000261.jpg 2 168 167 336 208 2 193 155 291 179 4 | 000455.jpg 0 213 102 74 147 1 259 159 312 188 5 | 000812.jpg 2 197 209 230 134 0 211 152 175 249 0 332 33 51 36 0 99 10 36 19 6 | 000930.jpg 0 210 133 132 146 2 201 180 171 107 7 | 001183.jpg 1 110 184 89 98 1 202 198 175 195 0 212 138 49 129 8 | 001370.jpg 2 97 297 121 235 2 208 273 84 223 0 100 208 165 337 0 218 198 112 302 9 | 001579.jpg 2 90 333 179 164 2 149 288 239 155 0 158 224 128 225 0 53 264 105 267 10 | 001988.jpg 2 142 268 268 286 0 143 190 158 271 11 | -------------------------------------------------------------------------------- /darknet53/data/test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image,ImageDraw 3 | import os 4 | f=open('data.txt','r') 5 | datas=f.readlines() 6 | for data in datas: 7 | data=data.strip().split() 8 | img_path=os.path.join('images',data[0]) 9 | img=Image.open(img_path) 10 | w,h=img.size 11 | case=416/max(w,h) 12 | _boxes=np.array([float(x) for x in data[1:]]) 13 | boxes=np.split(_boxes,len(_boxes)//5) 14 | draw=ImageDraw.Draw(img) 15 | for box in boxes: 16 | cls,cx,cy,w,h=box 17 | x1,y1,x2,y2=cx/case-0.5*w/case,cy/case-0.5*h/case,cx/case+0.5*w/case,cy/case+0.5*h/case 18 | 
draw.rectangle((x1,y1,x2,y2),outline='red',width=2) 19 | 20 | img.show() -------------------------------------------------------------------------------- /mobilenetv2/data/test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image,ImageDraw 3 | import os 4 | f=open('data.txt','r') 5 | datas=f.readlines() 6 | for data in datas: 7 | data=data.strip().split() 8 | img_path=os.path.join('images',data[0]) 9 | img=Image.open(img_path) 10 | w,h=img.size 11 | case=416/max(w,h) 12 | _boxes=np.array([float(x) for x in data[1:]]) 13 | boxes=np.split(_boxes,len(_boxes)//5) 14 | draw=ImageDraw.Draw(img) 15 | for box in boxes: 16 | cls,cx,cy,w,h=box 17 | x1,y1,x2,y2=cx/case-0.5*w/case,cy/case-0.5*h/case,cx/case+0.5*w/case,cy/case+0.5*h/case 18 | draw.rectangle((x1,y1,x2,y2),outline='red',width=2) 19 | 20 | img.show() -------------------------------------------------------------------------------- /darknet53/data/image_voc/000017.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2007 3 | 000017.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 228217974 9 | 10 | 11 | genewolf 12 | whiskey kitten 13 | 14 | 15 | 480 16 | 364 17 | 3 18 | 19 | 0 20 | 21 | person 22 | Left 23 | 0 24 | 0 25 | 26 | 185 27 | 62 28 | 279 29 | 199 30 | 31 | 32 | 33 | horse 34 | Left 35 | 0 36 | 0 37 | 38 | 90 39 | 78 40 | 403 41 | 336 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /darknet53/data/image_voc/000022.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2007 3 | 000022.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 336380018 9 | 10 | 11 | Lothar Lenz 12 | Lothar Lenz 13 | 14 | 15 | 500 16 | 332 17 | 3 18 | 19 | 0 20 | 21 | horse 22 | Right 23 | 0 24 | 0 25 | 26 | 68 27 | 103 28 | 368 29 | 283 30 | 31 | 32 | 33 | person 34 | Right 35 | 0 
36 | 0 37 | 38 | 186 39 | 44 40 | 255 41 | 230 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /darknet53/data/image_voc/000455.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2007 3 | 000455.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 323858157 9 | 10 | 11 | Lothar Lenz 12 | Lothar Lenz 13 | 14 | 15 | 500 16 | 332 17 | 3 18 | 19 | 0 20 | 21 | person 22 | Left 23 | 0 24 | 0 25 | 26 | 213 27 | 35 28 | 302 29 | 212 30 | 31 | 32 | 33 | horse 34 | Left 35 | 0 36 | 0 37 | 38 | 125 39 | 79 40 | 500 41 | 306 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /darknet53/data/image_voc/001988.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2007 3 | 001988.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 272556816 9 | 10 | 11 | .s.e.a.n. 12 | Sean Scott 13 | 14 | 15 | 333 16 | 500 17 | 3 18 | 19 | 0 20 | 21 | bicycle 22 | Left 23 | 0 24 | 0 25 | 26 | 10 27 | 151 28 | 333 29 | 495 30 | 31 | 32 | 33 | person 34 | Unspecified 35 | 0 36 | 0 37 | 38 | 78 39 | 66 40 | 269 41 | 392 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /darknet53/data/image_voc/000261.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2007 3 | 000261.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 336032008 9 | 10 | 11 | MIKEBECKHAM DOT NET 12 | Mike Beckham 13 | 14 | 15 | 500 16 | 375 17 | 3 18 | 19 | 0 20 | 21 | bicycle 22 | Left 23 | 0 24 | 0 25 | 26 | 1 27 | 76 28 | 405 29 | 326 30 | 31 | 32 | 33 | bicycle 34 | Right 35 | 0 36 | 0 37 | 38 | 58 39 | 79 40 | 408 41 | 295 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /darknet53/data/image_voc/000930.xml: 
-------------------------------------------------------------------------------- 1 | 2 | VOC2007 3 | 000930.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 200825654 9 | 10 | 11 | bikeride 12 | Brent Soderberg 13 | 14 | 15 | 500 16 | 375 17 | 3 18 | 19 | 0 20 | 21 | person 22 | Unspecified 23 | 0 24 | 0 25 | 26 | 174 27 | 72 28 | 333 29 | 248 30 | 31 | 32 | 33 | bicycle 34 | Left 35 | 0 36 | 0 37 | 38 | 139 39 | 153 40 | 345 41 | 282 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /darknet53/data/image_voc/001183.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2007 3 | 001183.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 308220821 9 | 10 | 11 | lxt 12 | Laura Thomson 13 | 14 | 15 | 500 16 | 375 17 | 3 18 | 19 | 0 20 | 21 | horse 22 | Left 23 | 1 24 | 0 25 | 26 | 79 27 | 163 28 | 187 29 | 281 30 | 31 | 32 | 33 | horse 34 | Left 35 | 0 36 | 0 37 | 38 | 138 39 | 121 40 | 349 41 | 356 42 | 43 | 44 | 45 | person 46 | Unspecified 47 | 0 48 | 0 49 | 50 | 227 51 | 89 52 | 286 53 | 245 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /darknet53/data/make_data_txt.py: -------------------------------------------------------------------------------- 1 | import math 2 | import xml.etree.ElementTree as ET 3 | import os 4 | from PIL import Image 5 | class_dict={ 6 | 'person':0, 7 | 'horse':1, 8 | 'bicycle':2, 9 | } 10 | xml_files=os.listdir('image_voc') 11 | with open('data.txt','a') as f: 12 | for xml_file in xml_files: 13 | tree=ET.parse(os.path.join('image_voc',xml_file)) 14 | root=tree.getroot() 15 | image_name=root.find('filename') 16 | class_name=root.findall('object/name') 17 | boxes=root.findall('object/bndbox') 18 | filename=image_name.text 19 | temp=max(Image.open(os.path.join('images',filename)).size) 20 | print(416/temp) 21 | data=[] 22 | data.append(filename) 23 | for cls,box 
in zip(class_name,boxes): 24 | cls=class_dict[cls.text] 25 | cx,cy=math.floor((int(box[0].text)+int(box[2].text))/2),math.floor((int(box[1].text)+int(box[3].text))/2) 26 | w,h=(int(box[2].text)-int(box[0].text)),(int(box[3].text)-int(box[1].text)) 27 | obj=f"{cls},{math.floor(cx*416/temp)},{math.floor(cy*416/temp)},{math.floor(w*416/temp)},{math.floor(h*416/temp)}" 28 | data.append(obj) 29 | str='' 30 | for i in data: 31 | str=str+i+',' 32 | str=str.replace(',',' ').strip() 33 | f.write(str+'\n') 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /darknet53/data/image_voc/001579.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2007 3 | 001579.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 250936479 9 | 10 | 11 | selimski 12 | ? 13 | 14 | 15 | 375 16 | 500 17 | 3 18 | 19 | 0 20 | 21 | bicycle 22 | Right 23 | 1 24 | 0 25 | 26 | 1 27 | 302 28 | 217 29 | 500 30 | 31 | 32 | 33 | bicycle 34 | Right 35 | 1 36 | 0 37 | 38 | 36 39 | 254 40 | 324 41 | 441 42 | 43 | 44 | 45 | person 46 | Right 47 | 0 48 | 0 49 | 50 | 114 51 | 135 52 | 269 53 | 406 54 | 55 | 56 | 57 | person 58 | Right 59 | 1 60 | 0 61 | 62 | 1 63 | 157 64 | 128 65 | 479 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /darknet53/data/image_voc/000812.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2007 3 | 000812.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 329837865 9 | 10 | 11 | .s.e.a.n. 
12 | Sean Scott 13 | 14 | 15 | 500 16 | 333 17 | 3 18 | 19 | 0 20 | 21 | bicycle 22 | Left 23 | 1 24 | 0 25 | 26 | 99 27 | 171 28 | 376 29 | 333 30 | 31 | 32 | 33 | person 34 | Unspecified 35 | 0 36 | 0 37 | 38 | 149 39 | 33 40 | 360 41 | 333 42 | 43 | 44 | 45 | person 46 | Unspecified 47 | 1 48 | 1 49 | 50 | 369 51 | 18 52 | 431 53 | 62 54 | 55 | 56 | 57 | person 58 | Unspecified 59 | 1 60 | 1 61 | 62 | 97 63 | 1 64 | 141 65 | 25 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /darknet53/data/image_voc/001370.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2007 3 | 001370.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 220172534 9 | 10 | 11 | thevelodrome.com 12 | ? 13 | 14 | 15 | 333 16 | 500 17 | 3 18 | 19 | 0 20 | 21 | bicycle 22 | Frontal 23 | 0 24 | 0 25 | 26 | 44 27 | 217 28 | 190 29 | 500 30 | 31 | 32 | 33 | bicycle 34 | Frontal 35 | 1 36 | 0 37 | 38 | 199 39 | 195 40 | 301 41 | 464 42 | 43 | 44 | 45 | person 46 | Unspecified 47 | 0 48 | 0 49 | 50 | 22 51 | 48 52 | 221 53 | 454 54 | 55 | 56 | 57 | person 58 | Unspecified 59 | 1 60 | 0 61 | 62 | 196 63 | 57 64 | 331 65 | 420 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /mobilenetv2/data/make_data_txt.py: -------------------------------------------------------------------------------- 1 | import math 2 | import xml.etree.ElementTree as ET 3 | import os 4 | from PIL import Image 5 | class_dict={ 6 | 'aeroplane': 0, 7 | 'bicycle': 1, 8 | 'bird': 2, 9 | 'boat': 3, 10 | 'bottle': 4, 11 | 'bus': 5, 12 | 'car': 6, 13 | 'cat': 7, 14 | 'chair': 8, 15 | 'cow': 9, 16 | 'diningtable': 10, 17 | 'dog': 11, 18 | 'horse': 12, 19 | 'motorbike': 13, 20 | 'person': 14, 21 | 'pottedplant': 15, 22 | 'sheep': 16, 23 | 'sofa': 17, 24 | 'train': 18, 25 | 'tvmonitor': 19 26 | } 27 | train_xml_path=r'G:\data\voc\voc_train\VOC2007\Annotations' 28 | 
train_img_path=r'G:\data\voc\voc_train\VOC2007\JPEGImages' 29 | test_xml_path=r'G:\data\voc\voc_test\VOC2007\Annotations' 30 | test_img_path=r'G:\data\voc\voc_test\VOC2007\JPEGImages' 31 | 32 | xml_files=os.listdir(train_xml_path) 33 | with open('train_data.txt','a') as f: 34 | for xml_file in xml_files: 35 | tree=ET.parse(os.path.join(train_xml_path,xml_file)) 36 | root=tree.getroot() 37 | image_name=root.find('filename') 38 | class_name=root.findall('object/name') 39 | boxes=root.findall('object/bndbox') 40 | filename=image_name.text 41 | temp=max(Image.open(os.path.join(train_img_path,filename)).size) 42 | data=[] 43 | data.append(filename) 44 | for cls,box in zip(class_name,boxes): 45 | cls=class_dict[cls.text] 46 | cx,cy=math.floor((int(box[0].text)+int(box[2].text))/2),math.floor((int(box[1].text)+int(box[3].text))/2) 47 | w,h=(int(box[2].text)-int(box[0].text)),(int(box[3].text)-int(box[1].text)) 48 | obj=f"{cls},{math.floor(cx*416/temp)},{math.floor(cy*416/temp)},{math.floor(w*416/temp)},{math.floor(h*416/temp)}" 49 | data.append(obj) 50 | str='' 51 | for i in data: 52 | str=str+i+',' 53 | str=str.replace(',',' ').strip() 54 | f.write(str+'\n') 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /darknet53/trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | 5 | import torch.nn 6 | from mobilenet_v2_module import * 7 | import dataset 8 | from darbnet53_module import * 9 | from torch import nn 10 | 11 | def loss_fn(output, target, alpha): 12 | output = output.permute(0, 2, 3, 1)#N,45,13,13==>N,13,13,45 13 | output = output.reshape(output.size(0), output.size(1), output.size(2), 3, -1)#N,13,13,3,15 14 | # print("output:",output.shape) 15 | mask_obj = target[..., 0] > 0#N,13,13,3 16 | # print("mask_obj:",mask_obj.shape) 17 | mask_noobj = target[..., 0] == 0 18 | # print("mask_noobj:",mask_noobj.shape) 19 | # 
print("output[mask_obj]:",output[mask_obj].shape) 20 | # print("output[mask_noobj]:", output[mask_noobj].shape) 21 | loss_p_fun=nn.BCELoss() 22 | loss_p=loss_p_fun(torch.sigmoid(output[...,0]),target[...,0]) 23 | loss_box_fun=nn.MSELoss() 24 | loss_box=loss_box_fun(output[mask_obj][...,1:5],target[mask_obj][...,1:5]) 25 | loss_cls_box_fun=nn.CrossEntropyLoss() 26 | loss_cls_box=loss_cls_box_fun(output[mask_obj][...,5:],torch.argmax(target[mask_obj][...,5:],dim=1,keepdim=True).squeeze(dim=1)) 27 | loss = alpha * loss_p + (1-alpha)*0.5*loss_box+ (1-alpha)*0.5*loss_cls_box 28 | return loss 29 | 30 | 31 | if __name__ == '__main__': 32 | weight_path= 'darknet_params/net597.pt' 33 | myDataset = dataset.MyDataset() 34 | train_loader = torch.utils.data.DataLoader(myDataset, batch_size=5, shuffle=True) 35 | 36 | net = Darknet53().cuda() 37 | if os.path.exists(weight_path): 38 | net.load_state_dict(torch.load(weight_path)) 39 | net.train() 40 | 41 | opt = torch.optim.Adam(net.parameters()) 42 | epoch = 0 43 | while True: 44 | for target_13, target_26, target_52, img_data in train_loader: 45 | target_13, target_26, target_52, img_data=target_13.cuda(), target_26.cuda(), target_52.cuda(), img_data.cuda() 46 | output_13, output_26, output_52 = net(img_data) 47 | loss_13 = loss_fn(output_13.float(), target_13.float(), 0.6) 48 | loss_26 = loss_fn(output_26.float(), target_26.float(), 0.6) 49 | loss_52 = loss_fn(output_52.float(), target_52.float(), 0.6) 50 | # 51 | loss = loss_13 + loss_26 + loss_52 52 | opt.zero_grad() 53 | loss.backward() 54 | opt.step() 55 | print(epoch,loss.item()) 56 | torch.save(net.state_dict(), f'darknet_params/net{epoch}.pt') 57 | print(f'{epoch}保存成功') 58 | epoch+=1 -------------------------------------------------------------------------------- /mobilenetv2/trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | 5 | import torch.nn 6 | from mobilenet_v2_module import * 7 | 
from torch import nn


def loss_fn(output, target, alpha):
    """Single-scale YOLOv3 loss.

    output: raw network map, N x 45 x H x W (3 anchors x (conf + 4 box + 10 cls),
            per the reshape below).
    target: N x H x W x 3 x 15 label tensor built by the dataset.
    alpha:  weight of the confidence term; box and class terms split (1 - alpha).
    Returns a scalar loss tensor.
    """
    output = output.permute(0, 2, 3, 1)  # N,45,13,13 -> N,13,13,45
    output = output.reshape(output.size(0), output.size(1), output.size(2), 3, -1)  # N,13,13,3,15
    mask_obj = target[..., 0] > 0        # N,13,13,3 — cells that own a target box
    mask_noobj = target[..., 0] == 0     # currently unused; kept for reference
    # BCEWithLogitsLoss fuses the sigmoid into the loss; numerically more
    # stable than BCELoss(torch.sigmoid(x), t) and identical in value.
    loss_p_fun = nn.BCEWithLogitsLoss()
    loss_p = loss_p_fun(output[..., 0], target[..., 0])
    loss_box_fun = nn.MSELoss()
    loss_box = loss_box_fun(output[mask_obj][..., 1:5], target[mask_obj][..., 1:5])
    loss_cls_box_fun = nn.CrossEntropyLoss()
    # one-hot class vector -> class index expected by CrossEntropyLoss
    loss_cls_box = loss_cls_box_fun(output[mask_obj][..., 5:],
                                    torch.argmax(target[mask_obj][..., 5:], dim=1, keepdim=True).squeeze(dim=1))
    loss = alpha * loss_p + (1 - alpha) * 0.5 * loss_box + (1 - alpha) * 0.5 * loss_cls_box
    return loss


if __name__ == '__main__':
    import dataset  # project-local; imported here so loss_fn stays importable on its own

    weight_path = 'mobilenetv2_params/net0-599-5949.pt'
    myDataset = dataset.MyDataset()
    train_loader = torch.utils.data.DataLoader(myDataset, batch_size=3, shuffle=True)

    net = MobileNet_v2(config).cuda()
    if os.path.exists(weight_path):
        net.load_state_dict(torch.load(weight_path))
    net.train()

    opt = torch.optim.Adam(net.parameters())
    epoch = 0
    while True:
        for i, (target_13, target_26, target_52, img_data) in enumerate(train_loader):
            target_13, target_26, target_52, img_data = target_13.cuda(), target_26.cuda(), target_52.cuda(), img_data.cuda()
            output_13, output_26, output_52 = net(img_data)
            loss_13 = loss_fn(output_13.float(), target_13.float(), 0.6)
            loss_26 = loss_fn(output_26.float(), target_26.float(), 0.6)
            loss_52 = loss_fn(output_52.float(), target_52.float(), 0.6)
            loss = loss_13 + loss_26 + loss_52
50 | opt.zero_grad() 51 | loss.backward() 52 | opt.step() 53 | print(f"{epoch}--{i}--{loss.item()}") 54 | # if i%10==0: 55 | # print(f"{epoch}--{i}--{loss.item()}") 56 | # if (i+1)%200==0: 57 | # torch.save(net.state_dict(), f'mobilenetv2_params/net{epoch}-{i}-{i}.pt') 58 | # print(f'{epoch}保存成功') 59 | torch.save(net.state_dict(), f'g_params/net{epoch}.pt') 60 | print(f'{epoch}保存成功') 61 | epoch+=1 -------------------------------------------------------------------------------- /darknet53/dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset 3 | import torchvision 4 | import numpy as np 5 | import cfg 6 | import os 7 | from utils import * 8 | from PIL import Image,ImageDraw 9 | import math 10 | 11 | LABEL_FILE_PATH = "data/data.txt" 12 | IMG_BASE_DIR = "data/images" 13 | 14 | transforms = torchvision.transforms.Compose([ 15 | torchvision.transforms.ToTensor() 16 | ]) 17 | 18 | 19 | def one_hot(cls_num, v): 20 | b = np.zeros(cls_num) 21 | b[v] = 1. 
22 | return b 23 | 24 | 25 | class MyDataset(Dataset): 26 | 27 | def __init__(self): 28 | with open(LABEL_FILE_PATH) as f: 29 | self.dataset = f.readlines() 30 | 31 | def __len__(self): 32 | return len(self.dataset) 33 | 34 | def __getitem__(self, index): 35 | labels = {} 36 | 37 | line = self.dataset[index] 38 | strs = line.split() 39 | _img_data = make_image_data(os.path.join(IMG_BASE_DIR, strs[0])) 40 | w,h=_img_data.size[0],_img_data.size[1] 41 | 42 | _img_data = _img_data.resize((416,416))#此处要等比缩放 43 | img_data = transforms(_img_data) 44 | draw=ImageDraw.Draw(_img_data) 45 | _boxes = np.array([float(x) for x in strs[1:]]) 46 | # print(_boxes[0]) 47 | # _boxes = np.array(list(map(float, strs[1:]))) 48 | boxes = np.split(_boxes, len(_boxes) // 5) 49 | index = 0 50 | for feature_size, anchors in cfg.ANCHORS_GROUP.items(): 51 | labels[feature_size] = np.zeros(shape=(feature_size, feature_size, 3, 5 + cfg.CLASS_NUM)) 52 | 53 | for box in boxes: 54 | cls, cx, cy, w, h = box 55 | draw.rectangle((cx-w*0.5,cy-h*0.5,cx+w*0.5,cy+h*0.5),outline='red',width=1) 56 | _img_data.show() 57 | cx_offset, cx_index = math.modf(cx * feature_size / cfg.IMG_WIDTH) 58 | cy_offset, cy_index = math.modf(cy * feature_size / cfg.IMG_WIDTH) 59 | for i, anchor in enumerate(anchors): 60 | 61 | anchor_area = cfg.ANCHORS_GROUP_AREA[feature_size][i] 62 | p_w, p_h = w / (anchor[0]), h / (anchor[1]) 63 | p_area = w * h 64 | iou = min(p_area, anchor_area) / max(p_area, anchor_area) 65 | index+=1 66 | # print(feature_size, cx_index, cy_index, i) 67 | # print(box) 68 | if labels[feature_size][int(cy_index), int(cx_index), i][0] 0: 66 | _box = sort_boxes[0] 67 | keep_boxes.append(_box) 68 | 69 | if len(sort_boxes) > 1: 70 | _boxes = sort_boxes[1:] 71 | # print(_clses.shape) 72 | # print(_cls.shape) 73 | # print(mask.shape, "-------------------") 74 | # print(_boxes) 75 | # print(_boxes.shape) 76 | 77 | _iou = iou(_box, _boxes, mode) 78 | sort_boxes=_boxes[_iou< thresh] 79 | 80 | else: 81 | break 82 | 
83 | return keep_boxes 84 | 85 | 86 | # def detect(feature_map, thresh): 87 | # masks = feature_map[:, 4, :, :] > thresh 88 | # idxs = torch.nonzero(masks) 89 | 90 | 91 | if __name__ == '__main__': 92 | # box = torch.Tensor([2, 2, 3, 3, 6]) 93 | # boxes = torch.Tensor([[2, 2, 3, 3, 6], [2, 2, 4, 4, 5], [2, 2, 5, 5, 4]]) 94 | # print(iou(box, boxes, mode="inter")) 95 | # print(nms(boxes, 0.1)) 96 | # import numpy as np 97 | # 98 | # a = np.array([[1, 2], [3, 4]]) 99 | # print(a[:, 1]) 100 | make_image_data('images/1.jpg') 101 | -------------------------------------------------------------------------------- /darknet53/detector.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from darbnet53_module import * 4 | import cfg 5 | from PIL import Image,ImageDraw 6 | from darbnet53_module import * 7 | from dataset import * 8 | import os 9 | class_dict={ 10 | 0:'person', 11 | 1:'horse', 12 | 2:'bicycle', 13 | } 14 | class Detector(torch.nn.Module): 15 | 16 | def __init__(self): 17 | super(Detector, self).__init__() 18 | 19 | self.net = Darknet53() 20 | self.net.load_state_dict(torch.load('darknet_params/net597.pt')) 21 | self.net.eval() 22 | 23 | def forward(self, input, thresh, anchors,case): 24 | output_13, output_26, output_52 = self.net(input) 25 | 26 | idxs_13, vecs_13 = self._filter(output_13, thresh) 27 | boxes_13 = self._parse(idxs_13, vecs_13, 32, anchors[13],case) 28 | 29 | idxs_26, vecs_26 = self._filter(output_26, thresh) 30 | boxes_26 = self._parse(idxs_26, vecs_26, 16, anchors[26],case) 31 | 32 | idxs_52, vecs_52 = self._filter(output_52, thresh) 33 | boxes_52 = self._parse(idxs_52, vecs_52, 8, anchors[52],case) 34 | boxes=torch.cat([boxes_13, boxes_26, boxes_52], dim=0) 35 | # rst=[] 36 | # for i in range(3): 37 | # bs=boxes[boxes[...,6]==i] 38 | # for j in bs 39 | # bs = nms(bs, 0.9, mode="inter") 40 | # rst.append(bs) 41 | boxes=nms(boxes, 0.5, mode='inter') 42 | return boxes 43 | 44 | 45 | def 
_filter(self, output, thresh): 46 | output = output.permute(0, 2, 3, 1) 47 | output = output.reshape(output.size(0), output.size(1), output.size(2), 3, -1) 48 | #mask:N,H,W,3,15 49 | 50 | mask = torch.sigmoid(output[..., 0]) > thresh 51 | 52 | idxs = mask.nonzero() 53 | vecs = output[mask] 54 | return idxs, vecs 55 | 56 | def _parse(self, idxs, vecs, t, anchors,case): 57 | anchors = torch.Tensor(anchors) 58 | 59 | n = idxs[:, 0] # 所属的图片 60 | a = idxs[:, 3] # 建议框 61 | 62 | cy = (idxs[:, 1].float() + vecs[:, 2]) * t /case # 原图的中心点y 63 | cx = (idxs[:, 2].float() + vecs[:, 1]) * t /case # 原图的中心点x 64 | 65 | w = anchors[a, 0] * torch.exp(vecs[:, 3])/case 66 | h = anchors[a, 1] * torch.exp(vecs[:, 4])/case 67 | p=vecs[:,0] 68 | cls_p=vecs[:,5:] 69 | cls_p=torch.softmax(cls_p,dim=1) 70 | cls_index = torch.argmax(cls_p, dim=1) 71 | return torch.stack([n.float(), torch.sigmoid(p),cx, cy, w, h,cls_index], dim=1) 72 | 73 | 74 | if __name__ == '__main__': 75 | detector = Detector() 76 | # y = detector(torch.randn(3, 3, 416, 416), 0.3, cfg.ANCHORS_GROUP,0.5) 77 | # print(y.shape) 78 | for i in os.listdir('images'): 79 | img=Image.open('images/'+i) 80 | _img = make_image_data('images/'+i) 81 | w, h = _img.size[0], _img.size[1] 82 | case = 416 / w 83 | # print(case) 84 | _img = _img.resize((416, 416)) # 此处要等比缩放 85 | _img_data = transforms(_img) 86 | _img_data=torch.unsqueeze(_img_data,dim=0) 87 | # print(_img_data.shape) 88 | result=detector(_img_data, 0.2, cfg.ANCHORS_GROUP,case) 89 | draw=ImageDraw.Draw(img) 90 | for rst in result: 91 | if len(rst)==0: 92 | continue 93 | else: 94 | # rst=rst[0] 95 | x1,y1,x2,y2=rst[2]-0.5*rst[4],rst[3]-0.5*rst[5],rst[2]+0.5*rst[4],rst[3]+0.5*rst[5] 96 | print(f'置信度:{str(rst[1].item())[:4]} 坐标点:{x1,y1,x2,y2} 类别:{class_dict[int(rst[6].item())]}') 97 | draw.text((x1,y1),class_dict[int(rst[6].item())]+str(rst[1].item())[:4]) 98 | draw.rectangle((x1,y1,x2,y2),width=1,outline='red') 99 | img.show() 100 | 
# -------------------------------------------------------------------------------- /mobilenetv2/utils.py --------------------------------------------------------------------------------
import numpy as np
import torch


def make_image_data(path):
    """Open *path* and paste it (top-left aligned) onto a black square canvas."""
    from PIL import Image  # lazy import: Pillow is only needed for image loading
    img = Image.open(path)
    w, h = img.size[0], img.size[1]
    side = max(h, w)
    mask = Image.new('RGB', (side, side), (0, 0, 0))
    mask.paste(img, (0, 0))
    return mask


def iou(box, boxes, mode="inter"):
    """IoU of one box against many; rows are (.., conf, cx, cy, w, h, ..).

    mode='inter': intersection over union; mode='min': intersection over the
    smaller of the two areas.
    """
    cx, cy, w, h = box[2], box[3], box[4], box[5]
    cxs, cys, ws, hs = boxes[:, 2], boxes[:, 3], boxes[:, 4], boxes[:, 5]

    box_area = w * h        # area of the single box
    boxes_area = ws * hs    # areas of the candidate boxes

    # center/size -> corner coordinates
    _x1, _x2, _y1, _y2 = cx - w / 2, cx + w / 2, cy - h / 2, cy + h / 2
    _xx1, _xx2, _yy1, _yy2 = cxs - ws / 2, cxs + ws / 2, cys - hs / 2, cys + hs / 2

    xx1 = torch.maximum(_x1, _xx1)  # intersection top-left
    yy1 = torch.maximum(_y1, _yy1)
    xx2 = torch.minimum(_x2, _xx2)  # intersection bottom-right
    yy2 = torch.minimum(_y2, _yy2)

    # clamp to zero so disjoint boxes produce no (negative) overlap
    w = torch.clamp(xx2 - xx1, min=0)
    h = torch.clamp(yy2 - yy1, min=0)

    inter = w * h

    if mode == 'inter':
        return inter / (box_area + boxes_area - inter)
    elif mode == 'min':
        return inter / torch.min(box_area, boxes_area)


def nms(boxes, thresh, mode='inter'):
    """Greedy non-maximum suppression; returns the kept rows as a list."""
    args = boxes[:, 1].argsort(descending=True)  # highest confidence first

    sort_boxes = boxes[args]
    keep_boxes = []

    while len(sort_boxes) > 0:
        _box = sort_boxes[0]
        keep_boxes.append(_box)

        if len(sort_boxes) > 1:
            _boxes = sort_boxes[1:]
            _iou = iou(_box, _boxes, mode)
            sort_boxes = _boxes[_iou < thresh]  # drop boxes overlapping the kept one
        else:
            break

    return keep_boxes


if __name__ == '__main__':
    make_image_data('images/1.jpg')
# -------------------------------------------------------------------------------- /mobilenetv2/detector.py --------------------------------------------------------------------------------
import torch
| 3 | from mobilenet_v2_module import * 4 | import cfg 5 | from PIL import Image, ImageDraw 6 | from mobilenet_v2_module import * 7 | from dataset import * 8 | import os 9 | 10 | # class_dict = { 11 | # 0: 'aeroplane', 12 | # 1: 'bicycle', 13 | # 2: 'bird', 14 | # 3: 'boat', 15 | # 4: 'bottle', 16 | # 5: 'bus', 17 | # 6: 'car', 18 | # 7: 'cat', 19 | # 8: 'chair', 20 | # 9: 'cow', 21 | # 10: 'diningtable', 22 | # 11: 'dog', 23 | # 12: 'horse', 24 | # 13: 'motorbike', 25 | # 14: 'person', 26 | # 15: 'pottedplant', 27 | # 16: 'sheep', 28 | # 17: 'sofa', 29 | # 18: 'train', 30 | # 19: 'tvmonitor' 31 | # } 32 | class_dict = { 33 | 0:'person', 34 | 1:'horse', 35 | 2:'bicycle'} 36 | 37 | class Detector(torch.nn.Module): 38 | 39 | def __init__(self): 40 | super(Detector, self).__init__() 41 | 42 | self.net = MobileNet_v2(config) 43 | self.net.load_state_dict(torch.load('g_params/net237.pt')) 44 | self.net.eval() 45 | 46 | def forward(self, input, thresh, anchors, case): 47 | output_13, output_26, output_52 = self.net(input) 48 | 49 | idxs_13, vecs_13 = self._filter(output_13, thresh) 50 | boxes_13 = self._parse(idxs_13, vecs_13, 32, anchors[13], case) 51 | 52 | idxs_26, vecs_26 = self._filter(output_26, thresh) 53 | boxes_26 = self._parse(idxs_26, vecs_26, 16, anchors[26], case) 54 | 55 | idxs_52, vecs_52 = self._filter(output_52, thresh) 56 | boxes_52 = self._parse(idxs_52, vecs_52, 8, anchors[52], case) 57 | boxes = torch.cat([boxes_13, boxes_26, boxes_52], dim=0) 58 | boxes = nms(boxes, 0.4, mode="inter") 59 | # print(boxes) 60 | return boxes 61 | 62 | def _filter(self, output, thresh): 63 | output = output.permute(0, 2, 3, 1) 64 | output = output.reshape(output.size(0), output.size(1), output.size(2), 3, -1) 65 | # mask:N,H,W,3,15 66 | 67 | mask = torch.sigmoid(output[..., 0]) > thresh 68 | 69 | idxs = mask.nonzero() 70 | vecs = output[mask] 71 | return idxs, vecs 72 | 73 | def _parse(self, idxs, vecs, t, anchors, case): 74 | anchors = torch.Tensor(anchors) 75 | 76 | n 
= idxs[:, 0] # 所属的图片 77 | a = idxs[:, 3] # 建议框 78 | 79 | cy = (idxs[:, 1].float() + vecs[:, 2]) * t / case # 原图的中心点y 80 | cx = (idxs[:, 2].float() + vecs[:, 1]) * t / case # 原图的中心点x 81 | 82 | w = anchors[a, 0] * torch.exp(vecs[:, 3]) / case 83 | h = anchors[a, 1] * torch.exp(vecs[:, 4]) / case 84 | p = vecs[:, 0] 85 | cls_p = vecs[:, 5:] 86 | cls_p = torch.softmax(cls_p, dim=1) 87 | cls_index = torch.argmax(cls_p, dim=1) 88 | return torch.stack([n.float(), torch.sigmoid(p), cx, cy, w, h, cls_index], dim=1) 89 | 90 | 91 | if __name__ == '__main__': 92 | detector = Detector() 93 | # y = detector(torch.randn(3, 3, 416, 416), 0.3, cfg.ANCHORS_GROUP,0.5) 94 | # print(y.shape) 95 | for i in os.listdir('E:/pythonSpace/yolov3/darknet53/data/images'): 96 | img = Image.open('E:/pythonSpace/yolov3/darknet53/data/images/' + i) 97 | _img = make_image_data('E:/pythonSpace/yolov3/darknet53/data/images/' + i) 98 | w, h = _img.size[0], _img.size[1] 99 | case = 416 / w 100 | # print(case) 101 | _img = _img.resize((416, 416)) # 此处要等比缩放 102 | _img_data = transforms(_img) 103 | _img_data = torch.unsqueeze(_img_data, dim=0) 104 | # print(_img_data.shape) 105 | result = detector(_img_data, 0.2, cfg.ANCHORS_GROUP, case) 106 | draw = ImageDraw.Draw(img) 107 | for rst in result: 108 | x1, y1, x2, y2 = rst[2] - 0.5 * rst[4], rst[3] - 0.5 * rst[5], rst[2] + 0.5 * rst[4], rst[3] + 0.5 * rst[5] 109 | print(f'置信度:{str(rst[1].item())[:4]} 坐标点:{x1, y1, x2, y2} 类别:{class_dict[int(rst[6].item())]}') 110 | draw.text((x1, y1), class_dict[int(rst[6].item())] + str(rst[1].item())[:4]) 111 | draw.rectangle((x1, y1, x2, y2), width=1, outline='red') 112 | img.show() 113 | -------------------------------------------------------------------------------- /mobilenetv2/mobilenet_v2_module.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from net_block import * 4 | 5 | config = [ 6 | [-1, 32, 1, 2], 7 | [1, 16, 1, 1], 8 | [6, 24, 
2, 2], 9 | [6, 32, 4, 2], 10 | [6, 64, 4, 2], 11 | [6, 96, 4, 1], 12 | [6, 160, 4, 2], 13 | [6, 320, 2, 1], 14 | ] 15 | 16 | 17 | class Bottleneck(nn.Module): 18 | def __init__(self, c_in, i, t, c, n, s): 19 | super(Bottleneck, self).__init__() 20 | self.i = i 21 | self.n = n 22 | _s = s if i == n - 1 else 1 23 | _c = c if i == n - 1 else c_in 24 | _p_c = c_in * t 25 | 26 | self.sub_module = nn.Sequential( 27 | nn.Conv2d(c_in, _p_c, 1, 1, bias=False), 28 | nn.BatchNorm2d(_p_c), 29 | nn.ReLU6(), 30 | nn.Conv2d(_p_c, _p_c, 3, _s, 1, bias=False), 31 | nn.BatchNorm2d(_p_c), 32 | nn.ReLU6(), 33 | nn.Conv2d(_p_c, _c, 1, 1, bias=False), 34 | nn.BatchNorm2d(_c) 35 | ) 36 | 37 | def forward(self, x): 38 | if self.i == self.n - 1: 39 | return self.sub_module(x) 40 | else: 41 | return self.sub_module(x) + x 42 | 43 | 44 | class MobileNet_v2(nn.Module): 45 | def __init__(self, config): 46 | super(MobileNet_v2, self).__init__() 47 | self.input_layer = nn.Sequential( 48 | nn.Conv2d(3, 32, 3, 2, 1, bias=False), 49 | nn.BatchNorm2d(32), 50 | nn.ReLU6() 51 | ) 52 | self.blocks1 = [] 53 | self.blocks2 = [] 54 | self.blocks3 = [] 55 | c_in = config[0][1] 56 | for t, c, n, s in config[1:4]: 57 | for i in range(n): 58 | self.blocks1.append(Bottleneck(c_in, i, t, c, n, s)) 59 | c_in = c 60 | for t, c, n, s in config[4:5]: 61 | for i in range(n): 62 | self.blocks2.append(Bottleneck(c_in, i, t, c, n, s)) 63 | c_in = c 64 | for t, c, n, s in config[5:]: 65 | for i in range(n): 66 | self.blocks3.append(Bottleneck(c_in, i, t, c, n, s)) 67 | c_in = c 68 | 69 | self.hidden_layers1 = nn.Sequential(*self.blocks1) 70 | self.hidden_layers2 = nn.Sequential(*self.blocks2) 71 | self.hidden_layers3 = nn.Sequential(*self.blocks3) 72 | 73 | self.convset_13 = ConvolutionalSet(320, 64) 74 | self.detetion_13 = nn.Sequential( 75 | ConvolutionalLayer(64, 320, 3, 1, 1), 76 | nn.Conv2d(320, 24, 1, 1, 0) 77 | ) 78 | self.up_13_to_26 = nn.Sequential( 79 | ConvolutionalLayer(64, 32, 3, 1, 1), 80 | UpSampleLayer() 
81 | ) 82 | 83 | self.convset_26 = ConvolutionalSet(96, 32) 84 | self.detetion_26 = nn.Sequential( 85 | ConvolutionalLayer(32, 64, 3, 1, 1), 86 | nn.Conv2d(64, 24, 1, 1, 0) 87 | ) 88 | self.up_26_to_52 = nn.Sequential( 89 | ConvolutionalLayer(32, 16, 3, 1, 1), 90 | UpSampleLayer() 91 | ) 92 | 93 | self.convset_52 = ConvolutionalSet(48, 24) 94 | self.detetion_52 = nn.Sequential( 95 | ConvolutionalLayer(24, 48, 3, 1, 1), 96 | nn.Conv2d(48, 24, 1, 1, 0) 97 | ) 98 | 99 | def forward(self, x): 100 | out_52 = self.hidden_layers1(self.input_layer(x)) 101 | out_26 = self.hidden_layers2(out_52) 102 | out_13 = self.hidden_layers3(out_26) 103 | 104 | convset_out_13 = self.convset_13(out_13) 105 | detetion_out_13 = self.detetion_13(convset_out_13) 106 | up_13_to_26_out = self.up_13_to_26(convset_out_13) 107 | cat_out_26 = torch.cat((up_13_to_26_out, out_26), dim=1) 108 | 109 | convset_26 = self.convset_26(cat_out_26) 110 | detetion_out_26 = self.detetion_26(convset_26) 111 | up_26_to_52_out = self.up_26_to_52(convset_26) 112 | cat_out_52 = torch.cat((up_26_to_52_out, out_52), dim=1) 113 | 114 | convset_52 = self.convset_52(cat_out_52) 115 | detetion_out_52 = self.detetion_52(convset_52) 116 | 117 | return detetion_out_13, detetion_out_26, detetion_out_52 118 | 119 | 120 | if __name__ == '__main__': 121 | x = torch.randn((1, 3, 416, 416)) 122 | net = MobileNet_v2(config) 123 | y = net(x) 124 | print(y[0].shape) 125 | print(y[1].shape) 126 | print(y[2].shape) 127 | -------------------------------------------------------------------------------- /darknet53/darbnet53_module.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class UpsampleLayer(torch.nn.Module): 5 | 6 | def __init__(self): 7 | super(UpsampleLayer, self).__init__() 8 | 9 | def forward(self, x): 10 | return torch.nn.functional.interpolate(x, scale_factor=2, mode='nearest') 11 | 12 | 13 | class ConvolutionalLayer(torch.nn.Module): 14 | 15 | def 
__init__(self, in_channels, out_channels, kernel_size, stride, padding, bias=False): 16 | super(ConvolutionalLayer, self).__init__() 17 | 18 | self.sub_module = torch.nn.Sequential( 19 | torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=bias), 20 | torch.nn.BatchNorm2d(out_channels), 21 | torch.nn.LeakyReLU() 22 | ) 23 | 24 | def forward(self, x): 25 | return self.sub_module(x) 26 | 27 | 28 | class ResidualLayer(torch.nn.Module): 29 | 30 | def __init__(self, in_channels): 31 | super(ResidualLayer, self).__init__() 32 | 33 | self.sub_module = torch.nn.Sequential( 34 | ConvolutionalLayer(in_channels, in_channels // 2, 1, 1, 0), 35 | ConvolutionalLayer(in_channels // 2, in_channels, 3, 1, 1), 36 | ) 37 | 38 | def forward(self, x): 39 | return x + self.sub_module(x) 40 | 41 | 42 | class DownsamplingLayer(torch.nn.Module): 43 | def __init__(self, in_channels, out_channels): 44 | super(DownsamplingLayer, self).__init__() 45 | 46 | self.sub_module = torch.nn.Sequential( 47 | ConvolutionalLayer(in_channels, out_channels, 3, 2, 1) 48 | ) 49 | 50 | def forward(self, x): 51 | return self.sub_module(x) 52 | 53 | 54 | class ConvolutionalSet(torch.nn.Module): 55 | def __init__(self, in_channels, out_channels): 56 | super(ConvolutionalSet, self).__init__() 57 | 58 | self.sub_module = torch.nn.Sequential( 59 | ConvolutionalLayer(in_channels, out_channels, 1, 1, 0), 60 | ConvolutionalLayer(out_channels, in_channels, 3, 1, 1), 61 | 62 | ConvolutionalLayer(in_channels, out_channels, 1, 1, 0), 63 | ConvolutionalLayer(out_channels, in_channels, 3, 1, 1), 64 | 65 | ConvolutionalLayer(in_channels, out_channels, 1, 1, 0), 66 | ) 67 | 68 | def forward(self, x): 69 | return self.sub_module(x) 70 | 71 | 72 | class Darknet53(torch.nn.Module): 73 | 74 | def __init__(self): 75 | super(Darknet53, self).__init__() 76 | 77 | self.trunk_52 = torch.nn.Sequential( 78 | ConvolutionalLayer(3, 32, 3, 1, 1), 79 | ConvolutionalLayer(32, 64, 3, 2, 1), 80 | 81 | 
ResidualLayer(64), 82 | DownsamplingLayer(64, 128), 83 | 84 | ResidualLayer(128), 85 | ResidualLayer(128), 86 | DownsamplingLayer(128, 256), 87 | 88 | ResidualLayer(256), 89 | ResidualLayer(256), 90 | ResidualLayer(256), 91 | ResidualLayer(256), 92 | ResidualLayer(256), 93 | ResidualLayer(256), 94 | ResidualLayer(256), 95 | ResidualLayer(256), 96 | ) 97 | 98 | self.trunk_26 = torch.nn.Sequential( 99 | DownsamplingLayer(256, 512), 100 | ResidualLayer(512), 101 | ResidualLayer(512), 102 | ResidualLayer(512), 103 | ResidualLayer(512), 104 | ResidualLayer(512), 105 | ResidualLayer(512), 106 | ResidualLayer(512), 107 | ResidualLayer(512), 108 | ) 109 | 110 | self.trunk_13 = torch.nn.Sequential( 111 | DownsamplingLayer(512, 1024), 112 | ResidualLayer(1024), 113 | ResidualLayer(1024), 114 | ResidualLayer(1024), 115 | ResidualLayer(1024) 116 | ) 117 | 118 | self.convset_13 = torch.nn.Sequential( 119 | ConvolutionalSet(1024, 512) 120 | ) 121 | 122 | self.detetion_13 = torch.nn.Sequential( 123 | ConvolutionalLayer(512, 1024, 3, 1, 1), 124 | torch.nn.Conv2d(1024, 24, 1, 1, 0) 125 | ) 126 | 127 | self.up_26 = torch.nn.Sequential( 128 | ConvolutionalLayer(512, 256, 3, 1, 1), 129 | UpsampleLayer() 130 | ) 131 | 132 | self.convset_26 = torch.nn.Sequential( 133 | ConvolutionalSet(768, 256) 134 | ) 135 | 136 | self.detetion_26 = torch.nn.Sequential( 137 | ConvolutionalLayer(256, 512, 3, 1, 1), 138 | torch.nn.Conv2d(512, 24, 1, 1, 0) 139 | ) 140 | 141 | self.up_52 = torch.nn.Sequential( 142 | ConvolutionalLayer(256, 128, 3, 1, 1), 143 | UpsampleLayer() 144 | ) 145 | 146 | self.convset_52 = torch.nn.Sequential( 147 | ConvolutionalSet(384, 128) 148 | ) 149 | 150 | self.detetion_52 = torch.nn.Sequential( 151 | ConvolutionalLayer(128, 256, 3, 1, 1), 152 | torch.nn.Conv2d(256, 24, 1, 1, 0) 153 | ) 154 | 155 | def forward(self, x): 156 | h_52 = self.trunk_52(x) 157 | h_26 = self.trunk_26(h_52) 158 | h_13 = self.trunk_13(h_26) 159 | 160 | convset_out_13 = self.convset_13(h_13) 161 | 
detetion_out_13 = self.detetion_13(convset_out_13) 162 | 163 | up_out_26 = self.up_26(convset_out_13) 164 | route_out_26 = torch.cat((up_out_26, h_26), dim=1) 165 | convset_out_26 = self.convset_26(route_out_26) 166 | detetion_out_26 = self.detetion_26(convset_out_26) 167 | 168 | up_out_52 = self.up_52(convset_out_26) 169 | route_out_52 = torch.cat((up_out_52, h_52), dim=1) 170 | convset_out_52 = self.convset_52(route_out_52) 171 | detetion_out_52 = self.detetion_52(convset_out_52) 172 | 173 | return detetion_out_13, detetion_out_26, detetion_out_52 174 | 175 | if __name__ == '__main__': 176 | yolo = Darknet53() 177 | x = torch.randn(1,3,416,416) 178 | y = yolo(x) 179 | print(y[0].shape) 180 | print(y[1].shape) 181 | print(y[2].shape) 182 | -------------------------------------------------------------------------------- /darknet53/net_block.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch 3 | from torch.nn import functional 4 | 5 | 6 | # 卷积块 7 | class ConvolutionalLayer(nn.Module): 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, bias=False): 9 | super(ConvolutionalLayer, self).__init__() 10 | self.sub_module = nn.Sequential( 11 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, 12 | padding=padding, bias=bias), 13 | nn.BatchNorm2d(out_channels), 14 | nn.LeakyReLU() 15 | ) 16 | 17 | def forward(self, x): 18 | return self.sub_module(x) 19 | 20 | 21 | # 残差块 22 | class ResidualLayer(nn.Module): 23 | def __init__(self, in_channels): 24 | super(ResidualLayer, self).__init__() 25 | self.sub_module = nn.Sequential( 26 | ConvolutionalLayer(in_channels=in_channels, out_channels=in_channels // 2, kernel_size=1, stride=1, 27 | padding=0), 28 | ConvolutionalLayer(in_channels=in_channels // 2, out_channels=in_channels, kernel_size=3, stride=1, 29 | padding=1) 30 | ) 31 | 32 | def forward(self, x): 33 | return 
self.sub_module(x) 34 | 35 | 36 | # 下采样 37 | class DownSampleLayer(nn.Module): 38 | def __init__(self, in_channels, out_channels): 39 | super(DownSampleLayer, self).__init__() 40 | self.sub_module = nn.Sequential( 41 | ConvolutionalLayer(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=2, padding=1) 42 | ) 43 | 44 | def forward(self, x): 45 | return self.sub_module(x) 46 | 47 | 48 | # 上采样 49 | class UpSampleLayer(nn.Module): 50 | def __init__(self): 51 | super(UpSampleLayer, self).__init__() 52 | 53 | def forward(self, x): 54 | return functional.interpolate(x, scale_factor=2, mode='nearest') 55 | 56 | 57 | # 卷积集 58 | class ConvolutionalSet(nn.Module): 59 | # 一般输入通道大 输出通道小,因为目的就是为了降低通道进行特征提取 60 | def __init__(self, in_channels, out_channels): 61 | super(ConvolutionalSet, self).__init__() 62 | self.sub_module = nn.Sequential( 63 | ConvolutionalLayer(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0), 64 | ConvolutionalLayer(in_channels=out_channels, out_channels=in_channels, kernel_size=3, stride=1, padding=1), 65 | 66 | ConvolutionalLayer(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0), 67 | ConvolutionalLayer(in_channels=out_channels, out_channels=in_channels, kernel_size=3, stride=1, padding=1), 68 | 69 | ConvolutionalLayer(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0), 70 | ) 71 | 72 | def forward(self, x): 73 | return self.sub_module(x) 74 | 75 | 76 | class YoloNet_V3(nn.Module): 77 | def __init__(self): 78 | super(YoloNet_V3, self).__init__() 79 | self.trunk_52 = nn.Sequential( 80 | ConvolutionalLayer(3, 32, 3, 1, 1), 81 | DownSampleLayer(32, 64), 82 | 83 | ResidualLayer(64), 84 | DownSampleLayer(64, 128), 85 | 86 | ResidualLayer(128), 87 | ResidualLayer(128), 88 | DownSampleLayer(128, 256), 89 | 90 | ResidualLayer(256), 91 | ResidualLayer(256), 92 | ResidualLayer(256), 93 | ResidualLayer(256), 94 | ResidualLayer(256), 
95 | ResidualLayer(256), 96 | ResidualLayer(256), 97 | ResidualLayer(256) 98 | ) 99 | self.trunk_26 = nn.Sequential( 100 | DownSampleLayer(256, 512), 101 | ResidualLayer(512), 102 | ResidualLayer(512), 103 | ResidualLayer(512), 104 | ResidualLayer(512), 105 | ResidualLayer(512), 106 | ResidualLayer(512), 107 | ResidualLayer(512), 108 | ResidualLayer(512) 109 | ) 110 | self.trunk_13 = nn.Sequential( 111 | DownSampleLayer(512, 1024), 112 | ResidualLayer(1024), 113 | ResidualLayer(1024), 114 | ResidualLayer(1024), 115 | ResidualLayer(1024) 116 | ) 117 | 118 | self.convset_13 = nn.Sequential( 119 | ConvolutionalSet(1024, 512) 120 | ) 121 | self.detetion_13 = nn.Sequential( 122 | ConvolutionalLayer(512, 1024, 3, 1, 1), 123 | nn.Conv2d(1024, 45, 1, 1, 0) 124 | ) 125 | self.up_13_to_26 = nn.Sequential( 126 | # 原文为1*1的卷积,使用3*3的卷积是为了做特征提取,因为1*1不能进行特征提取 127 | ConvolutionalLayer(512, 256, 3, 1, 1), 128 | UpSampleLayer() 129 | ) 130 | 131 | self.convset_26 = nn.Sequential( 132 | ConvolutionalSet(768, 256) 133 | ) 134 | self.detetion_26 = nn.Sequential( 135 | ConvolutionalLayer(256, 512, 3, 1, 1), 136 | nn.Conv2d(512, 45, 1, 1, 0) 137 | ) 138 | 139 | self.up_26_to_52 = nn.Sequential( 140 | ConvolutionalLayer(256, 128, 3, 1, 1), 141 | UpSampleLayer() 142 | ) 143 | 144 | self.convset_52 = nn.Sequential( 145 | ConvolutionalSet(384, 128) 146 | ) 147 | self.detetion_52 = nn.Sequential( 148 | ConvolutionalLayer(128, 256, 3, 1, 1), 149 | nn.Conv2d(256, 45, 1, 1, 0) 150 | ) 151 | 152 | def forward(self, x): 153 | h_52 = self.trunk_52(x) 154 | h_26 = self.trunk_26(h_52) 155 | h_13 = self.trunk_13(h_26) 156 | 157 | convset_out_13 = self.convset_13(h_13) 158 | detetion_out_13 = self.detetion_13(convset_out_13) 159 | 160 | up_out_13_to_26 = self.up_13_to_26(convset_out_13) 161 | cat_out_26 = torch.cat((up_out_13_to_26, h_26), dim=1) 162 | convset_out_26 = self.convset_26(cat_out_26) 163 | detetion_out_26 = self.detetion_26(convset_out_26) 164 | 165 | up_out_26_to_52 = 
self.up_26_to_52(convset_out_26) 166 | cat_out_52 = torch.cat((up_out_26_to_52, h_52), dim=1) 167 | convset_out_52 = self.convset_52(cat_out_52) 168 | detetion_out_52 = self.detetion_52(convset_out_52) 169 | 170 | return detetion_out_13, detetion_out_26, detetion_out_52 171 | 172 | 173 | if __name__ == '__main__': 174 | yolo = YoloNet_V3() 175 | x = torch.randn(1, 3, 416, 416) 176 | y = yolo(x) 177 | print(y[0].shape) 178 | print(y[1].shape) 179 | print(y[2].shape) 180 | -------------------------------------------------------------------------------- /mobilenetv2/net_block.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch 3 | from torch.nn import functional 4 | 5 | 6 | # 卷积块 7 | class ConvolutionalLayer(nn.Module): 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, bias=False): 9 | super(ConvolutionalLayer, self).__init__() 10 | self.sub_module = nn.Sequential( 11 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, 12 | padding=padding, bias=bias), 13 | nn.BatchNorm2d(out_channels), 14 | nn.LeakyReLU() 15 | ) 16 | 17 | def forward(self, x): 18 | return self.sub_module(x) 19 | 20 | 21 | # 残差块 22 | class ResidualLayer(nn.Module): 23 | def __init__(self, in_channels): 24 | super(ResidualLayer, self).__init__() 25 | self.sub_module = nn.Sequential( 26 | ConvolutionalLayer(in_channels=in_channels, out_channels=in_channels // 2, kernel_size=1, stride=1, 27 | padding=0), 28 | ConvolutionalLayer(in_channels=in_channels // 2, out_channels=in_channels, kernel_size=3, stride=1, 29 | padding=1) 30 | ) 31 | 32 | def forward(self, x): 33 | return self.sub_module(x) 34 | 35 | 36 | # 下采样 37 | class DownSampleLayer(nn.Module): 38 | def __init__(self, in_channels, out_channels): 39 | super(DownSampleLayer, self).__init__() 40 | self.sub_module = nn.Sequential( 41 | ConvolutionalLayer(in_channels=in_channels, 
out_channels=out_channels, kernel_size=3, stride=2, padding=1) 42 | ) 43 | 44 | def forward(self, x): 45 | return self.sub_module(x) 46 | 47 | 48 | # 上采样 49 | class UpSampleLayer(nn.Module): 50 | def __init__(self): 51 | super(UpSampleLayer, self).__init__() 52 | 53 | def forward(self, x): 54 | return functional.interpolate(x, scale_factor=2, mode='nearest') 55 | 56 | 57 | # 卷积集 58 | class ConvolutionalSet(nn.Module): 59 | # 一般输入通道大 输出通道小,因为目的就是为了降低通道进行特征提取 60 | def __init__(self, in_channels, out_channels): 61 | super(ConvolutionalSet, self).__init__() 62 | self.sub_module = nn.Sequential( 63 | ConvolutionalLayer(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0), 64 | ConvolutionalLayer(in_channels=out_channels, out_channels=in_channels, kernel_size=3, stride=1, padding=1), 65 | 66 | ConvolutionalLayer(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0), 67 | ConvolutionalLayer(in_channels=out_channels, out_channels=in_channels, kernel_size=3, stride=1, padding=1), 68 | 69 | ConvolutionalLayer(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0), 70 | ) 71 | 72 | def forward(self, x): 73 | return self.sub_module(x) 74 | 75 | 76 | class YoloNet_V3(nn.Module): 77 | def __init__(self): 78 | super(YoloNet_V3, self).__init__() 79 | self.trunk_52 = nn.Sequential( 80 | ConvolutionalLayer(3, 32, 3, 1, 1), 81 | DownSampleLayer(32, 64), 82 | 83 | ResidualLayer(64), 84 | DownSampleLayer(64, 128), 85 | 86 | ResidualLayer(128), 87 | ResidualLayer(128), 88 | DownSampleLayer(128, 256), 89 | 90 | ResidualLayer(256), 91 | ResidualLayer(256), 92 | ResidualLayer(256), 93 | ResidualLayer(256), 94 | ResidualLayer(256), 95 | ResidualLayer(256), 96 | ResidualLayer(256), 97 | ResidualLayer(256) 98 | ) 99 | self.trunk_26 = nn.Sequential( 100 | DownSampleLayer(256, 512), 101 | ResidualLayer(512), 102 | ResidualLayer(512), 103 | ResidualLayer(512), 104 | ResidualLayer(512), 105 | 
ResidualLayer(512), 106 | ResidualLayer(512), 107 | ResidualLayer(512), 108 | ResidualLayer(512) 109 | ) 110 | self.trunk_13 = nn.Sequential( 111 | DownSampleLayer(512, 1024), 112 | ResidualLayer(1024), 113 | ResidualLayer(1024), 114 | ResidualLayer(1024), 115 | ResidualLayer(1024) 116 | ) 117 | 118 | self.convset_13 = nn.Sequential( 119 | ConvolutionalSet(1024, 512) 120 | ) 121 | self.detetion_13 = nn.Sequential( 122 | ConvolutionalLayer(512, 1024, 3, 1, 1), 123 | nn.Conv2d(1024, 45, 1, 1, 0) 124 | ) 125 | self.up_13_to_26 = nn.Sequential( 126 | # 原文为1*1的卷积,使用3*3的卷积是为了做特征提取,因为1*1不能进行特征提取 127 | ConvolutionalLayer(512, 256, 3, 1, 1), 128 | UpSampleLayer() 129 | ) 130 | 131 | self.convset_26 = nn.Sequential( 132 | ConvolutionalSet(768, 256) 133 | ) 134 | self.detetion_26 = nn.Sequential( 135 | ConvolutionalLayer(256, 512, 3, 1, 1), 136 | nn.Conv2d(512, 45, 1, 1, 0) 137 | ) 138 | 139 | self.up_26_to_52 = nn.Sequential( 140 | ConvolutionalLayer(256, 128, 3, 1, 1), 141 | UpSampleLayer() 142 | ) 143 | 144 | self.convset_52 = nn.Sequential( 145 | ConvolutionalSet(384, 128) 146 | ) 147 | self.detetion_52 = nn.Sequential( 148 | ConvolutionalLayer(128, 256, 3, 1, 1), 149 | nn.Conv2d(256, 45, 1, 1, 0) 150 | ) 151 | 152 | def forward(self, x): 153 | h_52 = self.trunk_52(x) 154 | h_26 = self.trunk_26(h_52) 155 | h_13 = self.trunk_13(h_26) 156 | 157 | convset_out_13 = self.convset_13(h_13) 158 | detetion_out_13 = self.detetion_13(convset_out_13) 159 | 160 | up_out_13_to_26 = self.up_13_to_26(convset_out_13) 161 | cat_out_26 = torch.cat((up_out_13_to_26, h_26), dim=1) 162 | convset_out_26 = self.convset_26(cat_out_26) 163 | detetion_out_26 = self.detetion_26(convset_out_26) 164 | 165 | up_out_26_to_52 = self.up_26_to_52(convset_out_26) 166 | cat_out_52 = torch.cat((up_out_26_to_52, h_52), dim=1) 167 | convset_out_52 = self.convset_52(cat_out_52) 168 | detetion_out_52 = self.detetion_52(convset_out_52) 169 | 170 | return detetion_out_13, detetion_out_26, detetion_out_52 171 | 172 
| 173 | if __name__ == '__main__': 174 | yolo = YoloNet_V3() 175 | x = torch.randn(1, 3, 416, 416) 176 | y = yolo(x) 177 | print(y[0].shape) 178 | print(y[1].shape) 179 | print(y[2].shape) 180 | --------------------------------------------------------------------------------