├── .github
└── stale.yml
├── .gitignore
├── BBN
├── bbn_dataset.py
├── bbn_model.py
├── dataset.txt
├── readme.md
└── train.py
├── CAM_pytorch
├── __init__.py
├── checkpoint
│ └── .gitkeep
├── data
│ ├── MyDataSet.py
│ └── __init__.py
├── main.py
├── models
│ ├── VGG_CAM.py
│ └── __init__.py
├── readme.md
└── utils
│ ├── __init__.py
│ ├── config.py
│ └── visualize.py
├── CUDA_Python
├── CUDA-Python证书.pdf
├── readme.md
├── 课程1
│ ├── .ipynb_checkpoints
│ │ └── Introduction to CUDA Python with Numba-checkpoint.ipynb
│ ├── Introduction to CUDA Python with Numba.ipynb
│ ├── images
│ │ ├── DLI Header.png
│ │ ├── numba_flowchart.png
│ │ └── run_the_assessment.png
│ ├── section1.tar.gz
│ └── solutions
│ │ ├── make_pulses_solution.py
│ │ ├── monte_carlo_pi_solution.py
│ │ └── zero_suppress_solution.py
├── 课程2
│ ├── .ipynb_checkpoints
│ │ └── Custom CUDA Kernels in Python with Numba-checkpoint.ipynb
│ ├── Custom CUDA Kernels in Python with Numba.ipynb
│ ├── assessment
│ │ └── histogram.py
│ ├── debug
│ │ ├── ex1.py
│ │ ├── ex1a.py
│ │ ├── ex2.py
│ │ ├── ex3.py
│ │ └── ex3a.py
│ ├── images
│ │ ├── DLI Header.png
│ │ └── run_the_assessment.png
│ ├── img
│ │ ├── numba_flowchart.png
│ │ ├── sensor_humidity.png
│ │ └── sensor_temp.png
│ ├── section2.tar.gz
│ └── solutions
│ │ ├── hypot_stride_solution.py
│ │ ├── monte_carlo_pi_solution.py
│ │ └── square_device_solution.py
├── 课程3
│ ├── .ipynb_checkpoints
│ │ └── Effective Memory Use-checkpoint.ipynb
│ ├── Effective Memory Use.ipynb
│ ├── Multidimensional Grids and Shared Memory for CUDA Python with Numba.ipynb
│ ├── assessment
│ │ └── definition.py
│ ├── images
│ │ ├── DLI Header.png
│ │ ├── mm_image.png
│ │ ├── run_assess_task.png
│ │ └── run_the_assessment.png
│ └── solutions
│ │ ├── add_matrix_solution.py
│ │ ├── add_matrix_stride_solution.py
│ │ ├── col_sums_solution.py
│ │ ├── matrix_add_solution.py
│ │ ├── matrix_multiply_solution.py
│ │ ├── matrix_multiply_stride_solution.py
│ │ ├── monte_carlo_pi_solution.py
│ │ └── tile_transpose_solution.py
└── 课程笔记.pdf
├── DataHub
└── readme.md
├── FPN_pytorch
├── README.md
├── fpn.py
└── retina_fpn.py
├── FasterRcnn_pytorch
├── LICENSE
├── README.MD
├── __pycache__
│ └── trainer.cpython-35.pyc
├── data
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-35.pyc
│ │ ├── dataset.cpython-35.pyc
│ │ ├── util.cpython-35.pyc
│ │ └── voc_dataset.cpython-35.pyc
│ ├── dataset.py
│ ├── util.py
│ └── voc_dataset.py
├── demo.ipynb
├── misc
│ ├── convert_caffe_pretrain.py
│ ├── demo.jpg
│ └── train_fast.py
├── model
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-35.pyc
│ │ ├── faster_rcnn.cpython-35.pyc
│ │ ├── faster_rcnn_vgg16.cpython-35.pyc
│ │ ├── region_proposal_network.cpython-35.pyc
│ │ └── roi_module.cpython-35.pyc
│ ├── faster_rcnn.py
│ ├── faster_rcnn_vgg16.py
│ ├── region_proposal_network.py
│ ├── roi_module.py
│ └── utils
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ ├── __init__.cpython-35.pyc
│ │ ├── bbox_tools.cpython-35.pyc
│ │ ├── creator_tool.cpython-35.pyc
│ │ └── roi_cupy.cpython-35.pyc
│ │ ├── bbox_tools.py
│ │ ├── creator_tool.py
│ │ ├── nms
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-35.pyc
│ │ │ ├── _nms_gpu_post_py.cpython-35.pyc
│ │ │ └── non_maximum_suppression.cpython-35.pyc
│ │ ├── _nms_gpu_post.c
│ │ ├── _nms_gpu_post.pyx
│ │ ├── _nms_gpu_post_py.py
│ │ ├── build.py
│ │ ├── build
│ │ │ ├── lib.linux-x86_64-3.5
│ │ │ │ └── _nms_gpu_post.cpython-35m-x86_64-linux-gnu.so
│ │ │ └── temp.linux-x86_64-3.5
│ │ │ │ └── _nms_gpu_post.o
│ │ └── non_maximum_suppression.py
│ │ └── roi_cupy.py
├── requirements.txt
├── train.py
├── trainer.py
└── utils
│ ├── __init__.py
│ ├── __pycache__
│ ├── __init__.cpython-35.pyc
│ ├── array_tool.cpython-35.pyc
│ ├── config.cpython-35.pyc
│ ├── eval_tool.cpython-35.pyc
│ └── vis_tool.cpython-35.pyc
│ ├── array_tool.py
│ ├── config.py
│ ├── eval_tool.py
│ └── vis_tool.py
├── GhostNet
├── G-Ghost.png
├── g_ghost_regnet.py
└── readme.md
├── LICENSE
├── RepVGG
├── readme.md
├── repvgg.png
└── repvgg.py
├── SSD_pytorch
├── checkpoint
│ └── .gitkeep
├── data
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-35.pyc
│ │ └── voc0712.cpython-35.pyc
│ └── voc0712.py
├── main.py
├── models
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-35.pyc
│ │ ├── box_utils.cpython-35.pyc
│ │ └── ssd.cpython-35.pyc
│ ├── box_utils.py
│ ├── functions
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-35.pyc
│ │ │ ├── detection.cpython-35.pyc
│ │ │ └── prior_box.cpython-35.pyc
│ │ ├── detection.py
│ │ └── prior_box.py
│ ├── modules
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-35.pyc
│ │ │ ├── init_weights.cpython-35.pyc
│ │ │ ├── l2norm.cpython-35.pyc
│ │ │ └── multibox_loss.cpython-35.pyc
│ │ ├── init_weights.py
│ │ ├── l2norm.py
│ │ └── multibox_loss.py
│ └── ssd.py
├── readme.md
├── temp
│ └── test.png
└── utils
│ ├── __init__.py
│ ├── __pycache__
│ ├── __init__.cpython-35.pyc
│ ├── augmentations.cpython-35.pyc
│ ├── config.cpython-35.pyc
│ ├── eval_untils.cpython-35.pyc
│ ├── timer.cpython-35.pyc
│ └── visualize.cpython-35.pyc
│ ├── augmentations.py
│ ├── config.py
│ ├── eval_untils.py
│ ├── timer.py
│ └── visualize.py
├── UNet_pytorch
├── dice_loss.py
├── eval.py
├── predict.py
├── readme.md
├── submit.py
├── train.py
├── unet
│ ├── __init__.py
│ ├── unet_model.py
│ └── unet_parts.py
└── utils
│ ├── __init__.py
│ ├── config.py
│ ├── crf.py
│ ├── data_vis.py
│ ├── load.py
│ └── utils.py
├── Yolov1_pytorch
├── checkpoint
│ └── .gitkeep
├── config.py
├── data
│ ├── __init__.py
│ ├── dataset.py
│ ├── voc2007test.txt
│ ├── voc2012train.txt
│ └── xml_2_txt.py
├── main.py
├── main_resnet.py
├── models
│ ├── __init__.py
│ ├── net.py
│ └── resnet.py
├── readme.md
└── utils
│ ├── __init__.py
│ ├── predictUtils.py
│ ├── testImgs
│ └── __init__.py
│ ├── visualize.py
│ └── yoloLoss.py
├── Yolov3_pytorch
├── checkpoints
│ ├── .gitkeep
│ └── download_weights.sh
├── config
│ ├── coco.data
│ └── yolov3.cfg
├── data
│ ├── coco.names
│ ├── get_coco_dataset.sh
│ └── samples
│ │ ├── dog.jpg
│ │ ├── eagle.jpg
│ │ ├── giraffe.jpg
│ │ ├── herd_of_horses.jpg
│ │ ├── img1.jpg
│ │ ├── img2.jpg
│ │ ├── img3.jpg
│ │ ├── img4.jpg
│ │ ├── messi.jpg
│ │ └── person.jpg
├── datasets
│ └── datasets.py
├── main.py
├── models
│ └── models.py
├── readme.md
└── utils
│ ├── __init__.py
│ ├── config.py
│ ├── parse_config.py
│ ├── utils.py
│ └── visualize.py
└── readme.md
/.github/stale.yml:
--------------------------------------------------------------------------------
1 | # Number of days of inactivity before an issue becomes stale
2 | daysUntilStale: 60
3 | # Number of days of inactivity before a stale issue is closed
4 | daysUntilClose: 7
5 | # Issues with these labels will never be considered stale
6 | exemptLabels:
7 |   - pinned
8 |   - security
9 | # Label to use when marking an issue as stale
10 | staleLabel: wontfix
11 | # Comment to post when marking an issue as stale. Set to `false` to disable
12 | markComment: >
13 |   This issue has been automatically marked as stale because it has not had
14 |   recent activity. It will be closed if no further activity occurs. Thank you
15 |   for your contributions. (The bot closed this issue automatically due to long inactivity; feel free to ask again if needed.)
16 | # Comment to post when closing a stale issue. Set to `false` to disable
17 | closeComment: false
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode
2 | .DS_Store
3 | .idea
4 |
--------------------------------------------------------------------------------
/BBN/bbn_dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import cv2
4 | import torch.utils.data as data
5 | from PIL import Image
6 | from timm.data.transforms_factory import create_transform as timm_transform
7 | 
8 | 
9 | def Process(img_path, img_size, use_augment):
10 |     """
11 |     Default timm preprocessing
12 |     """
13 |     # Read the image
14 |     assert os.path.exists(img_path), f"{img_path} image does not exist"
15 |     img = cv2.imread(img_path, cv2.IMREAD_COLOR)  # BGR
16 |     img = Image.fromarray(img)
17 |     if use_augment:
18 |         # Augment: Random(resize, crop, flip, color jitter, ...)
19 |         img_trans = timm_transform(
20 |             img_size,
21 |             is_training=True,
22 |             re_prob=0.5,
23 |             re_mode="pixel",  # random erasing
24 |             auto_augment=None,  # auto-augment policy, e.g. rand-m9-mstd0.5
25 |         )
26 |     else:
27 |         # No augmentation: Resize 256 -> CenterCrop 224
28 |         img_trans = timm_transform(img_size)
29 |     return img_trans(img)
30 | 
31 | 
32 | class BBN_Dataset(data.Dataset):
33 |     """Data loader"""
34 | 
35 |     def __init__(self, txt_path, mode, size):
36 |         """
37 | 
38 |         Args:
39 |             txt_path (str): path of the dataset file
40 |             mode (str): dataset split
41 |             size (list): image size, e.g. [224, 224]
42 |         """
43 |         assert mode in ["train", "val", "test"]
44 |         self.use_augment = mode == "train"  # augmentation only for the training set
45 |         self.size = size
46 | 
47 |         self.dataset = self.load_txt(txt_path)
48 |         self.imgs_list = self.dataset[mode]
49 |         self.all_labels = self.dataset["all_labels"]
50 | 
51 |         # Enable BBN sampling only for the training set
52 |         if mode == "train":
53 | 
54 |             labels_list = [label for _, label in self.imgs_list]  # class index of every image
55 |             class_index_dict = dict()  # key: class index, value: indices of all images of that class
56 |             class_nums_list = [0] * len(self.all_labels)  # number of images per class
57 |             for index, label in enumerate(labels_list):
58 |                 if not int(label) in class_index_dict:
59 |                     class_index_dict[int(label)] = []
60 |                 class_index_dict[int(label)].append(index)
61 | 
62 |                 class_nums_list[int(label)] += 1
63 | 
64 |             # Build the reversed sampling distribution
65 |             max_num = max(class_nums_list)  # largest per-class sample count
66 |             class_weight = [max_num / i for i in class_nums_list]  # reciprocal of each class's share
67 |             sum_weight = sum(class_weight)  # sum of the reversed weights
68 |             self.class_weight, self.sum_weight = class_weight, sum_weight
69 |             self.class_index_dict = class_index_dict
70 | 
71 |     def __getitem__(self, index):
72 |         img_path, label = self.imgs_list[index]
73 |         # Image preprocessing
74 |         img = Process(img_path, self.size, self.use_augment)
75 |         # Training set: BBN reversed sampling
76 |         if self.use_augment:
77 |             sample_class = self.sample_class_index_by_weight()  # sampled class index
78 |             sample_indexes = self.class_index_dict[sample_class]  # indices of all images of that class
79 |             sample_index = random.choice(sample_indexes)  # pick one sample at random
80 |             img2_path, label2 = self.imgs_list[sample_index]
81 |             img2 = Process(img2_path, self.size, self.use_augment)
82 | 
83 |             return img, label, img_path, img2, label2, img2_path
84 |         # Validation / test set
85 |         else:
86 |             return img, label, img_path
87 | 
88 |     def __len__(self):
89 |         return len(self.imgs_list)
90 | 
91 |     def load_txt(self, txt_path):
92 |         """Load a single-label classification dataset
93 | 
94 |         Args:
95 |             txt_path (str): path of the dataset file
96 | 
97 |         Each line reads: split, class name, image path
98 |             train, dog, img1.jpg
99 |             val, dog, img2.jpg
100 |             test, cat, img3.jpg
101 | 
102 |         Returns:
103 |             {
104 |                 "train": [
105 |                     [img_1, 0],
106 |                     [img_2, 1],
107 |                     ...
108 |                 ],
109 |                 "val": likewise,
110 |                 "test": likewise,
111 |                 "all_labels": ["dog", "cat", ...],
112 |             }
113 |         """
114 |         # Read the file (strip newlines, skip empty lines)
115 |         with open(txt_path) as f:
116 |             txt_list = [txt.strip().split(",") for txt in f if txt.strip()]
117 | 
118 |         # Collect all class names
119 |         all_labels = [txt_i[1] for txt_i in txt_list]
120 |         all_labels = list(set(all_labels))
121 |         all_labels.sort()
122 | 
123 |         # Build the dataset
124 |         dataset = {
125 |             "train": [],
126 |             "val": [],
127 |             "test": [],
128 |             "all_labels": all_labels,
129 |         }
130 |         for mode, label, img_path in txt_list:
131 |             assert mode in ["train", "val", "test"]
132 |             dataset[mode].append([img_path, all_labels.index(label)])
133 |         return dataset
134 | 
135 |     def sample_class_index_by_weight(self):
136 |         """
137 |         Reversed sampling: pick a class with probability inversely proportional to its frequency
138 |         """
139 |         # rand_number in [0, sum of reversed weights)
140 |         rand_number, now_sum = random.random() * self.sum_weight, 0
141 |         # Walk the classes and return the one whose cumulative weight range contains rand_number
142 |         for i in range(len(self.class_weight)):
143 |             now_sum += self.class_weight[i]
144 |             if rand_number <= now_sum:
145 |                 return i  # sampled class index
--------------------------------------------------------------------------------
/BBN/dataset.txt:
--------------------------------------------------------------------------------
1 | train,dog,CatDog/dog/dog_67.jpg
2 | train,dog,CatDog/dog/dog_2.jpg
3 | train,dog,CatDog/dog/dog_52.jpg
4 | train,dog,CatDog/dog/dog_82.jpg
5 | train,dog,CatDog/dog/dog_99.jpg
6 | train,dog,CatDog/dog/dog_85.jpg
7 | train,dog,CatDog/dog/dog_55.jpg
8 | train,dog,CatDog/dog/dog_41.jpg
9 | train,dog,CatDog/dog/dog_8.jpg
10 | train,dog,CatDog/dog/dog_56.jpg
11 | train,dog,CatDog/dog/dog_25.jpg
12 | train,dog,CatDog/dog/dog_92.jpg
13 | train,dog,CatDog/dog/dog_33.jpg
14 | train,dog,CatDog/dog/dog_62.jpg
15 | train,dog,CatDog/dog/dog_51.jpg
16 | train,dog,CatDog/dog/dog_13.jpg
17 | train,dog,CatDog/dog/dog_74.jpg
18 | train,dog,CatDog/dog/dog_24.jpg
19 | train,dog,CatDog/dog/dog_93.jpg
20 | train,dog,CatDog/dog/dog_12.jpg
21 | train,dog,CatDog/dog/dog_5.jpg
22 | train,dog,CatDog/dog/dog_22.jpg
23 | train,dog,CatDog/dog/dog_30.jpg
24 | train,dog,CatDog/dog/dog_28.jpg
25 | train,dog,CatDog/dog/dog_79.jpg
26 | train,dog,CatDog/dog/dog_35.jpg
27 | train,dog,CatDog/dog/dog_23.jpg
28 | train,dog,CatDog/dog/dog_94.jpg
29 | train,dog,CatDog/dog/dog_54.jpg
30 | train,dog,CatDog/dog/dog_40.jpg
31 | train,dog,CatDog/dog/dog_53.jpg
32 | train,dog,CatDog/dog/dog_88.jpg
33 | train,dog,CatDog/dog/dog_59.jpg
34 | train,dog,CatDog/dog/dog_42.jpg
35 | train,dog,CatDog/dog/dog_21.jpg
36 | train,dog,CatDog/dog/dog_73.jpg
37 | train,dog,CatDog/dog/dog_18.jpg
38 | train,dog,CatDog/dog/dog_43.jpg
39 | train,dog,CatDog/dog/dog_46.jpg
40 | train,dog,CatDog/dog/dog_57.jpg
41 | train,dog,CatDog/dog/dog_96.jpg
42 | train,dog,CatDog/dog/dog_77.jpg
43 | train,dog,CatDog/dog/dog_4.jpg
44 | train,dog,CatDog/dog/dog_20.jpg
45 | train,dog,CatDog/dog/dog_10.jpg
46 | train,dog,CatDog/dog/dog_69.jpg
47 | train,dog,CatDog/dog/dog_100.jpg
48 | train,dog,CatDog/dog/dog_66.jpg
49 | train,dog,CatDog/dog/dog_95.jpg
50 | train,dog,CatDog/dog/dog_84.jpg
51 | train,dog,CatDog/dog/dog_64.jpg
52 | train,dog,CatDog/dog/dog_31.jpg
53 | train,dog,CatDog/dog/dog_16.jpg
54 | train,dog,CatDog/dog/dog_89.jpg
55 | train,dog,CatDog/dog/dog_76.jpg
56 | train,dog,CatDog/dog/dog_19.jpg
57 | train,dog,CatDog/dog/dog_70.jpg
58 | train,dog,CatDog/dog/dog_91.jpg
59 | train,dog,CatDog/dog/dog_44.jpg
60 | train,dog,CatDog/dog/dog_86.jpg
61 | train,dog,CatDog/dog/dog_78.jpg
62 | train,dog,CatDog/dog/dog_61.jpg
63 | train,dog,CatDog/dog/dog_45.jpg
64 | train,dog,CatDog/dog/dog_37.jpg
65 | train,dog,CatDog/dog/dog_11.jpg
66 | train,dog,CatDog/dog/dog_60.jpg
67 | train,dog,CatDog/dog/dog_6.jpg
68 | train,dog,CatDog/dog/dog_27.jpg
69 | train,dog,CatDog/dog/dog_65.jpg
70 | train,dog,CatDog/dog/dog_29.jpg
71 | train,cat,CatDog/cat/cat_56.jpg
72 | train,cat,CatDog/cat/cat_35.jpg
73 | train,cat,CatDog/cat/cat_90.jpg
74 | train,cat,CatDog/cat/cat_32.jpg
75 | train,cat,CatDog/cat/cat_7.jpg
76 | train,cat,CatDog/cat/cat_37.jpg
77 | train,cat,CatDog/cat/cat_100.jpg
78 | train,cat,CatDog/cat/cat_25.jpg
79 | train,cat,CatDog/cat/cat_28.jpg
80 | train,cat,CatDog/cat/cat_77.jpg
81 | train,cat,CatDog/cat/cat_23.jpg
82 | train,cat,CatDog/cat/cat_21.jpg
83 | train,cat,CatDog/cat/cat_11.jpg
84 | train,cat,CatDog/cat/cat_47.jpg
85 | train,cat,CatDog/cat/cat_27.jpg
86 | train,cat,CatDog/cat/cat_41.jpg
87 | train,cat,CatDog/cat/cat_97.jpg
88 | train,cat,CatDog/cat/cat_39.jpg
89 | train,cat,CatDog/cat/cat_98.jpg
90 | train,cat,CatDog/cat/cat_38.jpg
91 | val,dog,CatDog/dog/dog_39.jpg
92 | val,dog,CatDog/dog/dog_81.jpg
93 | val,dog,CatDog/dog/dog_1.jpg
94 | val,dog,CatDog/dog/dog_71.jpg
95 | val,dog,CatDog/dog/dog_98.jpg
96 | val,dog,CatDog/dog/dog_80.jpg
97 | val,dog,CatDog/dog/dog_49.jpg
98 | val,dog,CatDog/dog/dog_26.jpg
99 | val,dog,CatDog/dog/dog_38.jpg
100 | val,dog,CatDog/dog/dog_15.jpg
101 | val,cat,CatDog/cat/cat_63.jpg
102 | val,cat,CatDog/cat/cat_14.jpg
103 | val,cat,CatDog/cat/cat_43.jpg
104 | val,cat,CatDog/cat/cat_64.jpg
105 | val,cat,CatDog/cat/cat_84.jpg
106 | val,cat,CatDog/cat/cat_52.jpg
107 | val,cat,CatDog/cat/cat_57.jpg
108 | val,cat,CatDog/cat/cat_46.jpg
109 | val,cat,CatDog/cat/cat_60.jpg
110 | val,cat,CatDog/cat/cat_44.jpg
111 | test,dog,CatDog/dog/dog_32.jpg
112 | test,dog,CatDog/dog/dog_75.jpg
113 | test,dog,CatDog/dog/dog_58.jpg
114 | test,dog,CatDog/dog/dog_3.jpg
115 | test,dog,CatDog/dog/dog_7.jpg
116 | test,dog,CatDog/dog/dog_34.jpg
117 | test,dog,CatDog/dog/dog_48.jpg
118 | test,dog,CatDog/dog/dog_83.jpg
119 | test,dog,CatDog/dog/dog_36.jpg
120 | test,dog,CatDog/dog/dog_9.jpg
121 | test,dog,CatDog/dog/dog_63.jpg
122 | test,dog,CatDog/dog/dog_72.jpg
123 | test,dog,CatDog/dog/dog_50.jpg
124 | test,dog,CatDog/dog/dog_97.jpg
125 | test,dog,CatDog/dog/dog_47.jpg
126 | test,dog,CatDog/dog/dog_17.jpg
127 | test,dog,CatDog/dog/dog_68.jpg
128 | test,dog,CatDog/dog/dog_14.jpg
129 | test,dog,CatDog/dog/dog_90.jpg
130 | test,dog,CatDog/dog/dog_87.jpg
131 | test,cat,CatDog/cat/cat_58.jpg
132 | test,cat,CatDog/cat/cat_17.jpg
133 | test,cat,CatDog/cat/cat_96.jpg
134 | test,cat,CatDog/cat/cat_40.jpg
135 | test,cat,CatDog/cat/cat_87.jpg
136 | test,cat,CatDog/cat/cat_69.jpg
137 | test,cat,CatDog/cat/cat_67.jpg
138 | test,cat,CatDog/cat/cat_3.jpg
139 | test,cat,CatDog/cat/cat_18.jpg
140 | test,cat,CatDog/cat/cat_2.jpg
141 | test,cat,CatDog/cat/cat_13.jpg
142 | test,cat,CatDog/cat/cat_15.jpg
143 | test,cat,CatDog/cat/cat_80.jpg
144 | test,cat,CatDog/cat/cat_95.jpg
145 | test,cat,CatDog/cat/cat_5.jpg
146 | test,cat,CatDog/cat/cat_73.jpg
147 | test,cat,CatDog/cat/cat_6.jpg
148 | test,cat,CatDog/cat/cat_10.jpg
149 | test,cat,CatDog/cat/cat_36.jpg
150 | test,cat,CatDog/cat/cat_65.jpg
151 |
--------------------------------------------------------------------------------
/BBN/readme.md:
--------------------------------------------------------------------------------
1 | # BBN: Bilateral-Branch Network with Cumulative Learning for Long-Tailed Visual Recognition
2 |
3 | #### 该仓库收录于[PytorchNetHub](https://github.com/bobo0810/PytorchNetHub)
4 |
5 |
6 | - [官方库](https://github.com/Megvii-Nanjing/BBN) [官方知乎解读](https://zhuanlan.zhihu.com/p/123876769)
7 | - 目的:图像分类任务中,长尾数据分布存在极端的类别不平衡问题
8 |
9 | ## TODO
10 | - [x] BBN 数据加载、模型定义
11 |
12 | > 基于官方库,简化代码。便于迁移到任意训练框架。
13 |
14 | - [ ] 训练示例和采样可视化
15 |
16 |
--------------------------------------------------------------------------------
/BBN/train.py:
--------------------------------------------------------------------------------
1 | from bbn_dataset import BBN_Dataset
2 | from bbn_model import BBN_ResNet50
3 | from torch.utils.data import DataLoader
4 | from tqdm import tqdm
5 | import torch.nn as nn
6 | from pycm import ConfusionMatrix
7 | import torch
8 | 
9 | # Initialize the model
10 | model = BBN_ResNet50().cuda()
11 | 
12 | 
13 | # Build the datasets
14 | batch = 64
15 | txt_path = "./dataset.txt"
16 | train_set = BBN_Dataset(txt_path=txt_path, mode="train", size=[224, 224])
17 | val_set = BBN_Dataset(txt_path=txt_path, mode="val", size=[224, 224])
18 | 
19 | # Build the data loaders
20 | train_dataloader = DataLoader(
21 |     dataset=train_set,
22 |     batch_size=batch,
23 |     num_workers=4,
24 |     shuffle=True,
25 |     drop_last=True,
26 | )
27 | val_dataloader = DataLoader(
28 |     dataset=val_set,
29 |     batch_size=batch,
30 |     num_workers=4,
31 | )
32 | 
33 | # Start training (the optimizer/scheduler below are placeholders; swap in your own)
34 | Epochs = 100
35 | optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
36 | lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=Epochs)
37 | criterion = nn.CrossEntropyLoss()
38 | 
39 | for epoch in range(Epochs):
40 |     optimizer.zero_grad()
41 | 
42 |     model.train()
43 |     for batch_idx, (
44 |         imgs,
45 |         labels,
46 |         imgs_path,
47 |         imgs2,
48 |         labels2,
49 |         imgs_path2,
50 |     ) in enumerate(tqdm(train_dataloader)):
51 |         # Uniform sampling branch
52 |         imgs, labels = imgs.cuda(), labels.cuda()
53 |         # Reversed sampling branch
54 |         imgs2, labels2 = imgs2.cuda(), labels2.cuda()
55 | 
56 |         l = 1 - ((epoch - 1) / Epochs) ** 2  # parabolic decay of the mixing weight
57 |         params = {"imgs1": imgs, "imgs2": imgs2, "l": l}
58 |         output = model(params)
59 |         loss = l * criterion(output, labels) + (1 - l) * criterion(
60 |             output, labels2
61 |         )
62 |         loss.backward()
63 |         optimizer.step()
64 |         optimizer.zero_grad()
65 |     lr_scheduler.step()
66 | 
67 |     # Evaluate the model
68 |     model.eval()
69 |     preds_list, labels_list = [], []
70 |     with torch.no_grad():
71 |         for batch_idx, (imgs, labels, imgs_path) in enumerate(tqdm(val_dataloader)):
72 |             imgs, labels = imgs.cuda(), labels.cuda()
73 |             scores = model(imgs)
74 |             scores = torch.nn.functional.softmax(scores, dim=1)
75 |             preds = torch.argmax(scores, dim=1)
76 | 
77 |             preds_list.append(preds)
78 |             labels_list.append(labels)
79 |     preds_list = torch.cat(preds_list, dim=0).cpu().numpy()
80 |     labels_list = torch.cat(labels_list, dim=0).cpu().numpy()
81 |     acc = ConfusionMatrix(labels_list, preds_list).Overall_ACC
82 |     print("val acc:", acc)
/CAM_pytorch/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CAM_pytorch/__init__.py
--------------------------------------------------------------------------------
/CAM_pytorch/checkpoint/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CAM_pytorch/checkpoint/.gitkeep
--------------------------------------------------------------------------------
/CAM_pytorch/data/MyDataSet.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding:utf-8 -*-
3 | # power by Mr.Li
4 | import os
5 | from torch.utils import data
6 | from torchvision import transforms as T
7 | import cv2
8 | import random
9 | from utils.config import opt
10 | class MyDataSet(data.Dataset):
11 |     '''
12 |     Main goal: collect all image paths and split them into train / val / test
13 |     '''
14 |     def __init__(self, root, transforms=None, train=True, test=False):
15 |         self.test = test  # split flag
16 |         self.train = train
17 |         self.root = root  # dataset root
18 | 
19 |         # Read all images under the folder
20 |         if root != '':
21 |             pos_root = os.path.join(root, 'pos')
22 |             neg_root = os.path.join(root, 'neg')
23 | 
24 |             pos_imgs = [os.path.join(pos_root, img) for img in os.listdir(pos_root)]
25 |             neg_imgs = [os.path.join(neg_root, img) for img in os.listdir(neg_root)]
26 | 
27 |             imgs = pos_imgs + neg_imgs
28 |             # Shuffle the dataset
29 |             random.shuffle(imgs)
30 |         else:
31 |             print('Dataset is empty???')
32 |             imgs = []
33 | 
34 |         imgs_num = len(imgs)
35 |         # Split the dataset
36 |         if train:
37 |             self.imgs = imgs[:int(0.8 * imgs_num)]
38 |         else:
39 |             self.imgs = imgs[int(0.8 * imgs_num):]
40 | 
41 | 
42 | 
43 |         # Image transforms (fall back to the defaults below when none are given)
44 |         if transforms is None:
45 |             normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
46 | 
47 |             if self.test or not train:  # test and validation sets
48 |                 self.transforms = T.Compose([
49 |                     T.ToTensor(),
50 |                     normalize
51 |                 ])
52 |             else:  # training set
53 |                 self.transforms = T.Compose([
54 |                     T.ToTensor(),
55 |                     normalize
56 |                 ])
57 | 
58 |     def __getitem__(self, index):
59 |         '''
60 |         Return the data of a single image
61 |         '''
62 |         # Full path of the image
63 |         img_path = self.imgs[index]
64 |         # Read the image
65 |         img = cv2.imread(img_path)
66 |         img = self.BGR2RGB(img)  # pytorch's pretrained models expect RGB input
67 |         img = cv2.resize(img, (64, 128))
68 |         # Apply the transforms
69 |         img = self.transforms(img)
70 |         # Ground-truth label
71 |         if 'neg' in img_path:
72 |             label = 0  # no person
73 |         else:
74 |             label = 1  # person present
75 | 
76 |         return img, label
77 | 
78 |     def __len__(self):
79 |         return len(self.imgs)
80 | 
81 |     def BGR2RGB(self, img):
82 |         return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
83 | 
84 |     def get_test_img(self):
85 |         # Read the test image
86 |         img_origin = cv2.imread(opt.test_img)
87 |         img = self.BGR2RGB(img_origin)  # pytorch's pretrained models expect RGB input
88 |         img = cv2.resize(img, (64, 128))
89 |         # Apply the transforms
90 |         img = self.transforms(img)
91 |         return img_origin, img
92 | 
93 | 
--------------------------------------------------------------------------------
/CAM_pytorch/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CAM_pytorch/data/__init__.py
--------------------------------------------------------------------------------
/CAM_pytorch/models/VGG_CAM.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # power by Mr.Li
3 | from torch import nn
4 | import torch as t
5 | from torchvision.models import vgg16
6 | from utils.config import opt
7 | class VGG16_CAM(nn.Module):
8 |     '''
9 |     Network definition
10 |     '''
11 |     def __init__(self):
12 |         super(VGG16_CAM, self).__init__()
13 |         # network name
14 |         self.moduel_name = str("VGG16_CAM")
15 |         # VGG16 feature extractor with its final maxpool layer removed
16 |         self.feature_layer = nn.Sequential(*list(vgg16(pretrained=True).features.children())[0:-1])
17 |         # classifier on top of global average pooling (GAP)
18 |         self.fc_layer = nn.Linear(512, 2)
19 | 
20 |     def forward(self, x):
21 |         x = self.feature_layer(x)
22 |         # GAP: global average pooling
23 |         x = t.mean(x, dim=3)
24 |         x = t.mean(x, dim=2)
25 | 
26 |         # fully connected layer (+ softmax)
27 |         x = self.fc_layer(x)
28 |         # x = F.softmax(x)  # cross-entropy loss applies softmax itself
29 |         return x
30 | 
31 | 
32 | # def test():
33 | #     from torch.autograd import Variable
34 | #     model = VGG16_CAM()
35 | #     print(model)
36 | #     img = t.rand(2, 3, 224, 224)
37 | #     img = Variable(img)
38 | #     output = model(img)
39 | #     print(output.size())
40 | #
41 | # if __name__ == '__main__':
42 | #     test()
--------------------------------------------------------------------------------
/CAM_pytorch/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .VGG_CAM import VGG16_CAM
--------------------------------------------------------------------------------
/CAM_pytorch/readme.md:
--------------------------------------------------------------------------------
1 | # Class Activation Mapping
2 | 
3 | 
4 | - Environment:
5 | 
6 | | python version | pytorch version |
7 | | ----------- | ---------- |
8 | | 3.5 | 0.3.0 |
9 | 
10 | 
11 | - Purpose: classification and localization (no ground-truth boxes are used for localization; the paper shows that the conv layers themselves already localize)
12 | 
13 | ----------
14 | 
15 | ## Dataset
16 | 
17 | - [INRIA Person dataset (official)](http://pascal.inrialpes.fr/data/human/)
18 | - [INRIA Person dataset (Baidu Cloud)](https://pan.baidu.com/s/1adTzYgX13K4CIjZNODRXqQ)
19 | 
20 | 
21 | ## Pretrained model
22 | 
23 | - [VGG16_CAM_39_99.455.pth](https://pan.baidu.com/s/1OVnxBBhmtVgTEUz0nNmrFg)
24 | 
25 | 
26 | ## Training
27 | 
28 | 1. Configure the dataset path and other training parameters in config.py
29 | 
30 | 2. Run main.py to start training
31 | 
32 | ## Visualization
33 | 
34 | 1. Configure the pretrained model in config.py
35 | 
36 | 2. Run main.py to visualize the class activation map
37 | 
38 | 
39 | 
40 | 
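The map itself is just a weighted sum of the final conv feature maps, with the target class's fc weights as coefficients. A minimal sketch (random tensors stand in for the real activations and weights of the `VGG16_CAM` model in this repo; the shapes are assumptions):

```python
import torch

# Assumed shapes matching VGG16_CAM: 512 feature maps, 2 classes.
features = torch.randn(512, 14, 14)  # output of feature_layer for one image
fc_weight = torch.randn(2, 512)      # fc_layer weight (num_classes x 512)

class_idx = 1  # class to visualize, e.g. "person"
# CAM = weighted sum over the 512 feature maps with that class's fc weights
cam = torch.einsum("c,chw->hw", fc_weight[class_idx], features)
cam = (cam - cam.min()) / (cam.max() - cam.min())  # normalize to [0, 1]
# Upsample `cam` to the input resolution and overlay it as a heatmap.
```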
41 | ## Training process
42 | 
43 | 
![training curves (image missing)]()
44 | 
45 | 
46 | ----------
47 | 
48 | ## Results
49 | 
50 | - Regions the network focuses on when classifying (i.e. the evidence behind its predictions)
51 | 
52 | 
53 | 
![CAM result (image missing)]()
54 | 
55 | 
56 | 
57 | 
![CAM result (image missing)]()
58 | 
59 | 
60 | 
61 | 
![CAM result (image missing)]()
62 | 
63 | 
64 | ----------
65 | 
66 | ## References
67 | 
68 | - [Keras implementation of CAM](https://github.com/jacobgil/keras-cam)
69 | - [CNN visualization](https://github.com/huanghao-code/VisCNN_CVPR_2016_Loc)
70 | - [CVPR 2016 paper](https://arxiv.org/pdf/1512.04150.pdf)
--------------------------------------------------------------------------------
/CAM_pytorch/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CAM_pytorch/utils/__init__.py
--------------------------------------------------------------------------------
/CAM_pytorch/utils/config.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # power by Mr.Li
3 | # Default configuration
4 | import datetime
5 | import os
6 | class DefaultConfig():
7 |     # Model to use; the name must match one defined in models/__init__.py
8 |     # Currently supported networks
9 |     model = 'VGG16_CAM'
10 | 
11 |     # Dataset path
12 |     dataset_root = '/home/bobo/data/cam_dataset/INRIAPerson/Train'
13 | 
14 |     # Model checkpoints
15 |     root = os.path.abspath(os.path.join(os.path.dirname(__file__))) + '/'
16 |     checkpoint_root = root + '../checkpoint/'  # directory for saved models
17 |     # load_model_path = None  # path of a pretrained model to load; None means train from scratch
18 |     load_model_path = checkpoint_root+'VGG16_CAM_39_99.455.pth'
19 | 
20 |     use_gpu = True  # use GPU or not
21 |     batch_size = 32
22 |     num_workers = 4  # number of data-loading workers
23 | 
24 |     max_epoch = 40
25 | 
26 | 
27 |     lr = 0.01
28 |     lr_decay = 0.5
29 | 
30 |     test_img='/home/bobo/windowsPycharmProject/cam_pytorch/person_and_bike_191.png'  # path of one test image
31 | 
32 | 
33 | 
34 | # Instantiate the config
35 | opt=DefaultConfig()
/CAM_pytorch/utils/visualize.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding:utf-8 -*-
3 | # power by Mr.Li
4 | import visdom
5 | import time
6 | import numpy as np
7 | class Visualizer(object):
8 |     '''
9 |     Wraps the basic visdom operations; the native visdom interface
10 |     is still available via `self.vis.function`
11 |     '''
12 |     def __init__(self, env='default', **kwargs):
13 |         self.vis = visdom.Visdom(env=env, **kwargs)
14 |         # index of the next point of each curve, i.e. its x coordinate
15 |         # e.g. ('loss', 23) means the 23rd point of 'loss'
16 |         self.index = {}
17 |         self.log_text = ''
18 |     def reinit(self, env='default', **kwargs):
19 |         '''
20 |         Re-initialize visdom with a new configuration
21 |         '''
22 |         self.vis = visdom.Visdom(env=env, **kwargs)
23 |         return self
24 |     def plot_many(self, d):
25 |         '''
26 |         Plot several loss curves at once
27 |         @params d: dict (name, value) i.e. ('loss', 0.11)
28 |         '''
29 |         for k, v in d.items():
30 |             self.plot(k, v)
31 |     def img_many(self, d):
32 |         '''
33 |         Draw several images at once
34 |         '''
35 |         for k, v in d.items():
36 |             self.img(k, v)
37 |     def plot(self, name, y, **kwargs):
38 |         '''
39 |         self.plot('loss', 1.00)
40 |         '''
41 |         # current point index
42 |         x = self.index.get(name, 0)
43 |         self.vis.line(Y=np.array([y]), X=np.array([x]),
44 |                       win=name,  # window name
45 |                       opts=dict(title=name),
46 |                       update=None if x == 0 else 'append',  # append to the existing curve
47 |                       **kwargs
48 |                       )
49 |         # advance the index
50 |         self.index[name] = x + 1
51 |     def img(self, name, img_, **kwargs):
52 |         '''
53 |         self.img('input_img', t.Tensor(64, 64))
54 |         self.img('input_imgs', t.Tensor(3, 64, 64))
55 |         self.img('input_imgs', t.Tensor(100, 1, 64, 64))
56 |         self.img('input_imgs', t.Tensor(100, 3, 64, 64), nrows=10)
57 | 
58 |         !!!don't ~~self.img('input_imgs', t.Tensor(100, 64, 64), nrows=10)~~!!!
59 |         '''
60 |         self.vis.images(img_.cpu().numpy(),
61 |                         win=name,
62 |                         opts=dict(title=name),
63 |                         **kwargs
64 |                         )
65 |     def log(self, info, win='log_text'):
66 |         '''
67 |         self.log({'loss':1, 'lr':0.0001})
68 |         Append to the log window
69 |         '''
70 | 
71 |         self.log_text += ('[{time}] {info} <br>'.format(
72 |             time=time.strftime('%m%d_%H%M%S'),
73 |             info=info))
74 |         self.vis.text(self.log_text, win)
75 |     def __getattr__(self, name):
76 |         return getattr(self.vis, name)
/CUDA_Python/CUDA-Python证书.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/CUDA-Python证书.pdf
--------------------------------------------------------------------------------
/CUDA_Python/readme.md:
--------------------------------------------------------------------------------
1 | # Fundamentals of Accelerated Computing with CUDA Python (completed)
2 | 
3 | >- [NVIDIA course page](https://courses.nvidia.com/courses/course-v1:DLI+C-AC-02+V1/)
4 | >
5 | >- [My course certificate](https://courses.nvidia.com/certificates/59466c0d52ae45a394d3b40902aad864/)
6 | 
7 | ### Course 1: Introduction to CUDA Python with Numba
8 | 
9 | - Introduction to CUDA Python programming with Numba
10 | - Writing custom CUDA kernels in Python with Numba
11 | - Multidimensional grids and shared memory for CUDA Python with Numba
12 | 
13 | ### Course 2: Custom CUDA kernels and memory management in CUDA Python with Numba
14 | 
15 | - Introduction to CUDA Python programming with Numba
16 | - Writing custom CUDA kernels in Python with Numba
17 | - Multidimensional grids and shared memory for CUDA Python with Numba
18 | 
19 | ### Course 3: Effective use of the memory subsystem
20 | 
21 | * Write CUDA kernels that benefit from coalesced memory access patterns.
22 | * Use multidimensional grids and thread blocks.
23 | * Use shared memory to coordinate threads within a block.
24 | * Use shared memory to facilitate coalesced memory access patterns.
25 | * Resolve shared memory bank conflicts.
26 | 
27 | 
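The pattern that recurs throughout all three courses is the grid-stride loop, which decouples the data size from the launch configuration; a minimal sketch (same shape as `hypot_stride_solution.py` below; the kernel name and data are illustrative):

```python
import numpy as np
from numba import cuda

@cuda.jit
def scale(a, out, factor):
    # Each thread starts at its global index and advances by the total
    # number of threads in the grid, so any array length is covered.
    start = cuda.grid(1)
    stride = cuda.gridsize(1)
    for i in range(start, a.shape[0], stride):
        out[i] = a[i] * factor

a = np.arange(100000, dtype=np.float32)
d_a = cuda.to_device(a)
d_out = cuda.device_array_like(d_a)
scale[64, 64](d_a, d_out, 2.0)  # [blocks, threads_per_block]
```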
--------------------------------------------------------------------------------
/CUDA_Python/课程1/images/DLI Header.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程1/images/DLI Header.png
--------------------------------------------------------------------------------
/CUDA_Python/课程1/images/numba_flowchart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程1/images/numba_flowchart.png
--------------------------------------------------------------------------------
/CUDA_Python/课程1/images/run_the_assessment.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程1/images/run_the_assessment.png
--------------------------------------------------------------------------------
/CUDA_Python/课程1/section1.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程1/section1.tar.gz
--------------------------------------------------------------------------------
/CUDA_Python/课程1/solutions/make_pulses_solution.py:
--------------------------------------------------------------------------------
1 | n = 100000
2 | noise = (np.random.normal(size=n) * 3).astype(np.float32)
3 | t = np.arange(n, dtype=np.float32)
4 | period = n / 23
5 |
6 | d_noise = cuda.to_device(noise)
7 | d_t = cuda.to_device(t)
8 | d_pulses = cuda.device_array(shape=(n,), dtype=np.float32)
9 |
10 | make_pulses(d_t, period, 100.0, out=d_pulses)
11 | waveform = add_ufunc(d_pulses, d_noise)
--------------------------------------------------------------------------------
/CUDA_Python/课程1/solutions/monte_carlo_pi_solution.py:
--------------------------------------------------------------------------------
1 | from numba import jit # `jit` is the Numba just-in-time-compiler function
2 | import random
3 |
4 | @jit # Use the decorator syntax to mark `monte_carlo_pi` for Numba compilation
5 | def monte_carlo_pi(nsamples):
6 | acc = 0
7 | for i in range(nsamples):
8 | x = random.random()
9 | y = random.random()
10 | if (x**2 + y**2) < 1.0:
11 | acc += 1
12 | return 4.0 * acc / nsamples
--------------------------------------------------------------------------------
/CUDA_Python/课程1/solutions/zero_suppress_solution.py:
--------------------------------------------------------------------------------
1 | @vectorize(['int16(int16, int16)'], target='cuda')
2 | def zero_suppress(waveform_value, threshold):
3 | if waveform_value < threshold:
4 | result = 0
5 | else:
6 | result = waveform_value
7 | return result
--------------------------------------------------------------------------------
/CUDA_Python/课程2/assessment/histogram.py:
--------------------------------------------------------------------------------
1 | # Add your solution here
2 | @cuda.jit
3 | def cuda_histogram(x, xmin, xmax, histogram_out):
4 |     '''Increment bin counts in histogram_out, given histogram range [xmin, xmax).'''
5 |     nbins = histogram_out.shape[0]  # number of bins
6 |     bin_width = (xmax - xmin) / nbins  # width of each bin
7 | 
8 | 
9 |     start = cuda.grid(1)
10 | 
11 |     stride = cuda.gridsize(1)  # 1: all threads indexed along one dimension
12 |     for i in range(start, x.shape[0], stride):
13 |         bin_number = np.int32((x[i] - xmin) / bin_width)  # bin index, computed by all threads in parallel
14 |         if bin_number >= 0 and bin_number < histogram_out.shape[0]:
15 |             cuda.atomic.add(histogram_out, bin_number, 1)  # atomic global increment
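16 | 
17 | # For reference, a host-side launch in the same shape as debug/ex1.py below
18 | # (assumes a CUDA device and, as in the assessment notebook, that `np` and
19 | # `cuda` are already imported; the data here is illustrative only):
20 | x = np.random.normal(size=10000, loc=0, scale=1).astype(np.float32)
21 | histogram_out = np.zeros(shape=10, dtype=np.int32)
22 | cuda_histogram[64, 64](x, np.float32(-4.0), np.float32(4.0), histogram_out)  # [blocks, threads_per_block]
23 | print(histogram_out, histogram_out.sum())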
--------------------------------------------------------------------------------
/CUDA_Python/课程2/debug/ex1.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from numba import cuda
4 |
5 | @cuda.jit
6 | def histogram(x, xmin, xmax, histogram_out):
7 | nbins = histogram_out.shape[0]
8 | bin_width = (xmax - xmin) / nbins
9 |
10 | start = cuda.grid(1)
11 | stride = cuda.gridsize(1)
12 |
13 | for i in range(start, x.shape[0], stride):
14 | bin_number = np.int32((x[i] - xmin)/bin_width)
15 | if bin_number >= 0 and bin_number < histogram_out.shape[0]:
16 | histogram_out[bin_number] += 1
17 |
18 | x = np.random.normal(size=50, loc=0, scale=1).astype(np.float32)
19 | xmin = np.float32(-4.0)
20 | xmax = np.float32(4.0)
21 | histogram_out = np.zeros(shape=10, dtype=np.int32)
22 |
23 | histogram[64, 64](x, xmin, xmax, histogram_out)
24 |
25 | print('input count:', x.shape[0])
26 | print('histogram:', histogram_out)
27 | print('count:', histogram_out.sum())
28 |
--------------------------------------------------------------------------------
/CUDA_Python/课程2/debug/ex1a.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from numba import cuda
4 |
5 | @cuda.jit
6 | def histogram(x, xmin, xmax, histogram_out):
7 | nbins = histogram_out.shape[0]
8 | bin_width = (xmax - xmin) / nbins
9 |
10 | start = cuda.grid(1)
11 | stride = cuda.gridsize(1)
12 |
13 | for i in range(start, x.shape[0], stride):
14 | bin_number = np.int32((x[i] - xmin)/bin_width)
15 | if bin_number >= 0 and bin_number < histogram_out.shape[0]:
16 | histogram_out[bin_number] += 1
17 | print('in range', x[i], bin_number)
18 | else:
19 | print('out of range', x[i], bin_number)
20 |
21 | x = np.random.normal(size=50, loc=0, scale=1).astype(np.float32)
22 | xmin = np.float32(-4.0)
23 | xmax = np.float32(4.0)
24 | histogram_out = np.zeros(shape=10, dtype=np.int32)
25 |
26 | histogram[64, 64](x, xmin, xmax, histogram_out)
27 |
28 | print('input count:', x.shape[0])
29 | print('histogram:', histogram_out)
30 | print('count:', histogram_out.sum())
31 |
--------------------------------------------------------------------------------
/CUDA_Python/课程2/debug/ex2.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from numba import cuda
4 |
5 | @cuda.jit
6 | def histogram(x, xmin, xmax, histogram_out):
7 | nbins = histogram_out.shape[0]
8 | bin_width = (xmax - xmin) / nbins
9 |
10 | start = cuda.grid(1)
11 | stride = cuda.gridsize(1)
12 |
13 | ### DEBUG FIRST THREAD
14 | if start == 0:
15 | from pdb import set_trace; set_trace()
16 | ###
17 |
18 | for i in range(start, x.shape[0], stride):
19 | bin_number = np.int32((x[i] + xmin)/bin_width)
20 |
21 | if bin_number >= 0 and bin_number < histogram_out.shape[0]:
22 | cuda.atomic.add(histogram_out, bin_number, 1)
23 |
24 | x = np.random.normal(size=50, loc=0, scale=1).astype(np.float32)
25 | xmin = np.float32(-4.0)
26 | xmax = np.float32(4.0)
27 | histogram_out = np.zeros(shape=10, dtype=np.int32)
28 |
29 | histogram[64, 64](x, xmin, xmax, histogram_out)
30 |
31 | print('input count:', x.shape[0])
32 | print('histogram:', histogram_out)
33 | print('count:', histogram_out.sum())
34 |
--------------------------------------------------------------------------------
/CUDA_Python/课程2/debug/ex3.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from numba import cuda
4 |
5 | @cuda.jit
6 | def histogram(x, xmin, xmax, histogram_out):
7 | nbins = histogram_out.shape[0]
8 | bin_width = (xmax - xmin) / nbins
9 |
10 | start = cuda.grid(1)
11 | stride = cuda.gridsize(1)
12 |
13 | for i in range(start, x.shape[0], stride):
14 | bin_number = np.int32((x[i] + xmin)/bin_width)
15 |
16 | if bin_number >= 0 or bin_number < histogram_out.shape[0]:
17 | cuda.atomic.add(histogram_out, bin_number, 1)
18 |
19 | x = np.random.normal(size=50, loc=0, scale=1).astype(np.float32)
20 | xmin = np.float32(-4.0)
21 | xmax = np.float32(4.0)
22 | histogram_out = np.zeros(shape=10, dtype=np.int32)
23 |
24 | histogram[64, 64](x, xmin, xmax, histogram_out)
25 |
26 | print('input count:', x.shape[0])
27 | print('histogram:', histogram_out)
28 | print('count:', histogram_out.sum())
29 |
--------------------------------------------------------------------------------
/CUDA_Python/课程2/debug/ex3a.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from numba import cuda
4 |
5 | @cuda.jit(debug=True)
6 | def histogram(x, xmin, xmax, histogram_out):
7 | nbins = histogram_out.shape[0]
8 | bin_width = (xmax - xmin) / nbins
9 |
10 | start = cuda.grid(1)
11 | stride = cuda.gridsize(1)
12 |
13 | for i in range(start, x.shape[0], stride):
14 | bin_number = np.int32((x[i] + xmin)/bin_width)
15 |
16 | if bin_number >= 0 or bin_number < histogram_out.shape[0]:
17 | cuda.atomic.add(histogram_out, bin_number, 1)
18 |
19 | x = np.random.normal(size=50, loc=0, scale=1).astype(np.float32)
20 | xmin = np.float32(-4.0)
21 | xmax = np.float32(4.0)
22 | histogram_out = np.zeros(shape=10, dtype=np.int32)
23 |
24 | histogram[64, 64](x, xmin, xmax, histogram_out)
25 |
26 | print('input count:', x.shape[0])
27 | print('histogram:', histogram_out)
28 | print('count:', histogram_out.sum())
29 |
--------------------------------------------------------------------------------
/CUDA_Python/课程2/images/DLI Header.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程2/images/DLI Header.png
--------------------------------------------------------------------------------
/CUDA_Python/课程2/images/run_the_assessment.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程2/images/run_the_assessment.png
--------------------------------------------------------------------------------
/CUDA_Python/课程2/img/numba_flowchart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程2/img/numba_flowchart.png
--------------------------------------------------------------------------------
/CUDA_Python/课程2/img/sensor_humidity.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程2/img/sensor_humidity.png
--------------------------------------------------------------------------------
/CUDA_Python/课程2/img/sensor_temp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程2/img/sensor_temp.png
--------------------------------------------------------------------------------
/CUDA_Python/课程2/section2.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程2/section2.tar.gz
--------------------------------------------------------------------------------
/CUDA_Python/课程2/solutions/hypot_stride_solution.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from numba import cuda
3 | from math import hypot
4 |
5 | @cuda.jit
6 | def hypot_stride(a, b, c):
7 | idx = cuda.grid(1)
8 | stride = cuda.gridsize(1)
9 |
10 | for i in range(idx, a.shape[0], stride):
11 | c[i] = hypot(a[i], b[i])
12 |
13 | n = 1000000
14 | a = np.random.uniform(-12, 12, n).astype(np.float32)
15 | b = np.random.uniform(-12, 12, n).astype(np.float32)
16 | d_a = cuda.to_device(a)
17 | d_b = cuda.to_device(b)
18 | d_c = cuda.device_array_like(d_b)
19 |
20 | hypot_stride[1, 1](d_a, d_b, d_c)
--------------------------------------------------------------------------------
/CUDA_Python/课程2/solutions/monte_carlo_pi_solution.py:
--------------------------------------------------------------------------------
1 | @cuda.jit
2 | def monte_carlo_pi_device(rng_states, nsamples, out):
3 | thread_id = cuda.grid(1)
4 |
5 | # Compute pi by drawing random (x, y) points and finding what
6 | # fraction lie inside a unit circle
7 | acc = 0
8 | for i in range(nsamples):
9 | x = xoroshiro128p_uniform_float32(rng_states, thread_id)
10 | y = xoroshiro128p_uniform_float32(rng_states, thread_id)
11 | if x**2 + y**2 <= 1.0:
12 | acc += 1
13 |
14 | out[thread_id] = 4.0 * acc / nsamples
15 |
--------------------------------------------------------------------------------
/CUDA_Python/课程2/solutions/square_device_solution.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from numba import cuda
3 |
4 | @cuda.jit
5 | def square_device(a, out):
6 | idx = cuda.grid(1)
7 | out[idx] = a[idx]**2
8 |
9 | n = 4096
10 | a = np.arange(n)
11 |
12 | d_a = cuda.to_device(a)
13 | d_out = cuda.device_array(shape=(n,), dtype=np.float32)
14 |
15 | threads = 32
16 | blocks = 128
17 |
18 | square_device[blocks, threads](d_a, d_out)
--------------------------------------------------------------------------------
/CUDA_Python/课程3/assessment/definition.py:
--------------------------------------------------------------------------------
1 | # Use the 'File' menu above to 'Save' after pasting in your own mm_shared function definition.
--------------------------------------------------------------------------------
/CUDA_Python/课程3/images/DLI Header.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程3/images/DLI Header.png
--------------------------------------------------------------------------------
/CUDA_Python/课程3/images/mm_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程3/images/mm_image.png
--------------------------------------------------------------------------------
/CUDA_Python/课程3/images/run_assess_task.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程3/images/run_assess_task.png
--------------------------------------------------------------------------------
/CUDA_Python/课程3/images/run_the_assessment.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程3/images/run_the_assessment.png
--------------------------------------------------------------------------------
/CUDA_Python/课程3/solutions/add_matrix_solution.py:
--------------------------------------------------------------------------------
1 | @cuda.jit
2 | def add_matrix(A, B, C):
3 | i,j = cuda.grid(2)
4 |
5 | C[j,i] = A[j,i] + B[j,i]
6 |
7 | A = np.arange(36*36).reshape(36, 36).astype(np.int32)
8 | B = A * 2
9 | C = np.zeros_like(A)
10 | d_A = cuda.to_device(A)
11 | d_B = cuda.to_device(B)
12 | d_C = cuda.to_device(C)
13 |
14 | blocks = (6,6)
15 | threads_per_block = (6,6)
16 |
17 | add_matrix[blocks, threads_per_block](d_A, d_B, d_C)
--------------------------------------------------------------------------------
/CUDA_Python/课程3/solutions/add_matrix_stride_solution.py:
--------------------------------------------------------------------------------
1 | @cuda.jit
2 | def add_matrix_stride(A, B, C):
3 |
4 | y, x = cuda.grid(2)
5 | stride_y, stride_x = cuda.gridsize(2)
6 |
7 | for i in range(x, A.shape[0], stride_x):
8 | for j in range(y, A.shape[1], stride_y):
9 | C[i][j] = A[i][j] + B[i][j]
10 |
11 | A = np.arange(64*64).reshape(64, 64).astype(np.int32)
12 | B = A * 2
13 | C = np.zeros_like(A)
14 | d_A = cuda.to_device(A)
15 | d_B = cuda.to_device(B)
16 | d_C = cuda.to_device(C)
17 |
18 | blocks = (6,6)
19 | threads_per_block = (6,6)
20 |
21 | add_matrix_stride[blocks, threads_per_block](d_A, d_B, d_C)
--------------------------------------------------------------------------------
/CUDA_Python/课程3/solutions/col_sums_solution.py:
--------------------------------------------------------------------------------
1 | @cuda.jit
2 | def col_sums(a, sums, ds):
3 | idx = cuda.grid(1)
4 | sum = 0.0
5 |
6 | for i in range(ds):
7 | sum += a[i][idx]
8 |
9 | sums[idx] = sum
10 |
--------------------------------------------------------------------------------
/CUDA_Python/课程3/solutions/matrix_add_solution.py:
--------------------------------------------------------------------------------
1 | @cuda.jit
2 | def matrix_add(a, b, out, coalesced):
3 | x, y = cuda.grid(2)
4 |
5 | if coalesced == True:
6 | out[y][x] = a[y][x] + b[y][x]
7 | else:
8 | out[x][y] = a[x][y] + b[x][y]
9 |
--------------------------------------------------------------------------------
/CUDA_Python/课程3/solutions/matrix_multiply_solution.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from numba import cuda
3 |
4 | @cuda.jit
5 | def mm(a, b, c):
6 | column, row = cuda.grid(2)
7 | sum = 0
8 |
9 | for i in range(a.shape[0]):
10 | sum += a[row][i] * b[i][column]
11 |
12 | c[row][column] = sum
13 |
14 | a = np.arange(16).reshape(4,4).astype(np.int32)
15 | b = np.arange(16).reshape(4,4).astype(np.int32)
16 | c = np.zeros_like(a)
17 |
18 | d_a = cuda.to_device(a)
19 | d_b = cuda.to_device(b)
20 | d_c = cuda.to_device(c)
21 |
22 | grid = (2,2)
23 | block = (2,2)
24 | mm[grid, block](d_a, d_b, d_c)
--------------------------------------------------------------------------------
/CUDA_Python/课程3/solutions/matrix_multiply_stride_solution.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from numba import cuda
3 |
4 | @cuda.jit
5 | def mm_stride(A, B, C):
6 |
7 | grid_column, grid_row = cuda.grid(2)
8 | stride_column, stride_row = cuda.gridsize(2)
9 |
10 | for data_row in range(grid_row, A.shape[0], stride_row):
11 | for data_column in range(grid_column, B.shape[1], stride_column):
12 | sum = 0
13 | for i in range(A.shape[1]): # `range(B.shape[0])` is also okay
14 | sum += A[data_row][i] * B[i][data_column]
15 |
16 | C[data_row][data_column] = sum
17 |
18 | n = 1024
19 | a = np.arange(n*n).reshape(n,n).astype(np.int32)
20 | b = np.arange(n*n).reshape(n,n).astype(np.int32)
21 | c = np.zeros((a.shape[0], b.shape[1])).astype(np.int32)
22 |
23 | d_a = cuda.to_device(a)
24 | d_b = cuda.to_device(b)
25 | d_c = cuda.to_device(c)
26 |
27 | ts = (32,32)
28 | bs = (32,32)
29 |
30 | mm_stride[bs, ts](d_a, d_b, d_c)
--------------------------------------------------------------------------------
/CUDA_Python/课程3/solutions/monte_carlo_pi_solution.py:
--------------------------------------------------------------------------------
1 | @cuda.jit
2 | def monte_carlo_pi_device(rng_states, nsamples, out):
3 | thread_id = cuda.grid(1)
4 |
5 | # Compute pi by drawing random (x, y) points and finding what
6 | # fraction lie inside a unit circle
7 | acc = 0
8 | for i in range(nsamples):
9 | x = xoroshiro128p_uniform_float32(rng_states, thread_id)
10 | y = xoroshiro128p_uniform_float32(rng_states, thread_id)
11 | if x**2 + y**2 <= 1.0:
12 | acc += 1
13 |
14 | out[thread_id] = 4.0 * acc / nsamples
15 |
16 | nsamples = 10000000
17 | threads_per_block = 128
18 | blocks = 32
19 | grid_size = threads_per_block * blocks
20 |
21 | samples_per_thread = int(nsamples / grid_size)
22 | rng_states = create_xoroshiro128p_states(grid_size, seed=1)
23 | d_out = cuda.device_array(threads_per_block * blocks, dtype=np.float32)
24 |
25 | monte_carlo_pi_device[blocks, threads_per_block](rng_states, samples_per_thread, d_out)
--------------------------------------------------------------------------------
/CUDA_Python/课程3/solutions/tile_transpose_solution.py:
--------------------------------------------------------------------------------
1 | @cuda.jit
2 | def tile_transpose(a, transposed):
3 | # `tile_transpose` assumes it is launched with a 32x32 block dimension,
4 | # and that `a` is a multiple of these dimensions.
5 |
6 | # 1) Create 32x32 shared memory array.
7 | tile = cuda.shared.array((32, 32), numba_types.int32)
8 |
9 | # Compute offsets into global input array. Recall for coalesced access we want to map threadIdx.x increments to
10 | # the fastest changing index in the data, i.e. the column in our array.
11 | a_col = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
12 | a_row = cuda.blockIdx.y * cuda.blockDim.y + cuda.threadIdx.y
13 |
14 | # 2) Make coalesced read from global memory into shared memory array.
15 | # Note the use of local thread indices for the shared memory write,
16 | # and global offsets for global memory read.
17 | tile[cuda.threadIdx.y, cuda.threadIdx.x] = a[a_row, a_col]
18 |
19 | # 3) Wait for all threads in the block to finish updating shared memory.
20 | cuda.syncthreads()
21 |
22 | # 4) Calculate transposed location for the shared memory array tile
23 | # to be written back to global memory. Note that blockIdx.y*blockDim.y
24 | # and blockIdx.x* blockDim.x are swapped (because we want to write to the
25 | # transpose locations), but we want to keep access coalesced, so match up the
26 | # threadIdx.x to the fastest changing index, i.e. the column
27 | t_col = cuda.blockIdx.y * cuda.blockDim.y + cuda.threadIdx.x
28 | t_row = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.y
29 |
30 | # 5) Write from shared memory (using thread indices)
31 | # back to global memory (using grid indices)
32 | # transposing each element within the shared memory array.
33 | transposed[t_row, t_col] = tile[cuda.threadIdx.x, cuda.threadIdx.y]
--------------------------------------------------------------------------------
/CUDA_Python/课程笔记.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程笔记.pdf
--------------------------------------------------------------------------------
/DataHub/readme.md:
--------------------------------------------------------------------------------
1 | # Public Dataset Collection
2 | 
3 | 
4 | 
5 | ## Dataset platforms
6 | 
7 | - [Baidu AI Studio](https://aistudio.baidu.com/aistudio/datasetoverview)
8 | - [Alibaba Tianchi](https://tianchi.aliyun.com/dataset)
9 | - [Kaggle](https://www.kaggle.com/datasets)
10 | - [CVMart](https://www.cvmart.net/dataSets)
11 | 
12 | 
13 | 
14 | ## Multimodal
15 | 
16 | | Link | Annotation type | Count |
17 | | ------------------------------------------------------------ | ----------------------- | ---- |
18 | | [COYO-700M](https://github.com/kakaobrain/coyo-dataset) | Large-scale English image-text pairs | 700M |
19 | | [img2dataset](https://github.com/rom1504/img2dataset/tree/main) | Multiple image-text pair datasets | |
20 | 
21 | 
22 | 
23 | ## Security scenarios
24 | 
25 | ### Guns
26 | 
27 | | Link | Annotation type | Count |
28 | | ------------------------------------------------------------ | -------- | ---- |
29 | | [URL1](https://www.kaggle.com/datasets/shivanirana63/labeled-guns-data-for-object-detection) | Object detection | 3k |
30 | | [URL2](https://www.kaggle.com/code/gattoni/faster-rcnn-guns-object-detection-with-save-load/data) | Object detection | 0.3k |
31 | 
32 | ### QR codes
33 | 
34 | | Link | Annotation type | Count |
35 | | ------------------------------------------------------------ | -------- | ---- |
36 | | [URL1](https://aistudio.baidu.com/aistudio/datasetdetail/147099/0) | Object detection | 0.6k |
37 | | [URL2](https://aistudio.baidu.com/aistudio/datasetdetail/103078/0) | Object detection | 2k |
38 | 
39 | ### Fire and smoke
40 | 
41 | | Link | Annotation type | Count |
42 | | ------------------------------------------------------------ | -------- | ---- |
43 | | [URL1](https://aistudio.baidu.com/aistudio/datasetdetail/107770/0) | Object detection | 6.9k |
44 | | [URL2](https://aistudio.baidu.com/aistudio/datasetdetail/90352) | Object detection | 2k |
45 | | [URL3](https://aistudio.baidu.com/aistudio/datasetdetail/84374/0) | Object detection | 5k |
46 | 
47 | ### Smoking
48 | 
49 | | Link | Annotation type | Count |
50 | | ------------------------------------------------------------ | -------- | ---- |
51 | | [URL1](https://aistudio.baidu.com/aistudio/datasetdetail/72629/0) | Object detection | 1.5k |
52 | 
53 | 
54 | 
55 | 
--------------------------------------------------------------------------------
/FPN_pytorch/README.md:
--------------------------------------------------------------------------------
1 | # PyTorch-FPN
2 |
3 | _Feature Pyramid Networks_ in PyTorch.
4 |
5 | [原地址](https://github.com/kuangliu/pytorch-fpn)
6 |
7 | References:
8 | [1] [Feature Pyramid Networks for Object Detection](https://arxiv.org/abs/1612.03144)
9 | [2] [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002)
10 |
11 |
12 |
13 | # 自己想法
14 |
15 | - 目前工作
16 |
17 | 无意完成 以FPN为基础的RPN网络的fast rcnn,仅了解FPN基本思想即可。
18 |
19 | - Network architecture
20 |
21 | 
22 |
23 |
24 | - Basic pipeline
25 |
26 |
27 | 
--------------------------------------------------------------------------------
/FPN_pytorch/fpn.py:
--------------------------------------------------------------------------------
1 | '''FPN in PyTorch.
2 |
3 | See the paper "Feature Pyramid Networks for Object Detection" for more details.
4 | '''
5 | import torch
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 |
9 | from torch.autograd import Variable
10 |
11 |
12 | class Bottleneck(nn.Module):
13 | expansion = 4
14 |
15 | def __init__(self, in_planes, planes, stride=1):
16 | super(Bottleneck, self).__init__()
17 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
18 | self.bn1 = nn.BatchNorm2d(planes)
19 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
20 | self.bn2 = nn.BatchNorm2d(planes)
21 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
22 | self.bn3 = nn.BatchNorm2d(self.expansion*planes)
23 |
24 | self.shortcut = nn.Sequential()
25 | if stride != 1 or in_planes != self.expansion*planes:
26 | self.shortcut = nn.Sequential(
27 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
28 | nn.BatchNorm2d(self.expansion*planes)
29 | )
30 |
31 | def forward(self, x):
32 | out = F.relu(self.bn1(self.conv1(x)))
33 | out = F.relu(self.bn2(self.conv2(out)))
34 | out = self.bn3(self.conv3(out))
35 | out += self.shortcut(x)
36 | out = F.relu(out)
37 | return out
38 |
39 |
40 | class FPN(nn.Module):
41 | '''
42 | Custom network model; subclasses nn.Module.
43 | '''
44 | def __init__(self, block, num_blocks):
45 | super(FPN, self).__init__()
46 | # number of input channels
47 | self.in_planes = 64
48 | # nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True)
49 |
50 | # As in the original paper: in_channels=3, out_channels=64
51 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
52 | # the BN layer's parameter is the number of output channels, out_channels=64
53 | self.bn1 = nn.BatchNorm2d(64)
54 |
55 | # Bottom-up layers
56 | # bottom-up pathway: a ResNet backbone
57 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
58 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
59 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
60 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
61 |
62 | # Top layer (the topmost level has only a lateral connection; kernel_size=1 reduces channels, spatial shape unchanged)
63 | self.toplayer = nn.Conv2d(2048, 256, kernel_size=1, stride=1, padding=0) # Reduce channels
64 |
65 | # Smooth layers
66 | # After each merge, a 3x3 convolution is applied to the fused map to reduce the aliasing effect of upsampling
67 | self.smooth1 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
68 | self.smooth2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
69 | self.smooth3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
70 |
71 | # Lateral layers
72 | # (the 1x1 convolutions only reduce the number of kernels, i.e. the number of feature maps; they do not change the spatial size.)
73 | self.latlayer1 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0)
74 | self.latlayer2 = nn.Conv2d( 512, 256, kernel_size=1, stride=1, padding=0)
75 | self.latlayer3 = nn.Conv2d( 256, 256, kernel_size=1, stride=1, padding=0)
76 |
77 | def _make_layer(self, block, planes, num_blocks, stride):
78 | '''
79 | Build one ResNet stage (a sequence of blocks).
80 | '''
81 | strides = [stride] + [1]*(num_blocks-1)
82 | layers = []
83 | for stride in strides:
84 | layers.append(block(self.in_planes, planes, stride))
85 | self.in_planes = planes * block.expansion
86 | return nn.Sequential(*layers)
87 |
88 | def _upsample_add(self, x, y):
89 | '''
90 | Upsample and add two feature maps.
91 |
92 | Args:
93 | x: (Variable) top feature map to be upsampled.
94 | y: (Variable) lateral feature map.
95 |
96 | Returns:
97 | (Variable) added feature map.
98 |
99 | Note: in PyTorch, when the input size is odd, the feature map
100 | upsampled with `F.upsample(..., scale_factor=2, mode='nearest')`
101 | may not equal the lateral feature map size.
102 |
103 | e.g.
104 | original input size: [N,_,15,15] ->
105 | conv2d feature map size: [N,_,8,8] ->
106 | upsampled feature map size: [N,_,16,16]
107 |
108 | Here the lateral feature map is [N,_,15,15], so the x2-upsampled
109 | [N,_,16,16] map cannot be added to it directly.
110 |
111 | So we choose bilinear upsampling, which supports arbitrary output
112 | sizes: x is resized to y's (H, W) and then added to y.
113 | '''
114 | _,_,H,W = y.size()
115 | # bilinearly upsample x to y's size, then add element-wise
116 | return F.upsample(x, size=(H,W), mode='bilinear') + y
117 |
118 | def forward(self, x):
119 | # Bottom-up: conv -> batchnorm -> relu -> maxpool
120 | c1 = F.relu(self.bn1(self.conv1(x)))
121 | c1 = F.max_pool2d(c1, kernel_size=3, stride=2, padding=1)
122 |
123 | # ResNet stages
124 | c2 = self.layer1(c1)
125 | c3 = self.layer2(c2)
126 | c4 = self.layer3(c3)
127 | c5 = self.layer4(c4)
128 |
129 | # Top-down pathway with lateral connections
130 | p5 = self.toplayer(c5) # reduce channels
131 | p4 = self._upsample_add(p5, self.latlayer1(c4))
132 | p3 = self._upsample_add(p4, self.latlayer2(c3))
133 | p2 = self._upsample_add(p3, self.latlayer3(c2))
134 |
135 | # Smooth: a 3x3 conv on each merged map, to reduce upsampling aliasing
136 | p4 = self.smooth1(p4)
137 | p3 = self.smooth2(p3)
138 | p2 = self.smooth3(p2)
139 | return p2, p3, p4, p5
140 |
141 |
142 | def FPN101():
143 | # [2,4,23,3] is the block configuration for FPN101
144 | # return FPN(Bottleneck, [2,4,23,3])
145 |
146 | # [2,2,2,2] is the lighter FPN18 block configuration used here
147 | return FPN(Bottleneck, [2,2,2,2])
148 |
149 |
150 | def test():
151 | # build an FPN101 network
152 | net = FPN101()
153 | print('Network structure:')
154 | print(net)
155 | # forward pass; fms holds the outputs p2, p3, p4, p5
156 | fms = net(Variable(torch.randn(1,3,224,224)))
157 | print('Network outputs:')
158 | for fm in fms:
159 | print(fm.size())
160 |
161 | test()
162 |
--------------------------------------------------------------------------------
/FPN_pytorch/retina_fpn.py:
--------------------------------------------------------------------------------
1 | '''RetinaFPN in PyTorch.
2 |
3 | See the paper "Focal Loss for Dense Object Detection" for more details.
4 | '''
5 | import torch
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 |
9 | from torch.autograd import Variable
10 |
11 |
12 | class Bottleneck(nn.Module):
13 | expansion = 4
14 |
15 | def __init__(self, in_planes, planes, stride=1):
16 | super(Bottleneck, self).__init__()
17 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
18 | self.bn1 = nn.BatchNorm2d(planes)
19 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
20 | self.bn2 = nn.BatchNorm2d(planes)
21 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
22 | self.bn3 = nn.BatchNorm2d(self.expansion*planes)
23 |
24 | self.shortcut = nn.Sequential()
25 | if stride != 1 or in_planes != self.expansion*planes:
26 | self.shortcut = nn.Sequential(
27 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
28 | nn.BatchNorm2d(self.expansion*planes)
29 | )
30 |
31 | def forward(self, x):
32 | out = F.relu(self.bn1(self.conv1(x)))
33 | out = F.relu(self.bn2(self.conv2(out)))
34 | out = self.bn3(self.conv3(out))
35 | out += self.shortcut(x)
36 | out = F.relu(out)
37 | return out
38 |
39 |
40 | class RetinaFPN(nn.Module):
41 | def __init__(self, block, num_blocks):
42 | super(RetinaFPN, self).__init__()
43 | self.in_planes = 64
44 |
45 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
46 | self.bn1 = nn.BatchNorm2d(64)
47 |
48 | # Bottom-up layers
49 | self.layer2 = self._make_layer(block, 64, num_blocks[0], stride=1)
50 | self.layer3 = self._make_layer(block, 128, num_blocks[1], stride=2)
51 | self.layer4 = self._make_layer(block, 256, num_blocks[2], stride=2)
52 | self.layer5 = self._make_layer(block, 512, num_blocks[3], stride=2)
53 | self.conv6 = nn.Conv2d(2048, 256, kernel_size=3, stride=2, padding=1)
54 | self.conv7 = nn.Conv2d( 256, 256, kernel_size=3, stride=2, padding=1)
55 |
56 | # Top layer
57 | self.toplayer = nn.Conv2d(2048, 256, kernel_size=1, stride=1, padding=0) # Reduce channels
58 |
59 | # Smooth layers
60 | self.smooth1 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
61 | self.smooth2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
62 |
63 | # Lateral layers
64 | self.latlayer1 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0)
65 | self.latlayer2 = nn.Conv2d( 512, 256, kernel_size=1, stride=1, padding=0)
66 |
67 | def _make_layer(self, block, planes, num_blocks, stride):
68 | strides = [stride] + [1]*(num_blocks-1)
69 | layers = []
70 | for stride in strides:
71 | layers.append(block(self.in_planes, planes, stride))
72 | self.in_planes = planes * block.expansion
73 | return nn.Sequential(*layers)
74 |
75 | def _upsample_add(self, x, y):
76 | '''Upsample and add two feature maps.
77 |
78 | Args:
79 | x: (Variable) top feature map to be upsampled.
80 | y: (Variable) lateral feature map.
81 |
82 | Returns:
83 | (Variable) added feature map.
84 |
85 | Note: in PyTorch, when the input size is odd, the feature map
86 | upsampled with `F.upsample(..., scale_factor=2, mode='nearest')`
87 | may not equal the lateral feature map size.
88 |
89 | e.g.
90 | original input size: [N,_,15,15] ->
91 | conv2d feature map size: [N,_,8,8] ->
92 | upsampled feature map size: [N,_,16,16]
93 |
94 | So we choose bilinear upsample which supports arbitrary output sizes.
95 | '''
96 | _,_,H,W = y.size()
97 | return F.upsample(x, size=(H,W), mode='bilinear') + y
98 |
99 | def forward(self, x):
100 | # Bottom-up
101 | c1 = F.relu(self.bn1(self.conv1(x)))
102 | c1 = F.max_pool2d(c1, kernel_size=3, stride=2, padding=1)
103 | c2 = self.layer2(c1)
104 | c3 = self.layer3(c2)
105 | c4 = self.layer4(c3)
106 | c5 = self.layer5(c4)
107 | p6 = self.conv6(c5)
108 | p7 = self.conv7(F.relu(p6))
109 | # Top-down
110 | p5 = self.toplayer(c5)
111 | p4 = self._upsample_add(p5, self.latlayer1(c4))
112 | p3 = self._upsample_add(p4, self.latlayer2(c3))
113 | # Smooth
114 | p4 = self.smooth1(p4)
115 | p3 = self.smooth2(p3)
116 | return p3, p4, p5, p6, p7
117 |
118 |
119 | def RetinaFPN101():
120 | # return RetinaFPN(Bottleneck, [2,4,23,3])
121 | return RetinaFPN(Bottleneck, [2,2,2,2])
122 |
123 |
124 | def test():
125 | net = RetinaFPN101()
126 | fms = net(Variable(torch.randn(1,3,600,900)))
127 | for fm in fms:
128 | print(fm.size())
129 |
130 | test()
131 |
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License
2 |
3 | Copyright (c) 2017 Yun Chen
4 |
5 | Original works by:
6 | --------------------------------------------------------
7 | chainer/chainercv
8 | Copyright (c) 2017 Yusuke Niitani
9 | Licensed under The MIT License
10 | https://github.com/chainer/chainercv/blob/master/LICENSE
11 | --------------------------------------------------------
12 | Faster R-CNN
13 | Copyright (c) 2015 Microsoft
14 | Licensed under The MIT License
15 | https://github.com/rbgirshick/py-faster-rcnn/blob/master/LICENSE
16 | --------------------------------------------------------
17 |
18 | Permission is hereby granted, free of charge, to any person obtaining a copy
19 | of this software and associated documentation files (the "Software"), to deal
20 | in the Software without restriction, including without limitation the rights
21 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
22 | copies of the Software, and to permit persons to whom the Software is
23 | furnished to do so, subject to the following conditions:
24 |
25 | The above copyright notice and this permission notice shall be included in
26 | all copies or substantial portions of the Software.
27 |
28 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
31 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
32 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
33 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
34 | THE SOFTWARE.
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/__pycache__/trainer.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/__pycache__/trainer.cpython-35.pyc
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/data/__init__.py
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/data/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/data/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/data/__pycache__/dataset.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/data/__pycache__/dataset.cpython-35.pyc
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/data/__pycache__/util.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/data/__pycache__/util.cpython-35.pyc
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/data/__pycache__/voc_dataset.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/data/__pycache__/voc_dataset.cpython-35.pyc
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/data/dataset.py:
--------------------------------------------------------------------------------
1 | import torch as t
2 | from .voc_dataset import VOCBboxDataset
3 | from skimage import transform as sktsf
4 | from torchvision import transforms as tvtsf
5 | from . import util
6 | import numpy as np
7 | from utils.config import opt
8 |
9 |
10 | def inverse_normalize(img):
11 | """
12 | Approximately restore an image from [-1, 1] back to [0, 255].
13 | """
14 | if opt.caffe_pretrain:
15 | img = img + (np.array([122.7717, 115.9465, 102.9801]).reshape(3, 1, 1))
16 | return img[::-1, :, :]
17 | # approximate un-normalize for visualize
18 | return (img * 0.225 + 0.45).clip(min=0, max=1) * 255
19 |
20 |
21 | def pytorch_normalze(img):
22 | """
23 | https://github.com/pytorch/vision/issues/223
24 | return approx. [-1, 1] RGB
25 | (normalize a PyTorch-format image; returned values are roughly in [-1, 1], channels in RGB order)
26 | """
27 | normalize = tvtsf.Normalize(mean=[0.485, 0.456, 0.406],
28 | std=[0.229, 0.224, 0.225])
29 | img = normalize(t.from_numpy(img))
30 | return img.numpy()
31 |
32 |
33 | def caffe_normalize(img):
34 | """
35 | return approx. [-125, 125] BGR
36 | (normalize a Caffe-format image; returned values are roughly in [-125, 125], channels in BGR order)
37 | """
38 | img = img[[2, 1, 0], :, :] # RGB -> BGR
39 | img = img * 255
40 | mean = np.array([122.7717, 115.9465, 102.9801]).reshape(3, 1, 1)
41 | img = (img - mean).astype(np.float32, copy=True)
42 | return img
43 |
44 |
45 | def preprocess(img, min_size=600, max_size=1000):
46 | """Preprocess an image for feature extraction.
47 |
48 | The length of the shorter edge is scaled to :obj:`self.min_size`.
49 | After the scaling, if the length of the longer edge is longer than
50 | :obj:`self.max_size`, the image is scaled to fit the longer edge
51 | to :obj:`self.max_size`.
52 | After resizing the image, the image is subtracted by a mean image value
53 | :obj:`self.mean`.
54 |
55 | In short, the image is scaled so that the longer edge is at most 1000
56 | and the shorter edge is at most 600 (at least one bound is met exactly).
57 | E.g. a 375x500 (HxW) image gives
58 | scale1 = 600/375 = 1.6, scale2 = 1000/500 = 2.0,
59 | scale = min(scale1, scale2) = 1.6,
60 | so the image is resized to 600x800.
61 |
62 |
63 | Args:
64 | img (~numpy.ndarray): An image. This is in CHW and RGB format.
65 | The range of its value is :math:`[0, 255]`.
66 |
67 | Returns:
68 | ~numpy.ndarray: A preprocessed image.
69 |
70 | """
71 | C, H, W = img.shape
72 | scale1 = min_size / min(H, W)
73 | scale2 = max_size / max(H, W)
74 | scale = min(scale1, scale2)
75 | img = img / 255.
76 | img = sktsf.resize(img, (C, H * scale, W * scale), mode='reflect')
77 | # both the longer and shorter should be less than
78 | # max_size and min_size
79 | if opt.caffe_pretrain:
80 | normalize = caffe_normalize
81 | else:
82 | normalize = pytorch_normalze
83 | # normalize img with the function selected above
84 | return normalize(img)
85 |
86 |
87 | class Transform(object):
88 |
89 | def __init__(self, min_size=600, max_size=1000):
90 | self.min_size = min_size
91 | self.max_size = max_size
92 |
93 | def __call__(self, in_data):
94 | img, bbox, label = in_data
95 | _, H, W = img.shape
96 | # rescale the image via preprocess() above
97 | img = preprocess(img, self.min_size, self.max_size)
98 | _, o_H, o_W = img.shape
99 | scale = o_H / H
100 | # resize the bboxes by the same scale as the image
101 | bbox = util.resize_bbox(bbox, (H, W), (o_H, o_W))
102 |
103 | # horizontally flip
104 | # (apply the same random horizontal flip to img and its bboxes; horizontal flips only)
105 | img, params = util.random_flip(
106 | img, x_random=True, return_param=True)
107 | bbox = util.flip_bbox(
108 | bbox, (o_H, o_W), x_flip=params['x_flip'])
109 |
110 | return img, bbox, label, scale
111 |
112 |
113 | class Dataset:
114 | def __init__(self, opt):
115 | self.opt = opt
116 | # initialize VOCBboxDataset with the dataset root directory
117 | # eg: /data/image/voc/VOCdevkit/VOC2007/
118 | self.db = VOCBboxDataset(opt.voc_data_dir)
119 | # initialize the Transform defined above (the image transformation)
120 | self.tsf = Transform(opt.min_size, opt.max_size)
121 |
122 | def __getitem__(self, idx):
123 | # get the original img, bboxes, labels and difficulty flags
124 | ori_img, bbox, label, difficult = self.db.get_example(idx)
125 | # apply Transform.__call__ above; returns the normalized img, bbox, label and the resize scale
126 | img, bbox, label, scale = self.tsf((ori_img, bbox, label))
127 | # TODO: check whose stride is negative to fix this instead copy all
128 | # some of the strides of a given numpy array are negative.
129 |
130 | return img.copy(), bbox.copy(), label.copy(), scale
131 |
132 | def __len__(self):
133 | return len(self.db)
134 |
135 |
136 | class TestDataset:
137 |
138 | def __init__(self, opt, split='test', use_difficult=True):
139 | self.opt = opt
140 | self.db = VOCBboxDataset(opt.voc_data_dir, split=split, use_difficult=use_difficult)
141 |
142 | def __getitem__(self, idx):
143 | ori_img, bbox, label, difficult = self.db.get_example(idx)
144 | img = preprocess(ori_img)
145 | return img, ori_img.shape[1:], bbox, label, difficult
146 |
147 | def __len__(self):
148 | return len(self.db)
149 |
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/data/voc_dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import xml.etree.ElementTree as ET
3 |
4 | import numpy as np
5 |
6 | from .util import read_image
7 |
8 |
9 | class VOCBboxDataset:
10 | """Bounding box dataset for PASCAL `VOC`_.
11 |
12 |
13 | .. _`VOC`: http://host.robots.ox.ac.uk/pascal/VOC/voc2012/
14 |
15 | The index corresponds to each image.
16 |
17 | When queried by an index, if :obj:`return_difficult == False`,
18 | this dataset returns a corresponding
19 | :obj:`img, bbox, label`, a tuple of an image, bounding boxes and labels.
20 | This is the default behaviour.
21 | If :obj:`return_difficult == True`, this dataset returns corresponding
22 | :obj:`img, bbox, label, difficult`. :obj:`difficult` is a boolean array
23 | that indicates whether bounding boxes are labeled as difficult or not.
24 |
25 | The bounding boxes are packed into a two dimensional tensor of shape
26 | :math:`(R, 4)`, where :math:`R` is the number of bounding boxes in
27 | the image. The second axis represents attributes of the bounding box.
28 | They are :math:`(y_{min}, x_{min}, y_{max}, x_{max})`, where the
29 | four attributes are coordinates of the top left and the bottom right
30 | vertices.
31 |
32 | The labels are packed into a one dimensional tensor of shape :math:`(R,)`.
33 | :math:`R` is the number of bounding boxes in the image.
34 | The class name of the label :math:`l` is the :math:`l`-th element of
35 | :obj:`VOC_BBOX_LABEL_NAMES`.
36 |
37 | The array :obj:`difficult` is a one dimensional boolean array of shape
38 | :math:`(R,)`. :math:`R` is the number of bounding boxes in the image.
39 | If :obj:`use_difficult` is :obj:`False`, this array is
40 | a boolean array with all :obj:`False`.
41 |
42 | The type of the image, the bounding boxes and the labels are as follows.
43 |
44 | * :obj:`img.dtype == numpy.float32`
45 | * :obj:`bbox.dtype == numpy.float32`
46 | * :obj:`label.dtype == numpy.int32`
47 | * :obj:`difficult.dtype == numpy.bool`
48 |
49 | Args:
50 | data_dir (string): Path to the root of the training data.
51 | i.e. "/data/image/voc/VOCdevkit/VOC2007/"
52 | split ({'train', 'val', 'trainval', 'test'}): Select a split of the
53 | dataset. :obj:`test` split is only available for
54 | 2007 dataset.
55 | year ({'2007', '2012'}): Use a dataset prepared for a challenge
56 | held in :obj:`year`.
57 | use_difficult (bool): If :obj:`True`, use images that are labeled as
58 | difficult in the original annotation.
59 | return_difficult (bool): If :obj:`True`, this dataset returns
60 | a boolean array
61 | that indicates whether bounding boxes are labeled as difficult
62 | or not. The default value is :obj:`False`.
63 |
64 | """
65 |
66 | def __init__(self, data_dir, split='trainval',
67 | use_difficult=False, return_difficult=False,
68 | ):
69 |
70 | # if split not in ['train', 'trainval', 'val']:
71 | # if not (split == 'test' and year == '2007'):
72 | # warnings.warn(
73 | # 'please pick split from \'train\', \'trainval\', \'val\''
74 | # 'for 2012 dataset. For 2007 dataset, you can pick \'test\''
75 | # ' in addition to the above mentioned splits.'
76 | # )
77 | id_list_file = os.path.join(
78 | data_dir, 'ImageSets/Main/{0}.txt'.format(split))
79 |
80 | self.ids = [id_.strip() for id_ in open(id_list_file)]
81 | self.data_dir = data_dir
82 | self.use_difficult = use_difficult
83 | self.return_difficult = return_difficult
84 | # attach the VOC label names to this dataset object
85 | self.label_names = VOC_BBOX_LABEL_NAMES
86 |
87 | def __len__(self):
88 | return len(self.ids)
89 | # Dataset uses this method to fetch all information for one image
90 | def get_example(self, i):
91 | """Returns the i-th example.
92 |
93 | Returns a color image and bounding boxes. The image is in CHW format.
94 | The returned image is RGB.
95 | (CHW = channel, height, width)
96 |
97 | Args:
98 | i (int): The index of the example.
99 |
100 | Returns:
101 | tuple of an image and bounding boxes
102 |
103 | """
104 | id_ = self.ids[i]
105 | # NOTE: after reading the path here, only the content after the last '\' is used
106 | anno = ET.parse(
107 | os.path.join(self.data_dir, 'Annotations', id_ + '.xml'))
108 | bbox = list()
109 | label = list()
110 | difficult = list()
111 | for obj in anno.findall('object'):
112 | # when not using difficult objects and this object is difficult, skip it
113 | # (controlled by self.use_difficult)
114 | if not self.use_difficult and int(obj.find('difficult').text) == 1:
115 | continue
116 |
117 | difficult.append(int(obj.find('difficult').text))
118 | bndbox_anno = obj.find('bndbox')
119 | # subtract 1 to make pixel indexes 0-based
120 | # (VOC pixel coordinates are 1-based)
121 | bbox.append([
122 | int(bndbox_anno.find(tag).text) - 1
123 | for tag in ('ymin', 'xmin', 'ymax', 'xmax')])
124 | name = obj.find('name').text.lower().strip()
125 | label.append(VOC_BBOX_LABEL_NAMES.index(name))
126 | bbox = np.stack(bbox).astype(np.float32)
127 | label = np.stack(label).astype(np.int32)
128 | # When `use_difficult==False`, all elements in `difficult` are False.
129 | difficult = np.array(difficult, dtype=np.bool).astype(np.uint8) # PyTorch doesn't support np.bool
130 |
131 | # Load an image
132 | img_file = os.path.join(self.data_dir, 'JPEGImages', id_ + '.jpg')
133 | img = read_image(img_file, color=True)
134 |
135 | # if self.return_difficult:
136 | # return img, bbox, label, difficult
137 | return img, bbox, label, difficult
138 |
139 | __getitem__ = get_example
140 |
141 | # label names
142 | VOC_BBOX_LABEL_NAMES = (
143 | 'aeroplane',
144 | 'bicycle',
145 | 'bird',
146 | 'boat',
147 | 'bottle',
148 | 'bus',
149 | 'car',
150 | 'cat',
151 | 'chair',
152 | 'cow',
153 | 'diningtable',
154 | 'dog',
155 | 'horse',
156 | 'motorbike',
157 | 'person',
158 | 'pottedplant',
159 | 'sheep',
160 | 'sofa',
161 | 'train',
162 | 'tvmonitor')
163 |
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/misc/convert_caffe_pretrain.py:
--------------------------------------------------------------------------------
1 | # code from ruotian luo
2 | # https://github.com/ruotianluo/pytorch-faster-rcnn
3 | import torch
4 | from torch.utils.model_zoo import load_url
5 | from torchvision import models
6 |
7 | sd = load_url("https://s3-us-west-2.amazonaws.com/jcjohns-models/vgg16-00b39a1b.pth")
8 | sd['classifier.0.weight'] = sd['classifier.1.weight']
9 | sd['classifier.0.bias'] = sd['classifier.1.bias']
10 | del sd['classifier.1.weight']
11 | del sd['classifier.1.bias']
12 |
13 | sd['classifier.3.weight'] = sd['classifier.4.weight']
14 | sd['classifier.3.bias'] = sd['classifier.4.bias']
15 | del sd['classifier.4.weight']
16 | del sd['classifier.4.bias']
17 |
18 |
19 | # specify the path to save
20 | torch.save(sd, "vgg16_caffe.pth")
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/misc/demo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/misc/demo.jpg
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/misc/train_fast.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import ipdb
4 | import matplotlib
5 | from tqdm import tqdm
6 |
7 | from utils.config import opt
8 | from data.dataset import Dataset, TestDataset
9 | from model import FasterRCNNVGG16
10 | from torch.autograd import Variable
11 | from torch.utils import data as data_
12 | from trainer import FasterRCNNTrainer
13 | from utils import array_tool as at
14 | from utils.vis_tool import visdom_bbox
15 | from utils.eval_tool import eval_detection_voc
16 |
17 | matplotlib.use('agg')
18 |
19 | def eval(dataloader, faster_rcnn, test_num=10000):
20 | pred_bboxes, pred_labels, pred_scores = list(), list(), list()
21 | gt_bboxes, gt_labels, gt_difficults = list(), list(), list()
22 | for ii, (imgs, sizes, gt_bboxes_, gt_labels_, gt_difficults_) in tqdm(enumerate(dataloader)):
23 | sizes = [sizes[0][0], sizes[1][0]]
24 | pred_bboxes_, pred_labels_, pred_scores_ = faster_rcnn.predict(imgs, [sizes])
25 | gt_bboxes += list(gt_bboxes_.numpy())
26 | gt_labels += list(gt_labels_.numpy())
27 | gt_difficults += list(gt_difficults_.numpy())
28 | pred_bboxes += pred_bboxes_
29 | pred_labels += pred_labels_
30 | pred_scores += pred_scores_
31 | if ii == test_num: break
32 |
33 | result = eval_detection_voc(
34 | pred_bboxes, pred_labels, pred_scores,
35 | gt_bboxes, gt_labels, gt_difficults,
36 | use_07_metric=True)
37 | return result
38 |
39 |
40 | def train(**kwargs):
41 | opt._parse(kwargs)
42 |
43 | dataset = Dataset(opt)
44 | print('load data')
45 | dataloader = data_.DataLoader(dataset, \
46 | batch_size=1, \
47 | shuffle=True, \
48 | # pin_memory=True,
49 | num_workers=opt.num_workers)
50 | testset = TestDataset(opt)
51 | test_dataloader = data_.DataLoader(testset,
52 | batch_size=1,
53 | num_workers=2,
54 | shuffle=False, \
55 | # pin_memory=True
56 | )
57 | faster_rcnn = FasterRCNNVGG16()
58 | print('model construct completed')
59 | trainer = FasterRCNNTrainer(faster_rcnn).cuda()
60 | if opt.load_path:
61 | trainer.load(opt.load_path)
62 | print('load pretrained model from %s' % opt.load_path)
63 |
64 | trainer.vis.text(dataset.db.label_names, win='labels')
65 | best_map = 0
66 | for epoch in range(7):
67 | trainer.reset_meters()
68 | for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
69 | scale = at.scalar(scale)
70 | img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
71 | img, bbox, label = Variable(img), Variable(bbox), Variable(label)
72 | losses = trainer.train_step(img, bbox, label, scale)
73 |
74 | if (ii + 1) % opt.plot_every == 0:
75 | if os.path.exists(opt.debug_file):
76 | ipdb.set_trace()
77 |
78 | # plot loss
79 | trainer.vis.plot_many(trainer.get_meter_data())
80 |
81 | # plot ground-truth bboxes
82 | ori_img_ = (img * 0.225 + 0.45).clamp(min=0, max=1) * 255
83 | gt_img = visdom_bbox(at.tonumpy(ori_img_)[0],
84 | at.tonumpy(bbox_)[0],
85 | label_[0].numpy())
86 | trainer.vis.img('gt_img', gt_img)
87 |
88 | # plot predicted bboxes (on the de-normalized image, since the dataloader yields no ori_img)
89 | _bboxes, _labels, _scores = trainer.faster_rcnn.predict([at.tonumpy(ori_img_)[0]], visualize=True)
90 | pred_img = visdom_bbox( at.tonumpy(ori_img_)[0],
91 | at.tonumpy(_bboxes[0]),
92 | at.tonumpy(_labels[0]).reshape(-1),
93 | at.tonumpy(_scores[0]))
94 | trainer.vis.img('pred_img', pred_img)
95 |
96 | # rpn confusion matrix(meter)
97 | trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
98 | # roi confusion matrix
99 | trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())
100 | if epoch==4:
101 | trainer.faster_rcnn.scale_lr(opt.lr_decay)
102 |
103 | eval_result = eval(test_dataloader, faster_rcnn, test_num=1e100)
104 | print(eval_result)
105 | trainer.save(mAP=eval_result['map'])
106 |
107 | if __name__ == '__main__':
108 | import fire
109 |
110 | fire.Fire()
111 |
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/model/__init__.py:
--------------------------------------------------------------------------------
1 | from .faster_rcnn_vgg16 import FasterRCNNVGG16
2 |
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/model/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/model/__pycache__/faster_rcnn.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/__pycache__/faster_rcnn.cpython-35.pyc
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/model/__pycache__/faster_rcnn_vgg16.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/__pycache__/faster_rcnn_vgg16.cpython-35.pyc
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/model/__pycache__/region_proposal_network.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/__pycache__/region_proposal_network.cpython-35.pyc
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/model/__pycache__/roi_module.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/__pycache__/roi_module.cpython-35.pyc
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/model/roi_module.py:
--------------------------------------------------------------------------------
1 | from collections import namedtuple
2 | from string import Template
3 |
4 | import cupy, torch
5 | import cupy as cp
6 | import torch as t
7 | from torch.autograd import Function
8 |
9 | from model.utils.roi_cupy import kernel_backward, kernel_forward
10 |
11 | Stream = namedtuple('Stream', ['ptr'])
12 |
13 |
14 | @cupy.util.memoize(for_each_device=True)
15 | def load_kernel(kernel_name, code, **kwargs):
16 | cp.cuda.runtime.free(0)
17 | code = Template(code).substitute(**kwargs)
18 | kernel_code = cupy.cuda.compile_with_cache(code)
19 | return kernel_code.get_function(kernel_name)
20 |
21 |
22 | CUDA_NUM_THREADS = 1024
23 |
24 |
25 | def GET_BLOCKS(N, K=CUDA_NUM_THREADS):
26 | return (N + K - 1) // K
27 |
28 |
29 | class RoI(Function):
30 | """
31 | NOTE:only CUDA-compatible
32 | """
33 |
34 | def __init__(self, outh, outw, spatial_scale):
35 | self.forward_fn = load_kernel('roi_forward', kernel_forward)
36 | self.backward_fn = load_kernel('roi_backward', kernel_backward)
37 | self.outh, self.outw, self.spatial_scale = outh, outw, spatial_scale
38 |
39 | def forward(self, x, rois):
40 | # NOTE: MAKE SURE input is contiguous too
41 | x = x.contiguous()
42 | rois = rois.contiguous()
43 | self.in_size = B, C, H, W = x.size()
44 | self.N = N = rois.size(0)
45 | output = t.zeros(N, C, self.outh, self.outw).cuda()
46 | self.argmax_data = t.zeros(N, C, self.outh, self.outw).int().cuda()
47 | self.rois = rois
48 | args = [x.data_ptr(), rois.data_ptr(),
49 | output.data_ptr(),
50 | self.argmax_data.data_ptr(),
51 | self.spatial_scale, C, H, W,
52 | self.outh, self.outw,
53 | output.numel()]
54 | stream = Stream(ptr=torch.cuda.current_stream().cuda_stream)
55 | self.forward_fn(args=args,
56 | block=(CUDA_NUM_THREADS, 1, 1),
57 | grid=(GET_BLOCKS(output.numel()), 1, 1),
58 | stream=stream)
59 | return output
60 |
61 | def backward(self, grad_output):
62 | ##NOTE: IMPORTANT CONTIGUOUS
63 | # TODO: input
64 | grad_output = grad_output.contiguous()
65 | B, C, H, W = self.in_size
66 | grad_input = t.zeros(self.in_size).cuda()
67 | stream = Stream(ptr=torch.cuda.current_stream().cuda_stream)
68 | args = [grad_output.data_ptr(),
69 | self.argmax_data.data_ptr(),
70 | self.rois.data_ptr(),
71 | grad_input.data_ptr(),
72 | self.N, self.spatial_scale, C, H, W, self.outh, self.outw,
73 | grad_input.numel()]
74 | self.backward_fn(args=args,
75 | block=(CUDA_NUM_THREADS, 1, 1),
76 | grid=(GET_BLOCKS(grad_input.numel()), 1, 1),
77 | stream=stream
78 | )
79 | return grad_input, None
80 |
81 |
82 | class RoIPooling2D(t.nn.Module):
83 |
84 | def __init__(self, outh, outw, spatial_scale):
85 | super(RoIPooling2D, self).__init__()
86 | self.RoI = RoI(outh, outw, spatial_scale)
87 |
88 | def forward(self, x, rois):
89 | return self.RoI(x, rois)
90 |
91 |
92 | def test_roi_module():
93 | ## fake data###
94 | B, N, C, H, W, PH, PW = 2, 8, 4, 32, 32, 7, 7
95 |
96 | bottom_data = t.randn(B, C, H, W).cuda()
97 | bottom_rois = t.randn(N, 5)
98 | bottom_rois[:int(N / 2), 0] = 0
99 | bottom_rois[int(N / 2):, 0] = 1
100 | bottom_rois[:, 1:] = (t.rand(N, 4) * 100).float()
101 | bottom_rois = bottom_rois.cuda()
102 | spatial_scale = 1. / 16
103 | outh, outw = PH, PW
104 |
105 | # pytorch version
106 | module = RoIPooling2D(outh, outw, spatial_scale)
107 | x = t.autograd.Variable(bottom_data, requires_grad=True)
108 | rois = t.autograd.Variable(bottom_rois)
109 | output = module(x, rois)
110 | output.sum().backward()
111 |
112 | def t2c(variable):
113 | npa = variable.data.cpu().numpy()
114 | return cp.array(npa)
115 |
116 | def test_eq(variable, array, info):
117 | cc = cp.asnumpy(array)
118 | neq = (cc != variable.data.cpu().numpy())
119 | assert neq.sum() == 0, 'test failed: %s' % info
120 |
121 | # chainer version,if you're going to run this
122 | # pip install chainer
123 | import chainer.functions as F
124 | from chainer import Variable
125 | x_cn = Variable(t2c(x))
126 |
127 | o_cn = F.roi_pooling_2d(x_cn, t2c(rois), outh, outw, spatial_scale)
128 | test_eq(output, o_cn.array, 'forward')
129 | F.sum(o_cn).backward()
130 | test_eq(x.grad, x_cn.grad, 'backward')
131 | print('test pass')
132 |
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/model/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/__init__.py
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/model/utils/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/model/utils/__pycache__/bbox_tools.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/__pycache__/bbox_tools.cpython-35.pyc
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/model/utils/__pycache__/creator_tool.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/__pycache__/creator_tool.cpython-35.pyc
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/model/utils/__pycache__/roi_cupy.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/__pycache__/roi_cupy.cpython-35.pyc
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/model/utils/nms/__init__.py:
--------------------------------------------------------------------------------
1 | from model.utils.nms.non_maximum_suppression import non_maximum_suppression
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/model/utils/nms/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/nms/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/model/utils/nms/__pycache__/_nms_gpu_post_py.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/nms/__pycache__/_nms_gpu_post_py.cpython-35.pyc
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/model/utils/nms/__pycache__/non_maximum_suppression.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/nms/__pycache__/non_maximum_suppression.cpython-35.pyc
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/model/utils/nms/_nms_gpu_post.pyx:
--------------------------------------------------------------------------------
1 | cimport numpy as np
2 | from libc.stdint cimport uint64_t
3 |
4 | import numpy as np
5 |
6 | def _nms_gpu_post(np.ndarray[np.uint64_t, ndim=1] mask,
7 | int n_bbox,
8 | int threads_per_block,
9 | int col_blocks
10 | ):
11 | cdef:
12 | int i, j, nblock, index
13 | uint64_t inblock
14 | int n_selection = 0
15 | uint64_t one_ull = 1
16 | np.ndarray[np.int32_t, ndim=1] selection
17 | np.ndarray[np.uint64_t, ndim=1] remv
18 |
19 | selection = np.zeros((n_bbox,), dtype=np.int32)
20 | remv = np.zeros((col_blocks,), dtype=np.uint64)
21 |
22 | for i in range(n_bbox):
23 | nblock = i // threads_per_block
24 | inblock = i % threads_per_block
25 |
26 | if not (remv[nblock] & one_ull << inblock):
27 | selection[n_selection] = i
28 | n_selection += 1
29 |
30 | index = i * col_blocks
31 | for j in range(nblock, col_blocks):
32 | remv[j] |= mask[index + j]
33 | return selection, n_selection
34 |
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/model/utils/nms/_nms_gpu_post_py.py:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 |
4 | def _nms_gpu_post( mask,
5 | n_bbox,
6 | threads_per_block,
7 | col_blocks
8 | ):
9 | n_selection = 0
10 | one_ull = np.array([1],dtype=np.uint64)
11 | selection = np.zeros((n_bbox,), dtype=np.int32)
12 | remv = np.zeros((col_blocks,), dtype=np.uint64)
13 |
14 | for i in range(n_bbox):
15 | nblock = i // threads_per_block
16 | inblock = i % threads_per_block
17 |
18 | if not (remv[nblock] & one_ull << inblock):
19 | selection[n_selection] = i
20 | n_selection += 1
21 |
22 | index = i * col_blocks
23 | for j in range(nblock, col_blocks):
24 | remv[j] |= mask[index + j]
25 | return selection, n_selection
26 |
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/model/utils/nms/build.py:
--------------------------------------------------------------------------------
1 | from distutils.core import setup
2 | from distutils.extension import Extension
3 | from Cython.Distutils import build_ext
4 |
5 | ext_modules = [Extension("_nms_gpu_post", ["_nms_gpu_post.pyx"])]
6 | setup(
7 | name="Hello pyx",
8 | cmdclass={'build_ext': build_ext},
9 | ext_modules=ext_modules
10 | )
11 |
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/model/utils/nms/build/lib.linux-x86_64-3.5/_nms_gpu_post.cpython-35m-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/nms/build/lib.linux-x86_64-3.5/_nms_gpu_post.cpython-35m-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/model/utils/nms/build/temp.linux-x86_64-3.5/_nms_gpu_post.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/nms/build/temp.linux-x86_64-3.5/_nms_gpu_post.o
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/model/utils/roi_cupy.py:
--------------------------------------------------------------------------------
1 | kernel_forward = '''
2 | extern "C"
3 | __global__ void roi_forward(const float* const bottom_data,const float* const bottom_rois,
4 | float* top_data, int* argmax_data,
5 | const double spatial_scale,const int channels,const int height,
6 | const int width, const int pooled_height,
7 | const int pooled_width,const int NN
8 | ){
9 |
10 | int idx = blockIdx.x * blockDim.x + threadIdx.x;
11 | if(idx>=NN)
12 | return;
13 | const int pw = idx % pooled_width;
14 | const int ph = (idx / pooled_width) % pooled_height;
15 | const int c = (idx / pooled_width / pooled_height) % channels;
16 | int num = idx / pooled_width / pooled_height / channels;
17 | const int roi_batch_ind = bottom_rois[num * 5 + 0];
18 | const int roi_start_w = round(bottom_rois[num * 5 + 1] * spatial_scale);
19 | const int roi_start_h = round(bottom_rois[num * 5 + 2] * spatial_scale);
20 | const int roi_end_w = round(bottom_rois[num * 5 + 3] * spatial_scale);
21 | const int roi_end_h = round(bottom_rois[num * 5 + 4] * spatial_scale);
22 | // Force malformed ROIs to be 1x1
23 | const int roi_width = max(roi_end_w - roi_start_w + 1, 1);
24 | const int roi_height = max(roi_end_h - roi_start_h + 1, 1);
25 | const float bin_size_h = static_cast<float>(roi_height)
26 | / static_cast<float>(pooled_height);
27 | const float bin_size_w = static_cast<float>(roi_width)
28 | / static_cast<float>(pooled_width);
29 |
30 | int hstart = static_cast<int>(floor(static_cast<float>(ph)
31 | * bin_size_h));
32 | int wstart = static_cast<int>(floor(static_cast<float>(pw)
33 | * bin_size_w));
34 | int hend = static_cast<int>(ceil(static_cast<float>(ph + 1)
35 | * bin_size_h));
36 | int wend = static_cast<int>(ceil(static_cast<float>(pw + 1)
37 | * bin_size_w));
38 |
39 | // Add roi offsets and clip to input boundaries
40 | hstart = min(max(hstart + roi_start_h, 0), height);
41 | hend = min(max(hend + roi_start_h, 0), height);
42 | wstart = min(max(wstart + roi_start_w, 0), width);
43 | wend = min(max(wend + roi_start_w, 0), width);
44 | bool is_empty = (hend <= hstart) || (wend <= wstart);
45 |
46 | // Define an empty pooling region to be zero
47 | float maxval = is_empty ? 0 : -1E+37;
48 | // If nothing is pooled, argmax=-1 causes nothing to be backprop'd
49 | int maxidx = -1;
50 | const int data_offset = (roi_batch_ind * channels + c) * height * width;
51 | for (int h = hstart; h < hend; ++h) {
52 | for (int w = wstart; w < wend; ++w) {
53 | int bottom_index = h * width + w;
54 | if (bottom_data[data_offset + bottom_index] > maxval) {
55 | maxval = bottom_data[data_offset + bottom_index];
56 | maxidx = bottom_index;
57 | }
58 | }
59 | }
60 | top_data[idx]=maxval;
61 | argmax_data[idx]=maxidx;
62 | }
63 | '''
64 | kernel_backward = '''
65 | extern "C"
66 | __global__ void roi_backward(const float* const top_diff,
67 | const int* const argmax_data,const float* const bottom_rois,
68 | float* bottom_diff, const int num_rois,
69 | const double spatial_scale, int channels,
70 | int height, int width, int pooled_height,
71 | int pooled_width,const int NN)
72 | {
73 |
74 | int idx = blockIdx.x * blockDim.x + threadIdx.x;
75 | // NOTE: important, >= instead of >
76 | if(idx>=NN)
77 | return;
78 | int w = idx % width;
79 | int h = (idx / width) % height;
80 | int c = (idx/ (width * height)) % channels;
81 | int num = idx / (width * height * channels);
82 |
83 | float gradient = 0;
84 | // Accumulate gradient over all ROIs that pooled this element
85 | for (int roi_n = 0; roi_n < num_rois; ++roi_n) {
86 | // Skip if ROI's batch index doesn't match num
87 | if (num != static_cast<int>(bottom_rois[roi_n * 5])) {
88 | continue;
89 | }
90 |
91 | int roi_start_w = round(bottom_rois[roi_n * 5 + 1]
92 | * spatial_scale);
93 | int roi_start_h = round(bottom_rois[roi_n * 5 + 2]
94 | * spatial_scale);
95 | int roi_end_w = round(bottom_rois[roi_n * 5 + 3]
96 | * spatial_scale);
97 | int roi_end_h = round(bottom_rois[roi_n * 5 + 4]
98 | * spatial_scale);
99 |
100 | // Skip if ROI doesn't include (h, w)
101 | const bool in_roi = (w >= roi_start_w && w <= roi_end_w &&
102 | h >= roi_start_h && h <= roi_end_h);
103 | if (!in_roi) {
104 | continue;
105 | }
106 |
107 | int offset = (roi_n * channels + c) * pooled_height
108 | * pooled_width;
109 |
110 | // Compute feasible set of pooled units that could have pooled
111 | // this bottom unit
112 |
113 | // Force malformed ROIs to be 1x1
114 | int roi_width = max(roi_end_w - roi_start_w + 1, 1);
115 | int roi_height = max(roi_end_h - roi_start_h + 1, 1);
116 |
117 | float bin_size_h = static_cast<float>(roi_height)
118 | / static_cast<float>(pooled_height);
119 | float bin_size_w = static_cast<float>(roi_width)
120 | / static_cast<float>(pooled_width);
121 |
122 | int phstart = floor(static_cast<float>(h - roi_start_h)
123 | / bin_size_h);
124 | int phend = ceil(static_cast<float>(h - roi_start_h + 1)
125 | / bin_size_h);
126 | int pwstart = floor(static_cast<float>(w - roi_start_w)
127 | / bin_size_w);
128 | int pwend = ceil(static_cast<float>(w - roi_start_w + 1)
129 | / bin_size_w);
130 |
131 | phstart = min(max(phstart, 0), pooled_height);
132 | phend = min(max(phend, 0), pooled_height);
133 | pwstart = min(max(pwstart, 0), pooled_width);
134 | pwend = min(max(pwend, 0), pooled_width);
135 | for (int ph = phstart; ph < phend; ++ph) {
136 | for (int pw = pwstart; pw < pwend; ++pw) {
137 | int index_ = ph * pooled_width + pw + offset;
138 | if (argmax_data[index_] == (h * width + w)) {
139 | gradient += top_diff[index_];
140 | }
141 | }
142 | }
143 | }
144 | bottom_diff[idx] = gradient;
145 | }
146 | '''
147 |
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/requirements.txt:
--------------------------------------------------------------------------------
1 | scikit-image
2 | pprint
3 | cython
4 |
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 cy
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/utils/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/utils/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/utils/__pycache__/array_tool.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/utils/__pycache__/array_tool.cpython-35.pyc
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/utils/__pycache__/config.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/utils/__pycache__/config.cpython-35.pyc
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/utils/__pycache__/eval_tool.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/utils/__pycache__/eval_tool.cpython-35.pyc
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/utils/__pycache__/vis_tool.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/utils/__pycache__/vis_tool.cpython-35.pyc
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/utils/array_tool.py:
--------------------------------------------------------------------------------
1 | """
2 | tools to convert data between types (np.ndarray / torch Tensor / Variable)
3 | """
4 | import torch as t
5 | import numpy as np
6 |
7 |
8 | def tonumpy(data):
9 | if isinstance(data, np.ndarray):
10 | return data
11 | if isinstance(data, t._TensorBase):
12 | return data.cpu().numpy()
13 | if isinstance(data, t.autograd.Variable):
14 | return tonumpy(data.data)
15 |
16 |
17 | def totensor(data, cuda=True):
18 | if isinstance(data, np.ndarray):
19 | tensor = t.from_numpy(data)
20 | if isinstance(data, t._TensorBase):
21 | tensor = data
22 | if isinstance(data, t.autograd.Variable):
23 | tensor = data.data
24 | if cuda:
25 | tensor = tensor.cuda()
26 | return tensor
27 |
28 |
29 | def tovariable(data):
30 | if isinstance(data, np.ndarray):
31 | return tovariable(totensor(data))
32 | if isinstance(data, t._TensorBase):
33 | return t.autograd.Variable(data)
34 | if isinstance(data, t.autograd.Variable):
35 | return data
36 | else:
37 | raise ValueError("UnKnow data type: %s, input should be {np.ndarray,Tensor,Variable}" %type(data))
38 |
39 |
40 | def scalar(data):
41 | if isinstance(data, np.ndarray):
42 | return data.reshape(1)[0]
43 | if isinstance(data, t._TensorBase):
44 | return data.view(1)[0]
45 | if isinstance(data, t.autograd.Variable):
46 | return data.data.view(1)[0]
47 |
--------------------------------------------------------------------------------
/FasterRcnn_pytorch/utils/config.py:
--------------------------------------------------------------------------------
1 | from pprint import pprint
2 |
3 |
4 | # Default Configs for training
5 | # NOTE: config items can be overwritten by passing arguments on the command line.
6 | # e.g. --voc-data-dir='./data/'
7 |
8 | class Config:
9 | # data
10 | # modified by bobo
11 | voc_data_dir = '/home/bobo/PycharmProjects/torchProjectss/fasterbychenyun/VOCdevkit/Pascal VOC2007/VOCdevkit/VOC2007'
12 | min_size = 600 # image resize
13 | max_size = 1000 # image resize
14 | num_workers = 8
15 | test_num_workers = 8
16 |
17 | # sigma for l1_smooth_loss
18 | rpn_sigma = 3.
19 | roi_sigma = 1.
20 |
21 | # param for optimizer
22 | # 0.0005 in origin paper but 0.0001 in tf-faster-rcnn
23 | weight_decay = 0.0005
24 | lr_decay = 0.1 # 1e-3 -> 1e-4
25 | lr = 1e-3
26 |
27 |
28 | # visualization
29 | env = 'faster-rcnn' # visdom env
30 | port = 8097
31 | plot_every = 40 # vis every N iter
32 |
33 | # preset
34 | data = 'voc'
35 | pretrained_model = 'vgg16'
36 |
37 | # training
38 | epoch = 14
39 |
40 |
41 | use_adam = False # Use Adam optimizer
42 | use_chainer = False # try match everything as chainer
43 | use_drop = False # use dropout in RoIHead
44 | # debug
45 | debug_file = '/tmp/debugf'
46 |
47 | test_num = 10000
48 | # model
49 | load_path = None
50 |
51 | #caffe_pretrain = False # use caffe pretrained model instead of torchvision
52 | caffe_pretrain = True # use caffe pretrained model instead of torchvision
53 | caffe_pretrain_path = '/home/bobo/PycharmProjects/torchProjectss/fasterbychenyun/simplefasterrcnnpytorchmaster/checkpoints/vgg16_caffe.pth'
54 |
55 |
56 | def _parse(self, kwargs):
57 | state_dict = self._state_dict()
58 | for k, v in kwargs.items():
59 | if k not in state_dict:
60 | raise ValueError('Unknown Option: "--%s"' % k)
61 | setattr(self, k, v)
62 |
63 | print('======user config========')
64 | pprint(self._state_dict())
65 | print('==========end============')
66 |
67 | def _state_dict(self):
68 | return {k: getattr(self, k) for k, _ in Config.__dict__.items() \
69 | if not k.startswith('_')}
70 |
71 |
72 | opt = Config()
73 |
--------------------------------------------------------------------------------
/GhostNet/G-Ghost.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/GhostNet/G-Ghost.png
--------------------------------------------------------------------------------
/GhostNet/readme.md:
--------------------------------------------------------------------------------
1 | # GhostNets on Heterogeneous Devices via Cheap Operations
2 |
3 | #### This repository is indexed in [PytorchNetHub](https://github.com/bobo0810/PytorchNetHub)
4 |
5 | # Notes
6 |
7 | - CVPR 2020: C-GhostNet
8 | - IJCV 2022: G-GhostNet [official repo](https://github.com/huawei-noah/Efficient-AI-Backbones) [author's Zhihu write-up](https://zhuanlan.zhihu.com/p/540547718)
9 |
10 | ## Quick summary
11 |
12 | | Topic | Description |
13 | | ---- | :----------------------------------------------------------- |
14 | | Problem | Problem 1: complex operations such as depthwise separable convolution (channel-wise + pointwise) and channel shuffle parallelize poorly on GPUs, which costs time.<br>Problem 2: feature redundancy is observed inside each stage. |
15 | | Solution | Problem 1: use only plain/group convolutions, for better GPU parallelism.<br>Problem 2: apply the Ghost idea at the stage level, generating the redundant features with "cheap operations". |
16 | | Implementation | C-Ghost: feature redundancy at the convolution level, replacing a single plain convolution.<br>G-Ghost: feature redundancy at the stage level, replacing an entire stage. |
17 |
18 | # Implementation details
19 |
20 |
21 | The core module is `Stage`; its structure is shown below (a minimal sketch of the cheap-operation idea follows the figure).
22 |
23 | 
24 |
25 |
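26 | As a minimal sketch of the "cheap operations" idea (my own illustration based on the paper, not code extracted from the official repo; the class name `GhostModule` and the 50/50 channel split are assumptions):
27 |
28 | ```python
29 | import torch
30 | import torch.nn as nn
31 |
32 | class GhostModule(nn.Module):
33 |     # C-Ghost sketch: half of the output channels come from an ordinary conv,
34 |     # the other half are generated cheaply by a depthwise conv over them.
35 |     def __init__(self, in_ch, out_ch):
36 |         super().__init__()
37 |         init_ch = out_ch // 2
38 |         self.primary = nn.Sequential(
39 |             nn.Conv2d(in_ch, init_ch, 1, bias=False),
40 |             nn.BatchNorm2d(init_ch), nn.ReLU(inplace=True))
41 |         self.cheap = nn.Sequential(  # the depthwise conv is the "cheap operation"
42 |             nn.Conv2d(init_ch, init_ch, 3, padding=1, groups=init_ch, bias=False),
43 |             nn.BatchNorm2d(init_ch), nn.ReLU(inplace=True))
44 |
45 |     def forward(self, x):
46 |         y = self.primary(x)
47 |         return torch.cat([y, self.cheap(y)], dim=1)  # [N, out_ch, H, W]
48 | ```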
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Max deGroot, Ellis Brown
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/RepVGG/readme.md:
--------------------------------------------------------------------------------
1 | # RepVGG: Making VGG-style ConvNets Great Again
2 |
3 | #### This repository is indexed in [PytorchNetHub](https://github.com/bobo0810/PytorchNetHub)
4 |
5 |
6 |
7 | # Notes
8 |
9 | - CVPR 2021 [official repo](https://github.com/DingXiaoH/RepVGG) [author's Zhihu write-up](https://zhuanlan.zhihu.com/p/344324470)
10 |
11 | - This repository extracts only the network-definition code, for analysis.
12 |
13 | - Widely applied
14 |
15 | > 1. [yolov6](https://zhuanlan.zhihu.com/p/533127196), [yolov7](https://arxiv.org/abs/2207.02696), etc.: train a structurally re-parameterized network, then convert it to an equivalent simple structure at inference time to speed up inference.
16 | > 2. [MNN linear over-parameterization tool](https://www.yuque.com/mnn/cn/ph6021): design a small model --> train a linearly over-parameterized large model --> convert it back to the equivalent small model for inference.
17 |
18 | ## Quick summary
19 |
20 | | | |
21 | | -------- | ------------------------------------------------------------ |
22 | | Problem | Problem 1: multi-branch structures such as ResNet consume memory and slow down inference.<br>Problem 2: complex operations such as depthwise convolution consume memory and have poor hardware support. |
23 | | Innovation | RepVGG is a VGG-style network: fast, memory-efficient, and flexible.<br>For problem 1: no branches at inference time.<br>For problem 2: a VGG-style stack of only 3x3 conv + ReLU, with no complex operations. |
24 | | Implementation | Decouple the training-time and inference-time architectures.<br>Training: multiple branches for better accuracy.<br>Inference: structural re-parameterization into an equivalent VGG-style network. |
25 |
26 | # Implementation details
27 |
28 | The core method is `get_equivalent_kernel_bias`; a sketch of steps 1 and 2 follows the figure below.
29 |
30 | 1. Fuse BN
31 |
32 | `conv layer + BN layer` --> `conv layer`
33 |
34 | - [BN layer explained](https://blog.csdn.net/ECNU_LZJ/article/details/104203604)
35 |
36 | - [conv + BN fusion formula](https://blog.csdn.net/oYeZhou/article/details/112802348)
37 |
38 | 2. Convert the remaining branches to 3x3 convolutions
39 |
40 | 1x1 conv -> 3x3 conv
41 |
42 | BN -> 3x3 conv
43 |
44 | 3. Merge the three branches
45 |
46 | `conv(x, W1) + conv(x, W2) + conv(x, W3) = conv(x, W1+W2+W3)`
47 |
48 | 
49 |
50 |
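51 | A minimal sketch of steps 1 and 2 (my own illustration of the standard fusion formulas, not the official implementation; the function names are assumptions):
52 |
53 | ```python
54 | import torch
55 | import torch.nn.functional as F
56 |
57 | def fuse_conv_bn(kernel, bn):
58 |     # Step 1: conv + BN -> conv. Scale each output channel of the kernel by
59 |     # gamma/std and fold the BN running statistics into a bias term.
60 |     std = torch.sqrt(bn.running_var + bn.eps)
61 |     scale = (bn.weight / std).reshape(-1, 1, 1, 1)
62 |     return kernel * scale, bn.bias - bn.running_mean * bn.weight / std
63 |
64 | def pad_1x1_to_3x3(kernel_1x1):
65 |     # Step 2: a 1x1 conv equals a 3x3 conv whose kernel is zero everywhere
66 |     # except at the center, so zero-pad the kernel by one on each side.
67 |     return F.pad(kernel_1x1, [1, 1, 1, 1])
68 | ```
69 |
70 | Step 3 is then just an elementwise sum of the three 3x3 kernels and of the three biases.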
--------------------------------------------------------------------------------
/RepVGG/repvgg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/RepVGG/repvgg.png
--------------------------------------------------------------------------------
/SSD_pytorch/checkpoint/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/checkpoint/.gitkeep
--------------------------------------------------------------------------------
/SSD_pytorch/data/__init__.py:
--------------------------------------------------------------------------------
1 | from .voc0712 import VOCDetection, VOCAnnotationTransform, VOC_CLASSES, VOC_ROOT
2 | import torch
3 | import cv2
4 | import numpy as np
5 |
6 | def detection_collate(batch):
7 | """Custom collate fn for dealing with batches of images that have a different
8 | number of associated object annotations (bounding boxes).
9 |
10 | Arguments:
11 | batch: (tuple) A tuple of tensor images and lists of annotations
12 |
13 | Return:
14 | A tuple containing:
15 | 1) (tensor) batch of images stacked on their 0 dim
16 | 2) (list of tensors) annotations for a given image are stacked on
17 | 0 dim
18 | """
19 | targets = []
20 | imgs = []
21 | for sample in batch:
22 | imgs.append(sample[0])
23 | targets.append(torch.FloatTensor(sample[1]))
24 | return torch.stack(imgs, 0), targets
25 |
26 |
27 | def base_transform(image, size, mean):
28 | x = cv2.resize(image, (size, size)).astype(np.float32)
29 | x -= mean
30 | x = x.astype(np.float32)
31 | return x
32 |
33 |
34 | class BaseTransform:
35 | def __init__(self, size, mean):
36 | self.size = size
37 | self.mean = np.array(mean, dtype=np.float32)
38 |
39 | def __call__(self, image, boxes=None, labels=None):
40 | return base_transform(image, self.size, self.mean), boxes, labels
41 |
--------------------------------------------------------------------------------
/SSD_pytorch/data/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/data/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/SSD_pytorch/data/__pycache__/voc0712.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/data/__pycache__/voc0712.cpython-35.pyc
--------------------------------------------------------------------------------
/SSD_pytorch/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .functions import *
2 | from .modules import *
3 |
--------------------------------------------------------------------------------
/SSD_pytorch/models/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/SSD_pytorch/models/__pycache__/box_utils.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/__pycache__/box_utils.cpython-35.pyc
--------------------------------------------------------------------------------
/SSD_pytorch/models/__pycache__/ssd.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/__pycache__/ssd.cpython-35.pyc
--------------------------------------------------------------------------------
/SSD_pytorch/models/functions/__init__.py:
--------------------------------------------------------------------------------
1 | from .detection import Detect
2 | from .prior_box import PriorBox
3 |
4 |
5 | __all__ = ['Detect', 'PriorBox']
6 |
--------------------------------------------------------------------------------
/SSD_pytorch/models/functions/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/functions/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/SSD_pytorch/models/functions/__pycache__/detection.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/functions/__pycache__/detection.cpython-35.pyc
--------------------------------------------------------------------------------
/SSD_pytorch/models/functions/__pycache__/prior_box.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/functions/__pycache__/prior_box.cpython-35.pyc
--------------------------------------------------------------------------------
/SSD_pytorch/models/functions/detection.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Function
3 | from ..box_utils import decode, nms
4 | from SSD_pytorch.utils.config import opt
5 |
6 |
7 | class Detect(Function):
8 | """At test time, Detect is the final layer of SSD. Decode location preds,
9 | apply non-maximum suppression to location predictions based on conf
10 | scores and threshold to a top_k number of output predictions for both
11 | confidence score and locations.
12 | """
13 | def __init__(self, num_classes, bkg_label, top_k, conf_thresh, nms_thresh):
14 | self.num_classes = num_classes
15 | self.background_label = bkg_label
16 | self.top_k = top_k
17 | # Parameters used in nms.
18 | self.nms_thresh = nms_thresh
19 | if nms_thresh <= 0:
20 | raise ValueError('nms_thresh must be positive.')
21 | self.conf_thresh = conf_thresh
22 | self.variance = opt.voc['variance']
23 |
24 | def forward(self, loc_data, conf_data, prior_data):
25 | """
26 | Args:
27 | loc_data: (tensor) Loc preds from loc layers
28 | Shape: [batch,num_priors*4]
29 | conf_data: (tensor) Conf preds from conf layers
30 | Shape: [batch*num_priors,num_classes]
31 | prior_data: (tensor) Prior boxes and variances from priorbox layers
32 | Shape: [1,num_priors,4]
33 | """
34 | num = loc_data.size(0) # batch size
35 | num_priors = prior_data.size(0)
36 | output = torch.zeros(num, self.num_classes, self.top_k, 5)
37 | conf_preds = conf_data.view(num, num_priors,
38 | self.num_classes).transpose(2, 1)
39 |
40 | # Decode predictions into bboxes.
41 | for i in range(num):
42 | decoded_boxes = decode(loc_data[i], prior_data, self.variance)
43 | # For each class, perform nms
44 | conf_scores = conf_preds[i].clone()
45 |
46 | for cl in range(1, self.num_classes):
47 | c_mask = conf_scores[cl].gt(self.conf_thresh)
48 | scores = conf_scores[cl][c_mask]
49 | if scores.dim() == 0:
50 | continue
51 | l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
52 | boxes = decoded_boxes[l_mask].view(-1, 4)
53 | # idx of highest scoring and non-overlapping boxes per class
54 | ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
55 | output[i, cl, :count] = \
56 | torch.cat((scores[ids[:count]].unsqueeze(1),
57 | boxes[ids[:count]]), 1)
58 | flt = output.contiguous().view(num, -1, 5)
59 | _, idx = flt[:, :, 0].sort(1, descending=True)
60 | _, rank = idx.sort(1)
61 | flt[(rank >= self.top_k).unsqueeze(-1).expand_as(flt)] = 0  # zero boxes ranked beyond top_k (mask assignment writes through; .fill_(0) on an indexed copy would not)
62 | return output
63 |
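64 |
65 | # Shape sketch (assumption, not in the original file): for SSD300/VOC,
66 | #   loc_data [N, 8732, 4], conf_data [N*8732, 21], prior_data [8732, 4]
67 | #   -> output [N, 21, top_k, 5], each row being (score, xmin, ymin, xmax, ymax)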
--------------------------------------------------------------------------------
/SSD_pytorch/models/functions/prior_box.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from math import sqrt as sqrt
3 | from itertools import product as product
4 | import torch
5 |
6 |
7 | class PriorBox(object):
8 | """Compute priorbox coordinates in center-offset form for each source
9 | feature map.
10 | For each feature map, generate the prior boxes (center coordinates and offsets).
11 | """
12 | def __init__(self, cfg):
13 | super(PriorBox, self).__init__()
14 | # 300
15 | self.image_size = cfg['min_dim']
16 | # number of priors for feature map location (either 4 or 6)
17 | # number of prior boxes per grid cell (4 or 6)
18 | self.num_priors = len(cfg['aspect_ratios'])
19 | # variances
20 | self.variance = cfg['variance'] or [0.1]
21 | # [38, 19, 10, 5, 3, 1], i.e. the feature map sizes
22 | self.feature_maps = cfg['feature_maps']
23 | # s_k is the scale of a prior box relative to the image; s_min and s_max are the smallest and largest such scales
24 | # min_sizes and max_sizes are used to compute s_k and s_k_prime, which give the two (w, h) pairs for aspect ratio 1
25 | # prior-box scales of the feature maps: [30, 60, 111, 162, 213, 264]
26 | self.min_sizes = cfg['min_sizes']
27 | # [60, 111, 162, 213, 264, 315]
28 | self.max_sizes = cfg['max_sizes']
29 | # receptive-field steps, i.e. the downscaling factor relative to the input image
30 | self.steps = cfg['steps']
31 | # aspect ratios [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
32 | self.aspect_ratios = cfg['aspect_ratios']
33 | # True
34 | self.clip = cfg['clip']
35 | # VOC
36 | self.version = cfg['name']
37 | for v in self.variance:
38 | if v <= 0:
39 | raise ValueError('Variances must be greater than 0')
40 |
41 | def forward(self):
42 | # mean collects the generated prior boxes
43 | mean = []
44 | # iterate over the feature map sizes
45 | for k, f in enumerate(self.feature_maps):
46 | # product computes the Cartesian product of iterables; it is equivalent to nested for loops
47 | # repeat sets how many times the sequence is repeated.
48 | # Reference: http://funhacks.net/2017/02/13/itertools/
49 | # i.e. if f is 2, (i, j) takes the values 00, 01, 10, 11, covering every combination
50 |
51 | # for k=0, f=38, range(f) is (0, 1, ..., 37), so product(range(f), repeat=2) yields (0,0), (0,1), ... up to (37,0), ..., (37,37)
52 | # iterate over every grid cell of one feature map
53 | for i, j in product(range(f), repeat=2):
54 | # f_k is the size of the k-th feature map
55 | # image_size=300; steps holds the receptive-field step of each feature map
56 | f_k = self.image_size / self.steps[k]
57 | # unit center x,y
58 | # the center of each cell is ((j+0.5)/|f_k|, (i+0.5)/|f_k|), where |f_k| is the size of the k-th feature map and i, j ∈ [0, |f_k|)
59 | cx = (j + 0.5) / f_k
60 | cy = (i + 0.5) / f_k
61 |
62 |
63 | # overall: first add the two (w, h) pairs for aspect ratio 1 (the special cases), then add the other aspect ratios in a loop
64 | # aspect_ratio: 1
65 | # real size: min_size
66 | # scale of the prior box relative to the image
67 | # s_k is computed in order to derive w and h
68 | s_k = self.min_sizes[k]/self.image_size
69 | # with aspect ratio 1, w = s_k and h = s_k
70 | mean += [cx, cy, s_k, s_k]
71 |
72 | # for aspect ratio 1, one extra default box is added
73 | # rel size: sqrt(s_k * s_(k+1))
74 | s_k_prime = sqrt(s_k * (self.max_sizes[k]/self.image_size))
75 | # with aspect ratio 1, w = s_k_prime and h = s_k_prime
76 | mean += [cx, cy, s_k_prime, s_k_prime]
77 |
78 | # the remaining aspect ratios
79 | for ar in self.aspect_ratios[k]:
80 | mean += [cx, cy, s_k*sqrt(ar), s_k/sqrt(ar)]
81 | mean += [cx, cy, s_k/sqrt(ar), s_k*sqrt(ar)]
82 | # convert the list mean into a tensor
83 | output = torch.Tensor(mean).view(-1, 4)
84 |
85 | # clip: True clamps every element of the tensor to the interval [min, max], in place
86 | # i.e. elements > max are set to max, and likewise for min
87 | if self.clip:
88 | output.clamp_(max=1, min=0)
89 | return output
90 |
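91 | # Usage sketch (assumption, not in the original file): with the SSD300 voc
92 | # config from SSD_pytorch/utils/config.py, forward() yields all 8732 priors:
93 | #   priors = PriorBox(opt.voc).forward()  # torch.Size([8732, 4])
94 | #   38*38*4 + 19*19*6 + 10*10*6 + 5*5*6 + 3*3*4 + 1*1*4 == 8732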
--------------------------------------------------------------------------------
/SSD_pytorch/models/modules/__init__.py:
--------------------------------------------------------------------------------
1 | from .l2norm import L2Norm
2 | from .multibox_loss import MultiBoxLoss
3 |
4 | __all__ = ['L2Norm', 'MultiBoxLoss']
5 |
--------------------------------------------------------------------------------
/SSD_pytorch/models/modules/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/modules/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/SSD_pytorch/models/modules/__pycache__/init_weights.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/modules/__pycache__/init_weights.cpython-35.pyc
--------------------------------------------------------------------------------
/SSD_pytorch/models/modules/__pycache__/l2norm.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/modules/__pycache__/l2norm.cpython-35.pyc
--------------------------------------------------------------------------------
/SSD_pytorch/models/modules/__pycache__/multibox_loss.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/modules/__pycache__/multibox_loss.cpython-35.pyc
--------------------------------------------------------------------------------
/SSD_pytorch/models/modules/init_weights.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.init as init
3 | '''
4 | Use the xavier method to initialize the weights of the new layers added after vgg, the loc regression layers, and the conf classification layers.
5 | '''
6 | def xavier(param):
7 | '''
8 | Initialize the weights of the new layers with the xavier algorithm
9 | '''
10 | init.xavier_uniform(param)
11 |
12 |
13 | def weights_init(m):
14 | if isinstance(m, nn.Conv2d):
15 | xavier(m.weight.data)
16 | m.bias.data.zero_()
--------------------------------------------------------------------------------
/SSD_pytorch/models/modules/l2norm.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.autograd import Function
4 | from torch.autograd import Variable
5 | import torch.nn.init as init
6 |
7 | class L2Norm(nn.Module):
8 | def __init__(self,n_channels, scale):
9 | super(L2Norm,self).__init__()
10 | self.n_channels = n_channels
11 | self.gamma = scale or None
12 | self.eps = 1e-10
13 | self.weight = nn.Parameter(torch.Tensor(self.n_channels))
14 | self.reset_parameters()
15 |
16 | def reset_parameters(self):
17 | init.constant(self.weight,self.gamma)
18 |
19 | def forward(self, x):
20 | norm = x.pow(2).sum(dim=1, keepdim=True).sqrt()+self.eps
21 | #x /= norm
22 | x = torch.div(x,norm)
23 | out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x
24 | return out
25 |
--------------------------------------------------------------------------------
/SSD_pytorch/readme.md:
--------------------------------------------------------------------------------
1 | # Refactored SSD implementation
2 |
3 | ----------
4 |
5 | This repository is based on [ssd.pytorch](https://github.com/amdegroot/ssd.pytorch) by [Max deGroot](https://github.com/amdegroot) and [Ellis Brown](https://github.com/ellisbrown); many thanks for their selfless contribution.
6 |
7 |
8 | - [Original repo](https://github.com/amdegroot/ssd.pytorch)
9 | - [Annotated version of the original](https://github.com/bobo0810/pytorchSSD)
10 | - [Refactored version](https://github.com/bobo0810/AnnotatedNetworkModelGit/tree/master/SSD_pytorch) highly recommended! (this repository)
11 |
12 | ----------
13 |
14 | # Currently supported:
15 |
16 | - Datasets: the original authors support VOC and COCO; this repository supports VOC only. COCO may be added when time allows.
17 | - Network: SSD300
18 |
19 | # Why:
20 |
21 | The original authors clearly don't sweat the small details, and the messy structure doesn't stop their code from being excellent. My perfectionism kicked in: one week to understand the source, one day to refactor. The world feels tidy again~
22 |
23 | ###### Note: this project changes no functionality; it is a pure refactoring, meant to aid understanding.
24 |
25 |
26 | # Compared with the original:
27 |
28 | - All parameters can be set in config.py
29 | - Restructured, with extensive code comments
30 |
31 | ### Environment:
32 |
33 | | python version | pytorch version |
34 | |------------|-------------|
35 | | 3.5 | 0.3.0 |
36 |
37 | ----------
38 |
39 | # Typical project structure
40 |
41 | 1. Define the network
42 |
43 | 
44 |
45 | 2. Wrap the dataset
46 |
47 | 
48 |
49 | 3. Utility classes
50 |
51 | 
52 |
53 | 4. Main entry point
54 |
55 | 
56 |
57 |
58 | ----------
59 |
60 | # SSD network architecture
61 |
62 | 
63 |
64 | - vgg16 architecture
65 |
66 | 
67 |
68 | ----------
69 |
70 | # Prepare the dataset:
71 | Download the VOC2007 and VOC2012 datasets and set voc_data_root in utils/config.py to the dataset root directory.
72 | ```
73 | VOCdevkit
74 | │
75 | └───VOC2007
76 | │ │ JPEGImages
77 | │ │ ImageSets
78 | │ │ Annotations
79 | │ │ ...
80 | │
81 | └───VOC2012
82 | │ │ JPEGImages
83 | │ │ ImageSets
84 | │ │ Annotations
85 | │ │ ...
86 | ```
87 |
88 | ----------
89 |
90 | # Train:
91 |
92 | Purpose: train on the VOC2007 and VOC2012 train+val sets
93 |
94 | 1. Start Visdom (a visualization tool similar to TensorFlow's TensorBoard)
95 | ```
96 | # First install Python server and client
97 | pip install visdom
98 | # Start the server
99 | python -m visdom.server
100 | ```
101 | 2. Download SSD's base network VGG16 (fc layers removed)
102 |
103 | Download: [vgg16_reducedfc.pth](https://pan.baidu.com/s/19Iumt072GMiFGlS5lVNy1Q)
104 |
105 | After downloading, place it in the checkpoint folder, or set the basenet path in config.py.
106 |
107 | 3. Start training
108 |
109 | In main.py, uncomment train(), comment out the other calls, and run.
110 |
111 | ----------
112 |
113 | # Eval:
114 |
115 | Purpose: compute per-class AP and mAP on the VOC2007 test set
116 |
117 | 1. Set load_model_path in config.py to the path of the pretrained model
118 |
119 | Pretrained model: [ssd300_VOC_100000.pth](https://pan.baidu.com/s/1hrJo__owbF3ufepwJJ0uzA)
120 |
121 |
122 | 2. In main.py, uncomment eval(), comment out the other calls, and run.
123 |
124 | ----------
125 |
126 | # Test:
127 |
128 | Purpose: run the VOC2007 test set and write the predictions to txt files
129 |
130 | 1. Set load_model_path in config.py to the path of the pretrained model
131 |
132 | Pretrained model: [ssd300_VOC_100000.pth](https://pan.baidu.com/s/1hrJo__owbF3ufepwJJ0uzA)
133 |
134 | 2. In main.py, uncomment test(), comment out the other calls, and run.
135 |
136 | Result
137 |
138 |

139 |
140 |
141 | ----------
142 |
143 | # Predict:
144 |
145 | Purpose: visualize the prediction for one image
146 |
147 | 1. Set load_model_path in config.py to the path of the pretrained model
148 |
149 | Pretrained model: [ssd300_VOC_100000.pth](https://pan.baidu.com/s/1hrJo__owbF3ufepwJJ0uzA)
150 | 2. In main.py, uncomment predict(), comment out the other calls, and run.
151 |
152 |
153 | Original image
154 |
155 |

156 |
157 |
158 |
159 | Image after preprocessing
160 |
161 |

162 |
163 |
164 |
165 |
166 | Prediction result
167 |
168 |

169 |
170 |
171 | ----------
172 |
173 | # About the authors
174 |
175 | - Original authors: [Max deGroot](https://github.com/amdegroot), [Ellis Brown](https://github.com/ellisbrown)
176 |
177 | - Author of this repository: [Mr.Li](https://github.com/bobo0810)
--------------------------------------------------------------------------------
/SSD_pytorch/temp/test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/temp/test.png
--------------------------------------------------------------------------------
/SSD_pytorch/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .augmentations import SSDAugmentation
--------------------------------------------------------------------------------
/SSD_pytorch/utils/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/utils/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/SSD_pytorch/utils/__pycache__/augmentations.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/utils/__pycache__/augmentations.cpython-35.pyc
--------------------------------------------------------------------------------
/SSD_pytorch/utils/__pycache__/config.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/utils/__pycache__/config.cpython-35.pyc
--------------------------------------------------------------------------------
/SSD_pytorch/utils/__pycache__/eval_untils.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/utils/__pycache__/eval_untils.cpython-35.pyc
--------------------------------------------------------------------------------
/SSD_pytorch/utils/__pycache__/timer.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/utils/__pycache__/timer.cpython-35.pyc
--------------------------------------------------------------------------------
/SSD_pytorch/utils/__pycache__/visualize.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/utils/__pycache__/visualize.cpython-35.pyc
--------------------------------------------------------------------------------
/SSD_pytorch/utils/config.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # power by Mr.Li
3 | # default parameters
4 | import os.path
5 | class DefaultConfig():
6 | env = 'SSD_' # name of the visdom environment
7 | visdom=True # whether to visualize
8 | # currently supported networks
9 | model = 'vgg16'
10 |
11 |
12 | voc_data_root='/home/bobo/data/VOCdevkit/' # VOC dataset root; it contains two subfolders, VOC2007 and VOC2012
13 |
14 | # base network, i.e. the feature-extraction network (pretrained vgg16 with the fully connected layers removed)
15 | basenet='/home/bobo/windowsPycharmProject/SSD_pytorch/checkpoint/vgg16_reducedfc.pth' # must be a full path to the pretrained vgg16 without fc layers
16 | batch_size = 32 # training batch size
17 | start_iter=0 # iteration to resume training from
18 | num_workers = 4 # number of data-loading threads
19 | use_gpu = True # use GPU or not
20 | lr = 0.001 # initial learning rate
21 | momentum=0.9 # optimizer momentum
22 | weight_decay=5e-4 # weight decay for SGD
23 | gamma=0.1 # Gamma update for SGD (learning-rate adjustment)
24 |
25 | checkpoint_root ='/home/bobo/windowsPycharmProject/SSD_pytorch/checkpoint/' # directory for saving models
26 | # load_model_path = None # path of the pretrained model to load; None means do not load
27 | load_model_path ='/home/bobo/windowsPycharmProject/SSD_pytorch/checkpoint/ssd300_COCO_100000.pth'
28 | # load_model_path='C:\\Users\\Administrator\\Desktop\\ssd300_COCO_10000.pth'
29 |
30 |
31 | # gets home dir cross platform
32 | HOME = os.path.expanduser("~")
33 | # make the bounding boxes pretty
34 | COLORS = ((255, 0, 0, 128), (0, 255, 0, 128), (0, 0, 255, 128),
35 | (0, 255, 255, 128), (255, 0, 255, 128), (255, 255, 0, 128))
36 | MEANS = (104, 117, 123)
37 | # SSD300 configuration
38 | voc = {
39 | 'num_classes': 21, # 20 object classes + 1 background
40 | 'lr_steps': (80000, 100000, 120000),
41 | 'max_iter': 120000, # number of iterations
42 | 'feature_maps': [38, 19, 10, 5, 3, 1],
43 | 'min_dim': 300, # SSD300 currently only supports training on 300x300 inputs
44 | 'steps': [8, 16, 32, 64, 100, 300], # receptive-field steps, i.e. downscaling factors relative to the input image
45 | 'min_sizes': [30, 60, 111, 162, 213, 264],
46 | 'max_sizes': [60, 111, 162, 213, 264, 315],
47 | 'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
48 | 'variance': [0.1, 0.2], # variances
49 | 'clip': True,
50 | 'name': 'VOC',
51 | }
52 |
53 | # evaluation
54 | confidence_threshold=0.01 # detection confidence threshold (or 0.05)
55 | top_k=5 # further limit the number of predictions to parse
56 | cleanup=True # clean up and delete the result files after eval
57 | temp= '/home/bobo/windowsPycharmProject/SSD_pytorch/temp' # temporary files produced during evaluation
58 | annopath = os.path.join(voc_data_root, 'VOC2007', 'Annotations', '%s.xml')
59 | imgpath = os.path.join(voc_data_root, 'VOC2007', 'JPEGImages', '%s.jpg')
60 | imgsetpath = os.path.join(voc_data_root, 'VOC2007', 'ImageSets',
61 | 'Main', '{:s}.txt')
62 |
63 | # test
64 | temp_test='/home/bobo/windowsPycharmProject/SSD_pytorch/temp/' # where the network predictions on the VOC2007 test set are saved
65 |
66 | # predict: visualize one predicted image
67 | test_img='/home/bobo/windowsPycharmProject/SSD_pytorch/temp/test.png'
68 |
69 |
70 | # instantiate an object of this class
71 | opt=DefaultConfig()
--------------------------------------------------------------------------------
/SSD_pytorch/utils/timer.py:
--------------------------------------------------------------------------------
1 | import time
2 | class Timer(object):
3 | """A simple timer."""
4 | def __init__(self):
5 | self.total_time = 0.
6 | self.calls = 0
7 | self.start_time = 0.
8 | self.diff = 0.
9 | self.average_time = 0.
10 |
11 | def tic(self):
12 | # using time.time instead of time.clock because time.clock
13 | # does not normalize for multithreading
14 | self.start_time = time.time()
15 |
16 | def toc(self, average=True):
17 | self.diff = time.time() - self.start_time
18 | self.total_time += self.diff
19 | self.calls += 1
20 | self.average_time = self.total_time / self.calls
21 | if average:
22 | return self.average_time
23 | else:
24 | return self.diff
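25 |
26 | # Usage sketch (assumption, not in the original file):
27 | #   timer = Timer()
28 | #   timer.tic(); do_work(); elapsed = timer.toc(average=False)
29 | # timer.average_time then holds the running mean over all tic/toc pairs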
--------------------------------------------------------------------------------
/SSD_pytorch/utils/visualize.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding:utf-8 -*-
3 | # power by Mr.Li
4 | import visdom
5 | import time
6 | import numpy as np
7 | import torch
8 | class Visualizer(object):
9 | '''
10 | Wraps the basic visdom operations, but you can still call the native
11 | visdom interface via `self.vis.function`
12 | '''
13 | def __init__(self, env='default', **kwargs):
14 | self.vis = visdom.Visdom(env=env, **kwargs)
15 | # index of the latest point of each curve, i.e. its x coordinate
16 | # e.g. ('loss', 23) means the 23rd point of the loss curve
17 | self.index = {}
18 | self.log_text = ''
19 | def reinit(self,env='default',**kwargs):
20 | '''
21 | Reinitialize with a new visdom configuration
22 | '''
23 | self.vis = visdom.Visdom(env=env,**kwargs)
24 | return self
25 | def plot_many(self, d):
26 | '''
27 | Plot several loss curves at once
28 | @params d: dict (name,value) i.e. ('loss',0.11)
29 | '''
30 | for k, v in d.items():
31 | self.plot(k, v)
32 | def img_many(self, d):
33 | '''
34 | Display several images at once
35 | '''
36 | for k, v in d.items():
37 | self.img(k, v)
38 | def plot(self, name, y,**kwargs):
39 | '''
40 | self.plot('loss',1.00)
41 | '''
42 | # look up the current index of this curve
43 | x = self.index.get(name, 0)
44 | self.vis.line(Y=np.array([y]), X=np.array([x]),
45 | win=name, # window name
46 | opts=dict(title=name),
47 | update=None if x == 0 else 'append', # append to the existing curve
48 | **kwargs
49 | )
50 | # advance the index by 1
51 | self.index[name] = x + 1
52 | def img(self, name, img_,**kwargs):
53 | '''
54 | self.img('input_img',t.Tensor(64,64))
55 | self.img('input_imgs',t.Tensor(3,64,64))
56 | self.img('input_imgs',t.Tensor(100,1,64,64))
57 | self.img('input_imgs',t.Tensor(100,3,64,64),nrows=10)
58 |
59 | !!!don't ~~self.img('input_imgs',t.Tensor(100,64,64),nrows=10)~~!!!
60 | '''
61 | self.vis.images(img_.cpu().numpy(),
62 | win=name,
63 | opts=dict(title=name),
64 | **kwargs
65 | )
66 | def log(self,info,win='log_text'):
67 | '''
68 | self.log({'loss':1,'lr':0.0001})
69 | print a log entry
70 | '''
71 |
72 | self.log_text += ('[{time}] {info} <br>'.format(
73 | time=time.strftime('%m%d_%H%M%S'),\
74 | info=info))
75 | self.vis.text(self.log_text,win)
76 | def __getattr__(self, name):
77 | return getattr(self.vis, name)
78 |
79 | def create_vis_plot(self,_xlabel, _ylabel, _title, _legend):
80 | viz = visdom.Visdom()
81 | '''
82 | Create a new visualization window
83 | '''
84 | return viz.line(
85 | X=torch.zeros((1,)).cpu(),
86 | Y=torch.zeros((1, 3)).cpu(),
87 | opts=dict(
88 | xlabel=_xlabel,
89 | ylabel=_ylabel,
90 | title=_title,
91 | legend=_legend
92 | )
93 | )
94 |
95 | def update_vis_plot(self,iteration, loc, conf, window1, window2, update_type,
96 | epoch_size=1):
97 | '''
98 | Update the data in a visualization window
99 | '''
100 | viz = visdom.Visdom()
101 | viz.line(
102 | X=torch.ones((1, 3)).cpu() * iteration,
103 | Y=torch.Tensor([loc, conf, loc + conf]).unsqueeze(0).cpu() / epoch_size,
104 | win=window1,
105 | update=update_type
106 | )
107 | # initialize epoch plot on first iteration
108 |
109 | if iteration == 0:
110 | viz.line(
111 | X=torch.zeros((1, 3)).cpu(),
112 | Y=torch.Tensor([loc, conf, loc + conf]).unsqueeze(0).cpu(),
113 | win=window2,
114 | update=True
115 | )
--------------------------------------------------------------------------------
/UNet_pytorch/dice_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Function, Variable
3 |
4 | class DiceCoeff(Function):
5 | """Dice coeff for individual examples"""
6 |
7 | def forward(self, input, target):
8 | self.save_for_backward(input, target)
9 | eps = 0.0001
10 | self.inter = torch.dot(input.view(-1), target.view(-1))
11 | self.union = torch.sum(input) + torch.sum(target) + eps
12 |
13 | t = (2 * self.inter.float() + eps) / self.union.float()
14 | return t
15 |
16 | # This function has only a single output, so it gets only one gradient
17 | def backward(self, grad_output):
18 |
19 | input, target = self.saved_variables
20 | grad_input = grad_target = None
21 |
22 | if self.needs_input_grad[0]:
23 | grad_input = grad_output * 2 * (target * self.union - self.inter) \
24 | / (self.union * self.union)  # divide by union squared (the original `/ union * union` multiplied back)
25 | if self.needs_input_grad[1]:
26 | grad_target = None
27 |
28 | return grad_input, grad_target
29 |
30 |
31 | def dice_coeff(input, target):
32 | """Dice coeff for batches"""
33 | if input.is_cuda:
34 | s = torch.FloatTensor(1).cuda().zero_()
35 | else:
36 | s = torch.FloatTensor(1).zero_()
37 |
38 | for i, c in enumerate(zip(input, target)):
39 | s = s + DiceCoeff().forward(c[0], c[1])
40 |
41 | return s / (i + 1)
42 |
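43 | # Worked example (assumption, not in the original file): for input = [1., 1., 0.]
44 | # and target = [1., 0., 0.], inter = 1 and union = 2 + 1 + eps, so the Dice
45 | # coefficient is (2*1 + eps) / (3 + eps) ≈ 0.667.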
--------------------------------------------------------------------------------
/UNet_pytorch/eval.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 |
4 | from dice_loss import dice_coeff
5 |
6 |
7 | def eval_net(net, dataset, gpu=False):
8 | '''
9 | :param net: the network being evaluated
10 | :param dataset: the validation set
11 | '''
12 | """Evaluation without the densecrf with the dice coefficient"""
13 | tot = 0
14 | for i, b in enumerate(dataset):
15 | img = b[0]
16 | true_mask = b[1]
17 |
18 | img = torch.from_numpy(img).unsqueeze(0)
19 | true_mask = torch.from_numpy(true_mask).unsqueeze(0)
20 |
21 | if gpu:
22 | img = img.cuda()
23 | true_mask = true_mask.cuda()
24 |
25 | mask_pred = net(img)[0]
26 | mask_pred = (F.sigmoid(mask_pred) > 0.5).float()
27 | # metric: the Dice coefficient, which measures the similarity of two sets
28 | tot += dice_coeff(mask_pred, true_mask).item()
29 | return tot / (i + 1)  # average over all samples (i is the last 0-based index)
30 |
--------------------------------------------------------------------------------
/UNet_pytorch/predict.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 |
4 | import numpy as np
5 | import torch
6 | import torch.nn.functional as F
7 |
8 | from PIL import Image
9 |
10 | from unet import UNet
11 | from utils import resize_and_crop, normalize, split_img_into_squares, hwc_to_chw, merge_masks, dense_crf
12 | from utils import plot_img_and_mask
13 |
14 | from torchvision import transforms
15 |
16 |
17 | def predict_img(net,
18 | full_img,
19 | scale_factor=0.5,
20 | out_threshold=0.5,
21 | use_dense_crf=True,
22 | use_gpu=False):
23 | img_height = full_img.size[1]
24 | img_width = full_img.size[0]
25 |
26 | img = resize_and_crop(full_img, scale=scale_factor)
27 | img = normalize(img)
28 |
29 | left_square, right_square = split_img_into_squares(img)
30 |
31 | left_square = hwc_to_chw(left_square)
32 | right_square = hwc_to_chw(right_square)
33 |
34 | X_left = torch.from_numpy(left_square).unsqueeze(0)
35 | X_right = torch.from_numpy(right_square).unsqueeze(0)
36 |
37 | if use_gpu:
38 | X_left = X_left.cuda()
39 | X_right = X_right.cuda()
40 |
41 | with torch.no_grad():
42 | output_left = net(X_left)
43 | output_right = net(X_right)
44 |
45 | left_probs = F.sigmoid(output_left).squeeze(0)
46 | right_probs = F.sigmoid(output_right).squeeze(0)
47 |
48 | tf = transforms.Compose(
49 | [
50 | transforms.ToPILImage(),
51 | transforms.Resize(img_height),
52 | transforms.ToTensor()
53 | ]
54 | )
55 |
56 | left_probs = tf(left_probs.cpu())
57 | right_probs = tf(right_probs.cpu())
58 |
59 | left_mask_np = left_probs.squeeze().cpu().numpy()
60 | right_mask_np = right_probs.squeeze().cpu().numpy()
61 |
62 | full_mask = merge_masks(left_mask_np, right_mask_np, img_width)
63 |
64 | if use_dense_crf:
65 | full_mask = dense_crf(np.array(full_img).astype(np.uint8), full_mask)
66 |
67 | return full_mask > out_threshold
68 |
69 |
70 | def get_args():
71 | parser = argparse.ArgumentParser()
72 | parser.add_argument('--model', '-m', default='MODEL.pth',
73 | metavar='FILE',
74 | help="Specify the file in which is stored the model"
75 | " (default : 'MODEL.pth')")
76 | parser.add_argument('--input', '-i', metavar='INPUT', nargs='+',
77 | help='filenames of input images', required=True)
78 |
79 | parser.add_argument('--output', '-o', metavar='INPUT', nargs='+',
80 | help='filenames of output images')
81 | parser.add_argument('--cpu', '-c', action='store_true',
82 | help="Do not use the cuda version of the net",
83 | default=False)
84 | parser.add_argument('--viz', '-v', action='store_true',
85 | help="Visualize the images as they are processed",
86 | default=False)
87 | parser.add_argument('--no-save', '-n', action='store_true',
88 | help="Do not save the output masks",
89 | default=False)
90 | parser.add_argument('--no-crf', '-r', action='store_true',
91 | help="Do not use dense CRF postprocessing",
92 | default=False)
93 | parser.add_argument('--mask-threshold', '-t', type=float,
94 | help="Minimum probability value to consider a mask pixel white",
95 | default=0.5)
96 | parser.add_argument('--scale', '-s', type=float,
97 | help="Scale factor for the input images",
98 | default=0.5)
99 |
100 | return parser.parse_args()
101 |
102 |
103 | def get_output_filenames(args):
104 | in_files = args.input
105 | out_files = []
106 |
107 | if not args.output:
108 | for f in in_files:
109 | pathsplit = os.path.splitext(f)
110 | out_files.append("{}_OUT{}".format(pathsplit[0], pathsplit[1]))
111 | elif len(in_files) != len(args.output):
112 | print("Error : Input files and output files are not of the same length")
113 | raise SystemExit()
114 | else:
115 | out_files = args.output
116 |
117 | return out_files
118 |
119 |
120 | def mask_to_image(mask):
121 | return Image.fromarray((mask * 255).astype(np.uint8))
122 |
123 |
124 | if __name__ == "__main__":
125 | args = get_args()
126 | in_files = args.input
127 | out_files = get_output_filenames(args)
128 |
129 | net = UNet(n_channels=3, n_classes=1)
130 |
131 | print("Loading model {}".format(args.model))
132 |
133 | if not args.cpu:
134 | print("Using CUDA version of the net, prepare your GPU !")
135 | net.cuda()
136 | net.load_state_dict(torch.load(args.model))
137 | else:
138 | net.cpu()
139 | net.load_state_dict(torch.load(args.model, map_location='cpu'))
140 | print("Using CPU version of the net, this may be very slow")
141 |
142 | print("Model loaded !")
143 |
144 | for i, fn in enumerate(in_files):
145 | print("\nPredicting image {} ...".format(fn))
146 |
147 | img = Image.open(fn)
148 | if img.size[0] < img.size[1]:
149 | print("Error: image height larger than the width")
150 |
151 | mask = predict_img(net=net,
152 | full_img=img,
153 | scale_factor=args.scale,
154 | out_threshold=args.mask_threshold,
155 | use_dense_crf=not args.no_crf,
156 | use_gpu=not args.cpu)
157 |
158 | if args.viz:
159 | print("Visualizing results for image {}, close to continue ...".format(fn))
160 | plot_img_and_mask(img, mask)
161 |
162 | if not args.no_save:
163 | out_fn = out_files[i]
164 | result = mask_to_image(mask)
165 | result.save(out_files[i])
166 |
167 | print("Mask saved to {}".format(out_files[i]))
168 |
--------------------------------------------------------------------------------
/UNet_pytorch/readme.md:
--------------------------------------------------------------------------------
1 | # U-Net
2 |
3 | ----------
4 |
5 | This repository is based on [Pytorch-UNet](https://github.com/milesial/Pytorch-UNet) by [milesial](https://github.com/milesial); many thanks for the selfless contribution.
6 |
7 |
8 | - [Original repo](https://github.com/milesial/Pytorch-UNet)
9 | - [Annotated version of the original](https://github.com/bobo0810/AnnotatedNetworkModelGit/tree/master/UNet_pytorch)
10 |
11 | ----------
12 |
13 | # Currently supported:
14 |
15 | - Dataset: Kaggle's [Carvana Image Masking Challenge](https://pan.baidu.com/s/1tQI7aQ4y9k0K3qBjCnJ53Q)
16 | - Network: U-Net
17 |
18 |
19 | # Compared with the original:
20 |
21 | - All parameters can be set in config.py
22 | - Restructured, with extensive code comments
23 | - loading
24 |
25 | ----------
26 |
27 | - Environment:
28 |
29 | | python version | pytorch version |
30 | | ----------- | ---------- |
31 | | 3.5 | 0.4 |
32 |
33 | - Dependencies:
34 |
35 | pip install pydensecrf
36 |
37 | ----------
38 |
39 | # U-Net architecture
40 |
41 | 
42 |
43 | - ###### In the original paper the 3x3 convolutions on the contracting (left) side use no padding, so each conv shrinks the feature map; it must therefore be cropped before fusing with the right-side feature map.
44 | - ###### In this repository the left-side 3x3 convolutions use pad=1, so the feature map size is unchanged; after up-convolution the scales already match and the maps can be fused with the right side directly.
45 |
46 |
47 | ----------
48 |
49 | # Prepare the dataset:
50 | Download Kaggle's [Carvana Image Masking Challenge](https://pan.baidu.com/s/1tQI7aQ4y9k0K3qBjCnJ53Q) dataset and set its root directory in utils/config.py.
51 | ```
52 | CarvanaImageMaskingChallenge
53 | │
54 | └───train
55 | │ │ xxx.gif
56 | │ │ ...
57 | │
58 | └───train_masks
59 | │ │ xxx.jpg
60 | │ │ ...
61 | ```
62 |
63 |
64 | ----------
65 |
66 | # Train:
67 |
68 | 1. Set the training parameters in config.py
69 |
70 | 2. Run train.py to start training
71 |
72 | ----------
73 |
74 | # Eval:
75 |
76 | After every training epoch, the Dice coefficient (a measure of the similarity of two sets) is computed on the validation set.
77 | ----------
78 |
79 | # Predict:
80 |
81 | Purpose: visualize the prediction for one image
82 |
83 | 1. Place the pretrained model in the project root directory
84 |
85 | Pretrained model: [MODEL.pth](https://pan.baidu.com/s/1D_OtX16iL3aJefvOqyRWnw)
86 |
87 | 2. Predict a single image
88 |
89 | python predict.py -i image.jpg -o output.jpg
90 |
91 | 3. Predict multiple images and show them
92 |
93 | python predict.py -i image1.jpg image2.jpg --viz --no-save
94 |
95 |
96 |
97 |


98 |
99 |
100 |
101 | ----------
102 |
103 | # About the authors
104 |
105 | - Original author: [milesial](https://github.com/milesial)
106 |
107 | - Author of this repository: [Mr.Li](https://github.com/bobo0810)
--------------------------------------------------------------------------------
/UNet_pytorch/submit.py:
--------------------------------------------------------------------------------
1 | import os
2 | from PIL import Image
3 |
4 | import torch
5 |
6 | from predict import predict_img
7 | from utils import rle_encode
8 | from unet import UNet
9 |
10 |
11 | def submit(net, gpu=False):
12 | """Used for Kaggle submission: predicts and encode all test images"""
13 | dir = 'data/test/'
14 |
15 | N = len(list(os.listdir(dir)))
16 | with open('SUBMISSION.csv', 'a') as f:
17 | f.write('img,rle_mask\n')
18 | for index, i in enumerate(os.listdir(dir)):
19 | print('{}/{}'.format(index, N))
20 |
21 | img = Image.open(dir + i)
22 |
23 | mask = predict_img(net, img, use_gpu=gpu)  # pass gpu as use_gpu, not as scale_factor
24 | enc = rle_encode(mask)
25 | f.write('{},{}\n'.format(i, ' '.join(map(str, enc))))
26 |
27 |
28 | if __name__ == '__main__':
29 | net = UNet(3, 1).cuda()
30 | net.load_state_dict(torch.load('MODEL.pth'))
31 | submit(net, True)
32 |
--------------------------------------------------------------------------------
/UNet_pytorch/train.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 | import numpy as np
4 |
5 | import torch
6 | import torch.backends.cudnn as cudnn
7 | import torch.nn as nn
8 | import torch.nn.functional as F
9 | from torch import optim
10 |
11 | from eval import eval_net
12 | from unet import UNet
13 | from utils import get_ids, split_ids, split_train_val, get_imgs_and_masks, batch
14 | from utils.config import opt_train
15 |
16 | def train_net(net,
17 | epochs=5,
18 | batch_size=1,
19 | lr=0.1,
20 | val_percent=0.05, # train : val = 0.95 : 0.05
21 | save_cp=True,
22 | gpu=False,
23 | img_scale=0.5):
24 |
25 | dir_img = opt_train.dir_img
26 | dir_mask = opt_train.dir_mask
27 | dir_checkpoint = opt_train.dir_checkpoint
28 |
29 | # get the list of image ids (file names without extension)
30 | ids = get_ids(dir_img)
31 | # build tuples of (image name without extension, index)
32 | # e.g. for n=2 and an image bobo.jpg, yields (bobo, 0) and (bobo, 1)
33 | # index 0 crops the width to the left part of the image; index 1 to the right part
34 | ids = split_ids(ids)
35 | # shuffle the dataset, then split it into train and val by the val_percent ratio
36 | iddataset = split_train_val(ids, val_percent)
37 |
38 |
39 | print('''
40 | Starting training:
41 | Epochs: {}
42 | Batch size: {}
43 | Learning rate: {}
44 | Training set size: {}
45 | Validation set size: {}
46 | GPU: {}
47 | '''.format(epochs, batch_size, lr, len(iddataset['train']),
48 | len(iddataset['val']), str(gpu)))
49 |
50 | # size of the training set
51 | N_train = len(iddataset['train'])
52 |
53 | optimizer = optim.SGD(net.parameters(),
54 | lr=lr,
55 | momentum=0.9,
56 | weight_decay=0.0005)
57 |
58 | # binary cross-entropy
59 | criterion = nn.BCELoss()
60 |
61 | for epoch in range(epochs):
62 | print('Starting epoch {}/{}.'.format(epoch + 1, epochs))
63 |
64 | # reset the generators
65 | # rebuild the train and val generators every epoch
66 | train = get_imgs_and_masks(iddataset['train'], dir_img, dir_mask, img_scale)
67 | val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask, img_scale)
68 |
69 |
70 |
71 |
72 | # reset the epoch loss counter
73 | epoch_loss = 0
74 |
75 | for i, b in enumerate(batch(train, batch_size)):
76 | # get one batch of image tensors and the corresponding ground-truth masks
77 | # index 0 crops the width to the left part [384,384,3]; index 1 to the right part [384,190,3]
78 | imgs = np.array([i[0] for i in b]).astype(np.float32)
79 | true_masks = np.array([i[1] for i in b])
80 |
81 | # convert the values to torch tensors
82 | imgs = torch.from_numpy(imgs)
83 | true_masks = torch.from_numpy(true_masks)
84 |
85 | # move the training data to the GPU
86 | if gpu:
87 | imgs = imgs.cuda()
88 | true_masks = true_masks.cuda()
89 |
90 | # network output: the predicted masks [10,1,384,384]
91 | masks_pred = net(imgs)
92 | # apply sigmoid
93 | masks_probs = F.sigmoid(masks_pred)
94 | masks_probs_flat = masks_probs.view(-1)
95 |
96 | true_masks_flat = true_masks.view(-1)
97 | # compute the binary cross-entropy loss
98 | loss = criterion(masks_probs_flat, true_masks_flat)
99 | # accumulate the losses of all batches to compute the mean loss of the epoch
100 | epoch_loss += loss.item()
101 |
102 | # print the progress within the epoch and the loss of the current batch
103 | print('{0:.4f} --- loss: {1:.6f}'.format(i * batch_size / N_train, loss.item()))
104 |
105 | # zero the optimizer gradients
106 | optimizer.zero_grad()
107 | # backpropagate
108 | loss.backward()
109 | # update the parameters
110 | optimizer.step()
111 |
112 | # end of the epoch: print the mean loss over the epoch
113 | print('Epoch finished ! Loss: {}'.format(epoch_loss / (i + 1)))
114 |
115 | # evaluate on the validation set after every epoch
116 | if True:
117 | # metric: the Dice coefficient, which measures the similarity of two sets
118 | val_dice = eval_net(net, val, gpu)
119 | print('Validation Dice Coeff: {}'.format(val_dice))
120 |
121 | # save the model
122 | if save_cp:
123 | torch.save(net.state_dict(),
124 | dir_checkpoint + 'CP{}.pth'.format(epoch + 1))
125 | print('Checkpoint {} saved !'.format(epoch + 1))
126 |
127 |
128 |
129 |
130 |
131 | if __name__ == '__main__':
132 |
133 | # get the training parameters
134 | args = opt_train
135 | # n_channels: number of input image channels; n_classes=1 for binary segmentation
136 | net = UNet(n_channels=3, n_classes=1)
137 |
138 | # load a pretrained model
139 | if args.load:
140 | net.load_state_dict(torch.load(args.load))
141 | print('Model loaded from {}'.format(args.load))
142 |
143 | # move the network to the GPU
144 | if args.gpu:
145 | net.cuda()
146 | cudnn.benchmark = True # faster, but uses more memory
147 |
148 | try:
149 | train_net(net=net,
150 | epochs=args.epochs,
151 | batch_size=args.batchsize,
152 | lr=args.lr,
153 | gpu=args.gpu,
154 | img_scale=args.scale)
155 | except KeyboardInterrupt:
156 | # on keyboard interrupt, save the latest model
157 | torch.save(net.state_dict(), 'INTERRUPTED.pth')
158 | print('Saved interrupt')
159 | try:
160 | sys.exit(0)
161 | except SystemExit:
162 | os._exit(0)
163 |
--------------------------------------------------------------------------------
/UNet_pytorch/unet/__init__.py:
--------------------------------------------------------------------------------
1 | from .unet_model import UNet
2 |
--------------------------------------------------------------------------------
/UNet_pytorch/unet/unet_model.py:
--------------------------------------------------------------------------------
1 | # full assembly of the sub-parts to form the complete net
2 |
3 | from .unet_parts import *
4 |
5 | class UNet(nn.Module):
6 | def __init__(self, n_channels, n_classes):
7 | super(UNet, self).__init__()
8 | self.inc = inconv(n_channels, 64) # input layer; n_channels=3 for 3-channel input images
9 | self.down1 = down(64, 128)
10 | self.down2 = down(128, 256)
11 | self.down3 = down(256, 512)
12 | self.down4 = down(512, 512)
13 | self.up1 = up(1024, 256)
14 | self.up2 = up(512, 128)
15 | self.up3 = up(256, 64)
16 | self.up4 = up(128, 64)
17 | # the final 1x1 convolution maps the 64-channel feature map to the desired depth (the number of classes)
18 | self.outc = outconv(64, n_classes) # output layer; n_classes=1 for binary segmentation
19 |
20 | def forward(self, x):
21 | x1 = self.inc(x)
22 | x2 = self.down1(x1)
23 | x3 = self.down2(x2)
24 | x4 = self.down3(x3)
25 | x5 = self.down4(x4)
26 | x = self.up1(x5, x4)
27 | x = self.up2(x, x3)
28 | x = self.up3(x, x2)
29 | x = self.up4(x, x1)
30 | x = self.outc(x)
31 | return x
32 |
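33 | # Channel arithmetic sketch (assumption, not in the original file): down4 keeps
34 | # 512 channels, so up1 concatenates the upsampled x5 (512) with x4 (512) into
35 | # 1024 input channels and convolves down to 256; up2-up4 follow the same pattern.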
--------------------------------------------------------------------------------
/UNet_pytorch/unet/unet_parts.py:
--------------------------------------------------------------------------------
1 | # sub-parts of the U-Net model
2 |
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 |
7 |
8 | class double_conv(nn.Module):
9 | '''(conv => BN => ReLU) * 2'''
10 | def __init__(self, in_ch, out_ch):
11 | super(double_conv, self).__init__()
12 | self.conv = nn.Sequential(
13 | # each repetition contains two convolution layers with 3x3 kernels
14 | nn.Conv2d(in_ch, out_ch, 3, padding=1),
15 | nn.BatchNorm2d(out_ch),
16 | nn.ReLU(inplace=True),
17 | nn.Conv2d(out_ch, out_ch, 3, padding=1),
18 | nn.BatchNorm2d(out_ch),
19 | nn.ReLU(inplace=True)
20 | )
21 |
22 | def forward(self, x):
23 | x = self.conv(x)
24 | return x
25 |
26 |
27 | class inconv(nn.Module):
28 | def __init__(self, in_ch, out_ch):
29 | # input layer: in_ch=3 input channels, out_ch=64 output channels
30 | super(inconv, self).__init__()
31 | self.conv = double_conv(in_ch, out_ch)
32 |
33 | def forward(self, x):
34 | x = self.conv(x)
35 | return x
36 |
37 |
38 | class down(nn.Module):
39 | def __init__(self, in_ch, out_ch):
40 | super(down, self).__init__()
41 | # paper: the architecture is a repeated structure; each repetition has two convolution layers (3x3 kernels, ReLU activation) and one pooling layer
42 | self.mpconv = nn.Sequential(
43 | nn.MaxPool2d(2),
44 | double_conv(in_ch, out_ch)
45 | )
46 |
47 | def forward(self, x):
48 | x = self.mpconv(x)
49 | return x
50 |
51 |
52 | class up(nn.Module):
53 | def __init__(self, in_ch, out_ch, bilinear=True):
54 | super(up, self).__init__()
55 |
56 | # it would be a nice idea if the upsampling could be learned too,
57 | # but my machine does not have enough memory to handle all those weights
58 | # default: bilinear upsampling everywhere (the author lacked the memory).
59 | # setting bilinear=False switches to transposed convolution (the paper's method); it may work better
60 |
61 | # use bilinear upsampling to enlarge the input
62 | if bilinear: # batchsize=10, scale=0.3 uses 9047M
63 | self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
64 | # (paper's method) 2-D transposed convolution, i.e. a convolution with the positions of input and kernel reversed; sometimes translated as "deconvolution"
65 | else: # the paper's method; batchsize=10, scale=0.3 uses 9040M
66 | self.up = nn.ConvTranspose2d(in_ch//2, in_ch//2, 2, stride=2)
67 |
68 | self.conv = double_conv(in_ch, out_ch)
69 |
70 | def forward(self, x1, x2):
71 | x1 = self.up(x1)
72 | diffX = x1.size()[2] - x2.size()[2]
73 | diffY = x1.size()[3] - x2.size()[3]
74 | x2 = F.pad(x2, (diffX // 2, diffX - diffX // 2,
75 | diffY // 2, diffY - diffY // 2)) # pad x2 to x1's size; handles odd differences, unlike int(diff / 2)
76 | # concatenate along the channel (depth) dimension
77 | x = torch.cat([x2, x1], dim=1)
78 | x = self.conv(x)
79 | return x
80 |
81 |
82 | class outconv(nn.Module):
83 | def __init__(self, in_ch, out_ch):
84 | super(outconv, self).__init__()
85 | # the final 1x1 convolution maps the 64-channel feature map to the desired depth (the number of classes)
86 | self.conv = nn.Conv2d(in_ch, out_ch, 1)
87 |
88 | def forward(self, x):
89 | x = self.conv(x)
90 | return x
91 |
--------------------------------------------------------------------------------
/UNet_pytorch/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .crf import *
2 | from .load import *
3 | from .utils import *
4 | from .data_vis import *
5 |
--------------------------------------------------------------------------------
/UNet_pytorch/utils/config.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # power by Mr.Li
3 | # default parameters
4 | import os.path
5 | class DefaultConfig_train():
6 | epochs=5 #number of epochs
7 | batchsize= 10 #batch size
8 | lr=0.1 #learning rate
9 | gpu=True #use cuda
10 | load=False #load file model
11 | scale=0.3 #downscaling factor of the images at training time; it strongly affects memory usage (upstream default 0.5)
12 |
13 | # dataset
14 | dir_img = '/home/bobo/data/CarvanaImageMaskingChallenge_UNet/train/'
15 | dir_mask = '/home/bobo/data/CarvanaImageMaskingChallenge_UNet/train_masks/'
16 | dir_checkpoint = './checkpoints/' # where to save models
17 |
18 | visdom=True # whether to visualize
19 |
20 | env = 'U-Net' # name of the visdom environment
21 | visdom = True # whether to visualize (duplicate of the flag above)
22 | datesets_name='Carvana Image Masking Challenge' # dataset name
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 | class DefaultConfig_predict():
31 | input='./input.jpg' #filenames of input images
32 | output='./output.jpg' #filenames of output images
33 | model= './MODEL.pth' # the file in which the model is stored
34 | cpu=False #Do not use the cuda version of the net
35 | scale=0.5 #Scale factor for the input images
36 | mask_threshold=0.5 #Minimum probability value to consider a mask pixel white
37 | no_crf=False #Do not use dense CRF postprocessing
38 | no_save=False #Do not save the output masks
39 | viz=False #Visualize the images as they are processed
40 | # instantiate an object of each class
41 | opt_train=DefaultConfig_train()
42 |
43 | opt_predict=DefaultConfig_predict()
44 |
45 |
--------------------------------------------------------------------------------
/UNet_pytorch/utils/crf.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pydensecrf.densecrf as dcrf
3 |
4 | def dense_crf(img, output_probs):
5 | h = output_probs.shape[0]
6 | w = output_probs.shape[1]
7 |
8 | output_probs = np.expand_dims(output_probs, 0)
9 | output_probs = np.append(1 - output_probs, output_probs, axis=0)
10 |
11 | d = dcrf.DenseCRF2D(w, h, 2)
12 | U = -np.log(output_probs)
13 | U = U.reshape((2, -1))
14 | U = np.ascontiguousarray(U)
15 | img = np.ascontiguousarray(img)
16 |
17 | d.setUnaryEnergy(U)
18 |
19 | d.addPairwiseGaussian(sxy=20, compat=3)
20 | d.addPairwiseBilateral(sxy=30, srgb=20, rgbim=img, compat=10)
21 |
22 | Q = d.inference(5)
23 | Q = np.argmax(np.array(Q), axis=0).reshape((h, w))
24 |
25 | return Q
26 |
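27 | # Usage sketch (assumption, not in the original file), mirroring the call in
28 | # predict.py: refined = dense_crf(np.array(pil_img).astype(np.uint8), probs),
29 | # where probs is an (H, W) float array of foreground probabilities in [0, 1]
30 | # and the result is an (H, W) array of {0, 1} labels.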
--------------------------------------------------------------------------------
/UNet_pytorch/utils/data_vis.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 |
3 | def plot_img_and_mask(img, mask):
4 | fig = plt.figure()
5 | a = fig.add_subplot(1, 2, 1)
6 | a.set_title('Input image')
7 | plt.imshow(img)
8 |
9 | b = fig.add_subplot(1, 2, 2)
10 | b.set_title('Output mask')
11 | plt.imshow(mask)
12 | plt.show()
--------------------------------------------------------------------------------
/UNet_pytorch/utils/load.py:
--------------------------------------------------------------------------------
1 | #
2 | # load.py : utils on generators / lists of ids to transform from strings to
3 | # cropped images and masks
4 |
5 | import os
6 |
7 | import numpy as np
8 | from PIL import Image
9 |
10 | from .utils import resize_and_crop, get_square, normalize, hwc_to_chw
11 |
12 |
13 | def get_ids(dir):
14 | """Returns a list of the ids in the directory"""
15 | # e.g. f[:-4] strips the .jpg extension, leaving just the image name
16 | return (f[:-4] for f in os.listdir(dir))
17 |
18 |
19 | def split_ids(ids, n=2):
20 | """Split each id in n, creating n tuples (id, k) for each id"""
21 | return ((id, i) for i in range(n) for id in ids)
22 |
23 |
24 | def to_cropped_imgs(ids, dir, suffix, scale):
25 | """From a list of tuples, returns the correct cropped img"""
26 | # for each (id, pos) tuple, yield the resized image cropped according to pos
27 | for id, pos in ids:
28 | im = resize_and_crop(Image.open(dir + id + suffix), scale=scale)
29 | # get_square: pos 0 crops the width to the left part [384,384,3]; pos 1 to the right part [384,190,3]
30 | yield get_square(im, pos)
31 |
32 | def get_imgs_and_masks(ids, dir_img, dir_mask, scale):
33 | '''
34 | :param ids:
35 | :param dir_img: image directory
36 | :param dir_mask: mask image directory
37 | :param scale: downscaling factor used at training time
38 | :return: all the couples (img, mask)
39 | '''
40 | """Return all the couples (img, mask)"""
41 |
42 | # read the images and resize them by scale
43 | imgs = to_cropped_imgs(ids, dir_img, '.jpg', scale)
44 |
45 | # need to transform from HWC to CHW, i.e. (height, width, channel) -> (channel, height, width)
46 | imgs_switched = map(hwc_to_chw, imgs)
47 | # normalize (scale the values to 0-1)
48 | imgs_normalized = map(normalize, imgs_switched)
49 |
50 | masks = to_cropped_imgs(ids, dir_mask, '_mask.gif', scale)
51 | # pairs of (resized, transposed and normalized image tensor, resized mask tensor)
52 | return zip(imgs_normalized, masks)
53 |
54 |
55 | def get_full_img_and_mask(id, dir_img, dir_mask):
56 | im = Image.open(dir_img + id + '.jpg')
57 | mask = Image.open(dir_mask + id + '_mask.gif')
58 | return np.array(im), np.array(mask)
59 |
--------------------------------------------------------------------------------
/UNet_pytorch/utils/utils.py:
--------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 |
4 |
5 | def get_square(img, pos):
6 | """Extract a left or a right square from ndarray shape : (H, W, C))"""
7 | h = img.shape[0]
8 | if pos == 0:
9 | return img[:, :h]
10 | else:
11 | return img[:, -h:]
12 |
13 | def split_img_into_squares(img):
14 | return get_square(img, 0), get_square(img, 1)
15 |
16 | def hwc_to_chw(img):
17 | return np.transpose(img, axes=[2, 0, 1])
18 |
19 | def resize_and_crop(pilimg, scale=0.5, final_height=None):
20 | w = pilimg.size[0]
21 | h = pilimg.size[1]
22 | newW = int(w * scale)
23 | newH = int(h * scale)
24 |
25 | if not final_height:
26 | diff = 0
27 | else:
28 | diff = newH - final_height
29 |
30 | img = pilimg.resize((newW, newH))
31 | # crop extracts a rectangular region from the image. It takes a 4-tuple (left, upper, right, lower); the origin (0, 0) is the top-left corner.
32 | img = img.crop((0, diff // 2, newW, newH - diff // 2))
33 | return np.array(img, dtype=np.float32)
34 |
35 | def batch(iterable, batch_size):
36 | """Yields lists by batch"""
37 | b = []
38 | for i, t in enumerate(iterable):
39 | b.append(t)
40 | if (i + 1) % batch_size == 0:
41 | yield b
42 | b = []
43 |
44 | if len(b) > 0:
45 | yield b
46 |
47 | def split_train_val(dataset, val_percent=0.05):
48 | dataset = list(dataset)
49 | length = len(dataset)
50 | n = int(length * val_percent)
51 | random.shuffle(dataset)
52 | return {'train': dataset[:-n] if n else dataset, 'val': dataset[-n:] if n else []} # guard n == 0: dataset[:-0] would be empty
53 |
54 |
55 | def normalize(x):
56 | return x / 255
57 |
58 | def merge_masks(img1, img2, full_w):
59 | h = img1.shape[0]
60 |
61 | new = np.zeros((h, full_w), np.float32)
62 | new[:, :full_w // 2 + 1] = img1[:, :full_w // 2 + 1]
63 | new[:, full_w // 2 + 1:] = img2[:, -(full_w // 2 - 1):]
64 |
65 | return new
66 |
67 |
68 | # credits to https://stackoverflow.com/users/6076729/manuel-lagunas
69 | def rle_encode(mask_image):
70 | pixels = mask_image.flatten()
71 | # We avoid issues with '1' at the start or end (at the corners of
72 | # the original image) by setting those pixels to '0' explicitly.
73 | # We do not expect these to be non-zero for an accurate mask,
74 | # so this should not harm the score.
75 | pixels[0] = 0
76 | pixels[-1] = 0
77 | runs = np.where(pixels[1:] != pixels[:-1])[0] + 2
78 | runs[1::2] = runs[1::2] - runs[:-1:2]
79 | return runs
80 |
--------------------------------------------------------------------------------
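A quick sanity check of `rle_encode` above (a sketch; assumes utils.py is importable):

```python
import numpy as np
from utils import rle_encode  # assumed import of the module above

mask = np.array([[0, 1, 1, 0],
                 [0, 1, 1, 0]], dtype=np.uint8)

# flattened row-major: 0 1 1 0 0 1 1 0 -> two runs, starting at 1-indexed pixels 2 and 6, each 2 long
print(rle_encode(mask))  # [2 2 6 2]
```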
/Yolov1_pytorch/checkpoint/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov1_pytorch/checkpoint/.gitkeep
--------------------------------------------------------------------------------
/Yolov1_pytorch/config.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # power by Mr.Li
3 | # default parameters
4 | class DefaultConfig():
5 | env = 'YOLOv1' # name of the visdom environment
6 | # model = 'NetWork' # model to use; the name must match one in models/__init__.py
7 | file_root = '/home/zhuhui/data/VOCdevkit/VOC2012/JPEGImages/' # VOC2012 training set
8 | test_root = '/home/zhuhui/data/VOCdevkit/VOC2007/JPEGImages/' # VOC2007 test set
9 | train_Annotations = '/home/zhuhui/data/VOCdevkit/VOC2012/Annotations/'
10 | voc_2007test='/home/bobo/PycharmProjects/torchProjectss/YOLOv1ByBobo/data/voc2007test.txt'
11 | voc_2012train='/home/bobo/PycharmProjects/torchProjectss/YOLOv1ByBobo/data/voc2012train.txt'
12 |
13 | test_img_dir='/home/bobo/PycharmProjects/torchProjectss/YOLOv1ByBobo/testImgs/a.jpg'
14 | result_img_dir='/home/bobo/PycharmProjects/torchProjectss/YOLOv1ByBobo/testImgs/result_a.jpg'
15 |
16 |
17 |
18 | batch_size = 32 # batch size
19 | use_gpu = True # use GPU or not
20 | num_workers = 4 # how many worker threads for loading data
21 | print_freq = 20 # print info every N batches
22 |
23 | # load_model_path = None # path of a pretrained model to load; None means do not load one
24 | best_test_loss_model_path= '/home/bobo/PycharmProjects/torchProjectss/YOLOv1ByBobo/checkpoint/yolo_val_best.pth'
25 | current_epoch_model_path='/home/bobo/PycharmProjects/torchProjectss/YOLOv1ByBobo/checkpoint/yolo_bobo.pth'
26 | load_model_path = None # path of a pretrained model to load; None means do not load one
27 | num_epochs = 120 # number of training epochs
28 | learning_rate = 0.001 # initial learning rate
29 | lr_decay = 0.5 # when val_loss increases, lr = lr*lr_decay
30 | momentum=0.95
31 | weight_decay =5e-4 # L2 regularization (weight decay)
32 | # VOC classes
33 | VOC_CLASSES = ( # always index 0
34 | 'aeroplane', 'bicycle', 'bird', 'boat',
35 | 'bottle', 'bus', 'car', 'cat', 'chair',
36 | 'cow', 'diningtable', 'dog', 'horse',
37 | 'motorbike', 'person', 'pottedplant',
38 | 'sheep', 'sofa', 'train', 'tvmonitor')
39 |
40 |
41 | # instantiate the config object
42 | opt=DefaultConfig()
--------------------------------------------------------------------------------
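The shared `opt` instance is then imported wherever parameters are needed; a one-line usage sketch:

```python
from config import opt  # the DefaultConfig instance defined above

print(opt.batch_size, opt.num_epochs, opt.VOC_CLASSES[0])  # 32 120 aeroplane
```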
/Yolov1_pytorch/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov1_pytorch/data/__init__.py
--------------------------------------------------------------------------------
/Yolov1_pytorch/data/xml_2_txt.py:
--------------------------------------------------------------------------------
1 | import xml.etree.ElementTree as ET
2 | import os
3 | from config import opt
4 |
5 | def parse_rec(filename):
6 | """
7 | Parse a PASCAL VOC xml file
8 | Converts the dataset annotations from xml to txt; used to generate voc2007test.txt etc.
10 | """
11 | tree = ET.parse(filename)
12 | objects = []
13 | # iterate over all objects in the image
14 | for obj in tree.findall('object'):
15 | obj_struct = {}
16 | obj_struct['name'] = obj.find('name').text
17 | #obj_struct['pose'] = obj.find('pose').text
18 | #obj_struct['truncated'] = int(obj.find('truncated').text)
19 | #obj_struct['difficult'] = int(obj.find('difficult').text)
20 | bbox = obj.find('bndbox')
21 | # the origin is the image's top-left corner, x grows to the right and y downward; (xmin, ymin) is the top-left and (xmax, ymax) the bottom-right of the box
22 | obj_struct['bbox'] = [int(float(bbox.find('xmin').text)),
23 | int(float(bbox.find('ymin').text)),
24 | int(float(bbox.find('xmax').text)),
25 | int(float(bbox.find('ymax').text))]
26 | objects.append(obj_struct)
27 |
28 | return objects
29 |
30 | # create a txt file named voc2012train.txt and prepare to write
31 | txt_file = open('data/voc2012train.txt','w')
32 | Annotations = opt.train_Annotations
33 | xml_files = os.listdir(Annotations)
34 |
35 | # iterate over all xml files
36 | for xml_file in xml_files:
37 | image_path = xml_file.split('.')[0] + '.jpg'
38 | # write the image name (not the full path) to the txt
39 | txt_file.write(image_path+' ')
40 | results = parse_rec(Annotations + xml_file)
41 | num_obj = len(results)
42 | # write the total number of objects in the image
43 | txt_file.write(str(num_obj)+' ')
44 | # iterate over all objects in the image
45 | for result in results:
46 | class_name = result['name']
47 | bbox = result['bbox']
48 | class_name = opt.VOC_CLASSES.index(class_name)
49 | # write the bbox coordinates and the class index of each object
50 | txt_file.write(str(bbox[0])+' '+str(bbox[1])+' '+str(bbox[2])+' '+str(bbox[3])+' '+str(class_name)+' ')
51 | txt_file.write('\n')
52 | # final format per line: image name (1 value), object count (1 value), then per object: bbox coordinates (4 values) + class index (1 value)
53 |
54 | txt_file.close()
--------------------------------------------------------------------------------
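The resulting txt therefore holds one line per image; a hypothetical line with two objects looks like this (all values illustrative):

```
2008_000008.jpg 2 53 87 471 420 12 158 44 289 167 14
```

That is: image name, object count, then per object xmin ymin xmax ymax class-index.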
/Yolov1_pytorch/main_resnet.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 |
3 | import cv2
4 | import numpy as np
5 | import torch
6 | import torch.nn as nn
7 | import torchvision.transforms as transforms
8 | from torch.autograd import Variable
9 | from torch.utils.data import DataLoader
10 | from tqdm import tqdm
11 |
12 | from config import opt
13 | from data.dataset import yoloDataset
14 | from models.resnet import resnet152_bo,resnet152
15 | from utils.visualize import Visualizer
16 | from utils.yoloLoss import yoloLoss
17 | from utils.predictUtils import predict_result
18 | from utils.predictUtils import voc_eval
19 | from utils.predictUtils import voc_ap
20 |
21 |
22 | def train():
23 | vis=Visualizer(opt.env)
24 | # Network part ================================================ start
25 | # pass a pretrained resnet152 backbone into resnet152_bo
26 | net=resnet152_bo(resnet152(pretrained=True))
27 | # first load the weights into memory (CPU)
28 | if opt.load_model_path:
29 | net.load_state_dict(torch.load(opt.load_model_path,map_location=lambda storage,loc:storage))
30 | # then move the model to the GPU
31 | if opt.use_gpu:
32 | net.cuda()
33 | # print the network structure
34 | print(net)
35 | print('Loaded the pretrained model')
36 | # switch the model to training mode
37 | net.train()
38 | # Network part ================================================ end
39 |
40 | # Data loading part =========================================== start
41 | # custom dataset wrapper
42 | train_dataset = yoloDataset(root=opt.file_root, list_file=opt.voc_2012train, train=True, transform=[transforms.ToTensor()])
43 | # dataset loader; shuffle: randomize order, num_workers: number of worker threads
44 | train_loader = DataLoader(train_dataset, batch_size=opt.batch_size, shuffle=True, num_workers=4)
45 | test_dataset = yoloDataset(root=opt.test_root, list_file=opt.voc_2007test, train=False, transform=[transforms.ToTensor()])
46 | test_loader = DataLoader(test_dataset, batch_size=opt.batch_size, shuffle=False, num_workers=4)
47 | # Data loading part =========================================== end
48 |
49 | # custom loss: 7 = the image is split into a 7x7 grid, 2 = two boxes predicted per cell, 5 = λcoord, weighting the 8-dim coordinate predictions more heavily, 0.5 = weight of the confidence loss for boxes without an object
50 | criterion = yoloLoss(7, 2, 5, 0.5)
51 | learning_rate=opt.learning_rate
52 | # optimizer
53 | optimizer = torch.optim.SGD(net.parameters(), lr=opt.learning_rate, momentum=opt.momentum, weight_decay=opt.weight_decay)
54 | #optimizer = torch.optim.Adam(net.parameters(), lr=opt.learning_rate, weight_decay=opt.weight_decay)
55 | print('The training set contains %d images' % (len(train_dataset)))
56 | print('Batch size is %d' % (opt.batch_size))
57 | # write the training progress to a log file
58 | logfile = open('log/log.txt', 'w')
59 | # np.inf is positive infinity
60 | best_test_loss = np.inf
61 |
62 | for epoch in range(opt.num_epochs):
63 | if epoch == 1:
64 | learning_rate = 0.0005
65 | if epoch == 2:
66 | learning_rate = 0.00075
67 | if epoch == 3:
68 | learning_rate = 0.001
69 | if epoch == 80:
70 | learning_rate = 0.0001
71 | if epoch == 100:
72 | learning_rate = 0.00001
73 | for param_group in optimizer.param_groups:
74 | param_group['lr'] = learning_rate
75 | # current epoch index and its learning rate
76 | print('\n\nCurrent epoch: %d / %d' % (epoch + 1, opt.num_epochs))
77 | print('Learning rate for this epoch: {}'.format(learning_rate))
78 |
79 | # running total loss for this epoch
80 | total_loss = 0.
81 | net.train() # ensure training mode (the model is switched to eval() during validation below)
82 | for i, (images, target) in enumerate(train_loader):
83 | images = Variable(images)
84 | target = Variable(target)
85 | if opt.use_gpu:
86 | images, target = images.cuda(), target.cuda()
87 | # forward pass to get the predictions
88 | pred = net(images)
89 | # compute the loss; yoloLoss subclasses nn.Module, so calling it runs its forward method
90 | loss = criterion(pred, target)
91 | total_loss += loss.data[0]
92 | # zero the optimizer gradients
93 | optimizer.zero_grad()
94 | # backpropagate the loss
95 | loss.backward()
96 | # update the parameters
97 | optimizer.step()
98 | if (i + 1) % opt.print_freq == 0:
99 | print('Training: epoch [%d/%d], iter [%d/%d], current batch loss: %.4f, average loss so far this epoch: %.4f'
100 | % (epoch + 1, opt.num_epochs, i + 1, len(train_loader), loss.data[0], total_loss / (i + 1)))
101 | # plot the average training loss
102 | vis.plot_train_val(loss_train=total_loss / (i + 1))
103 | # save the latest model
104 | torch.save(net.state_dict(),opt.current_epoch_model_path)
105 | vis.log("epoch:{epoch},lr:{lr}".format(
106 | epoch=epoch, lr=learning_rate))
107 |
108 |
109 | # validation once per epoch
110 | validation_loss = 0.0
111 | # switch the model to evaluation mode
112 | net.eval()
113 | # evaluate on the VOC2007 test set after every epoch
114 | for i, (images, target) in enumerate(test_loader):
115 | images = Variable(images, volatile=True)
116 | target = Variable(target, volatile=True)
117 | if opt.use_gpu:
118 | images, target = images.cuda(), target.cuda()
119 | # forward pass to get the predictions
120 | pred = net(images)
121 | # loss
122 | loss = criterion(pred, target)
123 | validation_loss += loss.data[0]
124 | # average loss on the VOC2007 test set
125 | validation_loss /= len(test_loader)
126 | # plot the average validation loss
127 | vis.plot_train_val(loss_val=validation_loss)
128 | # the training goal is to minimize the loss on the validation set
129 | # save the model with the lowest validation loss so far
130 | if best_test_loss > validation_loss:
131 | best_test_loss = validation_loss
132 | print('Best average validation loss so far: %.5f' % best_test_loss)
133 | torch.save(net.state_dict(),opt.best_test_loss_model_path)
134 | # write this epoch's stats to the log file
135 | logfile.writelines(str(epoch) + '\t' + str(validation_loss) + '\n')
136 | logfile.flush()
137 |
138 | def predict():
139 | # build the model (the backbone weights are overwritten by load_state_dict below)
140 | predict_model = resnet152_bo(resnet152(pretrained=True))
141 |
142 | predict_model.load_state_dict(torch.load(opt.load_model_path,map_location=lambda storage,loc:storage))
143 | # switch the model to evaluation mode
144 | predict_model.eval()
145 | # move to the GPU if available
146 | if opt.use_gpu:
147 | predict_model.cuda()
148 | # path of the test image
149 | test_img_dir = opt.test_img_dir
150 | image = cv2.imread(test_img_dir)
151 | # each result contains: top-left corner, bottom-right corner, class name, the input image path, class probability
152 | result = predict_result(predict_model, test_img_dir)
153 | for left_up, right_bottom, class_name, _, prob in result:
154 | # draw the predicted box onto the test image
155 | cv2.rectangle(image, left_up, right_bottom, (0, 255, 0), 2)
156 | # write the class name at the top-left corner of the box
157 | cv2.putText(image, class_name, left_up, cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 1, cv2.LINE_AA)
158 | print(prob)
159 | # save the result image
160 | cv2.imwrite(opt.result_img_dir,image)
161 |
162 |
163 |
164 | # main entry point
165 | if __name__ == '__main__':
166 |
167 | # command line tool: run e.g. `python main_resnet.py train`
168 | import fire
169 | fire.Fire()
170 |
171 | # train() # dispatched through fire above; calling it here as well would run training twice
172 | # predict()
173 |
174 |
175 |
176 |
--------------------------------------------------------------------------------
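The chain of `if epoch == ...` checks in train() above encodes a warm-up-then-decay learning-rate schedule; an equivalent, more compact formulation (a sketch, not part of the original code):

```python
# epoch -> new learning rate; epochs not listed keep the previously set value
LR_SCHEDULE = {1: 0.0005, 2: 0.00075, 3: 0.001, 80: 0.0001, 100: 0.00001}

def adjust_learning_rate(optimizer, epoch, current_lr):
    """Return the learning rate for this epoch and write it into the optimizer."""
    lr = LR_SCHEDULE.get(epoch, current_lr)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr

# inside the epoch loop: learning_rate = adjust_learning_rate(optimizer, epoch, learning_rate)
```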
/Yolov1_pytorch/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov1_pytorch/models/__init__.py
--------------------------------------------------------------------------------
/Yolov1_pytorch/models/resnet.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.utils.model_zoo as model_zoo
3 | import math
4 | import torch.nn.functional as F
5 | from torchvision.models import resnet152
6 |
7 | class resnet152_bo(nn.Module):
8 |
9 | def __init__(self, features, num_classes=1000):
10 | super(resnet152_bo, self).__init__()
11 | # use the passed-in (optionally pretrained) resnet152 as the backbone
12 | # do not change the architecture yet; only replace the final layers
13 | # drop the model's last two layers (the final average pooling and fully connected layers)
14 | self.features=nn.Sequential(*list(features.children())[:-2])
15 | self.classifier=nn.Sequential(
16 | nn.Linear(2048 * 7 * 7, 4096),
17 | nn.ReLU(True),
18 | nn.Dropout(),
19 | # one fully connected layer removed
20 | # nn.Linear(4096, 4096),
21 | # nn.ReLU(True),
22 | # nn.Dropout(),
23 | # the last layer outputs 1470 values, i.e. one image's predictions (1470 = 7x7x30)
24 | nn.Linear(4096, 1470),
25 | )
26 | # model.fc = nn.Linear(2048, 1470)
27 | # self.resnet152_bo=model
28 | # only the linear layers were changed, so only they get re-initialized
29 | self._initialize_weights()
30 |
31 | def _initialize_weights(self):
32 | for m in self.modules():
33 | # only the linear layers were changed, so only they get re-initialized
34 | if isinstance(m, nn.Linear):
35 | m.weight.data.normal_(0, 0.01)
36 | m.bias.data.zero_()
37 | def forward(self, x):
38 | x = self.features(x)
39 | x = x.view(x.size(0), -1)
40 | x = self.classifier(x)
41 | # squash the output to the 0-1 range with a sigmoid
42 | x = F.sigmoid(x)
43 | # reshape to (N, 7, 7, 30): N images, (7, 7, 30) predictions per image
44 | x = x.view(-1,7,7,30)
45 | return x
46 |
47 |
48 |
49 | def test():
50 | '''
51 | quick smoke test
52 | '''
53 | import torch
54 | from torch.autograd import Variable
55 |
56 | model = resnet152_bo(resnet152(pretrained=True))
57 | img = torch.rand(2,3,224,224)
58 | img = Variable(img)
59 | output = model(img)
60 | output = output.view(-1, 7, 7, 30)
61 | print(output.size())
62 |
63 | if __name__ == '__main__':
64 | test()
--------------------------------------------------------------------------------
/Yolov1_pytorch/readme.md:
--------------------------------------------------------------------------------
1 | - Environment:
2 |
3 | | python version | pytorch version |
4 | | ----------- | ---------- |
5 | | 3.5 | 0.3.0 |
6 |
7 | - Notes:
8 |
9 | 1. Basic implementation reference: [pytorchYOLOv1master][1]
10 |
11 | 2. This is only a refactor of the code; it does not improve the results
12 |
13 | 3. In testing, the loss on the VOC2012 training set reaches about 0.1, while the loss on the VOC2007 test set barely decreases; overfitting is suspected.
14 |
15 | 4. Make sure the visdom visualization server is running before executing main.py
16 |
17 | - Current work:
18 |
19 | 1. ~~Visualize test images, loss, etc. after training~~
20 |
21 | 2. ~~Upload the trained models here~~
22 |
23 | 3. ~~Add comments for easier understanding~~
24 |
25 | 4. Try to optimize the network and improve mAP
26 |
27 | - Improvement directions:
28 |
29 | 1. Tune the learning rate
30 |
31 | 2. ~~Adjust the network structure (the reference version uses vgg16; try a residual network)~~
32 |
33 | 3. ~~Switch the optimizer from SGD to Adam~~
34 |
35 |
36 | - Trained models for download:
37 |
38 | 1. The model with the best validation loss on the VOC2007 test set ([Baidu Netdisk](https://pan.baidu.com/s/1HCO24KGqjJw01raiCB7f2A))
39 |
40 | 2. The last saved model ([Baidu Netdisk](https://pan.baidu.com/s/1HKY7qGgK7i3Fv_ks9ldflw))
41 |
42 | - Results:
43 |
44 | Validation set: VOC2012 training set
45 |
46 | Model: the one with the best validation loss on the VOC2007 test set
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 | ### Loss trend
55 |
56 |
57 |
58 | | epoch | loss on the VOC2007 test set |
59 | |-------|---------------------|
60 | | 0 | 5.806424896178707 |
61 | | 1 | 5.855176733386132 |
62 | | 2 | 5.9203009036279495 |
63 | | ... | ... |
64 | | 118 | 5.187265388427242 |
65 | | 119 | 5.190768877152474 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 | Note: the blue line is the loss on the VOC2012 training set, the yellow line the loss on the VOC2007 test set
74 |
75 |
76 | ### Network performance
77 |
78 | - Performance of the last saved model on the VOC2007 validation set
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 | - Performance of the last saved model on the VOC2012 training set (possibly overfitting; it does very well on the training set)
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 | # Additions by the repository author
105 |
106 | - What's new:
107 |
108 | Added a Resnet152 network to replace the original author's VGG16 (code: main_resnet.py and models/resnet.py).
109 |
110 | - Implementation details:
111 |
112 | Only the last fully connected layer of Resnet152 is changed to output 1470 values, which are then reshaped to 7x7x30; see the sketch below.
113 |
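A minimal sketch of that change (illustrative; written for a recent PyTorch, whereas models/resnet.py replaces the last two layers with a small fully connected head):

```python
import torch
import torch.nn as nn
from torchvision.models import resnet152

model = resnet152(pretrained=False)   # True to start from ImageNet weights, as the project does
model.fc = nn.Linear(2048, 1470)      # 1470 = 7 x 7 x 30

images = torch.rand(2, 3, 224, 224)
out = model(images).view(-1, 7, 7, 30)  # a 7x7 grid of 30-dim predictions per image
print(out.size())                       # torch.Size([2, 7, 7, 30])
```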
114 | - Results:
115 |
116 | Very poor. The suspected reason is that Resnet152 is a classification network, and using it directly for regression does not work well.
117 |
118 | - Loss plots:
119 |
120 | | Resnet152 + Adam optimizer | Resnet152 + SGD optimizer |
121 | | --- | ------------------ |
122 | | bottom left | bottom right |
123 |
124 |
125 |
126 |
127 |
128 | - Optimization suggestions:
129 |
130 | 1. ~~Use Resnet50 and handle the network head the way the original VGG16 version does~~
131 |
132 | 2. Remove the last layer of Resnet50 and add two VGG16-style fully connected layers with Dropout, etc. The loss still does not come down, so this project will no longer pursue accuracy improvements.
133 |
134 |
135 | - Special thanks:
136 |
137 | xiongzihua: [the original author][2]
138 |
139 | Zhu Hui (senior labmate): for taking the time to help me sort out the ideas and walk through the code
140 |
141 |
142 |
143 |
144 | [1]: https://github.com/xiongzihua/pytorch-YOLO-v1
145 | [2]: https://github.com/xiongzihua/pytorch-YOLO-v1
--------------------------------------------------------------------------------
/Yolov1_pytorch/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov1_pytorch/utils/__init__.py
--------------------------------------------------------------------------------
/Yolov1_pytorch/utils/testImgs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov1_pytorch/utils/testImgs/__init__.py
--------------------------------------------------------------------------------
/Yolov1_pytorch/utils/visualize.py:
--------------------------------------------------------------------------------
1 | import visdom
2 | import numpy as np
3 |
4 | class Visualizer():
5 | def __init__(self, env='main', **kwargs):
6 | '''
7 | **kwargs, dict option
8 | '''
9 | self.vis = visdom.Visdom(env=env)
10 | self.index = {} # x, dict
11 | self.log_text = ''
12 | self.env = env
13 |
14 | def plot_train_val(self, loss_train=None, loss_val=None):
15 | '''
16 | plot val loss and train loss in one figure
17 | '''
18 | x = self.index.get('train_val', 0)
19 |
20 | if x == 0:
21 | loss = loss_train if loss_train else loss_val
22 | win_y = np.column_stack((loss, loss))
23 | win_x = np.column_stack((x, x))
24 | self.win = self.vis.line(Y=win_y, X=win_x,
25 | env=self.env)
26 | # opts=dict(
27 | # title='train_test_loss',
28 | # ))
29 | self.index['train_val'] = x + 1
30 | return
31 |
32 | if loss_train is not None:
33 | self.vis.line(Y=np.array([loss_train]), X=np.array([x]),
34 | win=self.win,
35 | name='1',
36 | update='append',
37 | env=self.env)
38 | self.index['train_val'] = x + 5
39 | else:
40 | self.vis.line(Y=np.array([loss_val]), X=np.array([x]),
41 | win=self.win,
42 | name='2',
43 | update='append',
44 | env=self.env)
45 |
46 | def plot_many(self, d):
47 | '''
48 | d: dict {name, value}
49 | '''
50 | for k, v in d.items(): # dict.iteritems() is Python 2 only
51 | self.plot(k, v)
52 |
53 | def plot(self, name, y, **kwargs):
54 | '''
55 | plot('loss', 1.00)
56 | '''
57 | x = self.index.get(name, 0) # if none, return 0
58 | self.vis.line(Y=np.array([y]), X=np.array([x]),
59 | win=name,
60 | opts=dict(title=name),
61 | update=None if x== 0 else 'append',
62 | **kwargs)
63 | self.index[name] = x + 1
64 |
65 | def log(self, info, win='log_text'):
66 | '''
67 | show text in a visdom text box (not written to a txt file)
68 | '''
69 | pass
70 |
--------------------------------------------------------------------------------
/Yolov3_pytorch/checkpoints/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/checkpoints/.gitkeep
--------------------------------------------------------------------------------
/Yolov3_pytorch/checkpoints/download_weights.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | wget https://pjreddie.com/media/files/yolov3.weights
4 |
--------------------------------------------------------------------------------
/Yolov3_pytorch/config/coco.data:
--------------------------------------------------------------------------------
1 | classes= 80
2 | train=data/coco/trainvalno5k.txt
3 | valid=data/coco/5k.txt
4 | names=data/coco.names
5 | backup=backup/
6 | eval=coco
7 |
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/coco.names:
--------------------------------------------------------------------------------
1 | person
2 | bicycle
3 | car
4 | motorbike
5 | aeroplane
6 | bus
7 | train
8 | truck
9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | sofa
59 | pottedplant
60 | bed
61 | diningtable
62 | toilet
63 | tvmonitor
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
81 |
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/get_coco_dataset.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # CREDIT: https://github.com/pjreddie/darknet/tree/master/scripts/get_coco_dataset.sh
4 |
5 | # Clone COCO API
6 | git clone https://github.com/pdollar/coco
7 | cd coco
8 |
9 | mkdir images
10 | cd images
11 |
12 | # The dataset has already been uploaded separately, so the image downloads below are commented out
13 | ## Download Images
14 | #wget -c https://pjreddie.com/media/files/train2014.zip
15 | #wget -c https://pjreddie.com/media/files/val2014.zip
16 |
17 | ## Unzip
18 | #unzip -q train2014.zip
19 | #unzip -q val2014.zip
20 |
21 | cd ..
22 |
23 | # Download COCO Metadata
24 | wget -c https://pjreddie.com/media/files/instances_train-val2014.zip
25 | wget -c https://pjreddie.com/media/files/coco/5k.part
26 | wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part
27 | wget -c https://pjreddie.com/media/files/coco/labels.tgz
28 | tar xzf labels.tgz
29 | unzip -q instances_train-val2014.zip
30 |
31 | # Set Up Image Lists
32 | paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt
33 | paste <(awk "{print \"$PWD\"}" <trainvalno5k.part) trainvalno5k.part | tr -d '\t' > trainvalno5k.txt
34 |
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/dog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/dog.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/eagle.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/eagle.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/giraffe.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/giraffe.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/herd_of_horses.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/herd_of_horses.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/img1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/img1.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/img2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/img2.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/img3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/img3.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/img4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/img4.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/messi.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/messi.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/person.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/person.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/datasets/datasets.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import random
3 | import os
4 | import numpy as np
5 |
6 | import torch
7 |
8 | from torch.utils.data import Dataset
9 | from PIL import Image
10 | import torchvision.transforms as transforms
11 |
12 | import matplotlib.pyplot as plt
13 | import matplotlib.patches as patches
14 |
15 | from skimage.transform import resize
16 |
17 | import sys
18 |
38 | class ImageFolder(Dataset):
39 | '''
40 | Used only by detect.py, to run the bundled sample images
41 | '''
42 | def __init__(self, folder_path, img_size=416):
43 | self.files = sorted(glob.glob('%s/*.*' % folder_path))
44 | self.img_shape = (img_size, img_size)
45 |
46 | def __getitem__(self, index):
47 | img_path = self.files[index % len(self.files)]
48 | # Extract image
49 | img = np.array(Image.open(img_path))
50 | h, w, _ = img.shape
51 | dim_diff = np.abs(h - w)
52 | # Upper (left) and lower (right) padding
53 | pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
54 | # Determine padding
55 | pad = ((pad1, pad2), (0, 0), (0, 0)) if h <= w else ((0, 0), (pad1, pad2), (0, 0))
56 | # Add padding
57 | input_img = np.pad(img, pad, 'constant', constant_values=127.5) / 255.
58 | # Resize and normalize
59 | input_img = resize(input_img, (*self.img_shape, 3), mode='reflect')
60 | # Channels-first
61 | input_img = np.transpose(input_img, (2, 0, 1))
62 | # As pytorch tensor
63 | input_img = torch.from_numpy(input_img).float()
64 |
65 | # return the image path and the processed image tensor
66 | return img_path, input_img
67 |
68 | def __len__(self):
69 | return len(self.files)
70 |
71 |
72 | class ListDataset(Dataset):
73 | '''
74 | dataset loader for training and evaluation
75 | '''
76 | def __init__(self, list_path, img_size=416):
77 | # read the txt listing the training images, kept as a list
78 | with open(list_path, 'r') as file:
79 | self.img_files = file.readlines()
80 | # derive the label files (COCO ground-truth boxes are stored as txt), kept as a list
81 | self.label_files = [path.replace('images', 'labels').replace('.png', '.txt').replace('.jpg', '.txt') for path in self.img_files]
82 | # input training image size
83 | self.img_shape = (img_size, img_size)
84 | self.max_objects = 50 # assume at most 50 real objects per image (used when packing the ground-truth boxes)
85 |
86 | def __getitem__(self, index):
87 |
88 | '''
89 | Fetch a single image and its ground truth during training
90 | '''
91 |
92 | # read the image
93 | img_path = self.img_files[index % len(self.img_files)].rstrip()
94 |
95 | copy_img=Image.open(img_path).copy()
96 | img = np.array(copy_img)
97 |
98 | # Handles images with less than three channels
99 | # if the image does not have 3 channels (i.e. it is corrupted), read the next image instead
100 | while len(img.shape) != 3:
101 | index += 1
102 | img_path = self.img_files[index % len(self.img_files)].rstrip()
103 | img = np.array(Image.open(img_path))
104 |
105 | # process the image array (padding, normalization)
106 |
107 | # pad w and h up to the larger of the two, making the image square
108 | h, w, _ = img.shape
109 | # np.abs: absolute value
110 | dim_diff = np.abs(h - w)
111 | # Upper (left) and lower (right) padding
112 | # i.e. pad top/bottom for landscape images, left/right for portrait ones
113 | pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
114 | # Determine padding
115 | pad = ((pad1, pad2), (0, 0), (0, 0)) if h <= w else ((0, 0), (pad1, pad2), (0, 0))
116 | # Add padding
117 | input_img = np.pad(img, pad, 'constant', constant_values=128) / 255.
118 |
119 | # after squaring, resize to the target shape (usually 416x416)
120 | padded_h, padded_w, _ = input_img.shape
121 | # Resize and normalize
122 | input_img = resize(input_img, (*self.img_shape, 3), mode='reflect')
123 |
124 | # Channels-first
125 | input_img = np.transpose(input_img, (2, 0, 1))
126 | # As pytorch tensor
127 | input_img = torch.from_numpy(input_img).float()
128 |
129 | #---------
130 | # ground-truth label handling
131 | #---------
132 | label_path = self.label_files[index % len(self.img_files)].rstrip()
133 | labels = None
134 | if os.path.exists(label_path):
135 | # e.g. shape [8, 5]: 8 bboxes in this image; column 0 is the class index, columns 1-4 the coordinates (values in 0-1)
136 | labels = np.loadtxt(label_path).reshape(-1, 5)
137 | # Extract coordinates for unpadded + unscaled image
138 | # i.e. convert normalized (cx, cy, w, h) to corner coordinates in pixels
139 | x1 = w * (labels[:, 1] - labels[:, 3]/2)
140 | y1 = h * (labels[:, 2] - labels[:, 4]/2)
141 | x2 = w * (labels[:, 1] + labels[:, 3]/2)
142 | y2 = h * (labels[:, 2] + labels[:, 4]/2)
143 | # Adjust for added padding
144 | # shift by the padding so the boxes stay aligned with the squared image
145 | x1 += pad[1][0]
146 | y1 += pad[0][0]
147 | x2 += pad[1][0]
148 | y2 += pad[0][0]
149 | # Calculate ratios from coordinates
150 | # convert back to normalized center/size ratios
151 | labels[:, 1] = ((x1 + x2) / 2) / padded_w
152 | labels[:, 2] = ((y1 + y2) / 2) / padded_h
153 | labels[:, 3] *= w / padded_w
154 | labels[:, 4] *= h / padded_h
155 | # Fill matrix
156 | # pack the image's objects into a fixed-size matrix (at most max_objects = 50 of them)
157 | filled_labels = np.zeros((self.max_objects, 5))
158 | if labels is not None:
159 | filled_labels[range(len(labels))[:self.max_objects]] = labels[:self.max_objects]
160 | filled_labels = torch.from_numpy(filled_labels)
161 | # return the image path, the processed image tensor, and the ground-truth boxes filled_labels[50, 5] with values in 0-1
162 | return img_path, input_img, filled_labels
163 |
164 | def __len__(self):
165 | return len(self.img_files)
166 |
--------------------------------------------------------------------------------
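A small numeric check of the square-padding logic in `__getitem__` above (pure numpy, no project imports):

```python
import numpy as np

h, w = 300, 400                                        # landscape image: pad the height
dim_diff = np.abs(h - w)                               # 100
pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2   # 50, 50
pad = ((pad1, pad2), (0, 0), (0, 0)) if h <= w else ((0, 0), (pad1, pad2), (0, 0))

img = np.zeros((h, w, 3))
padded = np.pad(img, pad, 'constant', constant_values=128)
print(padded.shape)  # (400, 400, 3): square, later resized to (416, 416, 3)
```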
/Yolov3_pytorch/readme.md:
--------------------------------------------------------------------------------
1 | # Refactored YOLO v3 implementation
2 |
3 | ----------
4 |
5 | This repository builds on [PyTorch-YOLOv3](https://github.com/eriklindernoren/PyTorch-YOLOv3) by [eriklindernoren](https://github.com/eriklindernoren); many thanks for his generous open-source contribution.
6 |
7 |
8 | - [Original repository](https://github.com/eriklindernoren/PyTorch-YOLOv3)
9 | - [Annotated version of the original](https://github.com/bobo0810/PyTorch-YOLOv3-master)
10 | - [Refactored version](https://github.com/bobo0810/AnnotatedNetworkModelGit/tree/master/Yolov3_pytorch), highly recommended! (this repository)
11 |
12 | ----------
13 |
14 | # Currently supported:
15 |
16 | - Dataset: COCO
17 | - Network: Darknet-53
18 |
19 | # Improvements over the original:
20 |
21 | - All parameters can be set in config.py
22 | - Restructured code, with extensive comments added
23 | - visdom visualization added
24 |
25 |
26 | ----------
27 |
28 | # Typical project structure
29 |
30 | 1. Define the network
31 |
32 | 
33 |
34 | 2. Wrap the dataset
35 |
36 | 
37 |
38 | 3. Utility classes
39 |
40 | 
41 |
42 | 4. Main entry point
43 |
44 | 
45 |
46 | - Environment:
47 |
48 | | python version | pytorch version |
49 | | ----------- | ---------- |
50 | | 3.5 | 0.4 |
51 |
52 | ----------
53 |
54 | # Darknet-53 network structure
55 |
56 | 
57 |
58 | The following are useful when reading the source:
59 |
60 | hyperparams
61 |
62 | 
63 |
64 | module_list
65 |
66 | 
67 |
68 | module_defs
69 |
70 | 
71 |
72 |
73 |
74 | ----------
75 |
76 | # Preparing the dataset:
77 | Download the COCO dataset
78 |
79 | ```
80 | $ cd data/
81 | $ bash get_coco_dataset.sh
82 | ```
83 |
84 | Dataset layout
85 | ```
86 | data/coco
87 | │
88 | └───images
89 | │ │ train2014
90 | │ │ val2014
91 | │
92 | └───labels
93 | │ │ train2014
94 | │ │ val2014
95 | │ ...
96 | │ ...
97 |
98 | ```
99 |
100 | ----------
101 |
102 | # Train:
103 |
104 | 1. Start Visdom (a visualization tool similar to TensorFlow's TensorBoard)
105 |
106 | ```
107 | # First install Python server and client
108 | pip install visdom
109 | # Start the server
110 | python -m visdom.server
111 | ```
112 |
113 | 2. Start training
114 |
115 | Set the parameters in config.py.
116 |
117 | main.py will run train().
118 |
119 | ###### Because of a save/load bug in the original repository, saving to the official .weights format (binary, storing only conv and bn layer parameters; everything else is read from the cfg file) is not supported; training saves .pt checkpoints (the whole model), as sketched below.
120 |
121 | 
122 |
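The corresponding PyTorch calls are roughly the following (a sketch with a stand-in module and illustrative paths, not the repository's actual training loop):

```python
import torch
import torch.nn as nn

model = nn.Sequential(nn.Conv2d(3, 32, 3))  # stand-in for the Darknet model built from yolov3.cfg

# save the entire model object (architecture + parameters)
torch.save(model, 'checkpoints/latest.pt')

# load it back later without re-parsing the cfg file
model = torch.load('checkpoints/latest.pt')
model.eval()
```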
123 | ----------
124 |
125 | # Test:
126 |
127 | Purpose: evaluation; computes mAP
128 |
129 | 1. Download the official pretrained weights
130 |
131 | ```
132 | $ cd checkpoints/
133 | $ bash download_weights.sh
134 | ```
135 |
136 | 2. Point load_model_path in config.py at the pretrained model
137 |
138 | ###### Both the official .weights model and a self-trained .pt model are supported
139 |
140 | 3. Set the remaining parameters in config.py.
141 |
142 | Run test() in main.py.
143 |
144 |
145 | | Model | mAP (min. 50 IoU) |
146 | |---------------------|-------------------|
147 | | YOLOv3 (paper) | 57.9 |
148 | | YOLOv3 (official) | 58.38 |
149 | | YOLOv3 (this impl.) | 58.2 |
150 |
151 |
152 |
153 | 
154 |
155 | ----------
156 |
157 | # Predict:
158 |
159 | Purpose: visualize predictions on images
160 |
161 | 1. Point load_model_path in config.py at the pretrained model
162 | ###### Both the official .weights model and a self-trained .pt model are supported
163 | 2. Set the remaining parameters in config.py.
164 |
165 | main.py will run detect().
166 |
167 |
168 | Results with the official model:
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 | ----------
183 |
184 | ## References:
185 |
186 | Recommended companion reading:
187 |
188 | - [Implementing YOLOv3 from 0 to 1 (part one)](https://blog.csdn.net/qq_25737169/article/details/80530579)
189 |
190 | - [Implementing YOLO v3 from 0 to 1 (part two)](https://blog.csdn.net/qq_25737169/article/details/80634360)
191 |
192 | - [YOLO v3 paper translation](https://zhuanlan.zhihu.com/p/34945787)
193 |
194 | - [Analysis of the YOLO v3 network structure](https://blog.csdn.net/qq_37541097/article/details/81214953)
195 |
196 | ----------
197 |
198 | # About the authors
199 |
200 | - Original author: [eriklindernoren](https://github.com/eriklindernoren)
201 |
202 | - Author of this repository: [Mr.Li](https://github.com/bobo0810)
203 |
204 |
205 |
206 |
--------------------------------------------------------------------------------
/Yolov3_pytorch/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/utils/__init__.py
--------------------------------------------------------------------------------
/Yolov3_pytorch/utils/config.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # power by Mr.Li
3 | # default parameters
4 | import os.path
5 | class DefaultConfig_train():
6 | epochs=30 # number of training epochs
7 | image_folder='data/samples' # dataset path
8 | batch_size=16 # batch size
9 | model_config_path='config/yolov3.cfg' # model/network structure definition
10 | data_config_path='config/coco.data' # dataset usage configuration
11 | class_path='data/coco.names' # coco dataset class labels
12 | conf_thres=0.8 # object confidence threshold
13 | nms_thres= 0.4 # iou threshold for nms
14 | n_cpu=0 # number of cpu threads to use during batch generation
15 | img_size=416 # input image size
16 | use_cuda=True # whether to use the GPU
17 | visdom=True # whether to visualize the loss with visdom
18 | print_freq = 8 # print every N batches during training
19 | lr_decay = 0.1 # 1e-3 -> 1e-4
20 |
21 | checkpoint_interval=1 # save a checkpoint every N epochs
22 | checkpoint_dir='./checkpoints' # directory for saved models
23 |
24 | load_model_path=None # path of a pretrained model to load; None means do not load one
25 | # load_model_path=checkpoint_dir+'/latestbobo.pt' # pretrained weights (.pt only)
26 |
27 | class DefaultConfig_test():
28 | epochs=200 # number of epochs
29 | batch_size=16 # size of each image batch
30 | model_config_path='config/yolov3.cfg' # path to model config file
31 | data_config_path='config/coco.data' # path to data config file
32 |
33 | checkpoint_dir = './checkpoints' # directory for saved models
34 | # load_model_path=None # path of a pretrained model to load; None means do not load one
35 | load_model_path=checkpoint_dir+'/8yolov3.pt' # pretrained weights (.weights or .pt)
36 |
37 | class_path='data/coco.names' # path to class label file
38 | iou_thres=0.5 # iou threshold required to qualify as detected
39 | conf_thres=0.5 # object confidence threshold
40 | nms_thres=0.45 # iou threshold for non-maximum suppression
41 | n_cpu=0 # number of cpu threads to use during batch generation
42 | img_size=416 # size of each image dimension
43 | use_cuda=True # whether to use cuda if available
44 |
45 |
46 | class DefaultConfig_detect():
47 | image_folder= 'data/samples' # path to dataset
48 | config_path='config/yolov3.cfg' # path to model config file
49 |
50 |
51 | checkpoint_dir='./checkpoints' # directory for saved models
52 | # load_model_path=None # path of a pretrained model to load; None means do not load one
53 | load_model_path = checkpoint_dir + '/yolov3.weights' # pretrained weights (.weights or .pt)
54 |
55 |
56 | class_path='data/coco.names' # path to class label file
57 | conf_thres=0.8 # object confidence threshold
58 | nms_thres=0.4 # iou threshold for non-maximum suppression
59 | batch_size=1 # size of the batches
60 | n_cpu=8 # number of cpu threads to use during batch generation
61 | img_size=416 # size of each image dimension
62 | use_cuda=True # whether to use cuda if available
63 |
64 |
65 |
66 | # instantiate the config objects
67 | opt_train=DefaultConfig_train()
68 | opt_test=DefaultConfig_test()
69 | opt_detect=DefaultConfig_detect()
70 |
--------------------------------------------------------------------------------
/Yolov3_pytorch/utils/parse_config.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | def parse_model_config(path):
4 | """Parses the yolo-v3 layer configuration file and returns module definitions"""
5 | '''
6 | Parses the yolo-v3 layer configuration file and returns the module definitions,
7 | one dict per [block] of the cfg
8 | path: path to yolov3.cfg
9 | '''
10 | file = open(path, 'r')
11 | # read the whole file and split it into a list of lines
12 | lines = file.read().split('\n')
13 | # filter out empty lines and lines starting with "#", i.e. comments
14 | lines = [x for x in lines if x and not x.startswith('#')]
15 | # lstrip removes leading and rstrip removes trailing whitespace (including \n, \r, \t)
16 | lines = [x.rstrip().lstrip() for x in lines] # trim whitespace on both ends
17 | module_defs = []
18 | for line in lines:
19 | # check whether the line starts with '['
20 | if line.startswith('['): # This marks the start of a new block
21 | module_defs.append({})
22 | module_defs[-1]['type'] = line[1:-1].rstrip()
23 | if module_defs[-1]['type'] == 'convolutional':
24 | module_defs[-1]['batch_normalize'] = 0
25 | else:
26 | key, value = line.split("=")
27 | value = value.strip()
28 | module_defs[-1][key.rstrip()] = value.strip()
29 |
30 | return module_defs
31 |
32 | def parse_data_config(path):
33 | """Parses the dataloader configuration file"""
34 | '''
35 | Parses the data configuration file (e.g. coco.data)
36 | '''
37 | options = dict()
38 | # default to 4 GPUs
39 | options['gpus'] = '0,1,2,3'
40 | # number of worker threads used by the data loader
41 | options['num_workers'] = '10'
42 | with open(path, 'r') as fp:
43 | lines = fp.readlines()
44 | for line in lines:
45 | line = line.strip()
46 | if line == '' or line.startswith('#'):
47 | continue
48 | key, value = line.split('=')
49 | options[key.strip()] = value.strip()
50 | return options
51 |
--------------------------------------------------------------------------------
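A quick demonstration of `parse_model_config` above on a two-block cfg snippet (a sketch; assumes it runs from the project root so the import resolves):

```python
from utils.parse_config import parse_model_config

with open('/tmp/mini.cfg', 'w') as f:
    f.write('[net]\nchannels=3\n\n# a comment\n[convolutional]\nbatch_normalize=1\nfilters=32\n')

print(parse_model_config('/tmp/mini.cfg'))
# [{'type': 'net', 'channels': '3'},
#  {'type': 'convolutional', 'batch_normalize': '1', 'filters': '32'}]
# note: every value stays a string; the caller converts types as needed
```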
/Yolov3_pytorch/utils/visualize.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding:utf-8 -*-
3 | # power by Mr.Li
4 | import visdom
5 | import time
6 | import numpy as np
7 | import torch
8 | class Visualizer(object):
9 | '''
10 | Wraps the basic visdom operations; the native visdom interface
11 | is still available via `self.vis.function`
12 | '''
13 | def __init__(self, env='default', **kwargs):
14 | self.vis = visdom.Visdom(env=env, **kwargs)
15 | # index of the current point for each plot, i.e. its x coordinate
16 | # e.g. ('loss', 23) means the 23rd point of 'loss'
17 | self.index = {}
18 | self.log_text = ''
19 | def reinit(self,env='default',**kwargs):
20 | '''
21 | reinitialize visdom with a new configuration
22 | '''
23 | self.vis = visdom.Visdom(env=env,**kwargs)
24 | return self
25 | def plot_many(self, d):
26 | '''
27 | plot several loss curves at once
28 | @params d: dict (name,value) i.e. ('loss',0.11)
29 | '''
30 | for k, v in d.items():
31 | self.plot(k, v)
32 | def img_many(self, d):
33 | '''
34 | plot several images at once
35 | '''
36 | for k, v in d.items():
37 | self.img(k, v)
38 | def plot(self, name, y,**kwargs):
39 | '''
40 | self.plot('loss',1.00)
41 | '''
42 | # get the current x index for this plot name
43 | x = self.index.get(name, 0)
44 | self.vis.line(Y=np.array([y]), X=np.array([x]),
45 | win=name, # window name
46 | opts=dict(title=name),
47 | update=None if x == 0 else 'append', # append to the existing curve
48 | **kwargs
49 | )
50 | # advance the index by 1
51 | self.index[name] = x + 1
52 | def img(self, name, img_,**kwargs):
53 | '''
54 | self.img('input_img',t.Tensor(64,64))
55 | self.img('input_imgs',t.Tensor(3,64,64))
56 | self.img('input_imgs',t.Tensor(100,1,64,64))
57 | self.img('input_imgs',t.Tensor(100,3,64,64),nrows=10)
58 |
59 | !!!don‘t ~~self.img('input_imgs',t.Tensor(100,64,64),nrows=10)~~!!!
60 | '''
61 | self.vis.images(img_.cpu().numpy(),
62 | win=name,
63 | opts=dict(title=name),
64 | **kwargs
65 | )
66 | def log(self,info,win='log_text'):
67 | '''
68 | self.log({'loss':1,'lr':0.0001})
69 | append a log message to the text window
70 | '''
71 |
72 | self.log_text += ('[{time}] {info} <br>'.format(
73 | time=time.strftime('%m%d_%H%M%S'),\
74 | info=info))
75 | self.vis.text(self.log_text,win)
76 | def __getattr__(self, name):
77 | return getattr(self.vis, name)
78 |
79 | def create_vis_plot(self,_xlabel, _ylabel, _title, _legend):
80 | '''
81 | create a new visdom line plot
82 | '''
83 | viz = visdom.Visdom()
84 | return viz.line(
85 | X=torch.zeros((1,)).cpu(),
86 | Y=torch.zeros((1, 3)).cpu(),
87 | opts=dict(
88 | xlabel=_xlabel,
89 | ylabel=_ylabel,
90 | title=_title,
91 | legend=_legend
92 | )
93 | )
94 |
95 |
--------------------------------------------------------------------------------
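A minimal sketch of driving this Visualizer (assumes `python -m visdom.server` is already running; the import path is illustrative):

```python
from utils.visualize import Visualizer  # assumed module path for the class above

vis = Visualizer(env='yolov3')
for loss in [9.2, 7.5, 6.1]:
    vis.plot('loss', loss)    # one point per call, appended to the 'loss' line plot
vis.log('epoch:1, lr:0.001')  # appended to the HTML log window
```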