├── IMG
│   ├── image-20221130224209895.png
│   ├── image-20221130225020444.png
│   ├── image-20221130231306518.png
│   ├── image-20221130232150008.png
│   ├── image-20221130232226853.png
│   ├── image-20221130233557805.png
│   ├── image-20221130234049168.png
│   ├── image-20221130235406487.png
│   ├── image-20221130235431124.png
│   ├── image-20221130235548118.png
│   ├── image-20221130235602289.png
│   ├── image-20221201140006267.png
│   ├── image-20221201140844546.png
│   ├── image-20221201141758142.png
│   └── image-20221201141815392.png
├── README.md
└── faster-rcnn-pytorch-master
    ├── Faster R-CNN 论文复现代码.md
    ├── Faster R-CNN代码使用说明书.md
    ├── VOCdevkit
    │   └── VOC2007
    │       ├── Annotations
    │       │   └── 说明书.txt
    │       ├── ImageSets
    │       │   └── Main
    │       │       └── 说明书.txt
    │       └── JPEGImages
    │           └── 说明书.txt
    ├── frcnn.py
    ├── get_map.py
    ├── img
    │   ├── 1.jpg
    │   ├── 2.jpg
    │   └── 3.jpg
    ├── logs
    │   └── 说明书.txt
    ├── model_data
    │   ├── simhei.ttf
    │   └── voc_classes.txt
    ├── nets
    │   ├── __init__.py
    │   ├── classifier.py
    │   ├── frcnn.py
    │   ├── frcnn_training.py
    │   ├── resnet50.py
    │   ├── rpn.py
    │   └── vgg16.py
    ├── predict.py
    ├── requirements.txt
    ├── summary.py
    ├── train.py
    ├── utils
    │   ├── __init__.py
    │   ├── anchors.py
    │   ├── callbacks.py
    │   ├── dataloader.py
    │   ├── utils.py
    │   ├── utils_bbox.py
    │   ├── utils_fit.py
    │   └── utils_map.py
    └── voc_annotation.py

/IMG/image-20221130224209895.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221130224209895.png
--------------------------------------------------------------------------------
/IMG/image-20221130225020444.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221130225020444.png
--------------------------------------------------------------------------------
/IMG/image-20221130231306518.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221130231306518.png
--------------------------------------------------------------------------------
/IMG/image-20221130232150008.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221130232150008.png
--------------------------------------------------------------------------------
/IMG/image-20221130232226853.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221130232226853.png
--------------------------------------------------------------------------------
/IMG/image-20221130233557805.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221130233557805.png
--------------------------------------------------------------------------------
/IMG/image-20221130234049168.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221130234049168.png
--------------------------------------------------------------------------------
/IMG/image-20221130235406487.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221130235406487.png
--------------------------------------------------------------------------------
/IMG/image-20221130235431124.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221130235431124.png
--------------------------------------------------------------------------------
/IMG/image-20221130235548118.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221130235548118.png
--------------------------------------------------------------------------------
/IMG/image-20221130235602289.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221130235602289.png
--------------------------------------------------------------------------------
/IMG/image-20221201140006267.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221201140006267.png
--------------------------------------------------------------------------------
/IMG/image-20221201140844546.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221201140844546.png
--------------------------------------------------------------------------------
/IMG/image-20221201141758142.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221201141758142.png
--------------------------------------------------------------------------------
/IMG/image-20221201141815392.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221201141815392.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Faster-RCNN-Pytorch-master
# Faster R-CNN Code User Guide

## 1. My Environment

```python
python == 3.10.6
numpy == 1.23.3
opencv == 4.6.0
pillow == 9.2.0
pycocotools == 2.0.6
pytorch == 1.12.1
scipy == 1.9.3
torchvision == 0.13.1
tqdm == 4.64.1
matplotlib == 3.6.2
hdf5 == 1.12.1
```

## 2. Weight File Downloads

The weights we need, `voc_weights_resnet.pth` or `voc_weights_vgg.pth`, together with the backbone network weights, have been uploaded to Baidu Cloud and can be downloaded from there.

The first weight file, `voc_weights_resnet.pth`, is used when `resnet` is the backbone feature extraction network.

The second weight file, `voc_weights_vgg.pth`, is used when `vgg` is the backbone feature extraction network.

![image-20221130224209895](./IMG/image-20221130224209895.png)

I have also placed my trained parameters in the same folder:

![image-20221130225020444](./IMG/image-20221130225020444.png)

```python
Link: https://pan.baidu.com/s/1IiBMIyw8bF132FQGz79Q6Q
Extraction code: dpje
```
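Before training, it can be worth sanity-checking that a downloaded checkpoint deserializes correctly. A minimal sketch (the path assumes you placed the file under `model_data/`):

```python
import torch

# Load on the CPU just to verify the file is intact and inspect its keys.
state_dict = torch.load("model_data/voc_weights_resnet.pth", map_location="cpu")
print(len(state_dict), "parameter tensors, first key:", next(iter(state_dict)))
```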
## 3. VOC Dataset Download

The `VOC` dataset can be downloaded from the link below. It already contains the training set, test set, and validation set (identical to the test set), so no further splitting is needed.

This is the `VOC07+12` dataset and includes both training and test data. For convenience, `val.txt` and `test.txt` in this dataset are identical.

```python
Link: https://pan.baidu.com/s/1STBDRK2MpZfJJ-jRzL6iuA
Extraction code: vh7m
```

## 4. Model Training Steps

### (1) Training on the VOC07+12 dataset

#### 1. Preparing the dataset

This project trains in `VOC` format. Before training, download the `VOC07+12` dataset and unpack it into the root directory.

The root directory is the top-level directory:

![image-20221130231306518](./IMG/image-20221130231306518.png)

The contents fall into place under the `VOCdevkit` folder automatically.

#### 2. Processing the dataset

Set `annotation_mode = 2` in `voc_annotation.py`, then run `voc_annotation.py` to generate `2007_train.txt` and `2007_val.txt` in the root directory.

The corresponding source code:

![image-20221130232150008](./IMG/image-20221130232150008.png)

The generated files:

![image-20221130232226853](./IMG/image-20221130232226853.png)
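For orientation, each line of the generated files couples one image path with its ground-truth boxes. An illustrative sketch (paths and values are hypothetical; the exact layout is whatever `voc_annotation.py` writes):

```python
# 2007_train.txt / 2007_val.txt, one image per line:
# image_path xmin,ymin,xmax,ymax,class_index [more boxes ...]
VOCdevkit/VOC2007/JPEGImages/000005.jpg 263,211,324,339,8 165,264,253,372,8
VOCdevkit/VOC2007/JPEGImages/000007.jpg 141,50,500,330,6
```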
#### 3. Starting network training

The default parameters of `train.py` are set up for the `VOC` dataset, so simply running `train.py` starts training.

I first tried this on my own laptop (a 3060 with `6G` of VRAM), but it could not run: not enough GPU memory.

I switched to a lab machine (`Ubuntu18.04`, dual `2080Ti`, `64G` RAM, an `i9` CPU) with a batch_size of 100; one epoch took about 15 minutes on average.

If your hardware cannot manage the training, that is fine: I uploaded the trained parameters as well, in the first Baidu Cloud share:

![image-20221130233557805](./IMG/image-20221130233557805.png)

#### 4. Prediction

Prediction with the training results uses two files, `frcnn.py` and `predict.py`.

First, edit model_path and classes_path in `frcnn.py`; these two parameters must be changed.

model_path points to the trained weight file in the logs folder.

classes_path points to the txt listing the detection classes.

![image-20221130234049168](./IMG/image-20221130234049168.png)

Once these are set, run `predict.py` to detect. After it starts, enter an image path to run detection.
### (2) Training on your own dataset

#### 1. Preparing the dataset

This project trains in `VOC` format, so prepare your own dataset before training.

Before training, put the annotation files in the `Annotations` folder under `VOCdevkit/VOC2007`.

Before training, put the image files in the `JPEGImages` folder under `VOCdevkit/VOC2007`.

#### 2. Processing the dataset

With the files in place, use `voc_annotation.py` to generate the `2007_train.txt` and `2007_val.txt` used for training.

Edit the parameters in `voc_annotation.py`.

For a first training run you only need to change classes_path, which points to the txt listing the detection classes.

When training on your own dataset, create a `cls_classes.txt` and list the classes you want to distinguish, one per line.
The contents of `./faster-rcnn-pytorch-master/model_data/cls_classes.txt` follow this pattern.

For example, the classes for our `VOC` data are:

```python
aeroplane
bicycle
bird
boat
bottle
bus
car
cat
chair
cow
diningtable
dog
horse
motorbike
person
pottedplant
sheep
sofa
train
tvmonitor
```

Point classes_path in `voc_annotation.py` at `cls_classes.txt`, then run `voc_annotation.py`.

#### 3. Starting network training

**There are many training parameters, all in train.py; read the comments carefully after downloading the repository. The most important one is still classes_path in train.py.**

classes_path points to the txt listing the detection classes, the same txt as in `voc_annotation.py`! It must be changed when training on your own dataset!

After setting classes_path, run `train.py` to start training; after several epochs, the weights are written to the logs folder.

#### 4. Prediction with the training results

Prediction uses two files, `frcnn.py` and `predict.py`. Edit model_path and classes_path in `frcnn.py`.

model_path points to the trained weight file in the logs folder.

classes_path points to the txt listing the detection classes.

Once these are set, run `predict.py` to detect. After it starts, enter an image path to run detection.

## 5. Prediction Steps

### (1) Using pretrained weights

#### 1. After downloading and unpacking the repository, download `frcnn_weights.pth` from Baidu Cloud, put it in model_data, run `predict.py`, and enter:

![image-20221130235406487](./IMG/image-20221130235406487.png)

![image-20221130235431124](./IMG/image-20221130235431124.png)

#### 2. Settings in predict.py enable FPS testing and video detection.

![image-20221130235548118](./IMG/image-20221130235548118.png)

![image-20221130235602289](./IMG/image-20221130235602289.png)

### (2) Using your own trained weights

#### 1. Train as described in the training steps.

#### 2. In `frcnn.py`, edit model_path and classes_path in the block below so they match your trained files; model_path is the weight file under the logs folder, and classes_path lists the classes model_path was trained on.

```python
class FRCNN(object):
    _defaults = {
        #--------------------------------------------------------------------------#
        #   To predict with your own trained model, be sure to change
        #   model_path and classes_path!
        #   model_path points to the weight file under logs,
        #   classes_path points to the txt under model_data.
        #
        #   After training, several weight files sit in logs; pick one with a
        #   lower validation loss.
        #   Lower validation loss does not guarantee higher mAP; it only means
        #   those weights generalize better on the validation set.
        #   If a shape mismatch appears, also check the model_path and
        #   classes_path used during training.
        #--------------------------------------------------------------------------#
        "model_path"    : './faster-rcnn-pytorch-master/model_data/voc_weights_resnet.pth',
        "classes_path"  : './faster-rcnn-pytorch-master/model_data/voc_classes.txt',
        #---------------------------------------------------------------------#
        #   The backbone feature extraction network: resnet50 or vgg
        #---------------------------------------------------------------------#
        "backbone"      : "resnet50",
        #---------------------------------------------------------------------#
        #   Only predicted boxes scoring above this confidence are kept
        #---------------------------------------------------------------------#
        "confidence"    : 0.5,
        #---------------------------------------------------------------------#
        #   The nms_iou threshold used for non-maximum suppression
        #---------------------------------------------------------------------#
        "nms_iou"       : 0.3,
        #---------------------------------------------------------------------#
        #   Specifies the anchor (prior box) sizes
        #---------------------------------------------------------------------#
        'anchors_size'  : [8, 16, 32],
        #-------------------------------#
        #   Whether to use CUDA
        #   Set to False if there is no GPU
        #-------------------------------#
        "cuda"          : True,
    }
```
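Since `__init__` in `frcnn.py` copies `_defaults` onto the instance and then overlays any keyword arguments, individual entries can also be overridden at construction time instead of editing the file. A hedged example (the weight filename is a placeholder):

```python
frcnn = FRCNN(model_path="logs/your_trained_weights.pth", confidence=0.4, cuda=False)
```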
### (3) Run predict.py

### (4) Settings in predict.py enable FPS testing and video detection

## 6. Evaluation Steps

### (1) Evaluating the VOC07+12 test set

#### 1. Evaluation uses the VOC format.

`VOC07+12` already ships with a test split, so there is no need to run `voc_annotation.py` to generate the txt files under `ImageSets`.

#### 2. In `frcnn.py`, edit model_path and classes_path. model_path points to the trained weight file in the logs folder. classes_path points to the txt listing the detection classes.

#### 3. Run get_map.py to obtain the evaluation results, which are saved in the map_out folder.

### (2) Evaluating your own dataset

#### 1. Evaluation uses the `VOC` format.

#### 2. If `voc_annotation.py` was already run before training, the code automatically splits the dataset into training, validation, and test sets. To change the test-set proportion, edit `trainval_percent` in `voc_annotation.py`. `trainval_percent` sets the ratio of (training set + validation set) to test set; by default, (training + validation) : test = `9:1`. train_percent sets the ratio of training set to validation set inside (training set + validation set); by default, training : validation = `9:1`. A worked example of the resulting counts is sketched below.
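To make the two ratios concrete, the split arithmetic for a hypothetical dataset of 10,000 images:

```python
num_images       = 10000
trainval_percent = 0.9          # (train + val) : test = 9 : 1
train_percent    = 0.9          # train : val = 9 : 1 within (train + val)

num_trainval = int(num_images * trainval_percent)   # 9000
num_train    = int(num_trainval * train_percent)    # 8100
num_val      = num_trainval - num_train             # 900
num_test     = num_images - num_trainval            # 1000
```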
#### 3. After splitting the test set with `voc_annotation.py`, edit classes_path in get_map.py; classes_path points to the txt listing the detection classes, the same txt as used for training. It must be changed when evaluating your own dataset.

#### 4. In `frcnn.py`, edit model_path and classes_path. model_path points to the trained weight file in the logs folder. classes_path points to the txt listing the detection classes.

#### 5. Run `get_map.py` to obtain the evaluation results, which are saved in the map_out folder.

![image-20221201140006267](./IMG/image-20221201140006267.png)

![image-20221201140844546](./IMG/image-20221201140844546.png)

Wait a while!

![image-20221201141815392](./IMG/image-20221201141815392.png)

![image-20221201141758142](./IMG/image-20221201141758142.png)

## 7. References

https://github.com/bubbliiiing/faster-rcnn-pytorch

https://github.com/longcw/faster_rcnn_pytorch

https://github.com/jwyang/faster-rcnn.pytorch

--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/VOCdevkit/VOC2007/Annotations/说明书.txt:
--------------------------------------------------------------------------------
Annotation label files are stored here!
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/VOCdevkit/VOC2007/ImageSets/Main/说明书.txt:
--------------------------------------------------------------------------------
Training index files are stored here
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/VOCdevkit/VOC2007/JPEGImages/说明书.txt:
--------------------------------------------------------------------------------
Image files are stored here
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/frcnn.py:
--------------------------------------------------------------------------------
import colorsys
import os
import time

import numpy as np
import torch
import torch.nn as nn
from PIL import Image, ImageDraw, ImageFont

from nets.frcnn import FasterRCNN
from utils.utils import (cvtColor, get_classes, get_new_img_size, resize_image,
                         preprocess_input, show_config)
from utils.utils_bbox import DecodeBox


#--------------------------------------------#
#   To predict with your own trained model,
#   two parameters must be changed:
#   model_path and classes_path!
#   If a shape mismatch appears, be sure to
#   check the NUM_CLASSES, model_path and
#   classes_path used during training.
#--------------------------------------------#
class FRCNN(object):
    _defaults = {
        #--------------------------------------------------------------------------#
        #   To predict with your own trained model, be sure to change
        #   model_path and classes_path!
        #   model_path points to the weight file under logs,
        #   classes_path points to the txt under model_data.
        #
        #   After training, several weight files sit in logs; pick one with a
        #   lower validation loss.
        #   Lower validation loss does not guarantee higher mAP; it only means
        #   those weights generalize better on the validation set.
        #   If a shape mismatch appears, also check the model_path and
        #   classes_path used during training.
        #--------------------------------------------------------------------------#
        "model_path"    : './faster-rcnn-pytorch-master/model_data/voc_weights_resnet.pth',
        "classes_path"  : './faster-rcnn-pytorch-master/model_data/voc_classes.txt',
        #---------------------------------------------------------------------#
        #   The backbone feature extraction network: resnet50 or vgg
        #---------------------------------------------------------------------#
        "backbone"      : "resnet50",
        #---------------------------------------------------------------------#
        #   Only predicted boxes scoring above this confidence are kept
        #---------------------------------------------------------------------#
        "confidence"    : 0.5,
        #---------------------------------------------------------------------#
        #   The nms_iou threshold used for non-maximum suppression
        #---------------------------------------------------------------------#
        "nms_iou"       : 0.3,
        #---------------------------------------------------------------------#
        #   Specifies the anchor (prior box) sizes
        #---------------------------------------------------------------------#
        'anchors_size'  : [8, 16, 32],
        #-------------------------------#
        #   Whether to use CUDA
        #   Set to False if there is no GPU
        #-------------------------------#
        "cuda"          : True,
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"
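    #---------------------------------------------------------------#
    #   Note: __init__ below copies _defaults onto the instance and
    #   then overlays any keyword arguments, e.g. FRCNN(cuda=False).
    #   Overrides are also written back into _defaults, so they
    #   persist across later instances created in the same process.
    #---------------------------------------------------------------#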
    #---------------------------------------------------#
    #   Initialize Faster R-CNN
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        for name, value in kwargs.items():
            setattr(self, name, value)
            self._defaults[name] = value
        #---------------------------------------------------#
        #   Get the class names and the number of classes
        #---------------------------------------------------#
        self.class_names, self.num_classes = get_classes(self.classes_path)

        self.std = torch.Tensor([0.1, 0.1, 0.2, 0.2]).repeat(self.num_classes + 1)[None]
        if self.cuda:
            self.std = self.std.cuda()
        self.bbox_util = DecodeBox(self.std, self.num_classes)

        #---------------------------------------------------#
        #   Use a different box color for each class
        #---------------------------------------------------#
        hsv_tuples  = [(x / self.num_classes, 1., 1.) for x in range(self.num_classes)]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), self.colors))
        self.generate()

        show_config(**self._defaults)

    #---------------------------------------------------#
    #   Load the model
    #---------------------------------------------------#
    def generate(self):
        #-------------------------------#
        #   Load the model and weights
        #-------------------------------#
        self.net = FasterRCNN(self.num_classes, "predict", anchor_scales = self.anchors_size, backbone = self.backbone)
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.net.load_state_dict(torch.load(self.model_path, map_location=device))
        self.net = self.net.eval()
        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        if self.cuda:
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()
    #---------------------------------------------------#
    #   Detect an image
    #---------------------------------------------------#
    def detect_image(self, image, crop = False, count = False):
        #---------------------------------------------------#
        #   Compute the height and width of the input image
        #---------------------------------------------------#
        image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------#
        #   Compute the resized image size; the short side becomes 600
        #---------------------------------------------------#
        input_shape = get_new_img_size(image_shape[0], image_shape[1])
        #---------------------------------------------------------#
        #   Convert the image to RGB here to avoid errors with
        #   grayscale images during prediction.
        #   The code only supports RGB prediction; all other image
        #   types are converted to RGB.
        #---------------------------------------------------------#
        image       = cvtColor(image)
        #---------------------------------------------------------#
        #   Resize the original image so its short side is 600
        #---------------------------------------------------------#
        image_data  = resize_image(image, [input_shape[1], input_shape[0]])
        #---------------------------------------------------------#
        #   Add the batch_size dimension
        #---------------------------------------------------------#
        image_data  = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)

        with torch.no_grad():
            images = torch.from_numpy(image_data)
            if self.cuda:
                images = images.cuda()

            #-------------------------------------------------------------#
            #   roi_cls_locs    regression parameters for the proposals
            #   roi_scores      class scores for the proposals
            #   rois            proposal coordinates
            #-------------------------------------------------------------#
            roi_cls_locs, roi_scores, rois, _ = self.net(images)
            #-------------------------------------------------------------#
            #   Decode the proposals with the classifier predictions
            #   to obtain the final boxes
            #-------------------------------------------------------------#
            results = self.bbox_util.forward(roi_cls_locs, roi_scores, rois, image_shape, input_shape,
                                             nms_iou = self.nms_iou, confidence = self.confidence)
        #---------------------------------------------------------#
        #   If nothing is detected, return the original image
        #---------------------------------------------------------#
        if len(results[0]) <= 0:
            return image

        top_label   = np.array(results[0][:, 5], dtype = 'int32')
        top_conf    = results[0][:, 4]
        top_boxes   = results[0][:, :4]

        #---------------------------------------------------------#
        #   Set the font and the border thickness
        #---------------------------------------------------------#
        font        = ImageFont.truetype(font='./faster-rcnn-pytorch-master/model_data/simhei.ttf', size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
        thickness   = int(max((image.size[0] + image.size[1]) // np.mean(input_shape), 1))
        #---------------------------------------------------------#
        #   Counting
        #---------------------------------------------------------#
        if count:
            print("top_label:", top_label)
            classes_nums = np.zeros([self.num_classes])
            for i in range(self.num_classes):
                num = np.sum(top_label == i)
                if num > 0:
                    print(self.class_names[i], " : ", num)
                classes_nums[i] = num
            print("classes_nums:", classes_nums)
        #---------------------------------------------------------#
        #   Optionally crop out the detected objects
        #---------------------------------------------------------#
        if crop:
            for i, c in list(enumerate(top_label)):
                top, left, bottom, right = top_boxes[i]
                top     = max(0, np.floor(top).astype('int32'))
                left    = max(0, np.floor(left).astype('int32'))
                bottom  = min(image.size[1], np.floor(bottom).astype('int32'))
                right   = min(image.size[0], np.floor(right).astype('int32'))

                dir_save_path = "img_crop"
                if not os.path.exists(dir_save_path):
                    os.makedirs(dir_save_path)
                crop_image = image.crop([left, top, right, bottom])
                crop_image.save(os.path.join(dir_save_path, "crop_" + str(i) + ".png"), quality=95, subsampling=0)
                print("save crop_" + str(i) + ".png to " + dir_save_path)
        #---------------------------------------------------------#
        #   Draw the results on the image
        #---------------------------------------------------------#
        for i, c in list(enumerate(top_label)):
            predicted_class = self.class_names[int(c)]
            box             = top_boxes[i]
            score           = top_conf[i]

            top, left, bottom, right = box

            top     = max(0, np.floor(top).astype('int32'))
            left    = max(0, np.floor(left).astype('int32'))
            bottom  = min(image.size[1], np.floor(bottom).astype('int32'))
            right   = min(image.size[0], np.floor(right).astype('int32'))

            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            # print(label, top, left, bottom, right)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i], outline=self.colors[c])
            draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=self.colors[c])
            draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0), font=font)
            del draw

        return image

    def get_FPS(self, image, test_interval):
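        #---------------------------------------------------------------#
        #   One untimed forward pass below serves as a warm-up; the
        #   timed loop then averages test_interval runs, so the return
        #   value is seconds per image (FPS = 1 / tact_time).
        #---------------------------------------------------------------#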
        #---------------------------------------------------#
        #   Compute the height and width of the input image
        #---------------------------------------------------#
        image_shape = np.array(np.shape(image)[0:2])
        input_shape = get_new_img_size(image_shape[0], image_shape[1])
        #---------------------------------------------------------#
        #   Convert the image to RGB here to avoid errors with
        #   grayscale images during prediction.
        #   The code only supports RGB prediction; all other image
        #   types are converted to RGB.
        #---------------------------------------------------------#
        image       = cvtColor(image)

        #---------------------------------------------------------#
        #   Resize the original image so its short side is 600
        #---------------------------------------------------------#
        image_data  = resize_image(image, [input_shape[1], input_shape[0]])
        #---------------------------------------------------------#
        #   Add the batch_size dimension
        #---------------------------------------------------------#
        image_data  = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)

        with torch.no_grad():
            images = torch.from_numpy(image_data)
            if self.cuda:
                images = images.cuda()

            roi_cls_locs, roi_scores, rois, _ = self.net(images)
            #-------------------------------------------------------------#
            #   Decode the proposals with the classifier predictions
            #   to obtain the final boxes
            #-------------------------------------------------------------#
            results = self.bbox_util.forward(roi_cls_locs, roi_scores, rois, image_shape, input_shape,
                                             nms_iou = self.nms_iou, confidence = self.confidence)
        t1 = time.time()
        for _ in range(test_interval):
            with torch.no_grad():
                roi_cls_locs, roi_scores, rois, _ = self.net(images)
                #-------------------------------------------------------------#
                #   Decode the proposals with the classifier predictions
                #   to obtain the final boxes
                #-------------------------------------------------------------#
                results = self.bbox_util.forward(roi_cls_locs, roi_scores, rois, image_shape, input_shape,
                                                 nms_iou = self.nms_iou, confidence = self.confidence)

        t2 = time.time()
        tact_time = (t2 - t1) / test_interval
        return tact_time
    #---------------------------------------------------#
    #   Write detection results to txt for mAP evaluation
    #---------------------------------------------------#
    def get_map_txt(self, image_id, image, class_names, map_out_path):
        f = open(os.path.join(map_out_path, "detection-results/" + image_id + ".txt"), "w")
        #---------------------------------------------------#
        #   Compute the height and width of the input image
        #---------------------------------------------------#
        image_shape = np.array(np.shape(image)[0:2])
        input_shape = get_new_img_size(image_shape[0], image_shape[1])
        #---------------------------------------------------------#
        #   Convert the image to RGB here to avoid errors with
        #   grayscale images during prediction.
        #   The code only supports RGB prediction; all other image
        #   types are converted to RGB.
        #---------------------------------------------------------#
        image       = cvtColor(image)

        #---------------------------------------------------------#
        #   Resize the original image so its short side is 600
        #---------------------------------------------------------#
        image_data  = resize_image(image, [input_shape[1], input_shape[0]])
        #---------------------------------------------------------#
        #   Add the batch_size dimension
        #---------------------------------------------------------#
        image_data  = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)

        with torch.no_grad():
            images = torch.from_numpy(image_data)
            if self.cuda:
                images = images.cuda()

            roi_cls_locs, roi_scores, rois, _ = self.net(images)
            #-------------------------------------------------------------#
            #   Decode the proposals with the classifier predictions
            #   to obtain the final boxes
            #-------------------------------------------------------------#
            results = self.bbox_util.forward(roi_cls_locs, roi_scores, rois, image_shape, input_shape,
                                             nms_iou = self.nms_iou, confidence = self.confidence)
        #--------------------------------------#
        #   If nothing is detected, return
        #--------------------------------------#
        if len(results[0]) <= 0:
            return

        top_label   = np.array(results[0][:, 5], dtype = 'int32')
        top_conf    = results[0][:, 4]
        top_boxes   = results[0][:, :4]

        for i, c in list(enumerate(top_label)):
            predicted_class = self.class_names[int(c)]
            box             = top_boxes[i]
            score           = str(top_conf[i])

            top, left, bottom, right = box
            if predicted_class not in class_names:
                continue

            f.write("%s %s %s %s %s %s\n" % (predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)), str(int(bottom))))

        f.close()
        return
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/get_map.py:
--------------------------------------------------------------------------------
import os
import xml.etree.ElementTree as ET

from PIL import Image
from tqdm import tqdm

from utils.utils import get_classes
from utils.utils_map import get_coco_map, get_map
from frcnn import FRCNN

if __name__ == "__main__":
    '''
    Unlike AP, Recall and Precision are not area-based quantities, so they change
    as the confidence threshold changes.
    By default, the Recall and Precision computed here correspond to a confidence
    threshold of 0.5.

    Because of how mAP is computed, the network must produce nearly all of its
    predicted boxes so that Recall and Precision can be evaluated at different
    thresholds. The txt files under map_out/detection-results/ therefore usually
    contain more boxes than a direct predict run; the goal is to list every
    possible predicted box.
    '''
    #------------------------------------------------------------------------------------------------------------------#
    #   map_mode selects what this script computes when run:
    #   map_mode 0: the whole mAP pipeline: predictions, ground truth, and VOC_map.
    #   map_mode 1: predictions only.
    #   map_mode 2: ground truth only.
    #   map_mode 3: VOC_map only.
    #   map_mode 4: the 0.50:0.95 map of the current dataset via the COCO toolbox.
    #               Requires predictions, ground truth, and pycocotools installed.
    #-------------------------------------------------------------------------------------------------------------------#
    map_mode        = 0
    #--------------------------------------------------------------------------------------#
    #   classes_path here specifies the classes for which VOC_map is measured.
    #   In general it should match the classes_path used for training and prediction.
    #--------------------------------------------------------------------------------------#
    classes_path    = './faster-rcnn-pytorch-master/model_data/voc_classes.txt'
    #--------------------------------------------------------------------------------------#
    #   MINOVERLAP specifies the desired mAP0.x; look up what mAP0.x means if unsure.
    #   For example, to compute mAP0.75, set MINOVERLAP = 0.75.
    #
    #   A predicted box whose overlap with a ground-truth box exceeds MINOVERLAP counts
    #   as a positive sample, otherwise as a negative one.
    #   The larger MINOVERLAP is, the more precise a predicted box must be to count as
    #   positive, and the lower the computed mAP.
    #--------------------------------------------------------------------------------------#
    MINOVERLAP      = 0.5
    #--------------------------------------------------------------------------------------#
    #   Because of how mAP is computed, the network must produce nearly all of its
    #   predicted boxes, so confidence should be set as low as possible to collect them.
    #
    #   This value is usually left alone. Since computing mAP needs nearly all predicted
    #   boxes, the confidence here must not be changed casually.
    #   To get Recall and Precision at other thresholds, change score_threhold below.
    #--------------------------------------------------------------------------------------#
    confidence      = 0.02
    #--------------------------------------------------------------------------------------#
    #   The NMS IoU used at prediction time; larger means less strict suppression.
    #
    #   This value is usually left alone.
    #--------------------------------------------------------------------------------------#
    nms_iou         = 0.5
    #---------------------------------------------------------------------------------------------------------------#
    #   Unlike AP, Recall and Precision are not area-based, so they differ with the threshold.
    #
    #   By default, the Recall and Precision computed here correspond to a threshold of 0.5
    #   (defined here as score_threhold).
    #   Since computing mAP needs nearly all predicted boxes, the confidence defined above
    #   must not be changed casually. A separate score_threhold is therefore defined here
    #   as the threshold at which Recall and Precision are read off during the mAP run.
    #---------------------------------------------------------------------------------------------------------------#
    score_threhold  = 0.5
    #-------------------------------------------------------#
    #   map_vis toggles visualization of the VOC_map computation
    #-------------------------------------------------------#
    map_vis         = False
    #-------------------------------------------------------#
    #   Points to the folder containing the VOC dataset.
    #   Defaults to the VOC dataset in the root directory.
    #-------------------------------------------------------#
    VOCdevkit_path  = './faster-rcnn-pytorch-master/VOCdevkit'
    #-------------------------------------------------------#
    #   Output folder for the results, map_out by default
    #-------------------------------------------------------#
    map_out_path    = 'map_out'

    image_ids = open(os.path.join(VOCdevkit_path, "./VOC2007/ImageSets/Main/test.txt")).read().strip().split()

    if not os.path.exists(map_out_path):
        os.makedirs(map_out_path)
    if not os.path.exists(os.path.join(map_out_path, 'ground-truth')):
        os.makedirs(os.path.join(map_out_path, 'ground-truth'))
    if not os.path.exists(os.path.join(map_out_path, 'detection-results')):
        os.makedirs(os.path.join(map_out_path, 'detection-results'))
    if not os.path.exists(os.path.join(map_out_path, 'images-optional')):
        os.makedirs(os.path.join(map_out_path, 'images-optional'))

    class_names, _ = get_classes(classes_path)

    if map_mode == 0 or map_mode == 1:
        print("Load model.")
        frcnn = FRCNN(confidence = confidence, nms_iou = nms_iou)
        print("Load model done.")

        print("Get predict result.")
        for image_id in tqdm(image_ids):
            image_path  = os.path.join(VOCdevkit_path, "./VOC2007/JPEGImages/" + image_id + ".jpg")
            image       = Image.open(image_path)
            if map_vis:
                image.save(os.path.join(map_out_path, "images-optional/" + image_id + ".jpg"))
            frcnn.get_map_txt(image_id, image, class_names, map_out_path)
        print("Get predict result done.")
    if map_mode == 0 or map_mode == 2:
        print("Get ground truth result.")
        for image_id in tqdm(image_ids):
            with open(os.path.join(map_out_path, "ground-truth/" + image_id + ".txt"), "w") as new_f:
                root = ET.parse(os.path.join(VOCdevkit_path, "./VOC2007/Annotations/" + image_id + ".xml")).getroot()
                for obj in root.findall('object'):
                    difficult_flag = False
                    if obj.find('difficult') != None:
                        difficult = obj.find('difficult').text
                        if int(difficult) == 1:
                            difficult_flag = True
                    obj_name = obj.find('name').text
                    if obj_name not in class_names:
                        continue
                    bndbox  = obj.find('bndbox')
                    left    = bndbox.find('xmin').text
                    top     = bndbox.find('ymin').text
                    right   = bndbox.find('xmax').text
                    bottom  = bndbox.find('ymax').text

                    if difficult_flag:
                        new_f.write("%s %s %s %s %s difficult\n" % (obj_name, left, top, right, bottom))
                    else:
                        new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom))
        print("Get ground truth result done.")

    if map_mode == 0 or map_mode == 3:
        print("Get map.")
        get_map(MINOVERLAP, True, score_threhold = score_threhold, path = map_out_path)
        print("Get map done.")

    if map_mode == 4:
        print("Get map.")
        get_coco_map(class_names = class_names, path = map_out_path)
        print("Get map done.")
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/img/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/faster-rcnn-pytorch-master/img/1.jpg
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/img/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/faster-rcnn-pytorch-master/img/2.jpg
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/img/3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/faster-rcnn-pytorch-master/img/3.jpg
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/logs/说明书.txt:
--------------------------------------------------------------------------------
Trained weight files are saved here!
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/model_data/simhei.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/faster-rcnn-pytorch-master/model_data/simhei.ttf
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/model_data/voc_classes.txt:
--------------------------------------------------------------------------------
aeroplane
bicycle
bird
boat
bottle
bus
car
cat
chair
cow
diningtable
dog
horse
motorbike
person
pottedplant
sheep
sofa
train
tvmonitor
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/nets/__init__.py:
--------------------------------------------------------------------------------
#
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/nets/classifier.py:
--------------------------------------------------------------------------------
import warnings

import torch
from torch import nn
from torchvision.ops import RoIPool

warnings.filterwarnings("ignore")

class VGG16RoIHead(nn.Module):
    def __init__(self, n_class, roi_size, spatial_scale, classifier):
        super(VGG16RoIHead, self).__init__()
        self.classifier = classifier
        #--------------------------------------#
        #   Box regression on the RoIPooling result
        #--------------------------------------#
        self.cls_loc    = nn.Linear(4096, n_class * 4)
        #-----------------------------------#
        #   Classification on the RoIPooling result
        #-----------------------------------#
        self.score      = nn.Linear(4096, n_class)
        #-----------------------------------#
        #   Weight initialization
        #-----------------------------------#
        normal_init(self.cls_loc, 0, 0.001)
        normal_init(self.score, 0, 0.01)

        self.roi = RoIPool((roi_size, roi_size), spatial_scale)

    def forward(self, x, rois, roi_indices, img_size):
        n, _, _, _ = x.shape
        if x.is_cuda:
            roi_indices = roi_indices.cuda()
            rois = rois.cuda()
        rois        = torch.flatten(rois, 0, 1)
        roi_indices = torch.flatten(roi_indices, 0, 1)

        rois_feature_map = torch.zeros_like(rois)
        rois_feature_map[:, [0, 2]] = rois[:, [0, 2]] / img_size[1] * x.size()[3]
        rois_feature_map[:, [1, 3]] = rois[:, [1, 3]] / img_size[0] * x.size()[2]

        indices_and_rois = torch.cat([roi_indices[:, None], rois_feature_map], dim = 1)
        #-----------------------------------#
        #   Crop the shared feature map with the proposals
        #-----------------------------------#
        pool = self.roi(x, indices_and_rois)
        #-----------------------------------#
        #   Feature extraction with the classifier network
        #-----------------------------------#
        pool = pool.view(pool.size(0), -1)
        #--------------------------------------------------------------#
        #   For a single input image, fc7 here has shape [300, 4096]
        #--------------------------------------------------------------#
        fc7 = self.classifier(pool)

        roi_cls_locs    = self.cls_loc(fc7)
        roi_scores      = self.score(fc7)

        roi_cls_locs    = roi_cls_locs.view(n, -1, roi_cls_locs.size(1))
        roi_scores      = roi_scores.view(n, -1, roi_scores.size(1))
        return roi_cls_locs, roi_scores
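#----------------------------------------------------------------------#
#   Note: both RoI heads are constructed with spatial_scale = 1 in
#   nets/frcnn.py because forward() above already rescales the rois
#   into feature-map coordinates before calling RoIPool.
#----------------------------------------------------------------------#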
class Resnet50RoIHead(nn.Module):
    def __init__(self, n_class, roi_size, spatial_scale, classifier):
        super(Resnet50RoIHead, self).__init__()
        self.classifier = classifier
        #--------------------------------------#
        #   Box regression on the RoIPooling result
        #--------------------------------------#
        self.cls_loc = nn.Linear(2048, n_class * 4)
        #-----------------------------------#
        #   Classification on the RoIPooling result
        #-----------------------------------#
        self.score = nn.Linear(2048, n_class)
        #-----------------------------------#
        #   Weight initialization
        #-----------------------------------#
        normal_init(self.cls_loc, 0, 0.001)
        normal_init(self.score, 0, 0.01)

        self.roi = RoIPool((roi_size, roi_size), spatial_scale)

    def forward(self, x, rois, roi_indices, img_size):
        n, _, _, _ = x.shape
        if x.is_cuda:
            roi_indices = roi_indices.cuda()
            rois = rois.cuda()
        rois        = torch.flatten(rois, 0, 1)
        roi_indices = torch.flatten(roi_indices, 0, 1)

        rois_feature_map = torch.zeros_like(rois)
        rois_feature_map[:, [0, 2]] = rois[:, [0, 2]] / img_size[1] * x.size()[3]
        rois_feature_map[:, [1, 3]] = rois[:, [1, 3]] / img_size[0] * x.size()[2]

        indices_and_rois = torch.cat([roi_indices[:, None], rois_feature_map], dim = 1)
        #-----------------------------------#
        #   Crop the shared feature map with the proposals
        #-----------------------------------#
        pool = self.roi(x, indices_and_rois)
        #-----------------------------------#
        #   Feature extraction with the classifier network
        #-----------------------------------#
        fc7 = self.classifier(pool)
        #--------------------------------------------------------------#
        #   For a single input image, fc7 here has shape [300, 2048]
        #--------------------------------------------------------------#
        fc7 = fc7.view(fc7.size(0), -1)

        roi_cls_locs    = self.cls_loc(fc7)
        roi_scores      = self.score(fc7)
        roi_cls_locs    = roi_cls_locs.view(n, -1, roi_cls_locs.size(1))
        roi_scores      = roi_scores.view(n, -1, roi_scores.size(1))
        return roi_cls_locs, roi_scores

def normal_init(m, mean, stddev, truncated = False):
    if truncated:
        m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean)  # not a perfect approximation
    else:
        m.weight.data.normal_(mean, stddev)
    m.bias.data.zero_()
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/nets/frcnn.py:
--------------------------------------------------------------------------------
import torch.nn as nn

from nets.classifier import Resnet50RoIHead, VGG16RoIHead
from nets.resnet50 import resnet50
from nets.rpn import RegionProposalNetwork
from nets.vgg16 import decom_vgg16


class FasterRCNN(nn.Module):
    def __init__(self, num_classes,
                 mode = "training",
                 feat_stride = 16,
                 anchor_scales = [8, 16, 32],
                 ratios = [0.5, 1, 2],
                 backbone = 'vgg',
                 pretrained = False):
        super(FasterRCNN, self).__init__()
        self.feat_stride = feat_stride
        #---------------------------------#
        #   Two backbones are available:
        #   vgg and resnet50
        #---------------------------------#
        if backbone == 'vgg':
            self.extractor, classifier = decom_vgg16(pretrained)
            #---------------------------------#
            #   Build the region proposal network
            #---------------------------------#
            self.rpn = RegionProposalNetwork(
                512, 512,
                ratios          = ratios,
                anchor_scales   = anchor_scales,
                feat_stride     = self.feat_stride,
                mode            = mode
            )
            #---------------------------------#
            #   Build the classifier network
            #---------------------------------#
            self.head = VGG16RoIHead(
                n_class         = num_classes + 1,
                roi_size        = 7,
                spatial_scale   = 1,
                classifier      = classifier
            )
        elif backbone == 'resnet50':
            self.extractor, classifier = resnet50(pretrained)
            #---------------------------------#
            #   Build the region proposal network
            #---------------------------------#
            self.rpn = RegionProposalNetwork(
                1024, 512,
                ratios          = ratios,
                anchor_scales   = anchor_scales,
                feat_stride     = self.feat_stride,
                mode            = mode
            )
            #---------------------------------#
            #   Build the classifier network
            #---------------------------------#
            self.head = Resnet50RoIHead(
                n_class         = num_classes + 1,
                roi_size        = 14,
                spatial_scale   = 1,
                classifier      = classifier
            )

    def forward(self, x, scale=1., mode="forward"):
        if mode == "forward":
            #---------------------------------#
            #   Compute the input image size
            #---------------------------------#
            img_size = x.shape[2:]
            #---------------------------------#
            #   Extract features with the backbone
            #---------------------------------#
            base_feature = self.extractor.forward(x)

            #---------------------------------#
            #   Get the proposals
            #---------------------------------#
            _, _, rois, roi_indices, _ = self.rpn.forward(base_feature, img_size, scale)
            #---------------------------------------#
            #   Get the classification and regression
            #   results of the classifier head
            #---------------------------------------#
            roi_cls_locs, roi_scores = self.head.forward(base_feature, rois, roi_indices, img_size)
            return roi_cls_locs, roi_scores, rois, roi_indices
        elif mode == "extractor":
            #---------------------------------#
            #   Extract features with the backbone
            #---------------------------------#
            base_feature = self.extractor.forward(x)
            return base_feature
        elif mode == "rpn":
            base_feature, img_size = x
            #---------------------------------#
            #   Get the proposals
            #---------------------------------#
            rpn_locs, rpn_scores, rois, roi_indices, anchor = self.rpn.forward(base_feature, img_size, scale)
            return rpn_locs, rpn_scores, rois, roi_indices, anchor
        elif mode == "head":
            base_feature, rois, roi_indices, img_size = x
            #---------------------------------------#
            #   Get the classification and regression
            #   results of the classifier head
            #---------------------------------------#
            roi_cls_locs, roi_scores = self.head.forward(base_feature, rois, roi_indices, img_size)
            return roi_cls_locs, roi_scores

    def freeze_bn(self):
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/nets/frcnn_training.py:
--------------------------------------------------------------------------------
import math
from functools import partial

import numpy as np
import torch
import torch.nn as nn
from torch.nn import functional as F


def bbox_iou(bbox_a, bbox_b):
    if bbox_a.shape[1] != 4 or bbox_b.shape[1] != 4:
        print(bbox_a, bbox_b)
        raise IndexError
    tl = np.maximum(bbox_a[:, None, :2], bbox_b[:, :2])
    br = np.minimum(bbox_a[:, None, 2:], bbox_b[:, 2:])
    area_i = np.prod(br - tl, axis=2) * (tl < br).all(axis=2)
    area_a = np.prod(bbox_a[:, 2:] - bbox_a[:, :2], axis=1)
    area_b = np.prod(bbox_b[:, 2:] - bbox_b[:, :2], axis=1)
    return area_i / (area_a[:, None] + area_b - area_i)
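#----------------------------------------------------------------------#
#   bbox2loc below encodes dst_bbox relative to src_bbox using the
#   standard Faster R-CNN box parameterization:
#       dx = (ctr_x_dst - ctr_x_src) / w_src
#       dy = (ctr_y_dst - ctr_y_src) / h_src
#       dw = log(w_dst / w_src)
#       dh = log(h_dst / h_src)
#----------------------------------------------------------------------#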
| height = src_bbox[:, 3] - src_bbox[:, 1] 24 | ctr_x = src_bbox[:, 0] + 0.5 * width 25 | ctr_y = src_bbox[:, 1] + 0.5 * height 26 | 27 | base_width = dst_bbox[:, 2] - dst_bbox[:, 0] 28 | base_height = dst_bbox[:, 3] - dst_bbox[:, 1] 29 | base_ctr_x = dst_bbox[:, 0] + 0.5 * base_width 30 | base_ctr_y = dst_bbox[:, 1] + 0.5 * base_height 31 | 32 | eps = np.finfo(height.dtype).eps 33 | width = np.maximum(width, eps) 34 | height = np.maximum(height, eps) 35 | 36 | dx = (base_ctr_x - ctr_x) / width 37 | dy = (base_ctr_y - ctr_y) / height 38 | dw = np.log(base_width / width) 39 | dh = np.log(base_height / height) 40 | 41 | loc = np.vstack((dx, dy, dw, dh)).transpose() 42 | return loc 43 | 44 | class AnchorTargetCreator(object): 45 | def __init__(self, n_sample=256, pos_iou_thresh=0.7, neg_iou_thresh=0.3, pos_ratio=0.5): 46 | self.n_sample = n_sample 47 | self.pos_iou_thresh = pos_iou_thresh 48 | self.neg_iou_thresh = neg_iou_thresh 49 | self.pos_ratio = pos_ratio 50 | 51 | def __call__(self, bbox, anchor): 52 | argmax_ious, label = self._create_label(anchor, bbox) 53 | if (label > 0).any(): 54 | loc = bbox2loc(anchor, bbox[argmax_ious]) 55 | return loc, label 56 | else: 57 | return np.zeros_like(anchor), label 58 | 59 | def _calc_ious(self, anchor, bbox): 60 | #----------------------------------------------# 61 | # anchor和bbox的iou 62 | # 获得的ious的shape为[num_anchors, num_gt] 63 | #----------------------------------------------# 64 | ious = bbox_iou(anchor, bbox) 65 | 66 | if len(bbox)==0: 67 | return np.zeros(len(anchor), np.int32), np.zeros(len(anchor)), np.zeros(len(bbox)) 68 | #---------------------------------------------------------# 69 | # 获得每一个先验框最对应的真实框 [num_anchors, ] 70 | #---------------------------------------------------------# 71 | argmax_ious = ious.argmax(axis=1) 72 | #---------------------------------------------------------# 73 | # 找出每一个先验框最对应的真实框的iou [num_anchors, ] 74 | #---------------------------------------------------------# 75 | max_ious = np.max(ious, axis=1) 76 | #---------------------------------------------------------# 77 | # 获得每一个真实框最对应的先验框 [num_gt, ] 78 | #---------------------------------------------------------# 79 | gt_argmax_ious = ious.argmax(axis=0) 80 | #---------------------------------------------------------# 81 | # 保证每一个真实框都存在对应的先验框 82 | #---------------------------------------------------------# 83 | for i in range(len(gt_argmax_ious)): 84 | argmax_ious[gt_argmax_ious[i]] = i 85 | 86 | return argmax_ious, max_ious, gt_argmax_ious 87 | 88 | def _create_label(self, anchor, bbox): 89 | # ------------------------------------------ # 90 | # 1是正样本,0是负样本,-1忽略 91 | # 初始化的时候全部设置为-1 92 | # ------------------------------------------ # 93 | label = np.empty((len(anchor),), dtype=np.int32) 94 | label.fill(-1) 95 | 96 | # ------------------------------------------------------------------------ # 97 | # argmax_ious为每个先验框对应的最大的真实框的序号 [num_anchors, ] 98 | # max_ious为每个先验框对应的最大的真实框的iou [num_anchors, ] 99 | # gt_argmax_ious为每一个真实框对应的最大的先验框的序号 [num_gt, ] 100 | # ------------------------------------------------------------------------ # 101 | argmax_ious, max_ious, gt_argmax_ious = self._calc_ious(anchor, bbox) 102 | 103 | # ----------------------------------------------------- # 104 | # 如果小于门限值则设置为负样本 105 | # 如果大于门限值则设置为正样本 106 | # 每个真实框至少对应一个先验框 107 | # ----------------------------------------------------- # 108 | label[max_ious < self.neg_iou_thresh] = 0 109 | label[max_ious >= self.pos_iou_thresh] = 1 110 | if len(gt_argmax_ious)>0: 111 | label[gt_argmax_ious] = 1 112 |
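
A tiny numeric check of the two helpers above, with the values worked out by hand (assumes the script is run from the repo root so `nets.frcnn_training` is importable):

```python
import numpy as np
from nets.frcnn_training import bbox_iou, bbox2loc

anchor = np.array([[0., 0., 100., 100.]])    # (x1, y1, x2, y2)
gt     = np.array([[50., 50., 150., 150.]])

# Intersection is the 50x50 square [50,100]x[50,100] -> area 2500
# Union = 10000 + 10000 - 2500 = 17500, IoU = 2500/17500 ≈ 0.143
print(bbox_iou(anchor, gt))                  # [[0.14285714]]

# bbox2loc encodes the anchor-to-gt offset used as a regression target:
# dx = (100-50)/100 = 0.5, dy = 0.5, dw = log(100/100) = 0, dh = 0
print(bbox2loc(anchor, gt))                  # [[0.5 0.5 0.  0. ]]
```
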
113 | # ----------------------------------------------------- # 114 | # 判断正样本数量是否大于128,如果大于则限制在128 115 | # ----------------------------------------------------- # 116 | n_pos = int(self.pos_ratio * self.n_sample) 117 | pos_index = np.where(label == 1)[0] 118 | if len(pos_index) > n_pos: 119 | disable_index = np.random.choice(pos_index, size=(len(pos_index) - n_pos), replace=False) 120 | label[disable_index] = -1 121 | 122 | # ----------------------------------------------------- # 123 | # 平衡正负样本,保持总数量为256 124 | # ----------------------------------------------------- # 125 | n_neg = self.n_sample - np.sum(label == 1) 126 | neg_index = np.where(label == 0)[0] 127 | if len(neg_index) > n_neg: 128 | disable_index = np.random.choice(neg_index, size=(len(neg_index) - n_neg), replace=False) 129 | label[disable_index] = -1 130 | 131 | return argmax_ious, label 132 | 133 | 134 | class ProposalTargetCreator(object): 135 | def __init__(self, n_sample=128, pos_ratio=0.5, pos_iou_thresh=0.5, neg_iou_thresh_high=0.5, neg_iou_thresh_low=0): 136 | self.n_sample = n_sample 137 | self.pos_ratio = pos_ratio 138 | self.pos_roi_per_image = np.round(self.n_sample * self.pos_ratio) 139 | self.pos_iou_thresh = pos_iou_thresh 140 | self.neg_iou_thresh_high = neg_iou_thresh_high 141 | self.neg_iou_thresh_low = neg_iou_thresh_low 142 | 143 | def __call__(self, roi, bbox, label, loc_normalize_std=(0.1, 0.1, 0.2, 0.2)): 144 | roi = np.concatenate((roi.detach().cpu().numpy(), bbox), axis=0) 145 | # ----------------------------------------------------- # 146 | # 计算建议框和真实框的重合程度 147 | # ----------------------------------------------------- # 148 | iou = bbox_iou(roi, bbox) 149 | 150 | if len(bbox)==0: 151 | gt_assignment = np.zeros(len(roi), np.int32) 152 | max_iou = np.zeros(len(roi)) 153 | gt_roi_label = np.zeros(len(roi)) 154 | else: 155 | #---------------------------------------------------------# 156 | # 获得每一个建议框最对应的真实框 [num_roi, ] 157 | #---------------------------------------------------------# 158 | gt_assignment = iou.argmax(axis=1) 159 | #---------------------------------------------------------# 160 | # 获得每一个建议框最对应的真实框的iou [num_roi, ] 161 | #---------------------------------------------------------# 162 | max_iou = iou.max(axis=1) 163 | #---------------------------------------------------------# 164 | # 真实框的标签要+1因为有背景的存在 165 | #---------------------------------------------------------# 166 | gt_roi_label = label[gt_assignment] + 1 167 | 168 | #----------------------------------------------------------------# 169 | # 满足建议框和真实框重合程度大于pos_iou_thresh的作为正样本 170 | # 将正样本的数量限制在self.pos_roi_per_image以内 171 | #----------------------------------------------------------------# 172 | pos_index = np.where(max_iou >= self.pos_iou_thresh)[0] 173 | pos_roi_per_this_image = int(min(self.pos_roi_per_image, pos_index.size)) 174 | if pos_index.size > 0: 175 | pos_index = np.random.choice(pos_index, size=pos_roi_per_this_image, replace=False) 176 | 177 | #-----------------------------------------------------------------------------------------------------# 178 | # 满足建议框和真实框重合程度小于neg_iou_thresh_high大于neg_iou_thresh_low作为负样本 179 | # 将正样本的数量和负样本的数量的总和固定成self.n_sample 180 | #-----------------------------------------------------------------------------------------------------# 181 | neg_index = np.where((max_iou < self.neg_iou_thresh_high) & (max_iou >= self.neg_iou_thresh_low))[0] 182 | neg_roi_per_this_image = self.n_sample - pos_roi_per_this_image 183 | neg_roi_per_this_image = int(min(neg_roi_per_this_image, neg_index.size)) 184
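
The sampling quotas above are easy to check on toy data: `AnchorTargetCreator` caps positives at `pos_ratio * n_sample = 128` and tops the batch up with negatives to 256 labels in total. A small sketch with random, hypothetical boxes (assumes the repo root is on the path):

```python
import numpy as np
from nets.frcnn_training import AnchorTargetCreator

rng = np.random.RandomState(0)
# 1000 random (x1, y1, x2, y2) anchors inside a 600x600 image:
# sorting the four coordinates guarantees x1 <= x2 and y1 <= y2.
anchor = np.sort(rng.uniform(0, 600, size=(1000, 4)), axis=-1)
bbox = np.array([[100., 100., 200., 200.],
                 [300., 300., 450., 450.]])

loc, label = AnchorTargetCreator()(bbox, anchor)
print((label == 1).sum(), (label == 0).sum(), (label == -1).sum())
# positives <= 128, and positives + negatives == 256 when enough anchors qualify
```
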
| if neg_index.size > 0: 185 | neg_index = np.random.choice(neg_index, size=neg_roi_per_this_image, replace=False) 186 | 187 | #---------------------------------------------------------# 188 | # sample_roi [n_sample, ] 189 | # gt_roi_loc [n_sample, 4] 190 | # gt_roi_label [n_sample, ] 191 | #---------------------------------------------------------# 192 | keep_index = np.append(pos_index, neg_index) 193 | 194 | sample_roi = roi[keep_index] 195 | if len(bbox)==0: 196 | return sample_roi, np.zeros_like(sample_roi), gt_roi_label[keep_index] 197 | 198 | gt_roi_loc = bbox2loc(sample_roi, bbox[gt_assignment[keep_index]]) 199 | gt_roi_loc = (gt_roi_loc / np.array(loc_normalize_std, np.float32)) 200 | 201 | gt_roi_label = gt_roi_label[keep_index] 202 | gt_roi_label[pos_roi_per_this_image:] = 0 203 | return sample_roi, gt_roi_loc, gt_roi_label 204 | 205 | class FasterRCNNTrainer(nn.Module): 206 | def __init__(self, model_train, optimizer): 207 | super(FasterRCNNTrainer, self).__init__() 208 | self.model_train = model_train 209 | self.optimizer = optimizer 210 | 211 | self.rpn_sigma = 1 212 | self.roi_sigma = 1 213 | 214 | self.anchor_target_creator = AnchorTargetCreator() 215 | self.proposal_target_creator = ProposalTargetCreator() 216 | 217 | self.loc_normalize_std = [0.1, 0.1, 0.2, 0.2] 218 | 219 | def _fast_rcnn_loc_loss(self, pred_loc, gt_loc, gt_label, sigma): 220 | pred_loc = pred_loc[gt_label > 0] 221 | gt_loc = gt_loc[gt_label > 0] 222 | 223 | sigma_squared = sigma ** 2 224 | regression_diff = (gt_loc - pred_loc) 225 | regression_diff = regression_diff.abs().float() 226 | regression_loss = torch.where( 227 | regression_diff < (1. / sigma_squared), 228 | 0.5 * sigma_squared * regression_diff ** 2, 229 | regression_diff - 0.5 / sigma_squared 230 | ) 231 | regression_loss = regression_loss.sum() 232 | num_pos = (gt_label > 0).sum().float() 233 | 234 | regression_loss /= torch.max(num_pos, torch.ones_like(num_pos)) 235 | return regression_loss 236 | 237 | def forward(self, imgs, bboxes, labels, scale): 238 | n = imgs.shape[0] 239 | img_size = imgs.shape[2:] 240 | #-------------------------------# 241 | # 获取公用特征层 242 | #-------------------------------# 243 | base_feature = self.model_train(imgs, mode = 'extractor') 244 | 245 | # -------------------------------------------------- # 246 | # 利用rpn网络获得调整参数、得分、建议框、先验框 247 | # -------------------------------------------------- # 248 | rpn_locs, rpn_scores, rois, roi_indices, anchor = self.model_train(x = [base_feature, img_size], scale = scale, mode = 'rpn') 249 | 250 | rpn_loc_loss_all, rpn_cls_loss_all, roi_loc_loss_all, roi_cls_loss_all = 0, 0, 0, 0 251 | sample_rois, sample_indexes, gt_roi_locs, gt_roi_labels = [], [], [], [] 252 | for i in range(n): 253 | bbox = bboxes[i] 254 | label = labels[i] 255 | rpn_loc = rpn_locs[i] 256 | rpn_score = rpn_scores[i] 257 | roi = rois[i] 258 | # -------------------------------------------------- # 259 | # 利用真实框和先验框获得建议框网络应该有的预测结果 260 | # 给每个先验框都打上标签 261 | # gt_rpn_loc [num_anchors, 4] 262 | # gt_rpn_label [num_anchors, ] 263 | # -------------------------------------------------- # 264 | gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(bbox, anchor[0].cpu().numpy()) 265 | gt_rpn_loc = torch.Tensor(gt_rpn_loc).type_as(rpn_locs) 266 | gt_rpn_label = torch.Tensor(gt_rpn_label).type_as(rpn_locs).long() 267 | # -------------------------------------------------- # 268 | # 分别计算建议框网络的回归损失和分类损失 269 | # -------------------------------------------------- # 270 | rpn_loc_loss = self._fast_rcnn_loc_loss(rpn_loc, 
gt_rpn_loc, gt_rpn_label, self.rpn_sigma) 271 | rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label, ignore_index=-1) 272 | 273 | rpn_loc_loss_all += rpn_loc_loss 274 | rpn_cls_loss_all += rpn_cls_loss 275 | # ------------------------------------------------------ # 276 | # 利用真实框和建议框获得classifier网络应该有的预测结果 277 | # 获得三个变量,分别是sample_roi, gt_roi_loc, gt_roi_label 278 | # sample_roi [n_sample, ] 279 | # gt_roi_loc [n_sample, 4] 280 | # gt_roi_label [n_sample, ] 281 | # ------------------------------------------------------ # 282 | sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(roi, bbox, label, self.loc_normalize_std) 283 | sample_rois.append(torch.Tensor(sample_roi).type_as(rpn_locs)) 284 | sample_indexes.append(torch.ones(len(sample_roi)).type_as(rpn_locs) * roi_indices[i][0]) 285 | gt_roi_locs.append(torch.Tensor(gt_roi_loc).type_as(rpn_locs)) 286 | gt_roi_labels.append(torch.Tensor(gt_roi_label).type_as(rpn_locs).long()) 287 | 288 | sample_rois = torch.stack(sample_rois, dim=0) 289 | sample_indexes = torch.stack(sample_indexes, dim=0) 290 | roi_cls_locs, roi_scores = self.model_train([base_feature, sample_rois, sample_indexes, img_size], mode = 'head') 291 | for i in range(n): 292 | # ------------------------------------------------------ # 293 | # 根据建议框的种类,取出对应的回归预测结果 294 | # ------------------------------------------------------ # 295 | n_sample = roi_cls_locs.size()[1] 296 | 297 | roi_cls_loc = roi_cls_locs[i] 298 | roi_score = roi_scores[i] 299 | gt_roi_loc = gt_roi_locs[i] 300 | gt_roi_label = gt_roi_labels[i] 301 | 302 | roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4) 303 | roi_loc = roi_cls_loc[torch.arange(0, n_sample), gt_roi_label] 304 | 305 | # -------------------------------------------------- # 306 | # 分别计算Classifier网络的回归损失和分类损失 307 | # -------------------------------------------------- # 308 | roi_loc_loss = self._fast_rcnn_loc_loss(roi_loc, gt_roi_loc, gt_roi_label.data, self.roi_sigma) 309 | roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label) 310 | 311 | roi_loc_loss_all += roi_loc_loss 312 | roi_cls_loss_all += roi_cls_loss 313 | 314 | losses = [rpn_loc_loss_all/n, rpn_cls_loss_all/n, roi_loc_loss_all/n, roi_cls_loss_all/n] 315 | losses = losses + [sum(losses)] 316 | return losses 317 | 318 | def train_step(self, imgs, bboxes, labels, scale, fp16=False, scaler=None): 319 | self.optimizer.zero_grad() 320 | if not fp16: 321 | losses = self.forward(imgs, bboxes, labels, scale) 322 | losses[-1].backward() 323 | self.optimizer.step() 324 | else: 325 | from torch.cuda.amp import autocast 326 | with autocast(): 327 | losses = self.forward(imgs, bboxes, labels, scale) 328 | 329 | #----------------------# 330 | # 反向传播 331 | #----------------------# 332 | scaler.scale(losses[-1]).backward() 333 | scaler.step(self.optimizer) 334 | scaler.update() 335 | 336 | return losses 337 | 338 | def weights_init(net, init_type='normal', init_gain=0.02): 339 | def init_func(m): 340 | classname = m.__class__.__name__ 341 | if hasattr(m, 'weight') and classname.find('Conv') != -1: 342 | if init_type == 'normal': 343 | torch.nn.init.normal_(m.weight.data, 0.0, init_gain) 344 | elif init_type == 'xavier': 345 | torch.nn.init.xavier_normal_(m.weight.data, gain=init_gain) 346 | elif init_type == 'kaiming': 347 | torch.nn.init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') 348 | elif init_type == 'orthogonal': 349 | torch.nn.init.orthogonal_(m.weight.data, gain=init_gain) 350 | else: 351 | raise NotImplementedError('initialization method [%s] is not implemented' % 
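
The `_fast_rcnn_loc_loss` above is the standard smooth-L1 regression loss, computed over positive samples only and normalised by their count. A standalone sketch of the piecewise function for `sigma = 1`:

```python
import torch

def smooth_l1(x, sigma=1.0):
    # 0.5 * sigma^2 * x^2 inside |x| < 1/sigma^2, and |x| - 0.5/sigma^2 outside
    s2 = sigma ** 2
    x = x.abs()
    return torch.where(x < 1. / s2, 0.5 * s2 * x ** 2, x - 0.5 / s2)

diff = torch.tensor([0.2, 1.5])
print(smooth_l1(diff))   # tensor([0.0200, 1.0000])
```

The quadratic region keeps gradients small near zero, while the linear region caps the influence of badly-regressed outlier boxes.
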
init_type) 352 | elif classname.find('BatchNorm2d') != -1: 353 | torch.nn.init.normal_(m.weight.data, 1.0, 0.02) 354 | torch.nn.init.constant_(m.bias.data, 0.0) 355 | print('initialize network with %s type' % init_type) 356 | net.apply(init_func) 357 | 358 | def get_lr_scheduler(lr_decay_type, lr, min_lr, total_iters, warmup_iters_ratio = 0.05, warmup_lr_ratio = 0.1, no_aug_iter_ratio = 0.05, step_num = 10): 359 | def yolox_warm_cos_lr(lr, min_lr, total_iters, warmup_total_iters, warmup_lr_start, no_aug_iter, iters): 360 | if iters <= warmup_total_iters: 361 | # lr = (lr - warmup_lr_start) * iters / float(warmup_total_iters) + warmup_lr_start 362 | lr = (lr - warmup_lr_start) * pow(iters / float(warmup_total_iters), 2) + warmup_lr_start 363 | elif iters >= total_iters - no_aug_iter: 364 | lr = min_lr 365 | else: 366 | lr = min_lr + 0.5 * (lr - min_lr) * ( 367 | 1.0 + math.cos(math.pi* (iters - warmup_total_iters) / (total_iters - warmup_total_iters - no_aug_iter)) 368 | ) 369 | return lr 370 | 371 | def step_lr(lr, decay_rate, step_size, iters): 372 | if step_size < 1: 373 | raise ValueError("step_size must above 1.") 374 | n = iters // step_size 375 | out_lr = lr * decay_rate ** n 376 | return out_lr 377 | 378 | if lr_decay_type == "cos": 379 | warmup_total_iters = min(max(warmup_iters_ratio * total_iters, 1), 3) 380 | warmup_lr_start = max(warmup_lr_ratio * lr, 1e-6) 381 | no_aug_iter = min(max(no_aug_iter_ratio * total_iters, 1), 15) 382 | func = partial(yolox_warm_cos_lr ,lr, min_lr, total_iters, warmup_total_iters, warmup_lr_start, no_aug_iter) 383 | else: 384 | decay_rate = (min_lr / lr) ** (1 / (step_num - 1)) 385 | step_size = total_iters / step_num 386 | func = partial(step_lr, lr, decay_rate, step_size) 387 | 388 | return func 389 | 390 | def set_optimizer_lr(optimizer, lr_scheduler_func, epoch): 391 | lr = lr_scheduler_func(epoch) 392 | for param_group in optimizer.param_groups: 393 | param_group['lr'] = lr 394 | -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/nets/resnet50.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch.nn as nn 4 | from torch.hub import load_state_dict_from_url 5 | 6 | 7 | class Bottleneck(nn.Module): 8 | expansion = 4 9 | def __init__(self, inplanes, planes, stride=1, downsample=None): 10 | super(Bottleneck, self).__init__() 11 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) 12 | self.bn1 = nn.BatchNorm2d(planes) 13 | 14 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 15 | self.bn2 = nn.BatchNorm2d(planes) 16 | 17 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 18 | self.bn3 = nn.BatchNorm2d(planes * 4) 19 | 20 | self.relu = nn.ReLU(inplace=True) 21 | self.downsample = downsample 22 | self.stride = stride 23 | 24 | def forward(self, x): 25 | residual = x 26 | 27 | out = self.conv1(x) 28 | out = self.bn1(out) 29 | out = self.relu(out) 30 | 31 | out = self.conv2(out) 32 | out = self.bn2(out) 33 | out = self.relu(out) 34 | 35 | out = self.conv3(out) 36 | out = self.bn3(out) 37 | if self.downsample is not None: 38 | residual = self.downsample(x) 39 | 40 | out += residual 41 | out = self.relu(out) 42 | 43 | return out 44 | 45 | class ResNet(nn.Module): 46 | def __init__(self, block, layers, num_classes=1000): 47 | #-----------------------------------# 48 | # 假设输入进来的图片是600,600,3 49 | #-----------------------------------# 50 | 
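
The scheduler factory `get_lr_scheduler` above returns a plain function of the epoch index, which `set_optimizer_lr` then applies to every parameter group. A quick look at the cosine variant (assumes the repo root is on the path; the numbers follow from the defaults `warmup_iters_ratio=0.05`, `no_aug_iter_ratio=0.05`):

```python
from nets.frcnn_training import get_lr_scheduler

# Warm-up for the first ~3 epochs, cosine decay, constant floor at the end.
func = get_lr_scheduler('cos', lr=1e-4, min_lr=1e-6, total_iters=100)
for epoch in (0, 3, 50, 99):
    print(epoch, f"{func(epoch):.2e}")
# 0  -> 1.00e-05  (warm-up start)
# 3  -> 1.00e-04  (warm-up finished, at the peak lr)
# 50 -> somewhere on the cosine curve between the two
# 99 -> 1.00e-06  (clamped to min_lr for the final epochs)
```
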
self.inplanes = 64 51 | super(ResNet, self).__init__() 52 | 53 | # 600,600,3 -> 300,300,64 54 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 55 | self.bn1 = nn.BatchNorm2d(64) 56 | self.relu = nn.ReLU(inplace=True) 57 | 58 | # 300,300,64 -> 150,150,64 59 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True) 60 | 61 | # 150,150,64 -> 150,150,256 62 | self.layer1 = self._make_layer(block, 64, layers[0]) 63 | # 150,150,256 -> 75,75,512 64 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 65 | # 75,75,512 -> 38,38,1024 到这里可以获得一个38,38,1024的共享特征层 66 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 67 | # self.layer4被用在classifier模型中 68 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 69 | 70 | self.avgpool = nn.AvgPool2d(7) 71 | self.fc = nn.Linear(512 * block.expansion, num_classes) 72 | 73 | for m in self.modules(): 74 | if isinstance(m, nn.Conv2d): 75 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 76 | m.weight.data.normal_(0, math.sqrt(2. / n)) 77 | elif isinstance(m, nn.BatchNorm2d): 78 | m.weight.data.fill_(1) 79 | m.bias.data.zero_() 80 | 81 | def _make_layer(self, block, planes, blocks, stride=1): 82 | downsample = None 83 | #-------------------------------------------------------------------# 84 | # 当模型需要进行高和宽的压缩的时候,就需要用到残差边的downsample 85 | #-------------------------------------------------------------------# 86 | if stride != 1 or self.inplanes != planes * block.expansion: 87 | downsample = nn.Sequential( 88 | nn.Conv2d(self.inplanes, planes * block.expansion,kernel_size=1, stride=stride, bias=False), 89 | nn.BatchNorm2d(planes * block.expansion), 90 | ) 91 | layers = [] 92 | layers.append(block(self.inplanes, planes, stride, downsample)) 93 | self.inplanes = planes * block.expansion 94 | for i in range(1, blocks): 95 | layers.append(block(self.inplanes, planes)) 96 | return nn.Sequential(*layers) 97 | 98 | def forward(self, x): 99 | x = self.conv1(x) 100 | x = self.bn1(x) 101 | x = self.relu(x) 102 | x = self.maxpool(x) 103 | 104 | x = self.layer1(x) 105 | x = self.layer2(x) 106 | x = self.layer3(x) 107 | x = self.layer4(x) 108 | 109 | x = self.avgpool(x) 110 | x = x.view(x.size(0), -1) 111 | x = self.fc(x) 112 | return x 113 | 114 | def resnet50(pretrained = False): 115 | model = ResNet(Bottleneck, [3, 4, 6, 3]) 116 | if pretrained: 117 | state_dict = load_state_dict_from_url("https://download.pytorch.org/models/resnet50-19c8e357.pth", model_dir="./model_data") 118 | model.load_state_dict(state_dict) 119 | #----------------------------------------------------------------------------# 120 | # 获取特征提取部分,从conv1到model.layer3,最终获得一个38,38,1024的特征层 121 | #----------------------------------------------------------------------------# 122 | features = list([model.conv1, model.bn1, model.relu, model.maxpool, model.layer1, model.layer2, model.layer3]) 123 | #----------------------------------------------------------------------------# 124 | # 获取分类部分,从model.layer4到model.avgpool 125 | #----------------------------------------------------------------------------# 126 | classifier = list([model.layer4, model.avgpool]) 127 | 128 | features = nn.Sequential(*features) 129 | classifier = nn.Sequential(*classifier) 130 | return features, classifier 131 | -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/nets/rpn.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 
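
A shape check for the split performed by `resnet50()` above (no pretrained weights needed): the extractor stops at `layer3`, i.e. an effective stride of 16, while `layer4` plus the average pool become the classifier half used by the RoI head.

```python
import torch
from nets.resnet50 import resnet50

features, classifier = resnet50(pretrained=False)
feat = features(torch.randn(1, 3, 600, 600))
print(feat.shape)    # torch.Size([1, 1024, 38, 38]) -- the shared feature map
```
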
import torch 4 | from torch import nn 5 | from torch.nn import functional as F 6 | from torchvision.ops import nms 7 | from utils.anchors import _enumerate_shifted_anchor, generate_anchor_base 8 | from utils.utils_bbox import loc2bbox 9 | 10 | 11 | class ProposalCreator(): 12 | def __init__( 13 | self, 14 | mode, 15 | nms_iou = 0.7, 16 | n_train_pre_nms = 12000, 17 | n_train_post_nms = 600, 18 | n_test_pre_nms = 3000, 19 | n_test_post_nms = 300, 20 | min_size = 16 21 | 22 | ): 23 | #-----------------------------------# 24 | # 设置预测还是训练 25 | #-----------------------------------# 26 | self.mode = mode 27 | #-----------------------------------# 28 | # 建议框非极大抑制的iou大小 29 | #-----------------------------------# 30 | self.nms_iou = nms_iou 31 | #-----------------------------------# 32 | # 训练用到的建议框数量 33 | #-----------------------------------# 34 | self.n_train_pre_nms = n_train_pre_nms 35 | self.n_train_post_nms = n_train_post_nms 36 | #-----------------------------------# 37 | # 预测用到的建议框数量 38 | #-----------------------------------# 39 | self.n_test_pre_nms = n_test_pre_nms 40 | self.n_test_post_nms = n_test_post_nms 41 | self.min_size = min_size 42 | 43 | def __call__(self, loc, score, anchor, img_size, scale=1.): 44 | if self.mode == "training": 45 | n_pre_nms = self.n_train_pre_nms 46 | n_post_nms = self.n_train_post_nms 47 | else: 48 | n_pre_nms = self.n_test_pre_nms 49 | n_post_nms = self.n_test_post_nms 50 | 51 | #-----------------------------------# 52 | # 将先验框转换成tensor 53 | #-----------------------------------# 54 | anchor = torch.from_numpy(anchor).type_as(loc) 55 | #-----------------------------------# 56 | # 将RPN网络预测结果转化成建议框 57 | #-----------------------------------# 58 | roi = loc2bbox(anchor, loc) 59 | #-----------------------------------# 60 | # 防止建议框超出图像边缘 61 | #-----------------------------------# 62 | roi[:, [0, 2]] = torch.clamp(roi[:, [0, 2]], min = 0, max = img_size[1]) 63 | roi[:, [1, 3]] = torch.clamp(roi[:, [1, 3]], min = 0, max = img_size[0]) 64 | 65 | #-----------------------------------# 66 | # 建议框的宽高的最小值不可以小于16 67 | #-----------------------------------# 68 | min_size = self.min_size * scale 69 | keep = torch.where(((roi[:, 2] - roi[:, 0]) >= min_size) & ((roi[:, 3] - roi[:, 1]) >= min_size))[0] 70 | #-----------------------------------# 71 | # 将对应的建议框保留下来 72 | #-----------------------------------# 73 | roi = roi[keep, :] 74 | score = score[keep] 75 | 76 | #-----------------------------------# 77 | # 根据得分进行排序,取出建议框 78 | #-----------------------------------# 79 | order = torch.argsort(score, descending=True) 80 | if n_pre_nms > 0: 81 | order = order[:n_pre_nms] 82 | roi = roi[order, :] 83 | score = score[order] 84 | 85 | #-----------------------------------# 86 | # 对建议框进行非极大抑制 87 | # 使用官方的非极大抑制会快非常多 88 | #-----------------------------------# 89 | keep = nms(roi, score, self.nms_iou) 90 | if len(keep) < n_post_nms: 91 | index_extra = np.random.choice(range(len(keep)), size=(n_post_nms - len(keep)), replace=True) 92 | keep = torch.cat([keep, keep[index_extra]]) 93 | keep = keep[:n_post_nms] 94 | roi = roi[keep] 95 | return roi 96 | 97 | 98 | class RegionProposalNetwork(nn.Module): 99 | def __init__( 100 | self, 101 | in_channels = 512, 102 | mid_channels = 512, 103 | ratios = [0.5, 1, 2], 104 | anchor_scales = [8, 16, 32], 105 | feat_stride = 16, 106 | mode = "training", 107 | ): 108 | super(RegionProposalNetwork, self).__init__() 109 | #-----------------------------------------# 110 | # 生成基础先验框,shape为[9, 4] 111 | #-----------------------------------------# 112 | 
self.anchor_base = generate_anchor_base(anchor_scales = anchor_scales, ratios = ratios) 113 | n_anchor = self.anchor_base.shape[0] 114 | 115 | #-----------------------------------------# 116 | # 先进行一个3x3的卷积,可理解为特征整合 117 | #-----------------------------------------# 118 | self.conv1 = nn.Conv2d(in_channels, mid_channels, 3, 1, 1) 119 | #-----------------------------------------# 120 | # 分类预测先验框内部是否包含物体 121 | #-----------------------------------------# 122 | self.score = nn.Conv2d(mid_channels, n_anchor * 2, 1, 1, 0) 123 | #-----------------------------------------# 124 | # 回归预测对先验框进行调整 125 | #-----------------------------------------# 126 | self.loc = nn.Conv2d(mid_channels, n_anchor * 4, 1, 1, 0) 127 | 128 | #-----------------------------------------# 129 | # 特征点间距步长 130 | #-----------------------------------------# 131 | self.feat_stride = feat_stride 132 | #-----------------------------------------# 133 | # 用于对建议框解码并进行非极大抑制 134 | #-----------------------------------------# 135 | self.proposal_layer = ProposalCreator(mode) 136 | #--------------------------------------# 137 | # 对RPN的网络部分进行权值初始化 138 | #--------------------------------------# 139 | normal_init(self.conv1, 0, 0.01) 140 | normal_init(self.score, 0, 0.01) 141 | normal_init(self.loc, 0, 0.01) 142 | 143 | def forward(self, x, img_size, scale=1.): 144 | n, _, h, w = x.shape 145 | #-----------------------------------------# 146 | # 先进行一个3x3的卷积,可理解为特征整合 147 | #-----------------------------------------# 148 | x = F.relu(self.conv1(x)) 149 | #-----------------------------------------# 150 | # 回归预测对先验框进行调整 151 | #-----------------------------------------# 152 | rpn_locs = self.loc(x) 153 | rpn_locs = rpn_locs.permute(0, 2, 3, 1).contiguous().view(n, -1, 4) 154 | #-----------------------------------------# 155 | # 分类预测先验框内部是否包含物体 156 | #-----------------------------------------# 157 | rpn_scores = self.score(x) 158 | rpn_scores = rpn_scores.permute(0, 2, 3, 1).contiguous().view(n, -1, 2) 159 | 160 | #--------------------------------------------------------------------------------------# 161 | # 进行softmax概率计算,每个先验框只有两个判别结果 162 | # 内部包含物体或者内部不包含物体,rpn_softmax_scores[:, :, 1]的内容为包含物体的概率 163 | #--------------------------------------------------------------------------------------# 164 | rpn_softmax_scores = F.softmax(rpn_scores, dim=-1) 165 | rpn_fg_scores = rpn_softmax_scores[:, :, 1].contiguous() 166 | rpn_fg_scores = rpn_fg_scores.view(n, -1) 167 | 168 | #------------------------------------------------------------------------------------------------# 169 | # 生成先验框,此时获得的anchor是布满网格点的,当输入图片为600,600,3的时候,shape为(12996, 4) 170 | #------------------------------------------------------------------------------------------------# 171 | anchor = _enumerate_shifted_anchor(np.array(self.anchor_base), self.feat_stride, h, w) 172 | rois = list() 173 | roi_indices = list() 174 | for i in range(n): 175 | roi = self.proposal_layer(rpn_locs[i], rpn_fg_scores[i], anchor, img_size, scale = scale) 176 | batch_index = i * torch.ones((len(roi),)) 177 | rois.append(roi.unsqueeze(0)) 178 | roi_indices.append(batch_index.unsqueeze(0)) 179 | 180 | rois = torch.cat(rois, dim=0).type_as(x) 181 | roi_indices = torch.cat(roi_indices, dim=0).type_as(x) 182 | anchor = torch.from_numpy(anchor).unsqueeze(0).float().to(x.device) 183 | 184 | return rpn_locs, rpn_scores, rois, roi_indices, anchor 185 | 186 | def normal_init(m, mean, stddev, truncated=False): 187 | if truncated: 188 | m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean) # not a perfect approximation 189
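
Running the RPN above on a ResNet-50-sized feature map shows the shapes at each stage: a 38x38 grid with 9 anchors per location gives 12996 anchors, and `ProposalCreator` in training mode keeps exactly `n_train_post_nms = 600` proposals. A sketch with random inputs (assumes the repo root is on the path):

```python
import torch
from nets.rpn import RegionProposalNetwork

rpn = RegionProposalNetwork(in_channels=1024, mid_channels=512, mode="training")
base_feature = torch.randn(1, 1024, 38, 38)

rpn_locs, rpn_scores, rois, roi_indices, anchor = rpn(base_feature, img_size=(600, 600))
print(rpn_locs.shape)    # torch.Size([1, 12996, 4])
print(rpn_scores.shape)  # torch.Size([1, 12996, 2])
print(rois.shape)        # torch.Size([1, 600, 4])
print(anchor.shape)      # torch.Size([1, 12996, 4])
```
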
| else: 190 | m.weight.data.normal_(mean, stddev) 191 | m.bias.data.zero_() 192 | -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/nets/vgg16.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.hub import load_state_dict_from_url 4 | 5 | 6 | #--------------------------------------# 7 | # VGG16的结构 8 | #--------------------------------------# 9 | class VGG(nn.Module): 10 | def __init__(self, features, num_classes=1000, init_weights=True): 11 | super(VGG, self).__init__() 12 | self.features = features 13 | #--------------------------------------# 14 | # 平均池化到7x7大小 15 | #--------------------------------------# 16 | self.avgpool = nn.AdaptiveAvgPool2d((7, 7)) 17 | #--------------------------------------# 18 | # 分类部分 19 | #--------------------------------------# 20 | self.classifier = nn.Sequential( 21 | nn.Linear(512 * 7 * 7, 4096), 22 | nn.ReLU(True), 23 | nn.Dropout(), 24 | nn.Linear(4096, 4096), 25 | nn.ReLU(True), 26 | nn.Dropout(), 27 | nn.Linear(4096, num_classes), 28 | ) 29 | if init_weights: 30 | self._initialize_weights() 31 | 32 | def forward(self, x): 33 | #--------------------------------------# 34 | # 特征提取 35 | #--------------------------------------# 36 | x = self.features(x) 37 | #--------------------------------------# 38 | # 平均池化 39 | #--------------------------------------# 40 | x = self.avgpool(x) 41 | #--------------------------------------# 42 | # 平铺后 43 | #--------------------------------------# 44 | x = torch.flatten(x, 1) 45 | #--------------------------------------# 46 | # 分类部分 47 | #--------------------------------------# 48 | x = self.classifier(x) 49 | return x 50 | 51 | def _initialize_weights(self): 52 | for m in self.modules(): 53 | if isinstance(m, nn.Conv2d): 54 | nn.init.kaiming_normal_(m.weight, mode = 'fan_out', nonlinearity = 'relu') 55 | if m.bias is not None: 56 | nn.init.constant_(m.bias, 0) 57 | elif isinstance(m, nn.BatchNorm2d): 58 | nn.init.constant_(m.weight, 1) 59 | nn.init.constant_(m.bias, 0) 60 | elif isinstance(m, nn.Linear): 61 | nn.init.normal_(m.weight, 0, 0.01) 62 | nn.init.constant_(m.bias, 0) 63 | 64 | ''' 65 | 假设输入图像为(600, 600, 3),随着cfg的循环,特征层变化如下: 66 | 600,600,3 -> 600,600,64 -> 600,600,64 -> 300,300,64 -> 300,300,128 -> 300,300,128 -> 150,150,128 -> 150,150,256 -> 150,150,256 -> 150,150,256 67 | -> 75,75,256 -> 75,75,512 -> 75,75,512 -> 75,75,512 -> 37,37,512 -> 37,37,512 -> 37,37,512 -> 37,37,512 68 | 到cfg结束,我们获得了一个37,37,512的特征层 69 | ''' 70 | 71 | cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'] 72 | 73 | 74 | #--------------------------------------# 75 | # 特征提取部分 76 | #--------------------------------------# 77 | def make_layers(cfg, batch_norm = False): 78 | layers = [] 79 | in_channels = 3 80 | for v in cfg: 81 | if v == 'M': 82 | layers += [nn.MaxPool2d(kernel_size = 2, stride = 2)] 83 | else: 84 | conv2d = nn.Conv2d(in_channels, v, kernel_size = 3, padding = 1) 85 | if batch_norm: 86 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace = True)] 87 | else: 88 | layers += [conv2d, nn.ReLU(inplace = True)] 89 | in_channels = v 90 | return nn.Sequential(*layers) 91 | 92 | def decom_vgg16(pretrained = False): 93 | model = VGG(make_layers(cfg)) 94 | if pretrained: 95 | state_dict = load_state_dict_from_url("https://download.pytorch.org/models/vgg16-397923af.pth", model_dir = "./model_data") 96 | model.load_state_dict(state_dict) 97 | 
#----------------------------------------------------------------------------# 98 | # 获取特征提取部分,最终获得一个37,37,512的特征层 99 | #----------------------------------------------------------------------------# 100 | features = list(model.features)[:30] 101 | #----------------------------------------------------------------------------# 102 | # 获取分类部分,需要除去Dropout部分 103 | #----------------------------------------------------------------------------# 104 | classifier = list(model.classifier) 105 | del classifier[6] 106 | del classifier[5] 107 | del classifier[2] 108 | 109 | features = nn.Sequential(*features) 110 | classifier = nn.Sequential(*classifier) 111 | return features, classifier 112 | -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/predict.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------# 2 | # 将单张图片预测、摄像头检测和FPS测试功能 3 | # 整合到了一个py文件中,通过指定mode进行模式的修改。 4 | #----------------------------------------------------# 5 | import time 6 | import cv2 7 | import numpy as np 8 | from PIL import Image 9 | import os 10 | from tqdm import tqdm 11 | from frcnn import FRCNN 12 | 13 | 14 | if __name__ == "__main__": 15 | frcnn = FRCNN() 16 | #----------------------------------------------------------------------------------------------------------# 17 | # mode用于指定测试的模式: 18 | # 'predict' 表示单张图片预测,如果想对预测过程进行修改,如保存图片,截取对象等,可以先看下方详细的注释 19 | # 'video' 表示视频检测,可调用摄像头或者视频进行检测,详情查看下方注释。 20 | # 'fps' 表示测试fps,使用的图片是img里面的street.jpg,详情查看下方注释。 21 | # 'dir_predict' 表示遍历文件夹进行检测并保存。默认遍历img文件夹,保存img_out文件夹,详情查看下方注释。 22 | #----------------------------------------------------------------------------------------------------------# 23 | mode = "predict" 24 | #-------------------------------------------------------------------------# 25 | # crop 指定了是否在单张图片预测后对目标进行截取 26 | # count 指定了是否进行目标的计数 27 | # crop、count仅在mode='predict'时有效 28 | #-------------------------------------------------------------------------# 29 | crop = False 30 | count = False 31 | #----------------------------------------------------------------------------------------------------------# 32 | # video_path 用于指定视频的路径,当video_path=0时表示检测摄像头 33 | # 想要检测视频,则设置如video_path = "xxx.mp4"即可,代表读取出根目录下的xxx.mp4文件。 34 | # video_save_path 表示视频保存的路径,当video_save_path=""时表示不保存 35 | # 想要保存视频,则设置如video_save_path = "yyy.mp4"即可,代表保存为根目录下的yyy.mp4文件。 36 | # video_fps 用于保存的视频的fps 37 | # 38 | # video_path、video_save_path和video_fps仅在mode='video'时有效 39 | # 保存视频时需要ctrl+c退出或者运行到最后一帧才会完成完整的保存步骤。 40 | #----------------------------------------------------------------------------------------------------------# 41 | video_path = 0 42 | video_save_path = "" 43 | video_fps = 25.0 44 | #----------------------------------------------------------------------------------------------------------# 45 | # test_interval 用于指定测量fps的时候,图片检测的次数。理论上test_interval越大,fps越准确。 46 | # fps_image_path 用于指定测试的fps图片 47 | # 48 | # test_interval和fps_image_path仅在mode='fps'有效 49 | #----------------------------------------------------------------------------------------------------------# 50 | test_interval = 100 51 | fps_image_path = "img/street.jpg" 52 | #-------------------------------------------------------------------------# 53 | # dir_origin_path 指定了用于检测的图片的文件夹路径 54 | # dir_save_path 指定了检测完图片的保存路径 55 | # 56 | # dir_origin_path和dir_save_path仅在mode='dir_predict'时有效 57 | #-------------------------------------------------------------------------# 58 | dir_origin_path = "img/"
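
A quick check of the split performed by `decom_vgg16` in `nets/vgg16.py` above: keeping the first 30 feature layers drops the final max-pool, so a 600x600 input comes out as a 37x37x512 map, and the classifier half is the two fully-connected layers with the Dropout modules removed.

```python
import torch
from nets.vgg16 import decom_vgg16

features, classifier = decom_vgg16(pretrained=False)
print(features(torch.randn(1, 3, 600, 600)).shape)  # torch.Size([1, 512, 37, 37])
print(classifier)  # Sequential: Linear(25088, 4096) -> ReLU -> Linear(4096, 4096) -> ReLU
```
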
59 | dir_save_path = "img_out/" 60 | 61 | 62 | if mode == "predict": 63 | ''' 64 | 1、该代码无法直接进行批量预测,如果想要批量预测,可以利用os.listdir()遍历文件夹,利用Image.open打开图片文件进行预测。 65 | 具体流程可以参考get_dr_txt.py,在get_dr_txt.py即实现了遍历还实现了目标信息的保存。 66 | 2、如果想要进行检测完的图片的保存,利用r_image.save("img.jpg")即可保存,直接在predict.py里进行修改即可。 67 | 3、如果想要获得预测框的坐标,可以进入frcnn.detect_image函数,在绘图部分读取top,left,bottom,right这四个值。 68 | 4、如果想要利用预测框截取下目标,可以进入frcnn.detect_image函数,在绘图部分利用获取到的top,left,bottom,right这四个值 69 | 在原图上利用矩阵的方式进行截取。 70 | 5、如果想要在预测图上写额外的字,比如检测到的特定目标的数量,可以进入frcnn.detect_image函数,在绘图部分对predicted_class进行判断, 71 | 比如判断if predicted_class == 'car': 即可判断当前目标是否为车,然后记录数量即可。利用draw.text即可写字。 72 | ''' 73 | while True: 74 | img = input('Input image filename:') 75 | try: 76 | image = Image.open(img) 77 | except: 78 | print('Open Error! Try again!') 79 | else: 80 | r_image = frcnn.detect_image(image, crop = crop, count = count) 81 | r_image.show() 82 | 83 | 84 | elif mode == "video": 85 | capture = cv2.VideoCapture(video_path) 86 | if video_save_path != "": 87 | fourcc = cv2.VideoWriter_fourcc(*'XVID') 88 | size = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)), int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))) 89 | out = cv2.VideoWriter(video_save_path, fourcc, video_fps, size) 90 | fps = 0.0 91 | while(True): 92 | t1 = time.time() 93 | # 读取某一帧 94 | ref,frame = capture.read() 95 | # 格式转变,BGRtoRGB 96 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 97 | # 转变成Image 98 | frame = Image.fromarray(np.uint8(frame)) 99 | # 进行检测 100 | frame = np.array(frcnn.detect_image(frame)) 101 | # RGBtoBGR满足opencv显示格式 102 | frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) 103 | fps = ( fps + (1. / (time.time() - t1)) ) / 2 104 | print("fps = %.2f"%(fps)) 105 | frame = cv2.putText(frame, "fps = %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) 106 | cv2.imshow("video", frame) 107 | c = cv2.waitKey(1) & 0xff 108 | if video_save_path != "": 109 | out.write(frame) 110 | if c == 27: 111 | capture.release() 112 | break 113 | capture.release() 114 | out.release() 115 | cv2.destroyAllWindows() 116 | 117 | 118 | elif mode == "fps": 119 | img = Image.open(fps_image_path) 120 | tact_time = frcnn.get_FPS(img, test_interval) 121 | print(str(tact_time) + ' seconds, ' + str(1 / tact_time) + 'FPS, @batch_size 1') 122 | 123 | 124 | elif mode == "dir_predict": 125 | img_names = os.listdir(dir_origin_path) 126 | for img_name in tqdm(img_names): 127 | if img_name.lower().endswith(('.bmp', '.dib', '.png', '.jpg', '.jpeg', '.pbm', '.pgm', '.ppm', '.tif', '.tiff')): 128 | image_path = os.path.join(dir_origin_path, img_name) 129 | image = Image.open(image_path) 130 | r_image = frcnn.detect_image(image) 131 | if not os.path.exists(dir_save_path): 132 | os.makedirs(dir_save_path) 133 | r_image.save(os.path.join(dir_save_path, img_name.replace(".jpg", ".png")), quality = 95, subsampling = 0) 134 | 135 | 136 | else: 137 | raise AssertionError("Please specify the correct mode: 'predict', 'video', 'fps' or 'dir_predict'.") 138 | -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/requirements.txt: -------------------------------------------------------------------------------- 1 | python == 3.10.6 2 | numpy == 1.23.3 3 | opencv == 4.6.0 4 | pillow == 9.2.0 5 | pycocotools == 2.0.6 6 | pytorch == 1.12.1 7 | scipy == 1.9.3 8 | torchvision == 0.13.1 9 | tqdm == 4.64.1 10 | matplotlib == 3.6.2 11 | hdf5 == 1.12.1 -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/summary.py: 
-------------------------------------------------------------------------------- 1 | #--------------------------------------------# 2 | # 该部分代码用于看网络结构 3 | #--------------------------------------------# 4 | import torch 5 | from thop import clever_format, profile 6 | from torchsummary import summary 7 | 8 | from nets.frcnn import FasterRCNN 9 | 10 | if __name__ == "__main__": 11 | input_shape = [600, 600] 12 | num_classes = 21 13 | 14 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 15 | model = FasterRCNN(num_classes, backbone = 'vgg').to(device) 16 | summary(model, (3, input_shape[0], input_shape[1])) 17 | 18 | dummy_input = torch.randn(1, 3, input_shape[0], input_shape[1]).to(device) 19 | flops, params = profile(model.to(device), (dummy_input, ), verbose = False) 20 | #--------------------------------------------------------# 21 | # flops * 2是因为profile没有将卷积作为两个operations 22 | # 有些论文将卷积算乘法、加法两个operations。此时乘2 23 | # 有些论文只考虑乘法的运算次数,忽略加法。此时不乘2 24 | # 本代码选择乘2,参考YOLOX。 25 | #--------------------------------------------------------# 26 | flops = flops * 2 27 | flops, params = clever_format([flops, params], "%.3f") 28 | print('Total GFLOPS: %s' % (flops)) 29 | print('Total params: %s' % (params)) 30 | -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/train.py: -------------------------------------------------------------------------------- 1 | #-------------------------------------# 2 | # 对数据集进行训练 3 | #-------------------------------------# 4 | import os 5 | import datetime 6 | 7 | import numpy as np 8 | import torch 9 | import torch.backends.cudnn as cudnn 10 | import torch.optim as optim 11 | from torch.utils.data import DataLoader 12 | 13 | from nets.frcnn import FasterRCNN 14 | from nets.frcnn_training import (FasterRCNNTrainer, get_lr_scheduler, 15 | set_optimizer_lr, weights_init) 16 | from utils.callbacks import EvalCallback, LossHistory 17 | from utils.dataloader import FRCNNDataset, frcnn_dataset_collate 18 | from utils.utils import get_classes, show_config 19 | from utils.utils_fit import fit_one_epoch 20 | 21 | 22 | ''' 23 | 训练自己的目标检测模型一定需要注意以下几点: 24 | 1、训练前仔细检查自己的格式是否满足要求,该库要求数据集格式为VOC格式,需要准备好的内容有输入图片和标签 25 | 输入图片为.jpg图片,无需固定大小,传入训练前会自动进行resize。 26 | 灰度图会自动转成RGB图片进行训练,无需自己修改。 27 | 输入图片如果后缀非jpg,需要自己批量转成jpg后再开始训练。 28 | 29 | 标签为.xml格式,文件中会有需要检测的目标信息,标签文件和输入图片文件相对应。 30 | 31 | 2、损失值的大小用于判断是否收敛,比较重要的是有收敛的趋势,即验证集损失不断下降,如果验证集损失基本上不改变的话,模型基本上就收敛了。 32 | 损失值的具体大小并没有什么意义,大和小只在于损失的计算方式,并不是接近于0才好。如果想要让损失好看点,可以直接到对应的损失函数里面除上10000。 33 | 训练过程中的损失值会保存在logs文件夹下的loss_%Y_%m_%d_%H_%M_%S文件夹中 34 | 35 | 3、训练好的权值文件保存在logs文件夹中,每个训练世代(Epoch)包含若干训练步长(Step),每个训练步长(Step)进行一次梯度下降。 36 | 如果只是训练了几个Step是不会保存的,Epoch和Step的概念要捋清楚一下。 37 | ''' 38 | 39 | 40 | if __name__ == "__main__": 41 | #-------------------------------# 42 | # 是否使用Cuda 43 | # 没有GPU可以设置成False 44 | #-------------------------------# 45 | Cuda = False 46 | #---------------------------------------------------------------------# 47 | # train_gpu 训练用到的GPU 48 | # 默认为第一张卡、双卡为[0, 1]、三卡为[0, 1, 2] 49 | # 在使用多GPU时,每个卡上的batch为总batch除以卡的数量。 50 | #---------------------------------------------------------------------# 51 | train_gpu = [0] 52 | #---------------------------------------------------------------------# 53 | # fp16 是否使用混合精度训练 54 | # 可减少约一半的显存、需要pytorch1.7.1以上 55 | #---------------------------------------------------------------------# 56 | fp16 = False 57 | #---------------------------------------------------------------------# 58 | # classes_path 指向model_data下的txt,与自己训练的数据集相关 
59 | # 训练前一定要修改classes_path,使其对应自己的数据集 60 | #---------------------------------------------------------------------# 61 | classes_path = './faster-rcnn-pytorch-master/model_data/voc_classes.txt' 62 | #----------------------------------------------------------------------------------------------------------------------------# 63 | # 权值文件的下载请看README,可以通过网盘下载。模型的 预训练权重 对不同数据集是通用的,因为特征是通用的。 64 | # 模型的 预训练权重 比较重要的部分是 主干特征提取网络的权值部分,用于进行特征提取。 65 | # 预训练权重对于99%的情况都必须要用,不用的话主干部分的权值太过随机,特征提取效果不明显,网络训练的结果也不会好 66 | # 67 | # 如果训练过程中存在中断训练的操作,可以将model_path设置成logs文件夹下的权值文件,将已经训练了一部分的权值再次载入。 68 | # 同时修改下方的 冻结阶段 或者 解冻阶段 的参数,来保证模型epoch的连续性。 69 | # 70 | # 当model_path = ''的时候不加载整个模型的权值。 71 | # 72 | # 此处使用的是整个模型的权重,因此是在train.py进行加载的,下面的pretrained不影响此处的权值加载。 73 | # 如果想要让模型从主干的预训练权值开始训练,则设置model_path = '',下面的pretrained = True,此时仅加载主干。 74 | # 如果想要让模型从0开始训练,则设置model_path = '',下面的pretrained = False,Freeze_Train = False,此时从0开始训练,且没有冻结主干的过程。 75 | # 76 | # 一般来讲,网络从0开始的训练效果会很差,因为权值太过随机,特征提取效果不明显,因此非常、非常、非常不建议大家从0开始训练! 77 | # 如果一定要从0开始,可以了解imagenet数据集,首先训练分类模型,获得网络的主干部分权值,分类模型的 主干部分 和该模型通用,基于此进行训练。 78 | #----------------------------------------------------------------------------------------------------------------------------# 79 | model_path = './faster-rcnn-pytorch-master/model_data/voc_weights_resnet.pth' 80 | #------------------------------------------------------# 81 | # input_shape 输入的shape大小 82 | #------------------------------------------------------# 83 | input_shape = [600, 600] 84 | #---------------------------------------------# 85 | # vgg 86 | # resnet50 87 | #---------------------------------------------# 88 | backbone = "resnet50" 89 | #----------------------------------------------------------------------------------------------------------------------------# 90 | # pretrained 是否使用主干网络的预训练权重,此处使用的是主干的权重,因此是在模型构建的时候进行加载的。 91 | # 如果设置了model_path,则主干的权值无需加载,pretrained的值无意义。 92 | # 如果不设置model_path,pretrained = True,此时仅加载主干开始训练。 93 | # 如果不设置model_path,pretrained = False,Freeze_Train = False,此时从0开始训练,且没有冻结主干的过程。 94 | #----------------------------------------------------------------------------------------------------------------------------# 95 | pretrained = False 96 | #------------------------------------------------------------------------# 97 | # anchors_size用于设定先验框的大小,每个特征点均存在9个先验框。 98 | # anchors_size每个数对应3个先验框。 99 | # 当anchors_size = [8, 16, 32]的时候,生成的先验框宽高约为: 100 | # [90, 180] ; [180, 360]; [360, 720]; [128, 128]; 101 | # [256, 256]; [512, 512]; [180, 90] ; [360, 180]; 102 | # [720, 360]; 详情查看anchors.py 103 | # 如果想要检测小物体,可以减小anchors_size靠前的数。 104 | # 比如设置anchors_size = [4, 16, 32] 105 | #------------------------------------------------------------------------# 106 | anchors_size = [8, 16, 32] 107 | 108 | #----------------------------------------------------------------------------------------------------------------------------# 109 | # 训练分为两个阶段,分别是冻结阶段和解冻阶段。设置冻结阶段是为了满足机器性能不足的同学的训练需求。 110 | # 冻结训练需要的显存较小,显卡非常差的情况下,可设置Freeze_Epoch等于UnFreeze_Epoch,此时仅仅进行冻结训练。 111 | # 112 | # 在此提供若干参数设置建议,各位训练者根据自己的需求进行灵活调整: 113 | # (一)从整个模型的预训练权重开始训练: 114 | # Adam: 115 | # Init_Epoch = 0,Freeze_Epoch = 50,UnFreeze_Epoch = 100,Freeze_Train = True,optimizer_type = 'adam',Init_lr = 1e-4。(冻结) 116 | # Init_Epoch = 0,UnFreeze_Epoch = 100,Freeze_Train = False,optimizer_type = 'adam',Init_lr = 1e-4。(不冻结) 117 | # SGD: 118 | # Init_Epoch = 0,Freeze_Epoch = 50,UnFreeze_Epoch = 150,Freeze_Train = True,optimizer_type = 'sgd',Init_lr = 1e-2。(冻结) 119 | # Init_Epoch = 0,UnFreeze_Epoch = 150,Freeze_Train = False,optimizer_type = 'sgd',Init_lr
= 1e-2。(不冻结) 120 | # 其中:UnFreeze_Epoch可以在100-300之间调整。 121 | # (二)从主干网络的预训练权重开始训练: 122 | # Adam: 123 | # Init_Epoch = 0,Freeze_Epoch = 50,UnFreeze_Epoch = 100,Freeze_Train = True,optimizer_type = 'adam',Init_lr = 1e-4。(冻结) 124 | # Init_Epoch = 0,UnFreeze_Epoch = 100,Freeze_Train = False,optimizer_type = 'adam',Init_lr = 1e-4。(不冻结) 125 | # SGD: 126 | # Init_Epoch = 0,Freeze_Epoch = 50,UnFreeze_Epoch = 150,Freeze_Train = True,optimizer_type = 'sgd',Init_lr = 1e-2。(冻结) 127 | # Init_Epoch = 0,UnFreeze_Epoch = 150,Freeze_Train = False,optimizer_type = 'sgd',Init_lr = 1e-2。(不冻结) 128 | # 其中:由于从主干网络的预训练权重开始训练,主干的权值不一定适合目标检测,需要更多的训练跳出局部最优解。 129 | # UnFreeze_Epoch可以在150-300之间调整,YOLOV5和YOLOX均推荐使用300。 130 | # Adam相较于SGD收敛的快一些。因此UnFreeze_Epoch理论上可以小一点,但依然推荐更多的Epoch。 131 | # (三)batch_size的设置: 132 | # 在显卡能够接受的范围内,以大为好。显存不足与数据集大小无关,提示显存不足(OOM或者CUDA out of memory)请调小batch_size。 133 | # faster rcnn的BatchNormalization层已经冻结,batch_size可以为1 134 | #----------------------------------------------------------------------------------------------------------------------------# 135 | #------------------------------------------------------------------# 136 | # 冻结阶段训练参数 137 | # 此时模型的主干被冻结了,特征提取网络不发生改变 138 | # 占用的显存较小,仅对网络进行微调 139 | # Init_Epoch 模型当前开始的训练世代,其值可以大于Freeze_Epoch,如设置: 140 | # Init_Epoch = 60、Freeze_Epoch = 50、UnFreeze_Epoch = 100 141 | # 会跳过冻结阶段,直接从60代开始,并调整对应的学习率。 142 | # (断点续练时使用) 143 | # Freeze_Epoch 模型冻结训练的Freeze_Epoch 144 | # (当Freeze_Train=False时失效) 145 | # Freeze_batch_size 模型冻结训练的batch_size 146 | # (当Freeze_Train=False时失效) 147 | #------------------------------------------------------------------# 148 | Init_Epoch = 0 149 | Freeze_Epoch = 50 150 | Freeze_batch_size = 4 151 | #------------------------------------------------------------------# 152 | # 解冻阶段训练参数 153 | # 此时模型的主干不被冻结了,特征提取网络会发生改变 154 | # 占用的显存较大,网络所有的参数都会发生改变 155 | # UnFreeze_Epoch 模型总共训练的epoch 156 | # SGD需要更长的时间收敛,因此设置较大的UnFreeze_Epoch 157 | # Adam可以使用相对较小的UnFreeze_Epoch 158 | # Unfreeze_batch_size 模型在解冻后的batch_size 159 | #------------------------------------------------------------------# 160 | UnFreeze_Epoch = 100 161 | Unfreeze_batch_size = 2 162 | #------------------------------------------------------------------# 163 | # Freeze_Train 是否进行冻结训练 164 | # 默认先冻结主干训练后解冻训练。 165 | # 如果设置Freeze_Train=False,建议使用优化器为sgd 166 | #------------------------------------------------------------------# 167 | Freeze_Train = True 168 | 169 | #------------------------------------------------------------------# 170 | # 其它训练参数:学习率、优化器、学习率下降有关 171 | #------------------------------------------------------------------# 172 | #------------------------------------------------------------------# 173 | # Init_lr 模型的最大学习率 174 | # 当使用Adam优化器时建议设置 Init_lr=1e-4 175 | # 当使用SGD优化器时建议设置 Init_lr=1e-2 176 | # Min_lr 模型的最小学习率,默认为最大学习率的0.01 177 | #------------------------------------------------------------------# 178 | Init_lr = 1e-4 179 | Min_lr = Init_lr * 0.01 180 | #------------------------------------------------------------------# 181 | # optimizer_type 使用到的优化器种类,可选的有adam、sgd 182 | # 当使用Adam优化器时建议设置 Init_lr=1e-4 183 | # 当使用SGD优化器时建议设置 Init_lr=1e-2 184 | # momentum 优化器内部使用到的momentum参数 185 | # weight_decay 权值衰减,可防止过拟合 186 | # adam会导致weight_decay错误,使用adam时建议设置为0。 187 | #------------------------------------------------------------------# 188 | optimizer_type = "adam" 189 | momentum = 0.9 190 | weight_decay = 0 191 | #------------------------------------------------------------------# 192 | # lr_decay_type 使用到的学习率下降方式,可选的有'step'、'cos' 193
#------------------------------------------------------------------# 194 | lr_decay_type = 'cos' 195 | #------------------------------------------------------------------# 196 | # save_period 多少个epoch保存一次权值 197 | #------------------------------------------------------------------# 198 | save_period = 5 199 | #------------------------------------------------------------------# 200 | # save_dir 权值与日志文件保存的文件夹 201 | #------------------------------------------------------------------# 202 | save_dir = 'logs' 203 | #------------------------------------------------------------------# 204 | # eval_flag 是否在训练时进行评估,评估对象为验证集 205 | # 安装pycocotools库后,评估体验更佳。 206 | # eval_period 代表多少个epoch评估一次,不建议频繁的评估 207 | # 评估需要消耗较多的时间,频繁评估会导致训练非常慢 208 | # 此处获得的mAP会与get_map.py获得的会有所不同,原因有二: 209 | # (一)此处获得的mAP为验证集的mAP。 210 | # (二)此处设置评估参数较为保守,目的是加快评估速度。 211 | #------------------------------------------------------------------# 212 | eval_flag = True 213 | eval_period = 5 214 | #------------------------------------------------------------------# 215 | # num_workers 用于设置是否使用多线程读取数据,1代表关闭多线程 216 | # 开启后会加快数据读取速度,但是会占用更多内存 217 | # 在IO为瓶颈的时候再开启多线程,即GPU运算速度远大于读取图片的速度。 218 | #------------------------------------------------------------------# 219 | num_workers = 4 220 | #----------------------------------------------------# 221 | # 获得图片路径和标签 222 | #----------------------------------------------------# 223 | train_annotation_path = '2007_train.txt' 224 | val_annotation_path = '2007_val.txt' 225 | 226 | #----------------------------------------------------# 227 | # 获取classes和anchor 228 | #----------------------------------------------------# 229 | class_names, num_classes = get_classes(classes_path) 230 | 231 | #------------------------------------------------------# 232 | # 设置用到的显卡 233 | #------------------------------------------------------# 234 | os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(x) for x in train_gpu) 235 | ngpus_per_node = len(train_gpu) 236 | print('Number of devices: {}'.format(ngpus_per_node)) 237 | 238 | model = FasterRCNN(num_classes, anchor_scales = anchors_size, backbone = backbone, pretrained = pretrained) 239 | if not pretrained: 240 | weights_init(model) 241 | if model_path != '': 242 | #------------------------------------------------------# 243 | # 权值文件请看README,百度网盘下载 244 | #------------------------------------------------------# 245 | print('Load weights {}.'.format(model_path)) 246 | 247 | #------------------------------------------------------# 248 | # 根据预训练权重的Key和模型的Key进行加载 249 | #------------------------------------------------------# 250 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 251 | model_dict = model.state_dict() 252 | pretrained_dict = torch.load(model_path, map_location = device) 253 | load_key, no_load_key, temp_dict = [], [], {} 254 | for k, v in pretrained_dict.items(): 255 | if k in model_dict.keys() and np.shape(model_dict[k]) == np.shape(v): 256 | temp_dict[k] = v 257 | load_key.append(k) 258 | else: 259 | no_load_key.append(k) 260 | model_dict.update(temp_dict) 261 | model.load_state_dict(model_dict) 262 | #------------------------------------------------------# 263 | # 显示没有匹配上的Key 264 | #------------------------------------------------------# 265 | print("\nSuccessful Load Key:", str(load_key)[:500], "……\nSuccessful Load Key Num:", len(load_key)) 266 | print("\nFail To Load Key:", str(no_load_key)[:500], "……\nFail To Load Key num:", len(no_load_key)) 267 | print("\n\033[1;33;44m温馨提示,head部分没有载入是正常现象,Backbone部分没有载入是错误的。\033[0m") 268 | 269 | 
#----------------------# 270 | # 记录Loss 271 | #----------------------# 272 | time_str = datetime.datetime.strftime(datetime.datetime.now(),'%Y_%m_%d_%H_%M_%S') 273 | log_dir = os.path.join(save_dir, "loss_" + str(time_str)) 274 | loss_history = LossHistory(log_dir, model, input_shape = input_shape) 275 | 276 | #------------------------------------------------------------------# 277 | # torch 1.2不支持amp,建议使用torch 1.7.1及以上正确使用fp16 278 | # 因此torch1.2这里显示"could not be resolve" 279 | #------------------------------------------------------------------# 280 | if fp16: 281 | from torch.cuda.amp import GradScaler as GradScaler 282 | scaler = GradScaler() 283 | else: 284 | scaler = None 285 | 286 | model_train = model.train() 287 | if Cuda: 288 | model_train = torch.nn.DataParallel(model_train) 289 | cudnn.benchmark = True 290 | model_train = model_train.cuda() 291 | 292 | #---------------------------# 293 | # 读取数据集对应的txt 294 | #---------------------------# 295 | with open(train_annotation_path, encoding='utf-8') as f: 296 | train_lines = f.readlines() 297 | with open(val_annotation_path, encoding='utf-8') as f: 298 | val_lines = f.readlines() 299 | num_train = len(train_lines) 300 | num_val = len(val_lines) 301 | 302 | show_config( 303 | classes_path = classes_path, model_path = model_path, input_shape = input_shape, \ 304 | Init_Epoch = Init_Epoch, Freeze_Epoch = Freeze_Epoch, UnFreeze_Epoch = UnFreeze_Epoch, Freeze_batch_size = Freeze_batch_size, Unfreeze_batch_size = Unfreeze_batch_size, Freeze_Train = Freeze_Train, \ 305 | Init_lr = Init_lr, Min_lr = Min_lr, optimizer_type = optimizer_type, momentum = momentum, lr_decay_type = lr_decay_type, \ 306 | save_period = save_period, save_dir = save_dir, num_workers = num_workers, num_train = num_train, num_val = num_val 307 | ) 308 | #---------------------------------------------------------# 309 | # 总训练世代指的是遍历全部数据的总次数 310 | # 总训练步长指的是梯度下降的总次数 311 | # 每个训练世代包含若干训练步长,每个训练步长进行一次梯度下降。 312 | # 此处仅建议最低训练世代,上不封顶,计算时只考虑了解冻部分 313 | #----------------------------------------------------------# 314 | wanted_step = 5e4 if optimizer_type == "sgd" else 1.5e4 315 | total_step = num_train // Unfreeze_batch_size * UnFreeze_Epoch 316 | if total_step <= wanted_step: 317 | if num_train // Unfreeze_batch_size == 0: 318 | raise ValueError('数据集过小,无法进行训练,请扩充数据集。') 319 | wanted_epoch = wanted_step // (num_train // Unfreeze_batch_size) + 1 320 | print("\n\033[1;33;44m[Warning] 使用%s优化器时,建议将训练总步长设置到%d以上。\033[0m"%(optimizer_type, wanted_step)) 321 | print("\033[1;33;44m[Warning] 本次运行的总训练数据量为%d,Unfreeze_batch_size为%d,共训练%d个Epoch,计算出总训练步长为%d。\033[0m"%(num_train, Unfreeze_batch_size, UnFreeze_Epoch, total_step)) 322 | print("\033[1;33;44m[Warning] 由于总训练步长为%d,小于建议总步长%d,建议设置总世代为%d。\033[0m"%(total_step, wanted_step, wanted_epoch)) 323 | 324 | #------------------------------------------------------# 325 | # 主干特征提取网络特征通用,冻结训练可以加快训练速度 326 | # 也可以在训练初期防止权值被破坏。 327 | # Init_Epoch为起始世代 328 | # Freeze_Epoch为冻结训练的世代 329 | # UnFreeze_Epoch总训练世代 330 | # 提示OOM或者显存不足请调小Batch_size 331 | #------------------------------------------------------# 332 | if True: 333 | UnFreeze_flag = False 334 | #------------------------------------# 335 | # 冻结一定部分训练 336 | #------------------------------------# 337 | if Freeze_Train: 338 | for param in model.extractor.parameters(): 339 | param.requires_grad = False 340 | # ------------------------------------# 341 | # 冻结bn层 342 | # ------------------------------------# 343 | model.freeze_bn() 344 | 345 | 
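
In practice the freeze stage above just stops gradients to the backbone and keeps its BatchNorm statistics fixed, so only the RPN and head are updated. A quick way to see how much of the model stays trainable (a sketch, assuming the repo root is on the path):

```python
from nets.frcnn import FasterRCNN

model = FasterRCNN(num_classes=20, backbone='resnet50', pretrained=False)
for param in model.extractor.parameters():
    param.requires_grad = False     # freeze the backbone
model.freeze_bn()                   # keep BatchNorm layers in eval mode

trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total = sum(p.numel() for p in model.parameters())
print(f"trainable parameters: {trainable:,} / {total:,}")
```
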
#-------------------------------------------------------------------# 346 | # 如果不冻结训练的话,直接设置batch_size为Unfreeze_batch_size 347 | #-------------------------------------------------------------------# 348 | batch_size = Freeze_batch_size if Freeze_Train else Unfreeze_batch_size 349 | 350 | #-------------------------------------------------------------------# 351 | # 判断当前batch_size,自适应调整学习率 352 | #-------------------------------------------------------------------# 353 | nbs = 16 354 | lr_limit_max = 1e-4 if optimizer_type == 'adam' else 5e-2 355 | lr_limit_min = 1e-4 if optimizer_type == 'adam' else 5e-4 356 | Init_lr_fit = min(max(batch_size / nbs * Init_lr, lr_limit_min), lr_limit_max) 357 | Min_lr_fit = min(max(batch_size / nbs * Min_lr, lr_limit_min * 1e-2), lr_limit_max * 1e-2) 358 | 359 | #---------------------------------------# 360 | # 根据optimizer_type选择优化器 361 | #---------------------------------------# 362 | optimizer = { 363 | 'adam' : optim.Adam(model.parameters(), Init_lr_fit, betas = (momentum, 0.999), weight_decay = weight_decay), 364 | 'sgd' : optim.SGD(model.parameters(), Init_lr_fit, momentum = momentum, nesterov=True, weight_decay = weight_decay) 365 | }[optimizer_type] 366 | 367 | #---------------------------------------# 368 | # 获得学习率下降的公式 369 | #---------------------------------------# 370 | lr_scheduler_func = get_lr_scheduler(lr_decay_type, Init_lr_fit, Min_lr_fit, UnFreeze_Epoch) 371 | 372 | #---------------------------------------# 373 | # 判断每一个世代的长度 374 | #---------------------------------------# 375 | epoch_step = num_train // batch_size 376 | epoch_step_val = num_val // batch_size 377 | 378 | if epoch_step == 0 or epoch_step_val == 0: 379 | raise ValueError("数据集过小,无法继续进行训练,请扩充数据集。") 380 | 381 | train_dataset = FRCNNDataset(train_lines, input_shape, train = True) 382 | val_dataset = FRCNNDataset(val_lines, input_shape, train = False) 383 | 384 | gen = DataLoader(train_dataset, shuffle = True, batch_size = batch_size, num_workers = num_workers, pin_memory = True, 385 | drop_last = True, collate_fn = frcnn_dataset_collate) 386 | gen_val = DataLoader(val_dataset , shuffle = True, batch_size = batch_size, num_workers = num_workers, pin_memory = True, 387 | drop_last = True, collate_fn = frcnn_dataset_collate) 388 | 389 | train_util = FasterRCNNTrainer(model_train, optimizer) 390 | #----------------------# 391 | # 记录eval的map曲线 392 | #----------------------# 393 | eval_callback = EvalCallback(model_train, input_shape, class_names, num_classes, val_lines, log_dir, Cuda, \ 394 | eval_flag=eval_flag, period=eval_period) 395 | 396 | #---------------------------------------# 397 | # 开始模型训练 398 | #---------------------------------------# 399 | for epoch in range(Init_Epoch, UnFreeze_Epoch): 400 | #---------------------------------------# 401 | # 如果模型有冻结学习部分 402 | # 则解冻,并设置参数 403 | #---------------------------------------# 404 | if epoch >= Freeze_Epoch and not UnFreeze_flag and Freeze_Train: 405 | batch_size = Unfreeze_batch_size 406 | 407 | #-------------------------------------------------------------------# 408 | # 判断当前batch_size,自适应调整学习率 409 | #-------------------------------------------------------------------# 410 | nbs = 16 411 | lr_limit_max = 1e-4 if optimizer_type == 'adam' else 5e-2 412 | lr_limit_min = 1e-4 if optimizer_type == 'adam' else 5e-4 413 | Init_lr_fit = min(max(batch_size / nbs * Init_lr, lr_limit_min), lr_limit_max) 414 | Min_lr_fit = min(max(batch_size / nbs * Min_lr, lr_limit_min * 1e-2), lr_limit_max * 1e-2) 415 | 
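
The `nbs = 16` block above scales the learning rate linearly with the batch size and then clamps it to per-optimizer limits. Worked out for the defaults, the clamp pins Adam to `1e-4` regardless of batch size, so the scaling mainly matters for SGD:

```python
Init_lr, Min_lr, nbs = 1e-4, 1e-6, 16
lr_limit_max, lr_limit_min = 1e-4, 1e-4   # the 'adam' limits from the code above

for batch_size in (2, 4, 16):
    Init_lr_fit = min(max(batch_size / nbs * Init_lr, lr_limit_min), lr_limit_max)
    Min_lr_fit = min(max(batch_size / nbs * Min_lr, lr_limit_min * 1e-2), lr_limit_max * 1e-2)
    print(batch_size, Init_lr_fit, Min_lr_fit)   # always 1e-4 and 1e-6 for adam
```
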
#---------------------------------------# 416 | # 获得学习率下降的公式 417 | #---------------------------------------# 418 | lr_scheduler_func = get_lr_scheduler(lr_decay_type, Init_lr_fit, Min_lr_fit, UnFreeze_Epoch) 419 | 420 | for param in model.extractor.parameters(): 421 | param.requires_grad = True 422 | # ------------------------------------# 423 | # 冻结bn层 424 | # ------------------------------------# 425 | model.freeze_bn() 426 | 427 | epoch_step = num_train // batch_size 428 | epoch_step_val = num_val // batch_size 429 | 430 | if epoch_step == 0 or epoch_step_val == 0: 431 | raise ValueError("数据集过小,无法继续进行训练,请扩充数据集。") 432 | 433 | gen = DataLoader(train_dataset, shuffle = True, batch_size = batch_size, num_workers = num_workers, pin_memory=True, 434 | drop_last=True, collate_fn=frcnn_dataset_collate) 435 | gen_val = DataLoader(val_dataset , shuffle = True, batch_size = batch_size, num_workers = num_workers, pin_memory=True, 436 | drop_last=True, collate_fn=frcnn_dataset_collate) 437 | 438 | UnFreeze_flag = True 439 | 440 | set_optimizer_lr(optimizer, lr_scheduler_func, epoch) 441 | 442 | fit_one_epoch(model, train_util, loss_history, eval_callback, optimizer, epoch, epoch_step, epoch_step_val, gen, gen_val, UnFreeze_Epoch, Cuda, fp16, scaler, save_period, save_dir) 443 | 444 | loss_history.writer.close() -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/utils/anchors.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | #--------------------------------------------# 4 | # 生成基础的先验框 5 | #--------------------------------------------# 6 | def generate_anchor_base(base_size = 16, ratios = [0.5, 1, 2], anchor_scales = [8, 16, 32]): 7 | anchor_base = np.zeros((len(ratios) * len(anchor_scales), 4), dtype = np.float32) 8 | for i in range(len(ratios)): 9 | for j in range(len(anchor_scales)): 10 | h = base_size * anchor_scales[j] * np.sqrt(ratios[i]) 11 | w = base_size * anchor_scales[j] * np.sqrt(1. / ratios[i]) 12 | 13 | index = i * len(anchor_scales) + j 14 | anchor_base[index, 0] = - h / 2. 15 | anchor_base[index, 1] = - w / 2. 16 | anchor_base[index, 2] = h / 2. 17 | anchor_base[index, 3] = w / 2. 
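#--------------------------------------------#
# Note on the geometry above: h * w == (base_size * anchor_scale)**2
# and h / w == ratio, so every anchor of a given scale keeps the same
# area while the ratio only reshapes it. E.g. base_size=16, scale=8,
# ratio=0.5 gives h = 128 * sqrt(0.5) ~ 90.5 and w = 128 * sqrt(2) ~ 181.0.
#--------------------------------------------#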
18 | return anchor_base 19 | 20 | #--------------------------------------------# 21 | # 对基础先验框进行拓展对应到所有特征点上 22 | #--------------------------------------------# 23 | def _enumerate_shifted_anchor(anchor_base, feat_stride, height, width): 24 | #---------------------------------# 25 | # 计算网格中心点 26 | #---------------------------------# 27 | shift_x = np.arange(0, width * feat_stride, feat_stride) 28 | shift_y = np.arange(0, height * feat_stride, feat_stride) 29 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 30 | shift = np.stack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel(),), axis=1) 31 | 32 | #---------------------------------# 33 | # 每个网格点上的9个先验框 34 | #---------------------------------# 35 | A = anchor_base.shape[0] 36 | K = shift.shape[0] 37 | anchor = anchor_base.reshape((1, A, 4)) + shift.reshape((K, 1, 4)) 38 | #---------------------------------# 39 | # 所有的先验框 40 | #---------------------------------# 41 | anchor = anchor.reshape((K * A, 4)).astype(np.float32) 42 | return anchor 43 | 44 | if __name__ == "__main__": 45 | import matplotlib.pyplot as plt 46 | nine_anchors = generate_anchor_base() 47 | print(nine_anchors) 48 | 49 | height, width, feat_stride = 38,38,16 50 | anchors_all = _enumerate_shifted_anchor(nine_anchors, feat_stride, height, width) 51 | print(np.shape(anchors_all)) 52 | 53 | fig = plt.figure() 54 | ax = fig.add_subplot(111) 55 | plt.ylim(-300,900) 56 | plt.xlim(-300,900) 57 | shift_x = np.arange(0, width * feat_stride, feat_stride) 58 | shift_y = np.arange(0, height * feat_stride, feat_stride) 59 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 60 | plt.scatter(shift_x,shift_y) 61 | box_widths = anchors_all[:,2]-anchors_all[:,0] 62 | box_heights = anchors_all[:,3]-anchors_all[:,1] 63 | 64 | for i in [108, 109, 110, 111, 112, 113, 114, 115, 116]: 65 | rect = plt.Rectangle([anchors_all[i, 0],anchors_all[i, 1]],box_widths[i],box_heights[i],color="r",fill=False) 66 | ax.add_patch(rect) 67 | plt.show() 68 | -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/utils/callbacks.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import matplotlib 4 | import torch 5 | 6 | matplotlib.use('Agg') 7 | from matplotlib import pyplot as plt 8 | import scipy.signal 9 | 10 | import shutil 11 | import numpy as np 12 | from PIL import Image 13 | from torch.utils.tensorboard import SummaryWriter 14 | from tqdm import tqdm 15 | 16 | from .utils import cvtColor, resize_image, preprocess_input, get_new_img_size 17 | from .utils_bbox import DecodeBox 18 | from .utils_map import get_coco_map, get_map 19 | 20 | class LossHistory(): 21 | def __init__(self, log_dir, model, input_shape): 22 | self.log_dir = log_dir 23 | self.losses = [] 24 | self.val_loss = [] 25 | 26 | os.makedirs(self.log_dir) 27 | self.writer = SummaryWriter(self.log_dir) 28 | # try: 29 | # dummy_input = torch.randn(2, 3, input_shape[0], input_shape[1]) 30 | # self.writer.add_graph(model, dummy_input) 31 | # except: 32 | # pass 33 | 34 | def append_loss(self, epoch, loss, val_loss): 35 | if not os.path.exists(self.log_dir): 36 | os.makedirs(self.log_dir) 37 | 38 | self.losses.append(loss) 39 | self.val_loss.append(val_loss) 40 | 41 | with open(os.path.join(self.log_dir, "epoch_loss.txt"), 'a') as f: 42 | f.write(str(loss)) 43 | f.write("\n") 44 | with open(os.path.join(self.log_dir, "epoch_val_loss.txt"), 'a') as f: 45 | f.write(str(val_loss)) 46 | f.write("\n") 47 | 48 | 
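# The add_scalar calls below mirror the loss txt files into TensorBoard;
# assuming save_dir points at the repository's logs folder, the curves
# can be viewed during training with: tensorboard --logdir logs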
self.writer.add_scalar('loss', loss, epoch) 49 | self.writer.add_scalar('val_loss', val_loss, epoch) 50 | self.loss_plot() 51 | 52 | def loss_plot(self): 53 | iters = range(len(self.losses)) 54 | 55 | plt.figure() 56 | plt.plot(iters, self.losses, 'red', linewidth = 2, label='train loss') 57 | plt.plot(iters, self.val_loss, 'coral', linewidth = 2, label='val loss') 58 | try: 59 | if len(self.losses) < 25: 60 | num = 5 61 | else: 62 | num = 15 63 | 64 | plt.plot(iters, scipy.signal.savgol_filter(self.losses, num, 3), 'green', linestyle = '--', linewidth = 2, label='smooth train loss') 65 | plt.plot(iters, scipy.signal.savgol_filter(self.val_loss, num, 3), '#8B4513', linestyle = '--', linewidth = 2, label='smooth val loss') 66 | except: 67 | pass 68 | 69 | plt.grid(True) 70 | plt.xlabel('Epoch') 71 | plt.ylabel('Loss') 72 | plt.legend(loc="upper right") 73 | 74 | plt.savefig(os.path.join(self.log_dir, "epoch_loss.png")) 75 | 76 | plt.cla() 77 | plt.close("all") 78 | 79 | class EvalCallback(): 80 | def __init__(self, net, input_shape, class_names, num_classes, val_lines, log_dir, cuda, \ 81 | map_out_path=".temp_map_out", max_boxes=100, confidence=0.05, nms_iou=0.5, letterbox_image=True, MINOVERLAP=0.5, eval_flag=True, period=1): 82 | super(EvalCallback, self).__init__() 83 | 84 | self.net = net 85 | self.input_shape = input_shape 86 | self.class_names = class_names 87 | self.num_classes = num_classes 88 | self.val_lines = val_lines 89 | self.log_dir = log_dir 90 | self.cuda = cuda 91 | self.map_out_path = map_out_path 92 | self.max_boxes = max_boxes 93 | self.confidence = confidence 94 | self.nms_iou = nms_iou 95 | self.letterbox_image = letterbox_image 96 | self.MINOVERLAP = MINOVERLAP 97 | self.eval_flag = eval_flag 98 | self.period = period 99 | 100 | self.std = torch.Tensor([0.1, 0.1, 0.2, 0.2]).repeat(self.num_classes + 1)[None] 101 | if self.cuda: 102 | self.std = self.std.cuda() 103 | self.bbox_util = DecodeBox(self.std, self.num_classes) 104 | 105 | self.maps = [0] 106 | self.epoches = [0] 107 | if self.eval_flag: 108 | with open(os.path.join(self.log_dir, "epoch_map.txt"), 'a') as f: 109 | f.write(str(0)) 110 | f.write("\n") 111 | 112 | #---------------------------------------------------# 113 | # 检测图片 114 | #---------------------------------------------------# 115 | def get_map_txt(self, image_id, image, class_names, map_out_path): 116 | f = open(os.path.join(map_out_path, "detection-results/"+image_id+".txt"),"w") 117 | #---------------------------------------------------# 118 | # 计算输入图片的高和宽 119 | #---------------------------------------------------# 120 | image_shape = np.array(np.shape(image)[0:2]) 121 | input_shape = get_new_img_size(image_shape[0], image_shape[1]) 122 | #---------------------------------------------------------# 123 | # 在这里将图像转换成RGB图像,防止灰度图在预测时报错。 124 | # 代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB 125 | #---------------------------------------------------------# 126 | image = cvtColor(image) 127 | 128 | #---------------------------------------------------------# 129 | # 给原图像进行resize,resize到短边为600的大小上 130 | #---------------------------------------------------------# 131 | image_data = resize_image(image, [input_shape[1], input_shape[0]]) 132 | #---------------------------------------------------------# 133 | # 添加上batch_size维度 134 | #---------------------------------------------------------# 135 | image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0) 136 | 137 | with torch.no_grad(): 138 | images = 
torch.from_numpy(image_data) 139 | if self.cuda: 140 | images = images.cuda() 141 | 142 | roi_cls_locs, roi_scores, rois, _ = self.net(images) 143 | #-------------------------------------------------------------# 144 | # 利用classifier的预测结果对建议框进行解码,获得预测框 145 | #-------------------------------------------------------------# 146 | results = self.bbox_util.forward(roi_cls_locs, roi_scores, rois, image_shape, input_shape, 147 | nms_iou = self.nms_iou, confidence = self.confidence) 148 | #--------------------------------------# 149 | # 如果没有检测到物体,则返回原图 150 | #--------------------------------------# 151 | if len(results[0]) <= 0: 152 | return 153 | 154 | top_label = np.array(results[0][:, 5], dtype = 'int32') 155 | top_conf = results[0][:, 4] 156 | top_boxes = results[0][:, :4] 157 | 158 | top_100 = np.argsort(top_conf)[::-1][:self.max_boxes] 159 | top_boxes = top_boxes[top_100] 160 | top_conf = top_conf[top_100] 161 | top_label = top_label[top_100] 162 | 163 | for i, c in list(enumerate(top_label)): 164 | predicted_class = self.class_names[int(c)] 165 | box = top_boxes[i] 166 | score = str(top_conf[i]) 167 | 168 | top, left, bottom, right = box 169 | if predicted_class not in class_names: 170 | continue 171 | 172 | f.write("%s %s %s %s %s %s\n" % (predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)),str(int(bottom)))) 173 | 174 | f.close() 175 | return 176 | 177 | def on_epoch_end(self, epoch): 178 | if epoch % self.period == 0 and self.eval_flag: 179 | if not os.path.exists(self.map_out_path): 180 | os.makedirs(self.map_out_path) 181 | if not os.path.exists(os.path.join(self.map_out_path, "ground-truth")): 182 | os.makedirs(os.path.join(self.map_out_path, "ground-truth")) 183 | if not os.path.exists(os.path.join(self.map_out_path, "detection-results")): 184 | os.makedirs(os.path.join(self.map_out_path, "detection-results")) 185 | print("Get map.") 186 | for annotation_line in tqdm(self.val_lines): 187 | line = annotation_line.split() 188 | image_id = os.path.basename(line[0]).split('.')[0] 189 | #------------------------------# 190 | # 读取图像并转换成RGB图像 191 | #------------------------------# 192 | image = Image.open(line[0]) 193 | #------------------------------# 194 | # 获得预测框 195 | #------------------------------# 196 | gt_boxes = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]]) 197 | #------------------------------# 198 | # 获得预测txt 199 | #------------------------------# 200 | self.get_map_txt(image_id, image, self.class_names, self.map_out_path) 201 | 202 | #------------------------------# 203 | # 获得真实框txt 204 | #------------------------------# 205 | with open(os.path.join(self.map_out_path, "ground-truth/"+image_id+".txt"), "w") as new_f: 206 | for box in gt_boxes: 207 | left, top, right, bottom, obj = box 208 | obj_name = self.class_names[obj] 209 | new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom)) 210 | 211 | print("Calculate Map.") 212 | try: 213 | temp_map = get_coco_map(class_names = self.class_names, path = self.map_out_path)[1] 214 | except: 215 | temp_map = get_map(self.MINOVERLAP, False, path = self.map_out_path) 216 | self.maps.append(temp_map) 217 | self.epoches.append(epoch) 218 | 219 | with open(os.path.join(self.log_dir, "epoch_map.txt"), 'a') as f: 220 | f.write(str(temp_map)) 221 | f.write("\n") 222 | 223 | plt.figure() 224 | plt.plot(self.epoches, self.maps, 'red', linewidth = 2, label='train map') 225 | 226 | plt.grid(True) 227 | plt.xlabel('Epoch') 228 | plt.ylabel('Map %s'%str(self.MINOVERLAP)) 229 | plt.title('A 
Map Curve') 230 | plt.legend(loc="upper right") 231 | 232 | plt.savefig(os.path.join(self.log_dir, "epoch_map.png")) 233 | plt.cla() 234 | plt.close("all") 235 | 236 | print("Get map done.") 237 | shutil.rmtree(self.map_out_path) 238 | -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/utils/dataloader.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import torch 4 | from PIL import Image 5 | from torch.utils.data.dataset import Dataset 6 | 7 | from utils.utils import cvtColor, preprocess_input 8 | 9 | 10 | class FRCNNDataset(Dataset): 11 | def __init__(self, annotation_lines, input_shape = [600, 600], train = True): 12 | self.annotation_lines = annotation_lines 13 | self.length = len(annotation_lines) 14 | self.input_shape = input_shape 15 | self.train = train 16 | 17 | def __len__(self): 18 | return self.length 19 | 20 | def __getitem__(self, index): 21 | index = index % self.length 22 | #---------------------------------------------------# 23 | # 训练时进行数据的随机增强 24 | # 验证时不进行数据的随机增强 25 | #---------------------------------------------------# 26 | image, y = self.get_random_data(self.annotation_lines[index], self.input_shape[0:2], random = self.train) 27 | image = np.transpose(preprocess_input(np.array(image, dtype=np.float32)), (2, 0, 1)) 28 | box_data = np.zeros((len(y), 5)) 29 | if len(y) > 0: 30 | box_data[:len(y)] = y 31 | 32 | box = box_data[:, :4] 33 | label = box_data[:, -1] 34 | return image, box, label 35 | 36 | def rand(self, a=0, b=1): 37 | return np.random.rand()*(b-a) + a 38 | 39 | def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True): 40 | line = annotation_line.split() 41 | #------------------------------# 42 | # 读取图像并转换成RGB图像 43 | #------------------------------# 44 | image = Image.open(line[0]) 45 | image = cvtColor(image) 46 | #------------------------------# 47 | # 获得图像的高宽与目标高宽 48 | #------------------------------# 49 | iw, ih = image.size 50 | h, w = input_shape 51 | #------------------------------# 52 | # 获得预测框 53 | #------------------------------# 54 | box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]]) 55 | 56 | if not random: 57 | scale = min(w/iw, h/ih) 58 | nw = int(iw*scale) 59 | nh = int(ih*scale) 60 | dx = (w-nw)//2 61 | dy = (h-nh)//2 62 | 63 | #---------------------------------# 64 | # 将图像多余的部分加上灰条 65 | #---------------------------------# 66 | image = image.resize((nw,nh), Image.BICUBIC) 67 | new_image = Image.new('RGB', (w,h), (128,128,128)) 68 | new_image.paste(image, (dx, dy)) 69 | image_data = np.array(new_image, np.float32) 70 | 71 | #---------------------------------# 72 | # 对真实框进行调整 73 | #---------------------------------# 74 | if len(box)>0: 75 | np.random.shuffle(box) 76 | box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx 77 | box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy 78 | box[:, 0:2][box[:, 0:2]<0] = 0 79 | box[:, 2][box[:, 2]>w] = w 80 | box[:, 3][box[:, 3]>h] = h 81 | box_w = box[:, 2] - box[:, 0] 82 | box_h = box[:, 3] - box[:, 1] 83 | box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box 84 | 85 | return image_data, box 86 | 87 | #------------------------------------------# 88 | # 对图像进行缩放并且进行长和宽的扭曲 89 | #------------------------------------------# 90 | new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter) 91 | scale = self.rand(.25, 2) 92 | if new_ar < 1: 93 | nh = int(scale*h) 94 | nw = int(nh*new_ar) 95 | 
else: 96 | nw = int(scale*w) 97 | nh = int(nw/new_ar) 98 | image = image.resize((nw,nh), Image.BICUBIC) 99 | 100 | #------------------------------------------# 101 | # 将图像多余的部分加上灰条 102 | #------------------------------------------# 103 | dx = int(self.rand(0, w-nw)) 104 | dy = int(self.rand(0, h-nh)) 105 | new_image = Image.new('RGB', (w,h), (128,128,128)) 106 | new_image.paste(image, (dx, dy)) 107 | image = new_image 108 | 109 | #------------------------------------------# 110 | # 翻转图像 111 | #------------------------------------------# 112 | flip = self.rand()<.5 113 | if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT) 114 | 115 | image_data = np.array(image, np.uint8) 116 | #---------------------------------# 117 | # 对图像进行色域变换 118 | # 计算色域变换的参数 119 | #---------------------------------# 120 | r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1 121 | #---------------------------------# 122 | # 将图像转到HSV上 123 | #---------------------------------# 124 | hue, sat, val = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV)) 125 | dtype = image_data.dtype 126 | #---------------------------------# 127 | # 应用变换 128 | #---------------------------------# 129 | x = np.arange(0, 256, dtype=r.dtype) 130 | lut_hue = ((x * r[0]) % 180).astype(dtype) 131 | lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) 132 | lut_val = np.clip(x * r[2], 0, 255).astype(dtype) 133 | 134 | image_data = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))) 135 | image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB) 136 | 137 | #---------------------------------# 138 | # 对真实框进行调整 139 | #---------------------------------# 140 | if len(box)>0: 141 | np.random.shuffle(box) 142 | box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx 143 | box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy 144 | if flip: box[:, [0,2]] = w - box[:, [2,0]] 145 | box[:, 0:2][box[:, 0:2]<0] = 0 146 | box[:, 2][box[:, 2]>w] = w 147 | box[:, 3][box[:, 3]>h] = h 148 | box_w = box[:, 2] - box[:, 0] 149 | box_h = box[:, 3] - box[:, 1] 150 | box = box[np.logical_and(box_w>1, box_h>1)] 151 | 152 | return image_data, box 153 | 154 | # DataLoader中collate_fn使用 155 | def frcnn_dataset_collate(batch): 156 | images = [] 157 | bboxes = [] 158 | labels = [] 159 | for img, box, label in batch: 160 | images.append(img) 161 | bboxes.append(box) 162 | labels.append(label) 163 | images = torch.from_numpy(np.array(images)) 164 | return images, bboxes, labels 165 | 166 | -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/utils/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | 4 | #---------------------------------------------------------# 5 | # 将图像转换成RGB图像,防止灰度图在预测时报错。 6 | # 代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB 7 | #---------------------------------------------------------# 8 | def cvtColor(image): 9 | if len(np.shape(image)) == 3 and np.shape(image)[2] == 3: 10 | return image 11 | else: 12 | image = image.convert('RGB') 13 | return image 14 | 15 | #---------------------------------------------------# 16 | # 对输入图像进行resize 17 | #---------------------------------------------------# 18 | def resize_image(image, size): 19 | w, h = size 20 | new_image = image.resize((w, h), Image.BICUBIC) 21 | return new_image 22 | 23 | #---------------------------------------------------# 24 | # 获得类 25 | #---------------------------------------------------# 26 | def get_classes(classes_path): 27 | with open(classes_path, 
encoding='utf-8') as f: 28 | class_names = f.readlines() 29 | class_names = [c.strip() for c in class_names] 30 | return class_names, len(class_names) 31 | 32 | #---------------------------------------------------# 33 | # 获得学习率 34 | #---------------------------------------------------# 35 | def get_lr(optimizer): 36 | for param_group in optimizer.param_groups: 37 | return param_group['lr'] 38 | 39 | def preprocess_input(image): 40 | image /= 255.0 41 | return image 42 | 43 | def show_config(**kwargs): 44 | print('Configurations:') 45 | print('-' * 70) 46 | print('|%25s | %40s|' % ('keys', 'values')) 47 | print('-' * 70) 48 | for key, value in kwargs.items(): 49 | print('|%25s | %40s|' % (str(key), str(value))) 50 | print('-' * 70) 51 | 52 | def get_new_img_size(height, width, img_min_side=600): 53 | if width <= height: 54 | f = float(img_min_side) / width 55 | resized_height = int(f * height) 56 | resized_width = int(img_min_side) 57 | else: 58 | f = float(img_min_side) / height 59 | resized_width = int(f * width) 60 | resized_height = int(img_min_side) 61 | 62 | return resized_height, resized_width 63 | -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/utils/utils_bbox.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.nn import functional as F 4 | from torchvision.ops import nms 5 | 6 | 7 | def loc2bbox(src_bbox, loc): 8 | if src_bbox.size()[0] == 0: 9 | return torch.zeros((0, 4), dtype=loc.dtype) 10 | 11 | src_width = torch.unsqueeze(src_bbox[:, 2] - src_bbox[:, 0], -1) 12 | src_height = torch.unsqueeze(src_bbox[:, 3] - src_bbox[:, 1], -1) 13 | src_ctr_x = torch.unsqueeze(src_bbox[:, 0], -1) + 0.5 * src_width 14 | src_ctr_y = torch.unsqueeze(src_bbox[:, 1], -1) + 0.5 * src_height 15 | 16 | dx = loc[:, 0::4] 17 | dy = loc[:, 1::4] 18 | dw = loc[:, 2::4] 19 | dh = loc[:, 3::4] 20 | 21 | ctr_x = dx * src_width + src_ctr_x 22 | ctr_y = dy * src_height + src_ctr_y 23 | w = torch.exp(dw) * src_width 24 | h = torch.exp(dh) * src_height 25 | 26 | dst_bbox = torch.zeros_like(loc) 27 | dst_bbox[:, 0::4] = ctr_x - 0.5 * w 28 | dst_bbox[:, 1::4] = ctr_y - 0.5 * h 29 | dst_bbox[:, 2::4] = ctr_x + 0.5 * w 30 | dst_bbox[:, 3::4] = ctr_y + 0.5 * h 31 | 32 | return dst_bbox 33 | 34 | class DecodeBox(): 35 | def __init__(self, std, num_classes): 36 | self.std = std 37 | self.num_classes = num_classes + 1 38 | 39 | def frcnn_correct_boxes(self, box_xy, box_wh, input_shape, image_shape): 40 | #-----------------------------------------------------------------# 41 | # 把y轴放前面是因为方便预测框和图像的宽高进行相乘 42 | #-----------------------------------------------------------------# 43 | box_yx = box_xy[..., ::-1] 44 | box_hw = box_wh[..., ::-1] 45 | input_shape = np.array(input_shape) 46 | image_shape = np.array(image_shape) 47 | 48 | box_mins = box_yx - (box_hw / 2.) 49 | box_maxes = box_yx + (box_hw / 2.) 
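#-----------------------------------------------------------------#
# At this point the boxes are normalised (y1, x1, y2, x2) in [0, 1];
# multiplying by (h, w, h, w) below maps them back onto the original
# image. E.g. mins of (0.25, 0.10) on a 600x800 image become (150, 80).
#-----------------------------------------------------------------#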
50 | boxes = np.concatenate([box_mins[..., 0:1], box_mins[..., 1:2], box_maxes[..., 0:1], box_maxes[..., 1:2]], axis=-1) 51 | boxes *= np.concatenate([image_shape, image_shape], axis=-1) 52 | return boxes 53 | 54 | def forward(self, roi_cls_locs, roi_scores, rois, image_shape, input_shape, nms_iou = 0.3, confidence = 0.5): 55 | results = [] 56 | bs = len(roi_cls_locs) 57 | #--------------------------------# 58 | # batch_size, num_rois, 4 59 | #--------------------------------# 60 | rois = rois.view((bs, -1, 4)) 61 | #----------------------------------------------------------------------------------------------------------------# 62 | # 对每一张图片进行处理,由于在predict.py的时候,我们只输入一张图片,所以for i in range(len(mbox_loc))只进行一次 63 | #----------------------------------------------------------------------------------------------------------------# 64 | for i in range(bs): 65 | #----------------------------------------------------------# 66 | # 对回归参数进行reshape 67 | #----------------------------------------------------------# 68 | roi_cls_loc = roi_cls_locs[i] * self.std 69 | #----------------------------------------------------------# 70 | # 第一维度是建议框的数量,第二维度是每个种类 71 | # 第三维度是对应种类的调整参数 72 | #----------------------------------------------------------# 73 | roi_cls_loc = roi_cls_loc.view([-1, self.num_classes, 4]) 74 | 75 | #-------------------------------------------------------------# 76 | # 利用classifier网络的预测结果对建议框进行调整获得预测框 77 | # num_rois, 4 -> num_rois, 1, 4 -> num_rois, num_classes, 4 78 | #-------------------------------------------------------------# 79 | roi = rois[i].view((-1, 1, 4)).expand_as(roi_cls_loc) 80 | cls_bbox = loc2bbox(roi.contiguous().view((-1, 4)), roi_cls_loc.contiguous().view((-1, 4))) 81 | cls_bbox = cls_bbox.view([-1, (self.num_classes), 4]) 82 | #-------------------------------------------------------------# 83 | # 对预测框进行归一化,调整到0-1之间 84 | #-------------------------------------------------------------# 85 | cls_bbox[..., [0, 2]] = (cls_bbox[..., [0, 2]]) / input_shape[1] 86 | cls_bbox[..., [1, 3]] = (cls_bbox[..., [1, 3]]) / input_shape[0] 87 | 88 | roi_score = roi_scores[i] 89 | prob = F.softmax(roi_score, dim=-1) 90 | 91 | results.append([]) 92 | for c in range(1, self.num_classes): 93 | #--------------------------------# 94 | # 取出属于该类的所有框的置信度 95 | # 判断是否大于门限 96 | #--------------------------------# 97 | c_confs = prob[:, c] 98 | c_confs_m = c_confs > confidence 99 | 100 | if len(c_confs[c_confs_m]) > 0: 101 | #-----------------------------------------# 102 | # 取出得分高于confidence的框 103 | #-----------------------------------------# 104 | boxes_to_process = cls_bbox[c_confs_m, c] 105 | confs_to_process = c_confs[c_confs_m] 106 | 107 | keep = nms( 108 | boxes_to_process, 109 | confs_to_process, 110 | nms_iou 111 | ) 112 | #-----------------------------------------# 113 | # 取出在非极大抑制中效果较好的内容 114 | #-----------------------------------------# 115 | good_boxes = boxes_to_process[keep] 116 | confs = confs_to_process[keep][:, None] 117 | labels = (c - 1) * torch.ones((len(keep), 1)).cuda() if confs.is_cuda else (c - 1) * torch.ones((len(keep), 1)) 118 | #-----------------------------------------# 119 | # 将label、置信度、框的位置进行堆叠。 120 | #-----------------------------------------# 121 | c_pred = torch.cat((good_boxes, confs, labels), dim=1).cpu().numpy() 122 | # 添加进result里 123 | results[-1].extend(c_pred) 124 | 125 | if len(results[-1]) > 0: 126 | results[-1] = np.array(results[-1]) 127 | box_xy, box_wh = (results[-1][:, 0:2] + results[-1][:, 2:4])/2, results[-1][:, 2:4] - results[-1][:, 0:2] 128 | 
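#-------------------------------------------------------#
# Convert corner form (x1, y1, x2, y2) to centre/size form,
# e.g. (0.2, 0.2, 0.4, 0.6) -> centre (0.3, 0.4), size (0.2, 0.4),
# since frcnn_correct_boxes rescales boxes via their centres.
#-------------------------------------------------------#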
results[-1][:, :4] = self.frcnn_correct_boxes(box_xy, box_wh, input_shape, image_shape) 129 | 130 | return results 131 | 132 | -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/utils/utils_fit.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | from tqdm import tqdm 5 | 6 | from utils.utils import get_lr 7 | 8 | 9 | def fit_one_epoch(model, train_util, loss_history, eval_callback, optimizer, epoch, epoch_step, epoch_step_val, gen, gen_val, Epoch, cuda, fp16, scaler, save_period, save_dir): 10 | total_loss = 0 11 | rpn_loc_loss = 0 12 | rpn_cls_loss = 0 13 | roi_loc_loss = 0 14 | roi_cls_loss = 0 15 | 16 | val_loss = 0 17 | print('Start Train') 18 | with tqdm(total=epoch_step,desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar: 19 | for iteration, batch in enumerate(gen): 20 | if iteration >= epoch_step: 21 | break 22 | images, boxes, labels = batch[0], batch[1], batch[2] 23 | with torch.no_grad(): 24 | if cuda: 25 | images = images.cuda() 26 | 27 | rpn_loc, rpn_cls, roi_loc, roi_cls, total = train_util.train_step(images, boxes, labels, 1, fp16, scaler) 28 | total_loss += total.item() 29 | rpn_loc_loss += rpn_loc.item() 30 | rpn_cls_loss += rpn_cls.item() 31 | roi_loc_loss += roi_loc.item() 32 | roi_cls_loss += roi_cls.item() 33 | 34 | pbar.set_postfix(**{'total_loss' : total_loss / (iteration + 1), 35 | 'rpn_loc' : rpn_loc_loss / (iteration + 1), 36 | 'rpn_cls' : rpn_cls_loss / (iteration + 1), 37 | 'roi_loc' : roi_loc_loss / (iteration + 1), 38 | 'roi_cls' : roi_cls_loss / (iteration + 1), 39 | 'lr' : get_lr(optimizer)}) 40 | pbar.update(1) 41 | 42 | print('Finish Train') 43 | print('Start Validation') 44 | with tqdm(total=epoch_step_val, desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar: 45 | for iteration, batch in enumerate(gen_val): 46 | if iteration >= epoch_step_val: 47 | break 48 | images, boxes, labels = batch[0], batch[1], batch[2] 49 | with torch.no_grad(): 50 | if cuda: 51 | images = images.cuda() 52 | 53 | train_util.optimizer.zero_grad() 54 | _, _, _, _, val_total = train_util.forward(images, boxes, labels, 1) 55 | val_loss += val_total.item() 56 | 57 | pbar.set_postfix(**{'val_loss' : val_loss / (iteration + 1)}) 58 | pbar.update(1) 59 | 60 | print('Finish Validation') 61 | loss_history.append_loss(epoch + 1, total_loss / epoch_step, val_loss / epoch_step_val) 62 | eval_callback.on_epoch_end(epoch + 1) 63 | print('Epoch:'+ str(epoch + 1) + '/' + str(Epoch)) 64 | print('Total Loss: %.3f || Val Loss: %.3f ' % (total_loss / epoch_step, val_loss / epoch_step_val)) 65 | 66 | #-----------------------------------------------# 67 | # 保存权值 68 | #-----------------------------------------------# 69 | if (epoch + 1) % save_period == 0 or epoch + 1 == Epoch: 70 | torch.save(model.state_dict(), os.path.join(save_dir, 'ep%03d-loss%.3f-val_loss%.3f.pth' % (epoch + 1, total_loss / epoch_step, val_loss / epoch_step_val))) 71 | 72 | if len(loss_history.val_loss) <= 1 or (val_loss / epoch_step_val) <= min(loss_history.val_loss): 73 | print('Save best model to best_epoch_weights.pth') 74 | torch.save(model.state_dict(), os.path.join(save_dir, "best_epoch_weights.pth")) 75 | 76 | torch.save(model.state_dict(), os.path.join(save_dir, "last_epoch_weights.pth")) -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/utils/utils_map.py: 
-------------------------------------------------------------------------------- 1 | import glob 2 | import json 3 | import math 4 | import operator 5 | import os 6 | import shutil 7 | import sys 8 | try: 9 | from pycocotools.coco import COCO 10 | from pycocotools.cocoeval import COCOeval 11 | except: 12 | pass 13 | import cv2 14 | import matplotlib 15 | matplotlib.use('Agg') 16 | from matplotlib import pyplot as plt 17 | import numpy as np 18 | 19 | ''' 20 | 0,0 ------> x (width) 21 | | 22 | | (Left,Top) 23 | | *_________ 24 | | | | 25 | | | 26 | y |_________| 27 | (height) * 28 | (Right,Bottom) 29 | ''' 30 | 31 | def log_average_miss_rate(precision, fp_cumsum, num_images): 32 | """ 33 | log-average miss rate: 34 | Calculated by averaging miss rates at 9 evenly spaced FPPI points 35 | between 1e-2 and 1e0 (i.e. 0.01 to 1), in log-space. 36 | 37 | output: 38 | lamr | log-average miss rate 39 | mr | miss rate 40 | fppi | false positives per image 41 | 42 | references: 43 | [1] Dollar, Piotr, et al. "Pedestrian Detection: An Evaluation of the 44 | State of the Art." Pattern Analysis and Machine Intelligence, IEEE 45 | Transactions on 34.4 (2012): 743 - 761. 46 | """ 47 | 48 | if precision.size == 0: 49 | lamr = 0 50 | mr = 1 51 | fppi = 0 52 | return lamr, mr, fppi 53 | 54 | fppi = fp_cumsum / float(num_images) 55 | mr = (1 - precision) 56 | 57 | fppi_tmp = np.insert(fppi, 0, -1.0) 58 | mr_tmp = np.insert(mr, 0, 1.0) 59 | 60 | ref = np.logspace(-2.0, 0.0, num = 9) 61 | for i, ref_i in enumerate(ref): 62 | j = np.where(fppi_tmp <= ref_i)[-1][-1] 63 | ref[i] = mr_tmp[j] 64 | 65 | lamr = math.exp(np.mean(np.log(np.maximum(1e-10, ref)))) 66 | 67 | return lamr, mr, fppi 68 | 69 | """ 70 | throw an error and exit 71 | """ 72 | def error(msg): 73 | print(msg) 74 | sys.exit(0) 75 | 76 | """ 77 | check if the number is a float between 0.0 and 1.0 78 | """ 79 | def is_float_between_0_and_1(value): 80 | try: 81 | val = float(value) 82 | if val > 0.0 and val < 1.0: 83 | return True 84 | else: 85 | return False 86 | except ValueError: 87 | return False 88 | 89 | """ 90 | Calculate the AP given the recall and precision array 91 | 1st) We compute a version of the measured precision/recall curve with 92 | precision monotonically decreasing 93 | 2nd) We compute the AP as the area under this curve by numerical integration.
94 | """ 95 | def voc_ap(rec, prec): 96 | """ 97 | --- Official matlab code VOC2012--- 98 | mrec=[0 ; rec ; 1]; 99 | mpre=[0 ; prec ; 0]; 100 | for i=numel(mpre)-1:-1:1 101 | mpre(i)=max(mpre(i),mpre(i+1)); 102 | end 103 | i=find(mrec(2:end)~=mrec(1:end-1))+1; 104 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 105 | """ 106 | rec.insert(0, 0.0) # insert 0.0 at begining of list 107 | rec.append(1.0) # insert 1.0 at end of list 108 | mrec = rec[:] 109 | prec.insert(0, 0.0) # insert 0.0 at begining of list 110 | prec.append(0.0) # insert 0.0 at end of list 111 | mpre = prec[:] 112 | """ 113 | This part makes the precision monotonically decreasing 114 | (goes from the end to the beginning) 115 | matlab: for i=numel(mpre)-1:-1:1 116 | mpre(i)=max(mpre(i),mpre(i+1)); 117 | """ 118 | for i in range(len(mpre)-2, -1, -1): 119 | mpre[i] = max(mpre[i], mpre[i+1]) 120 | """ 121 | This part creates a list of indexes where the recall changes 122 | matlab: i=find(mrec(2:end)~=mrec(1:end-1))+1; 123 | """ 124 | i_list = [] 125 | for i in range(1, len(mrec)): 126 | if mrec[i] != mrec[i-1]: 127 | i_list.append(i) # if it was matlab would be i + 1 128 | """ 129 | The Average Precision (AP) is the area under the curve 130 | (numerical integration) 131 | matlab: ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 132 | """ 133 | ap = 0.0 134 | for i in i_list: 135 | ap += ((mrec[i]-mrec[i-1])*mpre[i]) 136 | return ap, mrec, mpre 137 | 138 | 139 | """ 140 | Convert the lines of a file to a list 141 | """ 142 | def file_lines_to_list(path): 143 | # open txt file lines to a list 144 | with open(path) as f: 145 | content = f.readlines() 146 | # remove whitespace characters like `\n` at the end of each line 147 | content = [x.strip() for x in content] 148 | return content 149 | 150 | """ 151 | Draws text in image 152 | """ 153 | def draw_text_in_image(img, text, pos, color, line_width): 154 | font = cv2.FONT_HERSHEY_PLAIN 155 | fontScale = 1 156 | lineType = 1 157 | bottomLeftCornerOfText = pos 158 | cv2.putText(img, text, 159 | bottomLeftCornerOfText, 160 | font, 161 | fontScale, 162 | color, 163 | lineType) 164 | text_width, _ = cv2.getTextSize(text, font, fontScale, lineType)[0] 165 | return img, (line_width + text_width) 166 | 167 | """ 168 | Plot - adjust axes 169 | """ 170 | def adjust_axes(r, t, fig, axes): 171 | # get text width for re-scaling 172 | bb = t.get_window_extent(renderer=r) 173 | text_width_inches = bb.width / fig.dpi 174 | # get axis width in inches 175 | current_fig_width = fig.get_figwidth() 176 | new_fig_width = current_fig_width + text_width_inches 177 | propotion = new_fig_width / current_fig_width 178 | # get axis limit 179 | x_lim = axes.get_xlim() 180 | axes.set_xlim([x_lim[0], x_lim[1]*propotion]) 181 | 182 | """ 183 | Draw plot using Matplotlib 184 | """ 185 | def draw_plot_func(dictionary, n_classes, window_title, plot_title, x_label, output_path, to_show, plot_color, true_p_bar): 186 | # sort the dictionary by decreasing value, into a list of tuples 187 | sorted_dic_by_value = sorted(dictionary.items(), key=operator.itemgetter(1)) 188 | # unpacking the list of tuples into two lists 189 | sorted_keys, sorted_values = zip(*sorted_dic_by_value) 190 | # 191 | if true_p_bar != "": 192 | """ 193 | Special case to draw in: 194 | - green -> TP: True Positives (object detected and matches ground-truth) 195 | - red -> FP: False Positives (object detected but does not match ground-truth) 196 | - orange -> FN: False Negatives (object not detected but present in the ground-truth) 197 | """ 198 | fp_sorted = [] 199 | 
tp_sorted = [] 200 | for key in sorted_keys: 201 | fp_sorted.append(dictionary[key] - true_p_bar[key]) 202 | tp_sorted.append(true_p_bar[key]) 203 | plt.barh(range(n_classes), fp_sorted, align='center', color='crimson', label='False Positive') 204 | plt.barh(range(n_classes), tp_sorted, align='center', color='forestgreen', label='True Positive', left=fp_sorted) 205 | # add legend 206 | plt.legend(loc='lower right') 207 | """ 208 | Write number on side of bar 209 | """ 210 | fig = plt.gcf() # gcf - get current figure 211 | axes = plt.gca() 212 | r = fig.canvas.get_renderer() 213 | for i, val in enumerate(sorted_values): 214 | fp_val = fp_sorted[i] 215 | tp_val = tp_sorted[i] 216 | fp_str_val = " " + str(fp_val) 217 | tp_str_val = fp_str_val + " " + str(tp_val) 218 | # trick to paint multicolor with offset: 219 | # first paint everything and then repaint the first number 220 | t = plt.text(val, i, tp_str_val, color='forestgreen', va='center', fontweight='bold') 221 | plt.text(val, i, fp_str_val, color='crimson', va='center', fontweight='bold') 222 | if i == (len(sorted_values)-1): # largest bar 223 | adjust_axes(r, t, fig, axes) 224 | else: 225 | plt.barh(range(n_classes), sorted_values, color=plot_color) 226 | """ 227 | Write number on side of bar 228 | """ 229 | fig = plt.gcf() # gcf - get current figure 230 | axes = plt.gca() 231 | r = fig.canvas.get_renderer() 232 | for i, val in enumerate(sorted_values): 233 | str_val = " " + str(val) # add a space before 234 | if val < 1.0: 235 | str_val = " {0:.2f}".format(val) 236 | t = plt.text(val, i, str_val, color=plot_color, va='center', fontweight='bold') 237 | # re-set axes to show number inside the figure 238 | if i == (len(sorted_values)-1): # largest bar 239 | adjust_axes(r, t, fig, axes) 240 | # set window title 241 | fig.canvas.manager.set_window_title(window_title) 242 | # write classes in y axis 243 | tick_font_size = 12 244 | plt.yticks(range(n_classes), sorted_keys, fontsize=tick_font_size) 245 | """ 246 | Re-scale height accordingly 247 | """ 248 | init_height = fig.get_figheight() 249 | # comput the matrix height in points and inches 250 | dpi = fig.dpi 251 | height_pt = n_classes * (tick_font_size * 1.4) # 1.4 (some spacing) 252 | height_in = height_pt / dpi 253 | # compute the required figure height 254 | top_margin = 0.15 # in percentage of the figure height 255 | bottom_margin = 0.05 # in percentage of the figure height 256 | figure_height = height_in / (1 - top_margin - bottom_margin) 257 | # set new height 258 | if figure_height > init_height: 259 | fig.set_figheight(figure_height) 260 | 261 | # set plot title 262 | plt.title(plot_title, fontsize=14) 263 | # set axis titles 264 | # plt.xlabel('classes') 265 | plt.xlabel(x_label, fontsize='large') 266 | # adjust size of window 267 | fig.tight_layout() 268 | # save the plot 269 | fig.savefig(output_path) 270 | # show image 271 | if to_show: 272 | plt.show() 273 | # close the plot 274 | plt.close() 275 | 276 | def get_map(MINOVERLAP, draw_plot, score_threhold=0.5, path = './map_out'): 277 | GT_PATH = os.path.join(path, 'ground-truth') 278 | DR_PATH = os.path.join(path, 'detection-results') 279 | IMG_PATH = os.path.join(path, 'images-optional') 280 | TEMP_FILES_PATH = os.path.join(path, '.temp_files') 281 | RESULTS_FILES_PATH = os.path.join(path, 'results') 282 | 283 | show_animation = True 284 | if os.path.exists(IMG_PATH): 285 | for dirpath, dirnames, files in os.walk(IMG_PATH): 286 | if not files: 287 | show_animation = False 288 | else: 289 | show_animation = False 290 | 291 | 
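"""
Expected layout under `path` (matching what EvalCallback and get_map.py
write out):
    ground-truth/<image_id>.txt        one "class left top right bottom" per line
    detection-results/<image_id>.txt   one "class score left top right bottom" per line
    images-optional/                   source images, only needed for the animation
"""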
if not os.path.exists(TEMP_FILES_PATH): 292 | os.makedirs(TEMP_FILES_PATH) 293 | 294 | if os.path.exists(RESULTS_FILES_PATH): 295 | shutil.rmtree(RESULTS_FILES_PATH) 296 | else: 297 | os.makedirs(RESULTS_FILES_PATH) 298 | if draw_plot: 299 | try: 300 | matplotlib.use('TkAgg') 301 | except: 302 | pass 303 | os.makedirs(os.path.join(RESULTS_FILES_PATH, "AP")) 304 | os.makedirs(os.path.join(RESULTS_FILES_PATH, "F1")) 305 | os.makedirs(os.path.join(RESULTS_FILES_PATH, "Recall")) 306 | os.makedirs(os.path.join(RESULTS_FILES_PATH, "Precision")) 307 | if show_animation: 308 | os.makedirs(os.path.join(RESULTS_FILES_PATH, "images", "detections_one_by_one")) 309 | 310 | ground_truth_files_list = glob.glob(GT_PATH + '/*.txt') 311 | if len(ground_truth_files_list) == 0: 312 | error("Error: No ground-truth files found!") 313 | ground_truth_files_list.sort() 314 | gt_counter_per_class = {} 315 | counter_images_per_class = {} 316 | 317 | for txt_file in ground_truth_files_list: 318 | file_id = txt_file.split(".txt", 1)[0] 319 | file_id = os.path.basename(os.path.normpath(file_id)) 320 | temp_path = os.path.join(DR_PATH, (file_id + ".txt")) 321 | if not os.path.exists(temp_path): 322 | error_msg = "Error. File not found: {}\n".format(temp_path) 323 | error(error_msg) 324 | lines_list = file_lines_to_list(txt_file) 325 | bounding_boxes = [] 326 | is_difficult = False 327 | already_seen_classes = [] 328 | for line in lines_list: 329 | try: 330 | if "difficult" in line: 331 | class_name, left, top, right, bottom, _difficult = line.split() 332 | is_difficult = True 333 | else: 334 | class_name, left, top, right, bottom = line.split() 335 | except: 336 | if "difficult" in line: 337 | line_split = line.split() 338 | _difficult = line_split[-1] 339 | bottom = line_split[-2] 340 | right = line_split[-3] 341 | top = line_split[-4] 342 | left = line_split[-5] 343 | class_name = "" 344 | for name in line_split[:-5]: 345 | class_name += name + " " 346 | class_name = class_name[:-1] 347 | is_difficult = True 348 | else: 349 | line_split = line.split() 350 | bottom = line_split[-1] 351 | right = line_split[-2] 352 | top = line_split[-3] 353 | left = line_split[-4] 354 | class_name = "" 355 | for name in line_split[:-4]: 356 | class_name += name + " " 357 | class_name = class_name[:-1] 358 | 359 | bbox = left + " " + top + " " + right + " " + bottom 360 | if is_difficult: 361 | bounding_boxes.append({"class_name":class_name, "bbox":bbox, "used":False, "difficult":True}) 362 | is_difficult = False 363 | else: 364 | bounding_boxes.append({"class_name":class_name, "bbox":bbox, "used":False}) 365 | if class_name in gt_counter_per_class: 366 | gt_counter_per_class[class_name] += 1 367 | else: 368 | gt_counter_per_class[class_name] = 1 369 | 370 | if class_name not in already_seen_classes: 371 | if class_name in counter_images_per_class: 372 | counter_images_per_class[class_name] += 1 373 | else: 374 | counter_images_per_class[class_name] = 1 375 | already_seen_classes.append(class_name) 376 | 377 | with open(TEMP_FILES_PATH + "/" + file_id + "_ground_truth.json", 'w') as outfile: 378 | json.dump(bounding_boxes, outfile) 379 | 380 | gt_classes = list(gt_counter_per_class.keys()) 381 | gt_classes = sorted(gt_classes) 382 | n_classes = len(gt_classes) 383 | 384 | dr_files_list = glob.glob(DR_PATH + '/*.txt') 385 | dr_files_list.sort() 386 | for class_index, class_name in enumerate(gt_classes): 387 | bounding_boxes = [] 388 | for txt_file in dr_files_list: 389 | file_id = txt_file.split(".txt",1)[0] 390 | file_id = 
os.path.basename(os.path.normpath(file_id)) 391 | temp_path = os.path.join(GT_PATH, (file_id + ".txt")) 392 | if class_index == 0: 393 | if not os.path.exists(temp_path): 394 | error_msg = "Error. File not found: {}\n".format(temp_path) 395 | error(error_msg) 396 | lines = file_lines_to_list(txt_file) 397 | for line in lines: 398 | try: 399 | tmp_class_name, confidence, left, top, right, bottom = line.split() 400 | except: 401 | line_split = line.split() 402 | bottom = line_split[-1] 403 | right = line_split[-2] 404 | top = line_split[-3] 405 | left = line_split[-4] 406 | confidence = line_split[-5] 407 | tmp_class_name = "" 408 | for name in line_split[:-5]: 409 | tmp_class_name += name + " " 410 | tmp_class_name = tmp_class_name[:-1] 411 | 412 | if tmp_class_name == class_name: 413 | bbox = left + " " + top + " " + right + " " +bottom 414 | bounding_boxes.append({"confidence":confidence, "file_id":file_id, "bbox":bbox}) 415 | 416 | bounding_boxes.sort(key=lambda x:float(x['confidence']), reverse=True) 417 | with open(TEMP_FILES_PATH + "/" + class_name + "_dr.json", 'w') as outfile: 418 | json.dump(bounding_boxes, outfile) 419 | 420 | sum_AP = 0.0 421 | ap_dictionary = {} 422 | lamr_dictionary = {} 423 | with open(RESULTS_FILES_PATH + "/results.txt", 'w') as results_file: 424 | results_file.write("# AP and precision/recall per class\n") 425 | count_true_positives = {} 426 | 427 | for class_index, class_name in enumerate(gt_classes): 428 | count_true_positives[class_name] = 0 429 | dr_file = TEMP_FILES_PATH + "/" + class_name + "_dr.json" 430 | dr_data = json.load(open(dr_file)) 431 | 432 | nd = len(dr_data) 433 | tp = [0] * nd 434 | fp = [0] * nd 435 | score = [0] * nd 436 | score_threhold_idx = 0 437 | for idx, detection in enumerate(dr_data): 438 | file_id = detection["file_id"] 439 | score[idx] = float(detection["confidence"]) 440 | if score[idx] >= score_threhold: 441 | score_threhold_idx = idx 442 | 443 | if show_animation: 444 | ground_truth_img = glob.glob1(IMG_PATH, file_id + ".*") 445 | if len(ground_truth_img) == 0: 446 | error("Error. Image not found with id: " + file_id) 447 | elif len(ground_truth_img) > 1: 448 | error("Error. Multiple image with id: " + file_id) 449 | else: 450 | img = cv2.imread(IMG_PATH + "/" + ground_truth_img[0]) 451 | img_cumulative_path = RESULTS_FILES_PATH + "/images/" + ground_truth_img[0] 452 | if os.path.isfile(img_cumulative_path): 453 | img_cumulative = cv2.imread(img_cumulative_path) 454 | else: 455 | img_cumulative = img.copy() 456 | bottom_border = 60 457 | BLACK = [0, 0, 0] 458 | img = cv2.copyMakeBorder(img, 0, bottom_border, 0, 0, cv2.BORDER_CONSTANT, value=BLACK) 459 | 460 | gt_file = TEMP_FILES_PATH + "/" + file_id + "_ground_truth.json" 461 | ground_truth_data = json.load(open(gt_file)) 462 | ovmax = -1 463 | gt_match = -1 464 | bb = [float(x) for x in detection["bbox"].split()] 465 | for obj in ground_truth_data: 466 | if obj["class_name"] == class_name: 467 | bbgt = [ float(x) for x in obj["bbox"].split() ] 468 | bi = [max(bb[0],bbgt[0]), max(bb[1],bbgt[1]), min(bb[2],bbgt[2]), min(bb[3],bbgt[3])] 469 | iw = bi[2] - bi[0] + 1 470 | ih = bi[3] - bi[1] + 1 471 | if iw > 0 and ih > 0: 472 | ua = (bb[2] - bb[0] + 1) * (bb[3] - bb[1] + 1) + (bbgt[2] - bbgt[0] 473 | + 1) * (bbgt[3] - bbgt[1] + 1) - iw * ih 474 | ov = iw * ih / ua 475 | if ov > ovmax: 476 | ovmax = ov 477 | gt_match = obj 478 | 479 | if show_animation: 480 | status = "NO MATCH FOUND!" 
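# The overlap computed above follows the inclusive-pixel convention
# (+1 on each side length): e.g. bb == bbgt == [0, 0, 9, 9] gives
# iw = ih = 10, ua = 100 + 100 - 100 = 100 and ov = 1.0 for a perfect match.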
481 | 482 | min_overlap = MINOVERLAP 483 | if ovmax >= min_overlap: 484 | if "difficult" not in gt_match: 485 | if not bool(gt_match["used"]): 486 | tp[idx] = 1 487 | gt_match["used"] = True 488 | count_true_positives[class_name] += 1 489 | with open(gt_file, 'w') as f: 490 | f.write(json.dumps(ground_truth_data)) 491 | if show_animation: 492 | status = "MATCH!" 493 | else: 494 | fp[idx] = 1 495 | if show_animation: 496 | status = "REPEATED MATCH!" 497 | else: 498 | fp[idx] = 1 499 | if ovmax > 0: 500 | status = "INSUFFICIENT OVERLAP" 501 | 502 | """ 503 | Draw image to show animation 504 | """ 505 | if show_animation: 506 | height, widht = img.shape[:2] 507 | white = (255,255,255) 508 | light_blue = (255,200,100) 509 | green = (0,255,0) 510 | light_red = (30,30,255) 511 | margin = 10 512 | # 1nd line 513 | v_pos = int(height - margin - (bottom_border / 2.0)) 514 | text = "Image: " + ground_truth_img[0] + " " 515 | img, line_width = draw_text_in_image(img, text, (margin, v_pos), white, 0) 516 | text = "Class [" + str(class_index) + "/" + str(n_classes) + "]: " + class_name + " " 517 | img, line_width = draw_text_in_image(img, text, (margin + line_width, v_pos), light_blue, line_width) 518 | if ovmax != -1: 519 | color = light_red 520 | if status == "INSUFFICIENT OVERLAP": 521 | text = "IoU: {0:.2f}% ".format(ovmax*100) + "< {0:.2f}% ".format(min_overlap*100) 522 | else: 523 | text = "IoU: {0:.2f}% ".format(ovmax*100) + ">= {0:.2f}% ".format(min_overlap*100) 524 | color = green 525 | img, _ = draw_text_in_image(img, text, (margin + line_width, v_pos), color, line_width) 526 | # 2nd line 527 | v_pos += int(bottom_border / 2.0) 528 | rank_pos = str(idx+1) 529 | text = "Detection #rank: " + rank_pos + " confidence: {0:.2f}% ".format(float(detection["confidence"])*100) 530 | img, line_width = draw_text_in_image(img, text, (margin, v_pos), white, 0) 531 | color = light_red 532 | if status == "MATCH!": 533 | color = green 534 | text = "Result: " + status + " " 535 | img, line_width = draw_text_in_image(img, text, (margin + line_width, v_pos), color, line_width) 536 | 537 | font = cv2.FONT_HERSHEY_SIMPLEX 538 | if ovmax > 0: 539 | bbgt = [ int(round(float(x))) for x in gt_match["bbox"].split() ] 540 | cv2.rectangle(img,(bbgt[0],bbgt[1]),(bbgt[2],bbgt[3]),light_blue,2) 541 | cv2.rectangle(img_cumulative,(bbgt[0],bbgt[1]),(bbgt[2],bbgt[3]),light_blue,2) 542 | cv2.putText(img_cumulative, class_name, (bbgt[0],bbgt[1] - 5), font, 0.6, light_blue, 1, cv2.LINE_AA) 543 | bb = [int(i) for i in bb] 544 | cv2.rectangle(img,(bb[0],bb[1]),(bb[2],bb[3]),color,2) 545 | cv2.rectangle(img_cumulative,(bb[0],bb[1]),(bb[2],bb[3]),color,2) 546 | cv2.putText(img_cumulative, class_name, (bb[0],bb[1] - 5), font, 0.6, color, 1, cv2.LINE_AA) 547 | 548 | cv2.imshow("Animation", img) 549 | cv2.waitKey(20) 550 | output_img_path = RESULTS_FILES_PATH + "/images/detections_one_by_one/" + class_name + "_detection" + str(idx) + ".jpg" 551 | cv2.imwrite(output_img_path, img) 552 | cv2.imwrite(img_cumulative_path, img_cumulative) 553 | 554 | cumsum = 0 555 | for idx, val in enumerate(fp): 556 | fp[idx] += cumsum 557 | cumsum += val 558 | 559 | cumsum = 0 560 | for idx, val in enumerate(tp): 561 | tp[idx] += cumsum 562 | cumsum += val 563 | 564 | rec = tp[:] 565 | for idx, val in enumerate(tp): 566 | rec[idx] = float(tp[idx]) / np.maximum(gt_counter_per_class[class_name], 1) 567 | 568 | prec = tp[:] 569 | for idx, val in enumerate(tp): 570 | prec[idx] = float(tp[idx]) / np.maximum((fp[idx] + tp[idx]), 1) 571 | 572 | ap, mrec, mprec = 
voc_ap(rec[:], prec[:]) 573 | F1 = np.array(rec)*np.array(prec)*2 / np.where((np.array(prec)+np.array(rec))==0, 1, (np.array(prec)+np.array(rec))) 574 | 575 | sum_AP += ap 576 | text = "{0:.2f}%".format(ap*100) + " = " + class_name + " AP " #class_name + " AP = {0:.2f}%".format(ap*100) 577 | 578 | if len(prec)>0: 579 | F1_text = "{0:.2f}".format(F1[score_threhold_idx]) + " = " + class_name + " F1 " 580 | Recall_text = "{0:.2f}%".format(rec[score_threhold_idx]*100) + " = " + class_name + " Recall " 581 | Precision_text = "{0:.2f}%".format(prec[score_threhold_idx]*100) + " = " + class_name + " Precision " 582 | else: 583 | F1_text = "0.00" + " = " + class_name + " F1 " 584 | Recall_text = "0.00%" + " = " + class_name + " Recall " 585 | Precision_text = "0.00%" + " = " + class_name + " Precision " 586 | 587 | rounded_prec = [ '%.2f' % elem for elem in prec ] 588 | rounded_rec = [ '%.2f' % elem for elem in rec ] 589 | results_file.write(text + "\n Precision: " + str(rounded_prec) + "\n Recall :" + str(rounded_rec) + "\n\n") 590 | 591 | if len(prec)>0: 592 | print(text + "\t||\tscore_threhold=" + str(score_threhold) + " : " + "F1=" + "{0:.2f}".format(F1[score_threhold_idx])\ 593 | + " ; Recall=" + "{0:.2f}%".format(rec[score_threhold_idx]*100) + " ; Precision=" + "{0:.2f}%".format(prec[score_threhold_idx]*100)) 594 | else: 595 | print(text + "\t||\tscore_threhold=" + str(score_threhold) + " : " + "F1=0.00% ; Recall=0.00% ; Precision=0.00%") 596 | ap_dictionary[class_name] = ap 597 | 598 | n_images = counter_images_per_class[class_name] 599 | lamr, mr, fppi = log_average_miss_rate(np.array(rec), np.array(fp), n_images) 600 | lamr_dictionary[class_name] = lamr 601 | 602 | if draw_plot: 603 | plt.plot(rec, prec, '-o') 604 | area_under_curve_x = mrec[:-1] + [mrec[-2]] + [mrec[-1]] 605 | area_under_curve_y = mprec[:-1] + [0.0] + [mprec[-1]] 606 | plt.fill_between(area_under_curve_x, 0, area_under_curve_y, alpha=0.2, edgecolor='r') 607 | 608 | fig = plt.gcf() 609 | fig.canvas.manager.set_window_title('AP ' + class_name) 610 | 611 | plt.title('class: ' + text) 612 | plt.xlabel('Recall') 613 | plt.ylabel('Precision') 614 | axes = plt.gca() 615 | axes.set_xlim([0.0,1.0]) 616 | axes.set_ylim([0.0,1.05]) 617 | fig.savefig(RESULTS_FILES_PATH + "/AP/" + class_name + ".png") 618 | plt.cla() 619 | 620 | plt.plot(score, F1, "-", color='orangered') 621 | plt.title('class: ' + F1_text + "\nscore_threhold=" + str(score_threhold)) 622 | plt.xlabel('Score_Threhold') 623 | plt.ylabel('F1') 624 | axes = plt.gca() 625 | axes.set_xlim([0.0,1.0]) 626 | axes.set_ylim([0.0,1.05]) 627 | fig.savefig(RESULTS_FILES_PATH + "/F1/" + class_name + ".png") 628 | plt.cla() 629 | 630 | plt.plot(score, rec, "-H", color='gold') 631 | plt.title('class: ' + Recall_text + "\nscore_threhold=" + str(score_threhold)) 632 | plt.xlabel('Score_Threhold') 633 | plt.ylabel('Recall') 634 | axes = plt.gca() 635 | axes.set_xlim([0.0,1.0]) 636 | axes.set_ylim([0.0,1.05]) 637 | fig.savefig(RESULTS_FILES_PATH + "/Recall/" + class_name + ".png") 638 | plt.cla() 639 | 640 | plt.plot(score, prec, "-s", color='palevioletred') 641 | plt.title('class: ' + Precision_text + "\nscore_threhold=" + str(score_threhold)) 642 | plt.xlabel('Score_Threhold') 643 | plt.ylabel('Precision') 644 | axes = plt.gca() 645 | axes.set_xlim([0.0,1.0]) 646 | axes.set_ylim([0.0,1.05]) 647 | fig.savefig(RESULTS_FILES_PATH + "/Precision/" + class_name + ".png") 648 | plt.cla() 649 | 650 | if show_animation: 651 | cv2.destroyAllWindows() 652 | if n_classes == 0: 653 | 
print("未检测到任何种类,请检查标签信息与get_map.py中的classes_path是否修改。") 654 | return 0 655 | results_file.write("\n# mAP of all classes\n") 656 | mAP = sum_AP / n_classes 657 | text = "mAP = {0:.2f}%".format(mAP*100) 658 | results_file.write(text + "\n") 659 | print(text) 660 | 661 | shutil.rmtree(TEMP_FILES_PATH) 662 | 663 | """ 664 | Count total of detection-results 665 | """ 666 | det_counter_per_class = {} 667 | for txt_file in dr_files_list: 668 | lines_list = file_lines_to_list(txt_file) 669 | for line in lines_list: 670 | class_name = line.split()[0] 671 | if class_name in det_counter_per_class: 672 | det_counter_per_class[class_name] += 1 673 | else: 674 | det_counter_per_class[class_name] = 1 675 | dr_classes = list(det_counter_per_class.keys()) 676 | 677 | """ 678 | Write number of ground-truth objects per class to results.txt 679 | """ 680 | with open(RESULTS_FILES_PATH + "/results.txt", 'a') as results_file: 681 | results_file.write("\n# Number of ground-truth objects per class\n") 682 | for class_name in sorted(gt_counter_per_class): 683 | results_file.write(class_name + ": " + str(gt_counter_per_class[class_name]) + "\n") 684 | 685 | """ 686 | Finish counting true positives 687 | """ 688 | for class_name in dr_classes: 689 | if class_name not in gt_classes: 690 | count_true_positives[class_name] = 0 691 | 692 | """ 693 | Write number of detected objects per class to results.txt 694 | """ 695 | with open(RESULTS_FILES_PATH + "/results.txt", 'a') as results_file: 696 | results_file.write("\n# Number of detected objects per class\n") 697 | for class_name in sorted(dr_classes): 698 | n_det = det_counter_per_class[class_name] 699 | text = class_name + ": " + str(n_det) 700 | text += " (tp:" + str(count_true_positives[class_name]) + "" 701 | text += ", fp:" + str(n_det - count_true_positives[class_name]) + ")\n" 702 | results_file.write(text) 703 | 704 | """ 705 | Plot the total number of occurences of each class in the ground-truth 706 | """ 707 | if draw_plot: 708 | window_title = "ground-truth-info" 709 | plot_title = "ground-truth\n" 710 | plot_title += "(" + str(len(ground_truth_files_list)) + " files and " + str(n_classes) + " classes)" 711 | x_label = "Number of objects per class" 712 | output_path = RESULTS_FILES_PATH + "/ground-truth-info.png" 713 | to_show = False 714 | plot_color = 'forestgreen' 715 | draw_plot_func( 716 | gt_counter_per_class, 717 | n_classes, 718 | window_title, 719 | plot_title, 720 | x_label, 721 | output_path, 722 | to_show, 723 | plot_color, 724 | '', 725 | ) 726 | 727 | # """ 728 | # Plot the total number of occurences of each class in the "detection-results" folder 729 | # """ 730 | # if draw_plot: 731 | # window_title = "detection-results-info" 732 | # # Plot title 733 | # plot_title = "detection-results\n" 734 | # plot_title += "(" + str(len(dr_files_list)) + " files and " 735 | # count_non_zero_values_in_dictionary = sum(int(x) > 0 for x in list(det_counter_per_class.values())) 736 | # plot_title += str(count_non_zero_values_in_dictionary) + " detected classes)" 737 | # # end Plot title 738 | # x_label = "Number of objects per class" 739 | # output_path = RESULTS_FILES_PATH + "/detection-results-info.png" 740 | # to_show = False 741 | # plot_color = 'forestgreen' 742 | # true_p_bar = count_true_positives 743 | # draw_plot_func( 744 | # det_counter_per_class, 745 | # len(det_counter_per_class), 746 | # window_title, 747 | # plot_title, 748 | # x_label, 749 | # output_path, 750 | # to_show, 751 | # plot_color, 752 | # true_p_bar 753 | # ) 754 | 755 | """ 756 | 
    """
    Draw log-average miss rate plot (Show lamr of all classes in decreasing order)
    """
    if draw_plot:
        window_title = "lamr"
        plot_title = "log-average miss rate"
        x_label = "log-average miss rate"
        output_path = RESULTS_FILES_PATH + "/lamr.png"
        to_show = False
        plot_color = 'royalblue'
        draw_plot_func(
            lamr_dictionary,
            n_classes,
            window_title,
            plot_title,
            x_label,
            output_path,
            to_show,
            plot_color,
            ""
        )

    """
    Draw mAP plot (Show AP's of all classes in decreasing order)
    """
    if draw_plot:
        window_title = "mAP"
        plot_title = "mAP = {0:.2f}%".format(mAP * 100)
        x_label = "Average Precision"
        output_path = RESULTS_FILES_PATH + "/mAP.png"
        to_show = True
        plot_color = 'royalblue'
        draw_plot_func(
            ap_dictionary,
            n_classes,
            window_title,
            plot_title,
            x_label,
            output_path,
            to_show,
            plot_color,
            ""
        )
    return mAP

def preprocess_gt(gt_path, class_names):
    image_ids = os.listdir(gt_path)
    results = {}

    images = []
    bboxes = []
    for i, image_id in enumerate(image_ids):
        lines_list = file_lines_to_list(os.path.join(gt_path, image_id))
        boxes_per_image = []
        image = {}
        image_id = os.path.splitext(image_id)[0]
        image['file_name'] = image_id + '.jpg'
        image['width'] = 1
        image['height'] = 1
        #-----------------------------------------------------------------#
        #   Thanks to 多学学英语吧 for the tip: using a string id here
        #   fixes the 'Results do not correspond to current coco set' error.
        #-----------------------------------------------------------------#
        image['id'] = str(image_id)

        for line in lines_list:
            difficult = 0
            if "difficult" in line:
                line_split = line.split()
                left, top, right, bottom, _difficult = line_split[-5:]
                class_name = ""
                for name in line_split[:-5]:
                    class_name += name + " "
                class_name = class_name[:-1]
                difficult = 1
            else:
                line_split = line.split()
                left, top, right, bottom = line_split[-4:]
                class_name = ""
                for name in line_split[:-4]:
                    class_name += name + " "
                class_name = class_name[:-1]

            left, top, right, bottom = float(left), float(top), float(right), float(bottom)
            if class_name not in class_names:
                continue
            cls_id = class_names.index(class_name) + 1
            # COCO-style [x, y, w, h] box; the trailing value is the (slightly offset) box area
            bbox = [left, top, right - left, bottom - top, difficult, str(image_id), cls_id, (right - left) * (bottom - top) - 10.0]
            boxes_per_image.append(bbox)
        images.append(image)
        bboxes.extend(boxes_per_image)
    results['images'] = images

    categories = []
    for i, cls in enumerate(class_names):
        category = {}
        category['supercategory'] = cls
        category['name'] = cls
        category['id'] = i + 1
        categories.append(category)
    results['categories'] = categories

    annotations = []
    for i, box in enumerate(bboxes):
        annotation = {}
        annotation['area'] = box[-1]
        annotation['category_id'] = box[-2]
        annotation['image_id'] = box[-3]
        annotation['iscrowd'] = box[-4]
        annotation['bbox'] = box[:4]
        annotation['id'] = i
        annotations.append(annotation)
    results['annotations'] = annotations
    return results
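# A short illustration (hypothetical values) of the COCO-style records that
# preprocess_gt builds. Assuming the standard VOC class list, a ground-truth
# line "dog 12 34 200 180" in 000005.txt becomes (width/height are dummy 1s,
# since only boxes are evaluated; the corner box is converted to [x, y, w, h]):
#     annotation = {'area': 27438.0, 'category_id': 12, 'image_id': '000005',
#                   'iscrowd': 0, 'bbox': [12.0, 34.0, 188.0, 146.0], 'id': 0}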
def preprocess_dr(dr_path, class_names):
    image_ids = os.listdir(dr_path)
    results = []
    for image_id in image_ids:
        lines_list = file_lines_to_list(os.path.join(dr_path, image_id))
        image_id = os.path.splitext(image_id)[0]
        for line in lines_list:
            line_split = line.split()
            confidence, left, top, right, bottom = line_split[-5:]
            class_name = ""
            for name in line_split[:-5]:
                class_name += name + " "
            class_name = class_name[:-1]
            left, top, right, bottom = float(left), float(top), float(right), float(bottom)
            result = {}
            result["image_id"] = str(image_id)
            if class_name not in class_names:
                continue
            result["category_id"] = class_names.index(class_name) + 1
            result["bbox"] = [left, top, right - left, bottom - top]
            result["score"] = float(confidence)
            results.append(result)
    return results

def get_coco_map(class_names, path):
    GT_PATH = os.path.join(path, 'ground-truth')
    DR_PATH = os.path.join(path, 'detection-results')
    COCO_PATH = os.path.join(path, 'coco_eval')

    if not os.path.exists(COCO_PATH):
        os.makedirs(COCO_PATH)

    GT_JSON_PATH = os.path.join(COCO_PATH, 'instances_gt.json')
    DR_JSON_PATH = os.path.join(COCO_PATH, 'instances_dr.json')

    with open(GT_JSON_PATH, "w") as f:
        results_gt = preprocess_gt(GT_PATH, class_names)
        json.dump(results_gt, f, indent=4)

    with open(DR_JSON_PATH, "w") as f:
        results_dr = preprocess_dr(DR_PATH, class_names)
        json.dump(results_dr, f, indent=4)
    if len(results_dr) == 0:
        print("No objects detected.")
        return [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

    cocoGt = COCO(GT_JSON_PATH)
    cocoDt = cocoGt.loadRes(DR_JSON_PATH)
    cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()

    return cocoEval.stats
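# Minimal usage sketch for get_coco_map (layout and paths are assumptions; in
# this repo, get_map.py is the script that fills these folders beforehand):
#     map_out/
#         ground-truth/        one txt per image: "<class> <left> <top> <right> <bottom>"
#         detection-results/   one txt per image: "<class> <confidence> <left> <top> <right> <bottom>"
#
#     from utils.utils import get_classes
#     from utils.utils_map import get_coco_map
#     class_names, _ = get_classes('model_data/voc_classes.txt')
#     stats = get_coco_map(class_names, 'map_out')  # stats[1] is AP at IoU 0.50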
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/voc_annotation.py:
--------------------------------------------------------------------------------
import os
import random
import xml.etree.ElementTree as ET
import numpy as np
from utils.utils import get_classes

#--------------------------------------------------------------------------------------------------------------------------------#
#   annotation_mode selects what this script does when it is run:
#   0: the whole labelling pipeline, i.e. both the txt files in VOCdevkit/VOC2007/ImageSets
#      and the 2007_train.txt / 2007_val.txt used for training
#   1: only the txt files in VOCdevkit/VOC2007/ImageSets
#   2: only the 2007_train.txt / 2007_val.txt used for training
#--------------------------------------------------------------------------------------------------------------------------------#
annotation_mode = 2
#-------------------------------------------------------------------#
#   Must be modified: it is used to generate the object information
#   in 2007_train.txt and 2007_val.txt, and has to match the
#   classes_path used for training and prediction.
#   If the generated 2007_train.txt contains no object information,
#   the classes were not set correctly.
#   Only effective when annotation_mode is 0 or 2.
#-------------------------------------------------------------------#
classes_path = "./faster-rcnn-pytorch-master/model_data/voc_classes.txt"
#--------------------------------------------------------------------------------------------------------------------------------#
#   trainval_percent sets the ratio of (train + val) to test; by default (train + val) : test = 9 : 1.
#   train_percent sets the ratio of train to val within (train + val); by default train : val = 9 : 1.
#   Only effective when annotation_mode is 0 or 1.
#--------------------------------------------------------------------------------------------------------------------------------#
trainval_percent = 0.9
train_percent = 0.9
#-------------------------------------------------------#
#   Points to the folder where the VOC dataset lives.
#   Defaults to the VOC dataset under the repository root.
#-------------------------------------------------------#
VOCdevkit_path = "./faster-rcnn-pytorch-master/VOCdevkit"

VOCdevkit_sets = [('2007', 'train'), ('2007', 'val')]
classes, _ = get_classes(classes_path)

#-------------------------------------------------------#
#   Counters: images per split and objects per class.
#-------------------------------------------------------#
photo_nums = np.zeros(len(VOCdevkit_sets))
nums = np.zeros(len(classes))

def convert_annotation(year, image_id, list_file):
    in_file = open(os.path.join(VOCdevkit_path, 'VOC%s/Annotations/%s.xml' % (year, image_id)), encoding='utf-8')
    tree = ET.parse(in_file)
    root = tree.getroot()

    for obj in root.iter('object'):
        difficult = 0
        if obj.find('difficult') is not None:
            difficult = obj.find('difficult').text
        cls = obj.find('name').text
        # skip classes outside classes_path as well as objects marked difficult
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (int(float(xmlbox.find('xmin').text)), int(float(xmlbox.find('ymin').text)), int(float(xmlbox.find('xmax').text)), int(float(xmlbox.find('ymax').text)))
        list_file.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id))

        nums[classes.index(cls)] = nums[classes.index(cls)] + 1

if __name__ == "__main__":
    random.seed(0)

    if annotation_mode == 0 or annotation_mode == 1:
        print("Generate txt in ImageSets.")
        xmlfilepath = os.path.join(VOCdevkit_path, 'VOC2007/Annotations')
        saveBasePath = os.path.join(VOCdevkit_path, 'VOC2007/ImageSets/Main')
        temp_xml = os.listdir(xmlfilepath)
        total_xml = []
        for xml in temp_xml:
            if xml.endswith(".xml"):
                total_xml.append(xml)

        num = len(total_xml)
        indices = range(num)
        tv = int(num * trainval_percent)
        tr = int(tv * train_percent)
        trainval = random.sample(indices, tv)
        train = random.sample(trainval, tr)

        print("train and val size", tv)
        print("train size", tr)
        ftrainval = open(os.path.join(saveBasePath, 'trainval.txt'), 'w')
        ftest = open(os.path.join(saveBasePath, 'test.txt'), 'w')
        ftrain = open(os.path.join(saveBasePath, 'train.txt'), 'w')
        fval = open(os.path.join(saveBasePath, 'val.txt'), 'w')

        for i in indices:
            name = total_xml[i][:-4] + '\n'
            if i in trainval:
                ftrainval.write(name)
                if i in train:
                    ftrain.write(name)
                else:
                    fval.write(name)
            else:
                ftest.write(name)

        ftrainval.close()
        ftrain.close()
        fval.close()
        ftest.close()
        print("Generate txt in ImageSets done.")
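    # A quick check of the split arithmetic above, with a hypothetical count of
    # 1000 xml files and the default trainval_percent = train_percent = 0.9:
    # tv = 900 and tr = 810, i.e. 810 train / 90 val / 100 test images.
    #
    # The block below then writes, per image, an absolute path followed by zero
    # or more "left,top,right,bottom,class_id" groups, e.g. (hypothetical):
    #     /abs/path/VOCdevkit/VOC2007/JPEGImages/000005.jpg 263,211,324,339,8 165,264,253,372,8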
    if annotation_mode == 0 or annotation_mode == 2:
        print("Generate 2007_train.txt and 2007_val.txt for train.")
        type_index = 0
        for year, image_set in VOCdevkit_sets:
            image_ids = open(os.path.join(VOCdevkit_path, 'VOC%s/ImageSets/Main/%s.txt' % (year, image_set)), encoding='utf-8').read().strip().split()
            list_file = open('%s_%s.txt' % (year, image_set), 'w', encoding='utf-8')
            for image_id in image_ids:
                list_file.write('%s/VOC%s/JPEGImages/%s.jpg' % (os.path.abspath(VOCdevkit_path), year, image_id))
                convert_annotation(year, image_id, list_file)
                list_file.write('\n')
            photo_nums[type_index] = len(image_ids)
            type_index += 1
            list_file.close()
        print("Generate 2007_train.txt and 2007_val.txt for train done.")

        def printTable(List1, List2):
            for i in range(len(List1[0])):
                print("|", end=' ')
                for j in range(len(List1)):
                    print(List1[j][i].rjust(int(List2[j])), end=' ')
                    print("|", end=' ')
                print()

        str_nums = [str(int(x)) for x in nums]
        tableData = [
            classes, str_nums
        ]
        colWidths = [0] * len(tableData)
        for i in range(len(tableData)):
            for j in range(len(tableData[i])):
                if len(tableData[i][j]) > colWidths[i]:
                    colWidths[i] = len(tableData[i][j])
        printTable(tableData, colWidths)

        if photo_nums[0] <= 500:
            print("The training set has fewer than 500 images, which is a rather small amount of data; please set a larger number of training epochs to get enough gradient-descent steps.")

        if np.sum(nums) == 0:
            print("No objects were found in the dataset. Make sure classes_path matches your own dataset and that the label names are correct, otherwise training will have no effect!")
            print("No objects were found in the dataset. Make sure classes_path matches your own dataset and that the label names are correct, otherwise training will have no effect!")
            print("No objects were found in the dataset. Make sure classes_path matches your own dataset and that the label names are correct, otherwise training will have no effect!")
            print("(Important things are said three times.)")

--------------------------------------------------------------------------------