├── IMG
│   ├── image-20221130224209895.png
│   ├── image-20221130225020444.png
│   ├── image-20221130231306518.png
│   ├── image-20221130232150008.png
│   ├── image-20221130232226853.png
│   ├── image-20221130233557805.png
│   ├── image-20221130234049168.png
│   ├── image-20221130235406487.png
│   ├── image-20221130235431124.png
│   ├── image-20221130235548118.png
│   ├── image-20221130235602289.png
│   ├── image-20221201140006267.png
│   ├── image-20221201140844546.png
│   ├── image-20221201141758142.png
│   └── image-20221201141815392.png
├── README.md
└── faster-rcnn-pytorch-master
    ├── Faster R-CNN 论文复现代码.md
    ├── Faster R-CNN代码使用说明书.md
    ├── VOCdevkit
    │   └── VOC2007
    │       ├── Annotations
    │       │   └── 说明书.txt
    │       ├── ImageSets
    │       │   └── Main
    │       │       └── 说明书.txt
    │       └── JPEGImages
    │           └── 说明书.txt
    ├── frcnn.py
    ├── get_map.py
    ├── img
    │   ├── 1.jpg
    │   ├── 2.jpg
    │   └── 3.jpg
    ├── logs
    │   └── 说明书.txt
    ├── model_data
    │   ├── simhei.ttf
    │   └── voc_classes.txt
    ├── nets
    │   ├── __init__.py
    │   ├── classifier.py
    │   ├── frcnn.py
    │   ├── frcnn_training.py
    │   ├── resnet50.py
    │   ├── rpn.py
    │   └── vgg16.py
    ├── predict.py
    ├── requirements.txt
    ├── summary.py
    ├── train.py
    ├── utils
    │   ├── __init__.py
    │   ├── anchors.py
    │   ├── callbacks.py
    │   ├── dataloader.py
    │   ├── utils.py
    │   ├── utils_bbox.py
    │   ├── utils_fit.py
    │   └── utils_map.py
    └── voc_annotation.py

/IMG/image-20221130224209895.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221130224209895.png
--------------------------------------------------------------------------------
/IMG/image-20221130225020444.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221130225020444.png
--------------------------------------------------------------------------------
/IMG/image-20221130231306518.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221130231306518.png
--------------------------------------------------------------------------------
/IMG/image-20221130232150008.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221130232150008.png
--------------------------------------------------------------------------------
/IMG/image-20221130232226853.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221130232226853.png
--------------------------------------------------------------------------------
/IMG/image-20221130233557805.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221130233557805.png
--------------------------------------------------------------------------------
/IMG/image-20221130234049168.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221130234049168.png
--------------------------------------------------------------------------------
/IMG/image-20221130235406487.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221130235406487.png
--------------------------------------------------------------------------------
/IMG/image-20221130235431124.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221130235431124.png
--------------------------------------------------------------------------------
/IMG/image-20221130235548118.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221130235548118.png
--------------------------------------------------------------------------------
/IMG/image-20221130235602289.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221130235602289.png
--------------------------------------------------------------------------------
/IMG/image-20221201140006267.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221201140006267.png
--------------------------------------------------------------------------------
/IMG/image-20221201140844546.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221201140844546.png
--------------------------------------------------------------------------------
/IMG/image-20221201141758142.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221201141758142.png
--------------------------------------------------------------------------------
/IMG/image-20221201141815392.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/IMG/image-20221201141815392.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Faster-RCNN-Pytorch-master
# Faster R-CNN Code User Guide

## 1. My Environment

```python
python == 3.10.6
numpy == 1.23.3
opencv == 4.6.0
pillow == 9.2.0
pycocotools == 2.0.6
pytorch == 1.12.1
scipy == 1.9.3
torchvision == 0.13.1
tqdm == 4.64.1
matplotlib == 3.6.2
hdf5 == 1.12.1
```

## 2. Weight File Downloads

The weights we need, `voc_weights_resnet.pth` or `voc_weights_vgg.pth`, together with the backbone network weights, have been uploaded to Baidu Cloud and can be downloaded from there.

The first weight file, `voc_weights_resnet.pth`, is used when `resnet` is the backbone feature extraction network.

The second weight file, `voc_weights_vgg.pth`, is used when `vgg` is the backbone feature extraction network.

![image-20221130224209895](./IMG/image-20221130224209895.png)

I have also placed my trained parameters in the same folder:

![image-20221130225020444](./IMG/image-20221130225020444.png)

```python
Link: https://pan.baidu.com/s/1IiBMIyw8bF132FQGz79Q6Q
Extraction code: dpje
```
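Before training, it can be worth sanity-checking that a downloaded checkpoint deserializes correctly. A minimal sketch (the path assumes you placed the file under `model_data/`):

```python
import torch

# Load on the CPU just to verify the file is intact and inspect its keys.
state_dict = torch.load("model_data/voc_weights_resnet.pth", map_location="cpu")
print(len(state_dict), "parameter tensors, first key:", next(iter(state_dict)))
```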
## 3. VOC Dataset Download

The `VOC` dataset can be downloaded from the link below. It already contains the training set, test set, and validation set (identical to the test set), so no further splitting is needed.

This is the `VOC07+12` dataset and includes both training and test data. For convenience, `val.txt` and `test.txt` in this dataset are identical.

```python
Link: https://pan.baidu.com/s/1STBDRK2MpZfJJ-jRzL6iuA
Extraction code: vh7m
```

## 4. Model Training Steps

### (1) Training on the VOC07+12 dataset

#### 1. Preparing the dataset

This project trains in `VOC` format. Before training, download the `VOC07+12` dataset and unpack it into the root directory.

The root directory is the top-level directory:

![image-20221130231306518](./IMG/image-20221130231306518.png)

The contents fall into place under the `VOCdevkit` folder automatically.

#### 2. Processing the dataset

Set `annotation_mode = 2` in `voc_annotation.py`, then run `voc_annotation.py` to generate `2007_train.txt` and `2007_val.txt` in the root directory.

The corresponding source code:

![image-20221130232150008](./IMG/image-20221130232150008.png)

The generated files:

![image-20221130232226853](./IMG/image-20221130232226853.png)
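For orientation, each line of the generated files couples one image path with its ground-truth boxes. An illustrative sketch (paths and values are hypothetical; the exact layout is whatever `voc_annotation.py` writes):

```python
# 2007_train.txt / 2007_val.txt, one image per line:
# image_path xmin,ymin,xmax,ymax,class_index [more boxes ...]
VOCdevkit/VOC2007/JPEGImages/000005.jpg 263,211,324,339,8 165,264,253,372,8
VOCdevkit/VOC2007/JPEGImages/000007.jpg 141,50,500,330,6
```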
#### 3. Starting network training

The default parameters of `train.py` are set up for the `VOC` dataset, so simply running `train.py` starts training.

I first tried this on my own laptop (a 3060 with `6G` of VRAM), but it could not run: not enough GPU memory.

I switched to a lab machine (`Ubuntu18.04`, dual `2080Ti`, `64G` RAM, an `i9` CPU) with a batch_size of 100; one epoch took about 15 minutes on average.

If your hardware cannot manage the training, that is fine: I uploaded the trained parameters as well, in the first Baidu Cloud share:

![image-20221130233557805](./IMG/image-20221130233557805.png)

#### 4. Prediction

Prediction with the training results uses two files, `frcnn.py` and `predict.py`.

First, edit model_path and classes_path in `frcnn.py`; these two parameters must be changed.

model_path points to the trained weight file in the logs folder.

classes_path points to the txt listing the detection classes.

![image-20221130234049168](./IMG/image-20221130234049168.png)

Once these are set, run `predict.py` to detect. After it starts, enter an image path to run detection.
### (2) Training on your own dataset

#### 1. Preparing the dataset

This project trains in `VOC` format, so prepare your own dataset before training.

Before training, put the annotation files in the `Annotations` folder under `VOCdevkit/VOC2007`.

Before training, put the image files in the `JPEGImages` folder under `VOCdevkit/VOC2007`.

#### 2. Processing the dataset

With the files in place, use `voc_annotation.py` to generate the `2007_train.txt` and `2007_val.txt` used for training.

Edit the parameters in `voc_annotation.py`.

For a first training run you only need to change classes_path, which points to the txt listing the detection classes.

When training on your own dataset, create a `cls_classes.txt` and list the classes you want to distinguish, one per line.
The contents of `./faster-rcnn-pytorch-master/model_data/cls_classes.txt` follow this pattern.

For example, the classes for our `VOC` data are:

```python
aeroplane
bicycle
bird
boat
bottle
bus
car
cat
chair
cow
diningtable
dog
horse
motorbike
person
pottedplant
sheep
sofa
train
tvmonitor
```

Point classes_path in `voc_annotation.py` at `cls_classes.txt`, then run `voc_annotation.py`.

#### 3. Starting network training

**There are many training parameters, all in train.py; read the comments carefully after downloading the repository. The most important one is still classes_path in train.py.**

classes_path points to the txt listing the detection classes, the same txt as in `voc_annotation.py`! It must be changed when training on your own dataset!

After setting classes_path, run `train.py` to start training; after several epochs, the weights are written to the logs folder.

#### 4. Prediction with the training results

Prediction uses two files, `frcnn.py` and `predict.py`. Edit model_path and classes_path in `frcnn.py`.

model_path points to the trained weight file in the logs folder.

classes_path points to the txt listing the detection classes.

Once these are set, run `predict.py` to detect. After it starts, enter an image path to run detection.

## 5. Prediction Steps

### (1) Using pretrained weights

#### 1. After downloading and unpacking the repository, download `frcnn_weights.pth` from Baidu Cloud, put it in model_data, run `predict.py`, and enter:

![image-20221130235406487](./IMG/image-20221130235406487.png)

![image-20221130235431124](./IMG/image-20221130235431124.png)

#### 2. Settings in predict.py enable FPS testing and video detection.

![image-20221130235548118](./IMG/image-20221130235548118.png)

![image-20221130235602289](./IMG/image-20221130235602289.png)

### (2) Using your own trained weights

#### 1. Train as described in the training steps.

#### 2. In `frcnn.py`, edit model_path and classes_path in the block below so they match your trained files; model_path is the weight file under the logs folder, and classes_path lists the classes model_path was trained on.

```python
class FRCNN(object):
    _defaults = {
        #--------------------------------------------------------------------------#
        #   To predict with your own trained model, be sure to change
        #   model_path and classes_path!
        #   model_path points to the weight file under logs,
        #   classes_path points to the txt under model_data.
        #
        #   After training, several weight files sit in logs; pick one with a
        #   lower validation loss.
        #   Lower validation loss does not guarantee higher mAP; it only means
        #   those weights generalize better on the validation set.
        #   If a shape mismatch appears, also check the model_path and
        #   classes_path used during training.
        #--------------------------------------------------------------------------#
        "model_path"    : './faster-rcnn-pytorch-master/model_data/voc_weights_resnet.pth',
        "classes_path"  : './faster-rcnn-pytorch-master/model_data/voc_classes.txt',
        #---------------------------------------------------------------------#
        #   The backbone feature extraction network: resnet50 or vgg
        #---------------------------------------------------------------------#
        "backbone"      : "resnet50",
        #---------------------------------------------------------------------#
        #   Only predicted boxes scoring above this confidence are kept
        #---------------------------------------------------------------------#
        "confidence"    : 0.5,
        #---------------------------------------------------------------------#
        #   The nms_iou threshold used for non-maximum suppression
        #---------------------------------------------------------------------#
        "nms_iou"       : 0.3,
        #---------------------------------------------------------------------#
        #   Specifies the anchor (prior box) sizes
        #---------------------------------------------------------------------#
        'anchors_size'  : [8, 16, 32],
        #-------------------------------#
        #   Whether to use CUDA
        #   Set to False if there is no GPU
        #-------------------------------#
        "cuda"          : True,
    }
```
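Since `__init__` in `frcnn.py` copies `_defaults` onto the instance and then overlays any keyword arguments, individual entries can also be overridden at construction time instead of editing the file. A hedged example (the weight filename is a placeholder):

```python
frcnn = FRCNN(model_path="logs/your_trained_weights.pth", confidence=0.4, cuda=False)
```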
### (3) Run predict.py

### (4) Settings in predict.py enable FPS testing and video detection

## 6. Evaluation Steps

### (1) Evaluating the VOC07+12 test set

#### 1. Evaluation uses the VOC format.

`VOC07+12` already ships with a test split, so there is no need to run `voc_annotation.py` to generate the txt files under `ImageSets`.

#### 2. In `frcnn.py`, edit model_path and classes_path. model_path points to the trained weight file in the logs folder. classes_path points to the txt listing the detection classes.

#### 3. Run get_map.py to obtain the evaluation results, which are saved in the map_out folder.

### (2) Evaluating your own dataset

#### 1. Evaluation uses the `VOC` format.

#### 2. If `voc_annotation.py` was already run before training, the code automatically splits the dataset into training, validation, and test sets. To change the test-set proportion, edit `trainval_percent` in `voc_annotation.py`. `trainval_percent` sets the ratio of (training set + validation set) to test set; by default, (training + validation) : test = `9:1`. train_percent sets the ratio of training set to validation set inside (training set + validation set); by default, training : validation = `9:1`. A worked example of the resulting counts is sketched below.
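To make the two ratios concrete, the split arithmetic for a hypothetical dataset of 10,000 images:

```python
num_images       = 10000
trainval_percent = 0.9          # (train + val) : test = 9 : 1
train_percent    = 0.9          # train : val = 9 : 1 within (train + val)

num_trainval = int(num_images * trainval_percent)   # 9000
num_train    = int(num_trainval * train_percent)    # 8100
num_val      = num_trainval - num_train             # 900
num_test     = num_images - num_trainval            # 1000
```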
#### 3. After splitting the test set with `voc_annotation.py`, edit classes_path in get_map.py; classes_path points to the txt listing the detection classes, the same txt as used for training. It must be changed when evaluating your own dataset.

#### 4. In `frcnn.py`, edit model_path and classes_path. model_path points to the trained weight file in the logs folder. classes_path points to the txt listing the detection classes.

#### 5. Run `get_map.py` to obtain the evaluation results, which are saved in the map_out folder.

![image-20221201140006267](./IMG/image-20221201140006267.png)

![image-20221201140844546](./IMG/image-20221201140844546.png)

Wait a while!

![image-20221201141815392](./IMG/image-20221201141815392.png)

![image-20221201141758142](./IMG/image-20221201141758142.png)

## 7. References

https://github.com/bubbliiiing/faster-rcnn-pytorch

https://github.com/longcw/faster_rcnn_pytorch

https://github.com/jwyang/faster-rcnn.pytorch

--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/VOCdevkit/VOC2007/Annotations/说明书.txt:
--------------------------------------------------------------------------------
Annotation label files are stored here!
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/VOCdevkit/VOC2007/ImageSets/Main/说明书.txt:
--------------------------------------------------------------------------------
Training index files are stored here
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/VOCdevkit/VOC2007/JPEGImages/说明书.txt:
--------------------------------------------------------------------------------
Image files are stored here
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/frcnn.py:
--------------------------------------------------------------------------------
import colorsys
import os
import time

import numpy as np
import torch
import torch.nn as nn
from PIL import Image, ImageDraw, ImageFont

from nets.frcnn import FasterRCNN
from utils.utils import (cvtColor, get_classes, get_new_img_size, resize_image,
                         preprocess_input, show_config)
from utils.utils_bbox import DecodeBox


#--------------------------------------------#
#   To predict with your own trained model,
#   two parameters must be changed:
#   model_path and classes_path!
#   If a shape mismatch appears, be sure to
#   check the NUM_CLASSES, model_path and
#   classes_path used during training.
#--------------------------------------------#
class FRCNN(object):
    _defaults = {
        #--------------------------------------------------------------------------#
        #   To predict with your own trained model, be sure to change
        #   model_path and classes_path!
        #   model_path points to the weight file under logs,
        #   classes_path points to the txt under model_data.
        #
        #   After training, several weight files sit in logs; pick one with a
        #   lower validation loss.
        #   Lower validation loss does not guarantee higher mAP; it only means
        #   those weights generalize better on the validation set.
        #   If a shape mismatch appears, also check the model_path and
        #   classes_path used during training.
        #--------------------------------------------------------------------------#
        "model_path"    : './faster-rcnn-pytorch-master/model_data/voc_weights_resnet.pth',
        "classes_path"  : './faster-rcnn-pytorch-master/model_data/voc_classes.txt',
        #---------------------------------------------------------------------#
        #   The backbone feature extraction network: resnet50 or vgg
        #---------------------------------------------------------------------#
        "backbone"      : "resnet50",
        #---------------------------------------------------------------------#
        #   Only predicted boxes scoring above this confidence are kept
        #---------------------------------------------------------------------#
        "confidence"    : 0.5,
        #---------------------------------------------------------------------#
        #   The nms_iou threshold used for non-maximum suppression
        #---------------------------------------------------------------------#
        "nms_iou"       : 0.3,
        #---------------------------------------------------------------------#
        #   Specifies the anchor (prior box) sizes
        #---------------------------------------------------------------------#
        'anchors_size'  : [8, 16, 32],
        #-------------------------------#
        #   Whether to use CUDA
        #   Set to False if there is no GPU
        #-------------------------------#
        "cuda"          : True,
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"
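    #---------------------------------------------------------------#
    #   Note: __init__ below copies _defaults onto the instance and
    #   then overlays any keyword arguments, e.g. FRCNN(cuda=False).
    #   Overrides are also written back into _defaults, so they
    #   persist across later instances created in the same process.
    #---------------------------------------------------------------#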
    #---------------------------------------------------#
    #   Initialize Faster R-CNN
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        for name, value in kwargs.items():
            setattr(self, name, value)
            self._defaults[name] = value
        #---------------------------------------------------#
        #   Get the class names and the number of classes
        #---------------------------------------------------#
        self.class_names, self.num_classes = get_classes(self.classes_path)

        self.std = torch.Tensor([0.1, 0.1, 0.2, 0.2]).repeat(self.num_classes + 1)[None]
        if self.cuda:
            self.std = self.std.cuda()
        self.bbox_util = DecodeBox(self.std, self.num_classes)

        #---------------------------------------------------#
        #   Use a different box color for each class
        #---------------------------------------------------#
        hsv_tuples  = [(x / self.num_classes, 1., 1.) for x in range(self.num_classes)]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), self.colors))
        self.generate()

        show_config(**self._defaults)

    #---------------------------------------------------#
    #   Load the model
    #---------------------------------------------------#
    def generate(self):
        #-------------------------------#
        #   Load the model and weights
        #-------------------------------#
        self.net = FasterRCNN(self.num_classes, "predict", anchor_scales = self.anchors_size, backbone = self.backbone)
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.net.load_state_dict(torch.load(self.model_path, map_location=device))
        self.net = self.net.eval()
        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        if self.cuda:
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()
    #---------------------------------------------------#
    #   Detect an image
    #---------------------------------------------------#
    def detect_image(self, image, crop = False, count = False):
        #---------------------------------------------------#
        #   Compute the height and width of the input image
        #---------------------------------------------------#
        image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------#
        #   Compute the resized image size; the short side becomes 600
        #---------------------------------------------------#
        input_shape = get_new_img_size(image_shape[0], image_shape[1])
        #---------------------------------------------------------#
        #   Convert the image to RGB here to avoid errors with
        #   grayscale images during prediction.
        #   The code only supports RGB prediction; all other image
        #   types are converted to RGB.
        #---------------------------------------------------------#
        image       = cvtColor(image)
        #---------------------------------------------------------#
        #   Resize the original image so its short side is 600
        #---------------------------------------------------------#
        image_data  = resize_image(image, [input_shape[1], input_shape[0]])
        #---------------------------------------------------------#
        #   Add the batch_size dimension
        #---------------------------------------------------------#
        image_data  = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)

        with torch.no_grad():
            images = torch.from_numpy(image_data)
            if self.cuda:
                images = images.cuda()

            #-------------------------------------------------------------#
            #   roi_cls_locs    regression parameters for the proposals
            #   roi_scores      class scores for the proposals
            #   rois            proposal coordinates
            #-------------------------------------------------------------#
            roi_cls_locs, roi_scores, rois, _ = self.net(images)
            #-------------------------------------------------------------#
            #   Decode the proposals with the classifier predictions
            #   to obtain the final boxes
            #-------------------------------------------------------------#
            results = self.bbox_util.forward(roi_cls_locs, roi_scores, rois, image_shape, input_shape,
                                             nms_iou = self.nms_iou, confidence = self.confidence)
        #---------------------------------------------------------#
        #   If nothing is detected, return the original image
        #---------------------------------------------------------#
        if len(results[0]) <= 0:
            return image

        top_label   = np.array(results[0][:, 5], dtype = 'int32')
        top_conf    = results[0][:, 4]
        top_boxes   = results[0][:, :4]

        #---------------------------------------------------------#
        #   Set the font and the border thickness
        #---------------------------------------------------------#
        font        = ImageFont.truetype(font='./faster-rcnn-pytorch-master/model_data/simhei.ttf', size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
        thickness   = int(max((image.size[0] + image.size[1]) // np.mean(input_shape), 1))
        #---------------------------------------------------------#
        #   Counting
        #---------------------------------------------------------#
        if count:
            print("top_label:", top_label)
            classes_nums = np.zeros([self.num_classes])
            for i in range(self.num_classes):
                num = np.sum(top_label == i)
                if num > 0:
                    print(self.class_names[i], " : ", num)
                classes_nums[i] = num
            print("classes_nums:", classes_nums)
        #---------------------------------------------------------#
        #   Optionally crop out the detected objects
        #---------------------------------------------------------#
        if crop:
            for i, c in list(enumerate(top_label)):
                top, left, bottom, right = top_boxes[i]
                top     = max(0, np.floor(top).astype('int32'))
                left    = max(0, np.floor(left).astype('int32'))
                bottom  = min(image.size[1], np.floor(bottom).astype('int32'))
                right   = min(image.size[0], np.floor(right).astype('int32'))

                dir_save_path = "img_crop"
                if not os.path.exists(dir_save_path):
                    os.makedirs(dir_save_path)
                crop_image = image.crop([left, top, right, bottom])
                crop_image.save(os.path.join(dir_save_path, "crop_" + str(i) + ".png"), quality=95, subsampling=0)
                print("save crop_" + str(i) + ".png to " + dir_save_path)
        #---------------------------------------------------------#
        #   Draw the results on the image
        #---------------------------------------------------------#
        for i, c in list(enumerate(top_label)):
            predicted_class = self.class_names[int(c)]
            box             = top_boxes[i]
            score           = top_conf[i]

            top, left, bottom, right = box

            top     = max(0, np.floor(top).astype('int32'))
            left    = max(0, np.floor(left).astype('int32'))
            bottom  = min(image.size[1], np.floor(bottom).astype('int32'))
            right   = min(image.size[0], np.floor(right).astype('int32'))

            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            # print(label, top, left, bottom, right)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i], outline=self.colors[c])
            draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=self.colors[c])
            draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0), font=font)
            del draw

        return image

    def get_FPS(self, image, test_interval):
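        #---------------------------------------------------------------#
        #   One untimed forward pass below serves as a warm-up; the
        #   timed loop then averages test_interval runs, so the return
        #   value is seconds per image (FPS = 1 / tact_time).
        #---------------------------------------------------------------#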
        #---------------------------------------------------#
        #   Compute the height and width of the input image
        #---------------------------------------------------#
        image_shape = np.array(np.shape(image)[0:2])
        input_shape = get_new_img_size(image_shape[0], image_shape[1])
        #---------------------------------------------------------#
        #   Convert the image to RGB here to avoid errors with
        #   grayscale images during prediction.
        #   The code only supports RGB prediction; all other image
        #   types are converted to RGB.
        #---------------------------------------------------------#
        image       = cvtColor(image)

        #---------------------------------------------------------#
        #   Resize the original image so its short side is 600
        #---------------------------------------------------------#
        image_data  = resize_image(image, [input_shape[1], input_shape[0]])
        #---------------------------------------------------------#
        #   Add the batch_size dimension
        #---------------------------------------------------------#
        image_data  = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)

        with torch.no_grad():
            images = torch.from_numpy(image_data)
            if self.cuda:
                images = images.cuda()

            roi_cls_locs, roi_scores, rois, _ = self.net(images)
            #-------------------------------------------------------------#
            #   Decode the proposals with the classifier predictions
            #   to obtain the final boxes
            #-------------------------------------------------------------#
            results = self.bbox_util.forward(roi_cls_locs, roi_scores, rois, image_shape, input_shape,
                                             nms_iou = self.nms_iou, confidence = self.confidence)
        t1 = time.time()
        for _ in range(test_interval):
            with torch.no_grad():
                roi_cls_locs, roi_scores, rois, _ = self.net(images)
                #-------------------------------------------------------------#
                #   Decode the proposals with the classifier predictions
                #   to obtain the final boxes
                #-------------------------------------------------------------#
                results = self.bbox_util.forward(roi_cls_locs, roi_scores, rois, image_shape, input_shape,
                                                 nms_iou = self.nms_iou, confidence = self.confidence)

        t2 = time.time()
        tact_time = (t2 - t1) / test_interval
        return tact_time
    #---------------------------------------------------#
    #   Write detection results to txt for mAP evaluation
    #---------------------------------------------------#
    def get_map_txt(self, image_id, image, class_names, map_out_path):
        f = open(os.path.join(map_out_path, "detection-results/" + image_id + ".txt"), "w")
        #---------------------------------------------------#
        #   Compute the height and width of the input image
        #---------------------------------------------------#
        image_shape = np.array(np.shape(image)[0:2])
        input_shape = get_new_img_size(image_shape[0], image_shape[1])
        #---------------------------------------------------------#
        #   Convert the image to RGB here to avoid errors with
        #   grayscale images during prediction.
        #   The code only supports RGB prediction; all other image
        #   types are converted to RGB.
        #---------------------------------------------------------#
        image       = cvtColor(image)

        #---------------------------------------------------------#
        #   Resize the original image so its short side is 600
        #---------------------------------------------------------#
        image_data  = resize_image(image, [input_shape[1], input_shape[0]])
        #---------------------------------------------------------#
        #   Add the batch_size dimension
        #---------------------------------------------------------#
        image_data  = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)

        with torch.no_grad():
            images = torch.from_numpy(image_data)
            if self.cuda:
                images = images.cuda()

            roi_cls_locs, roi_scores, rois, _ = self.net(images)
            #-------------------------------------------------------------#
            #   Decode the proposals with the classifier predictions
            #   to obtain the final boxes
            #-------------------------------------------------------------#
            results = self.bbox_util.forward(roi_cls_locs, roi_scores, rois, image_shape, input_shape,
                                             nms_iou = self.nms_iou, confidence = self.confidence)
        #--------------------------------------#
        #   If nothing is detected, return
        #--------------------------------------#
        if len(results[0]) <= 0:
            return

        top_label   = np.array(results[0][:, 5], dtype = 'int32')
        top_conf    = results[0][:, 4]
        top_boxes   = results[0][:, :4]

        for i, c in list(enumerate(top_label)):
            predicted_class = self.class_names[int(c)]
            box             = top_boxes[i]
            score           = str(top_conf[i])

            top, left, bottom, right = box
            if predicted_class not in class_names:
                continue

            f.write("%s %s %s %s %s %s\n" % (predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)), str(int(bottom))))

        f.close()
        return
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/get_map.py:
--------------------------------------------------------------------------------
import os
import xml.etree.ElementTree as ET

from PIL import Image
from tqdm import tqdm

from utils.utils import get_classes
from utils.utils_map import get_coco_map, get_map
from frcnn import FRCNN

if __name__ == "__main__":
    '''
    Unlike AP, Recall and Precision are not area-based quantities, so they change
    as the confidence threshold changes.
    By default, the Recall and Precision computed here correspond to a confidence
    threshold of 0.5.

    Because of how mAP is computed, the network must produce nearly all of its
    predicted boxes so that Recall and Precision can be evaluated at different
    thresholds. The txt files under map_out/detection-results/ therefore usually
    contain more boxes than a direct predict run; the goal is to list every
    possible predicted box.
    '''
    #------------------------------------------------------------------------------------------------------------------#
    #   map_mode selects what this script computes when run:
    #   map_mode 0: the whole mAP pipeline: predictions, ground truth, and VOC_map.
    #   map_mode 1: predictions only.
    #   map_mode 2: ground truth only.
    #   map_mode 3: VOC_map only.
    #   map_mode 4: the 0.50:0.95 map of the current dataset via the COCO toolbox.
    #               Requires predictions, ground truth, and pycocotools installed.
    #-------------------------------------------------------------------------------------------------------------------#
    map_mode        = 0
    #--------------------------------------------------------------------------------------#
    #   classes_path here specifies the classes for which VOC_map is measured.
    #   In general it should match the classes_path used for training and prediction.
    #--------------------------------------------------------------------------------------#
    classes_path    = './faster-rcnn-pytorch-master/model_data/voc_classes.txt'
    #--------------------------------------------------------------------------------------#
    #   MINOVERLAP specifies the desired mAP0.x; look up what mAP0.x means if unsure.
    #   For example, to compute mAP0.75, set MINOVERLAP = 0.75.
    #
    #   A predicted box whose overlap with a ground-truth box exceeds MINOVERLAP counts
    #   as a positive sample, otherwise as a negative one.
    #   The larger MINOVERLAP is, the more precise a predicted box must be to count as
    #   positive, and the lower the computed mAP.
    #--------------------------------------------------------------------------------------#
    MINOVERLAP      = 0.5
    #--------------------------------------------------------------------------------------#
    #   Because of how mAP is computed, the network must produce nearly all of its
    #   predicted boxes, so confidence should be set as low as possible to collect them.
    #
    #   This value is usually left alone. Since computing mAP needs nearly all predicted
    #   boxes, the confidence here must not be changed casually.
    #   To get Recall and Precision at other thresholds, change score_threhold below.
    #--------------------------------------------------------------------------------------#
    confidence      = 0.02
    #--------------------------------------------------------------------------------------#
    #   The NMS IoU used at prediction time; larger means less strict suppression.
    #
    #   This value is usually left alone.
    #--------------------------------------------------------------------------------------#
    nms_iou         = 0.5
    #---------------------------------------------------------------------------------------------------------------#
    #   Unlike AP, Recall and Precision are not area-based, so they differ with the threshold.
    #
    #   By default, the Recall and Precision computed here correspond to a threshold of 0.5
    #   (defined here as score_threhold).
    #   Since computing mAP needs nearly all predicted boxes, the confidence defined above
    #   must not be changed casually. A separate score_threhold is therefore defined here
    #   as the threshold at which Recall and Precision are read off during the mAP run.
    #---------------------------------------------------------------------------------------------------------------#
    score_threhold  = 0.5
    #-------------------------------------------------------#
    #   map_vis toggles visualization of the VOC_map computation
    #-------------------------------------------------------#
    map_vis         = False
    #-------------------------------------------------------#
    #   Points to the folder containing the VOC dataset.
    #   Defaults to the VOC dataset in the root directory.
    #-------------------------------------------------------#
    VOCdevkit_path  = './faster-rcnn-pytorch-master/VOCdevkit'
    #-------------------------------------------------------#
    #   Output folder for the results, map_out by default
    #-------------------------------------------------------#
    map_out_path    = 'map_out'

    image_ids = open(os.path.join(VOCdevkit_path, "./VOC2007/ImageSets/Main/test.txt")).read().strip().split()

    if not os.path.exists(map_out_path):
        os.makedirs(map_out_path)
    if not os.path.exists(os.path.join(map_out_path, 'ground-truth')):
        os.makedirs(os.path.join(map_out_path, 'ground-truth'))
    if not os.path.exists(os.path.join(map_out_path, 'detection-results')):
        os.makedirs(os.path.join(map_out_path, 'detection-results'))
    if not os.path.exists(os.path.join(map_out_path, 'images-optional')):
        os.makedirs(os.path.join(map_out_path, 'images-optional'))

    class_names, _ = get_classes(classes_path)

    if map_mode == 0 or map_mode == 1:
        print("Load model.")
        frcnn = FRCNN(confidence = confidence, nms_iou = nms_iou)
        print("Load model done.")

        print("Get predict result.")
        for image_id in tqdm(image_ids):
            image_path  = os.path.join(VOCdevkit_path, "./VOC2007/JPEGImages/" + image_id + ".jpg")
            image       = Image.open(image_path)
            if map_vis:
                image.save(os.path.join(map_out_path, "images-optional/" + image_id + ".jpg"))
            frcnn.get_map_txt(image_id, image, class_names, map_out_path)
        print("Get predict result done.")
    if map_mode == 0 or map_mode == 2:
        print("Get ground truth result.")
        for image_id in tqdm(image_ids):
            with open(os.path.join(map_out_path, "ground-truth/" + image_id + ".txt"), "w") as new_f:
                root = ET.parse(os.path.join(VOCdevkit_path, "./VOC2007/Annotations/" + image_id + ".xml")).getroot()
                for obj in root.findall('object'):
                    difficult_flag = False
                    if obj.find('difficult') != None:
                        difficult = obj.find('difficult').text
                        if int(difficult) == 1:
                            difficult_flag = True
                    obj_name = obj.find('name').text
                    if obj_name not in class_names:
                        continue
                    bndbox  = obj.find('bndbox')
                    left    = bndbox.find('xmin').text
                    top     = bndbox.find('ymin').text
                    right   = bndbox.find('xmax').text
                    bottom  = bndbox.find('ymax').text

                    if difficult_flag:
                        new_f.write("%s %s %s %s %s difficult\n" % (obj_name, left, top, right, bottom))
                    else:
                        new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom))
        print("Get ground truth result done.")

    if map_mode == 0 or map_mode == 3:
        print("Get map.")
        get_map(MINOVERLAP, True, score_threhold = score_threhold, path = map_out_path)
        print("Get map done.")

    if map_mode == 4:
        print("Get map.")
        get_coco_map(class_names = class_names, path = map_out_path)
        print("Get map done.")
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/img/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/faster-rcnn-pytorch-master/img/1.jpg
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/img/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/faster-rcnn-pytorch-master/img/2.jpg
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/img/3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/faster-rcnn-pytorch-master/img/3.jpg
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/logs/说明书.txt:
--------------------------------------------------------------------------------
Trained weight files are saved here!
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/model_data/simhei.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/biluko/Faster-RCNN-Pytorch/3fe311bde21ae91fc87cdaf250e56f20c02020ba/faster-rcnn-pytorch-master/model_data/simhei.ttf
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/model_data/voc_classes.txt:
--------------------------------------------------------------------------------
aeroplane
bicycle
bird
boat
bottle
bus
car
cat
chair
cow
diningtable
dog
horse
motorbike
person
pottedplant
sheep
sofa
train
tvmonitor
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/nets/__init__.py:
--------------------------------------------------------------------------------
#
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/nets/classifier.py:
--------------------------------------------------------------------------------
import warnings

import torch
from torch import nn
from torchvision.ops import RoIPool

warnings.filterwarnings("ignore")

class VGG16RoIHead(nn.Module):
    def __init__(self, n_class, roi_size, spatial_scale, classifier):
        super(VGG16RoIHead, self).__init__()
        self.classifier = classifier
        #--------------------------------------#
        #   Box regression on the RoIPooling result
        #--------------------------------------#
        self.cls_loc    = nn.Linear(4096, n_class * 4)
        #-----------------------------------#
        #   Classification on the RoIPooling result
        #-----------------------------------#
        self.score      = nn.Linear(4096, n_class)
        #-----------------------------------#
        #   Weight initialization
        #-----------------------------------#
        normal_init(self.cls_loc, 0, 0.001)
        normal_init(self.score, 0, 0.01)

        self.roi = RoIPool((roi_size, roi_size), spatial_scale)

    def forward(self, x, rois, roi_indices, img_size):
        n, _, _, _ = x.shape
        if x.is_cuda:
            roi_indices = roi_indices.cuda()
            rois = rois.cuda()
        rois        = torch.flatten(rois, 0, 1)
        roi_indices = torch.flatten(roi_indices, 0, 1)

        rois_feature_map = torch.zeros_like(rois)
        rois_feature_map[:, [0, 2]] = rois[:, [0, 2]] / img_size[1] * x.size()[3]
        rois_feature_map[:, [1, 3]] = rois[:, [1, 3]] / img_size[0] * x.size()[2]

        indices_and_rois = torch.cat([roi_indices[:, None], rois_feature_map], dim = 1)
        #-----------------------------------#
        #   Crop the shared feature map with the proposals
        #-----------------------------------#
        pool = self.roi(x, indices_and_rois)
        #-----------------------------------#
        #   Feature extraction with the classifier network
        #-----------------------------------#
        pool = pool.view(pool.size(0), -1)
        #--------------------------------------------------------------#
        #   For a single input image, fc7 here has shape [300, 4096]
        #--------------------------------------------------------------#
        fc7 = self.classifier(pool)

        roi_cls_locs    = self.cls_loc(fc7)
        roi_scores      = self.score(fc7)

        roi_cls_locs    = roi_cls_locs.view(n, -1, roi_cls_locs.size(1))
        roi_scores      = roi_scores.view(n, -1, roi_scores.size(1))
        return roi_cls_locs, roi_scores
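#----------------------------------------------------------------------#
#   Note: both RoI heads are constructed with spatial_scale = 1 in
#   nets/frcnn.py because forward() above already rescales the rois
#   into feature-map coordinates before calling RoIPool.
#----------------------------------------------------------------------#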
class Resnet50RoIHead(nn.Module):
    def __init__(self, n_class, roi_size, spatial_scale, classifier):
        super(Resnet50RoIHead, self).__init__()
        self.classifier = classifier
        #--------------------------------------#
        #   Box regression on the RoIPooling result
        #--------------------------------------#
        self.cls_loc = nn.Linear(2048, n_class * 4)
        #-----------------------------------#
        #   Classification on the RoIPooling result
        #-----------------------------------#
        self.score = nn.Linear(2048, n_class)
        #-----------------------------------#
        #   Weight initialization
        #-----------------------------------#
        normal_init(self.cls_loc, 0, 0.001)
        normal_init(self.score, 0, 0.01)

        self.roi = RoIPool((roi_size, roi_size), spatial_scale)

    def forward(self, x, rois, roi_indices, img_size):
        n, _, _, _ = x.shape
        if x.is_cuda:
            roi_indices = roi_indices.cuda()
            rois = rois.cuda()
        rois        = torch.flatten(rois, 0, 1)
        roi_indices = torch.flatten(roi_indices, 0, 1)

        rois_feature_map = torch.zeros_like(rois)
        rois_feature_map[:, [0, 2]] = rois[:, [0, 2]] / img_size[1] * x.size()[3]
        rois_feature_map[:, [1, 3]] = rois[:, [1, 3]] / img_size[0] * x.size()[2]

        indices_and_rois = torch.cat([roi_indices[:, None], rois_feature_map], dim = 1)
        #-----------------------------------#
        #   Crop the shared feature map with the proposals
        #-----------------------------------#
        pool = self.roi(x, indices_and_rois)
        #-----------------------------------#
        #   Feature extraction with the classifier network
        #-----------------------------------#
        fc7 = self.classifier(pool)
        #--------------------------------------------------------------#
        #   For a single input image, fc7 here has shape [300, 2048]
        #--------------------------------------------------------------#
        fc7 = fc7.view(fc7.size(0), -1)

        roi_cls_locs    = self.cls_loc(fc7)
        roi_scores      = self.score(fc7)
        roi_cls_locs    = roi_cls_locs.view(n, -1, roi_cls_locs.size(1))
        roi_scores      = roi_scores.view(n, -1, roi_scores.size(1))
        return roi_cls_locs, roi_scores

def normal_init(m, mean, stddev, truncated = False):
    if truncated:
        m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean)  # not a perfect approximation
    else:
        m.weight.data.normal_(mean, stddev)
    m.bias.data.zero_()
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/nets/frcnn.py:
--------------------------------------------------------------------------------
import torch.nn as nn

from nets.classifier import Resnet50RoIHead, VGG16RoIHead
from nets.resnet50 import resnet50
from nets.rpn import RegionProposalNetwork
from nets.vgg16 import decom_vgg16


class FasterRCNN(nn.Module):
    def __init__(self, num_classes,
                 mode = "training",
                 feat_stride = 16,
                 anchor_scales = [8, 16, 32],
                 ratios = [0.5, 1, 2],
                 backbone = 'vgg',
                 pretrained = False):
        super(FasterRCNN, self).__init__()
        self.feat_stride = feat_stride
        #---------------------------------#
        #   Two backbones are available:
        #   vgg and resnet50
        #---------------------------------#
        if backbone == 'vgg':
            self.extractor, classifier = decom_vgg16(pretrained)
            #---------------------------------#
            #   Build the region proposal network
            #---------------------------------#
            self.rpn = RegionProposalNetwork(
                512, 512,
                ratios          = ratios,
                anchor_scales   = anchor_scales,
                feat_stride     = self.feat_stride,
                mode            = mode
            )
            #---------------------------------#
            #   Build the classifier network
            #---------------------------------#
            self.head = VGG16RoIHead(
                n_class         = num_classes + 1,
                roi_size        = 7,
                spatial_scale   = 1,
                classifier      = classifier
            )
        elif backbone == 'resnet50':
            self.extractor, classifier = resnet50(pretrained)
            #---------------------------------#
            #   Build the region proposal network
            #---------------------------------#
            self.rpn = RegionProposalNetwork(
                1024, 512,
                ratios          = ratios,
                anchor_scales   = anchor_scales,
                feat_stride     = self.feat_stride,
                mode            = mode
            )
            #---------------------------------#
            #   Build the classifier network
            #---------------------------------#
            self.head = Resnet50RoIHead(
                n_class         = num_classes + 1,
                roi_size        = 14,
                spatial_scale   = 1,
                classifier      = classifier
            )

    def forward(self, x, scale=1., mode="forward"):
        if mode == "forward":
            #---------------------------------#
            #   Compute the input image size
            #---------------------------------#
            img_size = x.shape[2:]
            #---------------------------------#
            #   Extract features with the backbone
            #---------------------------------#
            base_feature = self.extractor.forward(x)

            #---------------------------------#
            #   Get the proposals
            #---------------------------------#
            _, _, rois, roi_indices, _ = self.rpn.forward(base_feature, img_size, scale)
            #---------------------------------------#
            #   Get the classification and regression
            #   results of the classifier head
            #---------------------------------------#
            roi_cls_locs, roi_scores = self.head.forward(base_feature, rois, roi_indices, img_size)
            return roi_cls_locs, roi_scores, rois, roi_indices
        elif mode == "extractor":
            #---------------------------------#
            #   Extract features with the backbone
            #---------------------------------#
            base_feature = self.extractor.forward(x)
            return base_feature
        elif mode == "rpn":
            base_feature, img_size = x
            #---------------------------------#
            #   Get the proposals
            #---------------------------------#
            rpn_locs, rpn_scores, rois, roi_indices, anchor = self.rpn.forward(base_feature, img_size, scale)
            return rpn_locs, rpn_scores, rois, roi_indices, anchor
        elif mode == "head":
            base_feature, rois, roi_indices, img_size = x
            #---------------------------------------#
            #   Get the classification and regression
            #   results of the classifier head
            #---------------------------------------#
            roi_cls_locs, roi_scores = self.head.forward(base_feature, rois, roi_indices, img_size)
            return roi_cls_locs, roi_scores

    def freeze_bn(self):
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/nets/frcnn_training.py:
--------------------------------------------------------------------------------
import math
from functools import partial

import numpy as np
import torch
import torch.nn as nn
from torch.nn import functional as F


def bbox_iou(bbox_a, bbox_b):
    if bbox_a.shape[1] != 4 or bbox_b.shape[1] != 4:
        print(bbox_a, bbox_b)
        raise IndexError
    tl = np.maximum(bbox_a[:, None, :2], bbox_b[:, :2])
    br = np.minimum(bbox_a[:, None, 2:], bbox_b[:, 2:])
    area_i = np.prod(br - tl, axis=2) * (tl < br).all(axis=2)
    area_a = np.prod(bbox_a[:, 2:] - bbox_a[:, :2], axis=1)
    area_b = np.prod(bbox_b[:, 2:] - bbox_b[:, :2], axis=1)
    return area_i / (area_a[:, None] + area_b - area_i)
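#----------------------------------------------------------------------#
#   bbox2loc below encodes dst_bbox relative to src_bbox using the
#   standard Faster R-CNN box parameterization:
#       dx = (ctr_x_dst - ctr_x_src) / w_src
#       dy = (ctr_y_dst - ctr_y_src) / h_src
#       dw = log(w_dst / w_src)
#       dh = log(h_dst / h_src)
#----------------------------------------------------------------------#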
| height = src_bbox[:, 3] - src_bbox[:, 1] 24 | ctr_x = src_bbox[:, 0] + 0.5 * width 25 | ctr_y = src_bbox[:, 1] + 0.5 * height 26 | 27 | base_width = dst_bbox[:, 2] - dst_bbox[:, 0] 28 | base_height = dst_bbox[:, 3] - dst_bbox[:, 1] 29 | base_ctr_x = dst_bbox[:, 0] + 0.5 * base_width 30 | base_ctr_y = dst_bbox[:, 1] + 0.5 * base_height 31 | 32 | eps = np.finfo(height.dtype).eps 33 | width = np.maximum(width, eps) 34 | height = np.maximum(height, eps) 35 | 36 | dx = (base_ctr_x - ctr_x) / width 37 | dy = (base_ctr_y - ctr_y) / height 38 | dw = np.log(base_width / width) 39 | dh = np.log(base_height / height) 40 | 41 | loc = np.vstack((dx, dy, dw, dh)).transpose() 42 | return loc 43 | 44 | class AnchorTargetCreator(object): 45 | def __init__(self, n_sample=256, pos_iou_thresh=0.7, neg_iou_thresh=0.3, pos_ratio=0.5): 46 | self.n_sample = n_sample 47 | self.pos_iou_thresh = pos_iou_thresh 48 | self.neg_iou_thresh = neg_iou_thresh 49 | self.pos_ratio = pos_ratio 50 | 51 | def __call__(self, bbox, anchor): 52 | argmax_ious, label = self._create_label(anchor, bbox) 53 | if (label > 0).any(): 54 | loc = bbox2loc(anchor, bbox[argmax_ious]) 55 | return loc, label 56 | else: 57 | return np.zeros_like(anchor), label 58 | 59 | def _calc_ious(self, anchor, bbox): 60 | #----------------------------------------------# 61 | # anchor和bbox的iou 62 | # 获得的ious的shape为[num_anchors, num_gt] 63 | #----------------------------------------------# 64 | ious = bbox_iou(anchor, bbox) 65 | 66 | if len(bbox)==0: 67 | return np.zeros(len(anchor), np.int32), np.zeros(len(anchor)), np.zeros(len(bbox)) 68 | #---------------------------------------------------------# 69 | # 获得每一个先验框最对应的真实框 [num_anchors, ] 70 | #---------------------------------------------------------# 71 | argmax_ious = ious.argmax(axis=1) 72 | #---------------------------------------------------------# 73 | # 找出每一个先验框最对应的真实框的iou [num_anchors, ] 74 | #---------------------------------------------------------# 75 | max_ious = np.max(ious, axis=1) 76 | #---------------------------------------------------------# 77 | # 获得每一个真实框最对应的先验框 [num_gt, ] 78 | #---------------------------------------------------------# 79 | gt_argmax_ious = ious.argmax(axis=0) 80 | #---------------------------------------------------------# 81 | # 保证每一个真实框都存在对应的先验框 82 | #---------------------------------------------------------# 83 | for i in range(len(gt_argmax_ious)): 84 | argmax_ious[gt_argmax_ious[i]] = i 85 | 86 | return argmax_ious, max_ious, gt_argmax_ious 87 | 88 | def _create_label(self, anchor, bbox): 89 | # ------------------------------------------ # 90 | # 1是正样本,0是负样本,-1忽略 91 | # 初始化的时候全部设置为-1 92 | # ------------------------------------------ # 93 | label = np.empty((len(anchor),), dtype=np.int32) 94 | label.fill(-1) 95 | 96 | # ------------------------------------------------------------------------ # 97 | # argmax_ious为每个先验框对应的最大的真实框的序号 [num_anchors, ] 98 | # max_ious为每个先验框对应的最大的真实框的iou [num_anchors, ] 99 | # gt_argmax_ious为每一个真实框对应的最大的先验框的序号 [num_gt, ] 100 | # ------------------------------------------------------------------------ # 101 | argmax_ious, max_ious, gt_argmax_ious = self._calc_ious(anchor, bbox) 102 | 103 | # ----------------------------------------------------- # 104 | # 如果小于门限值则设置为负样本 105 | # 如果大于门限值则设置为正样本 106 | # 每个真实框至少对应一个先验框 107 | # ----------------------------------------------------- # 108 | label[max_ious < self.neg_iou_thresh] = 0 109 | label[max_ious >= self.pos_iou_thresh] = 1 110 | if len(gt_argmax_ious)>0: 111 | label[gt_argmax_ious] = 1 112 |
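
A tiny numeric check of the two helpers above, with the values worked out by hand (assumes the script is run from the repo root so `nets.frcnn_training` is importable):

```python
import numpy as np
from nets.frcnn_training import bbox_iou, bbox2loc

anchor = np.array([[0., 0., 100., 100.]])    # (x1, y1, x2, y2)
gt     = np.array([[50., 50., 150., 150.]])

# Intersection is the 50x50 square [50,100]x[50,100] -> area 2500
# Union = 10000 + 10000 - 2500 = 17500, IoU = 2500/17500 ≈ 0.143
print(bbox_iou(anchor, gt))                  # [[0.14285714]]

# bbox2loc encodes the anchor-to-gt offset used as a regression target:
# dx = (100-50)/100 = 0.5, dy = 0.5, dw = log(100/100) = 0, dh = 0
print(bbox2loc(anchor, gt))                  # [[0.5 0.5 0.  0. ]]
```
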
113 | # ----------------------------------------------------- # 114 | # 判断正样本数量是否大于128,如果大于则限制在128 115 | # ----------------------------------------------------- # 116 | n_pos = int(self.pos_ratio * self.n_sample) 117 | pos_index = np.where(label == 1)[0] 118 | if len(pos_index) > n_pos: 119 | disable_index = np.random.choice(pos_index, size=(len(pos_index) - n_pos), replace=False) 120 | label[disable_index] = -1 121 | 122 | # ----------------------------------------------------- # 123 | # 平衡正负样本,保持总数量为256 124 | # ----------------------------------------------------- # 125 | n_neg = self.n_sample - np.sum(label == 1) 126 | neg_index = np.where(label == 0)[0] 127 | if len(neg_index) > n_neg: 128 | disable_index = np.random.choice(neg_index, size=(len(neg_index) - n_neg), replace=False) 129 | label[disable_index] = -1 130 | 131 | return argmax_ious, label 132 | 133 | 134 | class ProposalTargetCreator(object): 135 | def __init__(self, n_sample=128, pos_ratio=0.5, pos_iou_thresh=0.5, neg_iou_thresh_high=0.5, neg_iou_thresh_low=0): 136 | self.n_sample = n_sample 137 | self.pos_ratio = pos_ratio 138 | self.pos_roi_per_image = np.round(self.n_sample * self.pos_ratio) 139 | self.pos_iou_thresh = pos_iou_thresh 140 | self.neg_iou_thresh_high = neg_iou_thresh_high 141 | self.neg_iou_thresh_low = neg_iou_thresh_low 142 | 143 | def __call__(self, roi, bbox, label, loc_normalize_std=(0.1, 0.1, 0.2, 0.2)): 144 | roi = np.concatenate((roi.detach().cpu().numpy(), bbox), axis=0) 145 | # ----------------------------------------------------- # 146 | # 计算建议框和真实框的重合程度 147 | # ----------------------------------------------------- # 148 | iou = bbox_iou(roi, bbox) 149 | 150 | if len(bbox)==0: 151 | gt_assignment = np.zeros(len(roi), np.int32) 152 | max_iou = np.zeros(len(roi)) 153 | gt_roi_label = np.zeros(len(roi)) 154 | else: 155 | #---------------------------------------------------------# 156 | # 获得每一个建议框最对应的真实框 [num_roi, ] 157 | #---------------------------------------------------------# 158 | gt_assignment = iou.argmax(axis=1) 159 | #---------------------------------------------------------# 160 | # 获得每一个建议框最对应的真实框的iou [num_roi, ] 161 | #---------------------------------------------------------# 162 | max_iou = iou.max(axis=1) 163 | #---------------------------------------------------------# 164 | # 真实框的标签要+1因为有背景的存在 165 | #---------------------------------------------------------# 166 | gt_roi_label = label[gt_assignment] + 1 167 | 168 | #----------------------------------------------------------------# 169 | # 满足建议框和真实框重合程度大于pos_iou_thresh的作为正样本 170 | # 将正样本的数量限制在self.pos_roi_per_image以内 171 | #----------------------------------------------------------------# 172 | pos_index = np.where(max_iou >= self.pos_iou_thresh)[0] 173 | pos_roi_per_this_image = int(min(self.pos_roi_per_image, pos_index.size)) 174 | if pos_index.size > 0: 175 | pos_index = np.random.choice(pos_index, size=pos_roi_per_this_image, replace=False) 176 | 177 | #-----------------------------------------------------------------------------------------------------# 178 | # 满足建议框和真实框重合程度小于neg_iou_thresh_high大于neg_iou_thresh_low作为负样本 179 | # 将正样本的数量和负样本的数量的总和固定成self.n_sample 180 | #-----------------------------------------------------------------------------------------------------# 181 | neg_index = np.where((max_iou < self.neg_iou_thresh_high) & (max_iou >= self.neg_iou_thresh_low))[0] 182 | neg_roi_per_this_image = self.n_sample - pos_roi_per_this_image 183 | neg_roi_per_this_image = int(min(neg_roi_per_this_image, neg_index.size)) 184
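
The sampling quotas above are easy to check on toy data: `AnchorTargetCreator` caps positives at `pos_ratio * n_sample = 128` and tops the batch up with negatives to 256 labels in total. A small sketch with random, hypothetical boxes (assumes the repo root is on the path):

```python
import numpy as np
from nets.frcnn_training import AnchorTargetCreator

rng = np.random.RandomState(0)
# 1000 random (x1, y1, x2, y2) anchors inside a 600x600 image:
# sorting the four coordinates guarantees x1 <= x2 and y1 <= y2.
anchor = np.sort(rng.uniform(0, 600, size=(1000, 4)), axis=-1)
bbox = np.array([[100., 100., 200., 200.],
                 [300., 300., 450., 450.]])

loc, label = AnchorTargetCreator()(bbox, anchor)
print((label == 1).sum(), (label == 0).sum(), (label == -1).sum())
# positives <= 128, and positives + negatives == 256 when enough anchors qualify
```
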
| if neg_index.size > 0: 185 | neg_index = np.random.choice(neg_index, size=neg_roi_per_this_image, replace=False) 186 | 187 | #---------------------------------------------------------# 188 | # sample_roi [n_sample, ] 189 | # gt_roi_loc [n_sample, 4] 190 | # gt_roi_label [n_sample, ] 191 | #---------------------------------------------------------# 192 | keep_index = np.append(pos_index, neg_index) 193 | 194 | sample_roi = roi[keep_index] 195 | if len(bbox)==0: 196 | return sample_roi, np.zeros_like(sample_roi), gt_roi_label[keep_index] 197 | 198 | gt_roi_loc = bbox2loc(sample_roi, bbox[gt_assignment[keep_index]]) 199 | gt_roi_loc = (gt_roi_loc / np.array(loc_normalize_std, np.float32)) 200 | 201 | gt_roi_label = gt_roi_label[keep_index] 202 | gt_roi_label[pos_roi_per_this_image:] = 0 203 | return sample_roi, gt_roi_loc, gt_roi_label 204 | 205 | class FasterRCNNTrainer(nn.Module): 206 | def __init__(self, model_train, optimizer): 207 | super(FasterRCNNTrainer, self).__init__() 208 | self.model_train = model_train 209 | self.optimizer = optimizer 210 | 211 | self.rpn_sigma = 1 212 | self.roi_sigma = 1 213 | 214 | self.anchor_target_creator = AnchorTargetCreator() 215 | self.proposal_target_creator = ProposalTargetCreator() 216 | 217 | self.loc_normalize_std = [0.1, 0.1, 0.2, 0.2] 218 | 219 | def _fast_rcnn_loc_loss(self, pred_loc, gt_loc, gt_label, sigma): 220 | pred_loc = pred_loc[gt_label > 0] 221 | gt_loc = gt_loc[gt_label > 0] 222 | 223 | sigma_squared = sigma ** 2 224 | regression_diff = (gt_loc - pred_loc) 225 | regression_diff = regression_diff.abs().float() 226 | regression_loss = torch.where( 227 | regression_diff < (1. / sigma_squared), 228 | 0.5 * sigma_squared * regression_diff ** 2, 229 | regression_diff - 0.5 / sigma_squared 230 | ) 231 | regression_loss = regression_loss.sum() 232 | num_pos = (gt_label > 0).sum().float() 233 | 234 | regression_loss /= torch.max(num_pos, torch.ones_like(num_pos)) 235 | return regression_loss 236 | 237 | def forward(self, imgs, bboxes, labels, scale): 238 | n = imgs.shape[0] 239 | img_size = imgs.shape[2:] 240 | #-------------------------------# 241 | # 获取公用特征层 242 | #-------------------------------# 243 | base_feature = self.model_train(imgs, mode = 'extractor') 244 | 245 | # -------------------------------------------------- # 246 | # 利用rpn网络获得调整参数、得分、建议框、先验框 247 | # -------------------------------------------------- # 248 | rpn_locs, rpn_scores, rois, roi_indices, anchor = self.model_train(x = [base_feature, img_size], scale = scale, mode = 'rpn') 249 | 250 | rpn_loc_loss_all, rpn_cls_loss_all, roi_loc_loss_all, roi_cls_loss_all = 0, 0, 0, 0 251 | sample_rois, sample_indexes, gt_roi_locs, gt_roi_labels = [], [], [], [] 252 | for i in range(n): 253 | bbox = bboxes[i] 254 | label = labels[i] 255 | rpn_loc = rpn_locs[i] 256 | rpn_score = rpn_scores[i] 257 | roi = rois[i] 258 | # -------------------------------------------------- # 259 | # 利用真实框和先验框获得建议框网络应该有的预测结果 260 | # 给每个先验框都打上标签 261 | # gt_rpn_loc [num_anchors, 4] 262 | # gt_rpn_label [num_anchors, ] 263 | # -------------------------------------------------- # 264 | gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(bbox, anchor[0].cpu().numpy()) 265 | gt_rpn_loc = torch.Tensor(gt_rpn_loc).type_as(rpn_locs) 266 | gt_rpn_label = torch.Tensor(gt_rpn_label).type_as(rpn_locs).long() 267 | # -------------------------------------------------- # 268 | # 分别计算建议框网络的回归损失和分类损失 269 | # -------------------------------------------------- # 270 | rpn_loc_loss = self._fast_rcnn_loc_loss(rpn_loc, 
gt_rpn_loc, gt_rpn_label, self.rpn_sigma) 271 | rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label, ignore_index=-1) 272 | 273 | rpn_loc_loss_all += rpn_loc_loss 274 | rpn_cls_loss_all += rpn_cls_loss 275 | # ------------------------------------------------------ # 276 | # 利用真实框和建议框获得classifier网络应该有的预测结果 277 | # 获得三个变量,分别是sample_roi, gt_roi_loc, gt_roi_label 278 | # sample_roi [n_sample, ] 279 | # gt_roi_loc [n_sample, 4] 280 | # gt_roi_label [n_sample, ] 281 | # ------------------------------------------------------ # 282 | sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(roi, bbox, label, self.loc_normalize_std) 283 | sample_rois.append(torch.Tensor(sample_roi).type_as(rpn_locs)) 284 | sample_indexes.append(torch.ones(len(sample_roi)).type_as(rpn_locs) * roi_indices[i][0]) 285 | gt_roi_locs.append(torch.Tensor(gt_roi_loc).type_as(rpn_locs)) 286 | gt_roi_labels.append(torch.Tensor(gt_roi_label).type_as(rpn_locs).long()) 287 | 288 | sample_rois = torch.stack(sample_rois, dim=0) 289 | sample_indexes = torch.stack(sample_indexes, dim=0) 290 | roi_cls_locs, roi_scores = self.model_train([base_feature, sample_rois, sample_indexes, img_size], mode = 'head') 291 | for i in range(n): 292 | # ------------------------------------------------------ # 293 | # 根据建议框的种类,取出对应的回归预测结果 294 | # ------------------------------------------------------ # 295 | n_sample = roi_cls_locs.size()[1] 296 | 297 | roi_cls_loc = roi_cls_locs[i] 298 | roi_score = roi_scores[i] 299 | gt_roi_loc = gt_roi_locs[i] 300 | gt_roi_label = gt_roi_labels[i] 301 | 302 | roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4) 303 | roi_loc = roi_cls_loc[torch.arange(0, n_sample), gt_roi_label] 304 | 305 | # -------------------------------------------------- # 306 | # 分别计算Classifier网络的回归损失和分类损失 307 | # -------------------------------------------------- # 308 | roi_loc_loss = self._fast_rcnn_loc_loss(roi_loc, gt_roi_loc, gt_roi_label.data, self.roi_sigma) 309 | roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label) 310 | 311 | roi_loc_loss_all += roi_loc_loss 312 | roi_cls_loss_all += roi_cls_loss 313 | 314 | losses = [rpn_loc_loss_all/n, rpn_cls_loss_all/n, roi_loc_loss_all/n, roi_cls_loss_all/n] 315 | losses = losses + [sum(losses)] 316 | return losses 317 | 318 | def train_step(self, imgs, bboxes, labels, scale, fp16=False, scaler=None): 319 | self.optimizer.zero_grad() 320 | if not fp16: 321 | losses = self.forward(imgs, bboxes, labels, scale) 322 | losses[-1].backward() 323 | self.optimizer.step() 324 | else: 325 | from torch.cuda.amp import autocast 326 | with autocast(): 327 | losses = self.forward(imgs, bboxes, labels, scale) 328 | 329 | #----------------------# 330 | # 反向传播 331 | #----------------------# 332 | scaler.scale(losses[-1]).backward() 333 | scaler.step(self.optimizer) 334 | scaler.update() 335 | 336 | return losses 337 | 338 | def weights_init(net, init_type='normal', init_gain=0.02): 339 | def init_func(m): 340 | classname = m.__class__.__name__ 341 | if hasattr(m, 'weight') and classname.find('Conv') != -1: 342 | if init_type == 'normal': 343 | torch.nn.init.normal_(m.weight.data, 0.0, init_gain) 344 | elif init_type == 'xavier': 345 | torch.nn.init.xavier_normal_(m.weight.data, gain=init_gain) 346 | elif init_type == 'kaiming': 347 | torch.nn.init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') 348 | elif init_type == 'orthogonal': 349 | torch.nn.init.orthogonal_(m.weight.data, gain=init_gain) 350 | else: 351 | raise NotImplementedError('initialization method [%s] is not implemented' % 
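
The `_fast_rcnn_loc_loss` above is the standard smooth-L1 regression loss, computed over positive samples only and normalised by their count. A standalone sketch of the piecewise function for `sigma = 1`:

```python
import torch

def smooth_l1(x, sigma=1.0):
    # 0.5 * sigma^2 * x^2 inside |x| < 1/sigma^2, and |x| - 0.5/sigma^2 outside
    s2 = sigma ** 2
    x = x.abs()
    return torch.where(x < 1. / s2, 0.5 * s2 * x ** 2, x - 0.5 / s2)

diff = torch.tensor([0.2, 1.5])
print(smooth_l1(diff))   # tensor([0.0200, 1.0000])
```

The quadratic region keeps gradients small near zero, while the linear region caps the influence of badly-regressed outlier boxes.
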
init_type) 352 | elif classname.find('BatchNorm2d') != -1: 353 | torch.nn.init.normal_(m.weight.data, 1.0, 0.02) 354 | torch.nn.init.constant_(m.bias.data, 0.0) 355 | print('initialize network with %s type' % init_type) 356 | net.apply(init_func) 357 | 358 | def get_lr_scheduler(lr_decay_type, lr, min_lr, total_iters, warmup_iters_ratio = 0.05, warmup_lr_ratio = 0.1, no_aug_iter_ratio = 0.05, step_num = 10): 359 | def yolox_warm_cos_lr(lr, min_lr, total_iters, warmup_total_iters, warmup_lr_start, no_aug_iter, iters): 360 | if iters <= warmup_total_iters: 361 | # lr = (lr - warmup_lr_start) * iters / float(warmup_total_iters) + warmup_lr_start 362 | lr = (lr - warmup_lr_start) * pow(iters / float(warmup_total_iters), 2) + warmup_lr_start 363 | elif iters >= total_iters - no_aug_iter: 364 | lr = min_lr 365 | else: 366 | lr = min_lr + 0.5 * (lr - min_lr) * ( 367 | 1.0 + math.cos(math.pi* (iters - warmup_total_iters) / (total_iters - warmup_total_iters - no_aug_iter)) 368 | ) 369 | return lr 370 | 371 | def step_lr(lr, decay_rate, step_size, iters): 372 | if step_size < 1: 373 | raise ValueError("step_size must above 1.") 374 | n = iters // step_size 375 | out_lr = lr * decay_rate ** n 376 | return out_lr 377 | 378 | if lr_decay_type == "cos": 379 | warmup_total_iters = min(max(warmup_iters_ratio * total_iters, 1), 3) 380 | warmup_lr_start = max(warmup_lr_ratio * lr, 1e-6) 381 | no_aug_iter = min(max(no_aug_iter_ratio * total_iters, 1), 15) 382 | func = partial(yolox_warm_cos_lr ,lr, min_lr, total_iters, warmup_total_iters, warmup_lr_start, no_aug_iter) 383 | else: 384 | decay_rate = (min_lr / lr) ** (1 / (step_num - 1)) 385 | step_size = total_iters / step_num 386 | func = partial(step_lr, lr, decay_rate, step_size) 387 | 388 | return func 389 | 390 | def set_optimizer_lr(optimizer, lr_scheduler_func, epoch): 391 | lr = lr_scheduler_func(epoch) 392 | for param_group in optimizer.param_groups: 393 | param_group['lr'] = lr 394 | -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/nets/resnet50.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch.nn as nn 4 | from torch.hub import load_state_dict_from_url 5 | 6 | 7 | class Bottleneck(nn.Module): 8 | expansion = 4 9 | def __init__(self, inplanes, planes, stride=1, downsample=None): 10 | super(Bottleneck, self).__init__() 11 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) 12 | self.bn1 = nn.BatchNorm2d(planes) 13 | 14 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 15 | self.bn2 = nn.BatchNorm2d(planes) 16 | 17 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 18 | self.bn3 = nn.BatchNorm2d(planes * 4) 19 | 20 | self.relu = nn.ReLU(inplace=True) 21 | self.downsample = downsample 22 | self.stride = stride 23 | 24 | def forward(self, x): 25 | residual = x 26 | 27 | out = self.conv1(x) 28 | out = self.bn1(out) 29 | out = self.relu(out) 30 | 31 | out = self.conv2(out) 32 | out = self.bn2(out) 33 | out = self.relu(out) 34 | 35 | out = self.conv3(out) 36 | out = self.bn3(out) 37 | if self.downsample is not None: 38 | residual = self.downsample(x) 39 | 40 | out += residual 41 | out = self.relu(out) 42 | 43 | return out 44 | 45 | class ResNet(nn.Module): 46 | def __init__(self, block, layers, num_classes=1000): 47 | #-----------------------------------# 48 | # 假设输入进来的图片是600,600,3 49 | #-----------------------------------# 50 | 
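
The scheduler factory `get_lr_scheduler` above returns a plain function of the epoch index, which `set_optimizer_lr` then applies to every parameter group. A quick look at the cosine variant (assumes the repo root is on the path; the numbers follow from the defaults `warmup_iters_ratio=0.05`, `no_aug_iter_ratio=0.05`):

```python
from nets.frcnn_training import get_lr_scheduler

# Warm-up for the first ~3 epochs, cosine decay, constant floor at the end.
func = get_lr_scheduler('cos', lr=1e-4, min_lr=1e-6, total_iters=100)
for epoch in (0, 3, 50, 99):
    print(epoch, f"{func(epoch):.2e}")
# 0  -> 1.00e-05  (warm-up start)
# 3  -> 1.00e-04  (warm-up finished, at the peak lr)
# 50 -> somewhere on the cosine curve between the two
# 99 -> 1.00e-06  (clamped to min_lr for the final epochs)
```
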
self.inplanes = 64 51 | super(ResNet, self).__init__() 52 | 53 | # 600,600,3 -> 300,300,64 54 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 55 | self.bn1 = nn.BatchNorm2d(64) 56 | self.relu = nn.ReLU(inplace=True) 57 | 58 | # 300,300,64 -> 150,150,64 59 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True) 60 | 61 | # 150,150,64 -> 150,150,256 62 | self.layer1 = self._make_layer(block, 64, layers[0]) 63 | # 150,150,256 -> 75,75,512 64 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 65 | # 75,75,512 -> 38,38,1024 到这里可以获得一个38,38,1024的共享特征层 66 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 67 | # self.layer4被用在classifier模型中 68 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 69 | 70 | self.avgpool = nn.AvgPool2d(7) 71 | self.fc = nn.Linear(512 * block.expansion, num_classes) 72 | 73 | for m in self.modules(): 74 | if isinstance(m, nn.Conv2d): 75 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 76 | m.weight.data.normal_(0, math.sqrt(2. / n)) 77 | elif isinstance(m, nn.BatchNorm2d): 78 | m.weight.data.fill_(1) 79 | m.bias.data.zero_() 80 | 81 | def _make_layer(self, block, planes, blocks, stride=1): 82 | downsample = None 83 | #-------------------------------------------------------------------# 84 | # 当模型需要进行高和宽的压缩的时候,就需要用到残差边的downsample 85 | #-------------------------------------------------------------------# 86 | if stride != 1 or self.inplanes != planes * block.expansion: 87 | downsample = nn.Sequential( 88 | nn.Conv2d(self.inplanes, planes * block.expansion,kernel_size=1, stride=stride, bias=False), 89 | nn.BatchNorm2d(planes * block.expansion), 90 | ) 91 | layers = [] 92 | layers.append(block(self.inplanes, planes, stride, downsample)) 93 | self.inplanes = planes * block.expansion 94 | for i in range(1, blocks): 95 | layers.append(block(self.inplanes, planes)) 96 | return nn.Sequential(*layers) 97 | 98 | def forward(self, x): 99 | x = self.conv1(x) 100 | x = self.bn1(x) 101 | x = self.relu(x) 102 | x = self.maxpool(x) 103 | 104 | x = self.layer1(x) 105 | x = self.layer2(x) 106 | x = self.layer3(x) 107 | x = self.layer4(x) 108 | 109 | x = self.avgpool(x) 110 | x = x.view(x.size(0), -1) 111 | x = self.fc(x) 112 | return x 113 | 114 | def resnet50(pretrained = False): 115 | model = ResNet(Bottleneck, [3, 4, 6, 3]) 116 | if pretrained: 117 | state_dict = load_state_dict_from_url("https://download.pytorch.org/models/resnet50-19c8e357.pth", model_dir="./model_data") 118 | model.load_state_dict(state_dict) 119 | #----------------------------------------------------------------------------# 120 | # 获取特征提取部分,从conv1到model.layer3,最终获得一个38,38,1024的特征层 121 | #----------------------------------------------------------------------------# 122 | features = list([model.conv1, model.bn1, model.relu, model.maxpool, model.layer1, model.layer2, model.layer3]) 123 | #----------------------------------------------------------------------------# 124 | # 获取分类部分,从model.layer4到model.avgpool 125 | #----------------------------------------------------------------------------# 126 | classifier = list([model.layer4, model.avgpool]) 127 | 128 | features = nn.Sequential(*features) 129 | classifier = nn.Sequential(*classifier) 130 | return features, classifier 131 | -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/nets/rpn.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 
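
A shape check for the split performed by `resnet50()` above (no pretrained weights needed): the extractor stops at `layer3`, i.e. an effective stride of 16, while `layer4` plus the average pool become the classifier half used by the RoI head.

```python
import torch
from nets.resnet50 import resnet50

features, classifier = resnet50(pretrained=False)
feat = features(torch.randn(1, 3, 600, 600))
print(feat.shape)    # torch.Size([1, 1024, 38, 38]) -- the shared feature map
```
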
import torch 4 | from torch import nn 5 | from torch.nn import functional as F 6 | from torchvision.ops import nms 7 | from utils.anchors import _enumerate_shifted_anchor, generate_anchor_base 8 | from utils.utils_bbox import loc2bbox 9 | 10 | 11 | class ProposalCreator(): 12 | def __init__( 13 | self, 14 | mode, 15 | nms_iou = 0.7, 16 | n_train_pre_nms = 12000, 17 | n_train_post_nms = 600, 18 | n_test_pre_nms = 3000, 19 | n_test_post_nms = 300, 20 | min_size = 16 21 | 22 | ): 23 | #-----------------------------------# 24 | # 设置预测还是训练 25 | #-----------------------------------# 26 | self.mode = mode 27 | #-----------------------------------# 28 | # 建议框非极大抑制的iou大小 29 | #-----------------------------------# 30 | self.nms_iou = nms_iou 31 | #-----------------------------------# 32 | # 训练用到的建议框数量 33 | #-----------------------------------# 34 | self.n_train_pre_nms = n_train_pre_nms 35 | self.n_train_post_nms = n_train_post_nms 36 | #-----------------------------------# 37 | # 预测用到的建议框数量 38 | #-----------------------------------# 39 | self.n_test_pre_nms = n_test_pre_nms 40 | self.n_test_post_nms = n_test_post_nms 41 | self.min_size = min_size 42 | 43 | def __call__(self, loc, score, anchor, img_size, scale=1.): 44 | if self.mode == "training": 45 | n_pre_nms = self.n_train_pre_nms 46 | n_post_nms = self.n_train_post_nms 47 | else: 48 | n_pre_nms = self.n_test_pre_nms 49 | n_post_nms = self.n_test_post_nms 50 | 51 | #-----------------------------------# 52 | # 将先验框转换成tensor 53 | #-----------------------------------# 54 | anchor = torch.from_numpy(anchor).type_as(loc) 55 | #-----------------------------------# 56 | # 将RPN网络预测结果转化成建议框 57 | #-----------------------------------# 58 | roi = loc2bbox(anchor, loc) 59 | #-----------------------------------# 60 | # 防止建议框超出图像边缘 61 | #-----------------------------------# 62 | roi[:, [0, 2]] = torch.clamp(roi[:, [0, 2]], min = 0, max = img_size[1]) 63 | roi[:, [1, 3]] = torch.clamp(roi[:, [1, 3]], min = 0, max = img_size[0]) 64 | 65 | #-----------------------------------# 66 | # 建议框的宽高的最小值不可以小于16 67 | #-----------------------------------# 68 | min_size = self.min_size * scale 69 | keep = torch.where(((roi[:, 2] - roi[:, 0]) >= min_size) & ((roi[:, 3] - roi[:, 1]) >= min_size))[0] 70 | #-----------------------------------# 71 | # 将对应的建议框保留下来 72 | #-----------------------------------# 73 | roi = roi[keep, :] 74 | score = score[keep] 75 | 76 | #-----------------------------------# 77 | # 根据得分进行排序,取出建议框 78 | #-----------------------------------# 79 | order = torch.argsort(score, descending=True) 80 | if n_pre_nms > 0: 81 | order = order[:n_pre_nms] 82 | roi = roi[order, :] 83 | score = score[order] 84 | 85 | #-----------------------------------# 86 | # 对建议框进行非极大抑制 87 | # 使用官方的非极大抑制会快非常多 88 | #-----------------------------------# 89 | keep = nms(roi, score, self.nms_iou) 90 | if len(keep) < n_post_nms: 91 | index_extra = np.random.choice(range(len(keep)), size=(n_post_nms - len(keep)), replace=True) 92 | keep = torch.cat([keep, keep[index_extra]]) 93 | keep = keep[:n_post_nms] 94 | roi = roi[keep] 95 | return roi 96 | 97 | 98 | class RegionProposalNetwork(nn.Module): 99 | def __init__( 100 | self, 101 | in_channels = 512, 102 | mid_channels = 512, 103 | ratios = [0.5, 1, 2], 104 | anchor_scales = [8, 16, 32], 105 | feat_stride = 16, 106 | mode = "training", 107 | ): 108 | super(RegionProposalNetwork, self).__init__() 109 | #-----------------------------------------# 110 | # 生成基础先验框,shape为[9, 4] 111 | #-----------------------------------------# 112 | 
self.anchor_base = generate_anchor_base(anchor_scales = anchor_scales, ratios = ratios) 113 | n_anchor = self.anchor_base.shape[0] 114 | 115 | #-----------------------------------------# 116 | # 先进行一个3x3的卷积,可理解为特征整合 117 | #-----------------------------------------# 118 | self.conv1 = nn.Conv2d(in_channels, mid_channels, 3, 1, 1) 119 | #-----------------------------------------# 120 | # 分类预测先验框内部是否包含物体 121 | #-----------------------------------------# 122 | self.score = nn.Conv2d(mid_channels, n_anchor * 2, 1, 1, 0) 123 | #-----------------------------------------# 124 | # 回归预测对先验框进行调整 125 | #-----------------------------------------# 126 | self.loc = nn.Conv2d(mid_channels, n_anchor * 4, 1, 1, 0) 127 | 128 | #-----------------------------------------# 129 | # 特征点间距步长 130 | #-----------------------------------------# 131 | self.feat_stride = feat_stride 132 | #-----------------------------------------# 133 | # 用于对建议框解码并进行非极大抑制 134 | #-----------------------------------------# 135 | self.proposal_layer = ProposalCreator(mode) 136 | #--------------------------------------# 137 | # 对RPN的网络部分进行权值初始化 138 | #--------------------------------------# 139 | normal_init(self.conv1, 0, 0.01) 140 | normal_init(self.score, 0, 0.01) 141 | normal_init(self.loc, 0, 0.01) 142 | 143 | def forward(self, x, img_size, scale=1.): 144 | n, _, h, w = x.shape 145 | #-----------------------------------------# 146 | # 先进行一个3x3的卷积,可理解为特征整合 147 | #-----------------------------------------# 148 | x = F.relu(self.conv1(x)) 149 | #-----------------------------------------# 150 | # 回归预测对先验框进行调整 151 | #-----------------------------------------# 152 | rpn_locs = self.loc(x) 153 | rpn_locs = rpn_locs.permute(0, 2, 3, 1).contiguous().view(n, -1, 4) 154 | #-----------------------------------------# 155 | # 分类预测先验框内部是否包含物体 156 | #-----------------------------------------# 157 | rpn_scores = self.score(x) 158 | rpn_scores = rpn_scores.permute(0, 2, 3, 1).contiguous().view(n, -1, 2) 159 | 160 | #--------------------------------------------------------------------------------------# 161 | # 进行softmax概率计算,每个先验框只有两个判别结果 162 | # 内部包含物体或者内部不包含物体,rpn_softmax_scores[:, :, 1]的内容为包含物体的概率 163 | #--------------------------------------------------------------------------------------# 164 | rpn_softmax_scores = F.softmax(rpn_scores, dim=-1) 165 | rpn_fg_scores = rpn_softmax_scores[:, :, 1].contiguous() 166 | rpn_fg_scores = rpn_fg_scores.view(n, -1) 167 | 168 | #------------------------------------------------------------------------------------------------# 169 | # 生成先验框,此时获得的anchor是布满网格点的,当输入图片为600,600,3的时候,shape为(12996, 4) 170 | #------------------------------------------------------------------------------------------------# 171 | anchor = _enumerate_shifted_anchor(np.array(self.anchor_base), self.feat_stride, h, w) 172 | rois = list() 173 | roi_indices = list() 174 | for i in range(n): 175 | roi = self.proposal_layer(rpn_locs[i], rpn_fg_scores[i], anchor, img_size, scale = scale) 176 | batch_index = i * torch.ones((len(roi),)) 177 | rois.append(roi.unsqueeze(0)) 178 | roi_indices.append(batch_index.unsqueeze(0)) 179 | 180 | rois = torch.cat(rois, dim=0).type_as(x) 181 | roi_indices = torch.cat(roi_indices, dim=0).type_as(x) 182 | anchor = torch.from_numpy(anchor).unsqueeze(0).float().to(x.device) 183 | 184 | return rpn_locs, rpn_scores, rois, roi_indices, anchor 185 | 186 | def normal_init(m, mean, stddev, truncated=False): 187 | if truncated: 188 | m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean) # not a perfect approximation 189
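
Running the RPN above on a ResNet-50-sized feature map shows the shapes at each stage: a 38x38 grid with 9 anchors per location gives 12996 anchors, and `ProposalCreator` in training mode keeps exactly `n_train_post_nms = 600` proposals. A sketch with random inputs (assumes the repo root is on the path):

```python
import torch
from nets.rpn import RegionProposalNetwork

rpn = RegionProposalNetwork(in_channels=1024, mid_channels=512, mode="training")
base_feature = torch.randn(1, 1024, 38, 38)

rpn_locs, rpn_scores, rois, roi_indices, anchor = rpn(base_feature, img_size=(600, 600))
print(rpn_locs.shape)    # torch.Size([1, 12996, 4])
print(rpn_scores.shape)  # torch.Size([1, 12996, 2])
print(rois.shape)        # torch.Size([1, 600, 4])
print(anchor.shape)      # torch.Size([1, 12996, 4])
```
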
| else: 190 | m.weight.data.normal_(mean, stddev) 191 | m.bias.data.zero_() 192 | -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/nets/vgg16.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.hub import load_state_dict_from_url 4 | 5 | 6 | #--------------------------------------# 7 | # VGG16的结构 8 | #--------------------------------------# 9 | class VGG(nn.Module): 10 | def __init__(self, features, num_classes=1000, init_weights=True): 11 | super(VGG, self).__init__() 12 | self.features = features 13 | #--------------------------------------# 14 | # 平均池化到7x7大小 15 | #--------------------------------------# 16 | self.avgpool = nn.AdaptiveAvgPool2d((7, 7)) 17 | #--------------------------------------# 18 | # 分类部分 19 | #--------------------------------------# 20 | self.classifier = nn.Sequential( 21 | nn.Linear(512 * 7 * 7, 4096), 22 | nn.ReLU(True), 23 | nn.Dropout(), 24 | nn.Linear(4096, 4096), 25 | nn.ReLU(True), 26 | nn.Dropout(), 27 | nn.Linear(4096, num_classes), 28 | ) 29 | if init_weights: 30 | self._initialize_weights() 31 | 32 | def forward(self, x): 33 | #--------------------------------------# 34 | # 特征提取 35 | #--------------------------------------# 36 | x = self.features(x) 37 | #--------------------------------------# 38 | # 平均池化 39 | #--------------------------------------# 40 | x = self.avgpool(x) 41 | #--------------------------------------# 42 | # 平铺后 43 | #--------------------------------------# 44 | x = torch.flatten(x, 1) 45 | #--------------------------------------# 46 | # 分类部分 47 | #--------------------------------------# 48 | x = self.classifier(x) 49 | return x 50 | 51 | def _initialize_weights(self): 52 | for m in self.modules(): 53 | if isinstance(m, nn.Conv2d): 54 | nn.init.kaiming_normal_(m.weight, mode = 'fan_out', nonlinearity = 'relu') 55 | if m.bias is not None: 56 | nn.init.constant_(m.bias, 0) 57 | elif isinstance(m, nn.BatchNorm2d): 58 | nn.init.constant_(m.weight, 1) 59 | nn.init.constant_(m.bias, 0) 60 | elif isinstance(m, nn.Linear): 61 | nn.init.normal_(m.weight, 0, 0.01) 62 | nn.init.constant_(m.bias, 0) 63 | 64 | ''' 65 | 假设输入图像为(600, 600, 3),随着cfg的循环,特征层变化如下: 66 | 600,600,3 -> 600,600,64 -> 600,600,64 -> 300,300,64 -> 300,300,128 -> 300,300,128 -> 150,150,128 -> 150,150,256 -> 150,150,256 -> 150,150,256 67 | -> 75,75,256 -> 75,75,512 -> 75,75,512 -> 75,75,512 -> 37,37,512 -> 37,37,512 -> 37,37,512 -> 37,37,512 68 | 到cfg结束,我们获得了一个37,37,512的特征层 69 | ''' 70 | 71 | cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'] 72 | 73 | 74 | #--------------------------------------# 75 | # 特征提取部分 76 | #--------------------------------------# 77 | def make_layers(cfg, batch_norm = False): 78 | layers = [] 79 | in_channels = 3 80 | for v in cfg: 81 | if v == 'M': 82 | layers += [nn.MaxPool2d(kernel_size = 2, stride = 2)] 83 | else: 84 | conv2d = nn.Conv2d(in_channels, v, kernel_size = 3, padding = 1) 85 | if batch_norm: 86 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace = True)] 87 | else: 88 | layers += [conv2d, nn.ReLU(inplace = True)] 89 | in_channels = v 90 | return nn.Sequential(*layers) 91 | 92 | def decom_vgg16(pretrained = False): 93 | model = VGG(make_layers(cfg)) 94 | if pretrained: 95 | state_dict = load_state_dict_from_url("https://download.pytorch.org/models/vgg16-397923af.pth", model_dir = "./model_data") 96 | model.load_state_dict(state_dict) 97 | 
#----------------------------------------------------------------------------# 98 | # 获取特征提取部分,最终获得一个37,37,512的特征层 99 | #----------------------------------------------------------------------------# 100 | features = list(model.features)[:30] 101 | #----------------------------------------------------------------------------# 102 | # 获取分类部分,需要除去Dropout部分 103 | #----------------------------------------------------------------------------# 104 | classifier = list(model.classifier) 105 | del classifier[6] 106 | del classifier[5] 107 | del classifier[2] 108 | 109 | features = nn.Sequential(*features) 110 | classifier = nn.Sequential(*classifier) 111 | return features, classifier 112 | -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/predict.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------# 2 | # 将单张图片预测、摄像头检测和FPS测试功能 3 | # 整合到了一个py文件中,通过指定mode进行模式的修改。 4 | #----------------------------------------------------# 5 | import time 6 | import cv2 7 | import numpy as np 8 | from PIL import Image 9 | import os 10 | from tqdm import tqdm 11 | from frcnn import FRCNN 12 | 13 | 14 | if __name__ == "__main__": 15 | frcnn = FRCNN() 16 | #----------------------------------------------------------------------------------------------------------# 17 | # mode用于指定测试的模式: 18 | # 'predict' 表示单张图片预测,如果想对预测过程进行修改,如保存图片,截取对象等,可以先看下方详细的注释 19 | # 'video' 表示视频检测,可调用摄像头或者视频进行检测,详情查看下方注释。 20 | # 'fps' 表示测试fps,使用的图片是img里面的street.jpg,详情查看下方注释。 21 | # 'dir_predict' 表示遍历文件夹进行检测并保存。默认遍历img文件夹,保存img_out文件夹,详情查看下方注释。 22 | #----------------------------------------------------------------------------------------------------------# 23 | mode = "predict" 24 | #-------------------------------------------------------------------------# 25 | # crop 指定了是否在单张图片预测后对目标进行截取 26 | # count 指定了是否进行目标的计数 27 | # crop、count仅在mode='predict'时有效 28 | #-------------------------------------------------------------------------# 29 | crop = False 30 | count = False 31 | #----------------------------------------------------------------------------------------------------------# 32 | # video_path 用于指定视频的路径,当video_path=0时表示检测摄像头 33 | # 想要检测视频,则设置如video_path = "xxx.mp4"即可,代表读取出根目录下的xxx.mp4文件。 34 | # video_save_path 表示视频保存的路径,当video_save_path=""时表示不保存 35 | # 想要保存视频,则设置如video_save_path = "yyy.mp4"即可,代表保存为根目录下的yyy.mp4文件。 36 | # video_fps 用于保存的视频的fps 37 | # 38 | # video_path、video_save_path和video_fps仅在mode='video'时有效 39 | # 保存视频时需要ctrl+c退出或者运行到最后一帧才会完成完整的保存步骤。 40 | #----------------------------------------------------------------------------------------------------------# 41 | video_path = 0 42 | video_save_path = "" 43 | video_fps = 25.0 44 | #----------------------------------------------------------------------------------------------------------# 45 | # test_interval 用于指定测量fps的时候,图片检测的次数。理论上test_interval越大,fps越准确。 46 | # fps_image_path 用于指定测试的fps图片 47 | # 48 | # test_interval和fps_image_path仅在mode='fps'有效 49 | #----------------------------------------------------------------------------------------------------------# 50 | test_interval = 100 51 | fps_image_path = "img/street.jpg" 52 | #-------------------------------------------------------------------------# 53 | # dir_origin_path 指定了用于检测的图片的文件夹路径 54 | # dir_save_path 指定了检测完图片的保存路径 55 | # 56 | # dir_origin_path和dir_save_path仅在mode='dir_predict'时有效 57 | #-------------------------------------------------------------------------# 58 | dir_origin_path = "img/"
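
A quick check of the split performed by `decom_vgg16` in `nets/vgg16.py` above: keeping the first 30 feature layers drops the final max-pool, so a 600x600 input comes out as a 37x37x512 map, and the classifier half is the two fully-connected layers with the Dropout modules removed.

```python
import torch
from nets.vgg16 import decom_vgg16

features, classifier = decom_vgg16(pretrained=False)
print(features(torch.randn(1, 3, 600, 600)).shape)  # torch.Size([1, 512, 37, 37])
print(classifier)  # Sequential: Linear(25088, 4096) -> ReLU -> Linear(4096, 4096) -> ReLU
```
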
59 | dir_save_path = "img_out/" 60 | 61 | 62 | if mode == "predict": 63 | ''' 64 | 1、该代码无法直接进行批量预测,如果想要批量预测,可以利用os.listdir()遍历文件夹,利用Image.open打开图片文件进行预测。 65 | 具体流程可以参考get_dr_txt.py,在get_dr_txt.py即实现了遍历还实现了目标信息的保存。 66 | 2、如果想要进行检测完的图片的保存,利用r_image.save("img.jpg")即可保存,直接在predict.py里进行修改即可。 67 | 3、如果想要获得预测框的坐标,可以进入frcnn.detect_image函数,在绘图部分读取top,left,bottom,right这四个值。 68 | 4、如果想要利用预测框截取下目标,可以进入frcnn.detect_image函数,在绘图部分利用获取到的top,left,bottom,right这四个值 69 | 在原图上利用矩阵的方式进行截取。 70 | 5、如果想要在预测图上写额外的字,比如检测到的特定目标的数量,可以进入frcnn.detect_image函数,在绘图部分对predicted_class进行判断, 71 | 比如判断if predicted_class == 'car': 即可判断当前目标是否为车,然后记录数量即可。利用draw.text即可写字。 72 | ''' 73 | while True: 74 | img = input('Input image filename:') 75 | try: 76 | image = Image.open(img) 77 | except: 78 | print('Open Error! Try again!') 79 | else: 80 | r_image = frcnn.detect_image(image, crop = crop, count = count) 81 | r_image.show() 82 | 83 | 84 | elif mode == "video": 85 | capture = cv2.VideoCapture(video_path) 86 | if video_save_path != "": 87 | fourcc = cv2.VideoWriter_fourcc(*'XVID') 88 | size = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)), int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))) 89 | out = cv2.VideoWriter(video_save_path, fourcc, video_fps, size) 90 | fps = 0.0 91 | while(True): 92 | t1 = time.time() 93 | # 读取某一帧 94 | ref,frame = capture.read() 95 | # 格式转变,BGRtoRGB 96 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 97 | # 转变成Image 98 | frame = Image.fromarray(np.uint8(frame)) 99 | # 进行检测 100 | frame = np.array(frcnn.detect_image(frame)) 101 | # RGBtoBGR满足opencv显示格式 102 | frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) 103 | fps = ( fps + (1. / (time.time() - t1)) ) / 2 104 | print("fps = %.2f"%(fps)) 105 | frame = cv2.putText(frame, "fps = %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) 106 | cv2.imshow("video", frame) 107 | c = cv2.waitKey(1) & 0xff 108 | if video_save_path != "": 109 | out.write(frame) 110 | if c == 27: 111 | capture.release() 112 | break 113 | capture.release() 114 | out.release() 115 | cv2.destroyAllWindows() 116 | 117 | 118 | elif mode == "fps": 119 | img = Image.open(fps_image_path) 120 | tact_time = frcnn.get_FPS(img, test_interval) 121 | print(str(tact_time) + ' seconds, ' + str(1 / tact_time) + 'FPS, @batch_size 1') 122 | 123 | 124 | elif mode == "dir_predict": 125 | img_names = os.listdir(dir_origin_path) 126 | for img_name in tqdm(img_names): 127 | if img_name.lower().endswith(('.bmp', '.dib', '.png', '.jpg', '.jpeg', '.pbm', '.pgm', '.ppm', '.tif', '.tiff')): 128 | image_path = os.path.join(dir_origin_path, img_name) 129 | image = Image.open(image_path) 130 | r_image = frcnn.detect_image(image) 131 | if not os.path.exists(dir_save_path): 132 | os.makedirs(dir_save_path) 133 | r_image.save(os.path.join(dir_save_path, img_name.replace(".jpg", ".png")), quality = 95, subsampling = 0) 134 | 135 | 136 | else: 137 | raise AssertionError("Please specify the correct mode: 'predict', 'video', 'fps' or 'dir_predict'.") 138 | -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/requirements.txt: -------------------------------------------------------------------------------- 1 | python == 3.10.6 2 | numpy == 1.23.3 3 | opencv == 4.6.0 4 | pillow == 9.2.0 5 | pycocotools == 2.0.6 6 | pytorch == 1.12.1 7 | scipy == 1.9.3 8 | torchvision == 0.13.1 9 | tqdm == 4.64.1 10 | matplotlib == 3.6.2 11 | hdf5 == 1.12.1 -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/summary.py: 
-------------------------------------------------------------------------------- 1 | #--------------------------------------------# 2 | # 该部分代码用于看网络结构 3 | #--------------------------------------------# 4 | import torch 5 | from thop import clever_format, profile 6 | from torchsummary import summary 7 | 8 | from nets.frcnn import FasterRCNN 9 | 10 | if __name__ == "__main__": 11 | input_shape = [600, 600] 12 | num_classes = 21 13 | 14 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 15 | model = FasterRCNN(num_classes, backbone = 'vgg').to(device) 16 | summary(model, (3, input_shape[0], input_shape[1])) 17 | 18 | dummy_input = torch.randn(1, 3, input_shape[0], input_shape[1]).to(device) 19 | flops, params = profile(model.to(device), (dummy_input, ), verbose = False) 20 | #--------------------------------------------------------# 21 | # flops * 2是因为profile没有将卷积作为两个operations 22 | # 有些论文将卷积算乘法、加法两个operations。此时乘2 23 | # 有些论文只考虑乘法的运算次数,忽略加法。此时不乘2 24 | # 本代码选择乘2,参考YOLOX。 25 | #--------------------------------------------------------# 26 | flops = flops * 2 27 | flops, params = clever_format([flops, params], "%.3f") 28 | print('Total GFLOPS: %s' % (flops)) 29 | print('Total params: %s' % (params)) 30 | -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/train.py: -------------------------------------------------------------------------------- 1 | #-------------------------------------# 2 | # 对数据集进行训练 3 | #-------------------------------------# 4 | import os 5 | import datetime 6 | 7 | import numpy as np 8 | import torch 9 | import torch.backends.cudnn as cudnn 10 | import torch.optim as optim 11 | from torch.utils.data import DataLoader 12 | 13 | from nets.frcnn import FasterRCNN 14 | from nets.frcnn_training import (FasterRCNNTrainer, get_lr_scheduler, 15 | set_optimizer_lr, weights_init) 16 | from utils.callbacks import EvalCallback, LossHistory 17 | from utils.dataloader import FRCNNDataset, frcnn_dataset_collate 18 | from utils.utils import get_classes, show_config 19 | from utils.utils_fit import fit_one_epoch 20 | 21 | 22 | ''' 23 | 训练自己的目标检测模型一定需要注意以下几点: 24 | 1、训练前仔细检查自己的格式是否满足要求,该库要求数据集格式为VOC格式,需要准备好的内容有输入图片和标签 25 | 输入图片为.jpg图片,无需固定大小,传入训练前会自动进行resize。 26 | 灰度图会自动转成RGB图片进行训练,无需自己修改。 27 | 输入图片如果后缀非jpg,需要自己批量转成jpg后再开始训练。 28 | 29 | 标签为.xml格式,文件中会有需要检测的目标信息,标签文件和输入图片文件相对应。 30 | 31 | 2、损失值的大小用于判断是否收敛,比较重要的是有收敛的趋势,即验证集损失不断下降,如果验证集损失基本上不改变的话,模型基本上就收敛了。 32 | 损失值的具体大小并没有什么意义,大和小只在于损失的计算方式,并不是接近于0才好。如果想要让损失好看点,可以直接到对应的损失函数里面除上10000。 33 | 训练过程中的损失值会保存在logs文件夹下的loss_%Y_%m_%d_%H_%M_%S文件夹中 34 | 35 | 3、训练好的权值文件保存在logs文件夹中,每个训练世代(Epoch)包含若干训练步长(Step),每个训练步长(Step)进行一次梯度下降。 36 | 如果只是训练了几个Step是不会保存的,Epoch和Step的概念要捋清楚一下。 37 | ''' 38 | 39 | 40 | if __name__ == "__main__": 41 | #-------------------------------# 42 | # 是否使用Cuda 43 | # 没有GPU可以设置成False 44 | #-------------------------------# 45 | Cuda = False 46 | #---------------------------------------------------------------------# 47 | # train_gpu 训练用到的GPU 48 | # 默认为第一张卡、双卡为[0, 1]、三卡为[0, 1, 2] 49 | # 在使用多GPU时,每个卡上的batch为总batch除以卡的数量。 50 | #---------------------------------------------------------------------# 51 | train_gpu = [0] 52 | #---------------------------------------------------------------------# 53 | # fp16 是否使用混合精度训练 54 | # 可减少约一半的显存、需要pytorch1.7.1以上 55 | #---------------------------------------------------------------------# 56 | fp16 = False 57 | #---------------------------------------------------------------------# 58 | # classes_path 指向model_data下的txt,与自己训练的数据集相关 
59 | # 训练前一定要修改classes_path,使其对应自己的数据集 60 | #---------------------------------------------------------------------# 61 | classes_path = './faster-rcnn-pytorch-master/model_data/voc_classes.txt' 62 | #----------------------------------------------------------------------------------------------------------------------------# 63 | # 权值文件的下载请看README,可以通过网盘下载。模型的 预训练权重 对不同数据集是通用的,因为特征是通用的。 64 | # 模型的 预训练权重 比较重要的部分是 主干特征提取网络的权值部分,用于进行特征提取。 65 | # 预训练权重对于99%的情况都必须要用,不用的话主干部分的权值太过随机,特征提取效果不明显,网络训练的结果也不会好 66 | # 67 | # 如果训练过程中存在中断训练的操作,可以将model_path设置成logs文件夹下的权值文件,将已经训练了一部分的权值再次载入。 68 | # 同时修改下方的 冻结阶段 或者 解冻阶段 的参数,来保证模型epoch的连续性。 69 | # 70 | # 当model_path = ''的时候不加载整个模型的权值。 71 | # 72 | # 此处使用的是整个模型的权重,因此是在train.py进行加载的,下面的pretrained不影响此处的权值加载。 73 | # 如果想要让模型从主干的预训练权值开始训练,则设置model_path = '',下面的pretrained = True,此时仅加载主干。 74 | # 如果想要让模型从0开始训练,则设置model_path = '',下面的pretrained = False,Freeze_Train = False,此时从0开始训练,且没有冻结主干的过程。 75 | # 76 | # 一般来讲,网络从0开始的训练效果会很差,因为权值太过随机,特征提取效果不明显,因此非常、非常、非常不建议大家从0开始训练! 77 | # 如果一定要从0开始,可以了解imagenet数据集,首先训练分类模型,获得网络的主干部分权值,分类模型的 主干部分 和该模型通用,基于此进行训练。 78 | #----------------------------------------------------------------------------------------------------------------------------# 79 | model_path = './faster-rcnn-pytorch-master/model_data/voc_weights_resnet.pth' 80 | #------------------------------------------------------# 81 | # input_shape 输入的shape大小 82 | #------------------------------------------------------# 83 | input_shape = [600, 600] 84 | #---------------------------------------------# 85 | # vgg 86 | # resnet50 87 | #---------------------------------------------# 88 | backbone = "resnet50" 89 | #----------------------------------------------------------------------------------------------------------------------------# 90 | # pretrained 是否使用主干网络的预训练权重,此处使用的是主干的权重,因此是在模型构建的时候进行加载的。 91 | # 如果设置了model_path,则主干的权值无需加载,pretrained的值无意义。 92 | # 如果不设置model_path,pretrained = True,此时仅加载主干开始训练。 93 | # 如果不设置model_path,pretrained = False,Freeze_Train = False,此时从0开始训练,且没有冻结主干的过程。 94 | #----------------------------------------------------------------------------------------------------------------------------# 95 | pretrained = False 96 | #------------------------------------------------------------------------# 97 | # anchors_size用于设定先验框的大小,每个特征点均存在9个先验框。 98 | # anchors_size每个数对应3个先验框。 99 | # 当anchors_size = [8, 16, 32]的时候,生成的先验框宽高约为: 100 | # [90, 180] ; [180, 360]; [360, 720]; [128, 128]; 101 | # [256, 256]; [512, 512]; [180, 90] ; [360, 180]; 102 | # [720, 360]; 详情查看anchors.py 103 | # 如果想要检测小物体,可以减小anchors_size靠前的数。 104 | # 比如设置anchors_size = [4, 16, 32] 105 | #------------------------------------------------------------------------# 106 | anchors_size = [8, 16, 32] 107 | 108 | #----------------------------------------------------------------------------------------------------------------------------# 109 | # 训练分为两个阶段,分别是冻结阶段和解冻阶段。设置冻结阶段是为了满足机器性能不足的同学的训练需求。 110 | # 冻结训练需要的显存较小,显卡非常差的情况下,可设置Freeze_Epoch等于UnFreeze_Epoch,此时仅仅进行冻结训练。 111 | # 112 | # 在此提供若干参数设置建议,各位训练者根据自己的需求进行灵活调整: 113 | # (一)从整个模型的预训练权重开始训练: 114 | # Adam: 115 | # Init_Epoch = 0,Freeze_Epoch = 50,UnFreeze_Epoch = 100,Freeze_Train = True,optimizer_type = 'adam',Init_lr = 1e-4。(冻结) 116 | # Init_Epoch = 0,UnFreeze_Epoch = 100,Freeze_Train = False,optimizer_type = 'adam',Init_lr = 1e-4。(不冻结) 117 | # SGD: 118 | # Init_Epoch = 0,Freeze_Epoch = 50,UnFreeze_Epoch = 150,Freeze_Train = True,optimizer_type = 'sgd',Init_lr = 1e-2。(冻结) 119 | # Init_Epoch = 0,UnFreeze_Epoch = 150,Freeze_Train = False,optimizer_type = 'sgd',Init_lr
= 1e-2。(不冻结) 120 | # 其中:UnFreeze_Epoch可以在100-300之间调整。 121 | # (二)从主干网络的预训练权重开始训练: 122 | # Adam: 123 | # Init_Epoch = 0,Freeze_Epoch = 50,UnFreeze_Epoch = 100,Freeze_Train = True,optimizer_type = 'adam',Init_lr = 1e-4。(冻结) 124 | # Init_Epoch = 0,UnFreeze_Epoch = 100,Freeze_Train = False,optimizer_type = 'adam',Init_lr = 1e-4。(不冻结) 125 | # SGD: 126 | # Init_Epoch = 0,Freeze_Epoch = 50,UnFreeze_Epoch = 150,Freeze_Train = True,optimizer_type = 'sgd',Init_lr = 1e-2。(冻结) 127 | # Init_Epoch = 0,UnFreeze_Epoch = 150,Freeze_Train = False,optimizer_type = 'sgd',Init_lr = 1e-2。(不冻结) 128 | # 其中:由于从主干网络的预训练权重开始训练,主干的权值不一定适合目标检测,需要更多的训练跳出局部最优解。 129 | # UnFreeze_Epoch可以在150-300之间调整,YOLOV5和YOLOX均推荐使用300。 130 | # Adam相较于SGD收敛的快一些。因此UnFreeze_Epoch理论上可以小一点,但依然推荐更多的Epoch。 131 | # (三)batch_size的设置: 132 | # 在显卡能够接受的范围内,以大为好。显存不足与数据集大小无关,提示显存不足(OOM或者CUDA out of memory)请调小batch_size。 133 | # faster rcnn的BatchNormalization层已经冻结,batch_size可以为1 134 | #----------------------------------------------------------------------------------------------------------------------------# 135 | #------------------------------------------------------------------# 136 | # 冻结阶段训练参数 137 | # 此时模型的主干被冻结了,特征提取网络不发生改变 138 | # 占用的显存较小,仅对网络进行微调 139 | # Init_Epoch 模型当前开始的训练世代,其值可以大于Freeze_Epoch,如设置: 140 | # Init_Epoch = 60、Freeze_Epoch = 50、UnFreeze_Epoch = 100 141 | # 会跳过冻结阶段,直接从60代开始,并调整对应的学习率。 142 | # (断点续练时使用) 143 | # Freeze_Epoch 模型冻结训练的Freeze_Epoch 144 | # (当Freeze_Train=False时失效) 145 | # Freeze_batch_size 模型冻结训练的batch_size 146 | # (当Freeze_Train=False时失效) 147 | #------------------------------------------------------------------# 148 | Init_Epoch = 0 149 | Freeze_Epoch = 50 150 | Freeze_batch_size = 4 151 | #------------------------------------------------------------------# 152 | # 解冻阶段训练参数 153 | # 此时模型的主干不被冻结了,特征提取网络会发生改变 154 | # 占用的显存较大,网络所有的参数都会发生改变 155 | # UnFreeze_Epoch 模型总共训练的epoch 156 | # SGD需要更长的时间收敛,因此设置较大的UnFreeze_Epoch 157 | # Adam可以使用相对较小的UnFreeze_Epoch 158 | # Unfreeze_batch_size 模型在解冻后的batch_size 159 | #------------------------------------------------------------------# 160 | UnFreeze_Epoch = 100 161 | Unfreeze_batch_size = 2 162 | #------------------------------------------------------------------# 163 | # Freeze_Train 是否进行冻结训练 164 | # 默认先冻结主干训练后解冻训练。 165 | # 如果设置Freeze_Train=False,建议使用优化器为sgd 166 | #------------------------------------------------------------------# 167 | Freeze_Train = True 168 | 169 | #------------------------------------------------------------------# 170 | # 其它训练参数:学习率、优化器、学习率下降有关 171 | #------------------------------------------------------------------# 172 | #------------------------------------------------------------------# 173 | # Init_lr 模型的最大学习率 174 | # 当使用Adam优化器时建议设置 Init_lr=1e-4 175 | # 当使用SGD优化器时建议设置 Init_lr=1e-2 176 | # Min_lr 模型的最小学习率,默认为最大学习率的0.01 177 | #------------------------------------------------------------------# 178 | Init_lr = 1e-4 179 | Min_lr = Init_lr * 0.01 180 | #------------------------------------------------------------------# 181 | # optimizer_type 使用到的优化器种类,可选的有adam、sgd 182 | # 当使用Adam优化器时建议设置 Init_lr=1e-4 183 | # 当使用SGD优化器时建议设置 Init_lr=1e-2 184 | # momentum 优化器内部使用到的momentum参数 185 | # weight_decay 权值衰减,可防止过拟合 186 | # adam会导致weight_decay错误,使用adam时建议设置为0。 187 | #------------------------------------------------------------------# 188 | optimizer_type = "adam" 189 | momentum = 0.9 190 | weight_decay = 0 191 | #------------------------------------------------------------------# 192 | # lr_decay_type 使用到的学习率下降方式,可选的有'step'、'cos' 193
#------------------------------------------------------------------# 194 | lr_decay_type = 'cos' 195 | #------------------------------------------------------------------# 196 | # save_period 多少个epoch保存一次权值 197 | #------------------------------------------------------------------# 198 | save_period = 5 199 | #------------------------------------------------------------------# 200 | # save_dir 权值与日志文件保存的文件夹 201 | #------------------------------------------------------------------# 202 | save_dir = 'logs' 203 | #------------------------------------------------------------------# 204 | # eval_flag 是否在训练时进行评估,评估对象为验证集 205 | # 安装pycocotools库后,评估体验更佳。 206 | # eval_period 代表多少个epoch评估一次,不建议频繁的评估 207 | # 评估需要消耗较多的时间,频繁评估会导致训练非常慢 208 | # 此处获得的mAP会与get_map.py获得的会有所不同,原因有二: 209 | # (一)此处获得的mAP为验证集的mAP。 210 | # (二)此处设置评估参数较为保守,目的是加快评估速度。 211 | #------------------------------------------------------------------# 212 | eval_flag = True 213 | eval_period = 5 214 | #------------------------------------------------------------------# 215 | # num_workers 用于设置是否使用多线程读取数据,1代表关闭多线程 216 | # 开启后会加快数据读取速度,但是会占用更多内存 217 | # 在IO为瓶颈的时候再开启多线程,即GPU运算速度远大于读取图片的速度。 218 | #------------------------------------------------------------------# 219 | num_workers = 4 220 | #----------------------------------------------------# 221 | # 获得图片路径和标签 222 | #----------------------------------------------------# 223 | train_annotation_path = '2007_train.txt' 224 | val_annotation_path = '2007_val.txt' 225 | 226 | #----------------------------------------------------# 227 | # 获取classes和anchor 228 | #----------------------------------------------------# 229 | class_names, num_classes = get_classes(classes_path) 230 | 231 | #------------------------------------------------------# 232 | # 设置用到的显卡 233 | #------------------------------------------------------# 234 | os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(x) for x in train_gpu) 235 | ngpus_per_node = len(train_gpu) 236 | print('Number of devices: {}'.format(ngpus_per_node)) 237 | 238 | model = FasterRCNN(num_classes, anchor_scales = anchors_size, backbone = backbone, pretrained = pretrained) 239 | if not pretrained: 240 | weights_init(model) 241 | if model_path != '': 242 | #------------------------------------------------------# 243 | # 权值文件请看README,百度网盘下载 244 | #------------------------------------------------------# 245 | print('Load weights {}.'.format(model_path)) 246 | 247 | #------------------------------------------------------# 248 | # 根据预训练权重的Key和模型的Key进行加载 249 | #------------------------------------------------------# 250 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 251 | model_dict = model.state_dict() 252 | pretrained_dict = torch.load(model_path, map_location = device) 253 | load_key, no_load_key, temp_dict = [], [], {} 254 | for k, v in pretrained_dict.items(): 255 | if k in model_dict.keys() and np.shape(model_dict[k]) == np.shape(v): 256 | temp_dict[k] = v 257 | load_key.append(k) 258 | else: 259 | no_load_key.append(k) 260 | model_dict.update(temp_dict) 261 | model.load_state_dict(model_dict) 262 | #------------------------------------------------------# 263 | # 显示没有匹配上的Key 264 | #------------------------------------------------------# 265 | print("\nSuccessful Load Key:", str(load_key)[:500], "……\nSuccessful Load Key Num:", len(load_key)) 266 | print("\nFail To Load Key:", str(no_load_key)[:500], "……\nFail To Load Key num:", len(no_load_key)) 267 | print("\n\033[1;33;44m温馨提示,head部分没有载入是正常现象,Backbone部分没有载入是错误的。\033[0m") 268 | 269 | 
#----------------------# 270 | # 记录Loss 271 | #----------------------# 272 | time_str = datetime.datetime.strftime(datetime.datetime.now(),'%Y_%m_%d_%H_%M_%S') 273 | log_dir = os.path.join(save_dir, "loss_" + str(time_str)) 274 | loss_history = LossHistory(log_dir, model, input_shape = input_shape) 275 | 276 | #------------------------------------------------------------------# 277 | # torch 1.2不支持amp,建议使用torch 1.7.1及以上正确使用fp16 278 | # 因此torch1.2这里显示"could not be resolve" 279 | #------------------------------------------------------------------# 280 | if fp16: 281 | from torch.cuda.amp import GradScaler as GradScaler 282 | scaler = GradScaler() 283 | else: 284 | scaler = None 285 | 286 | model_train = model.train() 287 | if Cuda: 288 | model_train = torch.nn.DataParallel(model_train) 289 | cudnn.benchmark = True 290 | model_train = model_train.cuda() 291 | 292 | #---------------------------# 293 | # 读取数据集对应的txt 294 | #---------------------------# 295 | with open(train_annotation_path, encoding='utf-8') as f: 296 | train_lines = f.readlines() 297 | with open(val_annotation_path, encoding='utf-8') as f: 298 | val_lines = f.readlines() 299 | num_train = len(train_lines) 300 | num_val = len(val_lines) 301 | 302 | show_config( 303 | classes_path = classes_path, model_path = model_path, input_shape = input_shape, \ 304 | Init_Epoch = Init_Epoch, Freeze_Epoch = Freeze_Epoch, UnFreeze_Epoch = UnFreeze_Epoch, Freeze_batch_size = Freeze_batch_size, Unfreeze_batch_size = Unfreeze_batch_size, Freeze_Train = Freeze_Train, \ 305 | Init_lr = Init_lr, Min_lr = Min_lr, optimizer_type = optimizer_type, momentum = momentum, lr_decay_type = lr_decay_type, \ 306 | save_period = save_period, save_dir = save_dir, num_workers = num_workers, num_train = num_train, num_val = num_val 307 | ) 308 | #---------------------------------------------------------# 309 | # 总训练世代指的是遍历全部数据的总次数 310 | # 总训练步长指的是梯度下降的总次数 311 | # 每个训练世代包含若干训练步长,每个训练步长进行一次梯度下降。 312 | # 此处仅建议最低训练世代,上不封顶,计算时只考虑了解冻部分 313 | #----------------------------------------------------------# 314 | wanted_step = 5e4 if optimizer_type == "sgd" else 1.5e4 315 | total_step = num_train // Unfreeze_batch_size * UnFreeze_Epoch 316 | if total_step <= wanted_step: 317 | if num_train // Unfreeze_batch_size == 0: 318 | raise ValueError('数据集过小,无法进行训练,请扩充数据集。') 319 | wanted_epoch = wanted_step // (num_train // Unfreeze_batch_size) + 1 320 | print("\n\033[1;33;44m[Warning] 使用%s优化器时,建议将训练总步长设置到%d以上。\033[0m"%(optimizer_type, wanted_step)) 321 | print("\033[1;33;44m[Warning] 本次运行的总训练数据量为%d,Unfreeze_batch_size为%d,共训练%d个Epoch,计算出总训练步长为%d。\033[0m"%(num_train, Unfreeze_batch_size, UnFreeze_Epoch, total_step)) 322 | print("\033[1;33;44m[Warning] 由于总训练步长为%d,小于建议总步长%d,建议设置总世代为%d。\033[0m"%(total_step, wanted_step, wanted_epoch)) 323 | 324 | #------------------------------------------------------# 325 | # 主干特征提取网络特征通用,冻结训练可以加快训练速度 326 | # 也可以在训练初期防止权值被破坏。 327 | # Init_Epoch为起始世代 328 | # Freeze_Epoch为冻结训练的世代 329 | # UnFreeze_Epoch总训练世代 330 | # 提示OOM或者显存不足请调小Batch_size 331 | #------------------------------------------------------# 332 | if True: 333 | UnFreeze_flag = False 334 | #------------------------------------# 335 | # 冻结一定部分训练 336 | #------------------------------------# 337 | if Freeze_Train: 338 | for param in model.extractor.parameters(): 339 | param.requires_grad = False 340 | # ------------------------------------# 341 | # 冻结bn层 342 | # ------------------------------------# 343 | model.freeze_bn() 344 | 345 | 
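
In practice the freeze stage above just stops gradients to the backbone and keeps its BatchNorm statistics fixed, so only the RPN and head are updated. A quick way to see how much of the model stays trainable (a sketch, assuming the repo root is on the path):

```python
from nets.frcnn import FasterRCNN

model = FasterRCNN(num_classes=20, backbone='resnet50', pretrained=False)
for param in model.extractor.parameters():
    param.requires_grad = False     # freeze the backbone
model.freeze_bn()                   # keep BatchNorm layers in eval mode

trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total = sum(p.numel() for p in model.parameters())
print(f"trainable parameters: {trainable:,} / {total:,}")
```
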
#-------------------------------------------------------------------# 346 | # 如果不冻结训练的话,直接设置batch_size为Unfreeze_batch_size 347 | #-------------------------------------------------------------------# 348 | batch_size = Freeze_batch_size if Freeze_Train else Unfreeze_batch_size 349 | 350 | #-------------------------------------------------------------------# 351 | # 判断当前batch_size,自适应调整学习率 352 | #-------------------------------------------------------------------# 353 | nbs = 16 354 | lr_limit_max = 1e-4 if optimizer_type == 'adam' else 5e-2 355 | lr_limit_min = 1e-4 if optimizer_type == 'adam' else 5e-4 356 | Init_lr_fit = min(max(batch_size / nbs * Init_lr, lr_limit_min), lr_limit_max) 357 | Min_lr_fit = min(max(batch_size / nbs * Min_lr, lr_limit_min * 1e-2), lr_limit_max * 1e-2) 358 | 359 | #---------------------------------------# 360 | # 根据optimizer_type选择优化器 361 | #---------------------------------------# 362 | optimizer = { 363 | 'adam' : optim.Adam(model.parameters(), Init_lr_fit, betas = (momentum, 0.999), weight_decay = weight_decay), 364 | 'sgd' : optim.SGD(model.parameters(), Init_lr_fit, momentum = momentum, nesterov=True, weight_decay = weight_decay) 365 | }[optimizer_type] 366 | 367 | #---------------------------------------# 368 | # 获得学习率下降的公式 369 | #---------------------------------------# 370 | lr_scheduler_func = get_lr_scheduler(lr_decay_type, Init_lr_fit, Min_lr_fit, UnFreeze_Epoch) 371 | 372 | #---------------------------------------# 373 | # 判断每一个世代的长度 374 | #---------------------------------------# 375 | epoch_step = num_train // batch_size 376 | epoch_step_val = num_val // batch_size 377 | 378 | if epoch_step == 0 or epoch_step_val == 0: 379 | raise ValueError("数据集过小,无法继续进行训练,请扩充数据集。") 380 | 381 | train_dataset = FRCNNDataset(train_lines, input_shape, train = True) 382 | val_dataset = FRCNNDataset(val_lines, input_shape, train = False) 383 | 384 | gen = DataLoader(train_dataset, shuffle = True, batch_size = batch_size, num_workers = num_workers, pin_memory = True, 385 | drop_last = True, collate_fn = frcnn_dataset_collate) 386 | gen_val = DataLoader(val_dataset , shuffle = True, batch_size = batch_size, num_workers = num_workers, pin_memory = True, 387 | drop_last = True, collate_fn = frcnn_dataset_collate) 388 | 389 | train_util = FasterRCNNTrainer(model_train, optimizer) 390 | #----------------------# 391 | # 记录eval的map曲线 392 | #----------------------# 393 | eval_callback = EvalCallback(model_train, input_shape, class_names, num_classes, val_lines, log_dir, Cuda, \ 394 | eval_flag=eval_flag, period=eval_period) 395 | 396 | #---------------------------------------# 397 | # 开始模型训练 398 | #---------------------------------------# 399 | for epoch in range(Init_Epoch, UnFreeze_Epoch): 400 | #---------------------------------------# 401 | # 如果模型有冻结学习部分 402 | # 则解冻,并设置参数 403 | #---------------------------------------# 404 | if epoch >= Freeze_Epoch and not UnFreeze_flag and Freeze_Train: 405 | batch_size = Unfreeze_batch_size 406 | 407 | #-------------------------------------------------------------------# 408 | # 判断当前batch_size,自适应调整学习率 409 | #-------------------------------------------------------------------# 410 | nbs = 16 411 | lr_limit_max = 1e-4 if optimizer_type == 'adam' else 5e-2 412 | lr_limit_min = 1e-4 if optimizer_type == 'adam' else 5e-4 413 | Init_lr_fit = min(max(batch_size / nbs * Init_lr, lr_limit_min), lr_limit_max) 414 | Min_lr_fit = min(max(batch_size / nbs * Min_lr, lr_limit_min * 1e-2), lr_limit_max * 1e-2) 415 | 
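
The `nbs = 16` block above scales the learning rate linearly with the batch size and then clamps it to per-optimizer limits. Worked out for the defaults, the clamp pins Adam to `1e-4` regardless of batch size, so the scaling mainly matters for SGD:

```python
Init_lr, Min_lr, nbs = 1e-4, 1e-6, 16
lr_limit_max, lr_limit_min = 1e-4, 1e-4   # the 'adam' limits from the code above

for batch_size in (2, 4, 16):
    Init_lr_fit = min(max(batch_size / nbs * Init_lr, lr_limit_min), lr_limit_max)
    Min_lr_fit = min(max(batch_size / nbs * Min_lr, lr_limit_min * 1e-2), lr_limit_max * 1e-2)
    print(batch_size, Init_lr_fit, Min_lr_fit)   # always 1e-4 and 1e-6 for adam
```
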
#---------------------------------------# 416 | # 获得学习率下降的公式 417 | #---------------------------------------# 418 | lr_scheduler_func = get_lr_scheduler(lr_decay_type, Init_lr_fit, Min_lr_fit, UnFreeze_Epoch) 419 | 420 | for param in model.extractor.parameters(): 421 | param.requires_grad = True 422 | # ------------------------------------# 423 | # 冻结bn层 424 | # ------------------------------------# 425 | model.freeze_bn() 426 | 427 | epoch_step = num_train // batch_size 428 | epoch_step_val = num_val // batch_size 429 | 430 | if epoch_step == 0 or epoch_step_val == 0: 431 | raise ValueError("数据集过小,无法继续进行训练,请扩充数据集。") 432 | 433 | gen = DataLoader(train_dataset, shuffle = True, batch_size = batch_size, num_workers = num_workers, pin_memory=True, 434 | drop_last=True, collate_fn=frcnn_dataset_collate) 435 | gen_val = DataLoader(val_dataset , shuffle = True, batch_size = batch_size, num_workers = num_workers, pin_memory=True, 436 | drop_last=True, collate_fn=frcnn_dataset_collate) 437 | 438 | UnFreeze_flag = True 439 | 440 | set_optimizer_lr(optimizer, lr_scheduler_func, epoch) 441 | 442 | fit_one_epoch(model, train_util, loss_history, eval_callback, optimizer, epoch, epoch_step, epoch_step_val, gen, gen_val, UnFreeze_Epoch, Cuda, fp16, scaler, save_period, save_dir) 443 | 444 | loss_history.writer.close() -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/utils/anchors.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | #--------------------------------------------# 4 | # 生成基础的先验框 5 | #--------------------------------------------# 6 | def generate_anchor_base(base_size = 16, ratios = [0.5, 1, 2], anchor_scales = [8, 16, 32]): 7 | anchor_base = np.zeros((len(ratios) * len(anchor_scales), 4), dtype = np.float32) 8 | for i in range(len(ratios)): 9 | for j in range(len(anchor_scales)): 10 | h = base_size * anchor_scales[j] * np.sqrt(ratios[i]) 11 | w = base_size * anchor_scales[j] * np.sqrt(1. / ratios[i]) 12 | 13 | index = i * len(anchor_scales) + j 14 | anchor_base[index, 0] = - h / 2. 15 | anchor_base[index, 1] = - w / 2. 16 | anchor_base[index, 2] = h / 2. 17 | anchor_base[index, 3] = w / 2. 
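#--------------------------------------------#
# Note on the geometry above: h * w == (base_size * anchor_scale)**2
# and h / w == ratio, so every anchor of a given scale keeps the same
# area while the ratio only reshapes it. E.g. base_size=16, scale=8,
# ratio=0.5 gives h = 128 * sqrt(0.5) ~ 90.5 and w = 128 * sqrt(2) ~ 181.0.
#--------------------------------------------#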
18 | return anchor_base 19 | 20 | #--------------------------------------------# 21 | # 对基础先验框进行拓展对应到所有特征点上 22 | #--------------------------------------------# 23 | def _enumerate_shifted_anchor(anchor_base, feat_stride, height, width): 24 | #---------------------------------# 25 | # 计算网格中心点 26 | #---------------------------------# 27 | shift_x = np.arange(0, width * feat_stride, feat_stride) 28 | shift_y = np.arange(0, height * feat_stride, feat_stride) 29 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 30 | shift = np.stack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel(),), axis=1) 31 | 32 | #---------------------------------# 33 | # 每个网格点上的9个先验框 34 | #---------------------------------# 35 | A = anchor_base.shape[0] 36 | K = shift.shape[0] 37 | anchor = anchor_base.reshape((1, A, 4)) + shift.reshape((K, 1, 4)) 38 | #---------------------------------# 39 | # 所有的先验框 40 | #---------------------------------# 41 | anchor = anchor.reshape((K * A, 4)).astype(np.float32) 42 | return anchor 43 | 44 | if __name__ == "__main__": 45 | import matplotlib.pyplot as plt 46 | nine_anchors = generate_anchor_base() 47 | print(nine_anchors) 48 | 49 | height, width, feat_stride = 38,38,16 50 | anchors_all = _enumerate_shifted_anchor(nine_anchors, feat_stride, height, width) 51 | print(np.shape(anchors_all)) 52 | 53 | fig = plt.figure() 54 | ax = fig.add_subplot(111) 55 | plt.ylim(-300,900) 56 | plt.xlim(-300,900) 57 | shift_x = np.arange(0, width * feat_stride, feat_stride) 58 | shift_y = np.arange(0, height * feat_stride, feat_stride) 59 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 60 | plt.scatter(shift_x,shift_y) 61 | box_widths = anchors_all[:,2]-anchors_all[:,0] 62 | box_heights = anchors_all[:,3]-anchors_all[:,1] 63 | 64 | for i in [108, 109, 110, 111, 112, 113, 114, 115, 116]: 65 | rect = plt.Rectangle([anchors_all[i, 0],anchors_all[i, 1]],box_widths[i],box_heights[i],color="r",fill=False) 66 | ax.add_patch(rect) 67 | plt.show() 68 | -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/utils/callbacks.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import matplotlib 4 | import torch 5 | 6 | matplotlib.use('Agg') 7 | from matplotlib import pyplot as plt 8 | import scipy.signal 9 | 10 | import shutil 11 | import numpy as np 12 | from PIL import Image 13 | from torch.utils.tensorboard import SummaryWriter 14 | from tqdm import tqdm 15 | 16 | from .utils import cvtColor, resize_image, preprocess_input, get_new_img_size 17 | from .utils_bbox import DecodeBox 18 | from .utils_map import get_coco_map, get_map 19 | 20 | class LossHistory(): 21 | def __init__(self, log_dir, model, input_shape): 22 | self.log_dir = log_dir 23 | self.losses = [] 24 | self.val_loss = [] 25 | 26 | os.makedirs(self.log_dir) 27 | self.writer = SummaryWriter(self.log_dir) 28 | # try: 29 | # dummy_input = torch.randn(2, 3, input_shape[0], input_shape[1]) 30 | # self.writer.add_graph(model, dummy_input) 31 | # except: 32 | # pass 33 | 34 | def append_loss(self, epoch, loss, val_loss): 35 | if not os.path.exists(self.log_dir): 36 | os.makedirs(self.log_dir) 37 | 38 | self.losses.append(loss) 39 | self.val_loss.append(val_loss) 40 | 41 | with open(os.path.join(self.log_dir, "epoch_loss.txt"), 'a') as f: 42 | f.write(str(loss)) 43 | f.write("\n") 44 | with open(os.path.join(self.log_dir, "epoch_val_loss.txt"), 'a') as f: 45 | f.write(str(val_loss)) 46 | f.write("\n") 47 | 48 | 
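# The add_scalar calls below mirror the loss txt files into TensorBoard;
# assuming save_dir points at the repository's logs folder, the curves
# can be viewed during training with: tensorboard --logdir logs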
self.writer.add_scalar('loss', loss, epoch) 49 | self.writer.add_scalar('val_loss', val_loss, epoch) 50 | self.loss_plot() 51 | 52 | def loss_plot(self): 53 | iters = range(len(self.losses)) 54 | 55 | plt.figure() 56 | plt.plot(iters, self.losses, 'red', linewidth = 2, label='train loss') 57 | plt.plot(iters, self.val_loss, 'coral', linewidth = 2, label='val loss') 58 | try: 59 | if len(self.losses) < 25: 60 | num = 5 61 | else: 62 | num = 15 63 | 64 | plt.plot(iters, scipy.signal.savgol_filter(self.losses, num, 3), 'green', linestyle = '--', linewidth = 2, label='smooth train loss') 65 | plt.plot(iters, scipy.signal.savgol_filter(self.val_loss, num, 3), '#8B4513', linestyle = '--', linewidth = 2, label='smooth val loss') 66 | except: 67 | pass 68 | 69 | plt.grid(True) 70 | plt.xlabel('Epoch') 71 | plt.ylabel('Loss') 72 | plt.legend(loc="upper right") 73 | 74 | plt.savefig(os.path.join(self.log_dir, "epoch_loss.png")) 75 | 76 | plt.cla() 77 | plt.close("all") 78 | 79 | class EvalCallback(): 80 | def __init__(self, net, input_shape, class_names, num_classes, val_lines, log_dir, cuda, \ 81 | map_out_path=".temp_map_out", max_boxes=100, confidence=0.05, nms_iou=0.5, letterbox_image=True, MINOVERLAP=0.5, eval_flag=True, period=1): 82 | super(EvalCallback, self).__init__() 83 | 84 | self.net = net 85 | self.input_shape = input_shape 86 | self.class_names = class_names 87 | self.num_classes = num_classes 88 | self.val_lines = val_lines 89 | self.log_dir = log_dir 90 | self.cuda = cuda 91 | self.map_out_path = map_out_path 92 | self.max_boxes = max_boxes 93 | self.confidence = confidence 94 | self.nms_iou = nms_iou 95 | self.letterbox_image = letterbox_image 96 | self.MINOVERLAP = MINOVERLAP 97 | self.eval_flag = eval_flag 98 | self.period = period 99 | 100 | self.std = torch.Tensor([0.1, 0.1, 0.2, 0.2]).repeat(self.num_classes + 1)[None] 101 | if self.cuda: 102 | self.std = self.std.cuda() 103 | self.bbox_util = DecodeBox(self.std, self.num_classes) 104 | 105 | self.maps = [0] 106 | self.epoches = [0] 107 | if self.eval_flag: 108 | with open(os.path.join(self.log_dir, "epoch_map.txt"), 'a') as f: 109 | f.write(str(0)) 110 | f.write("\n") 111 | 112 | #---------------------------------------------------# 113 | # 检测图片 114 | #---------------------------------------------------# 115 | def get_map_txt(self, image_id, image, class_names, map_out_path): 116 | f = open(os.path.join(map_out_path, "detection-results/"+image_id+".txt"),"w") 117 | #---------------------------------------------------# 118 | # 计算输入图片的高和宽 119 | #---------------------------------------------------# 120 | image_shape = np.array(np.shape(image)[0:2]) 121 | input_shape = get_new_img_size(image_shape[0], image_shape[1]) 122 | #---------------------------------------------------------# 123 | # 在这里将图像转换成RGB图像,防止灰度图在预测时报错。 124 | # 代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB 125 | #---------------------------------------------------------# 126 | image = cvtColor(image) 127 | 128 | #---------------------------------------------------------# 129 | # 给原图像进行resize,resize到短边为600的大小上 130 | #---------------------------------------------------------# 131 | image_data = resize_image(image, [input_shape[1], input_shape[0]]) 132 | #---------------------------------------------------------# 133 | # 添加上batch_size维度 134 | #---------------------------------------------------------# 135 | image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0) 136 | 137 | with torch.no_grad(): 138 | images = 
torch.from_numpy(image_data) 139 | if self.cuda: 140 | images = images.cuda() 141 | 142 | roi_cls_locs, roi_scores, rois, _ = self.net(images) 143 | #-------------------------------------------------------------# 144 | # 利用classifier的预测结果对建议框进行解码,获得预测框 145 | #-------------------------------------------------------------# 146 | results = self.bbox_util.forward(roi_cls_locs, roi_scores, rois, image_shape, input_shape, 147 | nms_iou = self.nms_iou, confidence = self.confidence) 148 | #--------------------------------------# 149 | # 如果没有检测到物体,则返回原图 150 | #--------------------------------------# 151 | if len(results[0]) <= 0: 152 | return 153 | 154 | top_label = np.array(results[0][:, 5], dtype = 'int32') 155 | top_conf = results[0][:, 4] 156 | top_boxes = results[0][:, :4] 157 | 158 | top_100 = np.argsort(top_conf)[::-1][:self.max_boxes] 159 | top_boxes = top_boxes[top_100] 160 | top_conf = top_conf[top_100] 161 | top_label = top_label[top_100] 162 | 163 | for i, c in list(enumerate(top_label)): 164 | predicted_class = self.class_names[int(c)] 165 | box = top_boxes[i] 166 | score = str(top_conf[i]) 167 | 168 | top, left, bottom, right = box 169 | if predicted_class not in class_names: 170 | continue 171 | 172 | f.write("%s %s %s %s %s %s\n" % (predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)),str(int(bottom)))) 173 | 174 | f.close() 175 | return 176 | 177 | def on_epoch_end(self, epoch): 178 | if epoch % self.period == 0 and self.eval_flag: 179 | if not os.path.exists(self.map_out_path): 180 | os.makedirs(self.map_out_path) 181 | if not os.path.exists(os.path.join(self.map_out_path, "ground-truth")): 182 | os.makedirs(os.path.join(self.map_out_path, "ground-truth")) 183 | if not os.path.exists(os.path.join(self.map_out_path, "detection-results")): 184 | os.makedirs(os.path.join(self.map_out_path, "detection-results")) 185 | print("Get map.") 186 | for annotation_line in tqdm(self.val_lines): 187 | line = annotation_line.split() 188 | image_id = os.path.basename(line[0]).split('.')[0] 189 | #------------------------------# 190 | # 读取图像并转换成RGB图像 191 | #------------------------------# 192 | image = Image.open(line[0]) 193 | #------------------------------# 194 | # 获得预测框 195 | #------------------------------# 196 | gt_boxes = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]]) 197 | #------------------------------# 198 | # 获得预测txt 199 | #------------------------------# 200 | self.get_map_txt(image_id, image, self.class_names, self.map_out_path) 201 | 202 | #------------------------------# 203 | # 获得真实框txt 204 | #------------------------------# 205 | with open(os.path.join(self.map_out_path, "ground-truth/"+image_id+".txt"), "w") as new_f: 206 | for box in gt_boxes: 207 | left, top, right, bottom, obj = box 208 | obj_name = self.class_names[obj] 209 | new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom)) 210 | 211 | print("Calculate Map.") 212 | try: 213 | temp_map = get_coco_map(class_names = self.class_names, path = self.map_out_path)[1] 214 | except: 215 | temp_map = get_map(self.MINOVERLAP, False, path = self.map_out_path) 216 | self.maps.append(temp_map) 217 | self.epoches.append(epoch) 218 | 219 | with open(os.path.join(self.log_dir, "epoch_map.txt"), 'a') as f: 220 | f.write(str(temp_map)) 221 | f.write("\n") 222 | 223 | plt.figure() 224 | plt.plot(self.epoches, self.maps, 'red', linewidth = 2, label='train map') 225 | 226 | plt.grid(True) 227 | plt.xlabel('Epoch') 228 | plt.ylabel('Map %s'%str(self.MINOVERLAP)) 229 | plt.title('A 
Map Curve') 230 | plt.legend(loc="upper right") 231 | 232 | plt.savefig(os.path.join(self.log_dir, "epoch_map.png")) 233 | plt.cla() 234 | plt.close("all") 235 | 236 | print("Get map done.") 237 | shutil.rmtree(self.map_out_path) 238 | -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/utils/dataloader.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import torch 4 | from PIL import Image 5 | from torch.utils.data.dataset import Dataset 6 | 7 | from utils.utils import cvtColor, preprocess_input 8 | 9 | 10 | class FRCNNDataset(Dataset): 11 | def __init__(self, annotation_lines, input_shape = [600, 600], train = True): 12 | self.annotation_lines = annotation_lines 13 | self.length = len(annotation_lines) 14 | self.input_shape = input_shape 15 | self.train = train 16 | 17 | def __len__(self): 18 | return self.length 19 | 20 | def __getitem__(self, index): 21 | index = index % self.length 22 | #---------------------------------------------------# 23 | # 训练时进行数据的随机增强 24 | # 验证时不进行数据的随机增强 25 | #---------------------------------------------------# 26 | image, y = self.get_random_data(self.annotation_lines[index], self.input_shape[0:2], random = self.train) 27 | image = np.transpose(preprocess_input(np.array(image, dtype=np.float32)), (2, 0, 1)) 28 | box_data = np.zeros((len(y), 5)) 29 | if len(y) > 0: 30 | box_data[:len(y)] = y 31 | 32 | box = box_data[:, :4] 33 | label = box_data[:, -1] 34 | return image, box, label 35 | 36 | def rand(self, a=0, b=1): 37 | return np.random.rand()*(b-a) + a 38 | 39 | def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True): 40 | line = annotation_line.split() 41 | #------------------------------# 42 | # 读取图像并转换成RGB图像 43 | #------------------------------# 44 | image = Image.open(line[0]) 45 | image = cvtColor(image) 46 | #------------------------------# 47 | # 获得图像的高宽与目标高宽 48 | #------------------------------# 49 | iw, ih = image.size 50 | h, w = input_shape 51 | #------------------------------# 52 | # 获得预测框 53 | #------------------------------# 54 | box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]]) 55 | 56 | if not random: 57 | scale = min(w/iw, h/ih) 58 | nw = int(iw*scale) 59 | nh = int(ih*scale) 60 | dx = (w-nw)//2 61 | dy = (h-nh)//2 62 | 63 | #---------------------------------# 64 | # 将图像多余的部分加上灰条 65 | #---------------------------------# 66 | image = image.resize((nw,nh), Image.BICUBIC) 67 | new_image = Image.new('RGB', (w,h), (128,128,128)) 68 | new_image.paste(image, (dx, dy)) 69 | image_data = np.array(new_image, np.float32) 70 | 71 | #---------------------------------# 72 | # 对真实框进行调整 73 | #---------------------------------# 74 | if len(box)>0: 75 | np.random.shuffle(box) 76 | box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx 77 | box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy 78 | box[:, 0:2][box[:, 0:2]<0] = 0 79 | box[:, 2][box[:, 2]>w] = w 80 | box[:, 3][box[:, 3]>h] = h 81 | box_w = box[:, 2] - box[:, 0] 82 | box_h = box[:, 3] - box[:, 1] 83 | box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box 84 | 85 | return image_data, box 86 | 87 | #------------------------------------------# 88 | # 对图像进行缩放并且进行长和宽的扭曲 89 | #------------------------------------------# 90 | new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter) 91 | scale = self.rand(.25, 2) 92 | if new_ar < 1: 93 | nh = int(scale*h) 94 | nw = int(nh*new_ar) 95 | 
else: 96 | nw = int(scale*w) 97 | nh = int(nw/new_ar) 98 | image = image.resize((nw,nh), Image.BICUBIC) 99 | 100 | #------------------------------------------# 101 | # 将图像多余的部分加上灰条 102 | #------------------------------------------# 103 | dx = int(self.rand(0, w-nw)) 104 | dy = int(self.rand(0, h-nh)) 105 | new_image = Image.new('RGB', (w,h), (128,128,128)) 106 | new_image.paste(image, (dx, dy)) 107 | image = new_image 108 | 109 | #------------------------------------------# 110 | # 翻转图像 111 | #------------------------------------------# 112 | flip = self.rand()<.5 113 | if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT) 114 | 115 | image_data = np.array(image, np.uint8) 116 | #---------------------------------# 117 | # 对图像进行色域变换 118 | # 计算色域变换的参数 119 | #---------------------------------# 120 | r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1 121 | #---------------------------------# 122 | # 将图像转到HSV上 123 | #---------------------------------# 124 | hue, sat, val = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV)) 125 | dtype = image_data.dtype 126 | #---------------------------------# 127 | # 应用变换 128 | #---------------------------------# 129 | x = np.arange(0, 256, dtype=r.dtype) 130 | lut_hue = ((x * r[0]) % 180).astype(dtype) 131 | lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) 132 | lut_val = np.clip(x * r[2], 0, 255).astype(dtype) 133 | 134 | image_data = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))) 135 | image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB) 136 | 137 | #---------------------------------# 138 | # 对真实框进行调整 139 | #---------------------------------# 140 | if len(box)>0: 141 | np.random.shuffle(box) 142 | box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx 143 | box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy 144 | if flip: box[:, [0,2]] = w - box[:, [2,0]] 145 | box[:, 0:2][box[:, 0:2]<0] = 0 146 | box[:, 2][box[:, 2]>w] = w 147 | box[:, 3][box[:, 3]>h] = h 148 | box_w = box[:, 2] - box[:, 0] 149 | box_h = box[:, 3] - box[:, 1] 150 | box = box[np.logical_and(box_w>1, box_h>1)] 151 | 152 | return image_data, box 153 | 154 | # DataLoader中collate_fn使用 155 | def frcnn_dataset_collate(batch): 156 | images = [] 157 | bboxes = [] 158 | labels = [] 159 | for img, box, label in batch: 160 | images.append(img) 161 | bboxes.append(box) 162 | labels.append(label) 163 | images = torch.from_numpy(np.array(images)) 164 | return images, bboxes, labels 165 | 166 | -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/utils/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | 4 | #---------------------------------------------------------# 5 | # 将图像转换成RGB图像,防止灰度图在预测时报错。 6 | # 代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB 7 | #---------------------------------------------------------# 8 | def cvtColor(image): 9 | if len(np.shape(image)) == 3 and np.shape(image)[2] == 3: 10 | return image 11 | else: 12 | image = image.convert('RGB') 13 | return image 14 | 15 | #---------------------------------------------------# 16 | # 对输入图像进行resize 17 | #---------------------------------------------------# 18 | def resize_image(image, size): 19 | w, h = size 20 | new_image = image.resize((w, h), Image.BICUBIC) 21 | return new_image 22 | 23 | #---------------------------------------------------# 24 | # 获得类 25 | #---------------------------------------------------# 26 | def get_classes(classes_path): 27 | with open(classes_path, 
encoding='utf-8') as f: 28 | class_names = f.readlines() 29 | class_names = [c.strip() for c in class_names] 30 | return class_names, len(class_names) 31 | 32 | #---------------------------------------------------# 33 | # 获得学习率 34 | #---------------------------------------------------# 35 | def get_lr(optimizer): 36 | for param_group in optimizer.param_groups: 37 | return param_group['lr'] 38 | 39 | def preprocess_input(image): 40 | image /= 255.0 41 | return image 42 | 43 | def show_config(**kwargs): 44 | print('Configurations:') 45 | print('-' * 70) 46 | print('|%25s | %40s|' % ('keys', 'values')) 47 | print('-' * 70) 48 | for key, value in kwargs.items(): 49 | print('|%25s | %40s|' % (str(key), str(value))) 50 | print('-' * 70) 51 | 52 | def get_new_img_size(height, width, img_min_side=600): 53 | if width <= height: 54 | f = float(img_min_side) / width 55 | resized_height = int(f * height) 56 | resized_width = int(img_min_side) 57 | else: 58 | f = float(img_min_side) / height 59 | resized_width = int(f * width) 60 | resized_height = int(img_min_side) 61 | 62 | return resized_height, resized_width 63 | -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/utils/utils_bbox.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.nn import functional as F 4 | from torchvision.ops import nms 5 | 6 | 7 | def loc2bbox(src_bbox, loc): 8 | if src_bbox.size()[0] == 0: 9 | return torch.zeros((0, 4), dtype=loc.dtype) 10 | 11 | src_width = torch.unsqueeze(src_bbox[:, 2] - src_bbox[:, 0], -1) 12 | src_height = torch.unsqueeze(src_bbox[:, 3] - src_bbox[:, 1], -1) 13 | src_ctr_x = torch.unsqueeze(src_bbox[:, 0], -1) + 0.5 * src_width 14 | src_ctr_y = torch.unsqueeze(src_bbox[:, 1], -1) + 0.5 * src_height 15 | 16 | dx = loc[:, 0::4] 17 | dy = loc[:, 1::4] 18 | dw = loc[:, 2::4] 19 | dh = loc[:, 3::4] 20 | 21 | ctr_x = dx * src_width + src_ctr_x 22 | ctr_y = dy * src_height + src_ctr_y 23 | w = torch.exp(dw) * src_width 24 | h = torch.exp(dh) * src_height 25 | 26 | dst_bbox = torch.zeros_like(loc) 27 | dst_bbox[:, 0::4] = ctr_x - 0.5 * w 28 | dst_bbox[:, 1::4] = ctr_y - 0.5 * h 29 | dst_bbox[:, 2::4] = ctr_x + 0.5 * w 30 | dst_bbox[:, 3::4] = ctr_y + 0.5 * h 31 | 32 | return dst_bbox 33 | 34 | class DecodeBox(): 35 | def __init__(self, std, num_classes): 36 | self.std = std 37 | self.num_classes = num_classes + 1 38 | 39 | def frcnn_correct_boxes(self, box_xy, box_wh, input_shape, image_shape): 40 | #-----------------------------------------------------------------# 41 | # 把y轴放前面是因为方便预测框和图像的宽高进行相乘 42 | #-----------------------------------------------------------------# 43 | box_yx = box_xy[..., ::-1] 44 | box_hw = box_wh[..., ::-1] 45 | input_shape = np.array(input_shape) 46 | image_shape = np.array(image_shape) 47 | 48 | box_mins = box_yx - (box_hw / 2.) 49 | box_maxes = box_yx + (box_hw / 2.) 
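#-----------------------------------------------------------------#
# At this point the boxes are normalised (y1, x1, y2, x2) in [0, 1];
# multiplying by (h, w, h, w) below maps them back onto the original
# image. E.g. mins of (0.25, 0.10) on a 600x800 image become (150, 80).
#-----------------------------------------------------------------#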
50 | boxes = np.concatenate([box_mins[..., 0:1], box_mins[..., 1:2], box_maxes[..., 0:1], box_maxes[..., 1:2]], axis=-1) 51 | boxes *= np.concatenate([image_shape, image_shape], axis=-1) 52 | return boxes 53 | 54 | def forward(self, roi_cls_locs, roi_scores, rois, image_shape, input_shape, nms_iou = 0.3, confidence = 0.5): 55 | results = [] 56 | bs = len(roi_cls_locs) 57 | #--------------------------------# 58 | # batch_size, num_rois, 4 59 | #--------------------------------# 60 | rois = rois.view((bs, -1, 4)) 61 | #----------------------------------------------------------------------------------------------------------------# 62 | # 对每一张图片进行处理,由于在predict.py的时候,我们只输入一张图片,所以for i in range(len(mbox_loc))只进行一次 63 | #----------------------------------------------------------------------------------------------------------------# 64 | for i in range(bs): 65 | #----------------------------------------------------------# 66 | # 对回归参数进行reshape 67 | #----------------------------------------------------------# 68 | roi_cls_loc = roi_cls_locs[i] * self.std 69 | #----------------------------------------------------------# 70 | # 第一维度是建议框的数量,第二维度是每个种类 71 | # 第三维度是对应种类的调整参数 72 | #----------------------------------------------------------# 73 | roi_cls_loc = roi_cls_loc.view([-1, self.num_classes, 4]) 74 | 75 | #-------------------------------------------------------------# 76 | # 利用classifier网络的预测结果对建议框进行调整获得预测框 77 | # num_rois, 4 -> num_rois, 1, 4 -> num_rois, num_classes, 4 78 | #-------------------------------------------------------------# 79 | roi = rois[i].view((-1, 1, 4)).expand_as(roi_cls_loc) 80 | cls_bbox = loc2bbox(roi.contiguous().view((-1, 4)), roi_cls_loc.contiguous().view((-1, 4))) 81 | cls_bbox = cls_bbox.view([-1, (self.num_classes), 4]) 82 | #-------------------------------------------------------------# 83 | # 对预测框进行归一化,调整到0-1之间 84 | #-------------------------------------------------------------# 85 | cls_bbox[..., [0, 2]] = (cls_bbox[..., [0, 2]]) / input_shape[1] 86 | cls_bbox[..., [1, 3]] = (cls_bbox[..., [1, 3]]) / input_shape[0] 87 | 88 | roi_score = roi_scores[i] 89 | prob = F.softmax(roi_score, dim=-1) 90 | 91 | results.append([]) 92 | for c in range(1, self.num_classes): 93 | #--------------------------------# 94 | # 取出属于该类的所有框的置信度 95 | # 判断是否大于门限 96 | #--------------------------------# 97 | c_confs = prob[:, c] 98 | c_confs_m = c_confs > confidence 99 | 100 | if len(c_confs[c_confs_m]) > 0: 101 | #-----------------------------------------# 102 | # 取出得分高于confidence的框 103 | #-----------------------------------------# 104 | boxes_to_process = cls_bbox[c_confs_m, c] 105 | confs_to_process = c_confs[c_confs_m] 106 | 107 | keep = nms( 108 | boxes_to_process, 109 | confs_to_process, 110 | nms_iou 111 | ) 112 | #-----------------------------------------# 113 | # 取出在非极大抑制中效果较好的内容 114 | #-----------------------------------------# 115 | good_boxes = boxes_to_process[keep] 116 | confs = confs_to_process[keep][:, None] 117 | labels = (c - 1) * torch.ones((len(keep), 1)).cuda() if confs.is_cuda else (c - 1) * torch.ones((len(keep), 1)) 118 | #-----------------------------------------# 119 | # 将label、置信度、框的位置进行堆叠。 120 | #-----------------------------------------# 121 | c_pred = torch.cat((good_boxes, confs, labels), dim=1).cpu().numpy() 122 | # 添加进result里 123 | results[-1].extend(c_pred) 124 | 125 | if len(results[-1]) > 0: 126 | results[-1] = np.array(results[-1]) 127 | box_xy, box_wh = (results[-1][:, 0:2] + results[-1][:, 2:4])/2, results[-1][:, 2:4] - results[-1][:, 0:2] 128 | 
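#-------------------------------------------------------#
# Convert corner form (x1, y1, x2, y2) to centre/size form,
# e.g. (0.2, 0.2, 0.4, 0.6) -> centre (0.3, 0.4), size (0.2, 0.4),
# since frcnn_correct_boxes rescales boxes via their centres.
#-------------------------------------------------------#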
results[-1][:, :4] = self.frcnn_correct_boxes(box_xy, box_wh, input_shape, image_shape) 129 | 130 | return results 131 | 132 | -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/utils/utils_fit.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | from tqdm import tqdm 5 | 6 | from utils.utils import get_lr 7 | 8 | 9 | def fit_one_epoch(model, train_util, loss_history, eval_callback, optimizer, epoch, epoch_step, epoch_step_val, gen, gen_val, Epoch, cuda, fp16, scaler, save_period, save_dir): 10 | total_loss = 0 11 | rpn_loc_loss = 0 12 | rpn_cls_loss = 0 13 | roi_loc_loss = 0 14 | roi_cls_loss = 0 15 | 16 | val_loss = 0 17 | print('Start Train') 18 | with tqdm(total=epoch_step,desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar: 19 | for iteration, batch in enumerate(gen): 20 | if iteration >= epoch_step: 21 | break 22 | images, boxes, labels = batch[0], batch[1], batch[2] 23 | with torch.no_grad(): 24 | if cuda: 25 | images = images.cuda() 26 | 27 | rpn_loc, rpn_cls, roi_loc, roi_cls, total = train_util.train_step(images, boxes, labels, 1, fp16, scaler) 28 | total_loss += total.item() 29 | rpn_loc_loss += rpn_loc.item() 30 | rpn_cls_loss += rpn_cls.item() 31 | roi_loc_loss += roi_loc.item() 32 | roi_cls_loss += roi_cls.item() 33 | 34 | pbar.set_postfix(**{'total_loss' : total_loss / (iteration + 1), 35 | 'rpn_loc' : rpn_loc_loss / (iteration + 1), 36 | 'rpn_cls' : rpn_cls_loss / (iteration + 1), 37 | 'roi_loc' : roi_loc_loss / (iteration + 1), 38 | 'roi_cls' : roi_cls_loss / (iteration + 1), 39 | 'lr' : get_lr(optimizer)}) 40 | pbar.update(1) 41 | 42 | print('Finish Train') 43 | print('Start Validation') 44 | with tqdm(total=epoch_step_val, desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar: 45 | for iteration, batch in enumerate(gen_val): 46 | if iteration >= epoch_step_val: 47 | break 48 | images, boxes, labels = batch[0], batch[1], batch[2] 49 | with torch.no_grad(): 50 | if cuda: 51 | images = images.cuda() 52 | 53 | train_util.optimizer.zero_grad() 54 | _, _, _, _, val_total = train_util.forward(images, boxes, labels, 1) 55 | val_loss += val_total.item() 56 | 57 | pbar.set_postfix(**{'val_loss' : val_loss / (iteration + 1)}) 58 | pbar.update(1) 59 | 60 | print('Finish Validation') 61 | loss_history.append_loss(epoch + 1, total_loss / epoch_step, val_loss / epoch_step_val) 62 | eval_callback.on_epoch_end(epoch + 1) 63 | print('Epoch:'+ str(epoch + 1) + '/' + str(Epoch)) 64 | print('Total Loss: %.3f || Val Loss: %.3f ' % (total_loss / epoch_step, val_loss / epoch_step_val)) 65 | 66 | #-----------------------------------------------# 67 | # 保存权值 68 | #-----------------------------------------------# 69 | if (epoch + 1) % save_period == 0 or epoch + 1 == Epoch: 70 | torch.save(model.state_dict(), os.path.join(save_dir, 'ep%03d-loss%.3f-val_loss%.3f.pth' % (epoch + 1, total_loss / epoch_step, val_loss / epoch_step_val))) 71 | 72 | if len(loss_history.val_loss) <= 1 or (val_loss / epoch_step_val) <= min(loss_history.val_loss): 73 | print('Save best model to best_epoch_weights.pth') 74 | torch.save(model.state_dict(), os.path.join(save_dir, "best_epoch_weights.pth")) 75 | 76 | torch.save(model.state_dict(), os.path.join(save_dir, "last_epoch_weights.pth")) -------------------------------------------------------------------------------- /faster-rcnn-pytorch-master/utils/utils_map.py: 
-------------------------------------------------------------------------------- 1 | import glob 2 | import json 3 | import math 4 | import operator 5 | import os 6 | import shutil 7 | import sys 8 | try: 9 | from pycocotools.coco import COCO 10 | from pycocotools.cocoeval import COCOeval 11 | except: 12 | pass 13 | import cv2 14 | import matplotlib 15 | matplotlib.use('Agg') 16 | from matplotlib import pyplot as plt 17 | import numpy as np 18 | 19 | ''' 20 | 0,0 ------> x (width) 21 | | 22 | | (Left,Top) 23 | | *_________ 24 | | | | 25 | | | 26 | y |_________| 27 | (height) * 28 | (Right,Bottom) 29 | ''' 30 | 31 | def log_average_miss_rate(precision, fp_cumsum, num_images): 32 | """ 33 | log-average miss rate: 34 | Calculated by averaging miss rates at 9 evenly spaced FPPI points 35 | between 1e-2 and 1e0 (i.e. 0.01 to 1), in log-space. 36 | 37 | output: 38 | lamr | log-average miss rate 39 | mr | miss rate 40 | fppi | false positives per image 41 | 42 | references: 43 | [1] Dollar, Piotr, et al. "Pedestrian Detection: An Evaluation of the 44 | State of the Art." Pattern Analysis and Machine Intelligence, IEEE 45 | Transactions on 34.4 (2012): 743 - 761. 46 | """ 47 | 48 | if precision.size == 0: 49 | lamr = 0 50 | mr = 1 51 | fppi = 0 52 | return lamr, mr, fppi 53 | 54 | fppi = fp_cumsum / float(num_images) 55 | mr = (1 - precision) 56 | 57 | fppi_tmp = np.insert(fppi, 0, -1.0) 58 | mr_tmp = np.insert(mr, 0, 1.0) 59 | 60 | ref = np.logspace(-2.0, 0.0, num = 9) 61 | for i, ref_i in enumerate(ref): 62 | j = np.where(fppi_tmp <= ref_i)[-1][-1] 63 | ref[i] = mr_tmp[j] 64 | 65 | lamr = math.exp(np.mean(np.log(np.maximum(1e-10, ref)))) 66 | 67 | return lamr, mr, fppi 68 | 69 | """ 70 | throw an error and exit 71 | """ 72 | def error(msg): 73 | print(msg) 74 | sys.exit(0) 75 | 76 | """ 77 | check if the number is a float between 0.0 and 1.0 78 | """ 79 | def is_float_between_0_and_1(value): 80 | try: 81 | val = float(value) 82 | if val > 0.0 and val < 1.0: 83 | return True 84 | else: 85 | return False 86 | except ValueError: 87 | return False 88 | 89 | """ 90 | Calculate the AP given the recall and precision array 91 | 1st) We compute a version of the measured precision/recall curve with 92 | precision monotonically decreasing 93 | 2nd) We compute the AP as the area under this curve by numerical integration.
94 | """ 95 | def voc_ap(rec, prec): 96 | """ 97 | --- Official matlab code VOC2012--- 98 | mrec=[0 ; rec ; 1]; 99 | mpre=[0 ; prec ; 0]; 100 | for i=numel(mpre)-1:-1:1 101 | mpre(i)=max(mpre(i),mpre(i+1)); 102 | end 103 | i=find(mrec(2:end)~=mrec(1:end-1))+1; 104 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 105 | """ 106 | rec.insert(0, 0.0) # insert 0.0 at begining of list 107 | rec.append(1.0) # insert 1.0 at end of list 108 | mrec = rec[:] 109 | prec.insert(0, 0.0) # insert 0.0 at begining of list 110 | prec.append(0.0) # insert 0.0 at end of list 111 | mpre = prec[:] 112 | """ 113 | This part makes the precision monotonically decreasing 114 | (goes from the end to the beginning) 115 | matlab: for i=numel(mpre)-1:-1:1 116 | mpre(i)=max(mpre(i),mpre(i+1)); 117 | """ 118 | for i in range(len(mpre)-2, -1, -1): 119 | mpre[i] = max(mpre[i], mpre[i+1]) 120 | """ 121 | This part creates a list of indexes where the recall changes 122 | matlab: i=find(mrec(2:end)~=mrec(1:end-1))+1; 123 | """ 124 | i_list = [] 125 | for i in range(1, len(mrec)): 126 | if mrec[i] != mrec[i-1]: 127 | i_list.append(i) # if it was matlab would be i + 1 128 | """ 129 | The Average Precision (AP) is the area under the curve 130 | (numerical integration) 131 | matlab: ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 132 | """ 133 | ap = 0.0 134 | for i in i_list: 135 | ap += ((mrec[i]-mrec[i-1])*mpre[i]) 136 | return ap, mrec, mpre 137 | 138 | 139 | """ 140 | Convert the lines of a file to a list 141 | """ 142 | def file_lines_to_list(path): 143 | # open txt file lines to a list 144 | with open(path) as f: 145 | content = f.readlines() 146 | # remove whitespace characters like `\n` at the end of each line 147 | content = [x.strip() for x in content] 148 | return content 149 | 150 | """ 151 | Draws text in image 152 | """ 153 | def draw_text_in_image(img, text, pos, color, line_width): 154 | font = cv2.FONT_HERSHEY_PLAIN 155 | fontScale = 1 156 | lineType = 1 157 | bottomLeftCornerOfText = pos 158 | cv2.putText(img, text, 159 | bottomLeftCornerOfText, 160 | font, 161 | fontScale, 162 | color, 163 | lineType) 164 | text_width, _ = cv2.getTextSize(text, font, fontScale, lineType)[0] 165 | return img, (line_width + text_width) 166 | 167 | """ 168 | Plot - adjust axes 169 | """ 170 | def adjust_axes(r, t, fig, axes): 171 | # get text width for re-scaling 172 | bb = t.get_window_extent(renderer=r) 173 | text_width_inches = bb.width / fig.dpi 174 | # get axis width in inches 175 | current_fig_width = fig.get_figwidth() 176 | new_fig_width = current_fig_width + text_width_inches 177 | propotion = new_fig_width / current_fig_width 178 | # get axis limit 179 | x_lim = axes.get_xlim() 180 | axes.set_xlim([x_lim[0], x_lim[1]*propotion]) 181 | 182 | """ 183 | Draw plot using Matplotlib 184 | """ 185 | def draw_plot_func(dictionary, n_classes, window_title, plot_title, x_label, output_path, to_show, plot_color, true_p_bar): 186 | # sort the dictionary by decreasing value, into a list of tuples 187 | sorted_dic_by_value = sorted(dictionary.items(), key=operator.itemgetter(1)) 188 | # unpacking the list of tuples into two lists 189 | sorted_keys, sorted_values = zip(*sorted_dic_by_value) 190 | # 191 | if true_p_bar != "": 192 | """ 193 | Special case to draw in: 194 | - green -> TP: True Positives (object detected and matches ground-truth) 195 | - red -> FP: False Positives (object detected but does not match ground-truth) 196 | - orange -> FN: False Negatives (object not detected but present in the ground-truth) 197 | """ 198 | fp_sorted = [] 199 | 
tp_sorted = [] 200 | for key in sorted_keys: 201 | fp_sorted.append(dictionary[key] - true_p_bar[key]) 202 | tp_sorted.append(true_p_bar[key]) 203 | plt.barh(range(n_classes), fp_sorted, align='center', color='crimson', label='False Positive') 204 | plt.barh(range(n_classes), tp_sorted, align='center', color='forestgreen', label='True Positive', left=fp_sorted) 205 | # add legend 206 | plt.legend(loc='lower right') 207 | """ 208 | Write number on side of bar 209 | """ 210 | fig = plt.gcf() # gcf - get current figure 211 | axes = plt.gca() 212 | r = fig.canvas.get_renderer() 213 | for i, val in enumerate(sorted_values): 214 | fp_val = fp_sorted[i] 215 | tp_val = tp_sorted[i] 216 | fp_str_val = " " + str(fp_val) 217 | tp_str_val = fp_str_val + " " + str(tp_val) 218 | # trick to paint multicolor with offset: 219 | # first paint everything and then repaint the first number 220 | t = plt.text(val, i, tp_str_val, color='forestgreen', va='center', fontweight='bold') 221 | plt.text(val, i, fp_str_val, color='crimson', va='center', fontweight='bold') 222 | if i == (len(sorted_values)-1): # largest bar 223 | adjust_axes(r, t, fig, axes) 224 | else: 225 | plt.barh(range(n_classes), sorted_values, color=plot_color) 226 | """ 227 | Write number on side of bar 228 | """ 229 | fig = plt.gcf() # gcf - get current figure 230 | axes = plt.gca() 231 | r = fig.canvas.get_renderer() 232 | for i, val in enumerate(sorted_values): 233 | str_val = " " + str(val) # add a space before 234 | if val < 1.0: 235 | str_val = " {0:.2f}".format(val) 236 | t = plt.text(val, i, str_val, color=plot_color, va='center', fontweight='bold') 237 | # re-set axes to show number inside the figure 238 | if i == (len(sorted_values)-1): # largest bar 239 | adjust_axes(r, t, fig, axes) 240 | # set window title 241 | fig.canvas.manager.set_window_title(window_title) 242 | # write classes in y axis 243 | tick_font_size = 12 244 | plt.yticks(range(n_classes), sorted_keys, fontsize=tick_font_size) 245 | """ 246 | Re-scale height accordingly 247 | """ 248 | init_height = fig.get_figheight() 249 | # comput the matrix height in points and inches 250 | dpi = fig.dpi 251 | height_pt = n_classes * (tick_font_size * 1.4) # 1.4 (some spacing) 252 | height_in = height_pt / dpi 253 | # compute the required figure height 254 | top_margin = 0.15 # in percentage of the figure height 255 | bottom_margin = 0.05 # in percentage of the figure height 256 | figure_height = height_in / (1 - top_margin - bottom_margin) 257 | # set new height 258 | if figure_height > init_height: 259 | fig.set_figheight(figure_height) 260 | 261 | # set plot title 262 | plt.title(plot_title, fontsize=14) 263 | # set axis titles 264 | # plt.xlabel('classes') 265 | plt.xlabel(x_label, fontsize='large') 266 | # adjust size of window 267 | fig.tight_layout() 268 | # save the plot 269 | fig.savefig(output_path) 270 | # show image 271 | if to_show: 272 | plt.show() 273 | # close the plot 274 | plt.close() 275 | 276 | def get_map(MINOVERLAP, draw_plot, score_threhold=0.5, path = './map_out'): 277 | GT_PATH = os.path.join(path, 'ground-truth') 278 | DR_PATH = os.path.join(path, 'detection-results') 279 | IMG_PATH = os.path.join(path, 'images-optional') 280 | TEMP_FILES_PATH = os.path.join(path, '.temp_files') 281 | RESULTS_FILES_PATH = os.path.join(path, 'results') 282 | 283 | show_animation = True 284 | if os.path.exists(IMG_PATH): 285 | for dirpath, dirnames, files in os.walk(IMG_PATH): 286 | if not files: 287 | show_animation = False 288 | else: 289 | show_animation = False 290 | 291 | 
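"""
Expected layout under `path` (matching what EvalCallback and get_map.py
write out):
    ground-truth/<image_id>.txt        one "class left top right bottom" per line
    detection-results/<image_id>.txt   one "class score left top right bottom" per line
    images-optional/                   source images, only needed for the animation
"""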
if not os.path.exists(TEMP_FILES_PATH): 292 | os.makedirs(TEMP_FILES_PATH) 293 | 294 | if os.path.exists(RESULTS_FILES_PATH): 295 | shutil.rmtree(RESULTS_FILES_PATH) 296 | else: 297 | os.makedirs(RESULTS_FILES_PATH) 298 | if draw_plot: 299 | try: 300 | matplotlib.use('TkAgg') 301 | except: 302 | pass 303 | os.makedirs(os.path.join(RESULTS_FILES_PATH, "AP")) 304 | os.makedirs(os.path.join(RESULTS_FILES_PATH, "F1")) 305 | os.makedirs(os.path.join(RESULTS_FILES_PATH, "Recall")) 306 | os.makedirs(os.path.join(RESULTS_FILES_PATH, "Precision")) 307 | if show_animation: 308 | os.makedirs(os.path.join(RESULTS_FILES_PATH, "images", "detections_one_by_one")) 309 | 310 | ground_truth_files_list = glob.glob(GT_PATH + '/*.txt') 311 | if len(ground_truth_files_list) == 0: 312 | error("Error: No ground-truth files found!") 313 | ground_truth_files_list.sort() 314 | gt_counter_per_class = {} 315 | counter_images_per_class = {} 316 | 317 | for txt_file in ground_truth_files_list: 318 | file_id = txt_file.split(".txt", 1)[0] 319 | file_id = os.path.basename(os.path.normpath(file_id)) 320 | temp_path = os.path.join(DR_PATH, (file_id + ".txt")) 321 | if not os.path.exists(temp_path): 322 | error_msg = "Error. File not found: {}\n".format(temp_path) 323 | error(error_msg) 324 | lines_list = file_lines_to_list(txt_file) 325 | bounding_boxes = [] 326 | is_difficult = False 327 | already_seen_classes = [] 328 | for line in lines_list: 329 | try: 330 | if "difficult" in line: 331 | class_name, left, top, right, bottom, _difficult = line.split() 332 | is_difficult = True 333 | else: 334 | class_name, left, top, right, bottom = line.split() 335 | except: 336 | if "difficult" in line: 337 | line_split = line.split() 338 | _difficult = line_split[-1] 339 | bottom = line_split[-2] 340 | right = line_split[-3] 341 | top = line_split[-4] 342 | left = line_split[-5] 343 | class_name = "" 344 | for name in line_split[:-5]: 345 | class_name += name + " " 346 | class_name = class_name[:-1] 347 | is_difficult = True 348 | else: 349 | line_split = line.split() 350 | bottom = line_split[-1] 351 | right = line_split[-2] 352 | top = line_split[-3] 353 | left = line_split[-4] 354 | class_name = "" 355 | for name in line_split[:-4]: 356 | class_name += name + " " 357 | class_name = class_name[:-1] 358 | 359 | bbox = left + " " + top + " " + right + " " + bottom 360 | if is_difficult: 361 | bounding_boxes.append({"class_name":class_name, "bbox":bbox, "used":False, "difficult":True}) 362 | is_difficult = False 363 | else: 364 | bounding_boxes.append({"class_name":class_name, "bbox":bbox, "used":False}) 365 | if class_name in gt_counter_per_class: 366 | gt_counter_per_class[class_name] += 1 367 | else: 368 | gt_counter_per_class[class_name] = 1 369 | 370 | if class_name not in already_seen_classes: 371 | if class_name in counter_images_per_class: 372 | counter_images_per_class[class_name] += 1 373 | else: 374 | counter_images_per_class[class_name] = 1 375 | already_seen_classes.append(class_name) 376 | 377 | with open(TEMP_FILES_PATH + "/" + file_id + "_ground_truth.json", 'w') as outfile: 378 | json.dump(bounding_boxes, outfile) 379 | 380 | gt_classes = list(gt_counter_per_class.keys()) 381 | gt_classes = sorted(gt_classes) 382 | n_classes = len(gt_classes) 383 | 384 | dr_files_list = glob.glob(DR_PATH + '/*.txt') 385 | dr_files_list.sort() 386 | for class_index, class_name in enumerate(gt_classes): 387 | bounding_boxes = [] 388 | for txt_file in dr_files_list: 389 | file_id = txt_file.split(".txt",1)[0] 390 | file_id = 
os.path.basename(os.path.normpath(file_id)) 391 | temp_path = os.path.join(GT_PATH, (file_id + ".txt")) 392 | if class_index == 0: 393 | if not os.path.exists(temp_path): 394 | error_msg = "Error. File not found: {}\n".format(temp_path) 395 | error(error_msg) 396 | lines = file_lines_to_list(txt_file) 397 | for line in lines: 398 | try: 399 | tmp_class_name, confidence, left, top, right, bottom = line.split() 400 | except: 401 | line_split = line.split() 402 | bottom = line_split[-1] 403 | right = line_split[-2] 404 | top = line_split[-3] 405 | left = line_split[-4] 406 | confidence = line_split[-5] 407 | tmp_class_name = "" 408 | for name in line_split[:-5]: 409 | tmp_class_name += name + " " 410 | tmp_class_name = tmp_class_name[:-1] 411 | 412 | if tmp_class_name == class_name: 413 | bbox = left + " " + top + " " + right + " " +bottom 414 | bounding_boxes.append({"confidence":confidence, "file_id":file_id, "bbox":bbox}) 415 | 416 | bounding_boxes.sort(key=lambda x:float(x['confidence']), reverse=True) 417 | with open(TEMP_FILES_PATH + "/" + class_name + "_dr.json", 'w') as outfile: 418 | json.dump(bounding_boxes, outfile) 419 | 420 | sum_AP = 0.0 421 | ap_dictionary = {} 422 | lamr_dictionary = {} 423 | with open(RESULTS_FILES_PATH + "/results.txt", 'w') as results_file: 424 | results_file.write("# AP and precision/recall per class\n") 425 | count_true_positives = {} 426 | 427 | for class_index, class_name in enumerate(gt_classes): 428 | count_true_positives[class_name] = 0 429 | dr_file = TEMP_FILES_PATH + "/" + class_name + "_dr.json" 430 | dr_data = json.load(open(dr_file)) 431 | 432 | nd = len(dr_data) 433 | tp = [0] * nd 434 | fp = [0] * nd 435 | score = [0] * nd 436 | score_threhold_idx = 0 437 | for idx, detection in enumerate(dr_data): 438 | file_id = detection["file_id"] 439 | score[idx] = float(detection["confidence"]) 440 | if score[idx] >= score_threhold: 441 | score_threhold_idx = idx 442 | 443 | if show_animation: 444 | ground_truth_img = glob.glob1(IMG_PATH, file_id + ".*") 445 | if len(ground_truth_img) == 0: 446 | error("Error. Image not found with id: " + file_id) 447 | elif len(ground_truth_img) > 1: 448 | error("Error. Multiple image with id: " + file_id) 449 | else: 450 | img = cv2.imread(IMG_PATH + "/" + ground_truth_img[0]) 451 | img_cumulative_path = RESULTS_FILES_PATH + "/images/" + ground_truth_img[0] 452 | if os.path.isfile(img_cumulative_path): 453 | img_cumulative = cv2.imread(img_cumulative_path) 454 | else: 455 | img_cumulative = img.copy() 456 | bottom_border = 60 457 | BLACK = [0, 0, 0] 458 | img = cv2.copyMakeBorder(img, 0, bottom_border, 0, 0, cv2.BORDER_CONSTANT, value=BLACK) 459 | 460 | gt_file = TEMP_FILES_PATH + "/" + file_id + "_ground_truth.json" 461 | ground_truth_data = json.load(open(gt_file)) 462 | ovmax = -1 463 | gt_match = -1 464 | bb = [float(x) for x in detection["bbox"].split()] 465 | for obj in ground_truth_data: 466 | if obj["class_name"] == class_name: 467 | bbgt = [ float(x) for x in obj["bbox"].split() ] 468 | bi = [max(bb[0],bbgt[0]), max(bb[1],bbgt[1]), min(bb[2],bbgt[2]), min(bb[3],bbgt[3])] 469 | iw = bi[2] - bi[0] + 1 470 | ih = bi[3] - bi[1] + 1 471 | if iw > 0 and ih > 0: 472 | ua = (bb[2] - bb[0] + 1) * (bb[3] - bb[1] + 1) + (bbgt[2] - bbgt[0] 473 | + 1) * (bbgt[3] - bbgt[1] + 1) - iw * ih 474 | ov = iw * ih / ua 475 | if ov > ovmax: 476 | ovmax = ov 477 | gt_match = obj 478 | 479 | if show_animation: 480 | status = "NO MATCH FOUND!" 
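# The overlap computed above follows the inclusive-pixel convention
# (+1 on each side length): e.g. bb == bbgt == [0, 0, 9, 9] gives
# iw = ih = 10, ua = 100 + 100 - 100 = 100 and ov = 1.0 for a perfect match.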
481 | 482 | min_overlap = MINOVERLAP 483 | if ovmax >= min_overlap: 484 | if "difficult" not in gt_match: 485 | if not bool(gt_match["used"]): 486 | tp[idx] = 1 487 | gt_match["used"] = True 488 | count_true_positives[class_name] += 1 489 | with open(gt_file, 'w') as f: 490 | f.write(json.dumps(ground_truth_data)) 491 | if show_animation: 492 | status = "MATCH!" 493 | else: 494 | fp[idx] = 1 495 | if show_animation: 496 | status = "REPEATED MATCH!" 497 | else: 498 | fp[idx] = 1 499 | if ovmax > 0: 500 | status = "INSUFFICIENT OVERLAP" 501 | 502 | """ 503 | Draw image to show animation 504 | """ 505 | if show_animation: 506 | height, widht = img.shape[:2] 507 | white = (255,255,255) 508 | light_blue = (255,200,100) 509 | green = (0,255,0) 510 | light_red = (30,30,255) 511 | margin = 10 512 | # 1nd line 513 | v_pos = int(height - margin - (bottom_border / 2.0)) 514 | text = "Image: " + ground_truth_img[0] + " " 515 | img, line_width = draw_text_in_image(img, text, (margin, v_pos), white, 0) 516 | text = "Class [" + str(class_index) + "/" + str(n_classes) + "]: " + class_name + " " 517 | img, line_width = draw_text_in_image(img, text, (margin + line_width, v_pos), light_blue, line_width) 518 | if ovmax != -1: 519 | color = light_red 520 | if status == "INSUFFICIENT OVERLAP": 521 | text = "IoU: {0:.2f}% ".format(ovmax*100) + "< {0:.2f}% ".format(min_overlap*100) 522 | else: 523 | text = "IoU: {0:.2f}% ".format(ovmax*100) + ">= {0:.2f}% ".format(min_overlap*100) 524 | color = green 525 | img, _ = draw_text_in_image(img, text, (margin + line_width, v_pos), color, line_width) 526 | # 2nd line 527 | v_pos += int(bottom_border / 2.0) 528 | rank_pos = str(idx+1) 529 | text = "Detection #rank: " + rank_pos + " confidence: {0:.2f}% ".format(float(detection["confidence"])*100) 530 | img, line_width = draw_text_in_image(img, text, (margin, v_pos), white, 0) 531 | color = light_red 532 | if status == "MATCH!": 533 | color = green 534 | text = "Result: " + status + " " 535 | img, line_width = draw_text_in_image(img, text, (margin + line_width, v_pos), color, line_width) 536 | 537 | font = cv2.FONT_HERSHEY_SIMPLEX 538 | if ovmax > 0: 539 | bbgt = [ int(round(float(x))) for x in gt_match["bbox"].split() ] 540 | cv2.rectangle(img,(bbgt[0],bbgt[1]),(bbgt[2],bbgt[3]),light_blue,2) 541 | cv2.rectangle(img_cumulative,(bbgt[0],bbgt[1]),(bbgt[2],bbgt[3]),light_blue,2) 542 | cv2.putText(img_cumulative, class_name, (bbgt[0],bbgt[1] - 5), font, 0.6, light_blue, 1, cv2.LINE_AA) 543 | bb = [int(i) for i in bb] 544 | cv2.rectangle(img,(bb[0],bb[1]),(bb[2],bb[3]),color,2) 545 | cv2.rectangle(img_cumulative,(bb[0],bb[1]),(bb[2],bb[3]),color,2) 546 | cv2.putText(img_cumulative, class_name, (bb[0],bb[1] - 5), font, 0.6, color, 1, cv2.LINE_AA) 547 | 548 | cv2.imshow("Animation", img) 549 | cv2.waitKey(20) 550 | output_img_path = RESULTS_FILES_PATH + "/images/detections_one_by_one/" + class_name + "_detection" + str(idx) + ".jpg" 551 | cv2.imwrite(output_img_path, img) 552 | cv2.imwrite(img_cumulative_path, img_cumulative) 553 | 554 | cumsum = 0 555 | for idx, val in enumerate(fp): 556 | fp[idx] += cumsum 557 | cumsum += val 558 | 559 | cumsum = 0 560 | for idx, val in enumerate(tp): 561 | tp[idx] += cumsum 562 | cumsum += val 563 | 564 | rec = tp[:] 565 | for idx, val in enumerate(tp): 566 | rec[idx] = float(tp[idx]) / np.maximum(gt_counter_per_class[class_name], 1) 567 | 568 | prec = tp[:] 569 | for idx, val in enumerate(tp): 570 | prec[idx] = float(tp[idx]) / np.maximum((fp[idx] + tp[idx]), 1) 571 | 572 | ap, mrec, mprec = 
voc_ap(rec[:], prec[:]) 573 | F1 = np.array(rec)*np.array(prec)*2 / np.where((np.array(prec)+np.array(rec))==0, 1, (np.array(prec)+np.array(rec))) 574 | 575 | sum_AP += ap 576 | text = "{0:.2f}%".format(ap*100) + " = " + class_name + " AP " #class_name + " AP = {0:.2f}%".format(ap*100) 577 | 578 | if len(prec)>0: 579 | F1_text = "{0:.2f}".format(F1[score_threhold_idx]) + " = " + class_name + " F1 " 580 | Recall_text = "{0:.2f}%".format(rec[score_threhold_idx]*100) + " = " + class_name + " Recall " 581 | Precision_text = "{0:.2f}%".format(prec[score_threhold_idx]*100) + " = " + class_name + " Precision " 582 | else: 583 | F1_text = "0.00" + " = " + class_name + " F1 " 584 | Recall_text = "0.00%" + " = " + class_name + " Recall " 585 | Precision_text = "0.00%" + " = " + class_name + " Precision " 586 | 587 | rounded_prec = [ '%.2f' % elem for elem in prec ] 588 | rounded_rec = [ '%.2f' % elem for elem in rec ] 589 | results_file.write(text + "\n Precision: " + str(rounded_prec) + "\n Recall :" + str(rounded_rec) + "\n\n") 590 | 591 | if len(prec)>0: 592 | print(text + "\t||\tscore_threhold=" + str(score_threhold) + " : " + "F1=" + "{0:.2f}".format(F1[score_threhold_idx])\ 593 | + " ; Recall=" + "{0:.2f}%".format(rec[score_threhold_idx]*100) + " ; Precision=" + "{0:.2f}%".format(prec[score_threhold_idx]*100)) 594 | else: 595 | print(text + "\t||\tscore_threhold=" + str(score_threhold) + " : " + "F1=0.00% ; Recall=0.00% ; Precision=0.00%") 596 | ap_dictionary[class_name] = ap 597 | 598 | n_images = counter_images_per_class[class_name] 599 | lamr, mr, fppi = log_average_miss_rate(np.array(rec), np.array(fp), n_images) 600 | lamr_dictionary[class_name] = lamr 601 | 602 | if draw_plot: 603 | plt.plot(rec, prec, '-o') 604 | area_under_curve_x = mrec[:-1] + [mrec[-2]] + [mrec[-1]] 605 | area_under_curve_y = mprec[:-1] + [0.0] + [mprec[-1]] 606 | plt.fill_between(area_under_curve_x, 0, area_under_curve_y, alpha=0.2, edgecolor='r') 607 | 608 | fig = plt.gcf() 609 | fig.canvas.manager.set_window_title('AP ' + class_name) 610 | 611 | plt.title('class: ' + text) 612 | plt.xlabel('Recall') 613 | plt.ylabel('Precision') 614 | axes = plt.gca() 615 | axes.set_xlim([0.0,1.0]) 616 | axes.set_ylim([0.0,1.05]) 617 | fig.savefig(RESULTS_FILES_PATH + "/AP/" + class_name + ".png") 618 | plt.cla() 619 | 620 | plt.plot(score, F1, "-", color='orangered') 621 | plt.title('class: ' + F1_text + "\nscore_threhold=" + str(score_threhold)) 622 | plt.xlabel('Score_Threhold') 623 | plt.ylabel('F1') 624 | axes = plt.gca() 625 | axes.set_xlim([0.0,1.0]) 626 | axes.set_ylim([0.0,1.05]) 627 | fig.savefig(RESULTS_FILES_PATH + "/F1/" + class_name + ".png") 628 | plt.cla() 629 | 630 | plt.plot(score, rec, "-H", color='gold') 631 | plt.title('class: ' + Recall_text + "\nscore_threhold=" + str(score_threhold)) 632 | plt.xlabel('Score_Threhold') 633 | plt.ylabel('Recall') 634 | axes = plt.gca() 635 | axes.set_xlim([0.0,1.0]) 636 | axes.set_ylim([0.0,1.05]) 637 | fig.savefig(RESULTS_FILES_PATH + "/Recall/" + class_name + ".png") 638 | plt.cla() 639 | 640 | plt.plot(score, prec, "-s", color='palevioletred') 641 | plt.title('class: ' + Precision_text + "\nscore_threhold=" + str(score_threhold)) 642 | plt.xlabel('Score_Threhold') 643 | plt.ylabel('Precision') 644 | axes = plt.gca() 645 | axes.set_xlim([0.0,1.0]) 646 | axes.set_ylim([0.0,1.05]) 647 | fig.savefig(RESULTS_FILES_PATH + "/Precision/" + class_name + ".png") 648 | plt.cla() 649 | 650 | if show_animation: 651 | cv2.destroyAllWindows() 652 | if n_classes == 0: 653 | 
print("未检测到任何种类,请检查标签信息与get_map.py中的classes_path是否修改。") 654 | return 0 655 | results_file.write("\n# mAP of all classes\n") 656 | mAP = sum_AP / n_classes 657 | text = "mAP = {0:.2f}%".format(mAP*100) 658 | results_file.write(text + "\n") 659 | print(text) 660 | 661 | shutil.rmtree(TEMP_FILES_PATH) 662 | 663 | """ 664 | Count total of detection-results 665 | """ 666 | det_counter_per_class = {} 667 | for txt_file in dr_files_list: 668 | lines_list = file_lines_to_list(txt_file) 669 | for line in lines_list: 670 | class_name = line.split()[0] 671 | if class_name in det_counter_per_class: 672 | det_counter_per_class[class_name] += 1 673 | else: 674 | det_counter_per_class[class_name] = 1 675 | dr_classes = list(det_counter_per_class.keys()) 676 | 677 | """ 678 | Write number of ground-truth objects per class to results.txt 679 | """ 680 | with open(RESULTS_FILES_PATH + "/results.txt", 'a') as results_file: 681 | results_file.write("\n# Number of ground-truth objects per class\n") 682 | for class_name in sorted(gt_counter_per_class): 683 | results_file.write(class_name + ": " + str(gt_counter_per_class[class_name]) + "\n") 684 | 685 | """ 686 | Finish counting true positives 687 | """ 688 | for class_name in dr_classes: 689 | if class_name not in gt_classes: 690 | count_true_positives[class_name] = 0 691 | 692 | """ 693 | Write number of detected objects per class to results.txt 694 | """ 695 | with open(RESULTS_FILES_PATH + "/results.txt", 'a') as results_file: 696 | results_file.write("\n# Number of detected objects per class\n") 697 | for class_name in sorted(dr_classes): 698 | n_det = det_counter_per_class[class_name] 699 | text = class_name + ": " + str(n_det) 700 | text += " (tp:" + str(count_true_positives[class_name]) + "" 701 | text += ", fp:" + str(n_det - count_true_positives[class_name]) + ")\n" 702 | results_file.write(text) 703 | 704 | """ 705 | Plot the total number of occurences of each class in the ground-truth 706 | """ 707 | if draw_plot: 708 | window_title = "ground-truth-info" 709 | plot_title = "ground-truth\n" 710 | plot_title += "(" + str(len(ground_truth_files_list)) + " files and " + str(n_classes) + " classes)" 711 | x_label = "Number of objects per class" 712 | output_path = RESULTS_FILES_PATH + "/ground-truth-info.png" 713 | to_show = False 714 | plot_color = 'forestgreen' 715 | draw_plot_func( 716 | gt_counter_per_class, 717 | n_classes, 718 | window_title, 719 | plot_title, 720 | x_label, 721 | output_path, 722 | to_show, 723 | plot_color, 724 | '', 725 | ) 726 | 727 | # """ 728 | # Plot the total number of occurences of each class in the "detection-results" folder 729 | # """ 730 | # if draw_plot: 731 | # window_title = "detection-results-info" 732 | # # Plot title 733 | # plot_title = "detection-results\n" 734 | # plot_title += "(" + str(len(dr_files_list)) + " files and " 735 | # count_non_zero_values_in_dictionary = sum(int(x) > 0 for x in list(det_counter_per_class.values())) 736 | # plot_title += str(count_non_zero_values_in_dictionary) + " detected classes)" 737 | # # end Plot title 738 | # x_label = "Number of objects per class" 739 | # output_path = RESULTS_FILES_PATH + "/detection-results-info.png" 740 | # to_show = False 741 | # plot_color = 'forestgreen' 742 | # true_p_bar = count_true_positives 743 | # draw_plot_func( 744 | # det_counter_per_class, 745 | # len(det_counter_per_class), 746 | # window_title, 747 | # plot_title, 748 | # x_label, 749 | # output_path, 750 | # to_show, 751 | # plot_color, 752 | # true_p_bar 753 | # ) 754 | 755 | """ 756 | 
    """
    Draw log-average miss rate plot (Show lamr of all classes in decreasing order)
    """
    if draw_plot:
        window_title = "lamr"
        plot_title = "log-average miss rate"
        x_label = "log-average miss rate"
        output_path = RESULTS_FILES_PATH + "/lamr.png"
        to_show = False
        plot_color = 'royalblue'
        draw_plot_func(
            lamr_dictionary,
            n_classes,
            window_title,
            plot_title,
            x_label,
            output_path,
            to_show,
            plot_color,
            ""
        )

    """
    Draw mAP plot (Show AP's of all classes in decreasing order)
    """
    if draw_plot:
        window_title = "mAP"
        plot_title = "mAP = {0:.2f}%".format(mAP * 100)
        x_label = "Average Precision"
        output_path = RESULTS_FILES_PATH + "/mAP.png"
        to_show = True
        plot_color = 'royalblue'
        draw_plot_func(
            ap_dictionary,
            n_classes,
            window_title,
            plot_title,
            x_label,
            output_path,
            to_show,
            plot_color,
            ""
        )
    return mAP

def preprocess_gt(gt_path, class_names):
    image_ids = os.listdir(gt_path)
    results = {}

    images = []
    bboxes = []
    for i, image_id in enumerate(image_ids):
        lines_list = file_lines_to_list(os.path.join(gt_path, image_id))
        boxes_per_image = []
        image = {}
        image_id = os.path.splitext(image_id)[0]
        image['file_name'] = image_id + '.jpg'
        image['width'] = 1
        image['height'] = 1
        #-----------------------------------------------------------------#
        #   Thanks to 多学学英语吧 for the tip: using a string id here
        #   fixes the 'Results do not correspond to current coco set' error.
        #-----------------------------------------------------------------#
        image['id'] = str(image_id)

        for line in lines_list:
            difficult = 0
            if "difficult" in line:
                line_split = line.split()
                left, top, right, bottom, _difficult = line_split[-5:]
                class_name = ""
                for name in line_split[:-5]:
                    class_name += name + " "
                class_name = class_name[:-1]
                difficult = 1
            else:
                line_split = line.split()
                left, top, right, bottom = line_split[-4:]
                class_name = ""
                for name in line_split[:-4]:
                    class_name += name + " "
                class_name = class_name[:-1]

            left, top, right, bottom = float(left), float(top), float(right), float(bottom)
            if class_name not in class_names:
                continue
            cls_id = class_names.index(class_name) + 1
            # COCO-style [x, y, w, h] box; the trailing value is the (slightly offset) box area
            bbox = [left, top, right - left, bottom - top, difficult, str(image_id), cls_id, (right - left) * (bottom - top) - 10.0]
            boxes_per_image.append(bbox)
        images.append(image)
        bboxes.extend(boxes_per_image)
    results['images'] = images

    categories = []
    for i, cls in enumerate(class_names):
        category = {}
        category['supercategory'] = cls
        category['name'] = cls
        category['id'] = i + 1
        categories.append(category)
    results['categories'] = categories

    annotations = []
    for i, box in enumerate(bboxes):
        annotation = {}
        annotation['area'] = box[-1]
        annotation['category_id'] = box[-2]
        annotation['image_id'] = box[-3]
        annotation['iscrowd'] = box[-4]
        annotation['bbox'] = box[:4]
        annotation['id'] = i
        annotations.append(annotation)
    results['annotations'] = annotations
    return results
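# A short illustration (hypothetical values) of the COCO-style records that
# preprocess_gt builds. Assuming the standard VOC class list, a ground-truth
# line "dog 12 34 200 180" in 000005.txt becomes (width/height are dummy 1s,
# since only boxes are evaluated; the corner box is converted to [x, y, w, h]):
#     annotation = {'area': 27438.0, 'category_id': 12, 'image_id': '000005',
#                   'iscrowd': 0, 'bbox': [12.0, 34.0, 188.0, 146.0], 'id': 0}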
def preprocess_dr(dr_path, class_names):
    image_ids = os.listdir(dr_path)
    results = []
    for image_id in image_ids:
        lines_list = file_lines_to_list(os.path.join(dr_path, image_id))
        image_id = os.path.splitext(image_id)[0]
        for line in lines_list:
            line_split = line.split()
            confidence, left, top, right, bottom = line_split[-5:]
            class_name = ""
            for name in line_split[:-5]:
                class_name += name + " "
            class_name = class_name[:-1]
            left, top, right, bottom = float(left), float(top), float(right), float(bottom)
            result = {}
            result["image_id"] = str(image_id)
            if class_name not in class_names:
                continue
            result["category_id"] = class_names.index(class_name) + 1
            result["bbox"] = [left, top, right - left, bottom - top]
            result["score"] = float(confidence)
            results.append(result)
    return results

def get_coco_map(class_names, path):
    GT_PATH = os.path.join(path, 'ground-truth')
    DR_PATH = os.path.join(path, 'detection-results')
    COCO_PATH = os.path.join(path, 'coco_eval')

    if not os.path.exists(COCO_PATH):
        os.makedirs(COCO_PATH)

    GT_JSON_PATH = os.path.join(COCO_PATH, 'instances_gt.json')
    DR_JSON_PATH = os.path.join(COCO_PATH, 'instances_dr.json')

    with open(GT_JSON_PATH, "w") as f:
        results_gt = preprocess_gt(GT_PATH, class_names)
        json.dump(results_gt, f, indent=4)

    with open(DR_JSON_PATH, "w") as f:
        results_dr = preprocess_dr(DR_PATH, class_names)
        json.dump(results_dr, f, indent=4)
    if len(results_dr) == 0:
        print("No objects detected.")
        return [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

    cocoGt = COCO(GT_JSON_PATH)
    cocoDt = cocoGt.loadRes(DR_JSON_PATH)
    cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()

    return cocoEval.stats
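# Minimal usage sketch for get_coco_map (layout and paths are assumptions; in
# this repo, get_map.py is the script that fills these folders beforehand):
#     map_out/
#         ground-truth/        one txt per image: "<class> <left> <top> <right> <bottom>"
#         detection-results/   one txt per image: "<class> <confidence> <left> <top> <right> <bottom>"
#
#     from utils.utils import get_classes
#     from utils.utils_map import get_coco_map
#     class_names, _ = get_classes('model_data/voc_classes.txt')
#     stats = get_coco_map(class_names, 'map_out')  # stats[1] is AP at IoU 0.50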
--------------------------------------------------------------------------------
/faster-rcnn-pytorch-master/voc_annotation.py:
--------------------------------------------------------------------------------
import os
import random
import xml.etree.ElementTree as ET
import numpy as np
from utils.utils import get_classes

#--------------------------------------------------------------------------------------------------------------------------------#
#   annotation_mode selects what this script does when it is run:
#   0: the whole labelling pipeline, i.e. both the txt files in VOCdevkit/VOC2007/ImageSets
#      and the 2007_train.txt / 2007_val.txt used for training
#   1: only the txt files in VOCdevkit/VOC2007/ImageSets
#   2: only the 2007_train.txt / 2007_val.txt used for training
#--------------------------------------------------------------------------------------------------------------------------------#
annotation_mode = 2
#-------------------------------------------------------------------#
#   Must be modified: it is used to generate the object information
#   in 2007_train.txt and 2007_val.txt, and has to match the
#   classes_path used for training and prediction.
#   If the generated 2007_train.txt contains no object information,
#   the classes were not set correctly.
#   Only effective when annotation_mode is 0 or 2.
#-------------------------------------------------------------------#
classes_path = "./faster-rcnn-pytorch-master/model_data/voc_classes.txt"
#--------------------------------------------------------------------------------------------------------------------------------#
#   trainval_percent sets the ratio of (train + val) to test; by default (train + val) : test = 9 : 1.
#   train_percent sets the ratio of train to val within (train + val); by default train : val = 9 : 1.
#   Only effective when annotation_mode is 0 or 1.
#--------------------------------------------------------------------------------------------------------------------------------#
trainval_percent = 0.9
train_percent = 0.9
#-------------------------------------------------------#
#   Points to the folder where the VOC dataset lives.
#   Defaults to the VOC dataset under the repository root.
#-------------------------------------------------------#
VOCdevkit_path = "./faster-rcnn-pytorch-master/VOCdevkit"

VOCdevkit_sets = [('2007', 'train'), ('2007', 'val')]
classes, _ = get_classes(classes_path)

#-------------------------------------------------------#
#   Counters: images per split and objects per class.
#-------------------------------------------------------#
photo_nums = np.zeros(len(VOCdevkit_sets))
nums = np.zeros(len(classes))

def convert_annotation(year, image_id, list_file):
    in_file = open(os.path.join(VOCdevkit_path, 'VOC%s/Annotations/%s.xml' % (year, image_id)), encoding='utf-8')
    tree = ET.parse(in_file)
    root = tree.getroot()

    for obj in root.iter('object'):
        difficult = 0
        if obj.find('difficult') is not None:
            difficult = obj.find('difficult').text
        cls = obj.find('name').text
        # skip classes outside classes_path as well as objects marked difficult
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (int(float(xmlbox.find('xmin').text)), int(float(xmlbox.find('ymin').text)), int(float(xmlbox.find('xmax').text)), int(float(xmlbox.find('ymax').text)))
        list_file.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id))

        nums[classes.index(cls)] = nums[classes.index(cls)] + 1

if __name__ == "__main__":
    random.seed(0)

    if annotation_mode == 0 or annotation_mode == 1:
        print("Generate txt in ImageSets.")
        xmlfilepath = os.path.join(VOCdevkit_path, 'VOC2007/Annotations')
        saveBasePath = os.path.join(VOCdevkit_path, 'VOC2007/ImageSets/Main')
        temp_xml = os.listdir(xmlfilepath)
        total_xml = []
        for xml in temp_xml:
            if xml.endswith(".xml"):
                total_xml.append(xml)

        num = len(total_xml)
        indices = range(num)
        tv = int(num * trainval_percent)
        tr = int(tv * train_percent)
        trainval = random.sample(indices, tv)
        train = random.sample(trainval, tr)

        print("train and val size", tv)
        print("train size", tr)
        ftrainval = open(os.path.join(saveBasePath, 'trainval.txt'), 'w')
        ftest = open(os.path.join(saveBasePath, 'test.txt'), 'w')
        ftrain = open(os.path.join(saveBasePath, 'train.txt'), 'w')
        fval = open(os.path.join(saveBasePath, 'val.txt'), 'w')

        for i in indices:
            name = total_xml[i][:-4] + '\n'
            if i in trainval:
                ftrainval.write(name)
                if i in train:
                    ftrain.write(name)
                else:
                    fval.write(name)
            else:
                ftest.write(name)

        ftrainval.close()
        ftrain.close()
        fval.close()
        ftest.close()
        print("Generate txt in ImageSets done.")
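    # A quick check of the split arithmetic above, with a hypothetical count of
    # 1000 xml files and the default trainval_percent = train_percent = 0.9:
    # tv = 900 and tr = 810, i.e. 810 train / 90 val / 100 test images.
    #
    # The block below then writes, per image, an absolute path followed by zero
    # or more "left,top,right,bottom,class_id" groups, e.g. (hypothetical):
    #     /abs/path/VOCdevkit/VOC2007/JPEGImages/000005.jpg 263,211,324,339,8 165,264,253,372,8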
    if annotation_mode == 0 or annotation_mode == 2:
        print("Generate 2007_train.txt and 2007_val.txt for train.")
        type_index = 0
        for year, image_set in VOCdevkit_sets:
            image_ids = open(os.path.join(VOCdevkit_path, 'VOC%s/ImageSets/Main/%s.txt' % (year, image_set)), encoding='utf-8').read().strip().split()
            list_file = open('%s_%s.txt' % (year, image_set), 'w', encoding='utf-8')
            for image_id in image_ids:
                list_file.write('%s/VOC%s/JPEGImages/%s.jpg' % (os.path.abspath(VOCdevkit_path), year, image_id))
                convert_annotation(year, image_id, list_file)
                list_file.write('\n')
            photo_nums[type_index] = len(image_ids)
            type_index += 1
            list_file.close()
        print("Generate 2007_train.txt and 2007_val.txt for train done.")

        def printTable(List1, List2):
            for i in range(len(List1[0])):
                print("|", end=' ')
                for j in range(len(List1)):
                    print(List1[j][i].rjust(int(List2[j])), end=' ')
                    print("|", end=' ')
                print()

        str_nums = [str(int(x)) for x in nums]
        tableData = [
            classes, str_nums
        ]
        colWidths = [0] * len(tableData)
        for i in range(len(tableData)):
            for j in range(len(tableData[i])):
                if len(tableData[i][j]) > colWidths[i]:
                    colWidths[i] = len(tableData[i][j])
        printTable(tableData, colWidths)

        if photo_nums[0] <= 500:
            print("The training set has fewer than 500 images, which is a rather small amount of data; please set a larger number of training epochs to get enough gradient-descent steps.")

        if np.sum(nums) == 0:
            print("No objects were found in the dataset. Make sure classes_path matches your own dataset and that the label names are correct, otherwise training will have no effect!")
            print("No objects were found in the dataset. Make sure classes_path matches your own dataset and that the label names are correct, otherwise training will have no effect!")
            print("No objects were found in the dataset. Make sure classes_path matches your own dataset and that the label names are correct, otherwise training will have no effect!")
            print("(Important things are said three times.)")

--------------------------------------------------------------------------------