├── utils
│   ├── __init__.py
│   └── augmentations.py
├── layers
│   ├── __init__.py
│   ├── functions
│   │   ├── __init__.py
│   │   ├── prior_box.py
│   │   └── detection.py
│   ├── modules
│   │   ├── __init__.py
│   │   ├── l2norm.py
│   │   ├── focal_loss.py
│   │   └── multibox_loss.py
│   └── box_utils.py
├── .gitattributes
├── netModel
│   ├── testModel.py
│   ├── multi_flow.py
│   └── resnet.py
├── data
│   ├── validPhoto.py
│   ├── splitTrainVal.py
│   ├── splitTrainVal copy.py
│   ├── xmlPaser.py
│   ├── coco
│   │   └── coco_labels.txt
│   ├── __init__.py
│   ├── resultVisualize.py
│   ├── config.py
│   ├── xmlPaserGenLabel.py
│   ├── voc0712.py
│   ├── custom.py
│   ├── custom_for_visual.py
│   └── coco.py
├── LICENSE
├── README.md
├── .gitignore
├── 实验 4.1
│   ├── ssd_resnet_101.py
│   ├── trainCustom_101.py
│   ├── visualTest_gauge.py
│   └── evalCustom_101.py
└── 实验 4.2
    ├── ssd_resnet_18.py
    ├── visualTest_building.py
    ├── trainCustom_18.py
    └── evalCustom_18.py
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .augmentations import SSDAugmentation
--------------------------------------------------------------------------------
/layers/__init__.py:
--------------------------------------------------------------------------------
1 | from .functions import *
2 | from .modules import *
3 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.ipynb linguist-language=Python
2 | .ipynb_checkpoints/* linguist-documentation
3 | dev.ipynb linguist-documentation
4 |
--------------------------------------------------------------------------------
/layers/functions/__init__.py:
--------------------------------------------------------------------------------
1 | from .detection import Detect
2 | from .prior_box import PriorBox
3 |
4 |
5 | __all__ = ['Detect', 'PriorBox']
6 |
--------------------------------------------------------------------------------
/layers/modules/__init__.py:
--------------------------------------------------------------------------------
1 | from .l2norm import L2Norm
2 | from .multibox_loss import MultiBoxLoss
3 |
4 | __all__ = ['L2Norm', 'MultiBoxLoss']
5 |
--------------------------------------------------------------------------------
/netModel/testModel.py:
--------------------------------------------------------------------------------
1 | from resnet import resnet101
2 | import torch
3 |
4 | if __name__ == '__main__':
5 | model = resnet101()
6 | input = torch.rand(2,3,512,512)
7 | res = model(input)
8 | print(model)
--------------------------------------------------------------------------------
/data/validPhoto.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path, PurePath
2 | import cv2
3 |
4 | if __name__ == '__main__':
5 | p = Path('./piaofu/piao/shenhe/JPEGImages/')
6 | files = [x for x in p.iterdir() if x.is_file()]
7 | for file in files:
8 |         print(file.name)
9 |         img = cv2.imread(str(file), cv2.IMREAD_COLOR)
10 |         # cv2.imread returns None (it does not raise) when a file cannot be decoded
11 |         if img is None:
12 |             print('unreadable image:', file.name)
--------------------------------------------------------------------------------
/data/splitTrainVal.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path, PurePath
2 |
3 | resultPath = "./video/buildingwater/ImageSets/Main/"
4 | def splitDataset(path, filename):
5 | p = Path(path)
6 | files = [x for x in p.iterdir() if x.is_file()]
7 | count = 0
8 | with open(resultPath+filename+'trainval.txt', 'w+') as f:
9 | with open(resultPath+filename+'train.txt', 'w+') as ft:
10 | with open(resultPath+filename+'val.txt', 'w+') as fv:
11 | for file in files:
12 | f.write(file.stem + '\n')
13 | if count % 5 == 4:
14 | fv.write(file.stem + '\n')
15 | else:
16 | ft.write(file.stem + '\n')
17 | count += 1
18 |
19 | splitDataset('./video/buildingwater/Annotations', '')
20 |
--------------------------------------------------------------------------------
/layers/modules/l2norm.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.autograd import Function
4 | from torch.autograd import Variable
5 | import torch.nn.init as init
6 |
7 | class L2Norm(nn.Module):
8 | def __init__(self,n_channels, scale):
9 | super(L2Norm,self).__init__()
10 | self.n_channels = n_channels
11 | self.gamma = scale or None
12 | self.eps = 1e-10
13 | self.weight = nn.Parameter(torch.Tensor(self.n_channels))
14 | self.reset_parameters()
15 |
16 | def reset_parameters(self):
17 | init.constant_(self.weight,self.gamma)
18 |
19 | def forward(self, x):
20 | norm = x.pow(2).sum(dim=1, keepdim=True).sqrt()+self.eps
21 | #x /= norm
22 | x = torch.div(x,norm)
23 | out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x
24 | return out
25 |
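26 | # Editorial note (not part of the original file): L2Norm L2-normalizes a feature map across its
27 | # channel dimension and rescales it with a learnable per-channel weight initialized to `scale`;
28 | # in SSD it is applied to the conv4_3 source layer, whose activations have a larger magnitude
29 | # than those of the other source layers.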
--------------------------------------------------------------------------------
/data/splitTrainVal copy.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path, PurePath
2 |
3 | resultPath = "./video/buildingwater/ImageSets/Main/"
4 | def splitDataset(path, filename):
5 | p = Path(path)
6 | files = [x for x in p.iterdir() if x.is_file()]
7 | count = 0
8 | with open(resultPath+filename+'trainval0.txt', 'w+') as f:
9 | with open(resultPath+filename+'train0.txt', 'w+') as ft:
10 | with open(resultPath+filename+'val0.txt', 'w+') as fv:
11 | for file in files:
12 | f.write(file.stem + '\n')
13 | if file.stem.find('v1') > -1 or file.stem.find('v2') > -1 or file.stem.find('v4') > -1 or file.stem.find('v5') > -1 or file.stem.find('v6') > -1:
14 | ft.write(file.stem + '\n')
15 | elif file.stem.find('v3') > -1:
16 | fv.write(file.stem + '\n')
17 | count += 1
18 |
19 | splitDataset('./video/buildingwater/Annotations', '')
20 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Max deGroot, Ellis Brown
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/data/xmlPaser.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: UTF-8 -*-
3 |
4 | import xml.sax
5 | from pathlib import Path, PurePath
6 |
7 | total = {}
8 |
9 | class MovieHandler( xml.sax.ContentHandler ):
10 | def __init__(self):
11 | self.CurrentData = ""
12 | self.name = ''
13 |
14 |     # handle element start events
15 | def startElement(self, tag, attributes):
16 | self.CurrentData = tag
17 |
18 |     # handle element end events
19 | def endElement(self, tag):
20 | if self.CurrentData == "name":
21 | if self.name in total:
22 | total[self.name] += 1
23 | else:
24 | total[self.name] = 1
25 | self.CurrentData = ""
26 |
27 |     # handle character content events
28 | def characters(self, content):
29 | if self.CurrentData == "name":
30 | self.name = content
31 |
32 | if ( __name__ == "__main__"):
33 |
34 |     # create an XMLReader
35 | parser = xml.sax.make_parser()
36 |     # turn off namespaces
37 | parser.setFeature(xml.sax.handler.feature_namespaces, 0)
38 |
39 |     # set the custom ContentHandler
40 | Handler = MovieHandler()
41 | parser.setContentHandler( Handler )
42 |
43 | path = '.\\piaofu\\piao\\shenhe\\Annotations'
44 | p = Path(path)
45 | files = [x for x in p.iterdir() if x.is_file()]
46 | for f in files:
47 | parser.parse(path+'\\'+f.name)
48 | print(total)
--------------------------------------------------------------------------------
/data/coco/coco_labels.txt:
--------------------------------------------------------------------------------
1 | 1,1,person
2 | 2,2,bicycle
3 | 3,3,car
4 | 4,4,motorcycle
5 | 5,5,airplane
6 | 6,6,bus
7 | 7,7,train
8 | 8,8,truck
9 | 9,9,boat
10 | 10,10,traffic light
11 | 11,11,fire hydrant
12 | 13,12,stop sign
13 | 14,13,parking meter
14 | 15,14,bench
15 | 16,15,bird
16 | 17,16,cat
17 | 18,17,dog
18 | 19,18,horse
19 | 20,19,sheep
20 | 21,20,cow
21 | 22,21,elephant
22 | 23,22,bear
23 | 24,23,zebra
24 | 25,24,giraffe
25 | 27,25,backpack
26 | 28,26,umbrella
27 | 31,27,handbag
28 | 32,28,tie
29 | 33,29,suitcase
30 | 34,30,frisbee
31 | 35,31,skis
32 | 36,32,snowboard
33 | 37,33,sports ball
34 | 38,34,kite
35 | 39,35,baseball bat
36 | 40,36,baseball glove
37 | 41,37,skateboard
38 | 42,38,surfboard
39 | 43,39,tennis racket
40 | 44,40,bottle
41 | 46,41,wine glass
42 | 47,42,cup
43 | 48,43,fork
44 | 49,44,knife
45 | 50,45,spoon
46 | 51,46,bowl
47 | 52,47,banana
48 | 53,48,apple
49 | 54,49,sandwich
50 | 55,50,orange
51 | 56,51,broccoli
52 | 57,52,carrot
53 | 58,53,hot dog
54 | 59,54,pizza
55 | 60,55,donut
56 | 61,56,cake
57 | 62,57,chair
58 | 63,58,couch
59 | 64,59,potted plant
60 | 65,60,bed
61 | 67,61,dining table
62 | 70,62,toilet
63 | 72,63,tv
64 | 73,64,laptop
65 | 74,65,mouse
66 | 75,66,remote
67 | 76,67,keyboard
68 | 77,68,cell phone
69 | 78,69,microwave
70 | 79,70,oven
71 | 80,71,toaster
72 | 81,72,sink
73 | 82,73,refrigerator
74 | 84,74,book
75 | 85,75,clock
76 | 86,76,vase
77 | 87,77,scissors
78 | 88,78,teddy bear
79 | 89,79,hair drier
80 | 90,80,toothbrush
81 |
--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
1 | from .voc0712 import VOCDetection, VOCAnnotationTransform, VOC_CLASSES, VOC_ROOT
2 | from .custom import customDetection, customAnnotationTransform, CUSTOM_CLASSES, CUSTOM_ROOT
3 |
4 | # from .coco import COCODetection, COCOAnnotationTransform, COCO_CLASSES, COCO_ROOT, get_label_map
5 | from .config import *
6 | import torch
7 | import cv2
8 | import numpy as np
9 |
10 | def detection_collate(batch):
11 | """Custom collate fn for dealing with batches of images that have a different
12 | number of associated object annotations (bounding boxes).
13 |
14 | Arguments:
15 | batch: (tuple) A tuple of tensor images and lists of annotations
16 |
17 | Return:
18 | A tuple containing:
19 | 1) (tensor) batch of images stacked on their 0 dim
20 | 2) (list of tensors) annotations for a given image are stacked on
21 | 0 dim
22 | """
23 | targets = []
24 | imgs = []
25 | for sample in batch:
26 | imgs.append(sample[0])
27 | targets.append(torch.FloatTensor(sample[1]))
28 | return torch.stack(imgs, 0), targets
29 |
30 |
31 | def base_transform(image, size, mean):
32 | x = cv2.resize(image, (size, size)).astype(np.float32)
33 | x -= mean
34 | x = x.astype(np.float32)
35 | return x
36 |
37 |
38 | class BaseTransform:
39 | def __init__(self, size, mean):
40 | self.size = size
41 | self.mean = np.array(mean, dtype=np.float32)
42 |
43 | def __call__(self, image, boxes=None, labels=None):
44 | return base_transform(image, self.size, self.mean), boxes, labels
45 |
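46 | # Usage sketch (editorial addition, not in the original file): detection_collate is meant to be
47 | # passed to torch.utils.data.DataLoader so that each batch keeps a Python list of per-image
48 | # target tensors instead of trying to stack ragged box arrays. `dataset` below stands for any
49 | # of the detection datasets in this package (e.g. customDetection or VOCDetection).
50 | #
51 | # from torch.utils.data import DataLoader
52 | # loader = DataLoader(dataset, batch_size=8, shuffle=True, collate_fn=detection_collate)
53 | # images, targets = next(iter(loader))  # images: [8, 3, H, W]; targets: list of 8 [num_objs, 5] tensors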
--------------------------------------------------------------------------------
/data/resultVisualize.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import os.path as osp
4 | import math
5 |
6 | rootPath = 'F:/ssd/data/video/waterline'
7 |
8 | imgList = {}
9 |
10 | if __name__ == "__main__":
11 | with open('./det_test_waterline_99.txt', 'r') as f:
12 | text_lines = f.readlines()
13 | for line in text_lines:
14 | info = line.split(" ")
15 | name, score, x1, y1, x2, y2 = info
16 | if name in imgList:
17 | if float(score) > imgList[name]['score']:
18 | imgList[name] = {
19 | 'score': float(score),
20 | 'x1': float(x1),
21 | 'y1': float(y1),
22 | 'x2': float(x2),
23 | 'y2': float(y2)
24 | }
25 | else:
26 | imgList[name] = {
27 | 'score': float(score),
28 | 'x1': float(x1),
29 | 'y1': float(y1),
30 | 'x2': float(x2),
31 | 'y2': float(y2)
32 | }
33 |
34 | cv2.namedWindow('w1',1)
35 | img_path = osp.join(rootPath, 'JPEGImages', '%s.jpg')
36 | for obj in imgList.items():
37 | name, img = obj
38 | image = cv2.imread(img_path % name)
39 | (h, w, c) = image.shape
40 | cv2.rectangle(image, (math.floor(img['x1']), math.floor(img['y1'])), (math.floor(img['x2']), math.floor(img['y2'])), (255,0,0), 5)
41 | # cv2.putText(image, img['score'], (math.floor(img['x1']), math.floor(img['y1'])), cv2.FONT_HERSHEY_COMPLEX, 5, (0, 255, 0), 12)
42 | # sc = min(512, h) / h
43 | # image = cv2.resize(image, (math.floor(w * sc), math.floor(h * sc)))
44 | image = cv2.resize(image, (512, 512))
45 | cv2.imshow('w1', image)
46 | cv2.waitKey()
47 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SSD: Single Shot MultiBox Object Detector, in PyTorch
2 | A [PyTorch](http://pytorch.org/) implementation of [Single Shot MultiBox Detector](http://arxiv.org/abs/1512.02325) from the 2016 paper by Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, Cheng-Yang Fu, and Alexander C. Berg. The official and original Caffe code can be found [here](https://github.com/weiliu89/caffe/tree/ssd).
3 |
4 |
5 |
6 |
7 | ### Table of Contents
8 | - Installation
9 | - Datasets
10 |
11 |
12 |
13 |
14 |
15 |
16 | ## Installation
17 | - Install [PyTorch](http://pytorch.org/) by selecting your environment on the website and running the appropriate command.
18 | - Clone this repository.
19 | * Note: We currently only support Python 3+.
20 | - Then download the dataset by following the [instructions](#datasets) below.
21 | - We now support [Visdom](https://github.com/facebookresearch/visdom) for real-time loss visualization during training!
22 | * To use Visdom in the browser:
23 | ```Shell
24 | # First install Python server and client
25 | pip install visdom
26 | # Start the server (probably in a screen or tmux)
27 | python -m visdom.server
28 | ```
29 | * Then (during training) navigate to http://localhost:8097/ (see the Train section below for training details).
30 | - Note: For training, we currently support [VOC](http://host.robots.ox.ac.uk/pascal/VOC/) and [COCO](http://mscoco.org/), and aim to add [ImageNet](http://www.image-net.org/) support soon.
31 |
32 | ## Train
33 |
34 | ### Experiment 4.1 (`实验 4.1`)
35 |
36 | Use the data in gauge.zip to train a water-level gauge detection model.
37 |
38 | Use the data in mark.zip to train a detection model for the actual water level and the warning water level.
39 |
40 | ### Experiment 4.2 (`实验 4.2`)
41 |
42 | Use the data in buildingwater.zip to train a detection model for river regions and building regions.
43 |
44 | ## Notes
45 |
46 | Modify CUSTOM_CLASSES in data/custom.py to match the dataset you are training on.
47 |
48 | Modify num_classes, lr_steps, and max_iter in data/config.py to match the dataset you are training on.
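49 |
50 | For example, for the two-class buildingwater dataset the edits would look roughly like this (a sketch only; the exact lr_steps/max_iter values depend on your data and training budget):
51 |
52 | ```Python
53 | # data/custom.py
54 | CUSTOM_CLASSES = ('building', 'water')
55 |
56 | # data/config.py -- note that num_classes also counts the background class
57 | custom = {
58 |     'num_classes': 3,
59 |     'lr_steps': (22500, 30000, 37500),
60 |     'max_iter': 40000,
61 |     # ... keep the remaining keys unchanged ...
62 | }
63 | ```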
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 |
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 |
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 |
48 | # Translations
49 | *.mo
50 | *.pot
51 |
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 |
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 |
60 | # Scrapy stuff:
61 | .scrapy
62 |
63 | # Sphinx documentation
64 | docs/_build/
65 |
66 | # PyBuilder
67 | target/
68 |
69 | # IPython Notebook
70 | .ipynb_checkpoints
71 |
72 | # pyenv
73 | .python-version
74 |
75 | # celery beat schedule file
76 | celerybeat-schedule
77 |
78 | # dotenv
79 | .env
80 |
81 | # virtualenv
82 | venv/
83 | ENV/
84 |
85 | # Spyder project settings
86 | .spyderproject
87 |
88 | # Rope project settings
89 | .ropeproject
90 |
91 | # atom remote-sync package
92 | .remote-sync.json
93 |
94 | # weights
95 | weights/
96 |
97 | #DS_Store
98 | .DS_Store
99 |
100 | # dev stuff
101 | eval/
102 | eval.ipynb
103 | dev.ipynb
104 | .vscode/
105 |
106 | # not ready
107 | videos/
108 | templates/
109 | data/ssd_dataloader.py
110 | data/datasets/
111 | data/video/
112 | doc/visualize.py
113 | read_results.py
114 | ssd300_120000/
115 | demos/live
116 | webdemo.py
117 | test_data_aug.py
118 |
119 | # attributes
120 |
121 | # pycharm
122 | .idea/
123 |
124 | # temp checkout soln
125 | data/datasets/
126 | data/ssd_dataloader.py
127 | data/piaofu
128 | data/VOCdevkit
129 | data/*.zip
130 |
131 | # pylint
132 | .pylintrc
--------------------------------------------------------------------------------
/layers/functions/prior_box.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from math import sqrt as sqrt
3 | from itertools import product as product
4 | import torch
5 |
6 |
7 | class PriorBox(object):
8 | """Compute priorbox coordinates in center-offset form for each source
9 | feature map.
10 | """
11 | def __init__(self, cfg):
12 | super(PriorBox, self).__init__()
13 | self.image_size = cfg['min_dim']
14 | # number of priors for feature map location (either 4 or 6)
15 | self.num_priors = len(cfg['aspect_ratios'])
16 | self.variance = cfg['variance'] or [0.1]
17 | self.feature_maps = cfg['feature_maps']
18 | self.min_sizes = cfg['min_sizes']
19 | self.max_sizes = cfg['max_sizes']
20 | self.steps = cfg['steps']
21 | self.aspect_ratios = cfg['aspect_ratios']
22 | self.clip = cfg['clip']
23 | self.version = cfg['name']
24 | for v in self.variance:
25 | if v <= 0:
26 | raise ValueError('Variances must be greater than 0')
27 |
28 | def forward(self):
29 | mean = []
30 | for k, f in enumerate(self.feature_maps):
31 | for i, j in product(range(f), repeat=2):
32 | f_k = self.image_size / self.steps[k]
33 | # unit center x,y
34 | cx = (j + 0.5) / f_k
35 | cy = (i + 0.5) / f_k
36 |
37 | # aspect_ratio: 1
38 | # rel size: min_size
39 | s_k = self.min_sizes[k]/self.image_size
40 | mean += [cx, cy, s_k, s_k]
41 |
42 | # aspect_ratio: 1
43 | # rel size: sqrt(s_k * s_(k+1))
44 | s_k_prime = sqrt(s_k * (self.max_sizes[k]/self.image_size))
45 | mean += [cx, cy, s_k_prime, s_k_prime]
46 |
47 | # rest of aspect ratios
48 | for ar in self.aspect_ratios[k]:
49 | mean += [cx, cy, s_k*sqrt(ar), s_k/sqrt(ar)]
50 | mean += [cx, cy, s_k/sqrt(ar), s_k*sqrt(ar)]
51 | # back to torch land
52 | output = torch.Tensor(mean).view(-1, 4)
53 | if self.clip:
54 | output.clamp_(max=1, min=0)
55 | return output
56 |
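57 | # Usage sketch (editorial addition, not in the original file): PriorBox is driven entirely by one
58 | # of the config dicts in data/config.py.
59 | #
60 | # from data.config import voc
61 | # priors = PriorBox(voc).forward()  # Tensor of shape [num_priors, 4] in (cx, cy, w, h) form
62 | # # the 300x300 VOC config above yields 8732 default boxes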
--------------------------------------------------------------------------------
/netModel/multi_flow.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | try:
4 | from torch.hub import load_state_dict_from_url
5 | except ImportError:
6 | from torch.utils.model_zoo import load_url as load_state_dict_from_url
7 | from resnet import conv3x3  # assumption: the usual torchvision-style 3x3 conv helper is defined in netModel/resnet.py
8 | class MultiFlow_Block(nn.Module):
9 |
10 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
11 | base_width=64, dilation=1, norm_layer=None):
12 |         super(MultiFlow_Block, self).__init__()
13 | if norm_layer is None:
14 | norm_layer = nn.BatchNorm2d
15 | if groups != 1 or base_width != 64:
16 | raise ValueError('BasicBlock only supports groups=1 and base_width=64')
17 | if dilation > 1:
18 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
19 |
20 | class MultiFlow(nn.Module):
21 | expansion = 1
22 | __constants__ = ['downsample']
23 |
24 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
25 | base_width=64, dilation=1, norm_layer=None):
26 |         super(MultiFlow, self).__init__()
27 | if norm_layer is None:
28 | norm_layer = nn.BatchNorm2d
29 | if groups != 1 or base_width != 64:
30 | raise ValueError('BasicBlock only supports groups=1 and base_width=64')
31 | if dilation > 1:
32 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
33 | # Both self.conv1 and self.downsample layers downsample the input when stride != 1
34 | self.conv1 = conv3x3(inplanes, planes, stride)
35 | self.bn1 = norm_layer(planes)
36 | self.relu = nn.ReLU(inplace=True)
37 | self.conv2 = conv3x3(planes, planes)
38 | self.bn2 = norm_layer(planes)
39 | self.downsample = downsample
40 | self.stride = stride
41 |
42 | def forward(self, x):
43 | identity = x
44 |
45 | out = self.conv1(x)
46 | out = self.bn1(out)
47 | out = self.relu(out)
48 |
49 | out = self.conv2(out)
50 | out = self.bn2(out)
51 |
52 | if self.downsample is not None:
53 | identity = self.downsample(x)
54 |
55 | out += identity
56 | out = self.relu(out)
57 |
58 | return out
--------------------------------------------------------------------------------
/data/config.py:
--------------------------------------------------------------------------------
1 | # config.py
2 | import os.path
3 |
4 | # gets home dir cross platform
5 | HOME = "F:/ssd/" # os.path.expanduser("~")
6 |
7 | # for making bounding boxes pretty
8 | COLORS = ((255, 0, 0, 128), (0, 255, 0, 128), (0, 0, 255, 128),
9 | (0, 255, 255, 128), (255, 0, 255, 128), (255, 255, 0, 128))
10 |
11 | MEANS = (104, 117, 123)
12 |
13 | # SSD300 CONFIGS
14 | custom = {
15 | 'num_classes': 2,
16 | 'lr_steps': (22500, 30000, 37500),
17 | 'max_iter': 120000,
18 | 'feature_maps': [38, 19, 10, 5, 3, 1],
19 | 'min_dim': 300,
20 | 'steps': [8, 16, 32, 64, 100, 300],
21 | 'min_sizes': [30, 60, 111, 162, 213, 264],
22 | 'max_sizes': [60, 111, 162, 213, 264, 315],
23 | 'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
24 | 'variance': [0.1, 0.2],
25 | 'clip': True,
26 | 'name': 'CUSTOM',
27 | }
28 |
29 | VOC_300_2 = {
30 | 'num_classes': 5,
31 | 'lr_steps': (100000, 130000, 160000),
32 | 'max_iter': 160000,
33 | 'feature_maps' : [38, 19, 10, 5, 3],
34 | 'min_dim' : 300,
35 | 'steps' : [8, 16, 32, 64, 100],
36 | 'min_sizes' : [30, 60, 111, 162, 213],
37 | 'max_sizes' : [60, 111, 162, 213, 315],
38 | 'aspect_ratios' : [[2,3], [2, 3], [2, 3], [2, 3], [2,3]],
39 | 'variance' : [0.1, 0.2],
40 | 'clip' : True,
41 | 'name': 'CUSTOM',
42 | }
43 |
44 | voc = {
45 | 'num_classes': 21,
46 | 'lr_steps': (80000, 100000, 120000),
47 | 'max_iter': 120000,
48 | 'feature_maps': [38, 19, 10, 5, 3, 1],
49 | 'min_dim': 300,
50 | 'steps': [8, 16, 32, 64, 100, 300],
51 | 'min_sizes': [30, 60, 111, 162, 213, 264],
52 | 'max_sizes': [60, 111, 162, 213, 264, 315],
53 | 'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
54 | 'variance': [0.1, 0.2],
55 | 'clip': True,
56 | 'name': 'VOC',
57 | }
58 |
59 | coco = {
60 | 'num_classes': 201,
61 | 'lr_steps': (280000, 360000, 400000),
62 | 'max_iter': 400000,
63 | 'feature_maps': [38, 19, 10, 5, 3, 1],
64 | 'min_dim': 300,
65 | 'steps': [8, 16, 32, 64, 100, 300],
66 | 'min_sizes': [21, 45, 99, 153, 207, 261],
67 | 'max_sizes': [45, 99, 153, 207, 261, 315],
68 | 'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
69 | 'variance': [0.1, 0.2],
70 | 'clip': True,
71 | 'name': 'COCO',
72 | }
73 |
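74 | # Editorial notes on the fields above (not part of the original file):
75 | # - 'num_classes' includes the background class, so it is len(CLASSES) + 1
76 | #   (20 VOC classes -> 21; the single custom 'gauge' class -> 2).
77 | # - 'feature_maps' and 'steps' describe the SSD source layers: a feature map of size f
78 | #   covers the image with a stride of roughly min_dim / f (300/38 ~ 8, 300/19 ~ 16, ...).
79 | # - 'min_sizes'/'max_sizes' set the prior-box scales per layer and 'aspect_ratios' the extra
80 | #   box shapes; layers/functions/prior_box.py combines them into the default boxes.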
--------------------------------------------------------------------------------
/layers/modules/focal_loss.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | from torch.autograd import Variable
6 |
7 | class FocalLoss(nn.Module):
8 | r"""
9 |     This criterion is an implementation of Focal Loss, which is proposed in
10 | Focal Loss for Dense Object Detection.
11 |
12 | Loss(x, class) = - \alpha (1-softmax(x)[class])^gamma \log(softmax(x)[class])
13 |
14 | The losses are averaged across observations for each minibatch.
15 |
16 | Args:
17 | alpha(1D Tensor, Variable) : the scalar factor for this criterion
18 | gamma(float, double) : gamma > 0; reduces the relative loss for well-classified examples (p > .5),
19 | putting more focus on hard, misclassified examples
20 | size_average(bool): By default, the losses are averaged over observations for each minibatch.
21 | However, if the field size_average is set to False, the losses are
22 | instead summed for each minibatch.
23 |
24 |
25 | """
26 | def __init__(self, class_num, alpha=None, gamma=2, size_average=True):
27 | super(FocalLoss, self).__init__()
28 | if alpha is None:
29 | self.alpha = Variable(torch.ones(class_num, 1))
30 | else:
31 | if isinstance(alpha, Variable):
32 | self.alpha = alpha
33 | else:
34 | self.alpha = Variable(alpha)
35 | self.gamma = gamma
36 | self.class_num = class_num
37 | self.size_average = size_average
38 |
39 | def forward(self, inputs, targets):
40 | N = inputs.size(0)
41 | C = inputs.size(1)
42 | P = F.softmax(inputs, dim=-1)
43 |
44 | class_mask = inputs.data.new(N, C).fill_(0)
45 | class_mask = Variable(class_mask)
46 | ids = targets.view(-1, 1)
47 | class_mask.scatter_(1, ids.data, 1.)
48 | #print(class_mask)
49 |
50 |
51 | if inputs.is_cuda and not self.alpha.is_cuda:
52 | self.alpha = self.alpha.cuda()
53 | alpha = self.alpha[ids.data.view(-1)]
54 |
55 | probs = (P*class_mask).sum(1).view(-1,1)
56 |
57 | log_p = probs.log()
58 | #print('probs size= {}'.format(probs.size()))
59 | #print(probs)
60 |
61 | batch_loss = -alpha*(torch.pow((1-probs), self.gamma))*log_p
62 | #print('-----bacth_loss------')
63 | #print(batch_loss)
64 |
65 |
66 | if self.size_average:
67 | loss = batch_loss.mean()
68 | else:
69 | loss = batch_loss.sum()
70 | return loss
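71 |
72 | # Usage sketch (editorial addition, not in the original file): FocalLoss expects raw class
73 | # scores (logits) of shape [N, C] and integer class targets of shape [N]; this mirrors how
74 | # MultiBoxLoss constructs it in layers/modules/multibox_loss.py.
75 | #
76 | # criterion = FocalLoss(class_num=2, alpha=torch.Tensor([[0.25], [0.25]]), gamma=2, size_average=False)
77 | # loss = criterion(conf_logits, labels)  # conf_logits: [N, 2] FloatTensor, labels: [N] LongTensor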
--------------------------------------------------------------------------------
/layers/functions/detection.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Function
3 | from ..box_utils import decode, nms
4 | from data import voc as cfg
5 |
6 |
7 | class Detect(Function):
8 | """At test time, Detect is the final layer of SSD. Decode location preds,
9 | apply non-maximum suppression to location predictions based on conf
10 | scores and threshold to a top_k number of output predictions for both
11 | confidence score and locations.
12 | """
13 | def __init__(self, num_classes, bkg_label, top_k, conf_thresh, nms_thresh):
14 | self.num_classes = num_classes
15 | self.background_label = bkg_label
16 | self.top_k = top_k
17 | # Parameters used in nms.
18 | self.nms_thresh = nms_thresh
19 | if nms_thresh <= 0:
20 |             raise ValueError('nms_threshold must be greater than 0.')
21 | self.conf_thresh = conf_thresh
22 | self.variance = cfg['variance']
23 |
24 | def forward(self, loc_data, conf_data, prior_data):
25 | """
26 | Args:
27 | loc_data: (tensor) Loc preds from loc layers
28 | Shape: [batch,num_priors*4]
29 | conf_data: (tensor) Shape: Conf preds from conf layers
30 | Shape: [batch*num_priors,num_classes]
31 | prior_data: (tensor) Prior boxes and variances from priorbox layers
32 | Shape: [1,num_priors,4]
33 | """
34 | num = loc_data.size(0) # batch size
35 | num_priors = prior_data.size(0)
36 | output = torch.zeros(num, self.num_classes, self.top_k, 5)
37 | conf_preds = conf_data.view(num, num_priors,
38 | self.num_classes).transpose(2, 1)
39 |
40 | # Decode predictions into bboxes.
41 | for i in range(num):
42 | decoded_boxes = decode(loc_data[i], prior_data, self.variance)
43 | # For each class, perform nms
44 | conf_scores = conf_preds[i].clone()
45 |
46 | for cl in range(1, self.num_classes):
47 | c_mask = conf_scores[cl].gt(self.conf_thresh)
48 | scores = conf_scores[cl][c_mask]
49 | if scores.size(0) == 0:
50 | continue
51 | l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
52 | boxes = decoded_boxes[l_mask].view(-1, 4)
53 | # idx of highest scoring and non-overlapping boxes per class
54 | ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
55 | output[i, cl, :count] = \
56 | torch.cat((scores[ids[:count]].unsqueeze(1),
57 | boxes[ids[:count]]), 1)
58 | flt = output.contiguous().view(num, -1, 5)
59 | _, idx = flt[:, :, 0].sort(1, descending=True)
60 | _, rank = idx.sort(1)
61 | flt[(rank < self.top_k).unsqueeze(-1).expand_as(flt)].fill_(0)
62 | return output
63 |
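64 | # Usage sketch (editorial addition, not in the original file): at test time the SSD head's raw
65 | # outputs are passed through Detect to get per-class, per-image detections; the argument values
66 | # below are only illustrative.
67 | #
68 | # detect = Detect(num_classes=21, bkg_label=0, top_k=200, conf_thresh=0.01, nms_thresh=0.45)
69 | # detections = detect.forward(loc_preds, conf_preds, priors)  # [batch, num_classes, top_k, 5]
70 | # # each entry along the last dim is (score, xmin, ymin, xmax, ymax)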
--------------------------------------------------------------------------------
/data/xmlPaserGenLabel.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: UTF-8 -*-
3 |
4 | import xml.sax
5 | from pathlib import Path, PurePath
6 |
7 | r = {}
8 |
9 | result = []
10 |
11 | # label = {
12 | # 'garbage': 0,
13 | # 'garbagew': 1,
14 | # 'www': 2,
15 | # 'w': 3
16 | # }
17 |
18 | label = {
19 | 'waterline': 0,
20 | }
21 |
22 | class MovieHandler( xml.sax.ContentHandler ):
23 | def __init__(self):
24 | self.tag = ""
25 | self.boxes = []
26 | self.box = {
27 | 'name': '',
28 | 'xmin': 0,
29 | 'xmax': 0,
30 | 'ymin': 0,
31 | 'ymax': 0
32 | }
33 | self.size = {
34 | 'width': 0,
35 | 'height': 0,
36 | 'depth': 0
37 | }
38 |
39 |     # handle element start events
40 | def startElement(self, tag, attributes):
41 | self.tag = tag
42 |
43 |     # handle element end events
44 | def endElement(self, tag):
45 | if self.tag == 'depth':
46 | r['data']['size'] = self.size
47 | if self.tag == 'ymax':
48 | r['data']['boxes'].append(self.box)
49 | self.tag = ""
50 |
51 |     # handle character content events
52 | def characters(self, content):
53 | if self.tag == 'size':
54 | self.size = {
55 | 'width': 0,
56 | 'height': 0,
57 | 'depth': 0
58 | }
59 | elif self.tag == 'object':
60 | self.box = {
61 | 'name': '',
62 | 'xmin': 0,
63 | 'xmax': 0,
64 | 'ymin': 0,
65 | 'ymax': 0
66 | }
67 | elif self.tag == 'width':
68 | self.size['width'] = int(content)
69 | elif self.tag == 'height':
70 | self.size['height'] = int(content)
71 | elif self.tag == 'depth':
72 | self.size['depth'] = int(content)
73 | elif self.tag == 'name':
74 | self.box['name'] = content
75 | elif self.tag == 'xmin':
76 | self.box['xmin'] = int(content)
77 | elif self.tag == 'xmax':
78 | self.box['xmax'] = int(content)
79 | elif self.tag == 'ymin':
80 | self.box['ymin'] = int(content)
81 | elif self.tag == 'ymax':
82 | self.box['ymax'] = int(content)
83 |
84 | if ( __name__ == "__main__"):
85 |
86 |     # create an XMLReader
87 | parser = xml.sax.make_parser()
88 |     # turn off namespaces
89 | parser.setFeature(xml.sax.handler.feature_namespaces, 0)
90 |
91 |     # set the custom ContentHandler
92 | Handler = MovieHandler()
93 | parser.setContentHandler( Handler )
94 |
95 | path = './video/waterline/Annotations'
96 | p = Path(path)
97 | files = [x for x in p.iterdir() if x.is_file()]
98 | for f in files:
99 | r = {
100 | 'file': f.name[0: -4],
101 | 'data': {
102 | 'size': {},
103 | 'boxes': []
104 | }
105 | }
106 | parser.parse(path+'/'+f.name)
107 | result.append(r)
108 | for r in result:
109 | # with open(".\\labels\\" + r['file'] + ".txt", "w") as f:
110 | width = r['data']['size']['width']
111 | height = r['data']['size']['height']
112 | for b in r['data']['boxes']:
113 | center_x = (b['xmax'] + b['xmin']) / 2 / width
114 |             center_y = (b['ymax'] + b['ymin']) / 2 / height
115 | width_x = (b['xmax'] - b['xmin']) / width
116 | height_y = (b['ymax'] - b['ymin']) / height
117 | label_idx = label[b['name']]
118 | if width_x == 0 or height_y == 0 or (b['name'] != 'waterline'):
119 | print(r['file'])
120 | break
121 | # f.write(str(label_idx) + ' ' + str(center_x) + ' ' + str(center_y) + ' ' + str(width_x) + ' ' + str(height_y) + "\n")
--------------------------------------------------------------------------------
/layers/modules/multibox_loss.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | from torch.autograd import Variable
6 | from data import coco as cfg
7 | from ..box_utils import match, log_sum_exp
8 | from .focal_loss import FocalLoss
9 |
10 |
11 | class MultiBoxLoss(nn.Module):
12 | """SSD Weighted Loss Function
13 | Compute Targets:
14 | 1) Produce Confidence Target Indices by matching ground truth boxes
15 | with (default) 'priorboxes' that have jaccard index > threshold parameter
16 | (default threshold: 0.5).
17 | 2) Produce localization target by 'encoding' variance into offsets of ground
18 | truth boxes and their matched 'priorboxes'.
19 | 3) Hard negative mining to filter the excessive number of negative examples
20 | that comes with using a large number of default bounding boxes.
21 | (default negative:positive ratio 3:1)
22 | Objective Loss:
23 | L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
24 | Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss
25 | weighted by α which is set to 1 by cross val.
26 | Args:
27 | c: class confidences,
28 | l: predicted boxes,
29 | g: ground truth boxes
30 | N: number of matched default boxes
31 | See: https://arxiv.org/pdf/1512.02325.pdf for more details.
32 | """
33 |
34 | def __init__(self, num_classes, overlap_thresh, prior_for_matching,
35 | bkg_label, neg_mining, neg_pos, neg_overlap, encode_target,
36 | use_gpu=True):
37 | super(MultiBoxLoss, self).__init__()
38 | self.use_gpu = use_gpu
39 | self.num_classes = num_classes
40 | self.threshold = overlap_thresh
41 | self.background_label = bkg_label
42 | self.encode_target = encode_target
43 | self.use_prior_for_matching = prior_for_matching
44 | self.do_neg_mining = neg_mining
45 | self.negpos_ratio = neg_pos
46 | self.neg_overlap = neg_overlap
47 | self.variance = cfg['variance']
48 | self.FL = FocalLoss(class_num=cfg['num_classes'], alpha=torch.Tensor([[0.25], [0.25]]), size_average=False)
49 |
50 | def forward(self, predictions, targets):
51 | """Multibox Loss
52 | Args:
53 | predictions (tuple): A tuple containing loc preds, conf preds,
54 | and prior boxes from SSD net.
55 | conf shape: torch.size(batch_size,num_priors,num_classes)
56 | loc shape: torch.size(batch_size,num_priors,4)
57 | priors shape: torch.size(num_priors,4)
58 |
59 | targets (tensor): Ground truth boxes and labels for a batch,
60 | shape: [batch_size,num_objs,5] (last idx is the label).
61 | """
62 | loc_data, conf_data, priors = predictions
63 | num = loc_data.size(0)
64 | priors = priors[:loc_data.size(1), :]
65 | num_priors = (priors.size(0))
66 | num_classes = self.num_classes
67 |
68 | # match priors (default boxes) and ground truth boxes
69 | loc_t = torch.Tensor(num, num_priors, 4)
70 | conf_t = torch.LongTensor(num, num_priors)
71 | for idx in range(num):
72 | truths = targets[idx][:, :-1].data
73 | labels = targets[idx][:, -1].data
74 | defaults = priors.data
75 | match(self.threshold, truths, defaults, self.variance, labels,
76 | loc_t, conf_t, idx)
77 | if self.use_gpu:
78 | loc_t = loc_t.cuda()
79 | conf_t = conf_t.cuda()
80 | # wrap targets
81 | loc_t = Variable(loc_t, requires_grad=False)
82 | conf_t = Variable(conf_t, requires_grad=False)
83 |
84 | pos = conf_t > 0
85 | num_pos = pos.sum(dim=1, keepdim=True)
86 |
87 | # Localization Loss (Smooth L1)
88 | # Shape: [batch,num_priors,4]
89 | pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
90 | loc_p = loc_data[pos_idx].view(-1, 4)
91 | loc_t = loc_t[pos_idx].view(-1, 4)
92 | # loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False)
93 | loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')
94 |
95 | # Compute max conf across batch for hard negative mining
96 | batch_conf = conf_data.view(-1, self.num_classes)
97 | loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))
98 |
99 |         # fix: reshape loss_c back to [batch, num_priors] before masking out positives
100 | loss_c = loss_c.view(pos.size()[0], pos.size()[1])
101 | # Hard Negative Mining
102 | loss_c[pos] = 0 # filter out pos boxes for now
103 | loss_c = loss_c.view(num, -1)
104 | _, loss_idx = loss_c.sort(1, descending=True)
105 | _, idx_rank = loss_idx.sort(1)
106 | num_pos = pos.long().sum(1, keepdim=True)
107 | num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
108 | neg = idx_rank < num_neg.expand_as(idx_rank)
109 |
110 | # Confidence Loss Including Positive and Negative Examples
111 | pos_idx = pos.unsqueeze(2).expand_as(conf_data)
112 | neg_idx = neg.unsqueeze(2).expand_as(conf_data)
113 | conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1, self.num_classes)
114 | targets_weighted = conf_t[(pos+neg).gt(0)]
115 | # loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False)
116 | # loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')
117 | loss_c = self.FL(conf_p, targets_weighted)
118 |
119 | # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
120 |
121 | N = num_pos.data.sum()
122 | loss_l /= N
123 | loss_c /= N
124 | return loss_l, loss_c
125 |
126 |
127 |
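128 | # Usage sketch (editorial addition, not in the original file): the criterion is constructed once
129 | # and applied to the SSD net's (loc, conf, priors) output tuple together with the collated targets.
130 | #
131 | # criterion = MultiBoxLoss(num_classes=2, overlap_thresh=0.5, prior_for_matching=True,
132 | #                          bkg_label=0, neg_mining=True, neg_pos=3, neg_overlap=0.5,
133 | #                          encode_target=False, use_gpu=True)
134 | # loss_l, loss_c = criterion(net(images), targets)  # net(images) -> (loc, conf, priors)
135 | # loss = loss_l + loss_c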
--------------------------------------------------------------------------------
/data/voc0712.py:
--------------------------------------------------------------------------------
1 | """VOC Dataset Classes
2 |
3 | Original author: Francisco Massa
4 | https://github.com/fmassa/vision/blob/voc_dataset/torchvision/datasets/voc.py
5 |
6 | Updated by: Ellis Brown, Max deGroot
7 | """
8 | from .config import HOME
9 | import os.path as osp
10 | import sys
11 | import torch
12 | import torch.utils.data as data
13 | import cv2
14 | import numpy as np
15 | if sys.version_info[0] == 2:
16 | import xml.etree.cElementTree as ET
17 | else:
18 | import xml.etree.ElementTree as ET
19 |
20 | VOC_CLASSES = ( # always index 0
21 | 'aeroplane', 'bicycle', 'bird', 'boat',
22 | 'bottle', 'bus', 'car', 'cat', 'chair',
23 | 'cow', 'diningtable', 'dog', 'horse',
24 | 'motorbike', 'person', 'pottedplant',
25 | 'sheep', 'sofa', 'train', 'tvmonitor')
26 |
27 | # note: if you used our download scripts, this should be right
28 | VOC_ROOT = osp.join(HOME, "data/VOCdevkit/")
29 |
30 |
31 | class VOCAnnotationTransform(object):
32 | """Transforms a VOC annotation into a Tensor of bbox coords and label index
33 |     Initialized with a dictionary lookup of class names to indexes
34 |
35 | Arguments:
36 | class_to_ind (dict, optional): dictionary lookup of classnames -> indexes
37 | (default: alphabetic indexing of VOC's 20 classes)
38 | keep_difficult (bool, optional): keep difficult instances or not
39 | (default: False)
40 | height (int): height
41 | width (int): width
42 | """
43 |
44 | def __init__(self, class_to_ind=None, keep_difficult=False):
45 | self.class_to_ind = class_to_ind or dict(
46 | zip(VOC_CLASSES, range(len(VOC_CLASSES))))
47 | self.keep_difficult = keep_difficult
48 |
49 | def __call__(self, target, width, height):
50 | """
51 | Arguments:
52 | target (annotation) : the target annotation to be made usable
53 | will be an ET.Element
54 | Returns:
55 | a list containing lists of bounding boxes [bbox coords, class name]
56 | """
57 | res = []
58 | for obj in target.iter('object'):
59 | difficult = int(obj.find('difficult').text) == 1
60 | if not self.keep_difficult and difficult:
61 | continue
62 | name = obj.find('name').text.lower().strip()
63 | bbox = obj.find('bndbox')
64 |
65 | pts = ['xmin', 'ymin', 'xmax', 'ymax']
66 | bndbox = []
67 | for i, pt in enumerate(pts):
68 | cur_pt = int(bbox.find(pt).text) - 1
69 | # scale height or width
70 | cur_pt = cur_pt / width if i % 2 == 0 else cur_pt / height
71 | bndbox.append(cur_pt)
72 | label_idx = self.class_to_ind[name]
73 | bndbox.append(label_idx)
74 | res += [bndbox] # [xmin, ymin, xmax, ymax, label_ind]
75 | # img_id = target.find('filename').text[:-4]
76 |
77 | return res # [[xmin, ymin, xmax, ymax, label_ind], ... ]
78 |
79 |
80 | class VOCDetection(data.Dataset):
81 | """VOC Detection Dataset Object
82 |
83 | input is image, target is annotation
84 |
85 | Arguments:
86 | root (string): filepath to VOCdevkit folder.
87 | image_set (string): imageset to use (eg. 'train', 'val', 'test')
88 | transform (callable, optional): transformation to perform on the
89 | input image
90 | target_transform (callable, optional): transformation to perform on the
91 | target `annotation`
92 | (eg: take in caption string, return tensor of word indices)
93 | dataset_name (string, optional): which dataset to load
94 |             (default: 'VOC0712')
95 | """
96 |
97 | def __init__(self, root,
98 | image_sets=[('2007', 'trainval'), ('2012', 'trainval')],
99 | transform=None, target_transform=VOCAnnotationTransform(),
100 | dataset_name='VOC0712'):
101 | self.root = root
102 | self.image_set = image_sets
103 | self.transform = transform
104 | self.target_transform = target_transform
105 | self.name = dataset_name
106 | self._annopath = osp.join('%s', 'Annotations', '%s.xml')
107 | self._imgpath = osp.join('%s', 'JPEGImages', '%s.jpg')
108 | self.ids = list()
109 | for (year, name) in image_sets:
110 | rootpath = osp.join(self.root, 'VOC' + year)
111 | for line in open(osp.join(rootpath, 'ImageSets', 'Main', name + '.txt')):
112 | self.ids.append((rootpath, line.strip()))
113 |
114 | def __getitem__(self, index):
115 | im, gt, h, w = self.pull_item(index)
116 |
117 | return im, gt
118 |
119 | def __len__(self):
120 | return len(self.ids)
121 |
122 | def pull_item(self, index):
123 | img_id = self.ids[index]
124 |
125 | target = ET.parse(self._annopath % img_id).getroot()
126 | img = cv2.imread(self._imgpath % img_id)
127 | height, width, channels = img.shape
128 |
129 | if self.target_transform is not None:
130 | target = self.target_transform(target, width, height)
131 |
132 | if self.transform is not None:
133 | target = np.array(target)
134 | img, boxes, labels = self.transform(img, target[:, :4], target[:, 4])
135 | # to rgb
136 | img = img[:, :, (2, 1, 0)]
137 | # img = img.transpose(2, 0, 1)
138 | target = np.hstack((boxes, np.expand_dims(labels, axis=1)))
139 | return torch.from_numpy(img).permute(2, 0, 1), target, height, width
140 | # return torch.from_numpy(img), target, height, width
141 |
142 | def pull_image(self, index):
143 | '''Returns the original image object at index in PIL form
144 |
145 | Note: not using self.__getitem__(), as any transformations passed in
146 | could mess up this functionality.
147 |
148 | Argument:
149 | index (int): index of img to show
150 | Return:
151 | PIL img
152 | '''
153 | img_id = self.ids[index]
154 | return cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR)
155 |
156 | def pull_anno(self, index):
157 | '''Returns the original annotation of image at index
158 |
159 | Note: not using self.__getitem__(), as any transformations passed in
160 | could mess up this functionality.
161 |
162 | Argument:
163 | index (int): index of img to get annotation of
164 | Return:
165 | list: [img_id, [(label, bbox coords),...]]
166 | eg: ('001718', [('dog', (96, 13, 438, 332))])
167 | '''
168 | img_id = self.ids[index]
169 | anno = ET.parse(self._annopath % img_id).getroot()
170 | gt = self.target_transform(anno, 1, 1)
171 | return img_id[1], gt
172 |
173 | def pull_tensor(self, index):
174 | '''Returns the original image at an index in tensor form
175 |
176 | Note: not using self.__getitem__(), as any transformations passed in
177 | could mess up this functionality.
178 |
179 | Argument:
180 | index (int): index of img to show
181 | Return:
182 | tensorized version of img, squeezed
183 | '''
184 | return torch.Tensor(self.pull_image(index)).unsqueeze_(0)
--------------------------------------------------------------------------------
/data/custom.py:
--------------------------------------------------------------------------------
1 | """custom Dataset Classes
2 |
3 | Original author: Francisco Massa
4 | https://github.com/fmassa/vision/blob/custom_dataset/torchvision/datasets/custom.py
5 |
6 | Updated by: Ellis Brown, Max deGroot
7 | """
8 | from .config import HOME
9 | import os.path as osp
10 | import sys
11 | import torch
12 | import torch.utils.data as data
13 | import cv2
14 | import numpy as np
15 | if sys.version_info[0] == 2:
16 | import xml.etree.cElementTree as ET
17 | else:
18 | import xml.etree.ElementTree as ET
19 |
20 | CUSTOM_CLASSES = ( # always index 0
21 | 'gauge',)
22 |
23 | # CUSTOM_CLASSES = ( # always index 0
24 | # 'waterline', 'mark')
25 |
26 | # CUSTOM_CLASSES = ( # always index 0
27 | # 'building', 'water')
28 |
29 | # note: if you used our download scripts, this should be right
30 | CUSTOM_ROOT = osp.join(HOME, "data/video/")
31 |
32 |
33 | class customAnnotationTransform(object):
34 | """Transforms a custom annotation into a Tensor of bbox coords and label index
35 |     Initialized with a dictionary lookup of class names to indexes
36 |
37 | Arguments:
38 | class_to_ind (dict, optional): dictionary lookup of classnames -> indexes
39 |             (default: alphabetic indexing of CUSTOM_CLASSES)
40 | keep_difficult (bool, optional): keep difficult instances or not
41 | (default: False)
42 | height (int): height
43 | width (int): width
44 | """
45 |
46 | def __init__(self, class_to_ind=None, keep_difficult=False):
47 | self.class_to_ind = class_to_ind or dict(
48 | zip(CUSTOM_CLASSES, range(len(CUSTOM_CLASSES))))
49 | print(self.class_to_ind)
50 | self.keep_difficult = keep_difficult
51 |
52 | def __call__(self, target, width, height):
53 | """
54 | Arguments:
55 | target (annotation) : the target annotation to be made usable
56 | will be an ET.Element
57 | Returns:
58 | a list containing lists of bounding boxes [bbox coords, class name]
59 | """
60 | res = []
61 | for obj in target.iter('object'):
62 | difficult = int(obj.find('difficult').text) == 1
63 | if not self.keep_difficult and difficult:
64 | continue
65 | name = obj.find('name').text.lower().strip()
66 | bbox = obj.find('bndbox')
67 |
68 | pts = ['xmin', 'ymin', 'xmax', 'ymax']
69 | bndbox = []
70 | for i, pt in enumerate(pts):
71 | cur_pt = int(bbox.find(pt).text) - 1
72 | # scale height or width
73 | cur_pt = cur_pt / width if i % 2 == 0 else cur_pt / height
74 | bndbox.append(cur_pt)
75 | label_idx = self.class_to_ind[name]
76 | bndbox.append(label_idx)
77 | res += [bndbox] # [xmin, ymin, xmax, ymax, label_ind]
78 | # img_id = target.find('filename').text[:-4]
79 | return res # [[xmin, ymin, xmax, ymax, label_ind], ... ]
80 |
81 |
82 | class customDetection(data.Dataset):
83 | """custom Detection Dataset Object
84 |
85 | input is image, target is annotation
86 |
87 | Arguments:
88 | root (string): filepath to customdevkit folder.
89 | image_set (string): imageset to use (eg. 'train', 'val', 'test')
90 | transform (callable, optional): transformation to perform on the
91 | input image
92 | target_transform (callable, optional): transformation to perform on the
93 | target `annotation`
94 | (eg: take in caption string, return tensor of word indices)
95 | dataset_name (string, optional): which dataset to load
96 |             (default: 'custom')
97 | """
98 |
99 | def __init__(self, root,
100 | # image_sets=[('shenhe', 'train')],
101 | image_sets=[('gauge', 'train')],
102 | transform=None, target_transform=customAnnotationTransform(),
103 | dataset_name='custom'):
104 | self.root = root
105 | self.image_set = image_sets
106 | self.transform = transform
107 | self.target_transform = target_transform
108 | self.name = dataset_name
109 | self._annopath = osp.join('%s', 'Annotations', '%s.xml')
110 | self._imgpath = osp.join('%s', 'JPEGImages', '%s.jpg')
111 | self.ids = list()
112 | for (curDir, name) in image_sets:
113 | rootpath = osp.join(self.root, curDir)
114 | for line in open(osp.join(rootpath, 'ImageSets', 'Main', name + '.txt')):
115 | self.ids.append((rootpath, line.strip()))
116 |
117 | def __getitem__(self, index):
118 | im, gt, h, w = self.pull_item(index)
119 |
120 | return im, gt
121 |
122 | def __len__(self):
123 | return len(self.ids)
124 |
125 | def pull_item(self, index):
126 | img_id = self.ids[index]
127 |
128 | target = ET.parse(self._annopath % img_id).getroot()
129 | img = cv2.imread(self._imgpath % img_id)
130 | height, width, channels = img.shape
131 |
132 | if self.target_transform is not None:
133 | target = self.target_transform(target, width, height)
134 |
135 | if self.transform is not None:
136 | target = np.array(target)
137 | img, boxes, labels = self.transform(img, target[:, :4], target[:, 4])
138 | # to rgb
139 | img = img[:, :, (2, 1, 0)]
140 | # img = img.transpose(2, 0, 1)
141 | target = np.hstack((boxes, np.expand_dims(labels, axis=1)))
142 | return torch.from_numpy(img).permute(2, 0, 1), target, height, width
143 | # return torch.from_numpy(img), target, height, width
144 |
145 | def pull_image(self, index):
146 | '''Returns the original image object at index in PIL form
147 |
148 | Note: not using self.__getitem__(), as any transformations passed in
149 | could mess up this functionality.
150 |
151 | Argument:
152 | index (int): index of img to show
153 | Return:
154 | PIL img
155 | '''
156 | img_id = self.ids[index]
157 | return cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR)
158 |
159 | def pull_anno(self, index):
160 | '''Returns the original annotation of image at index
161 |
162 | Note: not using self.__getitem__(), as any transformations passed in
163 | could mess up this functionality.
164 |
165 | Argument:
166 | index (int): index of img to get annotation of
167 | Return:
168 | list: [img_id, [(label, bbox coords),...]]
169 | eg: ('001718', [('dog', (96, 13, 438, 332))])
170 | '''
171 | img_id = self.ids[index]
172 | anno = ET.parse(self._annopath % img_id).getroot()
173 | gt = self.target_transform(anno, 1, 1)
174 | return img_id[1], gt
175 |
176 | def pull_tensor(self, index):
177 | '''Returns the original image at an index in tensor form
178 |
179 | Note: not using self.__getitem__(), as any transformations passed in
180 | could mess up this functionality.
181 |
182 | Argument:
183 | index (int): index of img to show
184 | Return:
185 | tensorized version of img, squeezed
186 | '''
187 | return torch.Tensor(self.pull_image(index)).unsqueeze_(0)
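188 |
189 | # Usage sketch (editorial addition, not in the original file): wiring the dataset up for training.
190 | # SSDAugmentation's (size, mean) signature is assumed here from the standard ssd.pytorch code.
191 | #
192 | # from utils import SSDAugmentation
193 | # from data.config import custom, MEANS
194 | # dataset = customDetection(root=CUSTOM_ROOT,
195 | #                           transform=SSDAugmentation(custom['min_dim'], MEANS))
196 | # im, gt = dataset[0]  # im: [3, 300, 300] float tensor, gt: [num_objs, 5] array (box coords + label)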
--------------------------------------------------------------------------------
/data/custom_for_visual.py:
--------------------------------------------------------------------------------
1 | """custom Dataset Classes
2 |
3 | Original author: Francisco Massa
4 | https://github.com/fmassa/vision/blob/custom_dataset/torchvision/datasets/custom.py
5 |
6 | Updated by: Ellis Brown, Max deGroot
7 | """
8 | from .config import HOME
9 | import os.path as osp
10 | import sys
11 | import torch
12 | import torch.utils.data as data
13 | import cv2
14 | import numpy as np
15 | if sys.version_info[0] == 2:
16 | import xml.etree.cElementTree as ET
17 | else:
18 | import xml.etree.ElementTree as ET
19 |
20 | # {'garbage': 1, 'garbagew': 1, 'www': 1, 'w': 1}
21 | # CUSTOM_CLASSES = ( # always index 0
22 | # 'waterline',)
23 | CUSTOM_CLASSES_GAUGE = ( # always index 0
24 | 'gauge',)
25 |
26 | CUSTOM_CLASSES_WATERLINE = ( # always index 0
27 | 'waterline', 'mark',)
28 |
29 | CUSTOM_CLASSES_BUILDING = ( # always index 0
30 | 'building', 'water',)
31 |
32 | # note: if you used our download scripts, this should be right
33 | # CUSTOM_ROOT = osp.join(HOME, "data/piaofu/piao/")
34 | CUSTOM_ROOT = osp.join(HOME, "data/video/")
35 |
36 |
37 | class customAnnotationTransform(object):
38 | """Transforms a custom annotation into a Tensor of bbox coords and label index
39 |     Initialized with a dictionary lookup of class names to indexes
40 |
41 | Arguments:
42 | class_to_ind (dict, optional): dictionary lookup of classnames -> indexes
43 |             (default: alphabetic indexing of CUSTOM_CLASSES)
44 | keep_difficult (bool, optional): keep difficult instances or not
45 | (default: False)
46 | height (int): height
47 | width (int): width
48 | """
49 |
50 | def __init__(self, class_to_ind=None, keep_difficult=False):
51 | self.class_to_ind = class_to_ind or dict(
52 | zip(CUSTOM_CLASSES_GAUGE, range(len(CUSTOM_CLASSES_GAUGE))))
53 | self.keep_difficult = keep_difficult
54 |
55 | def __call__(self, target, width, height):
56 | """
57 | Arguments:
58 | target (annotation) : the target annotation to be made usable
59 | will be an ET.Element
60 | Returns:
61 | a list containing lists of bounding boxes [bbox coords, class name]
62 | """
63 | res = []
64 | for obj in target.iter('object'):
65 | difficult = int(obj.find('difficult').text) == 1
66 | if not self.keep_difficult and difficult:
67 | continue
68 | name = obj.find('name').text.lower().strip()
69 | bbox = obj.find('bndbox')
70 |
71 | pts = ['xmin', 'ymin', 'xmax', 'ymax']
72 | bndbox = []
73 | for i, pt in enumerate(pts):
74 | cur_pt = int(bbox.find(pt).text) - 1
75 | # scale height or width
76 | cur_pt = cur_pt / width if i % 2 == 0 else cur_pt / height
77 | bndbox.append(cur_pt)
78 | label_idx = self.class_to_ind[name]
79 | bndbox.append(label_idx)
80 | res += [bndbox] # [xmin, ymin, xmax, ymax, label_ind]
81 | # img_id = target.find('filename').text[:-4]
82 | return res # [[xmin, ymin, xmax, ymax, label_ind], ... ]
83 |
84 |
85 | class customDetection(data.Dataset):
86 | """custom Detection Dataset Object
87 |
88 | input is image, target is annotation
89 |
90 | Arguments:
91 | root (string): filepath to customdevkit folder.
92 | image_set (string): imageset to use (eg. 'train', 'val', 'test')
93 | transform (callable, optional): transformation to perform on the
94 | input image
95 | target_transform (callable, optional): transformation to perform on the
96 | target `annotation`
97 | (eg: take in caption string, return tensor of word indices)
98 | dataset_name (string, optional): which dataset to load
99 |             (default: 'custom')
100 | """
101 |
102 | def __init__(self, root,
103 | image_sets=[('gauge', 'train')],
104 | transform=None, target_transform=customAnnotationTransform(),
105 | dataset_name='custom'):
106 | self.root = root
107 | self.image_set = image_sets
108 | self.transform = transform
109 | self.target_transform = target_transform
110 | self.name = dataset_name
111 | self._annopath = osp.join('%s', 'Annotations', '%s.xml')
112 | self._imgpath = osp.join('%s', 'JPEGImages', '%s.jpg')
113 | self.ids = list()
114 | for (curDir, name) in image_sets:
115 | rootpath = osp.join(self.root, curDir)
116 | for line in open(osp.join(rootpath, 'ImageSets', 'Main', name + '.txt')):
117 | self.ids.append((rootpath, line.strip()))
118 |
119 | def __getitem__(self, index):
120 | im, gt, h, w = self.pull_item(index)
121 |
122 | return im, gt
123 |
124 | def __len__(self):
125 | return len(self.ids)
126 |
127 | def pull_item(self, index):
128 | img_id = self.ids[index]
129 |
130 | target = ET.parse(self._annopath % img_id).getroot()
131 | img = cv2.imread(self._imgpath % img_id)
132 | height, width, channels = img.shape
133 |
134 | if self.target_transform is not None:
135 | target = self.target_transform(target, width, height)
136 |
137 | if self.transform is not None:
138 | target = np.array(target)
139 | img, boxes, labels = self.transform(img, target[:, :4], target[:, 4])
140 | # to rgb
141 | img = img[:, :, (2, 1, 0)]
142 | # img = img.transpose(2, 0, 1)
143 | target = np.hstack((boxes, np.expand_dims(labels, axis=1)))
144 | return torch.from_numpy(img).permute(2, 0, 1), target, height, width
145 | # return torch.from_numpy(img), target, height, width
146 |
147 | def pull_image(self, index):
148 | '''Returns the original image object at index in PIL form
149 |
150 | Note: not using self.__getitem__(), as any transformations passed in
151 | could mess up this functionality.
152 |
153 | Argument:
154 | index (int): index of img to show
155 | Return:
156 | PIL img
157 | '''
158 | img_id = self.ids[index]
159 | return cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR)
160 |
161 | def pull_anno(self, index):
162 | '''Returns the original annotation of image at index
163 |
164 | Note: not using self.__getitem__(), as any transformations passed in
165 | could mess up this functionality.
166 |
167 | Argument:
168 | index (int): index of img to get annotation of
169 | Return:
170 | list: [img_id, [(label, bbox coords),...]]
171 | eg: ('001718', [('dog', (96, 13, 438, 332))])
172 | '''
173 | img_id = self.ids[index]
174 | anno = ET.parse(self._annopath % img_id).getroot()
175 | gt = self.target_transform(anno, 1, 1)
176 | return img_id[1], gt
177 |
178 | def pull_tensor(self, index):
179 | '''Returns the original image at an index in tensor form
180 |
181 | Note: not using self.__getitem__(), as any transformations passed in
182 | could mess up this functionality.
183 |
184 | Argument:
185 | index (int): index of img to show
186 | Return:
187 | tensorized version of img, squeezed
188 | '''
189 | return torch.Tensor(self.pull_image(index)).unsqueeze_(0)
190 |
191 | def pull_img_name(self, index):
192 | return self.ids[index]
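193 | 
194 | # A minimal usage sketch for the dataset class above (illustrative only; the root
195 | # path below is a placeholder and assumes the devkit layout
196 | # <root>/<set>/{Annotations,JPEGImages,ImageSets/Main} expected by __init__).
197 | # Run it as a module from the repository root so the package imports resolve.
198 | if __name__ == '__main__':
199 |     dataset = customDetection(root='./video/',  # hypothetical root directory
200 |                               image_sets=[('gauge', 'train')],
201 |                               transform=None)
202 |     print('dataset size:', len(dataset))
203 |     img, gt, h, w = dataset.pull_item(0)
204 |     # img is a CHW uint8 tensor; gt is a list of [xmin, ymin, xmax, ymax, label]
205 |     # rows with coordinates normalized by the original image width/height.
206 |     print(img.shape, len(gt), h, w)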
--------------------------------------------------------------------------------
/data/coco.py:
--------------------------------------------------------------------------------
1 | from .config import HOME
2 | import os
3 | import os.path as osp
4 | import sys
5 | import torch
6 | import torch.utils.data as data
7 | import torchvision.transforms as transforms
8 | import cv2
9 | import numpy as np
10 |
11 | COCO_ROOT = osp.join(HOME, 'data/coco/')
12 | IMAGES = 'images'
13 | ANNOTATIONS = 'annotations'
14 | COCO_API = 'PythonAPI'
15 | INSTANCES_SET = 'instances_{}.json'
16 | COCO_CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
17 |                 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
18 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
19 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
20 | 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
21 | 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
22 | 'kite', 'baseball bat', 'baseball glove', 'skateboard',
23 | 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
24 | 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
25 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
26 | 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
27 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
28 | 'keyboard', 'cell phone', 'microwave oven', 'toaster', 'sink',
29 | 'refrigerator', 'book', 'clock', 'vase', 'scissors',
30 | 'teddy bear', 'hair drier', 'toothbrush')
31 |
32 |
33 | def get_label_map(label_file):
34 | label_map = {}
35 | labels = open(label_file, 'r')
36 | for line in labels:
37 | ids = line.split(',')
38 | label_map[int(ids[0])] = int(ids[1])
39 | return label_map
40 |
41 |
42 | class COCOAnnotationTransform(object):
43 | """Transforms a COCO annotation into a Tensor of bbox coords and label index
44 |     Initialized with a dictionary lookup of classnames to indexes
45 | """
46 | def __init__(self):
47 | self.label_map = get_label_map(osp.join(COCO_ROOT, 'coco_labels.txt'))
48 |
49 | def __call__(self, target, width, height):
50 | """
51 | Args:
52 | target (dict): COCO target json annotation as a python dict
53 | height (int): height
54 | width (int): width
55 | Returns:
56 | a list containing lists of bounding boxes [bbox coords, class idx]
57 | """
58 | scale = np.array([width, height, width, height])
59 | res = []
60 | for obj in target:
61 | if 'bbox' in obj:
62 | bbox = obj['bbox']
63 | bbox[2] += bbox[0]
64 | bbox[3] += bbox[1]
65 | label_idx = self.label_map[obj['category_id']] - 1
66 | final_box = list(np.array(bbox)/scale)
67 | final_box.append(label_idx)
68 | res += [final_box] # [xmin, ymin, xmax, ymax, label_idx]
69 | else:
70 |                 print("annotation without a bbox field!")
71 |
72 | return res # [[xmin, ymin, xmax, ymax, label_idx], ... ]
73 |
74 |
75 | class COCODetection(data.Dataset):
76 |     """MS COCO Detection Dataset.
77 |     Args:
78 |         root (string): Root directory where images are downloaded to.
79 |         image_set (string): Name of the specific set of COCO images.
80 | transform (callable, optional): A function/transform that augments the
81 |                                         raw images
82 | target_transform (callable, optional): A function/transform that takes
83 | in the target (bbox) and transforms it.
84 | """
85 |
86 | def __init__(self, root, image_set='trainval35k', transform=None,
87 | target_transform=COCOAnnotationTransform(), dataset_name='MS COCO'):
88 | sys.path.append(osp.join(root, COCO_API))
89 | from pycocotools.coco import COCO
90 | self.root = osp.join(root, IMAGES, image_set)
91 | self.coco = COCO(osp.join(root, ANNOTATIONS,
92 | INSTANCES_SET.format(image_set)))
93 | self.ids = list(self.coco.imgToAnns.keys())
94 | self.transform = transform
95 | self.target_transform = target_transform
96 | self.name = dataset_name
97 |
98 | def __getitem__(self, index):
99 | """
100 | Args:
101 | index (int): Index
102 | Returns:
103 | tuple: Tuple (image, target).
104 | target is the object returned by ``coco.loadAnns``.
105 | """
106 | im, gt, h, w = self.pull_item(index)
107 | return im, gt
108 |
109 | def __len__(self):
110 | return len(self.ids)
111 |
112 | def pull_item(self, index):
113 | """
114 | Args:
115 | index (int): Index
116 | Returns:
117 | tuple: Tuple (image, target, height, width).
118 | target is the object returned by ``coco.loadAnns``.
119 | """
120 | img_id = self.ids[index]
121 | target = self.coco.imgToAnns[img_id]
122 | ann_ids = self.coco.getAnnIds(imgIds=img_id)
123 |
124 | target = self.coco.loadAnns(ann_ids)
125 | path = osp.join(self.root, self.coco.loadImgs(img_id)[0]['file_name'])
126 | assert osp.exists(path), 'Image path does not exist: {}'.format(path)
127 |         img = cv2.imread(path)  # path already includes self.root
128 | height, width, _ = img.shape
129 | if self.target_transform is not None:
130 | target = self.target_transform(target, width, height)
131 | if self.transform is not None:
132 | target = np.array(target)
133 | img, boxes, labels = self.transform(img, target[:, :4],
134 | target[:, 4])
135 | # to rgb
136 | img = img[:, :, (2, 1, 0)]
137 |
138 | target = np.hstack((boxes, np.expand_dims(labels, axis=1)))
139 | return torch.from_numpy(img).permute(2, 0, 1), target, height, width
140 |
141 | def pull_image(self, index):
142 |         '''Returns the original image object at index in cv2 form
143 |
144 | Note: not using self.__getitem__(), as any transformations passed in
145 | could mess up this functionality.
146 |
147 | Argument:
148 | index (int): index of img to show
149 | Return:
150 | cv2 img
151 | '''
152 | img_id = self.ids[index]
153 | path = self.coco.loadImgs(img_id)[0]['file_name']
154 | return cv2.imread(osp.join(self.root, path), cv2.IMREAD_COLOR)
155 |
156 | def pull_anno(self, index):
157 | '''Returns the original annotation of image at index
158 |
159 | Note: not using self.__getitem__(), as any transformations passed in
160 | could mess up this functionality.
161 |
162 | Argument:
163 | index (int): index of img to get annotation of
164 | Return:
165 |             list: the COCO annotation dicts for the image, as returned by
166 |                 ``coco.loadAnns``
167 | '''
168 | img_id = self.ids[index]
169 | ann_ids = self.coco.getAnnIds(imgIds=img_id)
170 | return self.coco.loadAnns(ann_ids)
171 |
172 | def __repr__(self):
173 | fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
174 | fmt_str += ' Number of datapoints: {}\n'.format(self.__len__())
175 | fmt_str += ' Root Location: {}\n'.format(self.root)
176 | tmp = ' Transforms (if any): '
177 | fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
178 | tmp = ' Target Transforms (if any): '
179 | fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
180 | return fmt_str
181 |
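182 | # A minimal sketch of the bbox conversion performed in COCOAnnotationTransform:
183 | # COCO stores boxes as [xmin, ymin, w, h] in pixels, and __call__ turns them into
184 | # [xmin, ymin, xmax, ymax] scaled to [0, 1]. The category_id -> class index lookup
185 | # is left out here because it needs data/coco/coco_labels.txt. Run this as
186 | # `python -m data.coco` so the relative import at the top resolves.
187 | if __name__ == '__main__':
188 |     width, height = 640, 480
189 |     bbox = [320.0, 120.0, 64.0, 48.0]    # [xmin, ymin, w, h] from a COCO annotation
190 |     bbox[2] += bbox[0]                   # xmax = xmin + w
191 |     bbox[3] += bbox[1]                   # ymax = ymin + h
192 |     scale = np.array([width, height, width, height])
193 |     print(list(np.array(bbox) / scale))  # [0.5, 0.25, 0.6, 0.35]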
--------------------------------------------------------------------------------
/实验 4.1/ssd_resnet_101.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from torch.autograd import Variable
5 | from layers import *
6 | from data import voc, coco, custom
7 | import os
8 |
9 | from netModel.resnet import resnet18, resnet34, BasicBlock
10 |
11 |
12 | class SSD(nn.Module):
13 | """Single Shot Multibox Architecture
14 | The network is composed of a base VGG network followed by the
15 | added multibox conv layers. Each multibox layer branches into
16 | 1) conv2d for class conf scores
17 | 2) conv2d for localization predictions
18 | 3) associated priorbox layer to produce default bounding
19 | boxes specific to the layer's feature map size.
20 | See: https://arxiv.org/pdf/1512.02325.pdf for more details.
21 |
22 | Args:
23 | phase: (string) Can be "test" or "train"
24 | size: input image size
25 | base: resnet layers for input, size of either 300 or 500
26 | extras: extra layers that feed to multibox loc and conf layers
27 | head: "multibox head" consists of loc and conf conv layers
28 | """
29 |
30 | def __init__(self, phase, size, base, extras, head, num_classes):
31 | super(SSD, self).__init__()
32 | self.phase = phase
33 | self.num_classes = num_classes
34 | self.cfg = custom
35 | self.priorbox = PriorBox(self.cfg)
36 | self.priors = Variable(self.priorbox.forward(), volatile=True)
37 | self.size = size
38 |
39 | # SSD network
40 | self.resnet = nn.ModuleList(base)
41 | # Layer learns to scale the l2 normalized features from conv4_3
42 | self.L2Norm = L2Norm(512, 20)
43 | self.extras = nn.ModuleList(extras)
44 |
45 | self.loc = nn.ModuleList(head[0])
46 | self.conf = nn.ModuleList(head[1])
47 |
48 | if phase == 'test':
49 | self.softmax = nn.Softmax(dim=-1)
50 | self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)
51 |
52 | def forward(self, x):
53 | """Applies network layers and ops on input image(s) x.
54 |
55 | Args:
56 | x: input image or batch of images. Shape: [batch,3,300,300].
57 |
58 | Return:
59 | Depending on phase:
60 | test:
61 | Variable(tensor) of output class label predictions,
62 | confidence score, and corresponding location predictions for
63 | each object detected. Shape: [batch,topk,7]
64 |
65 | train:
66 | list of concat outputs from:
67 | 1: confidence layers, Shape: [batch*num_priors,num_classes]
68 | 2: localization layers, Shape: [batch,num_priors*4]
69 | 3: priorbox layers, Shape: [2,num_priors*4]
70 | """
71 | sources = list()
72 | loc = list()
73 | conf = list()
74 |
75 |         # apply resnet up through layer3 and cache its output as the first source
76 | for k in range(0,7):
77 | x = self.resnet[k](x)
78 | sources.append(x)
79 |
80 |         # apply the remaining layer (layer4) and cache its output as the second source
81 | for k in range(7, len(self.resnet)):
82 | x = self.resnet[k](x)
83 | sources.append(x)
84 | # s = self.L2Norm(x)
85 | # sources.append(s)
86 |
87 | # apply extra layers and cache source layer outputs
88 | for k, v in enumerate(self.extras):
89 | x = F.relu(v(x), inplace=True)
90 | if k % 2 == 1:
91 | sources.append(x)
92 | # apply multibox head to source layers
93 | for (x, l, c) in zip(sources, self.loc, self.conf):
94 | loc.append(l(x).permute(0, 2, 3, 1).contiguous())
95 | conf.append(c(x).permute(0, 2, 3, 1).contiguous())
96 |
97 | loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
98 | conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
99 | if self.phase == "test":
100 | output = self.detect(
101 | loc.view(loc.size(0), -1, 4), # loc preds
102 | self.softmax(conf.view(conf.size(0), -1,
103 | self.num_classes)), # conf preds
104 | self.priors.type(type(x.data)) # default boxes
105 | )
106 | else:
107 | output = (
108 | loc.view(loc.size(0), -1, 4),
109 | conf.view(conf.size(0), -1, self.num_classes),
110 | self.priors
111 | )
112 | return output
113 |
114 | def load_weights(self, base_file):
115 | other, ext = os.path.splitext(base_file)
116 |         if ext == '.pkl' or ext == '.pth':
117 | print('Loading weights into state dict...')
118 | self.load_state_dict(torch.load(base_file,
119 | map_location=lambda storage, loc: storage))
120 | print('Finished!')
121 | else:
122 | print('Sorry only .pth and .pkl files supported.')
123 |
124 |
125 | # This function is derived from torchvision VGG make_layers()
126 | # https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py
127 | def vgg(cfg, i, batch_norm=False):
128 | layers = []
129 | in_channels = i
130 | for v in cfg:
131 | if v == 'M':
132 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
133 | elif v == 'C':
134 | layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)]
135 | else:
136 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
137 | if batch_norm:
138 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
139 | else:
140 | layers += [conv2d, nn.ReLU(inplace=True)]
141 | in_channels = v
142 | pool5 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
143 | conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6)
144 | conv7 = nn.Conv2d(1024, 1024, kernel_size=1)
145 | layers += [pool5, conv6,
146 | nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True)]
147 | return layers
148 |
149 | def resnet():
150 | resnet = resnet34(pretrained=True)
151 | layers = [
152 | resnet.conv1,
153 | resnet.bn1,
154 | resnet.relu,
155 | resnet.maxpool,
156 | resnet.layer1,
157 | resnet.layer2,
158 | resnet.layer3,
159 | resnet.layer4,
160 | ]
161 | return layers
162 |
163 | def add_extras(cfg, i, batch_norm=False):
164 | # Extra layers added to VGG for feature scaling
165 | layers = []
166 | in_channels = i
167 | flag = False
168 | for k, v in enumerate(cfg):
169 | if in_channels != 'S':
170 | if v == 'S':
171 | layers += [nn.Conv2d(in_channels, cfg[k + 1],
172 | kernel_size=(1, 3)[flag], stride=2, padding=1)]
173 | else:
174 | layers += [nn.Conv2d(in_channels, v, kernel_size=(1, 3)[flag])]
175 | flag = not flag
176 | in_channels = v
177 | return layers
178 |
179 |
180 | def multibox(resnet, extra_layers, cfg, num_classes):
181 | loc_layers = []
182 | conf_layers = []
183 | resnet_source = [-2, -1]
184 | for k, v in enumerate(resnet_source):
185 | loc_layers += [nn.Conv2d(resnet[v][-1].conv2.out_channels,
186 | cfg[k] * 4, kernel_size=3, padding=1)]
187 | conf_layers += [nn.Conv2d(resnet[v][-1].conv2.out_channels,
188 | cfg[k] * num_classes, kernel_size=3, padding=1)]
189 | for k, v in enumerate(extra_layers[1::2], 2):
190 | loc_layers += [nn.Conv2d(v.out_channels, cfg[k]
191 | * 4, kernel_size=3, padding=1)]
192 | conf_layers += [nn.Conv2d(v.out_channels, cfg[k]
193 | * num_classes, kernel_size=3, padding=1)]
194 | return resnet, extra_layers, (loc_layers, conf_layers)
195 |
196 |
197 | base = {
198 | '300': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M',
199 | 512, 512, 512],
200 | '512': [],
201 | }
202 | extras = {
203 | '300': [256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256],
204 | '512': [],
205 | }
206 | mbox = {
207 | '300': [4, 6, 6, 6, 4, 4], # number of boxes per feature map location
208 | '512': [],
209 | }
210 |
211 |
212 | def build_ssd(phase, size=300, num_classes=21):
213 | if phase != "test" and phase != "train":
214 | print("ERROR: Phase: " + phase + " not recognized")
215 | return
216 | if size != 300:
217 | print("ERROR: You specified size " + repr(size) + ". However, " +
218 | "currently only SSD300 (size=300) is supported!")
219 | return
220 | base_, extras_, head_ = multibox(resnet(),
221 | add_extras(extras[str(size)], 512),
222 | mbox[str(size)], num_classes)
223 | return SSD(phase, size, base_, extras_, head_, num_classes)
224 |
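225 | # A small inspection sketch (runs without pretrained weights or data): build the
226 | # extra feature layers exactly as build_ssd does and list their channels. Every
227 | # second extra layer (k % 2 == 1) is appended to `sources` in forward(), joining
228 | # the layer3 and layer4 outputs as inputs to the multibox loc/conf heads.
229 | if __name__ == '__main__':
230 |     extra_layers = add_extras(extras['300'], 512)
231 |     for k, layer in enumerate(extra_layers):
232 |         tag = ' -> multibox source' if k % 2 == 1 else ''
233 |         print(k, layer.in_channels, layer.out_channels, layer.kernel_size, tag)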
--------------------------------------------------------------------------------
/实验 4.2/ssd_resnet_18.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from torch.autograd import Variable
5 | from layers import *
6 | from data import voc, coco, custom
7 | import os
8 |
9 | from netModel.resnet import resnet18, resnet34, BasicBlock
10 |
11 |
12 | class SSD(nn.Module):
13 | """Single Shot Multibox Architecture
14 | The network is composed of a base VGG network followed by the
15 | added multibox conv layers. Each multibox layer branches into
16 | 1) conv2d for class conf scores
17 | 2) conv2d for localization predictions
18 | 3) associated priorbox layer to produce default bounding
19 | boxes specific to the layer's feature map size.
20 | See: https://arxiv.org/pdf/1512.02325.pdf for more details.
21 |
22 | Args:
23 | phase: (string) Can be "test" or "train"
24 | size: input image size
25 | base: resnet layers for input, size of either 300 or 500
26 | extras: extra layers that feed to multibox loc and conf layers
27 | head: "multibox head" consists of loc and conf conv layers
28 | """
29 |
30 | def __init__(self, phase, size, base, extras, head, num_classes):
31 | super(SSD, self).__init__()
32 | self.phase = phase
33 | self.num_classes = num_classes
34 | self.cfg = custom
35 | self.priorbox = PriorBox(self.cfg)
36 | self.priors = Variable(self.priorbox.forward(), volatile=True)
37 | self.size = size
38 |
39 | # SSD network
40 | self.resnet = nn.ModuleList(base)
41 | # Layer learns to scale the l2 normalized features from conv4_3
42 | self.L2Norm = L2Norm(512, 20)
43 | self.extras = nn.ModuleList(extras)
44 |
45 | self.loc = nn.ModuleList(head[0])
46 | self.conf = nn.ModuleList(head[1])
47 |
48 | if phase == 'test':
49 | self.softmax = nn.Softmax(dim=-1)
50 | self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)
51 |
52 | def forward(self, x):
53 | """Applies network layers and ops on input image(s) x.
54 |
55 | Args:
56 | x: input image or batch of images. Shape: [batch,3,300,300].
57 |
58 | Return:
59 | Depending on phase:
60 | test:
61 | Variable(tensor) of output class label predictions,
62 | confidence score, and corresponding location predictions for
63 | each object detected. Shape: [batch,topk,7]
64 |
65 | train:
66 | list of concat outputs from:
67 | 1: confidence layers, Shape: [batch*num_priors,num_classes]
68 | 2: localization layers, Shape: [batch,num_priors*4]
69 | 3: priorbox layers, Shape: [2,num_priors*4]
70 | """
71 | sources = list()
72 | loc = list()
73 | conf = list()
74 |
75 |         # apply resnet up through layer3 and cache its output as the first source
76 | for k in range(0,7):
77 | x = self.resnet[k](x)
78 | sources.append(x)
79 |
80 |         # apply the remaining layer (layer4) and cache its output as the second source
81 | for k in range(7, len(self.resnet)):
82 | x = self.resnet[k](x)
83 | sources.append(x)
84 | # s = self.L2Norm(x)
85 | # sources.append(s)
86 |
87 | # apply extra layers and cache source layer outputs
88 | for k, v in enumerate(self.extras):
89 | x = F.relu(v(x), inplace=True)
90 | if k % 2 == 1:
91 | sources.append(x)
92 | # apply multibox head to source layers
93 | for (x, l, c) in zip(sources, self.loc, self.conf):
94 | loc.append(l(x).permute(0, 2, 3, 1).contiguous())
95 | conf.append(c(x).permute(0, 2, 3, 1).contiguous())
96 |
97 | loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
98 | conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
99 | if self.phase == "test":
100 | output = self.detect(
101 | loc.view(loc.size(0), -1, 4), # loc preds
102 | self.softmax(conf.view(conf.size(0), -1,
103 | self.num_classes)), # conf preds
104 | self.priors.type(type(x.data)) # default boxes
105 | )
106 | else:
107 | output = (
108 | loc.view(loc.size(0), -1, 4),
109 | conf.view(conf.size(0), -1, self.num_classes),
110 | self.priors
111 | )
112 | return output
113 |
114 | def load_weights(self, base_file):
115 | other, ext = os.path.splitext(base_file)
116 |         if ext == '.pkl' or ext == '.pth':
117 | print('Loading weights into state dict...')
118 | self.load_state_dict(torch.load(base_file,
119 | map_location=lambda storage, loc: storage))
120 | print('Finished!')
121 | else:
122 | print('Sorry only .pth and .pkl files supported.')
123 |
124 |
125 | # This function is derived from torchvision VGG make_layers()
126 | # https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py
127 | def vgg(cfg, i, batch_norm=False):
128 | layers = []
129 | in_channels = i
130 | for v in cfg:
131 | if v == 'M':
132 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
133 | elif v == 'C':
134 | layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)]
135 | else:
136 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
137 | if batch_norm:
138 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
139 | else:
140 | layers += [conv2d, nn.ReLU(inplace=True)]
141 | in_channels = v
142 | pool5 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
143 | conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6)
144 | conv7 = nn.Conv2d(1024, 1024, kernel_size=1)
145 | layers += [pool5, conv6,
146 | nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True)]
147 | return layers
148 |
149 | def resnet():
150 | resnet = resnet18(pretrained=True)
151 | layers = [
152 | resnet.conv1,
153 | resnet.bn1,
154 | resnet.relu,
155 | resnet.maxpool,
156 | resnet.layer1,
157 | resnet.layer2,
158 | resnet.layer3,
159 | resnet.layer4,
160 | ]
161 | return layers
162 |
163 | def add_extras(cfg, i, batch_norm=False):
164 | # Extra layers added to VGG for feature scaling
165 | layers = []
166 | in_channels = i
167 | flag = False
168 | for k, v in enumerate(cfg):
169 | if in_channels != 'S':
170 | if v == 'S':
171 | layers += [nn.Conv2d(in_channels, cfg[k + 1],
172 | kernel_size=(1, 3)[flag], stride=2, padding=1)]
173 | else:
174 | layers += [nn.Conv2d(in_channels, v, kernel_size=(1, 3)[flag])]
175 | flag = not flag
176 | in_channels = v
177 | return layers
178 |
179 |
180 | def multibox(resnet, extra_layers, cfg, num_classes):
181 | loc_layers = []
182 | conf_layers = []
183 | resnet_source = [-2, -1]
184 | for k, v in enumerate(resnet_source):
185 | loc_layers += [nn.Conv2d(resnet[v][-1].conv2.out_channels,
186 | cfg[k] * 4, kernel_size=3, padding=1)]
187 | conf_layers += [nn.Conv2d(resnet[v][-1].conv2.out_channels,
188 | cfg[k] * num_classes, kernel_size=3, padding=1)]
189 | for k, v in enumerate(extra_layers[1::2], 2):
190 | loc_layers += [nn.Conv2d(v.out_channels, cfg[k]
191 | * 4, kernel_size=3, padding=1)]
192 | conf_layers += [nn.Conv2d(v.out_channels, cfg[k]
193 | * num_classes, kernel_size=3, padding=1)]
194 | return resnet, extra_layers, (loc_layers, conf_layers)
195 |
196 |
197 | base = {
198 | '300': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M',
199 | 512, 512, 512],
200 | '512': [],
201 | }
202 | extras = {
203 | '300': [256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256],
204 | '512': [],
205 | }
206 | mbox = {
207 | '300': [4, 6, 6, 6, 4, 4], # number of boxes per feature map location
208 | '512': [],
209 | }
210 |
211 |
212 | def build_ssd(phase, size=300, num_classes=21):
213 | if phase != "test" and phase != "train":
214 | print("ERROR: Phase: " + phase + " not recognized")
215 | return
216 | if size != 300:
217 | print("ERROR: You specified size " + repr(size) + ". However, " +
218 | "currently only SSD300 (size=300) is supported!")
219 | return
220 | base_, extras_, head_ = multibox(resnet(),
221 | add_extras(extras[str(size)], 512),
222 | mbox[str(size)], num_classes)
223 | return SSD(phase, size, base_, extras_, head_, num_classes)
224 |
--------------------------------------------------------------------------------
/layers/box_utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import torch
3 |
4 |
5 | def point_form(boxes):
6 | """ Convert prior_boxes to (xmin, ymin, xmax, ymax)
7 | representation for comparison to point form ground truth data.
8 | Args:
9 | boxes: (tensor) center-size default boxes from priorbox layers.
10 | Return:
11 | boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes.
12 | """
13 | return torch.cat((boxes[:, :2] - boxes[:, 2:]/2, # xmin, ymin
14 | boxes[:, :2] + boxes[:, 2:]/2), 1) # xmax, ymax
15 |
16 |
17 | def center_size(boxes):
18 | """ Convert prior_boxes to (cx, cy, w, h)
19 | representation for comparison to center-size form ground truth data.
20 | Args:
21 | boxes: (tensor) point_form boxes
22 | Return:
23 |         boxes: (tensor) Converted cx, cy, w, h form of boxes.
24 |     """
25 |     return torch.cat(((boxes[:, 2:] + boxes[:, :2])/2,  # cx, cy
26 |                       boxes[:, 2:] - boxes[:, :2]), 1)  # w, h
27 |
28 |
29 | def intersect(box_a, box_b):
30 | """ We resize both tensors to [A,B,2] without new malloc:
31 | [A,2] -> [A,1,2] -> [A,B,2]
32 | [B,2] -> [1,B,2] -> [A,B,2]
33 | Then we compute the area of intersect between box_a and box_b.
34 | Args:
35 | box_a: (tensor) bounding boxes, Shape: [A,4].
36 | box_b: (tensor) bounding boxes, Shape: [B,4].
37 | Return:
38 | (tensor) intersection area, Shape: [A,B].
39 | """
40 | A = box_a.size(0)
41 | B = box_b.size(0)
42 | max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2),
43 | box_b[:, 2:].unsqueeze(0).expand(A, B, 2))
44 | min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2),
45 | box_b[:, :2].unsqueeze(0).expand(A, B, 2))
46 | inter = torch.clamp((max_xy - min_xy), min=0)
47 | return inter[:, :, 0] * inter[:, :, 1]
48 |
49 |
50 | def jaccard(box_a, box_b):
51 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap
52 | is simply the intersection over union of two boxes. Here we operate on
53 | ground truth boxes and default boxes.
54 | E.g.:
55 | A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
56 | Args:
57 | box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4]
58 | box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4]
59 | Return:
60 | jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
61 | """
62 | inter = intersect(box_a, box_b)
63 | area_a = ((box_a[:, 2]-box_a[:, 0]) *
64 | (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B]
65 | area_b = ((box_b[:, 2]-box_b[:, 0]) *
66 | (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B]
67 | union = area_a + area_b - inter
68 | return inter / union # [A,B]
69 |
70 |
71 | def match(threshold, truths, priors, variances, labels, loc_t, conf_t, idx):
72 | """Match each prior box with the ground truth box of the highest jaccard
73 | overlap, encode the bounding boxes, then return the matched indices
74 | corresponding to both confidence and location preds.
75 | Args:
76 |         threshold: (float) The overlap threshold used when matching boxes.
77 |         truths: (tensor) Ground truth boxes, Shape: [num_obj, 4].
78 |         priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4].
79 |         variances: (tensor) Variances corresponding to each prior coord,
80 |             Shape: [num_priors, 4].
81 |         labels: (tensor) All the class labels for the image, Shape: [num_obj].
82 |         loc_t: (tensor) Tensor to be filled w/ encoded location targets.
83 | conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds.
84 | idx: (int) current batch index
85 | Return:
86 | The matched indices corresponding to 1)location and 2)confidence preds.
87 | """
88 | # jaccard index
89 | overlaps = jaccard(
90 | truths,
91 | point_form(priors)
92 | )
93 | # (Bipartite Matching)
94 | # [1,num_objects] best prior for each ground truth
95 | best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True)
96 | # [1,num_priors] best ground truth for each prior
97 | best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True)
98 | best_truth_idx.squeeze_(0)
99 | best_truth_overlap.squeeze_(0)
100 | best_prior_idx.squeeze_(1)
101 | best_prior_overlap.squeeze_(1)
102 | best_truth_overlap.index_fill_(0, best_prior_idx, 2) # ensure best prior
103 | # TODO refactor: index best_prior_idx with long tensor
104 | # ensure every gt matches with its prior of max overlap
105 | for j in range(best_prior_idx.size(0)):
106 | best_truth_idx[best_prior_idx[j]] = j
107 | matches = truths[best_truth_idx] # Shape: [num_priors,4]
108 | conf = labels[best_truth_idx] + 1 # Shape: [num_priors]
109 | conf[best_truth_overlap < threshold] = 0 # label as background
110 | loc = encode(matches, priors, variances)
111 | loc_t[idx] = loc # [num_priors,4] encoded offsets to learn
112 | conf_t[idx] = conf # [num_priors] top class label for each prior
113 |
114 |
115 | def encode(matched, priors, variances):
116 | """Encode the variances from the priorbox layers into the ground truth boxes
117 | we have matched (based on jaccard overlap) with the prior boxes.
118 | Args:
119 | matched: (tensor) Coords of ground truth for each prior in point-form
120 | Shape: [num_priors, 4].
121 | priors: (tensor) Prior boxes in center-offset form
122 | Shape: [num_priors,4].
123 | variances: (list[float]) Variances of priorboxes
124 | Return:
125 | encoded boxes (tensor), Shape: [num_priors, 4]
126 | """
127 |
128 | # dist b/t match center and prior's center
129 | g_cxcy = (matched[:, :2] + matched[:, 2:])/2 - priors[:, :2]
130 | # encode variance
131 | g_cxcy /= (variances[0] * priors[:, 2:])
132 | # match wh / prior wh
133 | g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:]
134 | g_wh = torch.log(g_wh) / variances[1]
135 | # return target for smooth_l1_loss
136 | return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4]
137 |
138 |
139 | # Adapted from https://github.com/Hakuyume/chainer-ssd
140 | def decode(loc, priors, variances):
141 | """Decode locations from predictions using priors to undo
142 | the encoding we did for offset regression at train time.
143 | Args:
144 | loc (tensor): location predictions for loc layers,
145 | Shape: [num_priors,4]
146 | priors (tensor): Prior boxes in center-offset form.
147 | Shape: [num_priors,4].
148 | variances: (list[float]) Variances of priorboxes
149 | Return:
150 | decoded bounding box predictions
151 | """
152 |
153 | boxes = torch.cat((
154 | priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
155 | priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
156 | boxes[:, :2] -= boxes[:, 2:] / 2
157 | boxes[:, 2:] += boxes[:, :2]
158 | return boxes
159 |
160 |
161 | def log_sum_exp(x):
162 |     """Utility function for computing log_sum_exp in a numerically stable way.
163 | This will be used to determine unaveraged confidence loss across
164 | all examples in a batch.
165 | Args:
166 | x (Variable(tensor)): conf_preds from conf layers
167 | """
168 | x_max = x.data.max()
169 | return torch.log(torch.sum(torch.exp(x-x_max), 1, keepdim=True)) + x_max
170 |
171 |
172 | # Original author: Francisco Massa:
173 | # https://github.com/fmassa/object-detection.torch
174 | # Ported to PyTorch by Max deGroot (02/01/2017)
175 | def nms(boxes, scores, overlap=0.5, top_k=200):
176 | """Apply non-maximum suppression at test time to avoid detecting too many
177 | overlapping bounding boxes for a given object.
178 | Args:
179 | boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
180 |         scores: (tensor) The class pred scores for the img, Shape:[num_priors].
181 | overlap: (float) The overlap thresh for suppressing unnecessary boxes.
182 | top_k: (int) The Maximum number of box preds to consider.
183 | Return:
184 | The indices of the kept boxes with respect to num_priors.
185 | """
186 |
187 | keep = scores.new(scores.size(0)).zero_().long()
188 | if boxes.numel() == 0:
189 | return keep
190 | x1 = boxes[:, 0]
191 | y1 = boxes[:, 1]
192 | x2 = boxes[:, 2]
193 | y2 = boxes[:, 3]
194 | area = torch.mul(x2 - x1, y2 - y1)
195 | v, idx = scores.sort(0) # sort in ascending order
196 | # I = I[v >= 0.01]
197 | idx = idx[-top_k:] # indices of the top-k largest vals
198 | xx1 = boxes.new()
199 | yy1 = boxes.new()
200 | xx2 = boxes.new()
201 | yy2 = boxes.new()
202 | w = boxes.new()
203 | h = boxes.new()
204 |
205 | # keep = torch.Tensor()
206 | count = 0
207 | while idx.numel() > 0:
208 | i = idx[-1] # index of current largest val
209 | # keep.append(i)
210 | keep[count] = i
211 | count += 1
212 | if idx.size(0) == 1:
213 | break
214 | idx = idx[:-1] # remove kept element from view
215 | # load bboxes of next highest vals
216 | torch.index_select(x1, 0, idx, out=xx1)
217 | torch.index_select(y1, 0, idx, out=yy1)
218 | torch.index_select(x2, 0, idx, out=xx2)
219 | torch.index_select(y2, 0, idx, out=yy2)
220 | # store element-wise max with next highest score
221 | xx1 = torch.clamp(xx1, min=x1[i])
222 | yy1 = torch.clamp(yy1, min=y1[i])
223 | xx2 = torch.clamp(xx2, max=x2[i])
224 | yy2 = torch.clamp(yy2, max=y2[i])
225 | w.resize_as_(xx2)
226 | h.resize_as_(yy2)
227 | w = xx2 - xx1
228 | h = yy2 - yy1
229 | # check sizes of xx1 and xx2.. after each iteration
230 | w = torch.clamp(w, min=0.0)
231 | h = torch.clamp(h, min=0.0)
232 | inter = w*h
233 | # IoU = i / (area(a) + area(b) - i)
234 |         rem_areas = torch.index_select(area, 0, idx)  # load remaining areas
235 | union = (rem_areas - inter) + area[i]
236 | IoU = inter/union # store result in iou
237 | # keep only elements with an IoU <= overlap
238 | idx = idx[IoU.le(overlap)]
239 | return keep, count
240 |
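241 | # A minimal round-trip sketch for the helpers above (illustrative values only):
242 | # encode a ground-truth box against a single prior using the usual SSD variances
243 | # [0.1, 0.2], decode the offsets back, and check the IoU against the original box.
244 | if __name__ == '__main__':
245 |     variances = [0.1, 0.2]
246 |     priors = torch.tensor([[0.5, 0.5, 0.4, 0.4]])      # one prior, center-size form
247 |     truth = torch.tensor([[0.35, 0.35, 0.65, 0.65]])    # one gt box, point form
248 |     offsets = encode(truth, priors, variances)
249 |     recovered = decode(offsets, priors, variances)
250 |     print('encoded offsets:', offsets)
251 |     print('decoded box    :', recovered)                # equals truth
252 |     print('IoU with truth :', jaccard(truth, recovered))  # 1.0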
--------------------------------------------------------------------------------
/实验 4.2/visualTest_building.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import torch
3 | import torch.nn as nn
4 | import torch.backends.cudnn as cudnn
5 | from torch.autograd import Variable
6 | import torch.utils.data as data
7 |
8 | from data import BaseTransform
9 | from data.custom_for_visual import CUSTOM_CLASSES_BUILDING as labelmap_building
10 | from data.custom_for_visual import customDetection, customAnnotationTransform, CUSTOM_ROOT, CUSTOM_CLASSES_BUILDING
11 |
12 | # from ssd import build_ssd
13 | from ssd_resnet_18 import build_ssd
14 |
15 | import sys
16 | import os
17 | import time
18 | import argparse
19 | import numpy as np
20 | import pickle
21 | import cv2
22 | import math
23 |
24 | import warnings
25 | warnings.filterwarnings("ignore")
26 |
27 | parser = argparse.ArgumentParser(description='Single Shot MultiBox Detection')
28 | parser.add_argument('--trained_model_building',
29 | default='useful_weight/CUSTOM.pth', type=str,
30 | help='Trained state_dict file path to open')
31 | parser.add_argument('--save_folder', default='eval/', type=str,
32 | help='Dir to save results')
33 | parser.add_argument('--visual_threshold', default=0.15, type=float,
34 | help='Final confidence threshold')
35 | parser.add_argument('--cuda', default=True, type=bool,
36 | help='Use cuda to train model')
37 | parser.add_argument('--custom_root', default=CUSTOM_ROOT, help='Location of the custom dataset root directory')
38 | parser.add_argument('-f', default=None, type=str, help="Dummy arg so we can load in Jupyter Notebooks")
39 | args = parser.parse_args()
40 |
41 | if args.cuda and torch.cuda.is_available():
42 | torch.set_default_tensor_type('torch.cuda.FloatTensor')
43 | else:
44 | torch.set_default_tensor_type('torch.FloatTensor')
45 |
46 | if not os.path.exists(args.save_folder):
47 | os.mkdir(args.save_folder)
48 |
49 |
50 | def test_net(save_folder, net, cuda, testset, transform, thresh, labelmap):
51 | # dump predictions and assoc. ground truth to text file for now
52 | filename = save_folder + 'result_%s.txt'
53 | num_images = len(testset)
54 | for i in range(num_images):
55 | print('Testing image {:d}/{:d}....'.format(i+1, num_images))
56 | img = testset.pull_image(i)
57 | img_id, annotation = testset.pull_anno(i)
58 | x = torch.from_numpy(transform(img)[0]).permute(2, 0, 1)
59 | x = Variable(x.unsqueeze(0))
60 |
61 | if cuda:
62 | x = x.cuda()
63 |
64 | y = net(x) # forward pass
65 | detections = y.data
66 | # scale each detection back up to the image
67 | scale = torch.Tensor([img.shape[1], img.shape[0],
68 | img.shape[1], img.shape[0]])
69 | pred_num = 0
70 | for i in range(detections.size(1)):
71 | j = 0
72 | while detections[0, i, j, 0] >= args.visual_threshold:
73 | score = detections[0, i, j, 0]
74 | label_name = labelmap[i-1]
75 | pt = (detections[0, i, j, 1:]*scale).cpu().numpy()
76 | coords = (pt[0], pt[1], pt[2], pt[3])
77 | pred_num += 1
78 | with open(filename % label_name, mode='a') as f:
79 | f.write(str(img_id) + ' ' +
80 | str(score.cpu().numpy()) + ' '+ ' '.join(str(c) for c in coords) + '\n')
81 | j += 1
82 |
83 | def xmlData(name, width, height, label):
84 |     return '''<annotation>
85 |     <folder>JPEGImages</folder>
86 |     <filename>%s.jpg</filename>
87 |     <path>%s.jpg</path>
88 |     <source>
89 |         <database>Unknown</database>
90 |     </source>
91 |     <size>
92 |         <width>%d</width>
93 |         <height>%d</height>
94 |         <depth>3</depth>
95 |     </size>
96 |     <segmented>0</segmented>
97 | 
109 | ''' % (name, name, width, height, label)
110 |
111 | def get_output_dir(name, phase=""):
112 | filedir = os.path.join(name, phase)
113 | if not os.path.exists(filedir):
114 | os.makedirs(filedir)
115 | return filedir
116 |
117 | def is_rect_intersect(rect1, rect2):
118 | rect1_x1 = math.floor(rect1['x1'])
119 | rect1_y1 = math.floor(rect1['y1'])
120 | rect1_x2 = math.floor(rect1['x2'])
121 | rect1_y2 = math.floor(rect1['y2'])
122 |
123 | rect2_x1 = math.floor(rect2['x1'])
124 | rect2_y1 = math.floor(rect2['y1'])
125 | rect2_x2 = math.floor(rect2['x2'])
126 | rect2_y2 = math.floor(rect2['y2'])
127 |
128 | zx = abs(rect1_x1 + rect1_x2 - rect2_x1 - rect2_x2)
129 | x = abs(rect1_x1 - rect1_x2) + abs(rect2_x1 - rect2_x2)
130 |
131 | zy = abs(rect1_y1 + rect1_y2 - rect2_y1 - rect2_y2)
132 | y = abs(rect1_y1 - rect1_y2) + abs(rect2_y1 - rect2_y2)
133 |
134 | return True if zx <= x and zy <= y else False
135 |
136 |
137 | def test_custom():
138 | DEBUG = False
139 | set_type = 'test'
140 |
141 | if not os.path.exists(os.path.join(args.save_folder, 'result_building.txt')):
142 | # load net
143 | num_classes_building = len(labelmap_building) + 1 # +1 for background
144 | net = build_ssd('test', 300, num_classes_building) # initialize SSD
145 | net.load_state_dict(torch.load(args.trained_model_building))
146 | net.eval()
147 |
148 | print('Finished loading model!')
149 | # load data
150 | dataset1 = customDetection(args.custom_root, [('buildingwater', set_type)], None, customAnnotationTransform(class_to_ind=dict(zip(CUSTOM_CLASSES_BUILDING, range(len(CUSTOM_CLASSES_BUILDING))))))
151 | if args.cuda:
152 | net = net.cuda()
153 | cudnn.benchmark = True
154 | # evaluation
155 |
156 | test_net(args.save_folder, net, args.cuda, dataset1,
157 | BaseTransform(net.size, (104, 117, 123)),
158 | thresh=args.visual_threshold, labelmap=labelmap_building)
159 |
160 | rootPath = 'F:/ssd/data/video/buildingwater'
161 | img_path = os.path.join(rootPath, 'JPEGImages', '%s.jpg')
162 | imgList_building = {}
163 | imgList_water = {}
164 |
165 | with open(os.path.join(args.save_folder, 'result_building.txt'), 'r') as f:
166 | text_lines = f.readlines()
167 | for line in text_lines:
168 | info = line.split(" ")
169 | name, score, x1, y1, x2, y2 = info
170 | if name in imgList_building:
171 | imgList_building[name].append({
172 | 'score': float(score),
173 | 'x1': float(x1),
174 | 'y1': float(y1),
175 | 'x2': float(x2),
176 | 'y2': float(y2)
177 | })
178 | else:
179 | imgList_building[name] = [{
180 | 'score': float(score),
181 | 'x1': float(x1),
182 | 'y1': float(y1),
183 | 'x2': float(x2),
184 | 'y2': float(y2)
185 | }]
186 |
187 | with open(os.path.join(args.save_folder, 'result_water.txt'), 'r') as f:
188 | text_lines = f.readlines()
189 | for line in text_lines:
190 | info = line.split(" ")
191 | name, score, x1, y1, x2, y2 = info
192 | if name in imgList_water:
193 | imgList_water[name].append({
194 | 'score': float(score),
195 | 'x1': float(x1),
196 | 'y1': float(y1),
197 | 'x2': float(x2),
198 | 'y2': float(y2)
199 | })
200 | else:
201 | imgList_water[name] = [{
202 | 'score': float(score),
203 | 'x1': float(x1),
204 | 'y1': float(y1),
205 | 'x2': float(x2),
206 | 'y2': float(y2)
207 | }]
208 |
209 | opacity = 0.8
210 | for name in imgList_building:
211 | img_building = imgList_building[name]
212 | img_water = imgList_water[name] if name in imgList_water else []
213 |
214 | image = cv2.imread(img_path % name)
215 | (h, w, c) = image.shape
216 | img_black = image.copy()
217 | img_cp = image.copy()
218 | img_black.fill(1)
219 |
220 |
221 | for building in img_building:
222 | for water in img_water:
223 | if is_rect_intersect(building, water):
224 | x1_b = max(math.floor(building['x1']), 0)
225 | y1_b = max(math.floor(building['y1']), 0)
226 | x2_b = min(math.floor(building['x2']), w)
227 | y2_b = min(math.floor(building['y2']), h)
228 | cv2.rectangle(image, (x1_b-2, y1_b-2), (x2_b+2, y2_b+2), (0,0,255), 5)
229 | img_black[y1_b:y2_b, x1_b:x2_b] = 0
230 |
231 |
232 | # for building in img_building:
233 | # x1_b = max(math.floor(building['x1']), 0)
234 | # y1_b = max(math.floor(building['y1']), 0)
235 | # x2_b = min(math.floor(building['x2']), w)
236 | # y2_b = min(math.floor(building['y2']), h)
237 | # # cv2.rectangle(image, (x1_b, y1_b), (x2_b, y2_b), (0,0,255), 5)
238 | # img_black[y1_b:y2_b, x1_b:x2_b] = 0
239 | image[:,:,0] = (1 - img_black[:,:,0]) * (img_cp[:,:,0]) + img_black[:,:,0] * image[:,:,0]
240 | image[:,:,1] = (1 - img_black[:,:,1]) * (img_cp[:,:,1]) + img_black[:,:,1] * image[:,:,1]
241 | image[:,:,2] = (1 - img_black[:,:,2]) * (img_cp[:,:,2] ) + img_black[:,:,2] * image[:,:,2]
242 |
243 | image[:,:,0] = (1 - img_black[:,:,0]) * (image[:,:,0] * opacity + 0 * (1 - opacity)) + img_black[:,:,0] * image[:,:,0]
244 | image[:,:,1] = (1 - img_black[:,:,1]) * (image[:,:,1] * opacity + 0 * (1 - opacity)) + img_black[:,:,1] * image[:,:,1]
245 | image[:,:,2] = (1 - img_black[:,:,2]) * (image[:,:,2] * opacity + 255 * (1 - opacity)) + img_black[:,:,2] * image[:,:,2]
246 |
247 | # for water in img_water:
248 | # x1_w = max(math.floor(water['x1']), 0)
249 | # y1_w = max(math.floor(water['y1']), 0)
250 | # x2_w = min(math.floor(water['x2']), w)
251 | # y2_w = min(math.floor(water['y2']), h)
252 | # cv2.rectangle(image, (x1_w, y1_w), (x2_w, y2_w), (0,255,0), 5)
253 |
254 | image = cv2.resize(image, (512, 512))
255 | # cv2.putText(image, 'building', (10, 40), cv2.FONT_HERSHEY_COMPLEX, 1.2, (0, 0, 255), 2)
256 | # cv2.putText(image, 'water', (10, 80), cv2.FONT_HERSHEY_COMPLEX, 1.2, (0, 255, 0), 2)
257 | cv2.imshow('w2', image)
258 | cv2.waitKey()
259 |
260 | if __name__ == '__main__':
261 | test_custom()
262 |
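263 | # A small note on is_rect_intersect above: it compares twice the distance between
264 | # the two box centres with the sum of the box extents on each axis, so touching
265 | # boxes count as intersecting. For example (made-up coordinates):
266 | #   is_rect_intersect({'x1': 0, 'y1': 0, 'x2': 10, 'y2': 10},
267 | #                     {'x1': 8, 'y1': 8, 'x2': 20, 'y2': 20})    -> True
268 | #   is_rect_intersect({'x1': 0, 'y1': 0, 'x2': 10, 'y2': 10},
269 | #                     {'x1': 30, 'y1': 30, 'x2': 40, 'y2': 40})  -> False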
--------------------------------------------------------------------------------
/实验 4.2/trainCustom_18.py:
--------------------------------------------------------------------------------
1 | from data import *
2 | from utils.augmentations import SSDAugmentation
3 | from layers.modules import MultiBoxLoss
4 | from ssd_resnet_18 import build_ssd
5 | import os
6 | import sys
7 | import time
8 | import torch
9 | from torch.autograd import Variable
10 | import torch.nn as nn
11 | import torch.optim as optim
12 | import torch.backends.cudnn as cudnn
13 | import torch.nn.init as init
14 | import torch.utils.data as data
15 | import numpy as np
16 | import argparse
17 |
18 | from data.voc0712 import VOCDetection, VOCAnnotationTransform, VOC_CLASSES, VOC_ROOT
19 |
20 | from data.coco import COCODetection, COCOAnnotationTransform, COCO_CLASSES, COCO_ROOT, get_label_map
21 |
22 | from data.custom import customDetection, customAnnotationTransform, CUSTOM_CLASSES, CUSTOM_ROOT
23 |
24 | def str2bool(v):
25 | return v.lower() in ("yes", "true", "t", "1")
26 |
27 |
28 | parser = argparse.ArgumentParser(
29 | description='Single Shot MultiBox Detector Training With Pytorch')
30 | train_set = parser.add_mutually_exclusive_group()
31 | parser.add_argument('--dataset', default='CUSTOM', choices=['VOC', 'COCO', 'CUSTOM'],
32 | type=str, help='VOC or COCO')
33 | parser.add_argument('--dataset_root', default=CUSTOM_ROOT, # VOC_ROOT,
34 | help='Dataset root directory path')
35 | parser.add_argument('--basenet', default='vgg16_reducedfc.pth',
36 | help='Pretrained base model')
37 | parser.add_argument('--batch_size', default=32, type=int,
38 | help='Batch size for training')
39 | parser.add_argument('--resume', default=None, type=str,
40 | help='Checkpoint state_dict file to resume training from')
41 | parser.add_argument('--start_iter', default=0, type=int,
42 | help='Resume training at this iter')
43 | parser.add_argument('--num_workers', default=4, type=int,
44 | help='Number of workers used in dataloading')
45 | parser.add_argument('--cuda', default=True, type=str2bool,
46 | help='Use CUDA to train model')
47 | parser.add_argument('--lr', '--learning-rate', default=1e-3, type=float,
48 | help='initial learning rate')
49 | parser.add_argument('--momentum', default=0.9, type=float,
50 | help='Momentum value for optim')
51 | parser.add_argument('--weight_decay', default=5e-4, type=float,
52 | help='Weight decay for SGD')
53 | parser.add_argument('--gamma', default=0.1, type=float,
54 | help='Gamma update for SGD')
55 | parser.add_argument('--visdom', default=False, type=str2bool,
56 | help='Use visdom for loss visualization')
57 | parser.add_argument('--save_folder', default='weights/',
58 | help='Directory for saving checkpoint models')
59 | args = parser.parse_args()
60 |
61 |
62 | if torch.cuda.is_available():
63 | if args.cuda:
64 | torch.set_default_tensor_type('torch.cuda.FloatTensor')
65 | if not args.cuda:
66 | print("WARNING: It looks like you have a CUDA device, but aren't " +
67 | "using CUDA.\nRun with --cuda for optimal training speed.")
68 | torch.set_default_tensor_type('torch.FloatTensor')
69 | else:
70 | torch.set_default_tensor_type('torch.FloatTensor')
71 |
72 | if not os.path.exists(args.save_folder):
73 | os.mkdir(args.save_folder)
74 |
75 |
76 | def train():
77 | if args.dataset == 'COCO':
78 | if args.dataset_root == VOC_ROOT:
79 | if not os.path.exists(COCO_ROOT):
80 | parser.error('Must specify dataset_root if specifying dataset')
81 | print("WARNING: Using default COCO dataset_root because " +
82 | "--dataset_root was not specified.")
83 | args.dataset_root = COCO_ROOT
84 | cfg = coco
85 | dataset = COCODetection(root=args.dataset_root,
86 | transform=SSDAugmentation(cfg['min_dim'],
87 | MEANS))
88 | elif args.dataset == 'VOC':
89 | if args.dataset_root == COCO_ROOT:
90 | parser.error('Must specify dataset if specifying dataset_root')
91 | cfg = voc
92 | dataset = VOCDetection(root=args.dataset_root,
93 | transform=SSDAugmentation(cfg['min_dim'],
94 | MEANS))
95 |
96 | elif args.dataset == 'CUSTOM':
97 | if args.dataset_root == VOC_ROOT or args.dataset_root == COCO_ROOT:
98 | parser.error('Must specify dataset if specifying dataset_root')
99 | cfg = custom
100 | dataset = customDetection(root=args.dataset_root,
101 | transform=SSDAugmentation(cfg['min_dim'],
102 | MEANS))
103 |
104 | if args.visdom:
105 | import visdom
106 | viz = visdom.Visdom()
107 |
108 | ssd_net = build_ssd('train', cfg['min_dim'], cfg['num_classes'])
109 | net = ssd_net
110 |
111 | if args.cuda:
112 | net = torch.nn.DataParallel(ssd_net)
113 | cudnn.benchmark = True
114 |
115 | if args.resume:
116 | print('Resuming training, loading {}...'.format(args.resume))
117 | ssd_net.load_weights(args.resume)
118 | else:
119 | pass
120 | # resnet_weights = torch.load(args.save_folder + args.basenet)
121 | # print('Loading base network...')
122 | # ssd_net.resnet.load_state_dict(resnet_weights)
123 |
124 | if args.cuda:
125 | net = net.cuda()
126 |
127 | if not args.resume:
128 | print('Initializing weights...')
129 | # initialize newly added layers' weights with xavier method
130 | ssd_net.extras.apply(weights_init)
131 | ssd_net.loc.apply(weights_init)
132 | ssd_net.conf.apply(weights_init)
133 |
134 | optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
135 | weight_decay=args.weight_decay)
136 | criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
137 | False, args.cuda)
138 |
139 | net.train()
140 | # loss counters
141 | loc_loss = 0
142 | conf_loss = 0
143 | epoch = 0
144 | print('Loading the dataset...')
145 |
146 | epoch_size = len(dataset) // args.batch_size
147 |     print('Epoch Size:', epoch_size)
148 | print('Training SSD on:', dataset.name)
149 | print('Using the specified args:')
150 | print(args)
151 |
152 | step_index = 0
153 |
154 | if args.visdom:
155 | vis_title = 'SSD.PyTorch on ' + dataset.name
156 | vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
157 | iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend)
158 | epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend)
159 |
160 | data_loader = data.DataLoader(dataset, args.batch_size,
161 | num_workers=args.num_workers,
162 | shuffle=True, collate_fn=detection_collate,
163 | pin_memory=True)
164 | # create batch iterator
165 | batch_iterator = iter(data_loader)
166 | for iteration in range(args.start_iter, cfg['max_iter']):
167 | if args.visdom and iteration != 0 and (iteration % epoch_size == 0):
168 | update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None,
169 | 'append', epoch_size)
170 | # reset epoch loss counters
171 | loc_loss = 0
172 | conf_loss = 0
173 | epoch += 1
174 |
175 | if iteration in cfg['lr_steps']:
176 | step_index += 1
177 | adjust_learning_rate(optimizer, args.gamma, step_index)
178 |
179 | # load train data
180 | # images, targets = next(batch_iterator)
181 | try:
182 | images, targets = next(batch_iterator)
183 | except StopIteration:
184 | batch_iterator = iter(data_loader)
185 | images, targets = next(batch_iterator)
186 |
187 | if args.cuda:
188 | images = Variable(images.cuda())
189 | targets = [Variable(ann.cuda(), volatile=True) for ann in targets]
190 | else:
191 | images = Variable(images)
192 | targets = [Variable(ann, volatile=True) for ann in targets]
193 | # forward
194 | t0 = time.time()
195 | out = net(images)
196 | # backprop
197 | optimizer.zero_grad()
198 | loss_l, loss_c = criterion(out, targets)
199 | loss = loss_l + loss_c
200 | loss.backward()
201 | optimizer.step()
202 | t1 = time.time()
203 | # loc_loss += loss_l.data[0]
204 | # conf_loss += loss_c.data[0]
205 | loc_loss += loss_l.item()
206 | conf_loss += loss_c.item()
207 |
208 | if iteration % 10 == 0:
209 | print('timer: %.4f sec.' % (t1 - t0))
210 | # print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.data[0]), end=' ')
211 | print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.item()), end=' ')
212 |
213 | if args.visdom:
214 | # update_vis_plot(iteration, loss_l.data[0], loss_c.data[0],
215 | # iter_plot, epoch_plot, 'append')
216 | update_vis_plot(iteration, loss_l.item(), loss_c.item(),
217 | iter_plot, epoch_plot, 'append')
218 |
219 | if iteration != 0 and iteration % 5000 == 0:
220 | print('Saving state, iter:', iteration)
221 | torch.save(ssd_net.state_dict(), args.save_folder + '/ssd300_COCO_' +
222 | repr(iteration) + '.pth')
223 | torch.save(ssd_net.state_dict(),
224 | args.save_folder + '' + args.dataset + '.pth')
225 |
226 |
227 | def adjust_learning_rate(optimizer, gamma, step):
228 | """Sets the learning rate to the initial LR decayed by 10 at every
229 | specified step
230 | # Adapted from PyTorch Imagenet example:
231 | # https://github.com/pytorch/examples/blob/master/imagenet/main.py
232 | """
233 | lr = args.lr * (gamma ** (step))
234 | for param_group in optimizer.param_groups:
235 | param_group['lr'] = lr
236 |
237 |
238 | def xavier(param):
239 |     init.xavier_uniform_(param)
240 |
241 |
242 | def weights_init(m):
243 | if isinstance(m, nn.Conv2d):
244 | xavier(m.weight.data)
245 | m.bias.data.zero_()
246 |
247 |
248 | def create_vis_plot(_xlabel, _ylabel, _title, _legend):
249 | return viz.line(
250 | X=torch.zeros((1,)).cpu(),
251 | Y=torch.zeros((1, 3)).cpu(),
252 | opts=dict(
253 | xlabel=_xlabel,
254 | ylabel=_ylabel,
255 | title=_title,
256 | legend=_legend
257 | )
258 | )
259 |
260 |
261 | def update_vis_plot(iteration, loc, conf, window1, window2, update_type,
262 | epoch_size=1):
263 | viz.line(
264 | X=torch.ones((1, 3)).cpu() * iteration,
265 | Y=torch.Tensor([loc, conf, loc + conf]).unsqueeze(0).cpu() / epoch_size,
266 | win=window1,
267 | update=update_type
268 | )
269 | # initialize epoch plot on first iteration
270 | if iteration == 0:
271 | viz.line(
272 | X=torch.zeros((1, 3)).cpu(),
273 | Y=torch.Tensor([loc, conf, loc + conf]).unsqueeze(0).cpu(),
274 | win=window2,
275 | update=True
276 | )
277 |
278 |
279 | if __name__ == '__main__':
280 | train()
281 |
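282 | # A worked example of adjust_learning_rate above, assuming the defaults --lr 1e-3
283 | # and --gamma 0.1: each time the iteration counter reaches an entry of
284 | # cfg['lr_steps'], step_index is incremented and every parameter group is set to
285 | #   lr = 1e-3 * 0.1 ** step_index
286 | # so the rate drops 1e-3 -> 1e-4 at the first step and 1e-4 -> 1e-5 at the second.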
--------------------------------------------------------------------------------
/实验 4.1/trainCustom_101.py:
--------------------------------------------------------------------------------
1 | from data import *
2 | from utils.augmentations import SSDAugmentation
3 | from layers.modules import MultiBoxLoss
4 | from ssd_resnet_101 import build_ssd
5 | import os
6 | import sys
7 | import time
8 | import torch
9 | from torch.autograd import Variable
10 | import torch.nn as nn
11 | import torch.optim as optim
12 | import torch.backends.cudnn as cudnn
13 | import torch.nn.init as init
14 | import torch.utils.data as data
15 | import numpy as np
16 | import argparse
17 |
18 | from data.voc0712 import VOCDetection, VOCAnnotationTransform, VOC_CLASSES, VOC_ROOT
19 |
20 | from data.coco import COCODetection, COCOAnnotationTransform, COCO_CLASSES, COCO_ROOT, get_label_map
21 |
22 | from data.custom import customDetection, customAnnotationTransform, CUSTOM_CLASSES, CUSTOM_ROOT
23 |
24 | def str2bool(v):
25 | return v.lower() in ("yes", "true", "t", "1")
26 |
27 |
28 | parser = argparse.ArgumentParser(
29 | description='Single Shot MultiBox Detector Training With Pytorch')
30 | train_set = parser.add_mutually_exclusive_group()
31 | parser.add_argument('--dataset', default='CUSTOM', choices=['VOC', 'COCO', 'CUSTOM'],
32 | type=str, help='VOC or COCO')
33 | parser.add_argument('--dataset_root', default=CUSTOM_ROOT, # VOC_ROOT,
34 | help='Dataset root directory path')
35 | parser.add_argument('--basenet', default='vgg16_reducedfc.pth',
36 | help='Pretrained base model')
37 | parser.add_argument('--batch_size', default=32, type=int,
38 | help='Batch size for training')
39 | parser.add_argument('--resume', default=None, type=str,
40 | help='Checkpoint state_dict file to resume training from')
41 | parser.add_argument('--start_iter', default=0, type=int,
42 | help='Resume training at this iter')
43 | parser.add_argument('--num_workers', default=4, type=int,
44 | help='Number of workers used in dataloading')
45 | parser.add_argument('--cuda', default=True, type=str2bool,
46 | help='Use CUDA to train model')
47 | parser.add_argument('--lr', '--learning-rate', default=1e-3, type=float,
48 | help='initial learning rate')
49 | parser.add_argument('--momentum', default=0.9, type=float,
50 | help='Momentum value for optim')
51 | parser.add_argument('--weight_decay', default=5e-4, type=float,
52 | help='Weight decay for SGD')
53 | parser.add_argument('--gamma', default=0.1, type=float,
54 | help='Gamma update for SGD')
55 | parser.add_argument('--visdom', default=False, type=str2bool,
56 | help='Use visdom for loss visualization')
57 | parser.add_argument('--save_folder', default='weights/',
58 | help='Directory for saving checkpoint models')
59 | args = parser.parse_args()
60 |
61 |
62 | if torch.cuda.is_available():
63 | if args.cuda:
64 | torch.set_default_tensor_type('torch.cuda.FloatTensor')
65 | if not args.cuda:
66 | print("WARNING: It looks like you have a CUDA device, but aren't " +
67 | "using CUDA.\nRun with --cuda for optimal training speed.")
68 | torch.set_default_tensor_type('torch.FloatTensor')
69 | else:
70 | torch.set_default_tensor_type('torch.FloatTensor')
71 |
72 | if not os.path.exists(args.save_folder):
73 | os.mkdir(args.save_folder)
74 |
75 |
76 | def train():
77 | if args.dataset == 'COCO':
78 | if args.dataset_root == VOC_ROOT:
79 | if not os.path.exists(COCO_ROOT):
80 | parser.error('Must specify dataset_root if specifying dataset')
81 | print("WARNING: Using default COCO dataset_root because " +
82 | "--dataset_root was not specified.")
83 | args.dataset_root = COCO_ROOT
84 | cfg = coco
85 | dataset = COCODetection(root=args.dataset_root,
86 | transform=SSDAugmentation(cfg['min_dim'],
87 | MEANS))
88 | elif args.dataset == 'VOC':
89 | if args.dataset_root == COCO_ROOT:
90 | parser.error('Must specify dataset if specifying dataset_root')
91 | cfg = voc
92 | dataset = VOCDetection(root=args.dataset_root,
93 | transform=SSDAugmentation(cfg['min_dim'],
94 | MEANS))
95 |
96 | elif args.dataset == 'CUSTOM':
97 | if args.dataset_root == VOC_ROOT or args.dataset_root == COCO_ROOT:
98 | parser.error('Must specify dataset if specifying dataset_root')
99 | cfg = custom
100 | dataset = customDetection(root=args.dataset_root,
101 | transform=SSDAugmentation(cfg['min_dim'],
102 | MEANS))
103 |
104 | if args.visdom:
105 | import visdom
106 | viz = visdom.Visdom()
107 |
108 | ssd_net = build_ssd('train', cfg['min_dim'], cfg['num_classes'])
109 | net = ssd_net
110 |
111 | if args.cuda:
112 | net = torch.nn.DataParallel(ssd_net)
113 | cudnn.benchmark = True
114 |
115 | if args.resume:
116 | print('Resuming training, loading {}...'.format(args.resume))
117 | ssd_net.load_weights(args.resume)
118 | else:
119 | pass
120 | # resnet_weights = torch.load(args.save_folder + args.basenet)
121 | # print('Loading base network...')
122 | # ssd_net.resnet.load_state_dict(resnet_weights)
123 |
124 | if args.cuda:
125 | net = net.cuda()
126 |
127 | if not args.resume:
128 | print('Initializing weights...')
129 | # initialize newly added layers' weights with xavier method
130 | ssd_net.extras.apply(weights_init)
131 | ssd_net.loc.apply(weights_init)
132 | ssd_net.conf.apply(weights_init)
133 |
134 | optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
135 | weight_decay=args.weight_decay)
136 | criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
137 | False, args.cuda)
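# Note on the positional arguments above: assuming the upstream ssd.pytorch
# MultiBoxLoss signature, they correspond to overlap_thresh=0.5,
# prior_for_matching=True, bkg_label=0, neg_mining=True, neg_pos=3 (hard
# negative mining at a 3:1 negative/positive ratio), neg_overlap=0.5,
# encode_target=False and use_gpu=args.cuda.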
138 |
139 | net.train()
140 | # loss counters
141 | loc_loss = 0
142 | conf_loss = 0
143 | epoch = 0
144 | print('Loading the dataset...')
145 |
146 | epoch_size = len(dataset) // args.batch_size
147 | print('Epoch Size:', epoch_size)
148 | print('Training SSD on:', dataset.name)
149 | print('Using the specified args:')
150 | print(args)
151 |
152 | step_index = 0
153 |
154 | if args.visdom:
155 | vis_title = 'SSD.PyTorch on ' + dataset.name
156 | vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
157 | iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend)
158 | epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend)
159 |
160 | data_loader = data.DataLoader(dataset, args.batch_size,
161 | num_workers=args.num_workers,
162 | shuffle=True, collate_fn=detection_collate,
163 | pin_memory=True)
164 | # create batch iterator
165 | batch_iterator = iter(data_loader)
166 | for iteration in range(args.start_iter, cfg['max_iter']):
167 | if args.visdom and iteration != 0 and (iteration % epoch_size == 0):
168 | update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None,
169 | 'append', epoch_size)
170 | # reset epoch loss counters
171 | loc_loss = 0
172 | conf_loss = 0
173 | epoch += 1
174 |
175 | if iteration in cfg['lr_steps']:
176 | step_index += 1
177 | adjust_learning_rate(optimizer, args.gamma, step_index)
178 |
179 | # load train data
180 | # images, targets = next(batch_iterator)
181 | try:
182 | images, targets = next(batch_iterator)
183 | except StopIteration:
184 | batch_iterator = iter(data_loader)
185 | images, targets = next(batch_iterator)
186 |
187 | if args.cuda:
188 | images = Variable(images.cuda())
189 | targets = [Variable(ann.cuda()) for ann in targets]  # volatile=True is deprecated and not needed here
190 | else:
191 | images = Variable(images)
192 | targets = [Variable(ann) for ann in targets]
193 | # forward
194 | t0 = time.time()
195 | out = net(images)
196 | # backprop
197 | optimizer.zero_grad()
198 | loss_l, loss_c = criterion(out, targets)
199 | loss = loss_l + loss_c
200 | loss.backward()
201 | optimizer.step()
202 | t1 = time.time()
203 | # loc_loss += loss_l.data[0]
204 | # conf_loss += loss_c.data[0]
205 | loc_loss += loss_l.item()
206 | conf_loss += loss_c.item()
207 |
208 | if iteration % 10 == 0:
209 | print('timer: %.4f sec.' % (t1 - t0))
210 | # print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.data[0]), end=' ')
211 | print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.item()), end=' ')
212 |
213 | if args.visdom:
214 | # update_vis_plot(iteration, loss_l.data[0], loss_c.data[0],
215 | # iter_plot, epoch_plot, 'append')
216 | update_vis_plot(iteration, loss_l.item(), loss_c.item(),
217 | iter_plot, epoch_plot, 'append')
218 |
219 | if iteration != 0 and iteration % 5000 == 0:
220 | print('Saving state, iter:', iteration)
221 | torch.save(ssd_net.state_dict(), args.save_folder + 'ssd300_' + args.dataset +
222 | '_' + repr(iteration) + '.pth')
223 | torch.save(ssd_net.state_dict(),
224 | args.save_folder + args.dataset + '.pth')
225 |
226 |
227 | def adjust_learning_rate(optimizer, gamma, step):
228 | """Sets the learning rate to the initial LR decayed by 10 at every
229 | specified step
230 | # Adapted from PyTorch Imagenet example:
231 | # https://github.com/pytorch/examples/blob/master/imagenet/main.py
232 | """
233 | lr = args.lr * (gamma ** (step))
234 | for param_group in optimizer.param_groups:
235 | param_group['lr'] = lr
236 |
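# Illustrative check of the schedule above (not part of the original script):
# with the default --lr 1e-3 and --gamma 0.1, the learning rate drops to
# 1e-3 * 0.1**1 = 1e-4 after the first milestone in cfg['lr_steps'] and to
# 1e-5 after the second, i.e. a step decay by a factor of 10 per milestone.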
237 |
238 | def xavier(param):
239 | init.xavier_uniform_(param)
240 |
241 |
242 | def weights_init(m):
243 | if isinstance(m, nn.Conv2d):
244 | xavier(m.weight.data)
245 | m.bias.data.zero_()
246 |
247 |
248 | def create_vis_plot(_xlabel, _ylabel, _title, _legend):
249 | return viz.line(
250 | X=torch.zeros((1,)).cpu(),
251 | Y=torch.zeros((1, 3)).cpu(),
252 | opts=dict(
253 | xlabel=_xlabel,
254 | ylabel=_ylabel,
255 | title=_title,
256 | legend=_legend
257 | )
258 | )
259 |
260 |
261 | def update_vis_plot(iteration, loc, conf, window1, window2, update_type,
262 | epoch_size=1):
263 | viz.line(
264 | X=torch.ones((1, 3)).cpu() * iteration,
265 | Y=torch.Tensor([loc, conf, loc + conf]).unsqueeze(0).cpu() / epoch_size,
266 | win=window1,
267 | update=update_type
268 | )
269 | # initialize epoch plot on first iteration
270 | if iteration == 0:
271 | viz.line(
272 | X=torch.zeros((1, 3)).cpu(),
273 | Y=torch.Tensor([loc, conf, loc + conf]).unsqueeze(0).cpu(),
274 | win=window2,
275 | update=True
276 | )
277 |
278 |
279 | if __name__ == '__main__':
280 | train()
281 |
--------------------------------------------------------------------------------
/实验 4.1/visualTest_gauge.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import torch
3 | import torch.nn as nn
4 | import torch.backends.cudnn as cudnn
5 | from torch.autograd import Variable
6 | import torch.utils.data as data
7 |
8 | from data import BaseTransform
9 | from data.custom_for_visual import CUSTOM_CLASSES_GAUGE as labelmap_gauge
10 | from data.custom_for_visual import CUSTOM_CLASSES_WATERLINE as labelmap_waterline
11 | from data.custom_for_visual import customDetection, customAnnotationTransform, CUSTOM_ROOT, CUSTOM_CLASSES_GAUGE, CUSTOM_CLASSES_WATERLINE
12 |
13 | # from ssd import build_ssd
14 | from ssd_resnet_101 import build_ssd
15 |
16 | import sys
17 | import os
18 | import time
19 | import argparse
20 | import numpy as np
21 | import pickle
22 | import cv2
23 | import math
24 |
25 | import warnings
26 | warnings.filterwarnings("ignore")
27 |
28 | parser = argparse.ArgumentParser(description='Single Shot MultiBox Detection')
29 | parser.add_argument('--trained_model_gauge',
30 | default='useful_weight/CUSTOM_gauge.pth', type=str,
31 | help='Trained state_dict file path to open')
32 | parser.add_argument('--trained_model_waterline',
33 | default='useful_weight/CUSTOM_mark.pth', type=str,
34 | help='Trained state_dict file path to open')
35 | parser.add_argument('--save_folder', default='eval/', type=str,
36 | help='Dir to save results')
37 | parser.add_argument('--visual_threshold', default=0.1, type=float,
38 | help='Final confidence threshold')
39 | parser.add_argument('--cuda', default=True, type=bool,
40 | help='Use cuda to train model')
41 | parser.add_argument('--custom_root', default=CUSTOM_ROOT, help='Location of the custom dataset root directory')
42 | parser.add_argument('-f', default=None, type=str, help="Dummy arg so we can load in Jupyter Notebooks")
43 | args = parser.parse_args()
44 |
45 | if args.cuda and torch.cuda.is_available():
46 | torch.set_default_tensor_type('torch.cuda.FloatTensor')
47 | else:
48 | torch.set_default_tensor_type('torch.FloatTensor')
49 |
50 | if not os.path.exists(args.save_folder):
51 | os.mkdir(args.save_folder)
52 |
53 |
54 | def test_net(save_folder, net, cuda, testset, transform, thresh, labelmap):
55 | # dump predictions and assoc. ground truth to text file for now
56 | filename = save_folder + 'result_%s.txt'
57 | num_images = len(testset)
58 | for i in range(num_images):
59 | print('Testing image {:d}/{:d}....'.format(i+1, num_images))
60 | img = testset.pull_image(i)
61 | img_id, annotation = testset.pull_anno(i)
62 | x = torch.from_numpy(transform(img)[0]).permute(2, 0, 1)
63 | x = Variable(x.unsqueeze(0))
64 |
65 | if cuda:
66 | x = x.cuda()
67 |
68 | y = net(x) # forward pass
69 | detections = y.data
70 | # scale each detection back up to the image
71 | scale = torch.Tensor([img.shape[1], img.shape[0],
72 | img.shape[1], img.shape[0]])
73 | pred_num = 0
74 | for i in range(detections.size(1)):
75 | j = 0
76 | while detections[0, i, j, 0] >= thresh:
77 | score = detections[0, i, j, 0]
78 | label_name = labelmap[i-1]
79 | pt = (detections[0, i, j, 1:]*scale).cpu().numpy()
80 | coords = (pt[0], pt[1], pt[2], pt[3])
81 | pred_num += 1
82 | with open(filename % label_name, mode='a') as f:
83 | f.write(str(img_id) + ' ' +
84 | str(score.cpu().numpy()) + ' '+ ' '.join(str(c) for c in coords) + '\n')
85 | j += 1
86 |
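# Each line written above has the form "<img_id> <score> <x1> <y1> <x2> <y2>";
# the parsing code in test_custom() below splits on single spaces and relies on
# exactly this layout.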
87 | def xmlData(name, width, height, label):
88 | return '''<annotation>
89 | <folder>JPEGImages</folder>
90 | <filename>%s.jpg</filename>
91 | <path>%s.jpg</path>
92 | <source>
93 | <database>Unknown</database>
94 | </source>
95 | <size>
96 | <width>%d</width>
97 | <height>%d</height>
98 | <depth>3</depth>
99 | </size>
100 | <segmented>0</segmented>
101 | <object>
102 | <name>%s</name>
103 | <truncated>0</truncated>
104 | <difficult>0</difficult>
105 | <!-- reconstructed VOC-style template: the tags were stripped from this dump and the bndbox values below are assumed placeholders -->
106 | <bndbox>
107 | <xmin>0</xmin>
108 | <ymin>0</ymin>
109 | <xmax>1</xmax>
110 | <ymax>1</ymax>
111 | </bndbox>
112 | </object>
113 | </annotation>''' % (name, name, width, height, label)
114 |
115 | def get_output_dir(name, phase=""):
116 | filedir = os.path.join(name, phase)
117 | if not os.path.exists(filedir):
118 | os.makedirs(filedir)
119 | return filedir
120 |
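# Rough flow of test_custom() below: stage one runs the gauge detector over the
# test set and keeps the highest-scoring gauge box per image; each gauge region
# is cropped, written out together with a generated annotation, and used as the
# input set for stage two, which detects the waterline and the mark inside the
# crop. An image is counted as correct when the detected waterline box lies
# below the mark box (compared via the sum of each box's y-coordinates).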
121 | def test_custom():
122 | DEBUG = False
123 | set_type = 'test'
124 | devkit_path = args.custom_root + 'test'
125 | devkit_annopath = os.path.join(args.custom_root, 'test', 'Annotations')
126 | devkit_imgpath = os.path.join(args.custom_root, 'test', 'JPEGImages')
127 | devkit_imgsetpath = os.path.join(args.custom_root, 'test', 'ImageSets', 'Main')
128 |
129 | # load net
130 | num_classes_gauge = len(labelmap_gauge) + 1 # +1 for background
131 | net = build_ssd('test', 300, num_classes_gauge) # initialize SSD
132 | net.load_state_dict(torch.load(args.trained_model_gauge))
133 | net.eval()
134 |
135 | num_classes_waterline = len(labelmap_waterline) + 1 # +1 for background
136 | net1 = build_ssd('test', 300, num_classes_waterline) # initialize SSD
137 | net1.load_state_dict(torch.load(args.trained_model_waterline))
138 | net1.eval()
139 | print('Finished loading model!')
140 | # load data
141 | dataset1 = customDetection(args.custom_root, [('gauge', set_type)], None, customAnnotationTransform(class_to_ind=dict(zip(CUSTOM_CLASSES_GAUGE, range(len(CUSTOM_CLASSES_GAUGE))))))
142 | if args.cuda:
143 | net = net.cuda()
144 | cudnn.benchmark = True
145 | # evaluation
146 | test_net(args.save_folder, net, args.cuda, dataset1,
147 | BaseTransform(net.size, (104, 117, 123)),
148 | thresh=args.visual_threshold, labelmap=labelmap_gauge)
149 |
150 | rootPath = 'F:/ssd/data/video/gauge'
151 | rootPath_temp = 'F:/ssd/data/video/test'
152 | imgList_gauge = {}
153 |
154 | with open(os.path.join(args.save_folder, 'result_gauge.txt'), 'r') as f:
155 | text_lines = f.readlines()
156 | for line in text_lines:
157 | info = line.split(" ")
158 | name, score, x1, y1, x2, y2 = info
159 | if name in imgList_gauge:
160 | if float(score) > imgList_gauge[name]['score']:
161 | imgList_gauge[name] = {
162 | 'score': float(score),
163 | 'x1': float(x1),
164 | 'y1': float(y1),
165 | 'x2': float(x2),
166 | 'y2': float(y2)
167 | }
168 | else:
169 | imgList_gauge[name] = {
170 | 'score': float(score),
171 | 'x1': float(x1),
172 | 'y1': float(y1),
173 | 'x2': float(x2),
174 | 'y2': float(y2)
175 | }
176 |
177 | img_path = os.path.join(rootPath, 'JPEGImages', '%s.jpg')
178 | devkit_imgpath = os.path.join(get_output_dir(devkit_imgpath), '%s.jpg')
179 | devkit_imgsetpath = os.path.join(get_output_dir(devkit_imgsetpath), '%s.txt')
180 | devkit_annopath = os.path.join(get_output_dir(devkit_annopath), '%s.xml')
181 | with open(devkit_imgsetpath % ('test'), 'w') as f:
182 | for obj in imgList_gauge.items():
183 | name, img = obj
184 | image = cv2.imread(img_path % name)
185 | (h, w, c) = image.shape
186 | x1 = max(math.floor(img['x1']), 0)
187 | y1 = max(math.floor(img['y1']), 0)
188 | x2 = min(math.floor(img['x2']), w)
189 | y2 = min(math.floor(img['y2']), h)
190 | if DEBUG:
191 | cv2.rectangle(image, (x1, y1), (x2, y2), (255,0,0), 5)
192 | image = cv2.resize(image, (512, 512))
193 | cv2.imshow('w1', image)
194 | cv2.waitKey()
195 | else:
196 | image = image[y1:y2, x1:x2]
197 | # cv2.imshow('w1', image)
198 | cv2.imwrite(devkit_imgpath % name, image, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
199 | f.write(name + '\n')
200 | # cv2.waitKey()
201 | with open(devkit_annopath % (name), 'w') as f_a:
202 | f_a.write(xmlData(name, x2 - x1, y2 - y1, 'waterline'))
203 |
204 | dataset2 = customDetection(args.custom_root, [('test', set_type)], None, customAnnotationTransform(class_to_ind=dict(zip(CUSTOM_CLASSES_WATERLINE, range(len(CUSTOM_CLASSES_WATERLINE))))))
205 |
206 | if args.cuda:
207 | net1 = net1.cuda()
208 | cudnn.benchmark = True
209 |
210 | # evaluation
211 | test_net(args.save_folder, net1, args.cuda, dataset2,
212 | BaseTransform(net.size, (104, 117, 123)),
213 | thresh=args.visual_threshold, labelmap=labelmap_waterline)
214 |
215 | imgList_waterline = {}
216 | with open(os.path.join(args.save_folder, 'result_waterline.txt'), 'r') as f:
217 | text_lines = f.readlines()
218 | for line in text_lines:
219 | info = line.split(" ")
220 | name, score, x1, y1, x2, y2 = info
221 | if name in imgList_waterline:
222 | if float(score) > imgList_waterline[name]['score']:
223 | imgList_waterline[name] = {
224 | 'score': float(score),
225 | 'x1': float(x1),
226 | 'y1': float(y1),
227 | 'x2': float(x2),
228 | 'y2': float(y2)
229 | }
230 | else:
231 | imgList_waterline[name] = {
232 | 'score': float(score),
233 | 'x1': float(x1),
234 | 'y1': float(y1),
235 | 'x2': float(x2),
236 | 'y2': float(y2)
237 | }
238 |
239 | imgList_mark = {}
240 | with open(os.path.join(args.save_folder, 'result_mark.txt'), 'r') as f:
241 | text_lines = f.readlines()
242 | for line in text_lines:
243 | info = line.split(" ")
244 | name, score, x1, y1, x2, y2 = info
245 | if name in imgList_mark:
246 | if float(score) > imgList_mark[name]['score']:
247 | imgList_mark[name] = {
248 | 'score': float(score),
249 | 'x1': float(x1),
250 | 'y1': float(y1),
251 | 'x2': float(x2),
252 | 'y2': float(y2)
253 | }
254 | else:
255 | imgList_mark[name] = {
256 | 'score': float(score),
257 | 'x1': float(x1),
258 | 'y1': float(y1),
259 | 'x2': float(x2),
260 | 'y2': float(y2)
261 | }
262 |
263 | cv2.namedWindow('w2',1)
264 | use_origin = True
265 |
266 | if not use_origin:
267 | img_path = os.path.join(rootPath_temp, 'JPEGImages', '%s.jpg')
268 | count = 0
269 | for name in imgList_gauge:
270 | img_gauge = imgList_gauge[name]
271 | img_waterline = imgList_waterline[name]
272 | img_mark = imgList_mark[name]
273 |
274 | if not use_origin:
275 | image = cv2.imread(img_path % name)
276 | (h, w, c) = image.shape
277 |
278 | x1_w = max(math.floor(img_waterline['x1']), 0)
279 | y1_w = max(math.floor(img_waterline['y1']), 0)
280 | x2_w = min(math.floor(img_waterline['x2']), w)
281 | y2_w = min(math.floor(img_waterline['y2']), h)
282 |
283 | x1_m = max(math.floor(img_mark['x1']), 0)
284 | y1_m = max(math.floor(img_mark['y1']), 0)
285 | x2_m = min(math.floor(img_mark['x2']), w)
286 | y2_m = min(math.floor(img_mark['y2']), h)
287 |
288 | cv2.rectangle(image, (x1_w, y1_w), (x2_w, y2_w), (255,0,0), 5)
289 | cv2.rectangle(image, (x1_m, y1_m), (x2_m, y2_m), (0,255,0), 5)
290 | image = cv2.resize(image, (512, 512))
291 | cv2.imshow('w2', image)
292 | cv2.waitKey()
293 | else:
294 | image = cv2.imread(img_path % name)
295 | (h, w, c) = image.shape
296 |
297 | x1_g = math.floor(img_gauge['x1'])
298 | y1_g = math.floor(img_gauge['y1'])
299 | x2_g = math.floor(img_gauge['x2'])
300 | y2_g = math.floor(img_gauge['y2'])
301 |
302 | x1_w = max(math.floor(img_waterline['x1']), 0)
303 | y1_w = max(math.floor(img_waterline['y1']), 0)
304 | x2_w = min(math.floor(img_waterline['x2']), w)
305 | y2_w = min(math.floor(img_waterline['y2']), h)
306 |
307 | x1_m = max(math.floor(img_mark['x1']), 0)
308 | y1_m = max(math.floor(img_mark['y1']), 0)
309 | x2_m = min(math.floor(img_mark['x2']), w)
310 | y2_m = min(math.floor(img_mark['y2']), h)
311 |
312 | if (y1_w + y2_w) > (y1_m + y2_m):
313 | count += 1
314 |
315 | cv2.rectangle(image, (x1_g, y1_g), (x2_g, y2_g), (255,0,0), 5)
316 | cv2.rectangle(image, (x1_g + x1_w, y1_g + y1_w), (x1_g + x2_w, y1_g + y2_w), (0,255,0), 5)
317 | cv2.rectangle(image, (x1_g + x1_m, y1_g + y1_m), (x1_g + x2_m, y1_g + y2_m), (0,0,255), 5)
318 |
319 | image = cv2.resize(image, (512, 512))
320 | cv2.putText(image, 'gauge: %.2f' % img_gauge['score'], (10, 40), cv2.FONT_HERSHEY_COMPLEX, 1.2, (255, 0, 0), 2)
321 | cv2.putText(image, 'waterline: %.2f' % img_waterline['score'], (10, 80), cv2.FONT_HERSHEY_COMPLEX, 1.2, (0, 255, 0), 2)
322 | cv2.putText(image, 'mark: %.2f' % img_mark['score'], (10, 120), cv2.FONT_HERSHEY_COMPLEX, 1.2, (0, 0, 255), 2)
323 | cv2.imshow('w2', image)
324 | cv2.waitKey()
325 | print('correct count:', count)
326 |
327 | if __name__ == '__main__':
328 | test_custom()
329 |
--------------------------------------------------------------------------------
/netModel/resnet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | try:
4 | from torch.hub import load_state_dict_from_url
5 | except ImportError:
6 | from torch.utils.model_zoo import load_url as load_state_dict_from_url
7 |
8 |
9 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
10 | 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d',
11 | 'wide_resnet50_2', 'wide_resnet101_2']
12 |
13 |
14 | model_urls = {
15 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
16 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
17 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
18 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
19 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
20 | 'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
21 | 'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
22 | 'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth',
23 | 'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth',
24 | }
25 |
26 |
27 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
28 | """3x3 convolution with padding"""
29 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
30 | padding=dilation, groups=groups, bias=False, dilation=dilation)
31 |
32 |
33 | def conv1x1(in_planes, out_planes, stride=1):
34 | """1x1 convolution"""
35 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
36 |
37 |
38 | class BasicBlock(nn.Module):
39 | expansion = 1
40 | __constants__ = ['downsample']
41 |
42 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
43 | base_width=64, dilation=1, norm_layer=None):
44 | super(BasicBlock, self).__init__()
45 | if norm_layer is None:
46 | norm_layer = nn.BatchNorm2d
47 | if groups != 1 or base_width != 64:
48 | raise ValueError('BasicBlock only supports groups=1 and base_width=64')
49 | if dilation > 1:
50 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
51 | # Both self.conv1 and self.downsample layers downsample the input when stride != 1
52 | self.conv1 = conv3x3(inplanes, planes, stride)
53 | self.bn1 = norm_layer(planes)
54 | self.relu = nn.ReLU(inplace=True)
55 | self.conv2 = conv3x3(planes, planes)
56 | self.bn2 = norm_layer(planes)
57 | self.downsample = downsample
58 | self.stride = stride
59 |
60 | def forward(self, x):
61 | identity = x
62 |
63 | out = self.conv1(x)
64 | out = self.bn1(out)
65 | out = self.relu(out)
66 |
67 | out = self.conv2(out)
68 | out = self.bn2(out)
69 |
70 | if self.downsample is not None:
71 | identity = self.downsample(x)
72 |
73 | out += identity
74 | out = self.relu(out)
75 |
76 | return out
77 |
78 |
79 | class Bottleneck(nn.Module):
80 | expansion = 4
81 | __constants__ = ['downsample']
82 |
83 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
84 | base_width=64, dilation=1, norm_layer=None):
85 | super(Bottleneck, self).__init__()
86 | if norm_layer is None:
87 | norm_layer = nn.BatchNorm2d
88 | width = int(planes * (base_width / 64.)) * groups
89 | # Both self.conv2 and self.downsample layers downsample the input when stride != 1
90 | self.conv1 = conv1x1(inplanes, width)
91 | self.bn1 = norm_layer(width)
92 | self.conv2 = conv3x3(width, width, stride, groups, dilation)
93 | self.bn2 = norm_layer(width)
94 | self.conv3 = conv1x1(width, planes * self.expansion)
95 | self.bn3 = norm_layer(planes * self.expansion)
96 | self.relu = nn.ReLU(inplace=True)
97 | self.downsample = downsample
98 | self.stride = stride
99 |
100 | def forward(self, x):
101 | identity = x
102 |
103 | out = self.conv1(x)
104 | out = self.bn1(out)
105 | out = self.relu(out)
106 |
107 | out = self.conv2(out)
108 | out = self.bn2(out)
109 | out = self.relu(out)
110 |
111 | out = self.conv3(out)
112 | out = self.bn3(out)
113 |
114 | if self.downsample is not None:
115 | identity = self.downsample(x)
116 |
117 | out += identity
118 | out = self.relu(out)
119 |
120 | return out
121 |
122 |
123 | class ResNet(nn.Module):
124 |
125 | def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
126 | groups=1, width_per_group=64, replace_stride_with_dilation=None,
127 | norm_layer=None):
128 | super(ResNet, self).__init__()
129 | if norm_layer is None:
130 | norm_layer = nn.BatchNorm2d
131 | self._norm_layer = norm_layer
132 |
133 | self.inplanes = 64
134 | self.dilation = 1
135 | if replace_stride_with_dilation is None:
136 | # each element in the tuple indicates if we should replace
137 | # the 2x2 stride with a dilated convolution instead
138 | replace_stride_with_dilation = [False, False, False]
139 | if len(replace_stride_with_dilation) != 3:
140 | raise ValueError("replace_stride_with_dilation should be None "
141 | "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
142 | self.groups = groups
143 | self.base_width = width_per_group
144 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=1, padding=3,
145 | bias=False)
146 | self.bn1 = norm_layer(self.inplanes)
147 | self.relu = nn.ReLU(inplace=True)
148 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
149 | self.layer1 = self._make_layer(block, 64, layers[0])
150 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
151 | dilate=replace_stride_with_dilation[0])
152 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
153 | dilate=replace_stride_with_dilation[1])
154 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
155 | dilate=replace_stride_with_dilation[2])
156 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
157 | self.fc = nn.Linear(512 * block.expansion, num_classes)
158 |
159 | for m in self.modules():
160 | if isinstance(m, nn.Conv2d):
161 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
162 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
163 | nn.init.constant_(m.weight, 1)
164 | nn.init.constant_(m.bias, 0)
165 |
166 | # Zero-initialize the last BN in each residual branch,
167 | # so that the residual branch starts with zeros, and each residual block behaves like an identity.
168 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
169 | if zero_init_residual:
170 | for m in self.modules():
171 | if isinstance(m, Bottleneck):
172 | nn.init.constant_(m.bn3.weight, 0)
173 | elif isinstance(m, BasicBlock):
174 | nn.init.constant_(m.bn2.weight, 0)
175 |
176 | def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
177 | norm_layer = self._norm_layer
178 | downsample = None
179 | previous_dilation = self.dilation
180 | if dilate:
181 | self.dilation *= stride
182 | stride = 1
183 | if stride != 1 or self.inplanes != planes * block.expansion:
184 | downsample = nn.Sequential(
185 | conv1x1(self.inplanes, planes * block.expansion, stride),
186 | norm_layer(planes * block.expansion),
187 | )
188 |
189 | layers = []
190 | layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
191 | self.base_width, previous_dilation, norm_layer))
192 | self.inplanes = planes * block.expansion
193 | for _ in range(1, blocks):
194 | layers.append(block(self.inplanes, planes, groups=self.groups,
195 | base_width=self.base_width, dilation=self.dilation,
196 | norm_layer=norm_layer))
197 |
198 | return nn.Sequential(*layers)
199 |
200 | def _forward_impl(self, x):
201 | # See note [TorchScript super()]
202 | x = self.conv1(x)
203 | x = self.bn1(x)
204 | x = self.relu(x)
205 | x = self.maxpool(x)
206 |
207 | x = self.layer1(x)
208 | x = self.layer2(x)
209 | x = self.layer3(x)
210 | x = self.layer4(x)
211 |
212 | x = self.avgpool(x)
213 | x = torch.flatten(x, 1)
214 | x = self.fc(x)
215 |
216 | return x
217 |
218 | def forward(self, x):
219 | return self._forward_impl(x)
220 |
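# Note: unlike stock torchvision, conv1 above uses stride=1 rather than 2, so a
# 512x512 input is still 512x512 before the max-pool and layer1-layer4 emit
# roughly 256, 128, 64 and 32 pixel feature maps. This appears to be a
# deliberate change to keep larger feature maps for detection.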
221 |
222 | def _resnet(arch, block, layers, pretrained, progress, **kwargs):
223 | model = ResNet(block, layers, **kwargs)
224 | if pretrained:
225 | state_dict = load_state_dict_from_url(model_urls[arch],
226 | progress=progress)
227 | model.load_state_dict(state_dict)
228 | return model
229 |
230 |
231 | def resnet18(pretrained=False, progress=True, **kwargs):
232 | r"""ResNet-18 model from
233 | `"Deep Residual Learning for Image Recognition" `_
234 | Args:
235 | pretrained (bool): If True, returns a model pre-trained on ImageNet
236 | progress (bool): If True, displays a progress bar of the download to stderr
237 | """
238 | return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress,
239 | **kwargs)
240 |
241 |
242 | def resnet34(pretrained=False, progress=True, **kwargs):
243 | r"""ResNet-34 model from
244 | `"Deep Residual Learning for Image Recognition" `_
245 | Args:
246 | pretrained (bool): If True, returns a model pre-trained on ImageNet
247 | progress (bool): If True, displays a progress bar of the download to stderr
248 | """
249 | return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress,
250 | **kwargs)
251 |
252 |
253 | def resnet50(pretrained=False, progress=True, **kwargs):
254 | r"""ResNet-50 model from
255 | `"Deep Residual Learning for Image Recognition" `_
256 | Args:
257 | pretrained (bool): If True, returns a model pre-trained on ImageNet
258 | progress (bool): If True, displays a progress bar of the download to stderr
259 | """
260 | return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress,
261 | **kwargs)
262 |
263 |
264 | def resnet101(pretrained=False, progress=True, **kwargs):
265 | r"""ResNet-101 model from
266 | `"Deep Residual Learning for Image Recognition" `_
267 | Args:
268 | pretrained (bool): If True, returns a model pre-trained on ImageNet
269 | progress (bool): If True, displays a progress bar of the download to stderr
270 | """
271 | return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress,
272 | **kwargs)
273 |
274 |
275 | def resnet152(pretrained=False, progress=True, **kwargs):
276 | r"""ResNet-152 model from
277 | `"Deep Residual Learning for Image Recognition" `_
278 | Args:
279 | pretrained (bool): If True, returns a model pre-trained on ImageNet
280 | progress (bool): If True, displays a progress bar of the download to stderr
281 | """
282 | return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress,
283 | **kwargs)
284 |
285 |
286 | def resnext50_32x4d(pretrained=False, progress=True, **kwargs):
287 | r"""ResNeXt-50 32x4d model from
288 | `"Aggregated Residual Transformation for Deep Neural Networks" `_
289 | Args:
290 | pretrained (bool): If True, returns a model pre-trained on ImageNet
291 | progress (bool): If True, displays a progress bar of the download to stderr
292 | """
293 | kwargs['groups'] = 32
294 | kwargs['width_per_group'] = 4
295 | return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3],
296 | pretrained, progress, **kwargs)
297 |
298 |
299 | def resnext101_32x8d(pretrained=False, progress=True, **kwargs):
300 | r"""ResNeXt-101 32x8d model from
301 | `"Aggregated Residual Transformation for Deep Neural Networks" `_
302 | Args:
303 | pretrained (bool): If True, returns a model pre-trained on ImageNet
304 | progress (bool): If True, displays a progress bar of the download to stderr
305 | """
306 | kwargs['groups'] = 32
307 | kwargs['width_per_group'] = 8
308 | return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3],
309 | pretrained, progress, **kwargs)
310 |
311 |
312 | def wide_resnet50_2(pretrained=False, progress=True, **kwargs):
313 | r"""Wide ResNet-50-2 model from
314 | `"Wide Residual Networks" `_
315 | The model is the same as ResNet except for the bottleneck number of channels
316 | which is twice larger in every block. The number of channels in outer 1x1
317 | convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048
318 | channels, and in Wide ResNet-50-2 has 2048-1024-2048.
319 | Args:
320 | pretrained (bool): If True, returns a model pre-trained on ImageNet
321 | progress (bool): If True, displays a progress bar of the download to stderr
322 | """
323 | kwargs['width_per_group'] = 64 * 2
324 | return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3],
325 | pretrained, progress, **kwargs)
326 |
327 |
328 | def wide_resnet101_2(pretrained=False, progress=True, **kwargs):
329 | r"""Wide ResNet-101-2 model from
330 | `"Wide Residual Networks" `_
331 | The model is the same as ResNet except for the bottleneck number of channels
332 | which is twice larger in every block. The number of channels in outer 1x1
333 | convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048
334 | channels, and in Wide ResNet-50-2 has 2048-1024-2048.
335 | Args:
336 | pretrained (bool): If True, returns a model pre-trained on ImageNet
337 | progress (bool): If True, displays a progress bar of the download to stderr
338 | """
339 | kwargs['width_per_group'] = 64 * 2
340 | return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3],
341 | pretrained, progress, **kwargs)
--------------------------------------------------------------------------------
/utils/augmentations.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torchvision import transforms
3 | import cv2
4 | import numpy as np
5 | import types
6 | from numpy import random
7 |
8 |
9 | def intersect(box_a, box_b):
10 | max_xy = np.minimum(box_a[:, 2:], box_b[2:])
11 | min_xy = np.maximum(box_a[:, :2], box_b[:2])
12 | inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf)
13 | return inter[:, 0] * inter[:, 1]
14 |
15 |
16 | def jaccard_numpy(box_a, box_b):
17 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap
18 | is simply the intersection over union of two boxes.
19 | E.g.:
20 | A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
21 | Args:
22 | box_a: Multiple bounding boxes, Shape: [num_boxes,4]
23 | box_b: Single bounding box, Shape: [4]
24 | Return:
25 | jaccard overlap: Shape: [box_a.shape[0]]
26 | """
27 | inter = intersect(box_a, box_b)
28 | area_a = ((box_a[:, 2]-box_a[:, 0]) *
29 | (box_a[:, 3]-box_a[:, 1])) # [A,B]
30 | area_b = ((box_b[2]-box_b[0]) *
31 | (box_b[3]-box_b[1])) # [A,B]
32 | union = area_a + area_b - inter
33 | return inter / union # [A,B]
34 |
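# Worked example (illustrative): for box_a = np.array([[0., 0., 2., 2.]]) and
# box_b = np.array([1., 1., 3., 3.]), the intersection area is 1.0 and the
# union is 4 + 4 - 1 = 7, so jaccard_numpy returns roughly array([0.1429]).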
35 |
36 | class Compose(object):
37 | """Composes several augmentations together.
38 | Args:
39 | transforms (List[Transform]): list of transforms to compose.
40 | Example:
41 | >>> augmentations.Compose([
42 | >>> transforms.CenterCrop(10),
43 | >>> transforms.ToTensor(),
44 | >>> ])
45 | """
46 |
47 | def __init__(self, transforms):
48 | self.transforms = transforms
49 |
50 | def __call__(self, img, boxes=None, labels=None):
51 | for t in self.transforms:
52 | img, boxes, labels = t(img, boxes, labels)
53 | return img, boxes, labels
54 |
55 |
56 | class Lambda(object):
57 | """Applies a lambda as a transform."""
58 |
59 | def __init__(self, lambd):
60 | assert isinstance(lambd, types.LambdaType)
61 | self.lambd = lambd
62 |
63 | def __call__(self, img, boxes=None, labels=None):
64 | return self.lambd(img, boxes, labels)
65 |
66 |
67 | class ConvertFromInts(object):
68 | def __call__(self, image, boxes=None, labels=None):
69 | return image.astype(np.float32), boxes, labels
70 |
71 |
72 | class SubtractMeans(object):
73 | def __init__(self, mean):
74 | self.mean = np.array(mean, dtype=np.float32)
75 |
76 | def __call__(self, image, boxes=None, labels=None):
77 | image = image.astype(np.float32)
78 | image -= self.mean
79 | return image.astype(np.float32), boxes, labels
80 |
81 |
82 | class ToAbsoluteCoords(object):
83 | def __call__(self, image, boxes=None, labels=None):
84 | height, width, channels = image.shape
85 | boxes[:, 0] *= width
86 | boxes[:, 2] *= width
87 | boxes[:, 1] *= height
88 | boxes[:, 3] *= height
89 |
90 | return image, boxes, labels
91 |
92 |
93 | class ToPercentCoords(object):
94 | def __call__(self, image, boxes=None, labels=None):
95 | height, width, channels = image.shape
96 | boxes[:, 0] /= width
97 | boxes[:, 2] /= width
98 | boxes[:, 1] /= height
99 | boxes[:, 3] /= height
100 |
101 | return image, boxes, labels
102 |
103 |
104 | class Resize(object):
105 | def __init__(self, size=300):
106 | self.size = size
107 |
108 | def __call__(self, image, boxes=None, labels=None):
109 | image = cv2.resize(image, (self.size,
110 | self.size))
111 | return image, boxes, labels
112 |
113 |
114 | class RandomSaturation(object):
115 | def __init__(self, lower=0.5, upper=1.5):
116 | self.lower = lower
117 | self.upper = upper
118 | assert self.upper >= self.lower, "saturation upper must be >= lower."
119 | assert self.lower >= 0, "saturation lower must be non-negative."
120 |
121 | def __call__(self, image, boxes=None, labels=None):
122 | if random.randint(2):
123 | image[:, :, 1] *= random.uniform(self.lower, self.upper)
124 |
125 | return image, boxes, labels
126 |
127 |
128 | class RandomHue(object):
129 | def __init__(self, delta=18.0):
130 | assert delta >= 0.0 and delta <= 360.0
131 | self.delta = delta
132 |
133 | def __call__(self, image, boxes=None, labels=None):
134 | if random.randint(2):
135 | image[:, :, 0] += random.uniform(-self.delta, self.delta)
136 | image[:, :, 0][image[:, :, 0] > 360.0] -= 360.0
137 | image[:, :, 0][image[:, :, 0] < 0.0] += 360.0
138 | return image, boxes, labels
139 |
140 |
141 | class RandomLightingNoise(object):
142 | def __init__(self):
143 | self.perms = ((0, 1, 2), (0, 2, 1),
144 | (1, 0, 2), (1, 2, 0),
145 | (2, 0, 1), (2, 1, 0))
146 |
147 | def __call__(self, image, boxes=None, labels=None):
148 | if random.randint(2):
149 | swap = self.perms[random.randint(len(self.perms))]
150 | shuffle = SwapChannels(swap) # shuffle channels
151 | image = shuffle(image)
152 | return image, boxes, labels
153 |
154 |
155 | class ConvertColor(object):
156 | def __init__(self, current='BGR', transform='HSV'):
157 | self.transform = transform
158 | self.current = current
159 |
160 | def __call__(self, image, boxes=None, labels=None):
161 | if self.current == 'BGR' and self.transform == 'HSV':
162 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
163 | elif self.current == 'HSV' and self.transform == 'BGR':
164 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
165 | else:
166 | raise NotImplementedError
167 | return image, boxes, labels
168 |
169 |
170 | class RandomContrast(object):
171 | def __init__(self, lower=0.5, upper=1.5):
172 | self.lower = lower
173 | self.upper = upper
174 | assert self.upper >= self.lower, "contrast upper must be >= lower."
175 | assert self.lower >= 0, "contrast lower must be non-negative."
176 |
177 | # expects float image
178 | def __call__(self, image, boxes=None, labels=None):
179 | if random.randint(2):
180 | alpha = random.uniform(self.lower, self.upper)
181 | image *= alpha
182 | return image, boxes, labels
183 |
184 |
185 | class RandomBrightness(object):
186 | def __init__(self, delta=32):
187 | assert delta >= 0.0
188 | assert delta <= 255.0
189 | self.delta = delta
190 |
191 | def __call__(self, image, boxes=None, labels=None):
192 | if random.randint(2):
193 | delta = random.uniform(-self.delta, self.delta)
194 | image += delta
195 | return image, boxes, labels
196 |
197 |
198 | class ToCV2Image(object):
199 | def __call__(self, tensor, boxes=None, labels=None):
200 | return tensor.cpu().numpy().astype(np.float32).transpose((1, 2, 0)), boxes, labels
201 |
202 |
203 | class ToTensor(object):
204 | def __call__(self, cvimage, boxes=None, labels=None):
205 | return torch.from_numpy(cvimage.astype(np.float32)).permute(2, 0, 1), boxes, labels
206 |
207 |
208 | class RandomSampleCrop(object):
209 | """Crop
210 | Arguments:
211 | img (Image): the image being input during training
212 | boxes (Tensor): the original bounding boxes in pt form
213 | labels (Tensor): the class labels for each bbox
214 | mode (float tuple): the min and max jaccard overlaps
215 | Return:
216 | (img, boxes, classes)
217 | img (Image): the cropped image
218 | boxes (Tensor): the adjusted bounding boxes in pt form
219 | labels (Tensor): the class labels for each bbox
220 | """
221 | def __init__(self):
222 | self.sample_options = (
223 | # using entire original input image
224 | None,
225 | # sample a patch s.t. MIN jaccard w/ obj is .1, .3, .7, or .9
226 | (0.1, None),
227 | (0.3, None),
228 | (0.7, None),
229 | (0.9, None),
230 | # randomly sample a patch
231 | (None, None),
232 | )
233 |
234 | def __call__(self, image, boxes=None, labels=None):
235 | height, width, _ = image.shape
236 | while True:
237 | # randomly choose a mode
238 | mode = self.sample_options[random.randint(len(self.sample_options))]  # np.random.choice cannot sample from a tuple of tuples
239 | if mode is None:
240 | return image, boxes, labels
241 |
242 | min_iou, max_iou = mode
243 | if min_iou is None:
244 | min_iou = float('-inf')
245 | if max_iou is None:
246 | max_iou = float('inf')
247 |
248 | # max trials (50)
249 | for _ in range(50):
250 | current_image = image
251 |
252 | w = random.uniform(0.3 * width, width)
253 | h = random.uniform(0.3 * height, height)
254 |
255 | # aspect ratio constraint b/t .5 & 2
256 | if h / w < 0.5 or h / w > 2:
257 | continue
258 |
259 | left = random.uniform(width - w)
260 | top = random.uniform(height - h)
261 |
262 | # convert to integer rect x1,y1,x2,y2
263 | rect = np.array([int(left), int(top), int(left+w), int(top+h)])
264 |
265 | # calculate IoU (jaccard overlap) b/t the cropped and gt boxes
266 | overlap = jaccard_numpy(boxes, rect)
267 |
268 | # is min and max overlap constraint satisfied? if not try again
269 | if overlap.min() < min_iou and max_iou < overlap.max():
270 | continue
271 |
272 | # cut the crop from the image
273 | current_image = current_image[rect[1]:rect[3], rect[0]:rect[2],
274 | :]
275 |
276 | # keep overlap with gt box IF center in sampled patch
277 | centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0
278 |
280 | # mask in all gt boxes that are above and to the left of centers
280 | m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])
281 |
283 | # mask in all gt boxes that are under and to the right of centers
283 | m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])
284 |
286 | # combined mask: both m1 and m2 must be true
286 | mask = m1 * m2
287 |
288 | # have any valid boxes? try again if not
289 | if not mask.any():
290 | continue
291 |
292 | # take only matching gt boxes
293 | current_boxes = boxes[mask, :].copy()
294 |
295 | # take only matching gt labels
296 | current_labels = labels[mask]
297 |
298 | # should we use the box left and top corner or the crop's
299 | current_boxes[:, :2] = np.maximum(current_boxes[:, :2],
300 | rect[:2])
301 | # adjust to crop (by subtracting crop's left,top)
302 | current_boxes[:, :2] -= rect[:2]
303 |
304 | current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:],
305 | rect[2:])
306 | # adjust to crop (by subtracting crop's left,top)
307 | current_boxes[:, 2:] -= rect[:2]
308 |
309 | return current_image, current_boxes, current_labels
310 |
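# In short: RandomSampleCrop above re-samples a candidate crop (up to 50 times
# per mode) when it fails the mode's min/max jaccard check or contains no
# ground-truth box center; surviving boxes are clipped to the crop and shifted
# into crop coordinates.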
311 |
312 | class Expand(object):
313 | def __init__(self, mean):
314 | self.mean = mean
315 |
316 | def __call__(self, image, boxes, labels):
317 | if random.randint(2):
318 | return image, boxes, labels
319 |
320 | height, width, depth = image.shape
321 | ratio = random.uniform(1, 4)
322 | left = random.uniform(0, width*ratio - width)
323 | top = random.uniform(0, height*ratio - height)
324 |
325 | expand_image = np.zeros(
326 | (int(height*ratio), int(width*ratio), depth),
327 | dtype=image.dtype)
328 | expand_image[:, :, :] = self.mean
329 | expand_image[int(top):int(top + height),
330 | int(left):int(left + width)] = image
331 | image = expand_image
332 |
333 | boxes = boxes.copy()
334 | boxes[:, :2] += (int(left), int(top))
335 | boxes[:, 2:] += (int(left), int(top))
336 |
337 | return image, boxes, labels
338 |
339 |
340 | class RandomMirror(object):
341 | def __call__(self, image, boxes, classes):
342 | _, width, _ = image.shape
343 | if random.randint(2):
344 | image = image[:, ::-1]
345 | boxes = boxes.copy()
346 | boxes[:, 0::2] = width - boxes[:, 2::-2]
347 | return image, boxes, classes
348 |
349 |
350 | class SwapChannels(object):
351 | """Transforms a tensorized image by swapping the channels in the order
352 | specified in the swap tuple.
353 | Args:
354 | swaps (int triple): final order of channels
355 | eg: (2, 1, 0)
356 | """
357 |
358 | def __init__(self, swaps):
359 | self.swaps = swaps
360 |
361 | def __call__(self, image):
362 | """
363 | Args:
364 | image (Tensor): image tensor to be transformed
365 | Return:
366 | a tensor with channels swapped according to swap
367 | """
368 | # if torch.is_tensor(image):
369 | # image = image.data.cpu().numpy()
370 | # else:
371 | # image = np.array(image)
372 | image = image[:, :, self.swaps]
373 | return image
374 |
375 |
376 | class PhotometricDistort(object):
377 | def __init__(self):
378 | self.pd = [
379 | RandomContrast(),
380 | ConvertColor(transform='HSV'),
381 | RandomSaturation(),
382 | RandomHue(),
383 | ConvertColor(current='HSV', transform='BGR'),
384 | RandomContrast()
385 | ]
386 | self.rand_brightness = RandomBrightness()
387 | self.rand_light_noise = RandomLightingNoise()
388 |
389 | def __call__(self, image, boxes, labels):
390 | im = image.copy()
391 | im, boxes, labels = self.rand_brightness(im, boxes, labels)
392 | if random.randint(2):
393 | distort = Compose(self.pd[:-1])
394 | else:
395 | distort = Compose(self.pd[1:])
396 | im, boxes, labels = distort(im, boxes, labels)
397 | return self.rand_light_noise(im, boxes, labels)
398 |
399 |
400 | class SSDAugmentation(object):
401 | def __init__(self, size=300, mean=(104, 117, 123)):
402 | self.mean = mean
403 | self.size = size
404 | self.augment = Compose([
405 | ConvertFromInts(),
406 | ToAbsoluteCoords(),
407 | PhotometricDistort(),
408 | Expand(self.mean),
409 | RandomSampleCrop(),
410 | RandomMirror(),
411 | ToPercentCoords(),
412 | Resize(self.size),
413 | SubtractMeans(self.mean)
414 | ])
415 |
416 | def __call__(self, img, boxes, labels):
417 | return self.augment(img, boxes, labels)
418 |
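# Minimal usage sketch (illustrative only; mirrors how the training scripts call
# SSDAugmentation(cfg['min_dim'], MEANS) -- the image, boxes and labels below are
# made up):
#
#   aug = SSDAugmentation(size=300, mean=(104, 117, 123))
#   img = np.random.randint(0, 255, (480, 640, 3)).astype(np.uint8)      # HxWxC BGR image
#   boxes = np.array([[0.1, 0.2, 0.6, 0.8]], dtype=np.float32)           # fractional x1,y1,x2,y2
#   labels = np.array([0])
#   out_img, out_boxes, out_labels = aug(img, boxes, labels)             # out_img is 300x300 float32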
--------------------------------------------------------------------------------
/实验 4.1/evalCustom_101.py:
--------------------------------------------------------------------------------
1 | """Adapted from:
2 | @longcw faster_rcnn_pytorch: https://github.com/longcw/faster_rcnn_pytorch
3 | @rbgirshick py-faster-rcnn https://github.com/rbgirshick/py-faster-rcnn
4 | Licensed under The MIT License [see LICENSE for details]
5 | """
6 |
7 | from __future__ import print_function
8 | import torch
9 | import torch.nn as nn
10 | import torch.backends.cudnn as cudnn
11 | from torch.autograd import Variable
12 | # from data import VOC_ROOT, VOCAnnotationTransform, VOCDetection, BaseTransform
13 | # from data import VOC_CLASSES as labelmap
14 | import torch.utils.data as data
15 |
16 | from data import BaseTransform
17 | from data.custom import CUSTOM_CLASSES as labelmap
18 | from data.custom import customDetection, customAnnotationTransform, CUSTOM_CLASSES, CUSTOM_ROOT
19 |
20 | # from ssd import build_ssd
21 | from ssd_resnet_101 import build_ssd
22 |
23 | import sys
24 | import os
25 | import time
26 | import argparse
27 | import numpy as np
28 | import pickle
29 | import cv2
30 |
31 | if sys.version_info[0] == 2:
32 | import xml.etree.cElementTree as ET
33 | else:
34 | import xml.etree.ElementTree as ET
35 |
36 |
37 | def str2bool(v):
38 | return v.lower() in ("yes", "true", "t", "1")
39 |
40 |
41 | parser = argparse.ArgumentParser(
42 | description='Single Shot MultiBox Detector Evaluation')
43 | parser.add_argument('--trained_model',
44 | default='weights/CUSTOM.pth', type=str,
45 | help='Trained state_dict file path to open')
46 | parser.add_argument('--save_folder', default='eval/', type=str,
47 | help='File path to save results')
48 | parser.add_argument('--confidence_threshold', default=0.01, type=float,
49 | help='Detection confidence threshold')
50 | parser.add_argument('--top_k', default=5, type=int,
51 | help='Further restrict the number of predictions to parse')
52 | parser.add_argument('--cuda', default=True, type=str2bool,
53 | help='Use cuda to train model')
54 | parser.add_argument('--custom_root', default=CUSTOM_ROOT,
55 | help='Location of the custom dataset root directory')
56 | parser.add_argument('--cleanup', default=True, type=str2bool,
57 | help='Cleanup and remove results files following eval')
58 |
59 | args = parser.parse_args()
60 |
61 | if not os.path.exists(args.save_folder):
62 | os.mkdir(args.save_folder)
63 |
64 | if torch.cuda.is_available():
65 | if args.cuda:
66 | torch.set_default_tensor_type('torch.cuda.FloatTensor')
67 | if not args.cuda:
68 | print("WARNING: It looks like you have a CUDA device, but aren't using \
69 | CUDA. Run with --cuda for optimal eval speed.")
70 | torch.set_default_tensor_type('torch.FloatTensor')
71 | else:
72 | torch.set_default_tensor_type('torch.FloatTensor')
73 |
74 | annopath = os.path.join(args.custom_root, 'shenhe', 'Annotations', '%s.xml')
75 | imgpath = os.path.join(args.custom_root, 'shenhe', 'JPEGImages', '%s.jpg')
76 | imgsetpath = os.path.join(args.custom_root, 'shenhe', 'ImageSets', 'Main', '%s.txt')
77 |
78 | devkit_path = args.custom_root + 'shenhe'
79 | dataset_mean = (104, 117, 123)
80 | set_type = 'test'
81 |
82 |
83 | class Timer(object):
84 | """A simple timer."""
85 | def __init__(self):
86 | self.total_time = 0.
87 | self.calls = 0
88 | self.start_time = 0.
89 | self.diff = 0.
90 | self.average_time = 0.
91 |
92 | def tic(self):
93 | # using time.time instead of time.clock because time.clock
94 | # does not normalize for multithreading
95 | self.start_time = time.time()
96 |
97 | def toc(self, average=True):
98 | self.diff = time.time() - self.start_time
99 | self.total_time += self.diff
100 | self.calls += 1
101 | self.average_time = self.total_time / self.calls
102 | if average:
103 | return self.average_time
104 | else:
105 | return self.diff
106 |
107 |
108 | def parse_rec(filename):
109 | """ Parse a PASCAL VOC xml file """
110 | tree = ET.parse(filename)
111 | objects = []
112 | for obj in tree.findall('object'):
113 | obj_struct = {}
114 | obj_struct['name'] = obj.find('name').text
115 | obj_struct['pose'] = obj.find('pose').text
116 | obj_struct['truncated'] = int(obj.find('truncated').text)
117 | obj_struct['difficult'] = int(obj.find('difficult').text)
118 | bbox = obj.find('bndbox')
119 | obj_struct['bbox'] = [int(bbox.find('xmin').text) - 1,
120 | int(bbox.find('ymin').text) - 1,
121 | int(bbox.find('xmax').text) - 1,
122 | int(bbox.find('ymax').text) - 1]
123 | objects.append(obj_struct)
124 |
125 | return objects
126 |
127 |
128 | def get_output_dir(name, phase):
129 | """Return the directory where experimental artifacts are placed.
130 | If the directory does not exist, it is created.
131 | A canonical path is built using the name from an imdb and a network
132 | (if not None).
133 | """
134 | filedir = os.path.join(name, phase)
135 | if not os.path.exists(filedir):
136 | os.makedirs(filedir)
137 | return filedir
138 |
139 |
140 | def get_voc_results_file_template(image_set, cls):
141 | # VOCdevkit/VOC2007/results/det_test_aeroplane.txt
142 | filename = 'det_' + image_set + '_%s.txt' % (cls)
143 | filedir = os.path.join(devkit_path, 'results')
144 | if not os.path.exists(filedir):
145 | os.makedirs(filedir)
146 | path = os.path.join(filedir, filename)
147 | return path
148 |
149 |
150 | def write_voc_results_file(all_boxes, dataset):
151 | for cls_ind, cls in enumerate(labelmap):
152 | print('Writing {:s} VOC results file'.format(cls))
153 | filename = get_voc_results_file_template(set_type, cls)
154 | with open(filename, 'wt') as f:
155 | for im_ind, index in enumerate(dataset.ids):
156 | dets = all_boxes[cls_ind+1][im_ind]
157 | if len(dets) == 0:
158 | continue
159 | # the VOCdevkit expects 1-based indices
160 | for k in range(dets.shape[0]):
161 | f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
162 | format(index[1], dets[k, -1],
163 | dets[k, 0] + 1, dets[k, 1] + 1,
164 | dets[k, 2] + 1, dets[k, 3] + 1))
165 |
166 |
167 | def do_python_eval(output_dir='output', use_07=True):
168 | cachedir = os.path.join(devkit_path, 'annotations_cache')
169 | aps = []
170 | # The PASCAL VOC metric changed in 2010
171 | use_07_metric = use_07
172 | print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
173 | if not os.path.isdir(output_dir):
174 | os.mkdir(output_dir)
175 | for i, cls in enumerate(labelmap):
176 | filename = get_voc_results_file_template(set_type, cls)
177 | rec, prec, ap = voc_eval(
178 | filename, annopath, imgsetpath % (set_type), cls, cachedir,
179 | ovthresh=0.1, use_07_metric=use_07_metric)
180 | aps += [ap]
181 | print('AP for {} = {:.4f}'.format(cls, ap))
182 | with open(os.path.join(output_dir, cls + '_pr.pkl'), 'wb') as f:
183 | pickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f)
184 | print('Mean AP = {:.4f}'.format(np.mean(aps)))
185 | print('~~~~~~~~')
186 | print('Results:')
187 | for ap in aps:
188 | print('{:.3f}'.format(ap))
189 | print('{:.3f}'.format(np.mean(aps)))
190 | print('~~~~~~~~')
191 | print('')
192 | print('--------------------------------------------------------------')
193 | print('Results computed with the **unofficial** Python eval code.')
194 | print('Results should be very close to the official MATLAB eval code.')
195 | print('--------------------------------------------------------------')
196 |
197 |
198 | def voc_ap(rec, prec, use_07_metric=True):
199 | """ ap = voc_ap(rec, prec, [use_07_metric])
200 | Compute VOC AP given precision and recall.
201 | If use_07_metric is true, uses the
202 | VOC 07 11 point method (default:True).
203 | """
204 | if use_07_metric:
205 | # 11 point metric
206 | ap = 0.
207 | for t in np.arange(0., 1.1, 0.1):
208 | if np.sum(rec >= t) == 0:
209 | p = 0
210 | else:
211 | p = np.max(prec[rec >= t])
212 | ap = ap + p / 11.
213 | else:
214 | # correct AP calculation
215 | # first append sentinel values at the end
216 | mrec = np.concatenate(([0.], rec, [1.]))
217 | mpre = np.concatenate(([0.], prec, [0.]))
218 |
219 | # compute the precision envelope
220 | for i in range(mpre.size - 1, 0, -1):
221 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
222 |
223 | # to calculate area under PR curve, look for points
224 | # where X axis (recall) changes value
225 | i = np.where(mrec[1:] != mrec[:-1])[0]
226 |
227 | # and sum (\Delta recall) * prec
228 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
229 | return ap
230 |
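# Worked example of the 11-point metric above (illustrative): if the
# interpolated precision is 1.0 at the recall thresholds 0.0-0.5 (6 of the 11
# points) and 0.0 at the remaining 5, the AP is 6 * (1.0 / 11) ~= 0.545.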
231 |
232 | def voc_eval(detpath,
233 | annopath,
234 | imagesetfile,
235 | classname,
236 | cachedir,
237 | ovthresh=0.5,
238 | use_07_metric=True):
239 | """rec, prec, ap = voc_eval(detpath,
240 | annopath,
241 | imagesetfile,
242 | classname,
243 | [ovthresh],
244 | [use_07_metric])
245 | Top level function that does the PASCAL VOC evaluation.
246 | detpath: Path to detections
247 | detpath.format(classname) should produce the detection results file.
248 | annopath: Path to annotations
249 | annopath.format(imagename) should be the xml annotations file.
250 | imagesetfile: Text file containing the list of images, one image per line.
251 | classname: Category name (duh)
252 | cachedir: Directory for caching the annotations
253 | [ovthresh]: Overlap threshold (default = 0.5)
254 | [use_07_metric]: Whether to use VOC07's 11 point AP computation
255 | (default True)
256 | """
257 | # assumes detections are in detpath.format(classname)
258 | # assumes annotations are in annopath.format(imagename)
259 | # assumes imagesetfile is a text file with each line an image name
260 | # cachedir caches the annotations in a pickle file
261 | # first load gt
262 | if not os.path.isdir(cachedir):
263 | os.mkdir(cachedir)
264 | cachefile = os.path.join(cachedir, 'annots.pkl')
265 | # read list of images
266 | with open(imagesetfile, 'r') as f:
267 | lines = f.readlines()
268 | imagenames = [x.strip() for x in lines]
269 | if not os.path.isfile(cachefile):
270 | # load annots
271 | recs = {}
272 | for i, imagename in enumerate(imagenames):
273 | recs[imagename] = parse_rec(annopath % (imagename))
274 | if i % 100 == 0:
275 | print('Reading annotation for {:d}/{:d}'.format(
276 | i + 1, len(imagenames)))
277 | # save
278 | print('Saving cached annotations to {:s}'.format(cachefile))
279 | with open(cachefile, 'wb') as f:
280 | pickle.dump(recs, f)
281 | else:
282 | # load
283 | with open(cachefile, 'rb') as f:
284 | recs = pickle.load(f)
285 |
286 | # extract gt objects for this class
287 | class_recs = {}
288 | npos = 0
289 | for imagename in imagenames:
290 | R = [obj for obj in recs[imagename] if obj['name'] == classname]
291 | bbox = np.array([x['bbox'] for x in R])
292 | difficult = np.array([x['difficult'] for x in R]).astype(bool)
293 | det = [False] * len(R)
294 | npos = npos + sum(~difficult)
295 | class_recs[imagename] = {'bbox': bbox,
296 | 'difficult': difficult,
297 | 'det': det}
298 |
299 | # read dets
300 | detfile = detpath.format(classname)
301 | with open(detfile, 'r') as f:
302 | lines = f.readlines()
303 | if any(lines):
304 |
305 | splitlines = [x.strip().split(' ') for x in lines]
306 | image_ids = [x[0] for x in splitlines]
307 | confidence = np.array([float(x[1]) for x in splitlines])
308 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
309 |
310 | # sort by confidence
311 | sorted_ind = np.argsort(-confidence)
312 | sorted_scores = np.sort(-confidence)
313 | BB = BB[sorted_ind, :]
314 | image_ids = [image_ids[x] for x in sorted_ind]
315 |
316 | # go down dets and mark TPs and FPs
317 | nd = len(image_ids)
318 | tp = np.zeros(nd)
319 | fp = np.zeros(nd)
320 | for d in range(nd):
321 | R = class_recs[image_ids[d]]
322 | bb = BB[d, :].astype(float)
323 | ovmax = -np.inf
324 | BBGT = R['bbox'].astype(float)
325 | if BBGT.size > 0:
326 | # compute overlaps
327 | # intersection
328 | ixmin = np.maximum(BBGT[:, 0], bb[0])
329 | iymin = np.maximum(BBGT[:, 1], bb[1])
330 | ixmax = np.minimum(BBGT[:, 2], bb[2])
331 | iymax = np.minimum(BBGT[:, 3], bb[3])
332 | iw = np.maximum(ixmax - ixmin, 0.)
333 | ih = np.maximum(iymax - iymin, 0.)
334 | inters = iw * ih
335 | uni = ((bb[2] - bb[0]) * (bb[3] - bb[1]) +
336 | (BBGT[:, 2] - BBGT[:, 0]) *
337 | (BBGT[:, 3] - BBGT[:, 1]) - inters)
338 | overlaps = inters / uni
339 | ovmax = np.max(overlaps)
340 | jmax = np.argmax(overlaps)
341 |
342 | if ovmax > ovthresh:
343 | if not R['difficult'][jmax]:
344 | if not R['det'][jmax]:
345 | tp[d] = 1.
346 | R['det'][jmax] = 1
347 | else:
348 | fp[d] = 1.
349 | else:
350 | fp[d] = 1.
351 |
352 | # compute precision recall
353 | fp = np.cumsum(fp)
354 | tp = np.cumsum(tp)
355 | rec = tp / float(npos)
356 | # avoid divide by zero in case the first detection matches a difficult
357 | # ground truth
358 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
359 | ap = voc_ap(rec, prec, use_07_metric)
360 | else:
361 | rec = -1.
362 | prec = -1.
363 | ap = -1.
364 |
365 | return rec, prec, ap
366 |
367 |
368 | def test_net(save_folder, net, cuda, dataset, transform, top_k,
369 | im_size=300, thresh=0.05):
370 | num_images = len(dataset)
371 | # all detections are collected into:
372 | # all_boxes[cls][image] = N x 5 array of detections in
373 | # (x1, y1, x2, y2, score)
374 | all_boxes = [[[] for _ in range(num_images)]
375 | for _ in range(len(labelmap)+1)]
376 |
377 | # timers
378 | _t = {'im_detect': Timer(), 'misc': Timer()}
379 | output_dir = get_output_dir('ssd300_120000', set_type)
380 | det_file = os.path.join(output_dir, 'detections.pkl')
381 |
382 | for i in range(num_images):
383 | im, gt, h, w = dataset.pull_item(i)
384 |
385 | x = Variable(im.unsqueeze(0))
386 | if args.cuda:
387 | x = x.cuda()
388 | _t['im_detect'].tic()
389 | detections = net(x).data
390 | detect_time = _t['im_detect'].toc(average=False)
391 |
392 | # skip j = 0, because it's the background class
393 | for j in range(1, detections.size(1)):
394 | dets = detections[0, j, :]
395 | mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t()
396 | dets = torch.masked_select(dets, mask).view(-1, 5)
397 | if dets.size(0) == 0:
398 | continue
399 | boxes = dets[:, 1:]
400 | boxes[:, 0] *= w
401 | boxes[:, 2] *= w
402 | boxes[:, 1] *= h
403 | boxes[:, 3] *= h
404 | scores = dets[:, 0].cpu().numpy()
405 | cls_dets = np.hstack((boxes.cpu().numpy(),
406 | scores[:, np.newaxis])).astype(np.float32,
407 | copy=False)
408 | all_boxes[j][i] = cls_dets
409 |
410 | print('im_detect: {:d}/{:d} {:.3f}s'.format(i + 1,
411 | num_images, detect_time))
412 |
413 | with open(det_file, 'wb') as f:
414 | pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
415 |
416 | print('Evaluating detections')
417 | evaluate_detections(all_boxes, output_dir, dataset)
418 |
419 |
420 | def evaluate_detections(box_list, output_dir, dataset):
421 | write_voc_results_file(box_list, dataset)
422 | do_python_eval(output_dir)
423 |
424 |
425 | if __name__ == '__main__':
426 | # load net
427 | num_classes = len(labelmap) + 1 # +1 for background
428 | net = build_ssd('test', 300, num_classes) # initialize SSD
429 | net.load_state_dict(torch.load(args.trained_model))
430 | net.eval()
431 | print('Finished loading model!')
432 | # load data
433 | dataset = customDetection(args.custom_root, [('shenhe', set_type)],
434 | BaseTransform(300, dataset_mean),
435 | customAnnotationTransform())
436 | if args.cuda:
437 | net = net.cuda()
438 | cudnn.benchmark = True
439 | # evaluation
440 | test_net(args.save_folder, net, args.cuda, dataset,
441 | BaseTransform(net.size, dataset_mean), args.top_k, 300,
442 | thresh=args.confidence_threshold)
443 |
--------------------------------------------------------------------------------
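Note: the per-detection matching step in voc_eval above is a plain intersection-over-union (IoU) test between one detection box and all ground-truth boxes of the same class, with a match declared when the best IoU exceeds ovthresh. The following is a minimal self-contained sketch of that step only; it is not part of the repository, and the box coordinates are made up for illustration.

import numpy as np

# one detection and two hypothetical ground-truth boxes, (xmin, ymin, xmax, ymax)
bb = np.array([10., 10., 50., 50.])
BBGT = np.array([[12., 12., 48., 52.],
                 [100., 100., 120., 130.]])

# intersection rectangle with each ground-truth box
ixmin = np.maximum(BBGT[:, 0], bb[0])
iymin = np.maximum(BBGT[:, 1], bb[1])
ixmax = np.minimum(BBGT[:, 2], bb[2])
iymax = np.minimum(BBGT[:, 3], bb[3])
iw = np.maximum(ixmax - ixmin, 0.)
ih = np.maximum(iymax - iymin, 0.)
inters = iw * ih

# union = area(det) + area(gt) - intersection
uni = ((bb[2] - bb[0]) * (bb[3] - bb[1]) +
       (BBGT[:, 2] - BBGT[:, 0]) * (BBGT[:, 3] - BBGT[:, 1]) - inters)
overlaps = inters / uni

# best overlap and the index of the matched ground-truth box
# (here roughly 0.82 for the first box and 0.0 for the second)
print(overlaps.max(), overlaps.argmax())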
/实验 4.2/evalCustom_18.py:
--------------------------------------------------------------------------------
1 | """Adapted from:
2 | @longcw faster_rcnn_pytorch: https://github.com/longcw/faster_rcnn_pytorch
3 | @rbgirshick py-faster-rcnn https://github.com/rbgirshick/py-faster-rcnn
4 | Licensed under The MIT License [see LICENSE for details]
5 | """
6 |
7 | from __future__ import print_function
8 | import torch
9 | import torch.nn as nn
10 | import torch.backends.cudnn as cudnn
11 | from torch.autograd import Variable
12 | # from data import VOC_ROOT, VOCAnnotationTransform, VOCDetection, BaseTransform
13 | # from data import VOC_CLASSES as labelmap
14 | import torch.utils.data as data
15 |
16 | from data import BaseTransform
17 | from data.custom import CUSTOM_CLASSES as labelmap
18 | from data.custom import customDetection, customAnnotationTransform, CUSTOM_CLASSES, CUSTOM_ROOT
19 |
20 | # from ssd import build_ssd
21 | from ssd_resnet_18 import build_ssd
22 |
23 | import sys
24 | import os
25 | import time
26 | import argparse
27 | import numpy as np
28 | import pickle
29 | import cv2
30 |
31 | if sys.version_info[0] == 2:
32 | import xml.etree.cElementTree as ET
33 | else:
34 | import xml.etree.ElementTree as ET
35 |
36 |
37 | def str2bool(v):
38 | return v.lower() in ("yes", "true", "t", "1")
39 |
40 |
41 | parser = argparse.ArgumentParser(
42 | description='Single Shot MultiBox Detector Evaluation')
43 | parser.add_argument('--trained_model',
44 | default='weights/CUSTOM.pth', type=str,
45 | help='Trained state_dict file path to open')
46 | parser.add_argument('--save_folder', default='eval/', type=str,
47 | help='File path to save results')
48 | parser.add_argument('--confidence_threshold', default=0.01, type=float,
49 | help='Detection confidence threshold')
50 | parser.add_argument('--top_k', default=5, type=int,
51 | help='Further restrict the number of predictions to parse')
52 | parser.add_argument('--cuda', default=True, type=str2bool,
53 | help='Use cuda to train model')
54 | parser.add_argument('--custom_root', default=CUSTOM_ROOT,
55 | help='Location of the custom dataset root directory')
56 | parser.add_argument('--cleanup', default=True, type=str2bool,
57 | help='Cleanup and remove results files following eval')
58 |
59 | args = parser.parse_args()
60 |
61 | if not os.path.exists(args.save_folder):
62 | os.mkdir(args.save_folder)
63 |
64 | if torch.cuda.is_available():
65 | if args.cuda:
66 | torch.set_default_tensor_type('torch.cuda.FloatTensor')
67 | if not args.cuda:
68 | print("WARNING: It looks like you have a CUDA device, but aren't using \
69 | CUDA. Run with --cuda for optimal eval speed.")
70 | torch.set_default_tensor_type('torch.FloatTensor')
71 | else:
72 | torch.set_default_tensor_type('torch.FloatTensor')
73 |
74 | annopath = os.path.join(args.custom_root, 'shenhe', 'Annotations', '%s.xml')
75 | imgpath = os.path.join(args.custom_root, 'shenhe', 'JPEGImages', '%s.jpg')
76 | imgsetpath = os.path.join(args.custom_root, 'shenhe', 'ImageSets', 'Main', '%s.txt')
77 |
78 | devkit_path = os.path.join(args.custom_root, 'shenhe')
79 | dataset_mean = (104, 117, 123)
80 | set_type = 'test'
81 |
82 |
83 | class Timer(object):
84 | """A simple timer."""
85 | def __init__(self):
86 | self.total_time = 0.
87 | self.calls = 0
88 | self.start_time = 0.
89 | self.diff = 0.
90 | self.average_time = 0.
91 |
92 | def tic(self):
93 | # using time.time instead of time.clock because time.clock
94 | # does not normalize for multithreading
95 | self.start_time = time.time()
96 |
97 | def toc(self, average=True):
98 | self.diff = time.time() - self.start_time
99 | self.total_time += self.diff
100 | self.calls += 1
101 | self.average_time = self.total_time / self.calls
102 | if average:
103 | return self.average_time
104 | else:
105 | return self.diff
106 |
107 |
108 | def parse_rec(filename):
109 | """ Parse a PASCAL VOC xml file """
110 | tree = ET.parse(filename)
111 | objects = []
112 | for obj in tree.findall('object'):
113 | obj_struct = {}
114 | obj_struct['name'] = obj.find('name').text
115 | obj_struct['pose'] = obj.find('pose').text
116 | obj_struct['truncated'] = int(obj.find('truncated').text)
117 | obj_struct['difficult'] = int(obj.find('difficult').text)
118 | bbox = obj.find('bndbox')
119 | obj_struct['bbox'] = [int(bbox.find('xmin').text) - 1,
120 | int(bbox.find('ymin').text) - 1,
121 | int(bbox.find('xmax').text) - 1,
122 | int(bbox.find('ymax').text) - 1]
123 | objects.append(obj_struct)
124 |
125 | return objects
126 |
127 |
128 | def get_output_dir(name, phase):
129 | """Return the directory where experimental artifacts are placed.
130 | If the directory does not exist, it is created.
131 | A canonical path is built using the name from an imdb and a network
132 | (if not None).
133 | """
134 | filedir = os.path.join(name, phase)
135 | if not os.path.exists(filedir):
136 | os.makedirs(filedir)
137 | return filedir
138 |
139 |
140 | def get_voc_results_file_template(image_set, cls):
141 | # VOCdevkit/VOC2007/results/det_test_aeroplane.txt
142 | filename = 'det_' + image_set + '_%s.txt' % (cls)
143 | filedir = os.path.join(devkit_path, 'results')
144 | if not os.path.exists(filedir):
145 | os.makedirs(filedir)
146 | path = os.path.join(filedir, filename)
147 | return path
148 |
149 |
150 | def write_voc_results_file(all_boxes, dataset):
151 | for cls_ind, cls in enumerate(labelmap):
152 | print('Writing {:s} VOC results file'.format(cls))
153 | filename = get_voc_results_file_template(set_type, cls)
154 | with open(filename, 'wt') as f:
155 | for im_ind, index in enumerate(dataset.ids):
156 | dets = all_boxes[cls_ind+1][im_ind]
157 | if len(dets) == 0:
158 | continue
159 | # the VOCdevkit expects 1-based indices
160 | for k in range(dets.shape[0]):
161 | f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
162 | format(index[1], dets[k, -1],
163 | dets[k, 0] + 1, dets[k, 1] + 1,
164 | dets[k, 2] + 1, dets[k, 3] + 1))
165 |
166 |
167 | def do_python_eval(output_dir='output', use_07=True):
168 | cachedir = os.path.join(devkit_path, 'annotations_cache')
169 | aps = []
170 | # The PASCAL VOC metric changed in 2010
171 | use_07_metric = use_07
172 | print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
173 | if not os.path.isdir(output_dir):
174 | os.mkdir(output_dir)
175 | for i, cls in enumerate(labelmap):
176 | filename = get_voc_results_file_template(set_type, cls)
177 | rec, prec, ap = voc_eval(
178 | filename, annopath, imgsetpath % (set_type), cls, cachedir,
179 | ovthresh=0.1, use_07_metric=use_07_metric)
180 | aps += [ap]
181 | print('AP for {} = {:.4f}'.format(cls, ap))
182 | with open(os.path.join(output_dir, cls + '_pr.pkl'), 'wb') as f:
183 | pickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f)
184 | print('Mean AP = {:.4f}'.format(np.mean(aps)))
185 | print('~~~~~~~~')
186 | print('Results:')
187 | for ap in aps:
188 | print('{:.3f}'.format(ap))
189 | print('{:.3f}'.format(np.mean(aps)))
190 | print('~~~~~~~~')
191 | print('')
192 | print('--------------------------------------------------------------')
193 | print('Results computed with the **unofficial** Python eval code.')
194 | print('Results should be very close to the official MATLAB eval code.')
195 | print('--------------------------------------------------------------')
196 |
197 |
198 | def voc_ap(rec, prec, use_07_metric=True):
199 | """ ap = voc_ap(rec, prec, [use_07_metric])
200 | Compute VOC AP given precision and recall.
201 | If use_07_metric is true, uses the
202 | VOC 07 11 point method (default:True).
203 | """
204 | if use_07_metric:
205 | # 11 point metric
206 | ap = 0.
207 | for t in np.arange(0., 1.1, 0.1):
208 | if np.sum(rec >= t) == 0:
209 | p = 0
210 | else:
211 | p = np.max(prec[rec >= t])
212 | ap = ap + p / 11.
213 | else:
214 | # correct AP calculation
215 | # first append sentinel values at the end
216 | mrec = np.concatenate(([0.], rec, [1.]))
217 | mpre = np.concatenate(([0.], prec, [0.]))
218 |
219 | # compute the precision envelope
220 | for i in range(mpre.size - 1, 0, -1):
221 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
222 |
223 | # to calculate area under PR curve, look for points
224 | # where X axis (recall) changes value
225 | i = np.where(mrec[1:] != mrec[:-1])[0]
226 |
227 | # and sum (\Delta recall) * prec
228 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
229 | return ap
230 |
231 |
232 | def voc_eval(detpath,
233 | annopath,
234 | imagesetfile,
235 | classname,
236 | cachedir,
237 | ovthresh=0.5,
238 | use_07_metric=True):
239 | """rec, prec, ap = voc_eval(detpath,
240 | annopath,
241 | imagesetfile,
242 | classname,
243 | [ovthresh],
244 | [use_07_metric])
245 | Top level function that does the PASCAL VOC evaluation.
246 | detpath: Path to detections
247 | detpath.format(classname) should produce the detection results file.
248 | annopath: Path to annotations
249 | annopath.format(imagename) should be the xml annotations file.
250 | imagesetfile: Text file containing the list of images, one image per line.
251 | classname: Category name (duh)
252 | cachedir: Directory for caching the annotations
253 | [ovthresh]: Overlap threshold (default = 0.5)
254 | [use_07_metric]: Whether to use VOC07's 11 point AP computation
255 | (default True)
256 | """
257 | # assumes detections are in detpath.format(classname)
258 | # assumes annotations are in annopath.format(imagename)
259 | # assumes imagesetfile is a text file with each line an image name
260 | # cachedir caches the annotations in a pickle file
261 | # first load gt
262 | if not os.path.isdir(cachedir):
263 | os.mkdir(cachedir)
264 | cachefile = os.path.join(cachedir, 'annots.pkl')
265 | # read list of images
266 | with open(imagesetfile, 'r') as f:
267 | lines = f.readlines()
268 | imagenames = [x.strip() for x in lines]
269 | if not os.path.isfile(cachefile):
270 | # load annots
271 | recs = {}
272 | for i, imagename in enumerate(imagenames):
273 | recs[imagename] = parse_rec(annopath % (imagename))
274 | if i % 100 == 0:
275 | print('Reading annotation for {:d}/{:d}'.format(
276 | i + 1, len(imagenames)))
277 | # save
278 | print('Saving cached annotations to {:s}'.format(cachefile))
279 | with open(cachefile, 'wb') as f:
280 | pickle.dump(recs, f)
281 | else:
282 | # load
283 | with open(cachefile, 'rb') as f:
284 | recs = pickle.load(f)
285 |
286 | # extract gt objects for this class
287 | class_recs = {}
288 | npos = 0
289 | for imagename in imagenames:
290 | R = [obj for obj in recs[imagename] if obj['name'] == classname]
291 | bbox = np.array([x['bbox'] for x in R])
292 | difficult = np.array([x['difficult'] for x in R]).astype(bool)
293 | det = [False] * len(R)
294 | npos = npos + sum(~difficult)
295 | class_recs[imagename] = {'bbox': bbox,
296 | 'difficult': difficult,
297 | 'det': det}
298 |
299 | # read dets
300 | detfile = detpath.format(classname)
301 | with open(detfile, 'r') as f:
302 | lines = f.readlines()
303 | if any(lines):
304 |
305 | splitlines = [x.strip().split(' ') for x in lines]
306 | image_ids = [x[0] for x in splitlines]
307 | confidence = np.array([float(x[1]) for x in splitlines])
308 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
309 |
310 | # sort by confidence
311 | sorted_ind = np.argsort(-confidence)
312 | sorted_scores = np.sort(-confidence)
313 | BB = BB[sorted_ind, :]
314 | image_ids = [image_ids[x] for x in sorted_ind]
315 |
316 | # go down dets and mark TPs and FPs
317 | nd = len(image_ids)
318 | tp = np.zeros(nd)
319 | fp = np.zeros(nd)
320 | for d in range(nd):
321 | R = class_recs[image_ids[d]]
322 | bb = BB[d, :].astype(float)
323 | ovmax = -np.inf
324 | BBGT = R['bbox'].astype(float)
325 | if BBGT.size > 0:
326 | # compute overlaps
327 | # intersection
328 | ixmin = np.maximum(BBGT[:, 0], bb[0])
329 | iymin = np.maximum(BBGT[:, 1], bb[1])
330 | ixmax = np.minimum(BBGT[:, 2], bb[2])
331 | iymax = np.minimum(BBGT[:, 3], bb[3])
332 | iw = np.maximum(ixmax - ixmin, 0.)
333 | ih = np.maximum(iymax - iymin, 0.)
334 | inters = iw * ih
335 | uni = ((bb[2] - bb[0]) * (bb[3] - bb[1]) +
336 | (BBGT[:, 2] - BBGT[:, 0]) *
337 | (BBGT[:, 3] - BBGT[:, 1]) - inters)
338 | overlaps = inters / uni
339 | ovmax = np.max(overlaps)
340 | jmax = np.argmax(overlaps)
341 |
342 | if ovmax > ovthresh:
343 | if not R['difficult'][jmax]:
344 | if not R['det'][jmax]:
345 | tp[d] = 1.
346 | R['det'][jmax] = 1
347 | else:
348 | fp[d] = 1.
349 | else:
350 | fp[d] = 1.
351 |
352 | # compute precision recall
353 | fp = np.cumsum(fp)
354 | tp = np.cumsum(tp)
355 | rec = tp / float(npos)
356 | # avoid divide by zero in case the first detection matches a difficult
357 | # ground truth
358 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
359 | ap = voc_ap(rec, prec, use_07_metric)
360 | else:
361 | rec = -1.
362 | prec = -1.
363 | ap = -1.
364 |
365 | return rec, prec, ap
366 |
367 |
368 | def test_net(save_folder, net, cuda, dataset, transform, top_k,
369 | im_size=300, thresh=0.05):
370 | num_images = len(dataset)
371 | # all detections are collected into:
372 | # all_boxes[cls][image] = N x 5 array of detections in
373 | # (x1, y1, x2, y2, score)
374 | all_boxes = [[[] for _ in range(num_images)]
375 | for _ in range(len(labelmap)+1)]
376 |
377 | # timers
378 | _t = {'im_detect': Timer(), 'misc': Timer()}
379 | output_dir = get_output_dir('ssd300_120000', set_type)
380 | det_file = os.path.join(output_dir, 'detections.pkl')
381 |
382 | for i in range(num_images):
383 | im, gt, h, w = dataset.pull_item(i)
384 |
385 | x = Variable(im.unsqueeze(0))
386 | if args.cuda:
387 | x = x.cuda()
388 | _t['im_detect'].tic()
389 | detections = net(x).data
390 | detect_time = _t['im_detect'].toc(average=False)
391 |
392 | # skip j = 0, because it's the background class
393 | for j in range(1, detections.size(1)):
394 | dets = detections[0, j, :]
395 | mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t()
396 | dets = torch.masked_select(dets, mask).view(-1, 5)
397 | if dets.size(0) == 0:
398 | continue
399 | boxes = dets[:, 1:]
400 | boxes[:, 0] *= w
401 | boxes[:, 2] *= w
402 | boxes[:, 1] *= h
403 | boxes[:, 3] *= h
404 | scores = dets[:, 0].cpu().numpy()
405 | cls_dets = np.hstack((boxes.cpu().numpy(),
406 | scores[:, np.newaxis])).astype(np.float32,
407 | copy=False)
408 | all_boxes[j][i] = cls_dets
409 |
410 | print('im_detect: {:d}/{:d} {:.3f}s'.format(i + 1,
411 | num_images, detect_time))
412 |
413 | with open(det_file, 'wb') as f:
414 | pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
415 |
416 | print('Evaluating detections')
417 | evaluate_detections(all_boxes, output_dir, dataset)
418 |
419 |
420 | def evaluate_detections(box_list, output_dir, dataset):
421 | write_voc_results_file(box_list, dataset)
422 | do_python_eval(output_dir)
423 |
424 |
425 | if __name__ == '__main__':
426 | # load net
427 | num_classes = len(labelmap) + 1 # +1 for background
428 | net = build_ssd('test', 300, num_classes) # initialize SSD
429 | net.load_state_dict(torch.load(args.trained_model))
430 | net.eval()
431 | print('Finished loading model!')
432 | # load data
433 | dataset = customDetection(args.custom_root, [('shenhe', set_type)],
434 | BaseTransform(300, dataset_mean),
435 | customAnnotationTransform())
436 | if args.cuda:
437 | net = net.cuda()
438 | cudnn.benchmark = True
439 | # evaluation
440 | test_net(args.save_folder, net, args.cuda, dataset,
441 | BaseTransform(net.size, dataset_mean), args.top_k, 300,
442 | thresh=args.confidence_threshold)
443 |
--------------------------------------------------------------------------------
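Note: as a usage sketch (not taken from the repository), evalCustom_18.py is driven entirely by the argparse flags defined at the top of the file, so an evaluation run would look roughly like the command below. It assumes a trained checkpoint exists at the given path, that the dataset under --custom_root follows the hard-coded 'shenhe' layout (Annotations/, JPEGImages/, ImageSets/Main/test.txt), and that the imports resolve, e.g. by running from inside 实验 4.2 with the repository root on PYTHONPATH; the file paths shown are placeholders.

python evalCustom_18.py --trained_model weights/CUSTOM.pth --save_folder eval/ --confidence_threshold 0.01 --top_k 5 --cuda True

The script then writes per-class VOC-style result files under <devkit_path>/results/, caches parsed annotations in annotations_cache/, and prints the per-class AP values and the mean AP computed by do_python_eval.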