├── SSD ├── models │ ├── __init__.py │ ├── base_models.py │ └── SSD.py ├── utils │ ├── __init__.py │ ├── nms │ │ ├── __init__.py │ │ ├── gpu_nms.hpp │ │ ├── py_cpu_nms.py │ │ ├── gpu_nms.pyx │ │ ├── nms_kernel.cu │ │ └── cpu_nms.pyx │ ├── pycocotools │ │ ├── __init__.py │ │ ├── maskApi.h │ │ ├── mask.py │ │ ├── maskApi.c │ │ ├── _mask.pyx │ │ ├── coco.py │ │ └── cocoeval.py │ ├── nms_wrapper.py │ ├── timer.py │ ├── build.py │ └── box_utils.py ├── layers │ ├── __init__.py │ ├── modules │ │ ├── __init__.py │ │ ├── loss.py │ │ └── multibox_loss.py │ ├── functions │ │ ├── __init__.py │ │ ├── prior_box.py │ │ └── detection.py │ └── l2norm.py ├── image │ ├── 000050.jpg │ ├── 000753.jpg │ ├── 000762.jpg │ ├── 001070.jpg │ ├── 001136.jpg │ └── 001275.jpg ├── make.sh ├── data │ ├── __init__.py │ ├── scripts │ │ ├── VOC2012.sh │ │ └── VOC2007.sh │ ├── config.py │ ├── voc_eval.py │ ├── coco.py │ ├── data_augment.py │ └── voc0712.py ├── README.md ├── val.py └── train.py ├── YOLO v3 └── README.md └── README.md /SSD/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /SSD/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /SSD/utils/nms/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /SSD/utils/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /SSD/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions import * 2 | from .modules import * 3 | -------------------------------------------------------------------------------- /SSD/layers/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .multibox_loss import MultiBoxLoss 2 | 3 | __all__ = ['MultiBoxLoss'] 4 | -------------------------------------------------------------------------------- /SSD/image/000050.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlyldxwl/Stronger-One-stage-detector-with-much-Tricks/HEAD/SSD/image/000050.jpg -------------------------------------------------------------------------------- /SSD/image/000753.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlyldxwl/Stronger-One-stage-detector-with-much-Tricks/HEAD/SSD/image/000753.jpg -------------------------------------------------------------------------------- /SSD/image/000762.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlyldxwl/Stronger-One-stage-detector-with-much-Tricks/HEAD/SSD/image/000762.jpg -------------------------------------------------------------------------------- /SSD/image/001070.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlyldxwl/Stronger-One-stage-detector-with-much-Tricks/HEAD/SSD/image/001070.jpg -------------------------------------------------------------------------------- 
/SSD/image/001136.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlyldxwl/Stronger-One-stage-detector-with-much-Tricks/HEAD/SSD/image/001136.jpg -------------------------------------------------------------------------------- /SSD/image/001275.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlyldxwl/Stronger-One-stage-detector-with-much-Tricks/HEAD/SSD/image/001275.jpg -------------------------------------------------------------------------------- /SSD/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cd ./utils/ 3 | 4 | CUDA_PATH=/usr/local/cuda/ 5 | 6 | python build.py build_ext --inplace 7 | 8 | cd .. 9 | -------------------------------------------------------------------------------- /SSD/layers/functions/__init__.py: -------------------------------------------------------------------------------- 1 | from .detection import Detect 2 | from .prior_box import PriorBox 3 | 4 | 5 | __all__ = ['Detect', 'PriorBox'] 6 | -------------------------------------------------------------------------------- /SSD/utils/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /SSD/data/__init__.py: -------------------------------------------------------------------------------- 1 | # from .voc import VOCDetection, AnnotationTransform, detection_collate, VOC_CLASSES 2 | from .voc0712 import VOCDetection, AnnotationTransform, detection_collate, VOC_CLASSES 3 | from .coco import COCODetection 4 | from .data_augment import * 5 | from .config import * 6 | -------------------------------------------------------------------------------- /YOLO v3/README.md: -------------------------------------------------------------------------------- 1 | # Stronger YOLO v3 with much Tricks 2 | 3 | This code will be released soon. 4 | 5 | If you are interested in this repo, please star or watch it. 6 | 7 | Furthermore, if you have spare GPUs, please feel free to contact me and help me train YOLO v3.
([yhao.chen0617@gmail.com](yhao.chen0617@gmail.com)) 8 | 9 | -------------------------------------------------------------------------------- /SSD/layers/l2norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Function 4 | from torch.autograd import Variable 5 | import torch.nn.init as init 6 | 7 | class L2Norm(nn.Module): 8 | def __init__(self,n_channels, scale): 9 | super(L2Norm,self).__init__() 10 | self.n_channels = n_channels 11 | self.gamma = scale or None 12 | self.eps = 1e-10 13 | self.weight = nn.Parameter(torch.Tensor(self.n_channels)) 14 | self.reset_parameters() 15 | 16 | def reset_parameters(self): 17 | init.constant(self.weight,self.gamma) 18 | 19 | def forward(self, x): 20 | norm = x.pow(2).sum(dim=1, keepdim=True).sqrt()+self.eps 21 | #x /= norm 22 | x = torch.div(x,norm) 23 | out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x 24 | return out 25 | -------------------------------------------------------------------------------- /SSD/data/scripts/VOC2012.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ellis Brown 3 | 4 | start=`date +%s` 5 | 6 | # handle optional download dir 7 | if [ -z "$1" ] 8 | then 9 | # navigate to ~/data 10 | echo "navigating to ~/data/ ..." 11 | mkdir -p ~/data 12 | cd ~/data/ 13 | else 14 | # check if it is a valid directory 15 | if [ ! -d $1 ]; then 16 | echo $1 "is not a valid directory" 17 | exit 1 18 | fi 19 | echo "navigating to" $1 "..." 20 | cd $1 21 | fi 22 | 23 | echo "Downloading VOC2012 trainval ..." 24 | # Download the data. 25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 26 | echo "Done downloading." 27 | 28 | 29 | # Extract data 30 | echo "Extracting trainval ..." 31 | tar -xvf VOCtrainval_11-May-2012.tar 32 | echo "removing tar ..." 33 | rm VOCtrainval_11-May-2012.tar 34 | 35 | end=`date +%s` 36 | runtime=$((end-start)) 37 | 38 | echo "Completed in" $runtime "seconds" -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Stronger One-stage detector with much Tricks 2 | 3 | This repo was inspired by the paper [Bag of Freebies for Training Object Detection Neural Networks](https://arxiv.org/pdf/1902.04103). 4 | 5 | I will test as many popular training tricks as I can to improve one-stage detector accuracy; feel free to leave a comment or email me about the tricks you want me to test ([yhao.chen0617@gmail.com](yhao.chen0617@gmail.com)). A minimal sketch of one such trick (warmup + cosine lr) is appended after the notes below. 6 | 7 | **Training Data** : VOC0712 trainval 8 | 9 | **Test data** : VOC07 test 10 | 11 | **GPU** : TITAN X (pascal) 12 | 13 | **Framework** : Pytorch 0.4 14 | 15 | Network | mAP | FPS | Parameter 16 | --|:--:|:--:|:--: 17 | SSD 300| 80.58 | ~100| - 18 | YOLOV3 544| - | - | - 19 | 20 | **Note**: 21 | 22 | - [ ] Stronger YOLOv3 with many tricks will be released soon. 23 | - [ ] This repo does not use **multi-scale training**, which is extremely beneficial to detectors, because of the limited GPU memory (I only have one card).
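Since "Warm up" and "Cos lr" head the trick list in /SSD/README.md but no schedule code appears in this dump, here is a minimal, illustrative sketch of how a linear warmup followed by cosine decay is commonly wired into a PyTorch training loop. The function name `warmup_cosine_lr` and all hyper-parameter values are assumptions for illustration, not this repo's train.py API.

```python
import math

def warmup_cosine_lr(step, total_steps, base_lr, warmup_steps=500):
    # illustrative schedule, not this repo's exact code:
    # linear ramp from 0 to base_lr, then cosine decay back to 0
    if step < warmup_steps:
        return base_lr * step / warmup_steps
    progress = (step - warmup_steps) / max(1, total_steps - warmup_steps)
    return 0.5 * base_lr * (1.0 + math.cos(math.pi * progress))

# usage inside the loop (optimizer is any torch.optim optimizer):
# for group in optimizer.param_groups:
#     group['lr'] = warmup_cosine_lr(iteration, max_iter, base_lr=4e-3)
```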
24 | -------------------------------------------------------------------------------- /SSD/utils/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from .nms.cpu_nms import cpu_nms, cpu_soft_nms 9 | from .nms.gpu_nms import gpu_nms 10 | 11 | 12 | # def nms(dets, thresh, force_cpu=False): 13 | # """Dispatch to either CPU or GPU NMS implementations.""" 14 | # 15 | # if dets.shape[0] == 0: 16 | # return [] 17 | # if cfg.USE_GPU_NMS and not force_cpu: 18 | # return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 19 | # else: 20 | # return cpu_nms(dets, thresh) 21 | 22 | 23 | def nms(dets, thresh, force_cpu=False): 24 | """Dispatch to either CPU or GPU NMS implementations.""" 25 | 26 | if dets.shape[0] == 0: 27 | return [] 28 | if force_cpu: 29 | #return cpu_soft_nms(dets, thresh, method = 0) 30 | return cpu_nms(dets, thresh) 31 | return gpu_nms(dets, thresh) 32 | -------------------------------------------------------------------------------- /SSD/README.md: -------------------------------------------------------------------------------- 1 | # Stronger SSD with much Tricks 2 | ## Tricks 3 | This repo mainly uses the following tricks. 4 | 5 | Trick | Reference paper 6 | --|:--: 7 | Warm up | - 8 | Cos lr | - 9 | Htd lr | - 10 | Batch Normalization | - 11 | Group Normalization | - 12 | No bias decay | - 13 | Label smooth | - 14 | Mixup | - 15 | Random erasing | - 16 | Balance Smoothl1 | - 17 | Focal loss | - 18 | GIOU | - 19 | Octconv | - 20 | 21 | 22 | ## Result 23 | The pretrained model is VGG-16 (atrous). All models use a 300×300 input. 24 | 25 | **SSD already applies heavy data augmentation, which is why mixup, label smoothing, and some additional data augmentation or regularization methods do not help.** 26 | 27 | ## Note 28 | 29 | - [ ] 80.58 is not the final result. The experiment of SSD300 with Focal loss, GIoU and Octconv is still going on. 30 | 31 | - [ ] BN can be merged into the convolution layers, so it adds no inference time or parameters. The merge code will be public soon. 32 | 33 | - [ ] Multi-scale training with SSD 300 should bring a significant gain; it will be released when I start my internship (about one month from now), because I only have one GPU at the moment. -------------------------------------------------------------------------------- /SSD/data/scripts/VOC2007.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ellis Brown 3 | 4 | start=`date +%s` 5 | 6 | # handle optional download dir 7 | if [ -z "$1" ] 8 | then 9 | # navigate to ~/data 10 | echo "navigating to ~/data/ ..." 11 | mkdir -p ~/data 12 | cd ~/data/ 13 | else 14 | # check if it is a valid directory 15 | if [ ! -d $1 ]; then 16 | echo $1 "is not a valid directory" 17 | exit 1 18 | fi 19 | echo "navigating to" $1 "..." 20 | cd $1 21 | fi 22 | 23 | echo "Downloading VOC2007 trainval ..." 24 | # Download the data. 25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar 26 | echo "Downloading VOC2007 test data ..." 27 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar 28 | echo "Done downloading." 29 | 30 | # Extract data 31 | echo "Extracting trainval ..."
32 | tar -xvf VOCtrainval_06-Nov-2007.tar 33 | echo "Extracting test ..." 34 | tar -xvf VOCtest_06-Nov-2007.tar 35 | echo "removing tars ..." 36 | rm VOCtrainval_06-Nov-2007.tar 37 | rm VOCtest_06-Nov-2007.tar 38 | 39 | end=`date +%s` 40 | runtime=$((end-start)) 41 | 42 | echo "Completed in" $runtime "seconds" -------------------------------------------------------------------------------- /SSD/utils/nms/py_cpu_nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def py_cpu_nms(dets, thresh): 11 | """Pure Python NMS baseline.""" 12 | x1 = dets[:, 0] 13 | y1 = dets[:, 1] 14 | x2 = dets[:, 2] 15 | y2 = dets[:, 3] 16 | scores = dets[:, 4] 17 | 18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 19 | order = scores.argsort()[::-1] 20 | 21 | keep = [] 22 | while order.size > 0: 23 | i = order[0] 24 | keep.append(i) 25 | xx1 = np.maximum(x1[i], x1[order[1:]]) 26 | yy1 = np.maximum(y1[i], y1[order[1:]]) 27 | xx2 = np.minimum(x2[i], x2[order[1:]]) 28 | yy2 = np.minimum(y2[i], y2[order[1:]]) 29 | 30 | w = np.maximum(0.0, xx2 - xx1 + 1) 31 | h = np.maximum(0.0, yy2 - yy1 + 1) 32 | inter = w * h 33 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 34 | 35 | inds = np.where(ovr <= thresh)[0] 36 | order = order[inds + 1] 37 | 38 | return keep 39 | -------------------------------------------------------------------------------- /SSD/utils/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /SSD/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 
15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0. 19 | 20 | def tic(self): 21 | # using time.time instead of time.clock because time.clock 22 | # does not normalize for multithreading 23 | self.start_time = time.time() 24 | 25 | def toc(self, average=True): 26 | self.diff = time.time() - self.start_time 27 | self.total_time += self.diff 28 | self.calls += 1 29 | self.average_time = self.total_time / self.calls 30 | if average: 31 | return self.average_time 32 | else: 33 | return self.diff 34 | 35 | def clear(self): 36 | self.total_time = 0. 37 | self.calls = 0 38 | self.start_time = 0. 39 | self.diff = 0. 40 | self.average_time = 0. 41 | -------------------------------------------------------------------------------- /SSD/layers/functions/prior_box.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.backends.cudnn as cudnn 4 | from math import sqrt as sqrt 5 | from itertools import product as product 6 | 7 | 8 | class PriorBox(object): 9 | """Compute priorbox coordinates in center-offset form for each source 10 | feature map. 11 | Note: 12 | This 'layer' has changed between versions of the original SSD 13 | paper, so we include both versions, but note v2 is the most tested and most 14 | recent version of the paper. 15 | 16 | """ 17 | def __init__(self, cfg): 18 | super(PriorBox, self).__init__() 19 | self.image_size = cfg['min_dim'] 20 | # number of priors for feature map location (either 4 or 6) 21 | self.num_priors = len(cfg['aspect_ratios']) 22 | self.variance = cfg['variance'] or [0.1] 23 | self.feature_maps = cfg['feature_maps'] 24 | self.min_sizes = cfg['min_sizes'] 25 | self.max_sizes = cfg['max_sizes'] 26 | self.steps = cfg['steps'] 27 | self.aspect_ratios = cfg['aspect_ratios'] 28 | self.clip = cfg['clip'] 29 | for v in self.variance: 30 | if v <= 0: 31 | raise ValueError('Variances must be greater than 0') 32 | 33 | def forward(self): 34 | mean = [] 35 | for k, f in enumerate(self.feature_maps): 36 | for i, j in product(range(f), repeat=2): 37 | f_k = self.image_size / self.steps[k] 38 | cx = (j + 0.5) / f_k 39 | cy = (i + 0.5) / f_k 40 | 41 | s_k = self.min_sizes[k]/self.image_size 42 | mean += [cx, cy, s_k, s_k] 43 | 44 | # aspect_ratio: 1 45 | # rel size: sqrt(s_k * s_(k+1)) 46 | s_k_prime = sqrt(s_k * (self.max_sizes[k]/self.image_size)) 47 | mean += [cx, cy, s_k_prime, s_k_prime] 48 | 49 | # rest of aspect ratios 50 | for ar in self.aspect_ratios[k]: 51 | mean += [cx, cy, s_k*sqrt(ar), s_k/sqrt(ar)] 52 | mean += [cx, cy, s_k/sqrt(ar), s_k*sqrt(ar)] 53 | 54 | # back to torch land 55 | output = torch.Tensor(mean).view(-1, 4) 56 | if self.clip: 57 | output.clamp_(max=1, min=0) 58 | return output 59 | -------------------------------------------------------------------------------- /SSD/models/base_models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | def vgg(cfg, i, batch_norm=False): 5 | layers = [] 6 | in_channels = i 7 | for v in cfg: 8 | if v == 'M': 9 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 10 | elif v == 'C': 11 | layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)] 12 | else: 13 | if not batch_norm: 14 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) 15 | else: 16 | conv2d = BasicConv(in_channels, v, kernel_size=3, padding=1,relu=False, bn=batch_norm, bias=True) 17 | layers += [conv2d,nn.ReLU(inplace=True)]
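# (added note) numeric cfg entries are conv output-channel counts; 'M' and 'C' entries insert
# max-pooling, with 'C' using ceil_mode=True so a 300x300 input still yields the 38x38 conv4_3
# map (see 'feature_maps' in data/config.py) that SSD's first detection head expects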
18 | in_channels = v 19 | pool5 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1) 20 | if not batch_norm: 21 | conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6) 22 | conv7 = nn.Conv2d(1024, 1024, kernel_size=1) 23 | else: 24 | conv6 = BasicConv(512, 1024, kernel_size=3, padding=6, dilation=6, relu=False) 25 | conv7 = BasicConv(1024, 1024, kernel_size=1, relu=False) 26 | layers += [pool5, conv6, nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True)] 27 | return layers 28 | 29 | vgg_base = { 30 | '300': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M', 31 | 512, 512, 512], 32 | '512': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M', 33 | 512, 512, 512], 34 | } 35 | 36 | class BasicConv(nn.Module): 37 | 38 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, 39 | bn=True, bias=False): 40 | super(BasicConv, self).__init__() 41 | self.out_channels = out_planes 42 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, 43 | dilation=dilation, groups=groups, bias=bias) 44 | self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) if bn else None 45 | self.relu = nn.ReLU(inplace=True) if relu else None 46 | 47 | def forward(self, x): 48 | x = self.conv(x) 49 | if self.bn is not None: 50 | x = self.bn(x) 51 | if self.relu is not None: 52 | x = self.relu(x) 53 | return x 54 | -------------------------------------------------------------------------------- /SSD/utils/pycocotools/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | 9 | typedef unsigned int uint; 10 | typedef unsigned long siz; 11 | typedef unsigned char byte; 12 | typedef double* BB; 13 | typedef struct { siz h, w, m; uint *cnts; } RLE; 14 | 15 | /* Initialize/destroy RLE. */ 16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 17 | void rleFree( RLE *R ); 18 | 19 | /* Initialize/destroy RLE array. */ 20 | void rlesInit( RLE **R, siz n ); 21 | void rlesFree( RLE **R, siz n ); 22 | 23 | /* Encode binary masks using RLE. */ 24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 25 | 26 | /* Decode binary masks encoded via RLE. */ 27 | void rleDecode( const RLE *R, byte *mask, siz n ); 28 | 29 | /* Compute union or intersection of encoded masks. */ 30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ); 31 | 32 | /* Compute area of encoded masks. */ 33 | void rleArea( const RLE *R, siz n, uint *a ); 34 | 35 | /* Compute intersection over union between masks. */ 36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 37 | 38 | /* Compute non-maximum suppression between bounding masks */ 39 | void rleNms( RLE *dt, siz n, uint *keep, double thr ); 40 | 41 | /* Compute intersection over union between bounding boxes. 
*/ 42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 43 | 44 | /* Compute non-maximum suppression between bounding boxes */ 45 | void bbNms( BB dt, siz n, uint *keep, double thr ); 46 | 47 | /* Get bounding boxes surrounding encoded masks. */ 48 | void rleToBbox( const RLE *R, BB bb, siz n ); 49 | 50 | /* Convert bounding boxes to encoded masks. */ 51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 52 | 53 | /* Convert polygon to encoded mask. */ 54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 55 | 56 | /* Get compressed string representation of encoded mask. */ 57 | char* rleToString( const RLE *R ); 58 | 59 | /* Convert from compressed string representation of encoded mask. */ 60 | void rleFrString( RLE *R, char *s, siz h, siz w ); 61 | -------------------------------------------------------------------------------- /SSD/data/config.py: -------------------------------------------------------------------------------- 1 | # config.py 2 | import os.path 3 | 4 | # gets home dir cross platform 5 | home = os.path.expanduser("~") 6 | ddir = os.path.join(home,"data/VOCdevkit/") 7 | 8 | # note: if you used our download scripts, this should be right 9 | VOCroot = ddir # path to VOCdevkit root dir 10 | COCOroot = os.path.join(home,"data/COCO/") 11 | 12 | 13 | #RFB CONFIGS 14 | VOC_300 = { 15 | 'feature_maps' : [38, 19, 10, 5, 3, 1], 16 | 17 | 'min_dim' : 300, 18 | 19 | 'steps' : [8, 16, 32, 64, 100, 300], 20 | 21 | 'min_sizes' : [30, 60, 111, 162, 213, 264], 22 | 23 | 'max_sizes' : [60, 111, 162, 213, 264, 315], 24 | 25 | 'aspect_ratios' : [[2,3], [2, 3], [2, 3], [2, 3], [2], [2]], 26 | 27 | 'variance' : [0.1, 0.2], 28 | 29 | 'clip' : True, 30 | } 31 | 32 | VOC_512= { 33 | 'feature_maps' : [64, 32, 16, 8, 4, 2, 1], 34 | 35 | 'min_dim' : 512, 36 | 37 | 'steps' : [8, 16, 32, 64, 128, 256, 512], 38 | 39 | 'min_sizes' : [35.84, 76.8, 153.6, 230.4, 307.2, 384.0, 460.8 ], 40 | 41 | 'max_sizes' : [76.8, 153.6, 230.4, 307.2, 384.0, 460.8, 537.6], 42 | 43 | 'aspect_ratios' : [[2,3], [2, 3], [2, 3], [2, 3], [2,3], [2], [2]], 44 | 45 | 'variance' : [0.1, 0.2], 46 | 47 | 'clip' : True, 48 | } 49 | 50 | 51 | COCO_300 = { 52 | 'feature_maps' : [38, 19, 10, 5, 3, 1], 53 | 54 | 'min_dim' : 300, 55 | 56 | 'steps' : [8, 16, 32, 64, 100, 300], 57 | 58 | 'min_sizes' : [21, 45, 99, 153, 207, 261], 59 | 60 | 'max_sizes' : [45, 99, 153, 207, 261, 315], 61 | 62 | 'aspect_ratios' : [[2,3], [2, 3], [2, 3], [2, 3], [2], [2]], 63 | 64 | 'variance' : [0.1, 0.2], 65 | 66 | 'clip' : True, 67 | } 68 | 69 | COCO_512= { 70 | 'feature_maps' : [64, 32, 16, 8, 4, 2, 1], 71 | 72 | 'min_dim' : 512, 73 | 74 | 'steps' : [8, 16, 32, 64, 128, 256, 512], 75 | 76 | 'min_sizes' : [20.48, 51.2, 133.12, 215.04, 296.96, 378.88, 460.8], 77 | 78 | 'max_sizes' : [51.2, 133.12, 215.04, 296.96, 378.88, 460.8, 542.72], 79 | 80 | 'aspect_ratios' : [[2,3], [2, 3], [2, 3], [2, 3], [2,3], [2], [2]], 81 | 82 | 'variance' : [0.1, 0.2], 83 | 84 | 'clip' : True, 85 | } 86 | 87 | COCO_mobile_300 = { 88 | 'feature_maps' : [19, 10, 5, 3, 2, 1], 89 | 90 | 'min_dim' : 300, 91 | 92 | 'steps' : [16, 32, 64, 100, 150, 300], 93 | 94 | 'min_sizes' : [45, 90, 135, 180, 225, 270], 95 | 96 | 'max_sizes' : [90, 135, 180, 225, 270, 315], 97 | 98 | 'aspect_ratios' : [[2,3], [2, 3], [2, 3], [2, 3], [2], [2]], 99 | 100 | 'variance' : [0.1, 0.2], 101 | 102 | 'clip' : True, 103 | } 104 | -------------------------------------------------------------------------------- /SSD/layers/functions/detection.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.backends.cudnn as cudnn 4 | from torch.autograd import Function 5 | from torch.autograd import Variable 6 | from utils.box_utils import decode, point_form 7 | 8 | 9 | class Detect(Function): 10 | """At test time, Detect is the final layer of SSD. It decodes location 11 | predictions into boxes and returns them together with the confidence 12 | scores; non-maximum suppression and top-k filtering are applied 13 | afterwards (see val.py), not inside this class. 14 | """ 15 | def __init__(self, num_classes, bkg_label, cfg, GIOU=False): 16 | self.num_classes = num_classes 17 | self.background_label = bkg_label 18 | 19 | self.variance = cfg['variance'] 20 | self.giou = GIOU 21 | 22 | def forward(self, predictions, prior): 23 | """ 24 | Args: 25 | loc_data: (tensor) Loc preds from loc layers 26 | Shape: [batch,num_priors*4] 27 | conf_data: (tensor) Shape: Conf preds from conf layers 28 | Shape: [batch*num_priors,num_classes] 29 | prior_data: (tensor) Prior boxes and variances from priorbox layers 30 | Shape: [1,num_priors,4] 31 | """ 32 | 33 | loc, conf = predictions 34 | 35 | loc_data = loc.data 36 | conf_data = conf.data 37 | prior_data = prior.data 38 | num = loc_data.size(0) # batch size 39 | self.num_priors = prior_data.size(0) 40 | self.boxes = torch.zeros(1, self.num_priors, 4) 41 | self.scores = torch.zeros(1, self.num_priors, self.num_classes) 42 | if loc_data.is_cuda: 43 | self.boxes = self.boxes.cuda() 44 | self.scores = self.scores.cuda() 45 | 46 | if num == 1: 47 | # size batch x num_classes x num_priors 48 | conf_preds = conf_data.unsqueeze(0) 49 | 50 | else: 51 | conf_preds = conf_data.view(num, self.num_priors, 52 | self.num_classes) 53 | self.boxes.expand_(num, self.num_priors, 4) 54 | self.scores.expand_(num, self.num_priors, self.num_classes) 55 | 56 | # Decode predictions into bboxes. 57 | for i in range(num): 58 | if self.giou: 59 | p = decode(loc_data[i], prior_data, self.variance) 60 | decoded_boxes = torch.stack([torch.min(p[:,0],p[:,2]), torch.min(p[:,1],p[:,3]), torch.max(p[:,0],p[:,2]), torch.max(p[:,1],p[:,3])],1) 61 | else: 62 | decoded_boxes = decode(loc_data[i], prior_data, self.variance) 63 | conf_scores = conf_preds[i].clone() 64 | 65 | self.boxes[i] = decoded_boxes 66 | self.scores[i] = conf_scores 67 | 68 | return self.boxes, self.scores 69 | 70 | -------------------------------------------------------------------------------- /SSD/val.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import sys 3 | import os 4 | import pickle 5 | import argparse 6 | import torch 7 | import torch.nn as nn 8 | import torch.backends.cudnn as cudnn 9 | import torchvision.transforms as transforms 10 | import numpy as np 11 | from torch.autograd import Variable 12 | from data import VOCroot,COCOroot 13 | from data import AnnotationTransform, COCODetection, VOCDetection, BaseTransform, VOC_300,VOC_512,COCO_300,COCO_512, COCO_mobile_300 14 | 15 | import torch.utils.data as data 16 | from layers.functions import Detect,PriorBox 17 | from utils.nms_wrapper import nms 18 | from utils.timer import Timer 19 | 20 | 21 | def val_net(priors,save_val_folder,testset,num_classes,net,detector,transform,max_per_image,thresh,cuda,vgg_bn): 22 | if not os.path.exists(save_val_folder): 23 | os.makedirs(save_val_folder) 24 | # dump predictions and assoc.
ground truth to text file for now 25 | num_images = len(testset) 26 | 27 | all_boxes = [[[] for _ in range(num_images)] 28 | for _ in range(num_classes)] 29 | 30 | _t = {'im_detect': Timer(), 'misc': Timer()} 31 | det_file = os.path.join(save_val_folder, 'detections.pkl') 32 | 33 | for i in range(num_images): 34 | img = testset.pull_image(i) 35 | scale = torch.Tensor([img.shape[1], img.shape[0], 36 | img.shape[1], img.shape[0]]) 37 | with torch.no_grad(): 38 | x = transform(img).unsqueeze(0) 39 | if cuda: 40 | x = x.cuda() 41 | scale = scale.cuda() 42 | 43 | _t['im_detect'].tic() 44 | out = net(x,vgg_bn=vgg_bn,test='True') # forward pass 45 | boxes, scores = detector.forward(out, priors) 46 | detect_time = _t['im_detect'].toc() 47 | boxes = boxes[0] 48 | scores = scores[0] 49 | 50 | boxes *= scale 51 | boxes = boxes.cpu().numpy() 52 | scores = scores.cpu().numpy() 53 | # scale each detection back up to the image 54 | 55 | _t['misc'].tic() 56 | 57 | for j in range(1, num_classes): 58 | inds = np.where(scores[:, j] > thresh)[0] 59 | if len(inds) == 0: 60 | all_boxes[j][i] = np.empty([0, 5], dtype=np.float32) 61 | continue 62 | c_bboxes = boxes[inds] 63 | c_scores = scores[inds, j] 64 | c_dets = np.hstack((c_bboxes, c_scores[:, np.newaxis])).astype( 65 | np.float32, copy=False) 66 | 67 | keep = nms(c_dets, 0.45, force_cpu=False) 68 | # keep = keep[:40] 69 | c_dets = c_dets[keep, :] 70 | all_boxes[j][i] = c_dets 71 | if max_per_image > 0: 72 | image_scores = np.hstack([all_boxes[j][i][:, -1] for j in range(1, num_classes)]) 73 | if len(image_scores) > max_per_image: 74 | image_thresh = np.sort(image_scores)[-max_per_image] 75 | for j in range(1, num_classes): 76 | keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] 77 | all_boxes[j][i] = all_boxes[j][i][keep, :] 78 | 79 | nms_time = _t['misc'].toc() 80 | 81 | if i % 1000 == 0: 82 | print('im_detect: {:d}/{:d} {:.4f}s {:.3f}s' 83 | .format(i + 1, num_images, detect_time, nms_time)) 84 | _t['im_detect'].clear() 85 | _t['misc'].clear() 86 | 87 | with open(det_file, 'wb') as f: 88 | pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL) 89 | 90 | print('Evaluating detections') 91 | testset.evaluate_detections(all_boxes, save_val_folder) 92 | 93 | 94 | if __name__ == "__main__": 95 | pass 96 | 97 | -------------------------------------------------------------------------------- /SSD/layers/modules/loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torch.autograd import Variable 7 | from utils.box_utils import log_sum_exp, focal_sum_exp, decode 8 | 9 | class SmoothL1_Mixup_Balance_loss(nn.Module): 10 | def __init__(self, alpha=0.5, gamma=1.5, balance = False, mixup = False, size_average=False): 11 | super(SmoothL1_Mixup_Balance_loss,self).__init__() 12 | self.balance = balance 13 | self.mixup = mixup 14 | self.size_average = size_average 15 | if self.balance: 16 | self.a = alpha 17 | self.r = gamma 18 | self.b = math.exp(gamma / alpha) - 1 19 | self.c = gamma / self.b - alpha 20 | 21 | def forward(self, predict, truth, weight=None): 22 | if self.mixup: 23 | assert predict.shape[0]== truth.shape[0]== weight.shape[0] 24 | else: 25 | assert predict.shape[0] == truth.shape[0] 26 | t = torch.abs(truth-predict) 27 | if self.balance: 28 | smbloss = torch.where(t < 1, self.a * (self.b * t + 1) * torch.log(self.b * t + 1) / self.b - self.a * t, self.r * t + self.c) 29 | else: 30 | smbloss = 
torch.where(t < 1, 0.5 * t ** 2, t - 0.5) 31 | if self.mixup: 32 | smbloss = smbloss.sum(1, keepdim=True) * weight 33 | else: 34 | smbloss = smbloss.sum(1) 35 | if self.size_average: 36 | return torch.mean(smbloss) 37 | else: 38 | return smbloss.sum() 39 | 40 | class Crossentropy_Mixup_SoftmaxFocal_LableSmooth_loss(nn.Module): 41 | def __init__(self, mixup=False, focal_loss=False, gamma=2, alpha=1, label_smooth=False,size_average=False): 42 | super(Crossentropy_Mixup_SoftmaxFocal_LableSmooth_loss,self).__init__() 43 | self.mixup = mixup 44 | self.softmax_focal = focal_loss 45 | if self.softmax_focal: 46 | self.gamma = gamma 47 | self.alpha = alpha 48 | self.label_smooth = label_smooth 49 | self.size_average = size_average 50 | 51 | def forward(self, predict, truth, weight=None): 52 | if self.mixup: 53 | assert predict.shape[0] == truth.shape[0] == weight.shape[0] 54 | else: 55 | assert predict.shape[0] == truth.shape[0] 56 | if self.softmax_focal: 57 | # using OHEM and focal loss with CE 58 | soft_score = focal_sum_exp(predict) 59 | pro = self.alpha * (1 - soft_score) ** self.gamma 60 | cmsloss = (log_sum_exp(predict) - predict.gather(1, truth.view(-1, 1))) * pro.gather(1, truth.view(-1,1)) 61 | elif self.label_smooth: 62 | cmsloss = (log_sum_exp(predict, label_smooth=True) * truth).sum(1, keepdim=True) 63 | else: 64 | cmsloss = log_sum_exp(predict) - predict.gather(1, truth.view(-1, 1)) 65 | if self.mixup: 66 | cmsloss = cmsloss * weight 67 | if self.size_average: 68 | return cmsloss.mean() 69 | else: 70 | return cmsloss.sum() 71 | 72 | class GIoUloss(nn.Module): 73 | def __init__(self,size_average=False): 74 | super(GIoUloss,self).__init__() 75 | self.size_average = size_average 76 | 77 | def _GIoU(self, p, g): 78 | areas_p = (p[:, 2] - p[:, 0]) * (p[:, 3] - p[:, 1]) 79 | areas_g = (g[:, 2] - g[:, 0]) * (g[:, 3] - g[:, 1]) 80 | x1y1 = torch.max(p[:, :2], g[:, :2]) 81 | x2y2 = torch.min(p[:, 2:], g[:, 2:]) 82 | inter = torch.clamp((x2y2 - x1y1), min=0) 83 | area_inter = inter[:, 0] * inter[:, 1] 84 | x1y1 = torch.min(p[:, :2], g[:, :2]) 85 | x2y2 = torch.max(p[:, 2:], g[:, 2:]) 86 | total = x2y2 - x1y1 87 | area_total = total[:, 0] * total[:, 1] # area of the smallest enclosing box 88 | uni = areas_g + areas_p - area_inter 89 | iou = area_inter / uni 90 | Giou = iou - (area_total - uni) / area_total 91 | return Giou 92 | 93 | def forward(self, predict, priors, target, variance=[0.1,0.2]): 94 | assert priors.shape == predict.shape == target.shape, "GIoU loss ERROR!" 95 | 96 | p = decode(predict, priors, variance) 97 | p_n = torch.stack([torch.min(p[:,0],p[:,2]), torch.min(p[:,1],p[:,3]), torch.max(p[:,0],p[:,2]), torch.max(p[:,1],p[:,3])],1) 98 | loss = 1 - self._GIoU(p_n, target) 99 | if self.size_average: 100 | return loss.mean() 101 | else: 102 | return loss.sum() 103 | 104 | 105 | if __name__ == "__main__": 106 | print("This is a loss function implementation file.") 107 | pass 108 | -------------------------------------------------------------------------------- /SSD/utils/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | #import pycocotools._mask as _mask 4 | from . import _mask 5 | 6 | # Interface for manipulating masks stored in RLE format. 7 | # 8 | # RLE is a simple yet efficient format for storing binary masks. RLE 9 | # first divides a vector (or vectorized image) into a series of piecewise 10 | # constant regions and then for each piece simply stores the length of that piece.
For example, given M=[0 0 1 1 1 0 1] the RLE counts would 12 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 13 | # (note that the odd counts are always the numbers of zeros). Instead of 14 | # storing the counts directly, additional compression is achieved with a 15 | # variable bitrate representation based on a common scheme called LEB128. 16 | # 17 | # Compression is greatest given large piecewise constant regions. 18 | # Specifically, the size of the RLE is proportional to the number of 19 | # *boundaries* in M (or for an image the number of boundaries in the y 20 | # direction). Assuming fairly simple shapes, the RLE representation is 21 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 22 | # is substantially lower, especially for large simple objects (large n). 23 | # 24 | # Many common operations on masks can be computed directly using the RLE 25 | # (without need for decoding). This includes computations such as area, 26 | # union, intersection, etc. All of these operations are linear in the 27 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 28 | # of the object. Computing these operations on the original mask is O(n). 29 | # Thus, using the RLE can result in substantial computational savings. 30 | # 31 | # The following API functions are defined: 32 | # encode - Encode binary masks using RLE. 33 | # decode - Decode binary masks encoded via RLE. 34 | # merge - Compute union or intersection of encoded masks. 35 | # iou - Compute intersection over union between masks. 36 | # area - Compute area of encoded masks. 37 | # toBbox - Get bounding boxes surrounding encoded masks. 38 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 39 | # 40 | # Usage: 41 | # Rs = encode( masks ) 42 | # masks = decode( Rs ) 43 | # R = merge( Rs, intersect=false ) 44 | # o = iou( dt, gt, iscrowd ) 45 | # a = area( Rs ) 46 | # bbs = toBbox( Rs ) 47 | # Rs = frPyObjects( [pyObjects], h, w ) 48 | # 49 | # In the API the following formats are used: 50 | # Rs - [dict] Run-length encoding of binary masks 51 | # R - dict Run-length encoding of binary mask 52 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 53 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 54 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 55 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 56 | # dt,gt - May be either bounding boxes or encoded masks 57 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 58 | # 59 | # Finally, a note about the intersection over union (iou) computation. 60 | # The standard iou of a ground truth (gt) and detected (dt) object is 61 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 62 | # For "crowd" regions, we use a modified criteria. If a gt object is 63 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 64 | # Choosing gt' in the crowd gt that best matches the dt can be done using 65 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 66 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 67 | # For crowd gt regions we use this modified criteria above for the iou. 68 | # 69 | # To compile run "python setup.py build_ext --inplace" 70 | # Please do not contact us for help with compiling. 71 | # 72 | # Microsoft COCO Toolbox. 
version 2.0 73 | # Data, paper, and tutorials available at: http://mscoco.org/ 74 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 75 | # Licensed under the Simplified BSD License [see coco/license.txt] 76 | 77 | iou = _mask.iou 78 | merge = _mask.merge 79 | frPyObjects = _mask.frPyObjects 80 | 81 | def encode(bimask): 82 | if len(bimask.shape) == 3: 83 | return _mask.encode(bimask) 84 | elif len(bimask.shape) == 2: 85 | h, w = bimask.shape 86 | return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0] 87 | 88 | def decode(rleObjs): 89 | if type(rleObjs) == list: 90 | return _mask.decode(rleObjs) 91 | else: 92 | return _mask.decode([rleObjs])[:,:,0] 93 | 94 | def area(rleObjs): 95 | if type(rleObjs) == list: 96 | return _mask.area(rleObjs) 97 | else: 98 | return _mask.area([rleObjs])[0] 99 | 100 | def toBbox(rleObjs): 101 | if type(rleObjs) == list: 102 | return _mask.toBbox(rleObjs) 103 | else: 104 | return _mask.toBbox([rleObjs])[0] 105 | -------------------------------------------------------------------------------- /SSD/utils/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include "gpu_nms.hpp" 9 | #include <vector> 10 | #include <iostream> 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 |
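// (added note) threadsPerBlock is sizeof(unsigned long long) * 8 = 64, so in the block below each
// thread packs its IoU tests against the 64 boxes of this column tile into a single 64-bit word:
// bit i of t is set when the overlap with column box i exceeds nms_overlap_thresh.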
61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(&current_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<<blocks, threads>>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector<unsigned long long> remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 | -------------------------------------------------------------------------------- /SSD/utils/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef
np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | 70 | def cpu_soft_nms(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0): 71 | cdef unsigned int N = boxes.shape[0] 72 | cdef float iw, ih, box_area 73 | cdef float ua 74 | cdef int pos = 0 75 | cdef float maxscore = 0 76 | cdef int maxpos = 0 77 | cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov 78 | 79 | for i in range(N): 80 | maxscore = boxes[i, 4] 81 | maxpos = i 82 | 83 | tx1 = boxes[i,0] 84 | ty1 = boxes[i,1] 85 | tx2 = boxes[i,2] 86 | ty2 = boxes[i,3] 87 | ts = boxes[i,4] 88 | 89 | pos = i + 1 90 | # get max box 91 | while pos < N: 92 | if maxscore < boxes[pos, 4]: 93 | maxscore = boxes[pos, 4] 94 | maxpos = pos 95 | pos = pos + 1 96 | 97 | # add max box as a detection 98 | boxes[i,0] = boxes[maxpos,0] 99 | boxes[i,1] = boxes[maxpos,1] 100 | boxes[i,2] = boxes[maxpos,2] 101 | boxes[i,3] = boxes[maxpos,3] 102 | boxes[i,4] = boxes[maxpos,4] 103 | 104 | # swap ith box with position of max box 105 | boxes[maxpos,0] = tx1 106 | boxes[maxpos,1] = ty1 107 | boxes[maxpos,2] = tx2 108 | boxes[maxpos,3] = ty2 109 | boxes[maxpos,4] = ts 110 | 111 | tx1 = boxes[i,0] 112 | ty1 = boxes[i,1] 113 | tx2 = boxes[i,2] 114 | ty2 = boxes[i,3] 115 | ts = boxes[i,4] 116 | 117 | pos = i + 1 118 | # NMS iterations, note that N changes if detection boxes fall below threshold 119 | while pos < N: 120 | x1 = boxes[pos, 0] 121 | y1 = boxes[pos, 1] 122 | x2 = boxes[pos, 2] 123 | y2 = boxes[pos, 3] 124 | s = boxes[pos, 4] 125 | 126 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 127 | iw = (min(tx2, x2) - max(tx1, x1) + 1) 128 | if iw > 0: 129 | ih = (min(ty2, y2) - max(ty1, y1) + 1) 130 | if ih > 0: 131 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih) 132 | ov = iw * ih / ua #iou between max box and detection box 133 | 134 | if method == 1: # linear 135 | if ov > Nt: 136 | weight = 1 - ov 137 | else: 138 | weight = 1 139 | elif method == 2: # gaussian 140 | weight = np.exp(-(ov * ov)/sigma) 141 | else: # original NMS 142 | if ov > 
Nt: 143 | weight = 0 144 | else: 145 | weight = 1 146 | 147 | boxes[pos, 4] = weight*boxes[pos, 4] 148 | 149 | # if box score falls below threshold, discard the box by swapping with last box 150 | # update N 151 | if boxes[pos, 4] < threshold: 152 | boxes[pos,0] = boxes[N-1, 0] 153 | boxes[pos,1] = boxes[N-1, 1] 154 | boxes[pos,2] = boxes[N-1, 2] 155 | boxes[pos,3] = boxes[N-1, 3] 156 | boxes[pos,4] = boxes[N-1, 4] 157 | N = N - 1 158 | pos = pos - 1 159 | 160 | pos = pos + 1 161 | 162 | keep = [i for i in range(N)] 163 | return keep 164 | -------------------------------------------------------------------------------- /SSD/utils/build.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | import numpy as np 11 | from distutils.core import setup 12 | from distutils.extension import Extension 13 | from Cython.Distutils import build_ext 14 | 15 | 16 | def find_in_path(name, path): 17 | "Find a file in a search path" 18 | # adapted from http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 19 | for dir in path.split(os.pathsep): 20 | binpath = pjoin(dir, name) 21 | if os.path.exists(binpath): 22 | return os.path.abspath(binpath) 23 | return None 24 | 25 | 26 | def locate_cuda(): 27 | """Locate the CUDA environment on the system 28 | 29 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 30 | and values giving the absolute path to each directory. 31 | 32 | Starts by looking for the CUDAHOME env variable. If not found, everything 33 | is based on finding 'nvcc' in the PATH. 34 | """ 35 | 36 | # first check if the CUDAHOME env variable is in use 37 | if 'CUDAHOME' in os.environ: 38 | home = os.environ['CUDAHOME'] 39 | nvcc = pjoin(home, 'bin', 'nvcc') 40 | else: 41 | # otherwise, search the PATH for NVCC 42 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 43 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 44 | if nvcc is None: 45 | raise EnvironmentError('The nvcc binary could not be ' 46 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 47 | home = os.path.dirname(os.path.dirname(nvcc)) 48 | 49 | cudaconfig = {'home': home, 'nvcc': nvcc, 50 | 'include': pjoin(home, 'include'), 51 | 'lib64': pjoin(home, 'lib64')} 52 | for k, v in cudaconfig.items(): 53 | if not os.path.exists(v): 54 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 55 | 56 | return cudaconfig 57 | 58 | 59 | CUDA = locate_cuda() 60 | 61 | # Obtain the numpy include directory. This logic works across numpy versions. 62 | try: 63 | numpy_include = np.get_include() 64 | except AttributeError: 65 | numpy_include = np.get_numpy_include() 66 | 67 | 68 | def customize_compiler_for_nvcc(self): 69 | """inject deep into distutils to customize how the dispatch 70 | to gcc/nvcc works. 71 | 72 | If you subclass UnixCCompiler, it's not trivial to get your subclass 73 | injected in, and still have the right customizations (i.e. 74 | distutils.sysconfig.customize_compiler) run on it. So instead of going 75 | the OO route, I have this.
Note, it's kind of like a weird functional 76 | subclassing going on.""" 77 | 78 | # tell the compiler it can process .cu 79 | self.src_extensions.append('.cu') 80 | 81 | # save references to the default compiler_so and _compile methods 82 | default_compiler_so = self.compiler_so 83 | super = self._compile 84 | 85 | # now redefine the _compile method. This gets executed for each 86 | # object but distutils doesn't have the ability to change compilers 87 | # based on source extension: we add it. 88 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 89 | print(extra_postargs) 90 | if os.path.splitext(src)[1] == '.cu': 91 | # use the cuda for .cu files 92 | self.set_executable('compiler_so', CUDA['nvcc']) 93 | # use only a subset of the extra_postargs, which are 1-1 translated 94 | # from the extra_compile_args in the Extension class 95 | postargs = extra_postargs['nvcc'] 96 | else: 97 | postargs = extra_postargs['gcc'] 98 | 99 | super(obj, src, ext, cc_args, postargs, pp_opts) 100 | # reset the default compiler_so, which we might have changed for cuda 101 | self.compiler_so = default_compiler_so 102 | 103 | # inject our redefined _compile method into the class 104 | self._compile = _compile 105 | 106 | 107 | # run the customize_compiler 108 | class custom_build_ext(build_ext): 109 | def build_extensions(self): 110 | customize_compiler_for_nvcc(self.compiler) 111 | build_ext.build_extensions(self) 112 | 113 | 114 | ext_modules = [ 115 | Extension( 116 | "nms.cpu_nms", 117 | ["nms/cpu_nms.pyx"], 118 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 119 | include_dirs=[numpy_include] 120 | ), 121 | Extension('nms.gpu_nms', 122 | ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'], 123 | library_dirs=[CUDA['lib64']], 124 | libraries=['cudart'], 125 | language='c++', 126 | runtime_library_dirs=[CUDA['lib64']], 127 | # this syntax is specific to this build system 128 | # we're only going to use certain compiler args with nvcc and not with gcc 129 | # the implementation of this trick is in customize_compiler() below 130 | extra_compile_args={'gcc': ["-Wno-unused-function"], 131 | 'nvcc': ['-arch=sm_52', 132 | '--ptxas-options=-v', 133 | '-c', 134 | '--compiler-options', 135 | "'-fPIC'"]}, 136 | include_dirs=[numpy_include, CUDA['include']] 137 | ), 138 | Extension( 139 | 'pycocotools._mask', 140 | sources=['pycocotools/maskApi.c', 'pycocotools/_mask.pyx'], 141 | include_dirs=[numpy_include, 'pycocotools'], 142 | extra_compile_args={ 143 | 'gcc': ['-Wno-cpp', '-Wno-unused-function', '-std=c99']}, 144 | ), 145 | ] 146 | 147 | setup( 148 | name='mot_utils', 149 | ext_modules=ext_modules, 150 | # inject our custom trigger 151 | cmdclass={'build_ext': custom_build_ext}, 152 | ) 153 | -------------------------------------------------------------------------------- /SSD/data/voc_eval.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Bharath Hariharan 5 | # -------------------------------------------------------- 6 | 7 | import xml.etree.ElementTree as ET 8 | import os 9 | import pickle 10 | import numpy as np 11 | import pdb 12 | 13 | 14 | def parse_rec(filename): 15 | """ Parse a PASCAL VOC xml file """ 16 | tree = ET.parse(filename) 17 | objects = [] 18 | for obj in tree.findall('object'): 19 | obj_struct = {} 20 | obj_struct['name'] = obj.find('name').text 21 | obj_struct['pose'] =
obj.find('pose').text 22 | obj_struct['truncated'] = int(obj.find('truncated').text) 23 | obj_struct['difficult'] = int(obj.find('difficult').text) 24 | bbox = obj.find('bndbox') 25 | obj_struct['bbox'] = [int(bbox.find('xmin').text), 26 | int(bbox.find('ymin').text), 27 | int(bbox.find('xmax').text), 28 | int(bbox.find('ymax').text)] 29 | objects.append(obj_struct) 30 | 31 | return objects 32 | 33 | 34 | 35 | def voc_ap(rec, prec, use_07_metric=False): 36 | """ ap = voc_ap(rec, prec, [use_07_metric]) 37 | Compute VOC AP given precision and recall. 38 | If use_07_metric is true, uses the 39 | VOC 07 11 point method (default:False). 40 | """ 41 | if use_07_metric: 42 | # 11 point metric 43 | ap = 0. 44 | for t in np.arange(0., 1.1, 0.1): 45 | if np.sum(rec >= t) == 0: 46 | p = 0 47 | else: 48 | p = np.max(prec[rec >= t]) 49 | ap = ap + p / 11. 50 | else: 51 | # correct AP calculation 52 | # first append sentinel values at the end 53 | mrec = np.concatenate(([0.], rec, [1.])) 54 | mpre = np.concatenate(([0.], prec, [0.])) 55 | 56 | # compute the precision envelope 57 | for i in range(mpre.size - 1, 0, -1): 58 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 59 | 60 | # to calculate area under PR curve, look for points 61 | # where X axis (recall) changes value 62 | i = np.where(mrec[1:] != mrec[:-1])[0] 63 | 64 | # and sum (\Delta recall) * prec 65 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 66 | return ap 67 | 68 | def voc_eval(detpath, 69 | annopath, 70 | imagesetfile, 71 | classname, 72 | cachedir, 73 | ovthresh=0.5, 74 | use_07_metric=False): 75 | """rec, prec, ap = voc_eval(detpath, 76 | annopath, 77 | imagesetfile, 78 | classname, 79 | [ovthresh], 80 | [use_07_metric]) 81 | 82 | Top level function that does the PASCAL VOC evaluation. 83 | 84 | detpath: Path to detections 85 | detpath.format(classname) should produce the detection results file. 86 | annopath: Path to annotations 87 | annopath.format(imagename) should be the xml annotations file. 88 | imagesetfile: Text file containing the list of images, one image per line. 
89 | classname: Category name (duh) 90 | cachedir: Directory for caching the annotations 91 | [ovthresh]: Overlap threshold (default = 0.5) 92 | [use_07_metric]: Whether to use VOC07's 11 point AP computation 93 | (default False) 94 | """ 95 | # assumes detections are in detpath.format(classname) 96 | # assumes annotations are in annopath.format(imagename) 97 | # assumes imagesetfile is a text file with each line an image name 98 | # cachedir caches the annotations in a pickle file 99 | 100 | # first load gt 101 | if not os.path.isdir(cachedir): 102 | os.mkdir(cachedir) 103 | cachefile = os.path.join(cachedir, 'annots.pkl') 104 | # read list of images 105 | with open(imagesetfile, 'r') as f: 106 | lines = f.readlines() 107 | imagenames = [x.strip() for x in lines] 108 | 109 | if not os.path.isfile(cachefile): 110 | # load annots 111 | recs = {} 112 | for i, imagename in enumerate(imagenames): 113 | recs[imagename] = parse_rec(annopath.format(imagename)) 114 | if i % 100 == 0: 115 | print('Reading annotation for {:d}/{:d}'.format( 116 | i + 1, len(imagenames))) 117 | # save 118 | print('Saving cached annotations to {:s}'.format(cachefile)) 119 | with open(cachefile, 'wb') as f: 120 | pickle.dump(recs, f) 121 | else: 122 | # load 123 | with open(cachefile, 'rb') as f: 124 | recs = pickle.load(f) 125 | 126 | # extract gt objects for this class 127 | class_recs = {} 128 | npos = 0 129 | for imagename in imagenames: 130 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 131 | bbox = np.array([x['bbox'] for x in R]) 132 | difficult = np.array([x['difficult'] for x in R]).astype(np.bool) 133 | det = [False] * len(R) 134 | npos = npos + sum(~difficult) 135 | class_recs[imagename] = {'bbox': bbox, 136 | 'difficult': difficult, 137 | 'det': det} 138 | 139 | # read dets 140 | detfile = detpath.format(classname) 141 | with open(detfile, 'r') as f: 142 | lines = f.readlines() 143 | 144 | splitlines = [x.strip().split(' ') for x in lines] 145 | image_ids = [x[0] for x in splitlines] 146 | confidence = np.array([float(x[1]) for x in splitlines]) 147 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 148 | 149 | # sort by confidence 150 | sorted_ind = np.argsort(-confidence) 151 | sorted_scores = np.sort(-confidence) 152 | BB = BB[sorted_ind, :] 153 | image_ids = [image_ids[x] for x in sorted_ind] 154 | 155 | # go down dets and mark TPs and FPs 156 | nd = len(image_ids) 157 | tp = np.zeros(nd) 158 | fp = np.zeros(nd) 159 | for d in range(nd): 160 | R = class_recs[image_ids[d]] 161 | bb = BB[d, :].astype(float) 162 | ovmax = -np.inf 163 | BBGT = R['bbox'].astype(float) 164 | 165 | if BBGT.size > 0: 166 | # compute overlaps 167 | # intersection 168 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 169 | iymin = np.maximum(BBGT[:, 1], bb[1]) 170 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 171 | iymax = np.minimum(BBGT[:, 3], bb[3]) 172 | iw = np.maximum(ixmax - ixmin + 1., 0.) 173 | ih = np.maximum(iymax - iymin + 1., 0.) 174 | inters = iw * ih 175 | 176 | # union 177 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 178 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 179 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 180 | 181 | overlaps = inters / uni 182 | ovmax = np.max(overlaps) 183 | jmax = np.argmax(overlaps) 184 | 185 | if ovmax > ovthresh: 186 | if not R['difficult'][jmax]: 187 | if not R['det'][jmax]: 188 | tp[d] = 1. 189 | R['det'][jmax] = 1 190 | else: 191 | fp[d] = 1. 192 | else: 193 | fp[d] = 1. 
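# Note on the matching loop above: detections are visited in descending
# confidence order, and a detection counts as a TP only when it overlaps a
# not-yet-claimed, non-difficult GT box by more than ovthresh; R['det'][jmax]
# marks a GT as claimed, so duplicate detections of the same object become
# FPs, while matches to difficult GTs are neither TPs nor FPs.
# Illustrative example with hypothetical values: tp = [1, 1, 0, 1],
# fp = [0, 0, 1, 0] and npos = 5 give, after the cumulative sums below,
# rec = [0.2, 0.4, 0.4, 0.6] and prec = [1.0, 1.0, 0.667, 0.75].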
194 | 195 | # compute precision recall 196 | fp = np.cumsum(fp) 197 | tp = np.cumsum(tp) 198 | rec = tp / float(npos) 199 | # avoid divide by zero in case the first detection matches a difficult 200 | # ground truth 201 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 202 | ap = voc_ap(rec, prec, use_07_metric) 203 | 204 | return rec, prec, ap 205 | -------------------------------------------------------------------------------- /SSD/layers/modules/multibox_loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torch.autograd import Variable 7 | from utils.box_utils import match, match_mixup,point_form 8 | GPU = False 9 | if torch.cuda.is_available(): 10 | GPU = True 11 | from .loss import * 12 | 13 | class MultiBoxLoss(nn.Module): 14 | """SSD Weighted Loss Function 15 | Compute Targets: 16 | 1) Produce Confidence Target Indices by matching ground truth boxes 17 | with (default) 'priorboxes' that have jaccard index > threshold parameter 18 | (default threshold: 0.5). 19 | 2) Produce localization target by 'encoding' variance into offsets of ground 20 | truth boxes and their matched 'priorboxes'. 21 | 3) Hard negative mining to filter the excessive number of negative examples 22 | that comes with using a large number of default bounding boxes. 23 | (default negative:positive ratio 3:1) 24 | Objective Loss: 25 | L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 26 | Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss 27 | weighted by α which is set to 1 by cross val. 28 | Args: 29 | c: class confidences, 30 | l: predicted boxes, 31 | g: ground truth boxes 32 | N: number of matched default boxes 33 | See: https://arxiv.org/pdf/1512.02325.pdf for more details. 34 | """ 35 | 36 | 37 | def __init__(self, num_classes,overlap_thresh,prior_for_matching,bkg_label,neg_mining,neg_pos,neg_overlap,encode_target, 38 | label_smmooth=False, balance_l1=False, focal_loss=False, giou=False): 39 | super(MultiBoxLoss, self).__init__() 40 | self.num_classes = num_classes 41 | self.threshold = overlap_thresh 42 | self.background_label = bkg_label 43 | self.encode_target = encode_target 44 | self.use_prior_for_matching = prior_for_matching 45 | self.do_neg_mining = neg_mining 46 | self.negpos_ratio = neg_pos 47 | self.neg_overlap = neg_overlap 48 | self.variance = [0.1,0.2] 49 | self.label_smooth = label_smmooth 50 | if self.label_smooth: 51 | self.label_pos = 0.9 52 | self.label_neg = (1.0 - self.label_pos) / (self.num_classes - 1) 53 | self.balance_l1 = balance_l1 54 | self.focal_loss = focal_loss 55 | self.softmax_focal = False # using OHEM, CEWithsoftmax and Focal loss 56 | self.sigmoid_focal = False # Original Focal loss(Using sigmoid with CE) 57 | if self.focal_loss: 58 | self.softmax_focal = True 59 | if self.sigmoid_focal: 60 | self.alpha = 0.25 61 | self.gamma = 2.0 62 | self.giou = giou 63 | 64 | def forward(self, predictions, priors, targets): 65 | """Multibox Loss 66 | Args: 67 | predictions (tuple): A tuple containing loc preds, conf preds, 68 | and prior boxes from SSD net. 69 | conf shape: torch.size(batch_size,num_priors,num_classes) 70 | loc shape: torch.size(batch_size,num_priors,4) 71 | priors shape: torch.size(num_priors,4) 72 | 73 | ground_truth (tensor): Ground truth boxes and labels for a batch, 74 | shape: [batch_size,num_objs,5] (last idx is the label). 
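Return:
    loss_l, loss_c (tensor): the localization loss and the confidence
    loss, each already divided by N, the number of matched (positive)
    priors in the batch.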
75 | """ 76 | 77 | loc_data, conf_data = predictions 78 | priors = priors 79 | num = loc_data.size(0) 80 | num_priors = (priors.size(0)) 81 | num_classes = self.num_classes 82 | 83 | # match priors (default boxes) and ground truth boxes 84 | loc_t = torch.Tensor(num, num_priors, 4) 85 | conf_t = torch.LongTensor(num, num_priors) 86 | if targets[0].shape[1] == 6:# mixup 87 | weight_t = torch.Tensor(num, num_priors) 88 | for idx in range(num): 89 | defaults = priors.data 90 | if targets[idx].shape[1] == 6: # mixup 91 | truths = targets[idx][:, :-2].data 92 | labels = targets[idx][:, -2].data 93 | weight_loss = targets[idx][:, -1].data 94 | match_mixup(self.threshold, truths, defaults, self.variance, labels, loc_t, conf_t, idx, weight_t, weight_loss, self.giou) 95 | elif targets[idx].shape[1] == 5: # no moxiup 96 | truths = targets[idx][:, :-1].data 97 | labels = targets[idx][:, -1].data 98 | match(self.threshold, truths, defaults, self.variance, labels, loc_t, conf_t, idx, self.giou) 99 | else: 100 | print('The shape of targets is error') 101 | 102 | if GPU: 103 | loc_t = loc_t.cuda() 104 | conf_t = conf_t.cuda() 105 | # wrap targets 106 | loc_t = Variable(loc_t, requires_grad=False) 107 | conf_t = Variable(conf_t,requires_grad=False) 108 | 109 | pos = conf_t > 0 110 | 111 | mix_up = (False, True)[targets[0].shape[1] == 6] 112 | pos_weight = None 113 | weights_conf = None 114 | 115 | # Localization Loss (Smooth L1) 116 | pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) 117 | loc_p = loc_data[pos_idx].view(-1,4) 118 | loc_t = loc_t[pos_idx].view(-1,4) 119 | 120 | if self.giou: 121 | # prior_giou = point_form(priors) # [x,y,h,w]->[x0,y0,x1,y1] 122 | prior_giou = priors.unsqueeze(0).expand(num, num_priors, 4) 123 | prior_giou = prior_giou[pos_idx].view(-1, 4) 124 | reg_loss = GIoUloss() 125 | loss_l = reg_loss(loc_p, prior_giou, loc_t) 126 | else: 127 | if mix_up: 128 | weight_t = weight_t.cuda() 129 | weight_t = Variable(weight_t, requires_grad=False) 130 | pos_weight = weight_t[pos].view(-1, 1) 131 | 132 | reg_loss = SmoothL1_Mixup_Balance_loss(mixup=mix_up, balance=self.balance_l1, size_average=False) 133 | loss_l = reg_loss(loc_p, loc_t, pos_weight) 134 | 135 | # Confidence Loss 136 | if self.sigmoid_focal: 137 | # if use original focal loss, please modify the output of the test in models/SSD.py to the sigmoid 138 | batch_conf = conf_data.view(-1, self.num_classes) 139 | label_onehot = batch_conf.clone().zero_().scatter(1, conf_t.view(-1,1), 1) 140 | alpha = self.alpha * label_onehot + (1 - self.alpha) * (1 - label_onehot) 141 | p = torch.sigmoid(batch_conf) 142 | pt = torch.where(label_onehot==1, p, 1-p) 143 | loss_c = - alpha * ((1 - pt) ** self.gamma) * torch.log(pt) 144 | loss_c = loss_c.sum() 145 | num_pos = pos.long().sum(1, keepdim=True) 146 | else: 147 | batch_conf = conf_data.view(-1, self.num_classes) 148 | loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1)) 149 | 150 | # Hard Negative Mining 151 | loss_c[pos.view(-1, 1)] = 0 # filter out pos boxes for now 152 | loss_c = loss_c.view(num, -1) 153 | _, loss_idx = loss_c.sort(1, descending=True) 154 | _, idx_rank = loss_idx.sort(1) 155 | num_pos = pos.long().sum(1, keepdim=True) 156 | num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1) 157 | neg = idx_rank < num_neg.expand_as(idx_rank) 158 | 159 | # Confidence Loss Including Positive and Negative Examples 160 | pos_idx = pos.unsqueeze(2).expand_as(conf_data) 161 | neg_idx = neg.unsqueeze(2).expand_as(conf_data) 162 | conf_p = 
conf_data[(pos_idx + neg_idx).gt(0)].view(-1, self.num_classes) 163 | if self.label_smooth: 164 | p = conf_t.clone().view(-1, 1).float() 165 | lp = torch.where(p < 1, p + 1, torch.tensor(self.label_pos).cuda()) 166 | label = batch_conf.clone().zero_().scatter_(1, conf_t.view(-1, 1), lp) 167 | label[:, 1:][pos.clone().view(-1, 1).flatten()] += self.label_neg 168 | label_ohem = (pos + neg).view(-1, 1).expand_as(batch_conf) 169 | targets_weighted = label[label_ohem.gt(0)].view(-1, self.num_classes) 170 | else: 171 | targets_weighted = conf_t[(pos + neg).gt(0)] 172 | if mix_up: 173 | weights_conf = weight_t[(pos + neg).gt(0)] 174 | weights_conf = torch.where(weights_conf > 0, weights_conf, weights_conf + 1.0).view(-1, 1) 175 | 176 | conf_loss = Crossentropy_Mixup_SoftmaxFocal_LableSmooth_loss(mixup=mix_up,focal_loss=self.softmax_focal,gamma=2.0,alpha=1.0, 177 | label_smooth=self.label_smooth,size_average=False) 178 | loss_c = conf_loss(conf_p, targets_weighted, weights_conf) 179 | 180 | # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 181 | 182 | N = max(num_pos.data.sum().float(), 1) 183 | loss_l/=N 184 | loss_c/=N 185 | return loss_l,loss_c 186 | -------------------------------------------------------------------------------- /SSD/utils/pycocotools/maskApi.c: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #include "maskApi.h" 8 | #include 9 | #include 10 | 11 | uint umin( uint a, uint b ) { return (ab) ? a : b; } 13 | 14 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) { 15 | R->h=h; R->w=w; R->m=m; R->cnts=(m==0)?0:malloc(sizeof(uint)*m); 16 | siz j; if(cnts) for(j=0; jcnts[j]=cnts[j]; 17 | } 18 | 19 | void rleFree( RLE *R ) { 20 | free(R->cnts); R->cnts=0; 21 | } 22 | 23 | void rlesInit( RLE **R, siz n ) { 24 | siz i; *R = (RLE*) malloc(sizeof(RLE)*n); 25 | for(i=0; i0 ) { 61 | c=umin(ca,cb); cc+=c; ct=0; 62 | ca-=c; if(!ca && a0) { 83 | crowd=iscrowd!=NULL && iscrowd[g]; 84 | if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; } 85 | siz ka, kb, a, b; uint c, ca, cb, ct, i, u; int va, vb; 86 | ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0; 87 | cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1; 88 | while( ct>0 ) { 89 | c=umin(ca,cb); if(va||vb) { u+=c; if(va&&vb) i+=c; } ct=0; 90 | ca-=c; if(!ca && athr) keep[j]=0; 105 | } 106 | } 107 | } 108 | 109 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) { 110 | double h, w, i, u, ga, da; siz g, d; int crowd; 111 | for( g=0; gthr) keep[j]=0; 129 | } 130 | } 131 | } 132 | 133 | void rleToBbox( const RLE *R, BB bb, siz n ) { 134 | siz i; for( i=0; id?1:c=dy && xs>xe) || (dxye); 173 | if(flip) { t=xs; xs=xe; xe=t; t=ys; ys=ye; ye=t; } 174 | s = dx>=dy ? 
(double)(ye-ys)/dx : (double)(xe-xs)/dy; 175 | if(dx>=dy) for( d=0; d<=dx; d++ ) { 176 | t=flip?dx-d:d; u[m]=t+xs; v[m]=(int)(ys+s*t+.5); m++; 177 | } else for( d=0; d<=dy; d++ ) { 178 | t=flip?dy-d:d; v[m]=t+ys; u[m]=(int)(xs+s*t+.5); m++; 179 | } 180 | } 181 | /* get points along y-boundary and downsample */ 182 | free(x); free(y); k=m; m=0; double xd, yd; 183 | x=malloc(sizeof(int)*k); y=malloc(sizeof(int)*k); 184 | for( j=1; jw-1 ) continue; 187 | yd=(double)(v[j]h) yd=h; yd=ceil(yd); 189 | x[m]=(int) xd; y[m]=(int) yd; m++; 190 | } 191 | /* compute rle encoding given y-boundary points */ 192 | k=m; a=malloc(sizeof(uint)*(k+1)); 193 | for( j=0; j0) b[m++]=a[j++]; else { 199 | j++; if(jm, p=0; long x; int more; 206 | char *s=malloc(sizeof(char)*m*6); 207 | for( i=0; icnts[i]; if(i>2) x-=(long) R->cnts[i-2]; more=1; 209 | while( more ) { 210 | char c=x & 0x1f; x >>= 5; more=(c & 0x10) ? x!=-1 : x!=0; 211 | if(more) c |= 0x20; c+=48; s[p++]=c; 212 | } 213 | } 214 | s[p]=0; return s; 215 | } 216 | 217 | void rleFrString( RLE *R, char *s, siz h, siz w ) { 218 | siz m=0, p=0, k; long x; int more; uint *cnts; 219 | while( s[m] ) m++; cnts=malloc(sizeof(uint)*m); m=0; 220 | while( s[p] ) { 221 | x=0; k=0; more=1; 222 | while( more ) { 223 | char c=s[p]-48; x |= (c & 0x1f) << 5*k; 224 | more = c & 0x20; p++; k++; 225 | if(!more && (c & 0x10)) x |= -1 << 5*k; 226 | } 227 | if(m>2) x+=(long) cnts[m-2]; cnts[m++]=(uint) x; 228 | } 229 | rleInit(R,h,w,m,cnts); free(cnts); 230 | } 231 | -------------------------------------------------------------------------------- /SSD/models/SSD.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from .base_models import vgg, vgg_base 6 | from layers import l2norm 7 | 8 | class BasicConv(nn.Module): 9 | 10 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, 11 | gn=False, bn=False): 12 | super(BasicConv, self).__init__() 13 | self.out_channels = out_planes 14 | if gn and bn: 15 | exit("Don't allow simultaneous use of BN and GN !") 16 | bias = (gn == bn) 17 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias) 18 | self.gn = nn.GroupNorm(32, out_planes,eps=1e-5, affine=True) if gn else None 19 | self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) if bn else None 20 | self.relu = nn.ReLU(inplace=True) if relu else None 21 | 22 | def forward(self, x): 23 | x = self.conv(x) 24 | if self.gn is not None: 25 | x = self.gn(x) 26 | if self.bn is not None: 27 | x = self.bn(x) 28 | if self.relu is not None: 29 | x = self.relu(x) 30 | return x 31 | 32 | class SSD(nn.Module): 33 | """Single Shot Multibox Architecture 34 | The network is composed of a base VGG network followed by the 35 | added multibox conv layers. Each multibox layer branches into 36 | 1) conv2d for class conf scores 37 | 2) conv2d for localization predictions 38 | 3) associated priorbox layer to produce default bounding 39 | boxes specific to the layer's feature map size. 40 | See: https://arxiv.org/pdf/1712.00960.pdf or more details. 
41 | 42 | Args: 43 | base: VGG16 layers for input, size of either 300 or 512 44 | extras: extra layers that feed to multibox loc and conf layers 45 | head: "multibox head" consists of loc and conf conv layers 46 | """ 47 | 48 | def __init__(self, base, extras, head, num_classes, size, norm): 49 | super(SSD, self).__init__() 50 | self.num_classes = num_classes 51 | # TODO: implement __call__ in PriorBox 52 | self.size = size 53 | 54 | # SSD network 55 | self.base = nn.ModuleList(base) 56 | self.extras = nn.ModuleList(extras) 57 | if norm == "L2Norm": 58 | self.Norm = l2norm.L2Norm(512, 20) 59 | elif norm == "BN": 60 | self.Norm = nn.BatchNorm2d(512, eps=1e-5, momentum=0.01, affine=True) 61 | elif norm == "GN": 62 | self.Norm = nn.GroupNorm(32, 512, eps=1e-5, affine=True) # group is defaulted to 32 63 | else: 64 | exit("Error type of Normalization, please assign one of L2Norm, BN, GN") 65 | 66 | self.loc = nn.ModuleList(head[0]) 67 | self.conf = nn.ModuleList(head[1]) 68 | 69 | self.softmax = nn.Softmax(dim=1) 70 | 71 | def forward(self, x, vgg_bn=False,test=False): 72 | """Applies network layers and ops on input image(s) x. 73 | 74 | Args: 75 | x: input image or batch of images. Shape: [batch,3,300,300]. 76 | 77 | Return: 78 | Depending on phase: 79 | test: 80 | Variable(tensor) of output class label predictions, 81 | confidence score, and corresponding location predictions for 82 | each object detected. Shape: [batch,topk,7] 83 | 84 | train: 85 | list of concat outputs from: 86 | 1: confidence layers, Shape: [batch*num_priors,num_classes] 87 | 2: localization layers, Shape: [batch,num_priors*4] 88 | 3: priorbox layers, Shape: [2,num_priors*4] 89 | """ 90 | source_features = list() 91 | loc = list() 92 | conf = list() 93 | 94 | # apply vgg up to conv4_3 relu 95 | for k in range(23): 96 | x = self.base[k](x) 97 | 98 | if vgg_bn: 99 | x1 = x 100 | else: 101 | x1 = self.Norm(x) 102 | source_features.append(x1) 103 | 104 | # apply vgg up to fc7 105 | for k in range(23, len(self.base)): 106 | x = self.base[k](x) 107 | source_features.append(x) 108 | 109 | for i,k in enumerate(self.extras): 110 | x = k(x) 111 | if i % 2 == 1: 112 | source_features.append(x) 113 | 114 | # apply multibox head to source layers 115 | for (x, l, c) in zip(source_features, self.loc, self.conf): 116 | loc.append(l(x).permute(0, 2, 3, 1).contiguous()) 117 | conf.append(c(x).permute(0, 2, 3, 1).contiguous()) 118 | 119 | loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1) 120 | conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1) 121 | if test: 122 | output = ( 123 | loc.view(loc.size(0), -1, 4), # loc preds 124 | self.softmax(conf.view(-1, self.num_classes)), # conf preds 125 | # torch.sigmoid(conf.view(-1, self.num_classes)) 126 | ) 127 | else: 128 | output = ( 129 | loc.view(loc.size(0), -1, 4), 130 | conf.view(conf.size(0), -1, self.num_classes), 131 | ) 132 | return output 133 | 134 | def load_weights(self, base_file): 135 | other, ext = os.path.splitext(base_file) 136 | if ext == '.pkl' or ext == '.pth': 137 | print('Loading weights into state dict...') 138 | self.load_state_dict(torch.load(base_file, map_location=lambda storage, loc: storage)) 139 | print('Finished!') 140 | else: 141 | print('Sorry only .pth and .pkl files supported.') 142 | 143 | def add_extras(size,norm): 144 | if size == 300: 145 | if norm == "BN" or norm == "L2Norm": # if using L2Norm, we set BN for normalization on the extra layers 146 | layers = [BasicConv(1024, 256, kernel_size=1, stride=1, padding=0, bn=True), 147 | BasicConv(256, 512, 
kernel_size=3, stride=2, padding=1, bn=True), 148 | BasicConv(512, 128, kernel_size=1, stride=1, padding=0, bn=True), 149 | BasicConv(128, 256, kernel_size=3, stride=2, padding=1, bn=True), 150 | BasicConv(256, 128, kernel_size=1, stride=1, padding=0, bn=True), 151 | BasicConv(128, 256, kernel_size=3, stride=1, padding=0, bn=True), 152 | BasicConv(256, 128, kernel_size=1, stride=1, padding=0, bn=True), 153 | BasicConv(128, 256, kernel_size=3, stride=1, padding=0, bn=True),] 154 | elif norm == "GN": 155 | layers = [BasicConv(1024, 256, kernel_size=1, stride=1, padding=0, gn=True), 156 | BasicConv(256, 512, kernel_size=3, stride=2, padding=1, gn=True), 157 | BasicConv(512, 128, kernel_size=1, stride=1, padding=0, gn=True), 158 | BasicConv(128, 256, kernel_size=3, stride=2, padding=1, gn=True), 159 | BasicConv(256, 128, kernel_size=1, stride=1, padding=0, gn=True), 160 | BasicConv(128, 256, kernel_size=3, stride=1, padding=0, gn=True), 161 | BasicConv(256, 128, kernel_size=1, stride=1, padding=0, gn=True), 162 | BasicConv(128, 256, kernel_size=3, stride=1, padding=0, gn=True), ] 163 | else: 164 | exit("Error type of Normalization, please assign one of L2Norm, BN, GN") 165 | 166 | elif size == 512: 167 | layers = [BasicConv(256 * 3, 512, kernel_size=3, stride=1, padding=1), 168 | BasicConv(512, 512, kernel_size=3, stride=2, padding=1), \ 169 | BasicConv(512, 256, kernel_size=3, stride=2, padding=1), 170 | BasicConv(256, 256, kernel_size=3, stride=2, padding=1), \ 171 | BasicConv(256, 256, kernel_size=3, stride=2, padding=1), 172 | BasicConv(256, 256, kernel_size=3, stride=2, padding=1), \ 173 | BasicConv(256, 256, kernel_size=4, padding=1, stride=1)] 174 | return layers 175 | 176 | 177 | def multibox(fea_channels, cfg, num_classes): 178 | loc_layers = [] 179 | conf_layers = [] 180 | assert len(fea_channels) == len(cfg) 181 | for i, fea_channel in enumerate(fea_channels): 182 | loc_layers += [nn.Conv2d(fea_channel, cfg[i] * 4, kernel_size=3, padding=1)] 183 | conf_layers += [nn.Conv2d(fea_channel, cfg[i] * num_classes, kernel_size=3, padding=1)] 184 | return (loc_layers, conf_layers) 185 | 186 | 187 | mbox = { 188 | '300': [6, 6, 6, 6, 4, 4], # number of boxes per feature map location 189 | '512': [6, 6, 6, 6, 6, 4, 4], 190 | } 191 | fea_channels = { 192 | '300': [512, 1024, 512, 256, 256, 256], 193 | '512': [512, 512, 256, 256, 256, 256, 256]} 194 | 195 | 196 | def build_net(size=300, num_classes=21, norm="BN",vgg_bn=False): 197 | if size != 300 and size != 512: 198 | print("Error: Sorry only FSSD300 and FSSD512 is supported currently!") 199 | return 200 | 201 | return SSD(base=vgg(vgg_base[str(size)], 3, batch_norm=vgg_bn),extras=add_extras(size,norm),head=multibox(fea_channels[str(size)], mbox[str(size)], num_classes), 202 | num_classes=num_classes, size=size, norm=norm) 203 | 204 | -------------------------------------------------------------------------------- /SSD/utils/pycocotools/_mask.pyx: -------------------------------------------------------------------------------- 1 | # distutils: language = c 2 | # distutils: sources = ../common/maskApi.c 3 | 4 | #************************************************************************** 5 | # Microsoft COCO Toolbox. version 2.0 6 | # Data, paper, and tutorials available at: http://mscoco.org/ 7 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
8 | # Licensed under the Simplified BSD License [see coco/license.txt] 9 | #************************************************************************** 10 | 11 | __author__ = 'tsungyi' 12 | 13 | import sys 14 | PYTHON_VERSION = sys.version_info[0] 15 | 16 | # import both Python-level and C-level symbols of Numpy 17 | # the API uses Numpy to interface C and Python 18 | import numpy as np 19 | cimport numpy as np 20 | from libc.stdlib cimport malloc, free 21 | 22 | # intialized Numpy. must do. 23 | np.import_array() 24 | 25 | # import numpy C function 26 | # we use PyArray_ENABLEFLAGS to make Numpy ndarray responsible to memoery management 27 | cdef extern from "numpy/arrayobject.h": 28 | void PyArray_ENABLEFLAGS(np.ndarray arr, int flags) 29 | 30 | # Declare the prototype of the C functions in MaskApi.h 31 | cdef extern from "maskApi.h": 32 | ctypedef unsigned int uint 33 | ctypedef unsigned long siz 34 | ctypedef unsigned char byte 35 | ctypedef double* BB 36 | ctypedef struct RLE: 37 | siz h, 38 | siz w, 39 | siz m, 40 | uint* cnts, 41 | void rlesInit( RLE **R, siz n ) 42 | void rleEncode( RLE *R, const byte *M, siz h, siz w, siz n ) 43 | void rleDecode( const RLE *R, byte *mask, siz n ) 44 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ) 45 | void rleArea( const RLE *R, siz n, uint *a ) 46 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ) 47 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) 48 | void rleToBbox( const RLE *R, BB bb, siz n ) 49 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ) 50 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ) 51 | char* rleToString( const RLE *R ) 52 | void rleFrString( RLE *R, char *s, siz h, siz w ) 53 | 54 | # python class to wrap RLE array in C 55 | # the class handles the memory allocation and deallocation 56 | cdef class RLEs: 57 | cdef RLE *_R 58 | cdef siz _n 59 | 60 | def __cinit__(self, siz n =0): 61 | rlesInit(&self._R, n) 62 | self._n = n 63 | 64 | # free the RLE array here 65 | def __dealloc__(self): 66 | if self._R is not NULL: 67 | for i in range(self._n): 68 | free(self._R[i].cnts) 69 | free(self._R) 70 | def __getattr__(self, key): 71 | if key == 'n': 72 | return self._n 73 | raise AttributeError(key) 74 | 75 | # python class to wrap Mask array in C 76 | # the class handles the memory allocation and deallocation 77 | cdef class Masks: 78 | cdef byte *_mask 79 | cdef siz _h 80 | cdef siz _w 81 | cdef siz _n 82 | 83 | def __cinit__(self, h, w, n): 84 | self._mask = malloc(h*w*n* sizeof(byte)) 85 | self._h = h 86 | self._w = w 87 | self._n = n 88 | # def __dealloc__(self): 89 | # the memory management of _mask has been passed to np.ndarray 90 | # it doesn't need to be freed here 91 | 92 | # called when passing into np.array() and return an np.ndarray in column-major order 93 | def __array__(self): 94 | cdef np.npy_intp shape[1] 95 | shape[0] = self._h*self._w*self._n 96 | # Create a 1D array, and reshape it to fortran/Matlab column-major array 97 | ndarray = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT8, self._mask).reshape((self._h, self._w, self._n), order='F') 98 | # The _mask allocated by Masks is now handled by ndarray 99 | PyArray_ENABLEFLAGS(ndarray, np.NPY_OWNDATA) 100 | return ndarray 101 | 102 | # internal conversion from Python RLEs object to compressed RLE format 103 | def _toString(RLEs Rs): 104 | cdef siz n = Rs.n 105 | cdef bytes py_string 106 | cdef char* c_string 107 | objs = [] 108 | for i in range(n): 109 | c_string = rleToString( 
&Rs._R[i] ) 110 | py_string = c_string 111 | objs.append({ 112 | 'size': [Rs._R[i].h, Rs._R[i].w], 113 | 'counts': py_string 114 | }) 115 | free(c_string) 116 | return objs 117 | 118 | # internal conversion from compressed RLE format to Python RLEs object 119 | def _frString(rleObjs): 120 | cdef siz n = len(rleObjs) 121 | Rs = RLEs(n) 122 | cdef bytes py_string 123 | cdef char* c_string 124 | for i, obj in enumerate(rleObjs): 125 | if PYTHON_VERSION == 2: 126 | py_string = str(obj['counts']).encode('utf8') 127 | elif PYTHON_VERSION == 3: 128 | py_string = str.encode(obj['counts']) if type(obj['counts']) == str else obj['counts'] 129 | else: 130 | raise Exception('Python version must be 2 or 3') 131 | c_string = py_string 132 | rleFrString( &Rs._R[i], c_string, obj['size'][0], obj['size'][1] ) 133 | return Rs 134 | 135 | # encode mask to RLEs objects 136 | # list of RLE string can be generated by RLEs member function 137 | def encode(np.ndarray[np.uint8_t, ndim=3, mode='fortran'] mask): 138 | h, w, n = mask.shape[0], mask.shape[1], mask.shape[2] 139 | cdef RLEs Rs = RLEs(n) 140 | rleEncode(Rs._R,mask.data,h,w,n) 141 | objs = _toString(Rs) 142 | return objs 143 | 144 | # decode mask from compressed list of RLE string or RLEs object 145 | def decode(rleObjs): 146 | cdef RLEs Rs = _frString(rleObjs) 147 | h, w, n = Rs._R[0].h, Rs._R[0].w, Rs._n 148 | masks = Masks(h, w, n) 149 | rleDecode(Rs._R, masks._mask, n); 150 | return np.array(masks) 151 | 152 | def merge(rleObjs, intersect=0): 153 | cdef RLEs Rs = _frString(rleObjs) 154 | cdef RLEs R = RLEs(1) 155 | rleMerge(Rs._R, R._R, Rs._n, intersect) 156 | obj = _toString(R)[0] 157 | return obj 158 | 159 | def area(rleObjs): 160 | cdef RLEs Rs = _frString(rleObjs) 161 | cdef uint* _a = malloc(Rs._n* sizeof(uint)) 162 | rleArea(Rs._R, Rs._n, _a) 163 | cdef np.npy_intp shape[1] 164 | shape[0] = Rs._n 165 | a = np.array((Rs._n, ), dtype=np.uint8) 166 | a = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT32, _a) 167 | PyArray_ENABLEFLAGS(a, np.NPY_OWNDATA) 168 | return a 169 | 170 | # iou computation. support function overload (RLEs-RLEs and bbox-bbox). 171 | def iou( dt, gt, pyiscrowd ): 172 | def _preproc(objs): 173 | if len(objs) == 0: 174 | return objs 175 | if type(objs) == np.ndarray: 176 | if len(objs.shape) == 1: 177 | objs = objs.reshape((objs[0], 1)) 178 | # check if it's Nx4 bbox 179 | if not len(objs.shape) == 2 or not objs.shape[1] == 4: 180 | raise Exception('numpy ndarray input is only for *bounding boxes* and should have Nx4 dimension') 181 | objs = objs.astype(np.double) 182 | elif type(objs) == list: 183 | # check if list is in box format and convert it to np.ndarray 184 | isbox = np.all(np.array([(len(obj)==4) and ((type(obj)==list) or (type(obj)==np.ndarray)) for obj in objs])) 185 | isrle = np.all(np.array([type(obj) == dict for obj in objs])) 186 | if isbox: 187 | objs = np.array(objs, dtype=np.double) 188 | if len(objs.shape) == 1: 189 | objs = objs.reshape((1,objs.shape[0])) 190 | elif isrle: 191 | objs = _frString(objs) 192 | else: 193 | raise Exception('list input can be bounding box (Nx4) or RLEs ([RLE])') 194 | else: 195 | raise Exception('unrecognized type. 
The following type: RLEs (rle), np.ndarray (box), and list (box) are supported.') 196 | return objs 197 | def _rleIou(RLEs dt, RLEs gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou): 198 | rleIou( dt._R, gt._R, m, n, iscrowd.data, _iou.data ) 199 | def _bbIou(np.ndarray[np.double_t, ndim=2] dt, np.ndarray[np.double_t, ndim=2] gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou): 200 | bbIou( dt.data, gt.data, m, n, iscrowd.data, _iou.data ) 201 | def _len(obj): 202 | cdef siz N = 0 203 | if type(obj) == RLEs: 204 | N = obj.n 205 | elif len(obj)==0: 206 | pass 207 | elif type(obj) == np.ndarray: 208 | N = obj.shape[0] 209 | return N 210 | # convert iscrowd to numpy array 211 | cdef np.ndarray[np.uint8_t, ndim=1] iscrowd = np.array(pyiscrowd, dtype=np.uint8) 212 | # simple type checking 213 | cdef siz m, n 214 | dt = _preproc(dt) 215 | gt = _preproc(gt) 216 | m = _len(dt) 217 | n = _len(gt) 218 | if m == 0 or n == 0: 219 | return [] 220 | if not type(dt) == type(gt): 221 | raise Exception('The dt and gt should have the same data type, either RLEs, list or np.ndarray') 222 | 223 | # define local variables 224 | cdef double* _iou = 0 225 | cdef np.npy_intp shape[1] 226 | # check type and assign iou function 227 | if type(dt) == RLEs: 228 | _iouFun = _rleIou 229 | elif type(dt) == np.ndarray: 230 | _iouFun = _bbIou 231 | else: 232 | raise Exception('input data type not allowed.') 233 | _iou = malloc(m*n* sizeof(double)) 234 | iou = np.zeros((m*n, ), dtype=np.double) 235 | shape[0] = m*n 236 | iou = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _iou) 237 | PyArray_ENABLEFLAGS(iou, np.NPY_OWNDATA) 238 | _iouFun(dt, gt, iscrowd, m, n, iou) 239 | return iou.reshape((m,n), order='F') 240 | 241 | def toBbox( rleObjs ): 242 | cdef RLEs Rs = _frString(rleObjs) 243 | cdef siz n = Rs.n 244 | cdef BB _bb = malloc(4*n* sizeof(double)) 245 | rleToBbox( Rs._R, _bb, n ) 246 | cdef np.npy_intp shape[1] 247 | shape[0] = 4*n 248 | bb = np.array((1,4*n), dtype=np.double) 249 | bb = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _bb).reshape((n, 4)) 250 | PyArray_ENABLEFLAGS(bb, np.NPY_OWNDATA) 251 | return bb 252 | 253 | def frBbox(np.ndarray[np.double_t, ndim=2] bb, siz h, siz w ): 254 | cdef siz n = bb.shape[0] 255 | Rs = RLEs(n) 256 | rleFrBbox( Rs._R, bb.data, h, w, n ) 257 | objs = _toString(Rs) 258 | return objs 259 | 260 | def frPoly( poly, siz h, siz w ): 261 | cdef np.ndarray[np.double_t, ndim=1] np_poly 262 | n = len(poly) 263 | Rs = RLEs(n) 264 | for i, p in enumerate(poly): 265 | np_poly = np.array(p, dtype=np.double, order='F') 266 | rleFrPoly( &Rs._R[i], np_poly.data, int(len(p)/2), h, w ) 267 | objs = _toString(Rs) 268 | return objs 269 | 270 | def frUncompressedRLE(ucRles, siz h, siz w): 271 | cdef np.ndarray[np.uint32_t, ndim=1] cnts 272 | cdef RLE R 273 | cdef uint *data 274 | n = len(ucRles) 275 | objs = [] 276 | for i in range(n): 277 | Rs = RLEs(1) 278 | cnts = np.array(ucRles[i]['counts'], dtype=np.uint32) 279 | # time for malloc can be saved here but it's fine 280 | data = malloc(len(cnts)* sizeof(uint)) 281 | for j in range(len(cnts)): 282 | data[j] = cnts[j] 283 | R = RLE(ucRles[i]['size'][0], ucRles[i]['size'][1], len(cnts), data) 284 | Rs._R[0] = R 285 | objs.append(_toString(Rs)[0]) 286 | return objs 287 | 288 | def frPyObjects(pyobj, h, w): 289 | # encode rle from a list of python objects 290 | if type(pyobj) == np.ndarray: 291 | objs = frBbox(pyobj, h, w) 292 | elif type(pyobj) == 
list and len(pyobj[0]) == 4: 293 | objs = frBbox(pyobj, h, w) 294 | elif type(pyobj) == list and len(pyobj[0]) > 4: 295 | objs = frPoly(pyobj, h, w) 296 | elif type(pyobj) == list and type(pyobj[0]) == dict \ 297 | and 'counts' in pyobj[0] and 'size' in pyobj[0]: 298 | objs = frUncompressedRLE(pyobj, h, w) 299 | # encode rle from single python object 300 | elif type(pyobj) == list and len(pyobj) == 4: 301 | objs = frBbox([pyobj], h, w)[0] 302 | elif type(pyobj) == list and len(pyobj) > 4: 303 | objs = frPoly([pyobj], h, w)[0] 304 | elif type(pyobj) == dict and 'counts' in pyobj and 'size' in pyobj: 305 | objs = frUncompressedRLE([pyobj], h, w)[0] 306 | else: 307 | raise Exception('input type is not supported.') 308 | return objs 309 | -------------------------------------------------------------------------------- /SSD/data/coco.py: -------------------------------------------------------------------------------- 1 | """VOC Dataset Classes 2 | 3 | Original author: Francisco Massa 4 | https://github.com/fmassa/vision/blob/voc_dataset/torchvision/datasets/voc.py 5 | 6 | Updated by: Ellis Brown, Max deGroot 7 | """ 8 | 9 | import os 10 | import pickle 11 | import os.path 12 | import sys 13 | import torch 14 | import torch.utils.data as data 15 | import torchvision.transforms as transforms 16 | import cv2 17 | import numpy as np 18 | import json 19 | import uuid 20 | 21 | from utils.pycocotools.coco import COCO 22 | from utils.pycocotools.cocoeval import COCOeval 23 | from utils.pycocotools import mask as COCOmask 24 | 25 | 26 | class COCODetection(data.Dataset): 27 | 28 | """VOC Detection Dataset Object 29 | 30 | input is image, target is annotation 31 | 32 | Arguments: 33 | root (string): filepath to VOCdevkit folder. 34 | image_set (string): imageset to use (eg. 
'train', 'val', 'test') 35 | transform (callable, optional): transformation to perform on the 36 | input image 37 | target_transform (callable, optional): transformation to perform on the 38 | target `annotation` 39 | (eg: take in caption string, return tensor of word indices) 40 | dataset_name (string, optional): which dataset to load 41 | (default: 'VOC2007') 42 | """ 43 | 44 | def __init__(self, root, image_sets, preproc=None, target_transform=None, 45 | dataset_name='COCO'): 46 | self.root = root 47 | self.cache_path = os.path.join(self.root, 'cache') 48 | self.image_set = image_sets 49 | self.preproc = preproc 50 | self.target_transform = target_transform 51 | self.name = dataset_name 52 | self.ids = list() 53 | self.annotations = list() 54 | self._view_map = { 55 | 'minival2014' : 'val2014', # 5k val2014 subset 56 | 'valminusminival2014' : 'val2014', # val2014 \setminus minival2014 57 | 'test-dev2015' : 'test2015', 58 | } 59 | 60 | for (year, image_set) in image_sets: 61 | coco_name = image_set+year 62 | data_name = (self._view_map[coco_name] 63 | if coco_name in self._view_map 64 | else coco_name) 65 | annofile = self._get_ann_file(coco_name) 66 | _COCO = COCO(annofile) 67 | self._COCO = _COCO 68 | self.coco_name = coco_name 69 | cats = _COCO.loadCats(_COCO.getCatIds()) 70 | self._classes = tuple(['__background__'] + [c['name'] for c in cats]) 71 | self.num_classes = len(self._classes) 72 | self._class_to_ind = dict(zip(self._classes, range(self.num_classes))) 73 | self._class_to_coco_cat_id = dict(zip([c['name'] for c in cats], 74 | _COCO.getCatIds())) 75 | indexes = _COCO.getImgIds() 76 | self.image_indexes = indexes 77 | self.ids.extend([self.image_path_from_index(data_name, index) for index in indexes ]) 78 | if image_set.find('test') != -1: 79 | print('test set will not load annotations!') 80 | else: 81 | self.annotations.extend(self._load_coco_annotations(coco_name, indexes,_COCO)) 82 | 83 | 84 | 85 | def image_path_from_index(self, name, index): 86 | """ 87 | Construct an image path from the image's "index" identifier. 88 | """ 89 | # Example image path for index=119993: 90 | # images/train2014/COCO_train2014_000000119993.jpg 91 | file_name = ('COCO_' + name + '_' + 92 | str(index).zfill(12) + '.jpg') 93 | image_path = os.path.join(self.root, 'images', 94 | name, file_name) 95 | assert os.path.exists(image_path), \ 96 | 'Path does not exist: {}'.format(image_path) 97 | return image_path 98 | 99 | 100 | def _get_ann_file(self, name): 101 | prefix = 'instances' if name.find('test') == -1 \ 102 | else 'image_info' 103 | return os.path.join(self.root, 'annotations', 104 | prefix + '_' + name + '.json') 105 | 106 | 107 | def _load_coco_annotations(self, coco_name, indexes, _COCO): 108 | cache_file=os.path.join(self.cache_path,coco_name+'_gt_roidb.pkl') 109 | if os.path.exists(cache_file): 110 | with open(cache_file, 'rb') as fid: 111 | roidb = pickle.load(fid) 112 | print('{} gt roidb loaded from {}'.format(coco_name,cache_file)) 113 | return roidb 114 | 115 | gt_roidb = [self._annotation_from_index(index, _COCO) 116 | for index in indexes] 117 | with open(cache_file, 'wb') as fid: 118 | pickle.dump(gt_roidb,fid,pickle.HIGHEST_PROTOCOL) 119 | print('wrote gt roidb to {}'.format(cache_file)) 120 | return gt_roidb 121 | 122 | 123 | def _annotation_from_index(self, index, _COCO): 124 | """ 125 | Loads COCO bounding-box instance annotations. Crowd instances are 126 | handled by marking their overlaps (with all categories) to -1. 
This 127 | overlap value means that crowd "instances" are excluded from training. 128 | """ 129 | im_ann = _COCO.loadImgs(index)[0] 130 | width = im_ann['width'] 131 | height = im_ann['height'] 132 | 133 | annIds = _COCO.getAnnIds(imgIds=index, iscrowd=None) 134 | objs = _COCO.loadAnns(annIds) 135 | # Sanitize bboxes -- some are invalid 136 | valid_objs = [] 137 | for obj in objs: 138 | x1 = np.max((0, obj['bbox'][0])) 139 | y1 = np.max((0, obj['bbox'][1])) 140 | x2 = np.min((width - 1, x1 + np.max((0, obj['bbox'][2] - 1)))) 141 | y2 = np.min((height - 1, y1 + np.max((0, obj['bbox'][3] - 1)))) 142 | if obj['area'] > 0 and x2 >= x1 and y2 >= y1: 143 | obj['clean_bbox'] = [x1, y1, x2, y2] 144 | valid_objs.append(obj) 145 | objs = valid_objs 146 | num_objs = len(objs) 147 | 148 | res = np.zeros((num_objs, 5)) 149 | 150 | # Lookup table to map from COCO category ids to our internal class 151 | # indices 152 | coco_cat_id_to_class_ind = dict([(self._class_to_coco_cat_id[cls], 153 | self._class_to_ind[cls]) 154 | for cls in self._classes[1:]]) 155 | 156 | for ix, obj in enumerate(objs): 157 | cls = coco_cat_id_to_class_ind[obj['category_id']] 158 | res[ix, 0:4] = obj['clean_bbox'] 159 | res[ix, 4] = cls 160 | 161 | return res 162 | 163 | 164 | 165 | def __getitem__(self, index): 166 | img_id = self.ids[index] 167 | target = self.annotations[index] 168 | img = cv2.imread(img_id, cv2.IMREAD_COLOR) 169 | height, width, _ = img.shape 170 | 171 | if self.target_transform is not None: 172 | target = self.target_transform(target) 173 | 174 | 175 | if self.preproc is not None: 176 | img, target = self.preproc(img, target) 177 | 178 | # target = self.target_transform(target, width, height) 179 | #print(target.shape) 180 | 181 | return img, target 182 | 183 | def __len__(self): 184 | return len(self.ids) 185 | 186 | def pull_image(self, index): 187 | '''Returns the original image object at index in PIL form 188 | 189 | Note: not using self.__getitem__(), as any transformations passed in 190 | could mess up this functionality. 191 | 192 | Argument: 193 | index (int): index of img to show 194 | Return: 195 | PIL img 196 | ''' 197 | img_id = self.ids[index] 198 | return cv2.imread(img_id, cv2.IMREAD_COLOR) 199 | 200 | 201 | def pull_tensor(self, index): 202 | '''Returns the original image at an index in tensor form 203 | 204 | Note: not using self.__getitem__(), as any transformations passed in 205 | could mess up this functionality. 
206 | 207 | Argument: 208 | index (int): index of img to show 209 | Return: 210 | tensorized version of img, squeezed 211 | ''' 212 | to_tensor = transforms.ToTensor() 213 | return torch.Tensor(self.pull_image(index)).unsqueeze_(0) 214 | 215 | def _print_detection_eval_metrics(self, coco_eval): 216 | IoU_lo_thresh = 0.5 217 | IoU_hi_thresh = 0.95 218 | def _get_thr_ind(coco_eval, thr): 219 | ind = np.where((coco_eval.params.iouThrs > thr - 1e-5) & 220 | (coco_eval.params.iouThrs < thr + 1e-5))[0][0] 221 | iou_thr = coco_eval.params.iouThrs[ind] 222 | assert np.isclose(iou_thr, thr) 223 | return ind 224 | 225 | ind_lo = _get_thr_ind(coco_eval, IoU_lo_thresh) 226 | ind_hi = _get_thr_ind(coco_eval, IoU_hi_thresh) 227 | # precision has dims (iou, recall, cls, area range, max dets) 228 | # area range index 0: all area ranges 229 | # max dets index 2: 100 per image 230 | precision = \ 231 | coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2] 232 | ap_default = np.mean(precision[precision > -1]) 233 | print('~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] ' 234 | '~~~~'.format(IoU_lo_thresh, IoU_hi_thresh)) 235 | print('{:.1f}'.format(100 * ap_default)) 236 | for cls_ind, cls in enumerate(self._classes): 237 | if cls == '__background__': 238 | continue 239 | # minus 1 because of __background__ 240 | precision = coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, cls_ind - 1, 0, 2] 241 | ap = np.mean(precision[precision > -1]) 242 | print('{:.1f}'.format(100 * ap)) 243 | 244 | print('~~~~ Summary metrics ~~~~') 245 | coco_eval.summarize() 246 | 247 | def _do_detection_eval(self, res_file, output_dir): 248 | ann_type = 'bbox' 249 | coco_dt = self._COCO.loadRes(res_file) 250 | coco_eval = COCOeval(self._COCO, coco_dt) 251 | coco_eval.params.useSegm = (ann_type == 'segm') 252 | coco_eval.evaluate() 253 | coco_eval.accumulate() 254 | self._print_detection_eval_metrics(coco_eval) 255 | eval_file = os.path.join(output_dir, 'detection_results.pkl') 256 | with open(eval_file, 'wb') as fid: 257 | pickle.dump(coco_eval, fid, pickle.HIGHEST_PROTOCOL) 258 | print('Wrote COCO eval results to: {}'.format(eval_file)) 259 | 260 | def _coco_results_one_category(self, boxes, cat_id): 261 | results = [] 262 | for im_ind, index in enumerate(self.image_indexes): 263 | dets = boxes[im_ind].astype(np.float) 264 | if dets == []: 265 | continue 266 | scores = dets[:, -1] 267 | xs = dets[:, 0] 268 | ys = dets[:, 1] 269 | ws = dets[:, 2] - xs + 1 270 | hs = dets[:, 3] - ys + 1 271 | results.extend( 272 | [{'image_id' : index, 273 | 'category_id' : cat_id, 274 | 'bbox' : [xs[k], ys[k], ws[k], hs[k]], 275 | 'score' : scores[k]} for k in range(dets.shape[0])]) 276 | return results 277 | 278 | def _write_coco_results_file(self, all_boxes, res_file): 279 | # [{"image_id": 42, 280 | # "category_id": 18, 281 | # "bbox": [258.15,41.29,348.26,243.78], 282 | # "score": 0.236}, ...] 
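# The loop below collects results class by class: the internal class index
# is mapped back to the original COCO category id via _class_to_coco_cat_id,
# and the corner-to-[x, y, w, h] conversion was already done (with the +1
# width/height offsets) in _coco_results_one_category above, so everything
# here is just accumulated and dumped into a single json file.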
283 | results = [] 284 | for cls_ind, cls in enumerate(self._classes): 285 | if cls == '__background__': 286 | continue 287 | print('Collecting {} results ({:d}/{:d})'.format(cls, cls_ind, 288 | self.num_classes )) 289 | coco_cat_id = self._class_to_coco_cat_id[cls] 290 | results.extend(self._coco_results_one_category(all_boxes[cls_ind], 291 | coco_cat_id)) 292 | ''' 293 | if cls_ind ==30: 294 | res_f = res_file+ '_1.json' 295 | print('Writing results json to {}'.format(res_f)) 296 | with open(res_f, 'w') as fid: 297 | json.dump(results, fid) 298 | results = [] 299 | ''' 300 | #res_f2 = res_file+'_2.json' 301 | print('Writing results json to {}'.format(res_file)) 302 | with open(res_file, 'w') as fid: 303 | json.dump(results, fid) 304 | 305 | def evaluate_detections(self, all_boxes, output_dir): 306 | res_file = os.path.join(output_dir, ('detections_' + 307 | self.coco_name + 308 | '_results')) 309 | res_file += '.json' 310 | self._write_coco_results_file(all_boxes, res_file) 311 | # Only do evaluation on non-test sets 312 | if self.coco_name.find('test') == -1: 313 | self._do_detection_eval(res_file, output_dir) 314 | # Optionally cleanup results json file 315 | 316 | -------------------------------------------------------------------------------- /SSD/data/data_augment.py: -------------------------------------------------------------------------------- 1 | """Data augmentation functionality. Passed as callable transformations to 2 | Dataset classes. 3 | 4 | The data augmentation procedures were interpreted from @weiliu89's SSD paper 5 | http://arxiv.org/abs/1512.02325 6 | """ 7 | 8 | import torch 9 | from torchvision import transforms 10 | import cv2 11 | import numpy as np 12 | import random 13 | import math 14 | from utils.box_utils import matrix_iou 15 | # import torch_transforms 16 | 17 | def _crop(image, boxes, labels): 18 | height, width, _ = image.shape 19 | 20 | if len(boxes)== 0: 21 | return image, boxes, labels 22 | 23 | while True: 24 | mode = random.choice(( 25 | None, 26 | (0.1, None), 27 | (0.3, None), 28 | (0.5, None), 29 | (0.7, None), 30 | (0.9, None), 31 | (None, None), 32 | )) 33 | 34 | if mode is None: 35 | return image, boxes, labels 36 | 37 | min_iou, max_iou = mode 38 | if min_iou is None: 39 | min_iou = float('-inf') 40 | if max_iou is None: 41 | max_iou = float('inf') 42 | 43 | for _ in range(50): 44 | scale = random.uniform(0.3,1.) 45 | min_ratio = max(0.5, scale*scale) 46 | max_ratio = min(2, 1. 
/ scale / scale) 47 | ratio = math.sqrt(random.uniform(min_ratio, max_ratio)) 48 | w = int(scale * ratio * width) 49 | h = int((scale / ratio) * height) 50 | 51 | 52 | l = random.randrange(width - w) 53 | t = random.randrange(height - h) 54 | roi = np.array((l, t, l + w, t + h)) 55 | 56 | iou = matrix_iou(boxes, roi[np.newaxis]) 57 | 58 | if not (min_iou <= iou.min() and iou.max() <= max_iou): 59 | continue 60 | 61 | image_t = image[roi[1]:roi[3], roi[0]:roi[2]] 62 | 63 | centers = (boxes[:, :2] + boxes[:, 2:]) / 2 64 | mask = np.logical_and(roi[:2] < centers, centers < roi[2:]).all(axis=1) 65 | boxes_t = boxes[mask].copy() 66 | labels_t = labels[mask].copy() 67 | if len(boxes_t) == 0: 68 | continue 69 | 70 | boxes_t[:, :2] = np.maximum(boxes_t[:, :2], roi[:2]) 71 | boxes_t[:, :2] -= roi[:2] 72 | boxes_t[:, 2:] = np.minimum(boxes_t[:, 2:], roi[2:]) 73 | boxes_t[:, 2:] -= roi[:2] 74 | 75 | return image_t, boxes_t,labels_t 76 | 77 | def _crop_mixup(image, boxes, labels, weights): 78 | height, width, _ = image.shape 79 | 80 | if len(boxes) == 0: 81 | return image, boxes, labels, weights 82 | 83 | while True: 84 | mode = random.choice(( 85 | None, 86 | (0.1, None), 87 | (0.3, None), 88 | (0.5, None), 89 | (0.7, None), 90 | (0.9, None), 91 | (None, None), 92 | )) 93 | 94 | if mode is None: 95 | return image, boxes, labels, weights 96 | 97 | min_iou, max_iou = mode 98 | if min_iou is None: 99 | min_iou = float('-inf') 100 | if max_iou is None: 101 | max_iou = float('inf') 102 | 103 | for _ in range(50): 104 | scale = random.uniform(0.3, 1.) 105 | min_ratio = max(0.5, scale * scale) 106 | max_ratio = min(2, 1. / scale / scale) 107 | ratio = math.sqrt(random.uniform(min_ratio, max_ratio)) 108 | w = int(scale * ratio * width) 109 | h = int((scale / ratio) * height) 110 | 111 | l = random.randrange(width - w) 112 | t = random.randrange(height - h) 113 | roi = np.array((l, t, l + w, t + h)) 114 | 115 | iou = matrix_iou(boxes, roi[np.newaxis]) 116 | 117 | if not (min_iou <= iou.min() and iou.max() <= max_iou): 118 | continue 119 | 120 | image_t = image[roi[1]:roi[3], roi[0]:roi[2]] 121 | 122 | centers = (boxes[:, :2] + boxes[:, 2:]) / 2 123 | mask = np.logical_and(roi[:2] < centers, centers < roi[2:]).all(axis=1) 124 | boxes_t = boxes[mask].copy() 125 | labels_t = labels[mask].copy() 126 | weights_t = weights[mask].copy() 127 | if len(boxes_t) == 0: 128 | continue 129 | 130 | boxes_t[:, :2] = np.maximum(boxes_t[:, :2], roi[:2]) 131 | boxes_t[:, :2] -= roi[:2] 132 | boxes_t[:, 2:] = np.minimum(boxes_t[:, 2:], roi[2:]) 133 | boxes_t[:, 2:] -= roi[:2] 134 | 135 | return image_t, boxes_t, labels_t, weights_t 136 | 137 | 138 | def _distort(image): 139 | def _convert(image, alpha=1, beta=0): 140 | tmp = image.astype(float) * alpha + beta 141 | tmp[tmp < 0] = 0 142 | tmp[tmp > 255] = 255 143 | image[:] = tmp 144 | 145 | image = image.copy() 146 | 147 | if random.randrange(2): 148 | _convert(image, beta=random.uniform(-32, 32)) 149 | 150 | if random.randrange(2): 151 | _convert(image, alpha=random.uniform(0.5, 1.5)) 152 | 153 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) 154 | 155 | if random.randrange(2): 156 | tmp = image[:, :, 0].astype(int) + random.randint(-18, 18) 157 | tmp %= 180 158 | image[:, :, 0] = tmp 159 | 160 | if random.randrange(2): 161 | _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5)) 162 | 163 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) 164 | 165 | return image 166 | 167 | 168 | def _expand(image, boxes,fill, p): 169 | if random.random() > p: 170 | return image, boxes 171 | 
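    # The expansion below fires with probability p: a larger canvas (expansion
    # scale drawn uniformly from [1, 4], with aspect-ratio jitter) is filled
    # with `fill` (the per-channel dataset mean), the image is pasted at a
    # random offset, and the boxes are shifted by the same offset. Zooming out
    # this way exposes the network to more small objects during training.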
172 | height, width, depth = image.shape 173 | for _ in range(50): 174 | scale = random.uniform(1,4) 175 | 176 | min_ratio = max(0.5, 1./scale/scale) 177 | max_ratio = min(2, scale*scale) 178 | ratio = math.sqrt(random.uniform(min_ratio, max_ratio)) 179 | ws = scale*ratio 180 | hs = scale/ratio 181 | if ws < 1 or hs < 1: 182 | continue 183 | w = int(ws * width) 184 | h = int(hs * height) 185 | 186 | left = random.randint(0, w - width) 187 | top = random.randint(0, h - height) 188 | 189 | boxes_t = boxes.copy() 190 | boxes_t[:, :2] += (left, top) 191 | boxes_t[:, 2:] += (left, top) 192 | 193 | 194 | expand_image = np.empty( 195 | (h, w, depth), 196 | dtype=image.dtype) 197 | expand_image[:, :] = fill 198 | expand_image[top:top + height, left:left + width] = image 199 | image = expand_image 200 | 201 | return image, boxes_t 202 | 203 | 204 | def _mirror(image, boxes): 205 | _, width, _ = image.shape 206 | if random.randrange(2): 207 | image = image[:, ::-1] 208 | boxes = boxes.copy() 209 | boxes[:, 0::2] = width - boxes[:, 2::-2] 210 | return image, boxes 211 | 212 | 213 | def preproc_for_test(image, insize, mean): 214 | interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_NEAREST, cv2.INTER_LANCZOS4] 215 | interp_method = interp_methods[random.randrange(5)] 216 | image = cv2.resize(image, (insize, insize),interpolation=interp_method) 217 | image = image.astype(np.float32) 218 | image -= mean 219 | return image.transpose(2, 0, 1) 220 | 221 | def _random_erasing(image, boxes, means, p=0.6, sl=0.02, sh=0.2, r1=0.3): 222 | if random.uniform(0, 1) > p: 223 | return image 224 | area_boxes = (boxes[:,2]-boxes[:,0])*(boxes[:,3]-boxes[:,1]) 225 | m = [2*j for j in means] # Please ensure the value of 2*mean is less to 255. 226 | for i in range(len(boxes)): 227 | for _ in range(50): 228 | area = random.uniform(sl,sh) * area_boxes[i] 229 | aspect_ratio = random.uniform(r1, 1.0 / r1) 230 | 231 | h = int(round(math.sqrt(area * aspect_ratio))) 232 | w = int(round(math.sqrt(area / aspect_ratio))) 233 | 234 | boxes_w = boxes[i,2] - boxes[i,0] 235 | boxes_h = boxes[i,3] - boxes[i,1] 236 | 237 | if w < boxes_w and h < boxes_h: 238 | x1 = int(random.randint(0,(boxes_w - w)) + boxes[i,0]) 239 | y1 = int(random.randint(0,(boxes_h - h)) + boxes[i,1]) 240 | image[y1:y1 + h, x1:x1 + w, :] = m 241 | break 242 | 243 | for j in range(50): 244 | area = random.uniform(sl, sh) * image.shape[0] * image.shape[1] 245 | aspect_ratio = random.uniform(r1, 1.0 / r1) 246 | h = int(round(math.sqrt(area * aspect_ratio))) 247 | w = int(round(math.sqrt(area / aspect_ratio))) 248 | 249 | if w < image.shape[1] and h < image.shape[0]: 250 | x1 = int(random.randint(0, (image.shape[1] - w))) 251 | y1 = int(random.randint(0, (image.shape[0] - h))) 252 | img_crop = np.array((x1,y1,x1+w,y1+h)) 253 | 254 | ios = matrix_iou(boxes, img_crop[np.newaxis], erasing=True) 255 | if ios.max() < 0.2: 256 | image[y1:y1 + h, x1:x1 + w, :] = m 257 | break 258 | # cv2.imshow('eras.jpg',image) 259 | # cv2.waitKey() 260 | # exit() 261 | return image 262 | 263 | 264 | class preproc(object): 265 | 266 | def __init__(self, resize, rgb_means, p): 267 | self.means = rgb_means 268 | self.resize = resize 269 | self.p = p 270 | 271 | def __call__(self, image, targets, random_erasing): 272 | boxes = targets[:,:-1].copy() 273 | labels = targets[:,-1].copy() 274 | if len(boxes) == 0: 275 | #boxes = np.empty((0, 4)) 276 | targets = np.zeros((1,5)) 277 | image = preproc_for_test(image, self.resize, self.means) 278 | return 
torch.from_numpy(image), targets 279 | 280 | image_o = image.copy() 281 | targets_o = targets.copy() 282 | height_o, width_o, _ = image_o.shape 283 | boxes_o = targets_o[:,:-1] 284 | labels_o = targets_o[:,-1] 285 | boxes_o[:, 0::2] /= width_o 286 | boxes_o[:, 1::2] /= height_o 287 | labels_o = np.expand_dims(labels_o,1) 288 | targets_o = np.hstack((boxes_o,labels_o)) 289 | 290 | image_t, boxes, labels = _crop(image, boxes, labels) 291 | image_t = _distort(image_t) 292 | if random_erasing: 293 | image_t = _random_erasing(image_t, boxes, self.means) 294 | image_t, boxes = _expand(image_t, boxes, self.means, self.p) 295 | image_t, boxes = _mirror(image_t, boxes) 296 | 297 | height, width, _ = image_t.shape 298 | image_t = preproc_for_test(image_t, self.resize, self.means) 299 | boxes = boxes.copy() 300 | boxes[:, 0::2] /= width 301 | boxes[:, 1::2] /= height 302 | b_w = (boxes[:, 2] - boxes[:, 0])*1. 303 | b_h = (boxes[:, 3] - boxes[:, 1])*1. 304 | mask_b= np.minimum(b_w, b_h) > 0.01 305 | boxes_t = boxes[mask_b] 306 | labels_t = labels[mask_b].copy() 307 | 308 | if len(boxes_t)==0: 309 | image = preproc_for_test(image_o, self.resize, self.means) 310 | return torch.from_numpy(image),targets_o 311 | 312 | labels_t = np.expand_dims(labels_t,1) 313 | targets_t = np.hstack((boxes_t,labels_t)) 314 | 315 | return torch.from_numpy(image_t), targets_t 316 | 317 | class preproc_mixup(object): 318 | 319 | def __init__(self, resize, rgb_means, p): 320 | self.means = rgb_means 321 | self.resize = resize 322 | self.p = p 323 | 324 | def __call__(self, image, targets, random_erasing): 325 | boxes = targets[:,:-2].copy() 326 | labels = targets[:,-2].copy() 327 | weights = targets[:,-1].copy() 328 | if len(boxes) == 0: 329 | #boxes = np.empty((0, 4)) 330 | targets = np.zeros((1,6)) 331 | image = preproc_for_test(image, self.resize, self.means) 332 | return torch.from_numpy(image), targets 333 | 334 | image_o = image.copy() 335 | targets_o = targets.copy() 336 | height_o, width_o, _ = image_o.shape 337 | boxes_o = targets_o[:,:-2] 338 | labels_o = targets_o[:,-2] 339 | weights_o = targets_o[:, -1] 340 | boxes_o[:, 0::2] /= width_o 341 | boxes_o[:, 1::2] /= height_o 342 | labels_o = np.expand_dims(labels_o,1) 343 | weights_o = np.expand_dims(weights_o, 1) 344 | targets_o = np.hstack((boxes_o,labels_o,weights_o)) 345 | 346 | image_t, boxes, labels, weights = _crop_mixup(image, boxes, labels, weights) 347 | image_t = _distort(image_t) 348 | if random_erasing: 349 | image_t = _random_erasing(image_t, boxes, self.means) 350 | image_t, boxes = _expand(image_t, boxes, self.means, self.p) 351 | image_t, boxes = _mirror(image_t, boxes) 352 | 353 | height, width, _ = image_t.shape 354 | image_t = preproc_for_test(image_t, self.resize, self.means) 355 | boxes = boxes.copy() 356 | boxes[:, 0::2] /= width 357 | boxes[:, 1::2] /= height 358 | b_w = (boxes[:, 2] - boxes[:, 0])*1. 359 | b_h = (boxes[:, 3] - boxes[:, 1])*1. 
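        # b_w and b_h are box width/height normalized to [0, 1]; the mask on
        # the next line keeps only boxes whose shorter side exceeds 1% of the
        # image, dropping degenerate slivers produced by aggressive crops.
        # For instance (hypothetical numbers), a 3-pixel-wide box in a
        # 300-pixel image has b_w = 0.01 and is discarded.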
360 | mask_b = np.minimum(b_w, b_h) > 0.01
361 | boxes_t = boxes[mask_b]
362 | labels_t = labels[mask_b].copy()
363 | weights_t = weights[mask_b].copy()
364 | 
365 | if len(boxes_t)==0:
366 | image = preproc_for_test(image_o, self.resize, self.means)
367 | return torch.from_numpy(image),targets_o
368 | 
369 | labels_t = np.expand_dims(labels_t,1)
370 | weights_t = np.expand_dims(weights_t,1)
371 | targets_t = np.hstack((boxes_t,labels_t,weights_t))
372 | 
373 | 
374 | 
375 | return torch.from_numpy(image_t), targets_t
376 | 
377 | class BaseTransform(object):
378 | """Defines the transformations that should be applied to test PIL image
379 | for input into the network
380 | 
381 | dimension -> tensorize -> color adj
382 | 
383 | Arguments:
384 | resize (int): input dimension to SSD
385 | rgb_means ((int,int,int)): average RGB of the dataset
386 | (104,117,123)
387 | swap ((int,int,int)): final order of channels
388 | Returns:
389 | transform (transform) : callable transform to be applied to test/val
390 | data
391 | """
392 | def __init__(self, resize, rgb_means, swap=(2, 0, 1)):
393 | self.means = rgb_means
394 | self.resize = resize
395 | self.swap = swap
396 | 
397 | # assume input is cv2 img for now
398 | def __call__(self, img):
399 | 
400 | interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_NEAREST, cv2.INTER_LANCZOS4]
401 | interp_method = interp_methods[0]
402 | img = cv2.resize(np.array(img), (self.resize,
403 | self.resize),interpolation = interp_method).astype(np.float32)
404 | img -= self.means
405 | img = img.transpose(self.swap)
406 | return torch.from_numpy(img)
407 | 
--------------------------------------------------------------------------------
/SSD/utils/box_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import math
4 | import numpy as np
5 | if torch.cuda.is_available():
6 | import torch.backends.cudnn as cudnn
7 | 
8 | 
9 | def point_form(boxes):
10 | """ Convert prior_boxes to (xmin, ymin, xmax, ymax)
11 | representation for comparison to point form ground truth data.
12 | Args:
13 | boxes: (tensor) center-size default boxes from priorbox layers.
14 | Return:
15 | boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes.
16 | """
17 | return torch.cat((boxes[:, :2] - boxes[:, 2:]/2, # xmin, ymin
18 | boxes[:, :2] + boxes[:, 2:]/2), 1) # xmax, ymax
19 | 
20 | 
21 | def center_size(boxes):
22 | """ Convert prior_boxes to (cx, cy, w, h)
23 | representation for comparison to center-size form ground truth data.
24 | Args:
25 | boxes: (tensor) point_form boxes
26 | Return:
27 | boxes: (tensor) Converted (cx, cy, w, h) form of boxes.
28 | """
29 | return torch.cat(((boxes[:, 2:] + boxes[:, :2])/2, # cx, cy
30 | boxes[:, 2:] - boxes[:, :2]), 1) # w, h
31 | 
32 | 
33 | def intersect(box_a, box_b):
34 | """ We resize both tensors to [A,B,2] without new malloc:
35 | [A,2] -> [A,1,2] -> [A,B,2]
36 | [B,2] -> [1,B,2] -> [A,B,2]
37 | Then we compute the area of intersect between box_a and box_b.
38 | Args:
39 | box_a: (tensor) bounding boxes, Shape: [A,4].
40 | box_b: (tensor) bounding boxes, Shape: [B,4].
41 | Return:
42 | (tensor) intersection area, Shape: [A,B].
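Example (illustrative values, not taken from this repo):
>>> box_a = torch.tensor([[0., 0., 2., 2.]]) # [A=1, 4]
>>> box_b = torch.tensor([[1., 1., 3., 3.], [4., 4., 5., 5.]]) # [B=2, 4]
>>> intersect(box_a, box_b) # 1x1 overlap with the first box, none with the second
tensor([[1., 0.]])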
43 | """
44 | A = box_a.size(0)
45 | B = box_b.size(0)
46 | max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2),
47 | box_b[:, 2:].unsqueeze(0).expand(A, B, 2))
48 | min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2),
49 | box_b[:, :2].unsqueeze(0).expand(A, B, 2))
50 | inter = torch.clamp((max_xy - min_xy), min=0)
51 | return inter[:, :, 0] * inter[:, :, 1]
52 | 
53 | 
54 | def jaccard(box_a, box_b):
55 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap
56 | is simply the intersection over union of two boxes. Here we operate on
57 | ground truth boxes and default boxes.
58 | E.g.:
59 | A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
60 | Args:
61 | box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4]
62 | box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4]
63 | Return:
64 | jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
65 | """
66 | inter = intersect(box_a, box_b)
67 | area_a = ((box_a[:, 2]-box_a[:, 0]) *
68 | (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B]
69 | area_b = ((box_b[:, 2]-box_b[:, 0]) *
70 | (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B]
71 | union = area_a + area_b - inter
72 | return inter / union # [A,B]
73 | 
74 | def matrix_iou(a,b,erasing=False):
75 | """
76 | return iou of a and b, numpy version for data augmentation
77 | """
78 | lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])
79 | rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
80 | 
81 | area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)
82 | area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
83 | area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
84 | if erasing:
85 | IoS = area_i / np.minimum(area_a[:, np.newaxis], area_b) # intersection over smaller area
86 | return IoS
87 | return area_i / (area_a[:, np.newaxis] + area_b - area_i)
88 | 
89 | def match_mixup(threshold, truths, priors, variances, labels, loc_t, conf_t, idx, weight_t, weight_loss, giou):
90 | overlaps = jaccard(
91 | truths,
92 | point_form(priors)
93 | )
94 | # (Bipartite Matching)
95 | # [1,num_objects] best prior for each ground truth
96 | best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True)
97 | # [1,num_priors] best ground truth for each prior
98 | best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True)
99 | best_truth_idx.squeeze_(0)
100 | best_truth_overlap.squeeze_(0)
101 | best_prior_idx.squeeze_(1)
102 | best_prior_overlap.squeeze_(1)
103 | best_truth_overlap.index_fill_(0, best_prior_idx, 2) # ensure best prior
104 | # TODO refactor: index best_prior_idx with long tensor
105 | # ensure every gt matches with its prior of max overlap
106 | for j in range(best_prior_idx.size(0)):
107 | best_truth_idx[best_prior_idx[j]] = j
108 | matches = truths[best_truth_idx] # Shape: [num_priors,4]
109 | conf = labels[best_truth_idx] # Shape: [num_priors]
110 | conf[best_truth_overlap < threshold] = 0 # label as background
111 | if not giou:
112 | loc = encode(matches, priors, variances)
113 | loc_t[idx] = loc # [num_priors,4] encoded offsets to learn
114 | else:
115 | loc_t[idx] = matches
116 | conf_t[idx] = conf # [num_priors] top class label for each prior
117 | weight = weight_loss[best_truth_idx]
118 | weight[best_truth_overlap < threshold] = 0.0
119 | weight_t[idx] = weight
120 | 
121 | def match(threshold, truths, priors, variances, labels, loc_t, conf_t, idx, giou):
122 | """Match each prior box with the ground truth box of the highest jaccard
123 | overlap, encode the bounding boxes, then return the matched indices
124 | corresponding to both confidence and location preds.
125 | Args:
126 | threshold: (float) The overlap threshold used when matching boxes.
127 | truths: (tensor) Ground truth boxes, Shape: [num_obj, 4].
128 | priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4].
129 | variances: (tensor) Variances corresponding to each prior coord,
130 | Shape: [num_priors, 4].
131 | labels: (tensor) All the class labels for the image, Shape: [num_obj].
132 | loc_t: (tensor) Tensor to be filled w/ encoded location targets.
133 | conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds.
134 | idx: (int) current batch index
135 | Return:
136 | The matched indices corresponding to 1)location and 2)confidence preds.
137 | """
138 | # jaccard index
139 | overlaps = jaccard(
140 | truths,
141 | point_form(priors)
142 | )
143 | # (Bipartite Matching)
144 | # [1,num_objects] best prior for each ground truth
145 | best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True)
146 | # [1,num_priors] best ground truth for each prior
147 | best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True)
148 | best_truth_idx.squeeze_(0)
149 | best_truth_overlap.squeeze_(0)
150 | best_prior_idx.squeeze_(1)
151 | best_prior_overlap.squeeze_(1)
152 | best_truth_overlap.index_fill_(0, best_prior_idx, 2) # ensure best prior
153 | # TODO refactor: index best_prior_idx with long tensor
154 | # ensure every gt matches with its prior of max overlap
155 | for j in range(best_prior_idx.size(0)):
156 | best_truth_idx[best_prior_idx[j]] = j
157 | matches = truths[best_truth_idx] # Shape: [num_priors,4]
158 | conf = labels[best_truth_idx] # Shape: [num_priors]
159 | conf[best_truth_overlap < threshold] = 0 # label as background
160 | if not giou:
161 | loc = encode(matches, priors, variances)
162 | loc_t[idx] = loc # [num_priors,4] encoded offsets to learn
163 | else:
164 | loc_t[idx] = matches
165 | conf_t[idx] = conf # [num_priors] top class label for each prior
166 | 
167 | def encode(matched, priors, variances):
168 | """Encode the variances from the priorbox layers into the ground truth boxes
169 | we have matched (based on jaccard overlap) with the prior boxes.
170 | Args:
171 | matched: (tensor) Coords of ground truth for each prior in point-form
172 | Shape: [num_priors, 4].
173 | priors: (tensor) Prior boxes in center-offset form
174 | Shape: [num_priors,4].
175 | variances: (list[float]) Variances of priorboxes
176 | Return:
177 | encoded boxes (tensor), Shape: [num_priors, 4]
178 | """
179 | 
180 | # dist b/t match center and prior's center
181 | g_cxcy = (matched[:, :2] + matched[:, 2:])/2 - priors[:, :2]
182 | # encode variance
183 | g_cxcy /= (variances[0] * priors[:, 2:])
184 | # match wh / prior wh
185 | g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:]
186 | g_wh = torch.log(g_wh) / variances[1]
187 | # return target for smooth_l1_loss
188 | return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4]
189 | 
190 | 
191 | def encode_multi(matched, priors, offsets, variances):
192 | """Encode the variances from the priorbox layers into the ground truth boxes
193 | we have matched (based on jaccard overlap) with the prior boxes.
194 | Args:
195 | matched: (tensor) Coords of ground truth for each prior in point-form
196 | Shape: [num_priors, 4].
197 | priors: (tensor) Prior boxes in center-offset form
198 | Shape: [num_priors,4].
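offsets: (tensor) Per-prior offsets in center-offset form; their
centers shift the prior centers and their sizes scale the
encoded center targets, Shape: [num_priors,4].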
199 | variances: (list[float]) Variances of priorboxes
200 | Return:
201 | encoded boxes (tensor), Shape: [num_priors, 4]
202 | """
203 | 
204 | # dist b/t match center and prior's center
205 | g_cxcy = (matched[:, :2] + matched[:, 2:])/2 - priors[:, :2] - offsets[:,:2]
206 | # encode variance
207 | #g_cxcy /= (variances[0] * priors[:, 2:])
208 | g_cxcy.div_(variances[0] * offsets[:, 2:])
209 | # match wh / prior wh
210 | g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:]
211 | g_wh = torch.log(g_wh) / variances[1]
212 | # return target for smooth_l1_loss
213 | return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4]
214 | 
215 | # Adapted from https://github.com/Hakuyume/chainer-ssd
216 | def decode(loc, priors, variances):
217 | """Decode locations from predictions using priors to undo
218 | the encoding we did for offset regression at train time.
219 | Args:
220 | loc (tensor): location predictions for loc layers,
221 | Shape: [num_priors,4]
222 | priors (tensor): Prior boxes in center-offset form.
223 | Shape: [num_priors,4].
224 | variances: (list[float]) Variances of priorboxes
225 | Return:
226 | decoded bounding box predictions
227 | """
228 | 
229 | boxes = torch.cat((
230 | priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
231 | priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
232 | boxes[:, :2] -= boxes[:, 2:] / 2
233 | boxes[:, 2:] += boxes[:, :2]
234 | return boxes
235 | 
236 | def decode_multi(loc, priors, offsets, variances):
237 | """Decode locations from predictions using priors to undo
238 | the encoding we did for offset regression at train time.
239 | Args:
240 | loc (tensor): location predictions for loc layers,
241 | Shape: [num_priors,4]
242 | priors (tensor): Prior boxes in center-offset form.
243 | Shape: [num_priors,4].
244 | variances: (list[float]) Variances of priorboxes
245 | Return:
246 | decoded bounding box predictions
247 | """
248 | 
249 | boxes = torch.cat((
250 | priors[:, :2] + offsets[:,:2]+ loc[:, :2] * variances[0] * offsets[:, 2:],
251 | priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
252 | boxes[:, :2] -= boxes[:, 2:] / 2
253 | boxes[:, 2:] += boxes[:, :2]
254 | return boxes
255 | 
256 | def log_sum_exp(x,label_smooth=False):
257 | """Utility function for computing log_sum_exp in a numerically stable way.
258 | This will be used to determine unaveraged confidence loss across
259 | all examples in a batch.
260 | Args:
261 | x (Variable(tensor)): conf_preds from conf layers
262 | """
263 | x_max = x.data.max()
264 | if label_smooth:
265 | return torch.log(torch.sum(torch.exp(x - x_max), 1, keepdim=True)) + x_max - x
266 | else:
267 | return torch.log(torch.sum(torch.exp(x - x_max), 1, keepdim=True)) + x_max
268 | 
269 | def focal_sum_exp(x):
270 | """Utility function for computing a numerically stable softmax over
271 | class scores. This will be used when computing the focal variant of
272 | the confidence loss across all examples in a batch.
273 | Args:
274 | x (Variable(tensor)): conf_preds from conf layers
275 | """
276 | x_max = x.data.max()
277 | return torch.exp(x - x_max) / torch.sum(torch.exp(x - x_max), 1, keepdim=True)
278 | 
279 | # Original author: Francisco Massa:
280 | # https://github.com/fmassa/object-detection.torch
281 | # Ported to PyTorch by Max deGroot (02/01/2017)
282 | def nms(boxes, scores, overlap=0.5, top_k=200):
283 | """Apply non-maximum suppression at test time to avoid detecting too many
284 | overlapping bounding boxes for a given object.
285 | Args:
286 | boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
287 | scores: (tensor) The class pred scores for the img, Shape:[num_priors].
288 | overlap: (float) The overlap thresh for suppressing unnecessary boxes.
289 | top_k: (int) The maximum number of box preds to consider.
290 | Return:
291 | The indices of the kept boxes with respect to num_priors.
292 | """
293 | 
294 | keep = torch.Tensor(scores.size(0)).fill_(0).long()
295 | if boxes.numel() == 0:
296 | return keep
297 | x1 = boxes[:, 0]
298 | y1 = boxes[:, 1]
299 | x2 = boxes[:, 2]
300 | y2 = boxes[:, 3]
301 | area = torch.mul(x2 - x1, y2 - y1)
302 | v, idx = scores.sort(0) # sort in ascending order
303 | # I = I[v >= 0.01]
304 | idx = idx[-top_k:] # indices of the top-k largest vals
305 | xx1 = boxes.new()
306 | yy1 = boxes.new()
307 | xx2 = boxes.new()
308 | yy2 = boxes.new()
309 | w = boxes.new()
310 | h = boxes.new()
311 | 
312 | # keep = torch.Tensor()
313 | count = 0
314 | while idx.numel() > 0:
315 | i = idx[-1] # index of current largest val
316 | # keep.append(i)
317 | keep[count] = i
318 | count += 1
319 | if idx.size(0) == 1:
320 | break
321 | idx = idx[:-1] # remove kept element from view
322 | # load bboxes of next highest vals
323 | torch.index_select(x1, 0, idx, out=xx1)
324 | torch.index_select(y1, 0, idx, out=yy1)
325 | torch.index_select(x2, 0, idx, out=xx2)
326 | torch.index_select(y2, 0, idx, out=yy2)
327 | # store element-wise max with next highest score
328 | xx1 = torch.clamp(xx1, min=x1[i])
329 | yy1 = torch.clamp(yy1, min=y1[i])
330 | xx2 = torch.clamp(xx2, max=x2[i])
331 | yy2 = torch.clamp(yy2, max=y2[i])
332 | w.resize_as_(xx2)
333 | h.resize_as_(yy2)
334 | w = xx2 - xx1
335 | h = yy2 - yy1
336 | # check sizes of xx1 and xx2.. after each iteration
337 | w = torch.clamp(w, min=0.0)
338 | h = torch.clamp(h, min=0.0)
339 | inter = w*h
340 | # IoU = i / (area(a) + area(b) - i)
341 | rem_areas = torch.index_select(area, 0, idx) # load remaining areas
342 | union = (rem_areas - inter) + area[i]
343 | IoU = inter/union # store result in iou
344 | # keep only elements with an IoU <= overlap
345 | idx = idx[IoU.le(overlap)]
346 | return keep, count
347 | 
348 | 
349 | 
--------------------------------------------------------------------------------
/SSD/train.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import sys
3 | import os
4 | import torch
5 | import torch.nn as nn
6 | import torch.optim as optim
7 | import torch.backends.cudnn as cudnn
8 | import torchvision.transforms as transforms
9 | import torch.nn.init as init
10 | import argparse
11 | import numpy as np
12 | from torch.autograd import Variable
13 | import torch.utils.data as data
14 | from data import VOCroot, COCOroot, VOC_300, VOC_512, COCO_300, COCO_512, COCO_mobile_300, AnnotationTransform, COCODetection, VOCDetection, detection_collate, BaseTransform, preproc, preproc_mixup
15 | from layers.modules import MultiBoxLoss
16 | from layers.functions import PriorBox,Detect
17 | import time
18 | import math
19 | from val import val_net
20 | 
21 | parser = argparse.ArgumentParser(description='SSD Training')
22 | parser.add_argument('-v', '--version', default='SSD', help='version.')
23 | parser.add_argument('-s', '--size', default='300', help='300 or 512 input size.')
24 | parser.add_argument('-d', '--dataset', default='VOC', help='VOC or COCO dataset')
25 | parser.add_argument('--basenet', default='vgg16_bn.pth', help='pretrained base model')
26 | parser.add_argument('--jaccard_threshold', default=0.5, type=float, help='Min Jaccard index for matching')
27 | parser.add_argument('-b', '--batch_size', default=32, type=int, help='Batch size for training')
28 | parser.add_argument('--num_workers', default=8, type=int, help='Number of workers used in dataloading')
29 | parser.add_argument('--cuda', default=True, type=bool, help='Use cuda to train model')
30 | parser.add_argument('--ngpu', default=1, type=int, help='gpus')
31 | parser.add_argument('--lr', '--learning-rate', default=4e-3, type=float, help='initial learning rate')
32 | parser.add_argument('--momentum', default=0.9, type=float, help='momentum')
33 | parser.add_argument( '--resume_net', default=None, help='resume net for retraining')
34 | parser.add_argument('--resume_epoch', default=0, type=int, help='resume iter for retraining')
35 | parser.add_argument('-max','--max_epoch', default=250, type=int, help='max epoch for retraining')
36 | parser.add_argument('--weight_decay', default=5e-4, type=float, help='Weight decay for SGD')
37 | parser.add_argument('--gamma', default=0.1, type=float, help='Gamma update for SGD')
38 | parser.add_argument('--log_iters', default=True, type=bool, help='Print the loss at each iteration')
39 | parser.add_argument('--save_folder', default='./weights/', help='Location to save checkpoint models')
40 | parser.add_argument('--save_val_folder', default='eval/', type=str, help='Dir to save results')
41 | parser.add_argument('-wu','--warm_epoch', default='5', type=int, help='warm up')
42 | parser.add_argument('-ls','--lr_schedule', default='cos', type=str, help='lr schedule: step;cos;htd')
43 | parser.add_argument('--norm', default="BN", type=str, help='L2Norm/BN/GN for normalization')
44 | parser.add_argument('-bd','--bias_decay', default=True, type=bool, help='BN/GN and bias for weight decay')
45 | parser.add_argument('--label_smooth', default=False, type=bool,
46 | help='Label Smooth for cls task, default label_pos=0.9. Please refer to layers/modules/multibox_loss.py')
47 | parser.add_argument('--balance_l1', default=False, type=bool, help='Balanced for SmoothL1, refer to Libra R-CNN')
48 | parser.add_argument('--random_erasing', default=True, type=bool, help='Random Erasing for Data Augmentation')
49 | parser.add_argument('--focal_loss', default=False, type=bool, help='Focal Loss')
50 | parser.add_argument('--alpha', default=0, type=float, help='Mixup for SSD; if alpha is zero, Mixup is not used')
51 | parser.add_argument('--giou', default=False, type=bool, help='GIOU for reg loss')
52 | parser.add_argument('--vgg_bn', default=True, type=bool, help='Use VGG16_BN as backbone for training')
53 | args = parser.parse_args()
54 | 
55 | 
56 | if not os.path.exists(args.save_folder):
57 | os.mkdir(args.save_folder)
58 | 
59 | if args.dataset == 'VOC':
60 | train_sets = [('2007', 'trainval'), ('2012', 'trainval')]
61 | cfg = (VOC_300, VOC_512)[args.size == '512']
62 | else:
63 | train_sets = [('2014', 'train'),('2014', 'valminusminival')]
64 | cfg = (COCO_300, COCO_512)[args.size == '512']
65 | 
66 | if args.version == 'SSD':
67 | from models.SSD import build_net
68 | else:
69 | print('Unknown version!')
70 | 
71 | img_dim = (300,512)[args.size=='512']
72 | rgb_means = (104, 117, 123)
73 | p = 0.6
74 | num_classes = (21, 81)[args.dataset == 'COCO']
75 | batch_size = args.batch_size
76 | weight_decay = args.weight_decay
77 | gamma = args.gamma
78 | momentum = args.momentum
79 | 
80 | net = build_net(img_dim, num_classes,args.norm,args.vgg_bn)
81 | print(net)
82 | if not args.resume_net:
83 | base_weights = torch.load(args.basenet)
84 | print('Loading base
network...') 85 | if args.vgg_bn: 86 | net.base[:-5].load_state_dict(base_weights) 87 | else: 88 | net.base.load_state_dict(base_weights) 89 | 90 | def weights_init(m): 91 | for key in m.state_dict(): 92 | if key.split('.')[-1] == 'weight': 93 | if 'conv' in key: 94 | init.kaiming_normal_(m.state_dict()[key], mode='fan_out',nonlinearity='relu') 95 | if 'bn' in key: 96 | m.state_dict()[key][...] = 1 97 | if 'gn' in key: 98 | m.state_dict()[key][...] = 1 99 | 100 | elif key.split('.')[-1] == 'bias': 101 | m.state_dict()[key][...] = 0 102 | 103 | def head_weights_init(m): 104 | for key in m.state_dict(): 105 | if key.split('.')[-1] == 'weight': 106 | init.xavier_uniform_(m.state_dict()[key]) 107 | elif key.split('.')[-1] == 'bias': 108 | m.state_dict()[key][...] = 0 109 | 110 | print('Initializing weights...') 111 | # initialize newly added layers' weights with kaiming_normal method 112 | if args.vgg_bn: 113 | net.base[-5:].apply(weights_init) 114 | net.extras.apply(weights_init) 115 | 116 | else: 117 | print('Loading resume network') 118 | state_dict = torch.load(args.resume_net) 119 | # create new OrderedDict that does not contain `module.` 120 | from collections import OrderedDict 121 | 122 | # multi-GPU 123 | new_state_dict = OrderedDict() 124 | for k, v in state_dict.items(): 125 | head = k[:7] 126 | if head == 'module.': 127 | name = k[7:] # remove `module.` 128 | else: 129 | name = k 130 | new_state_dict[name] = v 131 | net.load_state_dict(new_state_dict) 132 | 133 | if args.ngpu > 1: 134 | net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu))) 135 | 136 | if args.cuda: 137 | net.cuda() 138 | cudnn.benchmark = True 139 | 140 | if not args.bias_decay: # BN/GN and bias don't use weight decay 141 | spe_params = [] 142 | conv_params = [] 143 | for k, v in net.named_parameters(): 144 | if 'bn' in k or 'bias' in k: 145 | spe_params.append(v) 146 | else: 147 | conv_params.append(v) 148 | params_group = [{'params': spe_params, 'weight_decay': 0.0}, {'params': conv_params}] 149 | optimizer = optim.SGD(params_group, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) 150 | else: 151 | optimizer = optim.SGD(net.parameters(), lr=args.lr,momentum=args.momentum, weight_decay=args.weight_decay) 152 | 153 | criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False,label_smmooth=args.label_smooth,balance_l1=args.balance_l1, 154 | focal_loss=args.focal_loss,giou=args.giou) 155 | 156 | priorbox = PriorBox(cfg) 157 | with torch.no_grad(): 158 | priors = priorbox.forward() 159 | if args.cuda: 160 | priors = priors.cuda() 161 | 162 | def get_features_hook(self,input,output): 163 | print('~'*10) 164 | print('features:') 165 | print('input:',input[0][0,0]) 166 | print('output:',output[0,0]) 167 | 168 | def get_grads_hook(self,input_grad, output_grad): 169 | print('~'*10) 170 | print('grad:') 171 | print('grad_in:',input_grad[0][0,0]) 172 | print('grad_out',output_grad[0][0,0]) 173 | 174 | def train(): 175 | net.train() 176 | # loss counters 177 | loc_loss = 0 # epoch 178 | conf_loss = 0 179 | epoch = 0 + args.resume_epoch 180 | print('Loading Dataset...') 181 | 182 | if args.dataset == 'VOC': 183 | if args.alpha - 0.0 > 1e-5: 184 | dataset = VOCDetection(VOCroot, train_sets, preproc_mixup(img_dim, rgb_means, p), AnnotationTransform(), random_erasing=args.random_erasing, 185 | mixup_alpha=args.alpha) 186 | else: 187 | dataset = VOCDetection(VOCroot, train_sets, preproc(img_dim, rgb_means, p), AnnotationTransform(), random_erasing=args.random_erasing) 188 | elif 
args.dataset == 'COCO': 189 | dataset = COCODetection(COCOroot, train_sets, preproc(img_dim, rgb_means, p)) 190 | else: 191 | print('Only VOC and COCO are supported now!') 192 | return 193 | 194 | epoch_size = len(dataset) // args.batch_size 195 | max_iter = args.max_epoch * epoch_size 196 | 197 | stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size) 198 | stepvalues_COCO = (100 * epoch_size, 135 * epoch_size, 170 * epoch_size) 199 | stepvalues = (stepvalues_VOC,stepvalues_COCO)[args.dataset=='COCO'] 200 | print('Training',args.version, 'on', dataset.name) 201 | step_index = 0 202 | 203 | if args.resume_epoch > 0: 204 | start_iter = args.resume_epoch * epoch_size 205 | for sv in stepvalues: 206 | if start_iter>sv: 207 | step_index+=1 208 | continue 209 | else: 210 | break 211 | else: 212 | start_iter = 0 213 | 214 | lr = args.lr 215 | avg_loss_list = [] 216 | flag = True 217 | for iteration in range(start_iter, max_iter): 218 | if iteration % epoch_size == 0: 219 | # create batch iterator 220 | batch_iterator = iter(data.DataLoader(dataset, batch_size, shuffle=True, num_workers=args.num_workers, collate_fn=detection_collate)) 221 | avg_loss = (loc_loss+conf_loss)/epoch_size 222 | avg_loss_list.append(avg_loss) 223 | print("avg_loss_list:") 224 | if len(avg_loss_list)<=5: 225 | print (avg_loss_list) 226 | else: 227 | print(avg_loss_list[-5:]) 228 | loc_loss = 0 229 | conf_loss = 0 230 | if (epoch<=150 and epoch%10==0) or (150< epoch< 200 and epoch%5==0) or (epoch>200): 231 | torch.save(net.state_dict(), args.save_folder+args.version+'_'+args.dataset + '_epoches_'+ repr(epoch) + '.pth') 232 | if (epoch!=args.resume_epoch): 233 | #if(epoch): 234 | ValNet = build_net(img_dim, num_classes, args.norm, args.vgg_bn) 235 | val_state_dict = torch.load(args.save_folder + args.version + '_' + args.dataset + '_epoches_' + repr(epoch) + '.pth') 236 | from collections import OrderedDict 237 | new_state_dict = OrderedDict() 238 | for k, v in val_state_dict.items(): 239 | head = k[:7] 240 | if head == 'module.': 241 | name = k[7:] 242 | else: 243 | name = k 244 | new_state_dict[name] = v 245 | ValNet.load_state_dict(new_state_dict) 246 | ValNet.eval() 247 | print('Finished loading ' + args.version + '_' + args.dataset + '_epoches_' + repr(epoch) + '.pth model!') 248 | if args.dataset == 'VOC': 249 | testset = VOCDetection(VOCroot, [('2007', 'test')], None, AnnotationTransform()) 250 | elif args.dataset == 'COCO': 251 | testset = COCODetection(COCOroot, [('2014', 'minival')], None) 252 | if args.cuda: 253 | ValNet = ValNet.cuda() 254 | cudnn.benchmark = True 255 | else: 256 | ValNet = ValNet.cpu() 257 | top_k = 200 258 | detector = Detect(num_classes, 0, cfg, GIOU=args.giou) 259 | save_val_folder = os.path.join(args.save_val_folder, args.dataset) 260 | val_transform = BaseTransform(ValNet.size, rgb_means, (2, 0, 1)) 261 | val_net(priors, save_val_folder, testset, num_classes, ValNet, detector, val_transform, top_k, 0.01, 262 | args.cuda,args.vgg_bn) 263 | epoch += 1 264 | 265 | load_t0 = time.time() 266 | if iteration in stepvalues: 267 | step_index += 1 268 | lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index, iteration, epoch_size) 269 | 270 | images, targets = next(batch_iterator) 271 | 272 | # no mixup 273 | if args.cuda: 274 | images = Variable(images.cuda()) 275 | targets = [Variable(anno.cuda()) for anno in targets] 276 | else: 277 | images = Variable(images) 278 | targets = [Variable(anno) for anno in targets] 279 | 280 | # fh = 
net.base[22].register_forward_hook(get_features_hook)
281 | # bh = net.base[22].register_backward_hook(get_grads_hook)
282 | out = net(images,vgg_bn=args.vgg_bn)
283 | optimizer.zero_grad()
284 | loss_l, loss_c = criterion(out, priors, targets)
285 | loss = loss_l + loss_c
286 | loss.backward()
287 | # fh.remove()
288 | # bh.remove()
289 | 
290 | optimizer.step()
291 | t1 = time.time()
292 | loc_loss += loss_l.item()
293 | conf_loss += loss_c.item()
294 | load_t1 = time.time()
295 | if iteration % 10 == 0:
296 | print('Epoch:' + repr(epoch) + ' || epochiter: ' + repr(iteration % epoch_size) + '/' + repr(epoch_size)
297 | + '|| Total iter ' + repr(iteration) + ' || L: %.4f C: %.4f S: %.4f||' % (loss_l.item(),loss_c.item(),loss_l.item()+loss_c.item()) +
298 | 'Batch time: %.4f ||' % (load_t1 - load_t0) + 'LR: %.7f' % (lr))
299 | 
300 | torch.save(net.state_dict(), args.save_folder + 'Final_' + args.version +'_' + args.dataset+ '.pth')
301 | 
302 | 
303 | def adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size,lr_schedule=args.lr_schedule):
304 | """Sets the learning rate
305 | # Adapted from PyTorch Imagenet example:
306 | # https://github.com/pytorch/examples/blob/master/imagenet/main.py
307 | """
308 | if epoch <= args.warm_epoch:
309 | lr = 1e-6 + (args.lr - 1e-6) * iteration / (epoch_size * args.warm_epoch)
310 | else:
311 | if lr_schedule == 'step':
312 | lr = args.lr * (gamma ** (step_index))
313 | elif lr_schedule == 'cos':
314 | lr = 1e-6 + (args.lr - 1e-6) * 0.5 * (1 + math.cos(
315 | (iteration - args.warm_epoch * epoch_size) * math.pi /((args.max_epoch - args.warm_epoch) * epoch_size)))
316 | elif lr_schedule == 'htd':
317 | l,u = -6,3
318 | lr = 1e-6 + (args.lr - 1e-6) * 0.5 * (1 - math.tanh(l + (u - l) *
319 | ((iteration - args.warm_epoch * epoch_size) /(args.max_epoch - args.warm_epoch) /epoch_size)))
320 | else:
321 | print ('Unknown lr schedule type!')
322 | for param_group in optimizer.param_groups:
323 | param_group['lr'] = lr
324 | return lr
325 | 
326 | 
327 | if __name__ == '__main__':
328 | train()
329 | 
330 | 
--------------------------------------------------------------------------------
/SSD/data/voc0712.py:
--------------------------------------------------------------------------------
1 | """VOC Dataset Classes
2 | 
3 | Original author: Francisco Massa
4 | https://github.com/fmassa/vision/blob/voc_dataset/torchvision/datasets/voc.py
5 | 
6 | Updated by: Ellis Brown, Max deGroot
7 | """
8 | 
9 | import os
10 | import pickle
11 | import os.path
12 | import sys
13 | import torch
14 | import torch.utils.data as data
15 | import torchvision.transforms as transforms
16 | from PIL import Image, ImageDraw, ImageFont
17 | import cv2
18 | import numpy as np
19 | from .voc_eval import voc_eval
20 | if sys.version_info[0] == 2:
21 | import xml.etree.cElementTree as ET
22 | else:
23 | import xml.etree.ElementTree as ET
24 | 
25 | VOC_CLASSES = ( '__background__', # always index 0
26 | 'aeroplane', 'bicycle', 'bird', 'boat',
27 | 'bottle', 'bus', 'car', 'cat', 'chair',
28 | 'cow', 'diningtable', 'dog', 'horse',
29 | 'motorbike', 'person', 'pottedplant',
30 | 'sheep', 'sofa', 'train', 'tvmonitor')
31 | 
32 | # for making bounding boxes pretty
33 | COLORS = ((255, 0, 0, 128), (0, 255, 0, 128), (0, 0, 255, 128),
34 | (0, 255, 255, 128), (255, 0, 255, 128), (255, 255, 0, 128))
35 | 
36 | 
37 | class VOCSegmentation(data.Dataset):
38 | 
39 | """VOC Segmentation Dataset Object
40 | input and target are both images
41 | 
42 | NOTE: need to address https://github.com/pytorch/vision/issues/9
43 | 
44 | Arguments:
45 | root (string): filepath to VOCdevkit folder.
46 | image_set (string): imageset to use (eg: 'train', 'val', 'test').
47 | transform (callable, optional): transformation to perform on the
48 | input image
49 | target_transform (callable, optional): transformation to perform on the
50 | target image
51 | dataset_name (string, optional): which dataset to load
52 | (default: 'VOC2007')
53 | """
54 | 
55 | def __init__(self, root, image_set, transform=None, target_transform=None,
56 | dataset_name='VOC2007'):
57 | self.root = root
58 | self.image_set = image_set
59 | self.transform = transform
60 | self.target_transform = target_transform
61 | 
62 | self._annopath = os.path.join(
63 | self.root, dataset_name, 'SegmentationClass', '%s.png')
64 | self._imgpath = os.path.join(
65 | self.root, dataset_name, 'JPEGImages', '%s.jpg')
66 | self._imgsetpath = os.path.join(
67 | self.root, dataset_name, 'ImageSets', 'Segmentation', '%s.txt')
68 | 
69 | with open(self._imgsetpath % self.image_set) as f:
70 | self.ids = f.readlines()
71 | self.ids = [x.strip('\n') for x in self.ids]
72 | 
73 | def __getitem__(self, index):
74 | img_id = self.ids[index]
75 | 
76 | target = Image.open(self._annopath % img_id).convert('RGB')
77 | img = Image.open(self._imgpath % img_id).convert('RGB')
78 | 
79 | if self.transform is not None:
80 | img = self.transform(img)
81 | 
82 | if self.target_transform is not None:
83 | target = self.target_transform(target)
84 | 
85 | return img, target
86 | 
87 | def __len__(self):
88 | return len(self.ids)
89 | 
90 | 
91 | class AnnotationTransform(object):
92 | 
93 | """Transforms a VOC annotation into a Tensor of bbox coords and label index
94 | Initialized with a dictionary lookup of classnames to indexes
95 | 
96 | Arguments:
97 | class_to_ind (dict, optional): dictionary lookup of classnames -> indexes
98 | (default: alphabetic indexing of VOC's 20 classes)
99 | keep_difficult (bool, optional): keep difficult instances or not
100 | (default: True)
101 | height (int): height
102 | width (int): width
103 | """
104 | 
105 | def __init__(self, class_to_ind=None, keep_difficult=True):
106 | self.class_to_ind = class_to_ind or dict(
107 | zip(VOC_CLASSES, range(len(VOC_CLASSES))))
108 | self.keep_difficult = keep_difficult
109 | 
110 | def __call__(self, target):
111 | """
112 | Arguments:
113 | target (annotation) : the target annotation to be made usable
114 | will be an ET.Element
115 | Returns:
116 | a list containing lists of bounding boxes [bbox coords, label index]
117 | """
118 | res = np.empty((0,5))
119 | for obj in target.iter('object'):
120 | difficult = int(obj.find('difficult').text) == 1
121 | if not self.keep_difficult and difficult:
122 | continue
123 | name = obj.find('name').text.lower().strip()
124 | bbox = obj.find('bndbox')
125 | 
126 | pts = ['xmin', 'ymin', 'xmax', 'ymax']
127 | bndbox = []
128 | for i, pt in enumerate(pts):
129 | cur_pt = int(bbox.find(pt).text) - 1
130 | # scale height or width
131 | #cur_pt = cur_pt / width if i % 2 == 0 else cur_pt / height
132 | bndbox.append(cur_pt)
133 | label_idx = self.class_to_ind[name]
134 | bndbox.append(label_idx)
135 | res = np.vstack((res,bndbox)) # [xmin, ymin, xmax, ymax, label_ind]
136 | # img_id = target.find('filename').text[:-4]
137 | 
138 | return res # [[xmin, ymin, xmax, ymax, label_ind], ...
] 139 | 140 | 141 | class VOCDetection(data.Dataset): 142 | 143 | """VOC Detection Dataset Object 144 | 145 | input is image, target is annotation 146 | 147 | Arguments: 148 | root (string): filepath to VOCdevkit folder. 149 | image_set (string): imageset to use (eg. 'train', 'val', 'test') 150 | transform (callable, optional): transformation to perform on the 151 | input image 152 | target_transform (callable, optional): transformation to perform on the 153 | target `annotation` 154 | (eg: take in caption string, return tensor of word indices) 155 | dataset_name (string, optional): which dataset to load 156 | (default: 'VOC2007') 157 | """ 158 | 159 | def __init__(self, root, image_sets, preproc=None, target_transform=None, dataset_name='VOC0712', means=(104, 117, 123), 160 | random_erasing=False, mixup_alpha=0.0): 161 | self.root = root 162 | self.image_set = image_sets 163 | self.preproc = preproc 164 | self.target_transform = target_transform 165 | self.name = dataset_name 166 | self._annopath = os.path.join('%s', 'Annotations', '%s.xml') 167 | self._imgpath = os.path.join('%s', 'JPEGImages', '%s.jpg') 168 | self.ids = list() 169 | self.means = means 170 | for (year, name) in image_sets: 171 | self._year = year 172 | rootpath = os.path.join(self.root, 'VOC' + year) 173 | for line in open(os.path.join(rootpath, 'ImageSets', 'Main', name + '.txt')): 174 | self.ids.append((rootpath, line.strip())) 175 | self.random_erasing = random_erasing 176 | self.mixup_alpha = mixup_alpha 177 | 178 | def __getitem__(self, index): 179 | img_id = self.ids[index] 180 | target = ET.parse(self._annopath % img_id).getroot() 181 | img = cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR) 182 | height, width, _ = img.shape 183 | 184 | if self.target_transform is not None: 185 | target = self.target_transform(target) 186 | 187 | if (self.mixup_alpha - 0.0 > 1e-5): 188 | index2 = np.random.choice(np.delete(np.arange(len(self.ids)), index),replace=False) 189 | img_id2 = self.ids[index2] 190 | target2 = ET.parse(self._annopath % img_id2).getroot() 191 | img2 = cv2.imread(self._imgpath % img_id2, cv2.IMREAD_COLOR) 192 | height2, width2, _ = img2.shape 193 | if self.target_transform is not None: 194 | target2 = self.target_transform(target2) 195 | 196 | height_mix = max(height, height2) 197 | width_mix = max(width, width2) 198 | mix_img = np.zeros((height_mix, width_mix, 3), dtype='float32') 199 | alpha = np.random.uniform(0, self.mixup_alpha) 200 | lam = np.random.beta(alpha,alpha) 201 | mix_img[:height, :width, :] = img.astype('float32') * lam 202 | mix_img[:height2, :width2, :] += img2.astype('float32') * (1.0 - lam) 203 | if (height2-height)*(width2-width) < 0: 204 | mix_img[min(height,height2): height_mix, min(width, width2): width_mix, :] = [2*j for j in self.means] 205 | mix_img = mix_img.astype('uint8') 206 | w1 = np.full((target.shape[0], 1), lam) 207 | w2 = np.full((target2.shape[0], 1), (1 - lam)) 208 | mix_target = np.hstack((np.vstack((target, target2)), np.vstack((w1, w2)))) 209 | 210 | if self.preproc is not None: 211 | img, target = self.preproc(mix_img, mix_target, self.random_erasing) 212 | 213 | else: 214 | if self.preproc is not None: 215 | img, target = self.preproc(img, target, self.random_erasing) 216 | 217 | 218 | return img, target 219 | 220 | def __len__(self): 221 | return len(self.ids) 222 | 223 | def pull_image(self, index): 224 | '''Returns the original image object at index in PIL form 225 | 226 | Note: not using self.__getitem__(), as any transformations passed in 227 | could mess 
up this functionality. 228 | 229 | Argument: 230 | index (int): index of img to show 231 | Return: 232 | PIL img 233 | ''' 234 | img_id = self.ids[index] 235 | return cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR) 236 | 237 | def pull_anno(self, index): 238 | '''Returns the original annotation of image at index 239 | 240 | Note: not using self.__getitem__(), as any transformations passed in 241 | could mess up this functionality. 242 | 243 | Argument: 244 | index (int): index of img to get annotation of 245 | Return: 246 | list: [img_id, [(label, bbox coords),...]] 247 | eg: ('001718', [('dog', (96, 13, 438, 332))]) 248 | ''' 249 | img_id = self.ids[index] 250 | anno = ET.parse(self._annopath % img_id).getroot() 251 | gt = self.target_transform(anno, 1, 1) 252 | return img_id[1], gt 253 | 254 | def pull_tensor(self, index): 255 | '''Returns the original image at an index in tensor form 256 | 257 | Note: not using self.__getitem__(), as any transformations passed in 258 | could mess up this functionality. 259 | 260 | Argument: 261 | index (int): index of img to show 262 | Return: 263 | tensorized version of img, squeezed 264 | ''' 265 | to_tensor = transforms.ToTensor() 266 | return torch.Tensor(self.pull_image(index)).unsqueeze_(0) 267 | 268 | def evaluate_detections(self, all_boxes, output_dir=None): 269 | """ 270 | all_boxes is a list of length number-of-classes. 271 | Each list element is a list of length number-of-images. 272 | Each of those list elements is either an empty list [] 273 | or a numpy array of detection. 274 | 275 | all_boxes[class][image] = [] or np.array of shape #dets x 5 276 | """ 277 | self._write_voc_results_file(all_boxes) 278 | self._do_python_eval(output_dir) 279 | 280 | def _get_voc_results_file_template(self): 281 | filename = 'comp4_det_test' + '_{:s}.txt' 282 | filedir = os.path.join( 283 | self.root, 'results', 'VOC' + self._year, 'Main') 284 | if not os.path.exists(filedir): 285 | os.makedirs(filedir) 286 | path = os.path.join(filedir, filename) 287 | return path 288 | 289 | def _write_voc_results_file(self, all_boxes): 290 | for cls_ind, cls in enumerate(VOC_CLASSES): 291 | cls_ind = cls_ind 292 | if cls == '__background__': 293 | continue 294 | print('Writing {} VOC results file'.format(cls)) 295 | filename = self._get_voc_results_file_template().format(cls) 296 | with open(filename, 'wt') as f: 297 | for im_ind, index in enumerate(self.ids): 298 | index = index[1] 299 | dets = all_boxes[cls_ind][im_ind] 300 | if dets == []: 301 | continue 302 | for k in range(dets.shape[0]): 303 | f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'. 304 | format(index, dets[k, -1], 305 | dets[k, 0] + 1, dets[k, 1] + 1, 306 | dets[k, 2] + 1, dets[k, 3] + 1)) 307 | 308 | def _do_python_eval(self, output_dir='output'): 309 | rootpath = os.path.join(self.root, 'VOC' + self._year) 310 | name = self.image_set[0][1] 311 | annopath = os.path.join( 312 | rootpath, 313 | 'Annotations', 314 | '{:s}.xml') 315 | imagesetfile = os.path.join( 316 | rootpath, 317 | 'ImageSets', 318 | 'Main', 319 | name+'.txt') 320 | cachedir = os.path.join(self.root, 'annotations_cache') 321 | aps = [] 322 | # The PASCAL VOC metric changed in 2010 323 | use_07_metric = True if int(self._year) < 2010 else False 324 | print('VOC07 metric? 
' + ('Yes' if use_07_metric else 'No')) 325 | if output_dir is not None and not os.path.isdir(output_dir): 326 | os.mkdir(output_dir) 327 | for i, cls in enumerate(VOC_CLASSES): 328 | 329 | if cls == '__background__': 330 | continue 331 | 332 | filename = self._get_voc_results_file_template().format(cls) 333 | rec, prec, ap = voc_eval( 334 | filename, annopath, imagesetfile, cls, cachedir, ovthresh=0.5, 335 | use_07_metric=use_07_metric) 336 | aps += [ap] 337 | print('AP for {} = {:.4f}'.format(cls, ap)) 338 | if output_dir is not None: 339 | with open(os.path.join(output_dir, cls + '_pr.pkl'), 'wb') as f: 340 | pickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f) 341 | print('Mean AP = {:.4f}'.format(np.mean(aps))) 342 | print('~~~~~~~~') 343 | print('Results:') 344 | for ap in aps: 345 | print('{:.3f}'.format(ap)) 346 | print('{:.3f}'.format(np.mean(aps))) 347 | print('~~~~~~~~') 348 | print('') 349 | print('--------------------------------------------------------------') 350 | print('Results computed with the **unofficial** Python eval code.') 351 | print('Results should be very close to the official MATLAB eval code.') 352 | print('Recompute with `./tools/reval.py --matlab ...` for your paper.') 353 | print('-- Thanks, The Management') 354 | print('--------------------------------------------------------------') 355 | 356 | def detection_collate(batch): 357 | """Custom collate fn for dealing with batches of images that have a different 358 | number of associated object annotations (bounding boxes). 359 | 360 | Arguments: 361 | batch: (tuple) A tuple of tensor images and lists of annotations 362 | 363 | Return: 364 | A tuple containing: 365 | 1) (tensor) batch of images stacked on their 0 dim 366 | 2) (list of tensors) annotations for a given image are stacked on 0 dim 367 | """ 368 | targets = [] 369 | imgs = [] 370 | for _, sample in enumerate(batch): 371 | for _, tup in enumerate(sample): 372 | if torch.is_tensor(tup): 373 | imgs.append(tup) 374 | elif isinstance(tup, type(np.empty(0))): 375 | annos = torch.from_numpy(tup).float() 376 | targets.append(annos) 377 | 378 | return (torch.stack(imgs, 0), targets) 379 | -------------------------------------------------------------------------------- /SSD/utils/pycocotools/coco.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | __version__ = '2.0' 3 | # Interface for accessing the Microsoft COCO dataset. 4 | 5 | # Microsoft COCO is a large image dataset designed for object detection, 6 | # segmentation, and caption generation. pycocotools is a Python API that 7 | # assists in loading, parsing and visualizing the annotations in COCO. 8 | # Please visit http://mscoco.org/ for more information on COCO, including 9 | # for the data, paper, and tutorials. The exact format of the annotations 10 | # is also described on the COCO website. For example usage of the pycocotools 11 | # please see pycocotools_demo.ipynb. In addition to this API, please download both 12 | # the COCO images and annotations in order to run the demo. 13 | 14 | # An alternative to using the API is to load the annotations directly 15 | # into Python dictionary 16 | # Using the API provides additional utility functions. Note that this API 17 | # supports both *instance* and *caption* annotations. In the case of 18 | # captions not all functions are defined (e.g. categories are undefined). 
19 | 20 | # The following API functions are defined: 21 | # COCO - COCO api class that loads COCO annotation file and prepare data structures. 22 | # decodeMask - Decode binary mask M encoded via run-length encoding. 23 | # encodeMask - Encode binary mask M using run-length encoding. 24 | # getAnnIds - Get ann ids that satisfy given filter conditions. 25 | # getCatIds - Get cat ids that satisfy given filter conditions. 26 | # getImgIds - Get img ids that satisfy given filter conditions. 27 | # loadAnns - Load anns with the specified ids. 28 | # loadCats - Load cats with the specified ids. 29 | # loadImgs - Load imgs with the specified ids. 30 | # annToMask - Convert segmentation in an annotation to binary mask. 31 | # showAnns - Display the specified annotations. 32 | # loadRes - Load algorithm results and create API for accessing them. 33 | # download - Download COCO images from mscoco.org server. 34 | # Throughout the API "ann"=annotation, "cat"=category, and "img"=image. 35 | # Help on each functions can be accessed by: "help COCO>function". 36 | 37 | # See also COCO>decodeMask, 38 | # COCO>encodeMask, COCO>getAnnIds, COCO>getCatIds, 39 | # COCO>getImgIds, COCO>loadAnns, COCO>loadCats, 40 | # COCO>loadImgs, COCO>annToMask, COCO>showAnns 41 | 42 | # Microsoft COCO Toolbox. version 2.0 43 | # Data, paper, and tutorials available at: http://mscoco.org/ 44 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2014. 45 | # Licensed under the Simplified BSD License [see bsd.txt] 46 | 47 | import json 48 | import time 49 | import matplotlib.pyplot as plt 50 | from matplotlib.collections import PatchCollection 51 | from matplotlib.patches import Polygon 52 | import numpy as np 53 | import copy 54 | import itertools 55 | from . import mask as maskUtils 56 | import os 57 | from collections import defaultdict 58 | import sys 59 | PYTHON_VERSION = sys.version_info[0] 60 | if PYTHON_VERSION == 2: 61 | from urllib import urlretrieve 62 | elif PYTHON_VERSION == 3: 63 | from urllib.request import urlretrieve 64 | 65 | class COCO: 66 | def __init__(self, annotation_file=None): 67 | """ 68 | Constructor of Microsoft COCO helper class for reading and visualizing annotations. 69 | :param annotation_file (str): location of annotation file 70 | :param image_folder (str): location to the folder that hosts images. 
71 | :return: 72 | """ 73 | # load dataset 74 | self.dataset,self.anns,self.cats,self.imgs = dict(),dict(),dict(),dict() 75 | self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list) 76 | if not annotation_file == None: 77 | print('loading annotations into memory...') 78 | tic = time.time() 79 | dataset = json.load(open(annotation_file, 'r')) 80 | assert type(dataset)==dict, 'annotation file format {} not supported'.format(type(dataset)) 81 | print('Done (t={:0.2f}s)'.format(time.time()- tic)) 82 | self.dataset = dataset 83 | self.createIndex() 84 | 85 | def createIndex(self): 86 | # create index 87 | print('creating index...') 88 | anns, cats, imgs = {}, {}, {} 89 | imgToAnns,catToImgs = defaultdict(list),defaultdict(list) 90 | if 'annotations' in self.dataset: 91 | for ann in self.dataset['annotations']: 92 | imgToAnns[ann['image_id']].append(ann) 93 | anns[ann['id']] = ann 94 | 95 | if 'images' in self.dataset: 96 | for img in self.dataset['images']: 97 | imgs[img['id']] = img 98 | 99 | if 'categories' in self.dataset: 100 | for cat in self.dataset['categories']: 101 | cats[cat['id']] = cat 102 | 103 | if 'annotations' in self.dataset and 'categories' in self.dataset: 104 | for ann in self.dataset['annotations']: 105 | catToImgs[ann['category_id']].append(ann['image_id']) 106 | 107 | print('index created!') 108 | 109 | # create class members 110 | self.anns = anns 111 | self.imgToAnns = imgToAnns 112 | self.catToImgs = catToImgs 113 | self.imgs = imgs 114 | self.cats = cats 115 | 116 | def info(self): 117 | """ 118 | Print information about the annotation file. 119 | :return: 120 | """ 121 | for key, value in self.dataset['info'].items(): 122 | print('{}: {}'.format(key, value)) 123 | 124 | def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None): 125 | """ 126 | Get ann ids that satisfy given filter conditions. default skips that filter 127 | :param imgIds (int array) : get anns for given imgs 128 | catIds (int array) : get anns for given cats 129 | areaRng (float array) : get anns for given area range (e.g. [0 inf]) 130 | iscrowd (boolean) : get anns for given crowd label (False or True) 131 | :return: ids (int array) : integer array of ann ids 132 | """ 133 | imgIds = imgIds if type(imgIds) == list else [imgIds] 134 | catIds = catIds if type(catIds) == list else [catIds] 135 | 136 | if len(imgIds) == len(catIds) == len(areaRng) == 0: 137 | anns = self.dataset['annotations'] 138 | else: 139 | if not len(imgIds) == 0: 140 | lists = [self.imgToAnns[imgId] for imgId in imgIds if imgId in self.imgToAnns] 141 | anns = list(itertools.chain.from_iterable(lists)) 142 | else: 143 | anns = self.dataset['annotations'] 144 | anns = anns if len(catIds) == 0 else [ann for ann in anns if ann['category_id'] in catIds] 145 | anns = anns if len(areaRng) == 0 else [ann for ann in anns if ann['area'] > areaRng[0] and ann['area'] < areaRng[1]] 146 | if not iscrowd == None: 147 | ids = [ann['id'] for ann in anns if ann['iscrowd'] == iscrowd] 148 | else: 149 | ids = [ann['id'] for ann in anns] 150 | return ids 151 | 152 | def getCatIds(self, catNms=[], supNms=[], catIds=[]): 153 | """ 154 | filtering parameters. default skips that filter. 
155 | :param catNms (str array) : get cats for given cat names 156 | :param supNms (str array) : get cats for given supercategory names 157 | :param catIds (int array) : get cats for given cat ids 158 | :return: ids (int array) : integer array of cat ids 159 | """ 160 | catNms = catNms if type(catNms) == list else [catNms] 161 | supNms = supNms if type(supNms) == list else [supNms] 162 | catIds = catIds if type(catIds) == list else [catIds] 163 | 164 | if len(catNms) == len(supNms) == len(catIds) == 0: 165 | cats = self.dataset['categories'] 166 | else: 167 | cats = self.dataset['categories'] 168 | cats = cats if len(catNms) == 0 else [cat for cat in cats if cat['name'] in catNms] 169 | cats = cats if len(supNms) == 0 else [cat for cat in cats if cat['supercategory'] in supNms] 170 | cats = cats if len(catIds) == 0 else [cat for cat in cats if cat['id'] in catIds] 171 | ids = [cat['id'] for cat in cats] 172 | return ids 173 | 174 | def getImgIds(self, imgIds=[], catIds=[]): 175 | ''' 176 | Get img ids that satisfy given filter conditions. 177 | :param imgIds (int array) : get imgs for given ids 178 | :param catIds (int array) : get imgs with all given cats 179 | :return: ids (int array) : integer array of img ids 180 | ''' 181 | imgIds = imgIds if type(imgIds) == list else [imgIds] 182 | catIds = catIds if type(catIds) == list else [catIds] 183 | 184 | if len(imgIds) == len(catIds) == 0: 185 | ids = self.imgs.keys() 186 | else: 187 | ids = set(imgIds) 188 | for i, catId in enumerate(catIds): 189 | if i == 0 and len(ids) == 0: 190 | ids = set(self.catToImgs[catId]) 191 | else: 192 | ids &= set(self.catToImgs[catId]) 193 | return list(ids) 194 | 195 | def loadAnns(self, ids=[]): 196 | """ 197 | Load anns with the specified ids. 198 | :param ids (int array) : integer ids specifying anns 199 | :return: anns (object array) : loaded ann objects 200 | """ 201 | if type(ids) == list: 202 | return [self.anns[id] for id in ids] 203 | elif type(ids) == int: 204 | return [self.anns[ids]] 205 | 206 | def loadCats(self, ids=[]): 207 | """ 208 | Load cats with the specified ids. 209 | :param ids (int array) : integer ids specifying cats 210 | :return: cats (object array) : loaded cat objects 211 | """ 212 | if type(ids) == list: 213 | return [self.cats[id] for id in ids] 214 | elif type(ids) == int: 215 | return [self.cats[ids]] 216 | 217 | def loadImgs(self, ids=[]): 218 | """ 219 | Load anns with the specified ids. 220 | :param ids (int array) : integer ids specifying img 221 | :return: imgs (object array) : loaded img objects 222 | """ 223 | if type(ids) == list: 224 | return [self.imgs[id] for id in ids] 225 | elif type(ids) == int: 226 | return [self.imgs[ids]] 227 | 228 | def showAnns(self, anns): 229 | """ 230 | Display the specified annotations. 
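Instance annotations are drawn as polygons or decoded RLE masks
(keypoint skeletons are overlaid when present); caption annotations
are simply printed to stdout.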
231 | :param anns (array of object): annotations to display 232 | :return: None 233 | """ 234 | if len(anns) == 0: 235 | return 0 236 | if 'segmentation' in anns[0] or 'keypoints' in anns[0]: 237 | datasetType = 'instances' 238 | elif 'caption' in anns[0]: 239 | datasetType = 'captions' 240 | else: 241 | raise Exception('datasetType not supported') 242 | if datasetType == 'instances': 243 | ax = plt.gca() 244 | ax.set_autoscale_on(False) 245 | polygons = [] 246 | color = [] 247 | for ann in anns: 248 | c = (np.random.random((1, 3))*0.6+0.4).tolist()[0] 249 | if 'segmentation' in ann: 250 | if type(ann['segmentation']) == list: 251 | # polygon 252 | for seg in ann['segmentation']: 253 | poly = np.array(seg).reshape((int(len(seg)/2), 2)) 254 | polygons.append(Polygon(poly)) 255 | color.append(c) 256 | else: 257 | # mask 258 | t = self.imgs[ann['image_id']] 259 | if type(ann['segmentation']['counts']) == list: 260 | rle = maskUtils.frPyObjects([ann['segmentation']], t['height'], t['width']) 261 | else: 262 | rle = [ann['segmentation']] 263 | m = maskUtils.decode(rle) 264 | img = np.ones( (m.shape[0], m.shape[1], 3) ) 265 | if ann['iscrowd'] == 1: 266 | color_mask = np.array([2.0,166.0,101.0])/255 267 | if ann['iscrowd'] == 0: 268 | color_mask = np.random.random((1, 3)).tolist()[0] 269 | for i in range(3): 270 | img[:,:,i] = color_mask[i] 271 | ax.imshow(np.dstack( (img, m*0.5) )) 272 | if 'keypoints' in ann and type(ann['keypoints']) == list: 273 | # turn skeleton into zero-based index 274 | sks = np.array(self.loadCats(ann['category_id'])[0]['skeleton'])-1 275 | kp = np.array(ann['keypoints']) 276 | x = kp[0::3] 277 | y = kp[1::3] 278 | v = kp[2::3] 279 | for sk in sks: 280 | if np.all(v[sk]>0): 281 | plt.plot(x[sk],y[sk], linewidth=3, color=c) 282 | plt.plot(x[v>0], y[v>0],'o',markersize=8, markerfacecolor=c, markeredgecolor='k',markeredgewidth=2) 283 | plt.plot(x[v>1], y[v>1],'o',markersize=8, markerfacecolor=c, markeredgecolor=c, markeredgewidth=2) 284 | p = PatchCollection(polygons, facecolor=color, linewidths=0, alpha=0.4) 285 | ax.add_collection(p) 286 | p = PatchCollection(polygons, facecolor='none', edgecolors=color, linewidths=2) 287 | ax.add_collection(p) 288 | elif datasetType == 'captions': 289 | for ann in anns: 290 | print(ann['caption']) 291 | 292 | def loadRes(self, resFile): 293 | """ 294 | Load result file and return a result api object. 
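For box detections, missing 'segmentation' and 'area' fields are
synthesized from the [x, y, width, height] bbox before indexing.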
295 | :param resFile (str) : file name of result file
296 | :return: res (obj) : result api object
297 | """
298 | res = COCO()
299 | res.dataset['images'] = [img for img in self.dataset['images']]
300 | 
301 | print('Loading and preparing results...')
302 | tic = time.time()
303 | if type(resFile) == str or (PYTHON_VERSION == 2 and type(resFile) == unicode):
304 | anns = json.load(open(resFile))
305 | elif type(resFile) == np.ndarray:
306 | anns = self.loadNumpyAnnotations(resFile)
307 | else:
308 | anns = resFile
309 | assert type(anns) == list, 'results is not an array of objects'
310 | annsImgIds = [ann['image_id'] for ann in anns]
311 | assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
312 | 'Results do not correspond to current coco set'
313 | if 'caption' in anns[0]:
314 | imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns])
315 | res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]
316 | for id, ann in enumerate(anns):
317 | ann['id'] = id+1
318 | elif 'bbox' in anns[0] and not anns[0]['bbox'] == []:
319 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
320 | for id, ann in enumerate(anns):
321 | bb = ann['bbox']
322 | x1, x2, y1, y2 = [bb[0], bb[0]+bb[2], bb[1], bb[1]+bb[3]]
323 | if not 'segmentation' in ann:
324 | ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
325 | ann['area'] = bb[2]*bb[3]
326 | ann['id'] = id+1
327 | ann['iscrowd'] = 0
328 | elif 'segmentation' in anns[0]:
329 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
330 | for id, ann in enumerate(anns):
331 | # now only support compressed RLE format as segmentation results
332 | ann['area'] = maskUtils.area(ann['segmentation'])
333 | if not 'bbox' in ann:
334 | ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
335 | ann['id'] = id+1
336 | ann['iscrowd'] = 0
337 | elif 'keypoints' in anns[0]:
338 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
339 | for id, ann in enumerate(anns):
340 | s = ann['keypoints']
341 | x = s[0::3]
342 | y = s[1::3]
343 | x0,x1,y0,y1 = np.min(x), np.max(x), np.min(y), np.max(y)
344 | ann['area'] = (x1-x0)*(y1-y0)
345 | ann['id'] = id + 1
346 | ann['bbox'] = [x0,y0,x1-x0,y1-y0]
347 | print('DONE (t={:0.2f}s)'.format(time.time()- tic))
348 | 
349 | res.dataset['annotations'] = anns
350 | res.createIndex()
351 | return res
352 | 
353 | def download(self, tarDir = None, imgIds = [] ):
354 | '''
355 | Download COCO images from mscoco.org server.
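Images that already exist under tarDir are skipped, so an interrupted
download can simply be re-run.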
353 |     def download(self, tarDir = None, imgIds = [] ):
354 |         '''
355 |         Download COCO images from mscoco.org server.
356 |         :param tarDir (str): COCO results directory name
357 |                imgIds (list): images to be downloaded
358 |         :return:
359 |         '''
360 |         if tarDir is None:
361 |             print('Please specify target directory')
362 |             return -1
363 |         if len(imgIds) == 0:
364 |             imgs = self.imgs.values()
365 |         else:
366 |             imgs = self.loadImgs(imgIds)
367 |         N = len(imgs)
368 |         if not os.path.exists(tarDir):
369 |             os.makedirs(tarDir)
370 |         for i, img in enumerate(imgs):
371 |             tic = time.time()
372 |             fname = os.path.join(tarDir, img['file_name'])
373 |             if not os.path.exists(fname):
374 |                 urlretrieve(img['coco_url'], fname)
375 |             print('downloaded {}/{} images (t={:0.1f}s)'.format(i, N, time.time()- tic))
376 | 
377 |     def loadNumpyAnnotations(self, data):
378 |         """
379 |         Convert result data from a numpy array [Nx7] where each row contains {imageID,x1,y1,w,h,score,class}
380 |         :param  data (numpy.ndarray)
381 |         :return: annotations (python nested list)
382 |         """
383 |         print('Converting ndarray to lists...')
384 |         assert(type(data) == np.ndarray)
385 |         print(data.shape)
386 |         assert(data.shape[1] == 7)
387 |         N = data.shape[0]
388 |         ann = []
389 |         for i in range(N):
390 |             if i % 1000000 == 0:
391 |                 print('{}/{}'.format(i,N))
392 |             ann += [{
393 |                 'image_id'  : int(data[i, 0]),
394 |                 'bbox'  : [ data[i, 1], data[i, 2], data[i, 3], data[i, 4] ],
395 |                 'score' : data[i, 5],
396 |                 'category_id': int(data[i, 6]),
397 |                 }]
398 |         return ann
399 | 
400 |     def annToRLE(self, ann):
401 |         """
402 |         Convert annotation which can be polygons, uncompressed RLE to RLE.
403 |         :return: RLE (run-length encoding of the mask)
404 |         """
405 |         t = self.imgs[ann['image_id']]
406 |         h, w = t['height'], t['width']
407 |         segm = ann['segmentation']
408 |         if type(segm) == list:
409 |             # polygon -- a single object might consist of multiple parts
410 |             # we merge all parts into one mask rle code
411 |             rles = maskUtils.frPyObjects(segm, h, w)
412 |             rle = maskUtils.merge(rles)
413 |         elif type(segm['counts']) == list:
414 |             # uncompressed RLE
415 |             rle = maskUtils.frPyObjects(segm, h, w)
416 |         else:
417 |             # rle
418 |             rle = ann['segmentation']
419 |         return rle
420 | 
421 |     def annToMask(self, ann):
422 |         """
423 |         Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
424 |         :return: binary mask (numpy 2D array)
425 |         """
426 |         rle = self.annToRLE(ann)
427 |         m = maskUtils.decode(rle)
428 |         return m
--------------------------------------------------------------------------------
/SSD/utils/pycocotools/cocoeval.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tsungyi'
2 | 
3 | import numpy as np
4 | import datetime
5 | import time
6 | from collections import defaultdict
7 | from . import mask as maskUtils
8 | import copy
9 | 
10 | class COCOeval:
11 |     # Interface for evaluating detection on the Microsoft COCO dataset.
12 |     #
13 |     # The usage for CocoEval is as follows:
14 |     #  cocoGt=..., cocoDt=...       # load dataset and results
15 |     #  E = CocoEval(cocoGt,cocoDt); # initialize CocoEval object
16 |     #  E.params.recThrs = ...;      # set parameters as desired
17 |     #  E.evaluate();                # run per image evaluation
18 |     #  E.accumulate();              # accumulate per image results
19 |     #  E.summarize();               # display summary metrics of results
20 |     # For example usage see evalDemo.m and http://mscoco.org/.
21 | # 22 | # The evaluation parameters are as follows (defaults in brackets): 23 | # imgIds - [all] N img ids to use for evaluation 24 | # catIds - [all] K cat ids to use for evaluation 25 | # iouThrs - [.5:.05:.95] T=10 IoU thresholds for evaluation 26 | # recThrs - [0:.01:1] R=101 recall thresholds for evaluation 27 | # areaRng - [...] A=4 object area ranges for evaluation 28 | # maxDets - [1 10 100] M=3 thresholds on max detections per image 29 | # iouType - ['segm'] set iouType to 'segm', 'bbox' or 'keypoints' 30 | # iouType replaced the now DEPRECATED useSegm parameter. 31 | # useCats - [1] if true use category labels for evaluation 32 | # Note: if useCats=0 category labels are ignored as in proposal scoring. 33 | # Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified. 34 | # 35 | # evaluate(): evaluates detections on every image and every category and 36 | # concats the results into the "evalImgs" with fields: 37 | # dtIds - [1xD] id for each of the D detections (dt) 38 | # gtIds - [1xG] id for each of the G ground truths (gt) 39 | # dtMatches - [TxD] matching gt id at each IoU or 0 40 | # gtMatches - [TxG] matching dt id at each IoU or 0 41 | # dtScores - [1xD] confidence of each dt 42 | # gtIgnore - [1xG] ignore flag for each gt 43 | # dtIgnore - [TxD] ignore flag for each dt at each IoU 44 | # 45 | # accumulate(): accumulates the per-image, per-category evaluation 46 | # results in "evalImgs" into the dictionary "eval" with fields: 47 | # params - parameters used for evaluation 48 | # date - date evaluation was performed 49 | # counts - [T,R,K,A,M] parameter dimensions (see above) 50 | # precision - [TxRxKxAxM] precision for every evaluation setting 51 | # recall - [TxKxAxM] max recall for every evaluation setting 52 | # Note: precision and recall==-1 for settings with no gt objects. 53 | # 54 | # See also coco, mask, pycocoDemo, pycocoEvalDemo 55 | # 56 | # Microsoft COCO Toolbox. version 2.0 57 | # Data, paper, and tutorials available at: http://mscoco.org/ 58 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 59 | # Licensed under the Simplified BSD License [see coco/license.txt] 60 | def __init__(self, cocoGt=None, cocoDt=None, iouType='segm'): 61 | ''' 62 | Initialize CocoEval using coco APIs for gt and dt 63 | :param cocoGt: coco object with ground truth annotations 64 | :param cocoDt: coco object with detection results 65 | :return: None 66 | ''' 67 | if not iouType: 68 | print('iouType not specified. 
use default iouType segm') 69 | self.cocoGt = cocoGt # ground truth COCO API 70 | self.cocoDt = cocoDt # detections COCO API 71 | self.params = {} # evaluation parameters 72 | self.evalImgs = defaultdict(list) # per-image per-category evaluation results [KxAxI] elements 73 | self.eval = {} # accumulated evaluation results 74 | self._gts = defaultdict(list) # gt for evaluation 75 | self._dts = defaultdict(list) # dt for evaluation 76 | self.params = Params(iouType=iouType) # parameters 77 | self._paramsEval = {} # parameters for evaluation 78 | self.stats = [] # result summarization 79 | self.ious = {} # ious between all gts and dts 80 | if not cocoGt is None: 81 | self.params.imgIds = sorted(cocoGt.getImgIds()) 82 | self.params.catIds = sorted(cocoGt.getCatIds()) 83 | 84 | 85 | def _prepare(self): 86 | ''' 87 | Prepare ._gts and ._dts for evaluation based on params 88 | :return: None 89 | ''' 90 | def _toMask(anns, coco): 91 | # modify ann['segmentation'] by reference 92 | for ann in anns: 93 | rle = coco.annToRLE(ann) 94 | ann['segmentation'] = rle 95 | p = self.params 96 | if p.useCats: 97 | gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)) 98 | dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)) 99 | else: 100 | gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds)) 101 | dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds)) 102 | 103 | # convert ground truth to mask if iouType == 'segm' 104 | if p.iouType == 'segm': 105 | _toMask(gts, self.cocoGt) 106 | _toMask(dts, self.cocoDt) 107 | # set ignore flag 108 | for gt in gts: 109 | gt['ignore'] = gt['ignore'] if 'ignore' in gt else 0 110 | gt['ignore'] = 'iscrowd' in gt and gt['iscrowd'] 111 | if p.iouType == 'keypoints': 112 | gt['ignore'] = (gt['num_keypoints'] == 0) or gt['ignore'] 113 | self._gts = defaultdict(list) # gt for evaluation 114 | self._dts = defaultdict(list) # dt for evaluation 115 | for gt in gts: 116 | self._gts[gt['image_id'], gt['category_id']].append(gt) 117 | for dt in dts: 118 | self._dts[dt['image_id'], dt['category_id']].append(dt) 119 | self.evalImgs = defaultdict(list) # per-image per-category evaluation results 120 | self.eval = {} # accumulated evaluation results 121 | 122 | def evaluate(self): 123 | ''' 124 | Run per image evaluation on given images and store results (a list of dict) in self.evalImgs 125 | :return: None 126 | ''' 127 | tic = time.time() 128 | print('Running per image evaluation...') 129 | p = self.params 130 | # add backward compatibility if useSegm is specified in params 131 | if not p.useSegm is None: 132 | p.iouType = 'segm' if p.useSegm == 1 else 'bbox' 133 | print('useSegm (deprecated) is not None. 
Running {} evaluation'.format(p.iouType))
134 |         print('Evaluate annotation type *{}*'.format(p.iouType))
135 |         p.imgIds = list(np.unique(p.imgIds))
136 |         if p.useCats:
137 |             p.catIds = list(np.unique(p.catIds))
138 |         p.maxDets = sorted(p.maxDets)
139 |         self.params=p
140 | 
141 |         self._prepare()
142 |         # loop through images, area range, max detection number
143 |         catIds = p.catIds if p.useCats else [-1]
144 | 
145 |         if p.iouType == 'segm' or p.iouType == 'bbox':
146 |             computeIoU = self.computeIoU
147 |         elif p.iouType == 'keypoints':
148 |             computeIoU = self.computeOks
149 |         self.ious = {(imgId, catId): computeIoU(imgId, catId) \
150 |                         for imgId in p.imgIds
151 |                         for catId in catIds}
152 | 
153 |         evaluateImg = self.evaluateImg
154 |         maxDet = p.maxDets[-1]
155 |         self.evalImgs = [evaluateImg(imgId, catId, areaRng, maxDet)
156 |                  for catId in catIds
157 |                  for areaRng in p.areaRng
158 |                  for imgId in p.imgIds
159 |              ]
160 |         self._paramsEval = copy.deepcopy(self.params)
161 |         toc = time.time()
162 |         print('DONE (t={:0.2f}s).'.format(toc-tic))
163 | 
164 |     def computeIoU(self, imgId, catId):
165 |         p = self.params
166 |         if p.useCats:
167 |             gt = self._gts[imgId,catId]
168 |             dt = self._dts[imgId,catId]
169 |         else:
170 |             gt = [_ for cId in p.catIds for _ in self._gts[imgId,cId]]
171 |             dt = [_ for cId in p.catIds for _ in self._dts[imgId,cId]]
172 |         if len(gt) == 0 and len(dt) ==0:
173 |             return []
174 |         inds = np.argsort([-d['score'] for d in dt], kind='mergesort')
175 |         dt = [dt[i] for i in inds]
176 |         if len(dt) > p.maxDets[-1]:
177 |             dt=dt[0:p.maxDets[-1]]
178 | 
179 |         if p.iouType == 'segm':
180 |             g = [g['segmentation'] for g in gt]
181 |             d = [d['segmentation'] for d in dt]
182 |         elif p.iouType == 'bbox':
183 |             g = [g['bbox'] for g in gt]
184 |             d = [d['bbox'] for d in dt]
185 |         else:
186 |             raise Exception('unknown iouType for iou computation')
187 | 
188 |         # compute iou between each dt and gt region
189 |         iscrowd = [int(o['iscrowd']) for o in gt]
190 |         ious = maskUtils.iou(d,g,iscrowd)
191 |         return ious
192 | 
193 |     def computeOks(self, imgId, catId):
194 |         p = self.params
195 |         # dimension here should be Nxm
196 |         gts = self._gts[imgId, catId]
197 |         dts = self._dts[imgId, catId]
198 |         inds = np.argsort([-d['score'] for d in dts], kind='mergesort')
199 |         dts = [dts[i] for i in inds]
200 |         if len(dts) > p.maxDets[-1]:
201 |             dts = dts[0:p.maxDets[-1]]
202 |         # if len(gts) == 0 and len(dts) == 0:
203 |         if len(gts) == 0 or len(dts) == 0:
204 |             return []
205 |         ious = np.zeros((len(dts), len(gts)))
206 |         sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62,.62, 1.07, 1.07, .87, .87, .89, .89])/10.0
207 |         vars = (sigmas * 2)**2
208 |         k = len(sigmas)
209 |         # compute oks between each detection and ground truth object
210 |         for j, gt in enumerate(gts):
211 |             # create bounds for ignore regions(double the gt bbox)
212 |             g = np.array(gt['keypoints'])
213 |             xg = g[0::3]; yg = g[1::3]; vg = g[2::3]
214 |             k1 = np.count_nonzero(vg > 0)
215 |             bb = gt['bbox']
216 |             x0 = bb[0] - bb[2]; x1 = bb[0] + bb[2] * 2
217 |             y0 = bb[1] - bb[3]; y1 = bb[1] + bb[3] * 2
218 |             for i, dt in enumerate(dts):
219 |                 d = np.array(dt['keypoints'])
220 |                 xd = d[0::3]; yd = d[1::3]
221 |                 if k1>0:
222 |                     # measure the per-keypoint distance if keypoints visible
223 |                     dx = xd - xg
224 |                     dy = yd - yg
225 |                 else:
226 |                     # measure minimum distance to keypoints in (x0,y0) & (x1,y1)
227 |                     z = np.zeros((k))
228 |                     dx = np.max((z, x0-xd),axis=0)+np.max((z, xd-x1),axis=0)
229 |                     dy = np.max((z, y0-yd),axis=0)+np.max((z, yd-y1),axis=0)
230 |                 e = (dx**2 + dy**2) / vars / (gt['area']+np.spacing(1)) / 2
231 |                 if k1 > 0:
232 |                     e=e[vg > 0]
233 |                 ious[i, j] = np.sum(np.exp(-e)) / e.shape[0]
234 |         return ious
235 | 
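    # A minimal standalone sketch (illustrative comment only, assuming the
    # Cython extensions were built via make.sh and the repo root is on
    # sys.path) of the maskUtils.iou call that computeIoU relies on. Boxes
    # are [x, y, w, h]; a nonzero iscrowd flag for a gt makes the union
    # collapse to the detection's own area.
    #
    #     from utils.pycocotools import mask as maskUtils
    #     d = [[0., 0., 10., 10.]]            # one detection box
    #     g = [[5., 5., 10., 10.]]            # one ground-truth box
    #     print(maskUtils.iou(d, g, [0]))     # 25 / 175 ~= 0.143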
236 |     def evaluateImg(self, imgId, catId, aRng, maxDet):
237 |         '''
238 |         perform evaluation for single category and image
239 |         :return: dict (single image results)
240 |         '''
241 |         p = self.params
242 |         if p.useCats:
243 |             gt = self._gts[imgId,catId]
244 |             dt = self._dts[imgId,catId]
245 |         else:
246 |             gt = [_ for cId in p.catIds for _ in self._gts[imgId,cId]]
247 |             dt = [_ for cId in p.catIds for _ in self._dts[imgId,cId]]
248 |         if len(gt) == 0 and len(dt) ==0:
249 |             return None
250 | 
251 |         for g in gt:
252 |             if g['ignore'] or (g['area'] < aRng[0] or g['area'] > aRng[1]):
253 |                 g['_ignore'] = 1
254 |             else:
255 |                 g['_ignore'] = 0
256 | 
257 |         # sort dt highest score first, sort gt ignore last
258 |         gtind = np.argsort([g['_ignore'] for g in gt], kind='mergesort')
259 |         gt = [gt[i] for i in gtind]
260 |         dtind = np.argsort([-d['score'] for d in dt], kind='mergesort')
261 |         dt = [dt[i] for i in dtind[0:maxDet]]
262 |         iscrowd = [int(o['iscrowd']) for o in gt]
263 |         # load computed ious
264 |         ious = self.ious[imgId, catId][:, gtind] if len(self.ious[imgId, catId]) > 0 else self.ious[imgId, catId]
265 | 
266 |         T = len(p.iouThrs)
267 |         G = len(gt)
268 |         D = len(dt)
269 |         gtm = np.zeros((T,G))
270 |         dtm = np.zeros((T,D))
271 |         gtIg = np.array([g['_ignore'] for g in gt])
272 |         dtIg = np.zeros((T,D))
273 |         if not len(ious)==0:
274 |             for tind, t in enumerate(p.iouThrs):
275 |                 for dind, d in enumerate(dt):
276 |                     # information about best match so far (m=-1 -> unmatched)
277 |                     iou = min([t,1-1e-10])
278 |                     m = -1
279 |                     for gind, g in enumerate(gt):
280 |                         # if this gt already matched, and not a crowd, continue
281 |                         if gtm[tind,gind]>0 and not iscrowd[gind]:
282 |                             continue
283 |                         # if dt matched to reg gt, and on ignore gt, stop
284 |                         if m>-1 and gtIg[m]==0 and gtIg[gind]==1:
285 |                             break
286 |                         # continue to next gt unless better match made
287 |                         if ious[dind,gind] < iou:
288 |                             continue
289 |                         # if match successful and best so far, store appropriately
290 |                         iou=ious[dind,gind]
291 |                         m=gind
292 |                     # if match made store id of match for both dt and gt
293 |                     if m == -1:
294 |                         continue
295 |                     dtIg[tind,dind] = gtIg[m]
296 |                     dtm[tind,dind]  = gt[m]['id']
297 |                     gtm[tind,m]     = d['id']
298 |         # set unmatched detections outside of area range to ignore
299 |         a = np.array([d['area'] < aRng[0] or d['area'] > aRng[1] for d in dt]).reshape((1, len(dt)))
300 |         dtIg = np.logical_or(dtIg, np.logical_and(dtm==0, np.repeat(a,T,0)))
301 |         # store results for given image and category
302 |         return {
303 |                 'image_id':     imgId,
304 |                 'category_id':  catId,
305 |                 'aRng':         aRng,
306 |                 'maxDet':       maxDet,
307 |                 'dtIds':        [d['id'] for d in dt],
308 |                 'gtIds':        [g['id'] for g in gt],
309 |                 'dtMatches':    dtm,
310 |                 'gtMatches':    gtm,
311 |                 'dtScores':     [d['score'] for d in dt],
312 |                 'gtIgnore':     gtIg,
313 |                 'dtIgnore':     dtIg,
314 |             }
315 | 
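    # A hedged toy walk-through (comment only, with made-up values) of the
    # greedy matching above: detections are visited in descending score order,
    # and a non-crowd gt that is already matched is skipped, so the
    # higher-scoring detection keeps the gt even when a later detection
    # overlaps it more.
    #
    #     ious = np.array([[0.6],    # dt0 (higher score) vs gt0
    #                      [0.8]])   # dt1 (lower score)  vs gt0
    #     gtm = np.zeros(1)
    #     for dind in range(2):
    #         iou, m = 0.5, -1                 # IoU threshold t = 0.5
    #         for gind in range(1):
    #             if gtm[gind] > 0:            # gt already taken, not a crowd
    #                 continue
    #             if ious[dind, gind] < iou:
    #                 continue
    #             iou, m = ious[dind, gind], gind
    #         if m > -1:
    #             gtm[m] = dind + 1            # stand-in for gt[m]['id'] bookkeeping
    #     # gtm == [1.]: gt0 goes to dt0; dt1 stays unmatched (a false positive)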
316 |     def accumulate(self, p = None):
317 |         '''
318 |         Accumulate per image evaluation results and store the result in self.eval
319 |         :param p: input params for evaluation
320 |         :return: None
321 |         '''
322 |         print('Accumulating evaluation results...')
323 |         tic = time.time()
324 |         if not self.evalImgs:
325 |             print('Please run evaluate() first')
326 |         # allows input customized parameters
327 |         if p is None:
328 |             p = self.params
329 |         p.catIds = p.catIds if p.useCats == 1 else [-1]
330 |         T           = len(p.iouThrs)
331 |         R           = len(p.recThrs)
332 |         K           = len(p.catIds) if p.useCats else 1
333 |         A           = len(p.areaRng)
334 |         M           = len(p.maxDets)
335 |         precision   = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories
336 |         recall      = -np.ones((T,K,A,M))
337 | 
338 |         # create dictionary for future indexing
339 |         _pe = self._paramsEval
340 |         catIds = _pe.catIds if _pe.useCats else [-1]
341 |         setK = set(catIds)
342 |         setA = set(map(tuple, _pe.areaRng))
343 |         setM = set(_pe.maxDets)
344 |         setI = set(_pe.imgIds)
345 |         # get inds to evaluate
346 |         k_list = [n for n, k in enumerate(p.catIds)  if k in setK]
347 |         m_list = [m for n, m in enumerate(p.maxDets) if m in setM]
348 |         a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA]
349 |         i_list = [n for n, i in enumerate(p.imgIds)  if i in setI]
350 |         I0 = len(_pe.imgIds)
351 |         A0 = len(_pe.areaRng)
352 |         # retrieve E at each category, area range, and max number of detections
353 |         for k, k0 in enumerate(k_list):
354 |             Nk = k0*A0*I0
355 |             for a, a0 in enumerate(a_list):
356 |                 Na = a0*I0
357 |                 for m, maxDet in enumerate(m_list):
358 |                     E = [self.evalImgs[Nk + Na + i] for i in i_list]
359 |                     E = [e for e in E if not e is None]
360 |                     if len(E) == 0:
361 |                         continue
362 |                     dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E])
363 | 
364 |                     # different sorting method generates slightly different results.
365 |                     # mergesort is used to be consistent as Matlab implementation.
366 |                     inds = np.argsort(-dtScores, kind='mergesort')
367 | 
368 |                     dtm  = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds]
369 |                     dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet]  for e in E], axis=1)[:,inds]
370 |                     gtIg = np.concatenate([e['gtIgnore'] for e in E])
371 |                     npig = np.count_nonzero(gtIg==0 )
372 |                     if npig == 0:
373 |                         continue
374 |                     tps = np.logical_and(               dtm,  np.logical_not(dtIg) )
375 |                     fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) )
376 | 
377 |                     tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float64)
378 |                     fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float64)
379 |                     for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
380 |                         tp = np.array(tp)
381 |                         fp = np.array(fp)
382 |                         nd = len(tp)
383 |                         rc = tp / npig
384 |                         pr = tp / (fp+tp+np.spacing(1))
385 |                         q  = np.zeros((R,))
386 | 
387 |                         if nd:
388 |                             recall[t,k,a,m] = rc[-1]
389 |                         else:
390 |                             recall[t,k,a,m] = 0
391 | 
392 |                         # numpy is slow without cython optimization for accessing elements
393 |                         # use python array gets significant speed improvement
394 |                         pr = pr.tolist(); q = q.tolist()
395 | 
396 |                         for i in range(nd-1, 0, -1):
397 |                             if pr[i] > pr[i-1]:
398 |                                 pr[i-1] = pr[i]
399 | 
400 |                         inds = np.searchsorted(rc, p.recThrs, side='left')
401 |                         try:
402 |                             for ri, pi in enumerate(inds):
403 |                                 q[ri] = pr[pi]
404 |                         except:
405 |                             pass
406 |                         precision[t,:,k,a,m] = np.array(q)
407 |         self.eval = {
408 |             'params': p,
409 |             'counts': [T, R, K, A, M],
410 |             'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
411 |             'precision': precision,
412 |             'recall':   recall,
413 |         }
414 |         toc = time.time()
415 |         print('DONE (t={:0.2f}s).'.format( toc-tic))
416 | 
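    # A hedged sketch (comment only) of reading the arrays filled above back
    # out, mirroring what _summarize(ap=1) does in summarize() below; E is
    # assumed to be a COCOeval instance after accumulate(), with p = E.params:
    #
    #     aind = p.areaRngLbl.index('all')
    #     mind = p.maxDets.index(100)
    #     s = E.eval['precision'][:, :, :, aind, mind]   # [T x R x K]
    #     mAP = np.mean(s[s > -1])   # -1 marks settings with no gt objects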
417 |     def summarize(self):
418 |         '''
419 |         Compute and display summary metrics for evaluation results.
420 |         Note this function can *only* be applied on the default parameter setting
421 |         '''
422 |         def _summarize( ap=1, iouThr=None, areaRng='all', maxDets=100 ):
423 |             p = self.params
424 |             iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
425 |             titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
426 |             typeStr = '(AP)' if ap==1 else '(AR)'
427 |             iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \
428 |                 if iouThr is None else '{:0.2f}'.format(iouThr)
429 | 
430 |             aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
431 |             mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]
432 |             if ap == 1:
433 |                 # dimension of precision: [TxRxKxAxM]
434 |                 s = self.eval['precision']
435 |                 # IoU
436 |                 if iouThr is not None:
437 |                     t = np.where(iouThr == p.iouThrs)[0]
438 |                     s = s[t]
439 |                 s = s[:,:,:,aind,mind]
440 |             else:
441 |                 # dimension of recall: [TxKxAxM]
442 |                 s = self.eval['recall']
443 |                 if iouThr is not None:
444 |                     t = np.where(iouThr == p.iouThrs)[0]
445 |                     s = s[t]
446 |                 s = s[:,:,aind,mind]
447 |             if len(s[s>-1])==0:
448 |                 mean_s = -1
449 |             else:
450 |                 mean_s = np.mean(s[s>-1])
451 |             print(iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s))
452 |             return mean_s
453 |         def _summarizeDets():
454 |             stats = np.zeros((12,))
455 |             stats[0] = _summarize(1)
456 |             stats[1] = _summarize(1, iouThr=.5, maxDets=self.params.maxDets[2])
457 |             stats[2] = _summarize(1, iouThr=.75, maxDets=self.params.maxDets[2])
458 |             stats[3] = _summarize(1, areaRng='small', maxDets=self.params.maxDets[2])
459 |             stats[4] = _summarize(1, areaRng='medium', maxDets=self.params.maxDets[2])
460 |             stats[5] = _summarize(1, areaRng='large', maxDets=self.params.maxDets[2])
461 |             stats[6] = _summarize(0, maxDets=self.params.maxDets[0])
462 |             stats[7] = _summarize(0, maxDets=self.params.maxDets[1])
463 |             stats[8] = _summarize(0, maxDets=self.params.maxDets[2])
464 |             stats[9] = _summarize(0, areaRng='small', maxDets=self.params.maxDets[2])
465 |             stats[10] = _summarize(0, areaRng='medium', maxDets=self.params.maxDets[2])
466 |             stats[11] = _summarize(0, areaRng='large', maxDets=self.params.maxDets[2])
467 |             return stats
468 |         def _summarizeKps():
469 |             stats = np.zeros((10,))
470 |             stats[0] = _summarize(1, maxDets=20)
471 |             stats[1] = _summarize(1, maxDets=20, iouThr=.5)
472 |             stats[2] = _summarize(1, maxDets=20, iouThr=.75)
473 |             stats[3] = _summarize(1, maxDets=20, areaRng='medium')
474 |             stats[4] = _summarize(1, maxDets=20, areaRng='large')
475 |             stats[5] = _summarize(0, maxDets=20)
476 |             stats[6] = _summarize(0, maxDets=20, iouThr=.5)
477 |             stats[7] = _summarize(0, maxDets=20, iouThr=.75)
478 |             stats[8] = _summarize(0, maxDets=20, areaRng='medium')
479 |             stats[9] = _summarize(0, maxDets=20, areaRng='large')
480 |             return stats
481 |         if not self.eval:
482 |             raise Exception('Please run accumulate() first')
483 |         iouType = self.params.iouType
484 |         if iouType == 'segm' or iouType == 'bbox':
485 |             summarize = _summarizeDets
486 |         elif iouType == 'keypoints':
487 |             summarize = _summarizeKps
488 |         self.stats = summarize()
489 | 
490 |     def __str__(self):
491 |         self.summarize()
492 | 
493 | class Params:
494 |     '''
495 |     Params for coco evaluation api
496 |     '''
497 |     def setDetParams(self):
498 |         self.imgIds = []
499 |         self.catIds = []
500 |         # np.arange causes trouble.  the data point on arange is slightly larger than the true value
501 |         self.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True)
502 |         self.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, endpoint=True)
503 |         self.maxDets = [1, 10, 100]
504 |         self.areaRng = [[0 ** 2, 1e5 ** 2], [0 ** 2, 32 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]]
505 |         self.areaRngLbl = ['all', 'small', 'medium', 'large']
506 |         self.useCats = 1
507 | 
508 |     def setKpParams(self):
509 |         self.imgIds = []
510 |         self.catIds = []
511 |         # np.arange causes trouble.  the data point on arange is slightly larger than the true value
512 |         self.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True)
513 |         self.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, endpoint=True)
514 |         self.maxDets = [20]
515 |         self.areaRng = [[0 ** 2, 1e5 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]]
516 |         self.areaRngLbl = ['all', 'medium', 'large']
517 |         self.useCats = 1
518 | 
519 |     def __init__(self, iouType='segm'):
520 |         if iouType == 'segm' or iouType == 'bbox':
521 |             self.setDetParams()
522 |         elif iouType == 'keypoints':
523 |             self.setKpParams()
524 |         else:
525 |             raise Exception('iouType not supported')
526 |         self.iouType = iouType
527 |         # useSegm is deprecated
528 |         self.useSegm = None
--------------------------------------------------------------------------------
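A minimal end-to-end sketch of the evaluation loop described in the COCOeval
header comment; the file names below are placeholders, and the imports assume
the repo layout above with the Cython extensions built via make.sh.

    from utils.pycocotools.coco import COCO
    from utils.pycocotools.cocoeval import COCOeval

    cocoGt = COCO('instances_val.json')          # hypothetical ground-truth file
    cocoDt = cocoGt.loadRes('detections.json')   # hypothetical result file
    E = COCOeval(cocoGt, cocoDt, iouType='bbox')
    E.evaluate()      # per-image, per-category matching
    E.accumulate()    # build the precision/recall arrays
    E.summarize()     # print the 12 AP/AR numbers from _summarizeDets()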