├── README.md ├── __init__ ├── config ├── cifar100_config.json └── coco_config.json ├── dataloaders.py ├── lib ├── README.md ├── build │ └── temp.linux-x86_64-3.6 │ │ └── nms │ │ ├── gpu_nms.o │ │ └── nms_kernel.o ├── nms │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-36.pyc │ ├── gpu_nms.cpp │ ├── gpu_nms.cpython-36m-x86_64-linux-gnu.so │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── nms_kernel.cu │ ├── nums_py.py │ ├── nums_py1.c │ ├── nums_py1.pyx │ ├── nums_py2.c │ ├── nums_py2.pyx │ ├── setup1.py │ └── setup2.py ├── setup3.py └── test_num.py ├── losses.py ├── model ├── BottleneckBlock.py ├── __init__.py ├── anchors.py ├── attentionConv2d.py ├── gpu_nms.pyx ├── losses.py ├── retinanet.py └── wideresnet.py ├── pytorch-retinanet ├── .gitignore ├── LICENSE ├── README.md ├── anchors.py ├── attentionConv2d.py ├── coco_eval.py ├── csv_eval.py ├── dataloader.py ├── images │ ├── 1.jpg │ ├── 3.jpg │ ├── 4.jpg │ ├── 5.jpg │ ├── 6.jpg │ ├── 7.jpg │ └── 8.jpg ├── lib │ ├── README.md │ ├── nms │ │ ├── __init__.py │ │ ├── gpu_nms.cpp │ │ ├── gpu_nms.hpp │ │ ├── gpu_nms.pyx │ │ ├── nms_kernel.cu │ │ ├── nums_py.py │ │ ├── nums_py1.c │ │ ├── nums_py1.pyx │ │ ├── nums_py2.c │ │ ├── nums_py2.pyx │ │ ├── setup1.py │ │ └── setup2.py │ ├── setup3.py │ └── test_num.py ├── losses.py ├── model.py ├── oid_dataset.py ├── train.py ├── utils.py └── visualize.py ├── train.py └── utils └── utils.py /README.md: -------------------------------------------------------------------------------- 1 | # pytorch-attention-augmented-convolution 2 | A pytorch implementation of https://arxiv.org/abs/1904.09925 3 | -------------------------------------------------------------------------------- /__init__: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sebastiani/pytorch-attention-augmented-convolution/4f0eb899714f22a88a1b6a602ee2dfb20f59a4b6/__init__ -------------------------------------------------------------------------------- /config/cifar100_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cifar100_net", 3 | "model": "AttentionWideResNet", 4 | "checkpoint_dir": "wgts/attconv_cifar100/", 5 | "tb_logdir": "cifar100net_logs", 6 | "batch_size": 16, 7 | "epochs": 500, 8 | "lr": 0.01, 9 | "momentum": 0.9, 10 | "log_interval": 200 11 | } 12 | -------------------------------------------------------------------------------- /config/coco_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "coco_net", 3 | "model": "AttentionRetinaNet", 4 | "checkpoint_dir": "wgts/attnconv_coco/", 5 | "tb_logdir": "coconet_logs", 6 | "batch_size": 16, 7 | "epochs": 500, 8 | "lr": 0.01, 9 | "momentum": 0.9, 10 | "log_interval": 200 11 | } -------------------------------------------------------------------------------- /dataloaders.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | from PIL import Image 3 | import os 4 | import os.path 5 | import numpy as np 6 | 7 | 8 | class CocoDetection(data.Dataset): 9 | """`MS Coco Detection `_ Dataset. 10 | 11 | Args: 12 | root (string): Root directory where images are downloaded to. 13 | annFile (string): Path to json annotation file. 14 | transform (callable, optional): A function/transform that takes in an PIL image 15 | and returns a transformed version. 
E.g, ``transforms.ToTensor`` 16 | target_transform (callable, optional): A function/transform that takes in the 17 | target and transforms it. 18 | """ 19 | 20 | def __init__(self, root, annFile, transform=None, img_and_target_transform=None, target_transform=None): 21 | from pycocotools.coco import COCO 22 | self.root = root 23 | self.coco = COCO(annFile) 24 | self.ids = list(self.coco.imgs.keys()) 25 | self.transform = transform 26 | self.target_transform = target_transform 27 | self.img_and_target_transform=img_and_target_transform 28 | 29 | 30 | def __getitem__(self, index): 31 | """ 32 | Args: 33 | index (int): Index 34 | 35 | Returns: 36 | tuple: Tuple (image, target). target is the object returned by ``coco.loadAnns``. 37 | """ 38 | coco = self.coco 39 | img_id = self.ids[index] 40 | ann_ids = coco.getAnnIds(imgIds=img_id) 41 | target = coco.loadAnns(ann_ids) 42 | 43 | path = coco.loadImgs(img_id)[0]['file_name'] 44 | 45 | img = Image.open(os.path.join(self.root, path)).convert('RGB') 46 | print(np.array(img).shape) 47 | 48 | if self.img_and_target_transform is not None: 49 | img, target, _ = self.img_and_target_transform(img, target) 50 | 51 | if self.transform is not None: 52 | img = self.transform(img) 53 | 54 | return img, target 55 | 56 | def __len__(self): 57 | return len(self.ids) 58 | 59 | def __repr__(self): 60 | fmt_str = 'Dataset ' + self.__class__.__name__ + '\n' 61 | fmt_str += ' Number of datapoints: {}\n'.format(self.__len__()) 62 | fmt_str += ' Root Location: {}\n'.format(self.root) 63 | tmp = ' Transforms (if any): ' 64 | fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) 65 | tmp = ' Target Transforms (if any): ' 66 | fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) 67 | return fmt_str -------------------------------------------------------------------------------- /lib/README.md: -------------------------------------------------------------------------------- 1 | # NMS 2 | the comparison of nms in speed 3 | 4 | method 1: 5 | thresh=0.7, time wastes:0.0287 6 | thresh=0.8, time wastes:0.1057 7 | thresh=0.9, time wastes:0.4204 8 | 9 | method 2: 10 | thresh=0.7, time wastes:0.0272 11 | thresh=0.8, time wastes:0.1038 12 | thresh=0.9, time wastes:0.4184 13 | 14 | method 3: 15 | thresh=0.7, time wastes:0.0019 16 | thresh=0.8, time wastes:0.0028 17 | thresh=0.9, time wastes:0.0036 18 | 19 | method 4: 20 | thresh=0.7, time wastes:0.0120 21 | thresh=0.8, time wastes:0.0063 22 | thresh=0.9, time wastes:0.0071 23 | 24 | Reference: 25 | py-faster-rcnn: https://github.com/rbgirshick/py-faster-rcnn/tree/master/lib/nms 26 | -------------------------------------------------------------------------------- /lib/build/temp.linux-x86_64-3.6/nms/gpu_nms.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sebastiani/pytorch-attention-augmented-convolution/4f0eb899714f22a88a1b6a602ee2dfb20f59a4b6/lib/build/temp.linux-x86_64-3.6/nms/gpu_nms.o -------------------------------------------------------------------------------- /lib/build/temp.linux-x86_64-3.6/nms/nms_kernel.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sebastiani/pytorch-attention-augmented-convolution/4f0eb899714f22a88a1b6a602ee2dfb20f59a4b6/lib/build/temp.linux-x86_64-3.6/nms/nms_kernel.o -------------------------------------------------------------------------------- /lib/nms/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sebastiani/pytorch-attention-augmented-convolution/4f0eb899714f22a88a1b6a602ee2dfb20f59a4b6/lib/nms/__init__.py -------------------------------------------------------------------------------- /lib/nms/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sebastiani/pytorch-attention-augmented-convolution/4f0eb899714f22a88a1b6a602ee2dfb20f59a4b6/lib/nms/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/nms/gpu_nms.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sebastiani/pytorch-attention-augmented-convolution/4f0eb899714f22a88a1b6a602ee2dfb20f59a4b6/lib/nms/gpu_nms.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /lib/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | 2 | # -------------------------------------------------------- 3 | # Faster R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick 7 | # -------------------------------------------------------- 8 | 9 | 10 | import numpy as np 11 | cimport numpy as np 12 | 13 | assert sizeof(int) == sizeof(np.int32_t) 14 | 15 | cdef extern from "gpu_nms.hpp": 16 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 17 | 18 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 19 | np.int32_t device_id=0): 20 | dets = dets.numpy() 21 | cdef int boxes_num = dets.shape[0] 22 | cdef int boxes_dim = dets.shape[1] 23 | cdef int num_out 24 | cdef np.ndarray[np.int32_t, ndim=1] \ 25 | keep = np.zeros(boxes_num, dtype=np.int32) 26 | cdef np.ndarray[np.float32_t, ndim=1] \ 27 | scores = dets[:, 4] 28 | cdef np.ndarray[np.int_t, ndim=1] \ 29 | order = scores.argsort()[::-1] 30 | cdef np.ndarray[np.float32_t, ndim=2] \ 31 | sorted_dets = dets[order, :] 32 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 33 | keep = keep[:num_out] 34 | return list(order[keep]) 35 | 36 | 37 | -------------------------------------------------------------------------------- /lib/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | 9 | #include "gpu_nms.hpp" 10 | #include 11 | #include 12 | 13 | #define CUDA_CHECK(condition) \ 14 | /* Code block avoids redefinition of cudaError_t error */ \ 15 | do { \ 16 | cudaError_t error = condition; \ 17 | if (error != cudaSuccess) { \ 18 | std::cout << cudaGetErrorString(error) << std::endl; \ 19 | } \ 20 | } while 
(0) 21 | 22 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 23 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 24 | 25 | __device__ inline float devIoU(float const * const a, float const * const b) { 26 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 27 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 28 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 29 | float interS = width * height; 30 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 31 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 32 | return interS / (Sa + Sb - interS); 33 | } 34 | 35 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 36 | const float *dev_boxes, unsigned long long *dev_mask) { 37 | const int row_start = blockIdx.y; 38 | const int col_start = blockIdx.x; 39 | 40 | // if (row_start > col_start) return; 41 | 42 | const int row_size = 43 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 44 | const int col_size = 45 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 46 | 47 | __shared__ float block_boxes[threadsPerBlock * 5]; 48 | if (threadIdx.x < col_size) { 49 | block_boxes[threadIdx.x * 5 + 0] = 50 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 51 | block_boxes[threadIdx.x * 5 + 1] = 52 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 53 | block_boxes[threadIdx.x * 5 + 2] = 54 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 55 | block_boxes[threadIdx.x * 5 + 3] = 56 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 57 | block_boxes[threadIdx.x * 5 + 4] = 58 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 59 | } 60 | __syncthreads(); 61 | 62 | if (threadIdx.x < row_size) { 63 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 64 | const float *cur_box = dev_boxes + cur_box_idx * 5; 65 | int i = 0; 66 | unsigned long long t = 0; 67 | int start = 0; 68 | if (row_start == col_start) { 69 | start = threadIdx.x + 1; 70 | } 71 | for (i = start; i < col_size; i++) { 72 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 73 | t |= 1ULL << i; 74 | } 75 | } 76 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 77 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 78 | } 79 | } 80 | 81 | void _set_device(int device_id) { 82 | int current_device; 83 | CUDA_CHECK(cudaGetDevice(¤t_device)); 84 | if (current_device == device_id) { 85 | return; 86 | } 87 | // The call to cudaSetDevice must come before any calls to Get, which 88 | // may perform initialization using the GPU. 
89 | CUDA_CHECK(cudaSetDevice(device_id)); 90 | } 91 | 92 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 93 | int boxes_dim, float nms_overlap_thresh, int device_id) { 94 | _set_device(device_id); 95 | 96 | float* boxes_dev = NULL; 97 | unsigned long long* mask_dev = NULL; 98 | 99 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 100 | 101 | CUDA_CHECK(cudaMalloc(&boxes_dev, 102 | boxes_num * boxes_dim * sizeof(float))); 103 | CUDA_CHECK(cudaMemcpy(boxes_dev, 104 | boxes_host, 105 | boxes_num * boxes_dim * sizeof(float), 106 | cudaMemcpyHostToDevice)); 107 | 108 | CUDA_CHECK(cudaMalloc(&mask_dev, 109 | boxes_num * col_blocks * sizeof(unsigned long long))); 110 | 111 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 112 | DIVUP(boxes_num, threadsPerBlock)); 113 | dim3 threads(threadsPerBlock); 114 | nms_kernel<<>>(boxes_num, 115 | nms_overlap_thresh, 116 | boxes_dev, 117 | mask_dev); 118 | 119 | std::vector mask_host(boxes_num * col_blocks); 120 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 121 | mask_dev, 122 | sizeof(unsigned long long) * boxes_num * col_blocks, 123 | cudaMemcpyDeviceToHost)); 124 | 125 | std::vector remv(col_blocks); 126 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 127 | 128 | int num_to_keep = 0; 129 | for (int i = 0; i < boxes_num; i++) { 130 | int nblock = i / threadsPerBlock; 131 | int inblock = i % threadsPerBlock; 132 | 133 | if (!(remv[nblock] & (1ULL << inblock))) { 134 | keep_out[num_to_keep++] = i; 135 | unsigned long long *p = &mask_host[0] + i * col_blocks; 136 | for (int j = nblock; j < col_blocks; j++) { 137 | remv[j] |= p[j]; 138 | } 139 | } 140 | } 141 | *num_out = num_to_keep; 142 | 143 | CUDA_CHECK(cudaFree(boxes_dev)); 144 | CUDA_CHECK(cudaFree(mask_dev)); 145 | } 146 | -------------------------------------------------------------------------------- /lib/nms/nums_py.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Mon May 7 21:45:37 2018 5 | 6 | @author: lps 7 | """ 8 | import numpy as np 9 | 10 | 11 | boxes=np.array([[100,100,210,210,0.72], 12 | [250,250,420,420,0.8], 13 | [220,220,320,330,0.92], 14 | [100,100,210,210,0.72], 15 | [230,240,325,330,0.81], 16 | [220,230,315,340,0.9]]) 17 | 18 | 19 | def py_cpu_nms(dets, thresh): 20 | # dets:(m,5) thresh:scaler 21 | 22 | x1 = dets[:,0] 23 | y1 = dets[:,1] 24 | x2 = dets[:,2] 25 | y2 = dets[:,3] 26 | 27 | areas = (y2-y1+1) * (x2-x1+1) 28 | scores = dets[:,4] 29 | keep = [] 30 | 31 | index = scores.argsort()[::-1] 32 | 33 | while index.size >0: 34 | 35 | i = index[0] # every time the first is the biggst, and add it directly 36 | keep.append(i) 37 | 38 | x11 = np.maximum(x1[i], x1[index[1:]]) # calculate the points of overlap 39 | y11 = np.maximum(y1[i], y1[index[1:]]) 40 | x22 = np.minimum(x2[i], x2[index[1:]]) 41 | y22 = np.minimum(y2[i], y2[index[1:]]) 42 | 43 | w = np.maximum(0, x22-x11+1) # the weights of overlap 44 | h = np.maximum(0, y22-y11+1) # the height of overlap 45 | 46 | overlaps = w*h 47 | 48 | ious = overlaps / (areas[i]+areas[index[1:]] - overlaps) 49 | 50 | idx = np.where(ious<=thresh)[0] 51 | 52 | index = index[idx+1] # because index start from 1 53 | 54 | return keep 55 | 56 | 57 | import matplotlib.pyplot as plt 58 | def plot_bbox(dets, c='k'): 59 | 60 | x1 = dets[:,0] 61 | y1 = dets[:,1] 62 | x2 = dets[:,2] 63 | y2 = dets[:,3] 64 | 65 | 66 | plt.plot([x1,x2], [y1,y1], c) 67 | plt.plot([x1,x1], [y1,y2], c) 68 | 
plt.plot([x1,x2], [y2,y2], c) 69 | plt.plot([x2,x2], [y1,y2], c) 70 | plt.title("after nms") 71 | 72 | #plot_bbox(boxes,'k') # before nms 73 | # 74 | #keep = py_cpu_nms(boxes, thresh=0.7) 75 | #plot_bbox(boxes[keep], 'r')# after nms 76 | # 77 | 78 | -------------------------------------------------------------------------------- /lib/nms/nums_py1.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | boxes=np.array([[100,100,210,210,0.72], 5 | [250,250,420,420,0.8], 6 | [220,220,320,330,0.92], 7 | [100,100,210,210,0.72], 8 | [230,240,325,330,0.81], 9 | [220,230,315,340,0.9]]) 10 | 11 | 12 | def py_cpu_nms(dets, thresh): 13 | # dets:(m,5) thresh:scaler 14 | 15 | x1 = dets[:,0] 16 | y1 = dets[:,1] 17 | x2 = dets[:,2] 18 | y2 = dets[:,3] 19 | 20 | areas = (y2-y1+1) * (x2-x1+1) 21 | scores = dets[:,4] 22 | keep = [] 23 | 24 | index = scores.argsort()[::-1] 25 | 26 | j=0 27 | while index.size >0: 28 | 29 | j = j+1 30 | i = index[0] # every time the first is the biggst, and add it directly 31 | keep.append(i) 32 | 33 | x11 = np.maximum(x1[i], x1[index[1:]]) # calculate the points of overlap 34 | y11 = np.maximum(y1[i], y1[index[1:]]) 35 | x22 = np.minimum(x2[i], x2[index[1:]]) 36 | y22 = np.minimum(y2[i], y2[index[1:]]) 37 | 38 | w = np.maximum(0, x22-x11+1) # the weights of overlap 39 | h = np.maximum(0, y22-y11+1) # the height of overlap 40 | 41 | overlaps = w*h 42 | 43 | ious = overlaps / (areas[i]+areas[index[1:]] - overlaps) 44 | 45 | idx = np.where(ious<=thresh)[0] 46 | 47 | index = index[idx+1] # because index starts with 1 48 | 49 | return keep,j 50 | 51 | import matplotlib.pyplot as plt 52 | def plot_bbox(dets, c='k'): 53 | 54 | x1 = dets[:,0] 55 | y1 = dets[:,1] 56 | x2 = dets[:,2] 57 | y2 = dets[:,3] 58 | 59 | plt.plot([x1,x2], [y1,y1], c) 60 | plt.plot([x1,x1], [y1,y2], c) 61 | plt.plot([x1,x2], [y2,y2], c) 62 | plt.plot([x2,x2], [y1,y2], c) 63 | 64 | #plot_bbox(boxes,'k') # before nms 65 | 66 | #keep = py_cpu_nms(boxes, thresh=0.7) 67 | #plot_bbox(boxes[keep], 'r')# after nms 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /lib/nms/nums_py2.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | # 4 | #boxes=np.array([[100,100,210,210,0.72], 5 | # [250,250,420,420,0.8], 6 | # [220,220,320,330,0.92], 7 | # [100,100,210,210,0.72], 8 | # [230,240,325,330,0.81], 9 | # [220,230,315,340,0.9]]) 10 | # 11 | 12 | 13 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 14 | return a if a >= b else b 15 | 16 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 17 | return a if a <= b else b 18 | 19 | def py_cpu_nms(np.ndarray[np.float32_t,ndim=2] dets, np.float thresh): 20 | # dets:(m,5) thresh:scaler 21 | 22 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:,0] 23 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:,1] 24 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:,2] 25 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:,3] 26 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 27 | 28 | cdef np.ndarray[np.float32_t, ndim=1] areas = (y2-y1+1) * (x2-x1+1) 29 | cdef np.ndarray[np.int_t, ndim=1] index = scores.argsort()[::-1] # can be rewriten 30 | keep = [] 31 | 32 | cdef int ndets = dets.shape[0] 33 | cdef np.ndarray[np.int_t, ndim=1] suppressed = np.zeros(ndets, dtype=np.int) 34 | 35 | cdef int _i, _j 36 | 37 | cdef int i, j 38 | 39 | cdef np.float32_t ix1, 
iy1, ix2, iy2, iarea 40 | cdef np.float32_t w, h 41 | cdef np.float32_t overlap, ious 42 | 43 | j=0 44 | 45 | for _i in range(ndets): 46 | i = index[_i] 47 | 48 | if suppressed[i] == 1: 49 | continue 50 | keep.append(i) 51 | 52 | ix1 = x1[i] 53 | iy1 = y1[i] 54 | ix2 = x2[i] 55 | iy2 = y2[i] 56 | 57 | iarea = areas[i] 58 | 59 | for _j in range(_i+1, ndets): 60 | j = index[_j] 61 | if suppressed[j] == 1: 62 | continue 63 | xx1 = max(ix1, x1[j]) 64 | yy1 = max(iy1, y1[j]) 65 | xx2 = min(ix2, x2[j]) 66 | yy2 = min(iy2, y2[j]) 67 | 68 | w = max(0.0, xx2-xx1+1) 69 | h = max(0.0, yy2-yy1+1) 70 | 71 | overlap = w*h 72 | ious = overlap / (iarea + areas[j] - overlap) 73 | if ious>thresh: 74 | suppressed[j] = 1 75 | 76 | return keep 77 | 78 | import matplotlib.pyplot as plt 79 | def plot_bbox(dets, c='k'): 80 | 81 | x1 = dets[:,0] 82 | y1 = dets[:,1] 83 | x2 = dets[:,2] 84 | y2 = dets[:,3] 85 | 86 | plt.plot([x1,x2], [y1,y1], c) 87 | plt.plot([x1,x1], [y1,y2], c) 88 | plt.plot([x1,x2], [y2,y2], c) 89 | plt.plot([x2,x2], [y1,y2], c) 90 | 91 | 92 | #plot_bbox(boxes,'k') # before nms 93 | # 94 | #keep = py_cpu_nms(boxes, thresh=0.7) 95 | #plot_bbox(boxes[keep], 'r')# after nms 96 | 97 | 98 | -------------------------------------------------------------------------------- /lib/nms/setup1.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | 4 | setup( 5 | name = 'nms_module', 6 | ext_modules = cythonize('nums_py1.pyx'), 7 | ) 8 | -------------------------------------------------------------------------------- /lib/nms/setup2.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | 4 | setup( 5 | name = 'nms_module', 6 | ext_modules = cythonize('nums_py2.pyx'), 7 | ) 8 | -------------------------------------------------------------------------------- /lib/setup3.py: -------------------------------------------------------------------------------- 1 | 2 | # -------------------------------------------------------- 3 | # Faster R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick 7 | # -------------------------------------------------------- 8 | 9 | 10 | from distutils.core import setup 11 | from Cython.Build import cythonize 12 | from distutils.extension import Extension 13 | from Cython.Distutils import build_ext 14 | import subprocess 15 | import numpy as np 16 | import os 17 | from os.path import join as pjoin 18 | 19 | 20 | def find_in_path(name, path): 21 | "Find a file in a search path" 22 | # Adapted from 23 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 24 | for dir in path.split(os.pathsep): 25 | binpath = pjoin(dir, name) 26 | if os.path.exists(binpath): 27 | return os.path.abspath(binpath) 28 | return None 29 | 30 | def locate_cuda(): 31 | """Locate the CUDA environment on the system 32 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 33 | and values giving the absolute path to each directory. 34 | Starts by looking for the CUDAHOME env variable. If not found, everything 35 | is based on finding 'nvcc' in the PATH.
36 | """ 37 | 38 | # first check if the CUDAHOME env variable is in use 39 | if 'CUDAHOME' in os.environ: 40 | home = os.environ['CUDAHOME'] 41 | nvcc = pjoin(home, 'bin', 'nvcc') 42 | else: 43 | # otherwise, search the PATH for NVCC 44 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 45 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 46 | if nvcc is None: 47 | raise EnvironmentError('The nvcc binary could not be ' 48 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 49 | home = os.path.dirname(os.path.dirname(nvcc)) 50 | 51 | cudaconfig = {'home':home, 'nvcc':nvcc, 52 | 'include': pjoin(home, 'include'), 53 | 'lib64': pjoin(home, 'lib64')} 54 | for k, v in cudaconfig.items(): 55 | if not os.path.exists(v): 56 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 57 | 58 | return cudaconfig 59 | CUDA = locate_cuda() 60 | 61 | try: 62 | numpy_include = np.get_include() 63 | except AttributeError: 64 | numpy_include = np.get_numpy_include() 65 | 66 | 67 | def customize_compiler_for_nvcc(self): 68 | """inject deep into distutils to customize how the dispatch 69 | to gcc/nvcc works. 70 | If you subclass UnixCCompiler, it's not trivial to get your subclass 71 | injected in, and still have the right customizations (i.e. 72 | distutils.sysconfig.customize_compiler) run on it. So instead of going 73 | the OO route, I have this. Note, it's kindof like a wierd functional 74 | subclassing going on.""" 75 | 76 | # tell the compiler it can processes .cu 77 | self.src_extensions.append('.cu') 78 | 79 | # save references to the default compiler_so and _comple methods 80 | default_compiler_so = self.compiler_so 81 | super = self._compile 82 | 83 | # now redefine the _compile method. This gets executed for each 84 | # object but distutils doesn't have the ability to change compilers 85 | # based on source extension: we add it. 
86 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 87 | if os.path.splitext(src)[1] == '.cu': 88 | # use the cuda for .cu files 89 | self.set_executable('compiler_so', CUDA['nvcc']) 90 | # use only a subset of the extra_postargs, which are 1-1 translated 91 | # from the extra_compile_args in the Extension class 92 | postargs = extra_postargs['nvcc'] 93 | else: 94 | postargs = extra_postargs['gcc'] 95 | 96 | super(obj, src, ext, cc_args, postargs, pp_opts) 97 | # reset the default compiler_so, which we might have changed for cuda 98 | self.compiler_so = default_compiler_so 99 | 100 | # inject our redefined _compile method into the class 101 | self._compile = _compile 102 | 103 | 104 | # run the customize_compiler 105 | class custom_build_ext(build_ext): 106 | def build_extensions(self): 107 | customize_compiler_for_nvcc(self.compiler) 108 | build_ext.build_extensions(self) 109 | 110 | ext_modules = [Extension('nms.gpu_nms', 111 | ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'], 112 | library_dirs=[CUDA['lib64']], 113 | libraries=['cudart'], 114 | language='c++', 115 | runtime_library_dirs=[CUDA['lib64']], 116 | # this syntax is specific to this build system 117 | # we're only going to use certain compiler args with nvcc and not with 118 | # gcc the implementation of this trick is in customize_compiler() below 119 | extra_compile_args={'gcc': ["-Wno-unused-function"], 120 | 'nvcc': ['-arch=sm_35', 121 | '--ptxas-options=-v', 122 | '-c', 123 | '--compiler-options', 124 | "'-fPIC'"]}, 125 | include_dirs = [numpy_include, CUDA['include']] 126 | )] 127 | 128 | setup( 129 | name='fast_rcnn', 130 | ext_modules=ext_modules, 131 | # inject our custom trigger 132 | cmdclass={'build_ext': custom_build_ext}, 133 | ) 134 | 135 | -------------------------------------------------------------------------------- /lib/test_num.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | #from nms.nums_py2 import py_cpu_nms # for cpu 4 | from nms.gpu_nms import gpu_nms # for gpu 5 | 6 | 7 | np.random.seed( 1 ) # keep fixed 8 | num_rois = 6000 9 | minxy = np.random.randint(50,145,size=(num_rois ,2)) 10 | maxxy = np.random.randint(150,200,size=(num_rois ,2)) 11 | score = 0.8*np.random.random_sample((num_rois ,1))+0.2 12 | 13 | boxes_new = np.concatenate((minxy,maxxy,score), axis=1).astype(np.float32) 14 | 15 | def nms_test_time(boxes_new): 16 | 17 | thresh = [0.7,0.8,0.9] 18 | T = 50 19 | for i in range(len(thresh)): 20 | since = time.time() 21 | for t in range(T): 22 | 23 | # keep = py_cpu_nms(boxes_new, thresh=thresh[i]) # for cpu 24 | keep = gpu_nms(boxes_new, thresh=thresh[i]) # for gpu 25 | print("thresh={:.1f}, time wastes:{:.4f}".format(thresh[i], (time.time()-since)/T)) 26 | 27 | return keep 28 | 29 | 30 | if __name__ =="__main__": 31 | nms_test_time(boxes_new) 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /losses.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | def calc_iou(a, b): 6 | area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1]) 7 | 8 | iw = torch.min(torch.unsqueeze(a[:, 2], dim=1), b[:, 2]) - torch.max(torch.unsqueeze(a[:, 0], 1), b[:, 0]) 9 | ih = torch.min(torch.unsqueeze(a[:, 3], dim=1), b[:, 3]) - torch.max(torch.unsqueeze(a[:, 1], 1), b[:, 1]) 10 | 11 | iw = torch.clamp(iw, min=0) 12 | ih = torch.clamp(ih, min=0) 13 | 14 | ua = torch.unsqueeze((a[:, 2] - 
a[:, 0]) * (a[:, 3] - a[:, 1]), dim=1) + area - iw * ih 15 | 16 | ua = torch.clamp(ua, min=1e-8) 17 | 18 | intersection = iw * ih 19 | 20 | IoU = intersection / ua 21 | 22 | return IoU 23 | 24 | class FocalLoss(nn.Module): 25 | #def __init__(self): 26 | 27 | def forward(self, classifications, regressions, anchors, annotations): 28 | alpha = 0.25 29 | gamma = 2.0 30 | batch_size = classifications.shape[0] 31 | classification_losses = [] 32 | regression_losses = [] 33 | 34 | anchor = anchors[0, :, :] 35 | 36 | anchor_widths = anchor[:, 2] - anchor[:, 0] 37 | anchor_heights = anchor[:, 3] - anchor[:, 1] 38 | anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths 39 | anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights 40 | 41 | for j in range(batch_size): 42 | 43 | classification = classifications[j, :, :] 44 | regression = regressions[j, :, :] 45 | 46 | bbox_annotation = annotations[j, :, :] 47 | bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1] 48 | 49 | if bbox_annotation.shape[0] == 0: 50 | regression_losses.append(torch.tensor(0).float().cuda()) 51 | classification_losses.append(torch.tensor(0).float().cuda()) 52 | 53 | continue 54 | 55 | classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4) 56 | 57 | IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4]) # num_anchors x num_annotations 58 | 59 | IoU_max, IoU_argmax = torch.max(IoU, dim=1) # num_anchors x 1 60 | 61 | #import pdb 62 | #pdb.set_trace() 63 | 64 | # compute the loss for classification 65 | targets = torch.ones(classification.shape) * -1 66 | targets = targets.cuda() 67 | 68 | targets[torch.lt(IoU_max, 0.4), :] = 0 69 | 70 | positive_indices = torch.ge(IoU_max, 0.5) 71 | 72 | num_positive_anchors = positive_indices.sum() 73 | 74 | assigned_annotations = bbox_annotation[IoU_argmax, :] 75 | 76 | targets[positive_indices, :] = 0 77 | targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1 78 | 79 | alpha_factor = torch.ones(targets.shape).cuda() * alpha 80 | 81 | alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor) 82 | focal_weight = torch.where(torch.eq(targets, 1.), 1. 
- classification, classification) 83 | focal_weight = alpha_factor * torch.pow(focal_weight, gamma) 84 | 85 | bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification)) 86 | 87 | # cls_loss = focal_weight * torch.pow(bce, gamma) 88 | cls_loss = focal_weight * bce 89 | 90 | cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape).cuda()) 91 | 92 | classification_losses.append(cls_loss.sum()/torch.clamp(num_positive_anchors.float(), min=1.0)) 93 | 94 | # compute the loss for regression 95 | 96 | if positive_indices.sum() > 0: 97 | assigned_annotations = assigned_annotations[positive_indices, :] 98 | 99 | anchor_widths_pi = anchor_widths[positive_indices] 100 | anchor_heights_pi = anchor_heights[positive_indices] 101 | anchor_ctr_x_pi = anchor_ctr_x[positive_indices] 102 | anchor_ctr_y_pi = anchor_ctr_y[positive_indices] 103 | 104 | gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0] 105 | gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1] 106 | gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths 107 | gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights 108 | 109 | # clip widths to 1 110 | gt_widths = torch.clamp(gt_widths, min=1) 111 | gt_heights = torch.clamp(gt_heights, min=1) 112 | 113 | targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi 114 | targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi 115 | targets_dw = torch.log(gt_widths / anchor_widths_pi) 116 | targets_dh = torch.log(gt_heights / anchor_heights_pi) 117 | 118 | targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh)) 119 | targets = targets.t() 120 | 121 | targets = targets/torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).cuda() 122 | 123 | 124 | negative_indices = 1 - positive_indices 125 | 126 | regression_diff = torch.abs(targets - regression[positive_indices, :]) 127 | 128 | regression_loss = torch.where( 129 | torch.le(regression_diff, 1.0 / 9.0), 130 | 0.5 * 9.0 * torch.pow(regression_diff, 2), 131 | regression_diff - 0.5 / 9.0 132 | ) 133 | regression_losses.append(regression_loss.mean()) 134 | else: 135 | regression_losses.append(torch.tensor(0).float().cuda()) 136 | 137 | return torch.stack(classification_losses).mean(dim=0, keepdim=True), torch.stack(regression_losses).mean(dim=0, keepdim=True) 138 | 139 | 140 | -------------------------------------------------------------------------------- /model/BottleneckBlock.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .attentionConv2d import AttentionConv2d 4 | from ..utils.utils import comptue_dim 5 | 6 | 7 | class BottleneckBlock(nn.Module): 8 | expansion = 4 9 | def __init__(self, input_dim, output_dim, stride=1, downsample_shortcut=None, attention=False, expansion=4, 10 | kappa=None, nu=None, num_heads=None, H=None, W=None): 11 | super(BottleneckBlock, self).__init__() 12 | self.expansion = expansion 13 | 14 | self.relu = nn.ReLU(inplace=True) 15 | self.conv1 = nn.Conv2d(input_dim, output_dim, kernel_size=1, bias=False) 16 | self.bn1 = nn.BatchNorm2d(output_dim) 17 | 18 | self.conv2 = nn.Conv2d(output_dim, output_dim, kernel_size=3, stride=stride, padding=1, bias=False) 19 | self.bn2 = nn.BatchNorm2d(output_dim) 20 | 21 | expansion_dim = expansion * output_dim 22 | self.conv3 = None 23 | if attention: 24 | dk = round(kappa * expansion_dim) 25 | dv = round(nu * expansion_dim) 26 | h = comptue_dim(H, 1, 3, stride) 27 | w = comptue_dim(W, 1, 3, stride) 
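# dk/dv: key and value depths of the attention branch, set as the fractions kappa and nu
# of the block's expanded output channels (expansion * output_dim).
# h/w: spatial size after the stride-`stride` 3x3 conv above; AttentionConv2d uses them
# to size its relative position embeddings (key_rel_h / key_rel_w).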
28 | self.conv3 = AttentionConv2d(output_dim, expansion_dim, dk, dv, num_heads, kernel_size=1, padding=0, 29 | height=h, width=w) 30 | 31 | else: 32 | self.conv3 = nn.Conv2d(output_dim, expansion_dim, kernel_size=1, bias=False) 33 | 34 | self.bn3 = nn.BatchNorm2d(expansion * output_dim) 35 | self.downsample_shortcut = downsample_shortcut 36 | 37 | def forward(self, x): 38 | residual = x 39 | out = self.conv1(x) 40 | out = self.bn1(out) 41 | out = self.relu(out) 42 | 43 | out = self.conv2(out) 44 | out = self.bn2(out) 45 | out = self.relu(out) 46 | 47 | out = self.conv3(out) 48 | out = self.bn3(out) 49 | 50 | if self.downsample_shortcut is not None: 51 | residual = self.downsample_shortcut(x) 52 | 53 | out += residual 54 | out = self.relu(out) 55 | 56 | return out 57 | -------------------------------------------------------------------------------- /model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sebastiani/pytorch-attention-augmented-convolution/4f0eb899714f22a88a1b6a602ee2dfb20f59a4b6/model/__init__.py -------------------------------------------------------------------------------- /model/anchors.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | 6 | class Anchors(nn.Module): 7 | def __init__(self, pyramid_levels=None, strides=None, sizes=None, ratios=None, scales=None): 8 | super(Anchors, self).__init__() 9 | 10 | if pyramid_levels is None: 11 | self.pyramid_levels = [3, 4, 5, 6, 7] 12 | if strides is None: 13 | self.strides = [2 ** x for x in self.pyramid_levels] 14 | if sizes is None: 15 | self.sizes = [2 ** (x + 2) for x in self.pyramid_levels] 16 | if ratios is None: 17 | self.ratios = np.array([0.5, 1, 2]) 18 | if scales is None: 19 | self.scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]) 20 | 21 | def forward(self, image): 22 | 23 | image_shape = image.shape[2:] 24 | image_shape = np.array(image_shape) 25 | image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in self.pyramid_levels] 26 | 27 | # compute anchors over all pyramid levels 28 | all_anchors = np.zeros((0, 4)).astype(np.float32) 29 | 30 | for idx, p in enumerate(self.pyramid_levels): 31 | anchors = generate_anchors(base_size=self.sizes[idx], ratios=self.ratios, scales=self.scales) 32 | shifted_anchors = shift(image_shapes[idx], self.strides[idx], anchors) 33 | all_anchors = np.append(all_anchors, shifted_anchors, axis=0) 34 | 35 | all_anchors = np.expand_dims(all_anchors, axis=0) 36 | 37 | return torch.from_numpy(all_anchors.astype(np.float32)).cuda() 38 | 39 | def generate_anchors(base_size=16, ratios=None, scales=None): 40 | """ 41 | Generate anchor (reference) windows by enumerating aspect ratios X 42 | scales w.r.t. a reference window. 
43 | """ 44 | 45 | if ratios is None: 46 | ratios = np.array([0.5, 1, 2]) 47 | 48 | if scales is None: 49 | scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]) 50 | 51 | num_anchors = len(ratios) * len(scales) 52 | 53 | # initialize output anchors 54 | anchors = np.zeros((num_anchors, 4)) 55 | 56 | # scale base_size 57 | anchors[:, 2:] = base_size * np.tile(scales, (2, len(ratios))).T 58 | 59 | # compute areas of anchors 60 | areas = anchors[:, 2] * anchors[:, 3] 61 | 62 | # correct for ratios 63 | anchors[:, 2] = np.sqrt(areas / np.repeat(ratios, len(scales))) 64 | anchors[:, 3] = anchors[:, 2] * np.repeat(ratios, len(scales)) 65 | 66 | # transform from (x_ctr, y_ctr, w, h) -> (x1, y1, x2, y2) 67 | anchors[:, 0::2] -= np.tile(anchors[:, 2] * 0.5, (2, 1)).T 68 | anchors[:, 1::2] -= np.tile(anchors[:, 3] * 0.5, (2, 1)).T 69 | 70 | return anchors 71 | 72 | def compute_shape(image_shape, pyramid_levels): 73 | """Compute shapes based on pyramid levels. 74 | 75 | :param image_shape: 76 | :param pyramid_levels: 77 | :return: 78 | """ 79 | image_shape = np.array(image_shape[:2]) 80 | image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in pyramid_levels] 81 | return image_shapes 82 | 83 | 84 | def anchors_for_shape( 85 | image_shape, 86 | pyramid_levels=None, 87 | ratios=None, 88 | scales=None, 89 | strides=None, 90 | sizes=None, 91 | shapes_callback=None, 92 | ): 93 | 94 | image_shapes = compute_shape(image_shape, pyramid_levels) 95 | 96 | # compute anchors over all pyramid levels 97 | all_anchors = np.zeros((0, 4)) 98 | for idx, p in enumerate(pyramid_levels): 99 | anchors = generate_anchors(base_size=sizes[idx], ratios=ratios, scales=scales) 100 | shifted_anchors = shift(image_shapes[idx], strides[idx], anchors) 101 | all_anchors = np.append(all_anchors, shifted_anchors, axis=0) 102 | 103 | return all_anchors 104 | 105 | 106 | def shift(shape, stride, anchors): 107 | shift_x = (np.arange(0, shape[1]) + 0.5) * stride 108 | shift_y = (np.arange(0, shape[0]) + 0.5) * stride 109 | 110 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 111 | 112 | shifts = np.vstack(( 113 | shift_x.ravel(), shift_y.ravel(), 114 | shift_x.ravel(), shift_y.ravel() 115 | )).transpose() 116 | 117 | # add A anchors (1, A, 4) to 118 | # cell K shifts (K, 1, 4) to get 119 | # shift anchors (K, A, 4) 120 | # reshape to (K*A, 4) shifted anchors 121 | A = anchors.shape[0] 122 | K = shifts.shape[0] 123 | all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))) 124 | all_anchors = all_anchors.reshape((K * A, 4)) 125 | 126 | return all_anchors 127 | 128 | -------------------------------------------------------------------------------- /model/attentionConv2d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch import einsum 5 | 6 | 7 | class AttentionConv2d(nn.Module): 8 | def __init__(self, input_dim, output_dim, dk, dv, num_heads, kernel_size, padding, rel_encoding=True, height=None, width=None): 9 | super(AttentionConv2d, self).__init__() 10 | self.input_dim = input_dim 11 | self.output_dim = output_dim 12 | self.dk = dk 13 | self.dv = dv 14 | self.num_heads = num_heads 15 | self.kernel_size = kernel_size 16 | self.dkh = self.dk // self.num_heads 17 | if rel_encoding and not height: 18 | raise("Cannot use relative encoding without specifying input's height and width") 19 | self.H = height 20 | self.W = width 21 | 22 | self.conv_qkv = 
nn.Conv2d(input_dim, 2*dk + dv, 1) 23 | self.conv_attn = nn.Conv2d(dv, dv, 1) 24 | self.conv_out = nn.Conv2d(input_dim, output_dim - dv, kernel_size, padding=padding) 25 | self.softmax = nn.Softmax(dim=-1) 26 | self.key_rel_w = nn.Parameter(self.dkh**-0.5 + torch.rand(2*width-1, self.dkh), requires_grad=True) 27 | self.key_rel_h = nn.Parameter(self.dkh**-0.5 + torch.rand(2*height-1, self.dkh), requires_grad=True) 28 | self.relative_encoding = rel_encoding 29 | 30 | 31 | def forward(self, input): 32 | conv_out = self.conv_out(input) 33 | 34 | qkv = self.conv_qkv(input) # batch_size, 2*dk+dv, H, W 35 | 36 | q, k, v = torch.split(qkv, [self.dk, self.dk, self.dv], dim=1) 37 | 38 | batch_size, _, H, W = q.size() 39 | 40 | q = q.view([batch_size, self.num_heads, self.dk // self.num_heads, H*W]) 41 | k = k.view([batch_size, self.num_heads, self.dk // self.num_heads, H*W]) 42 | v = v.view([batch_size, self.num_heads, self.dv // self.num_heads, H*W]) 43 | 44 | q *= self.dkh ** -0.5 45 | logits = einsum('ijkl, ijkm -> ijlm', q, k) 46 | if self.relative_encoding: 47 | h_rel_logits, w_rel_logits = self._relative_logits(q) 48 | logits += h_rel_logits 49 | logits += w_rel_logits 50 | 51 | weights = self.softmax(logits) 52 | attn_out = einsum('ijkl, ijfl -> ijfk', weights, v) 53 | attn_out = attn_out.contiguous().view(batch_size, self.dv, H, W) 54 | attn_out = self.conv_attn(attn_out) 55 | output = torch.cat([conv_out, attn_out], dim=1) 56 | return output 57 | 58 | def _relative_logits(self, q): 59 | b, nh, dkh, _ = q.size() 60 | q = q.view(b, nh, dkh, self.H, self.W) 61 | 62 | rel_logits_w = self._relative_logits1d(q, self.key_rel_w, self.H, self.W, nh, [0, 1, 2, 4, 3, 5]) 63 | rel_logits_h = self._relative_logits1d(q.permute(0, 1, 2, 4, 3), self.key_rel_h, self.W, self.H, nh, [0, 1, 4, 2, 5, 3]) 64 | return rel_logits_h, rel_logits_w 65 | 66 | def _relative_logits1d(self, q, rel_k, H, W, Nh, transpose_mask): 67 | rel_logits = einsum('bhdxy, md -> bhxym', q, rel_k) 68 | 69 | rel_logits = rel_logits.view([-1, Nh*H, W, 2*W-1]) 70 | rel_logits = self._rel_to_abs(rel_logits) 71 | rel_logits = rel_logits.view([-1, Nh, H, W, W]).unsqueeze(dim=3).repeat([1,1,1,H,1,1]) 72 | rel_logits = rel_logits.permute(*transpose_mask) 73 | rel_logits = rel_logits.contiguous().view(-1, Nh, H*W, H*W) 74 | return rel_logits 75 | 76 | def _rel_to_abs(self, x): 77 | b, nh, l, _ = x.size() 78 | 79 | 80 | x = F.pad(x, (0,1), 'constant', 0) 81 | flat_x = x.view([b, nh, l*(2*l)]); 82 | flat_x_padded = F.pad(flat_x, (0, l-1), 'constant', 0) 83 | 84 | final_x = flat_x_padded.view([b, nh, l+1, 2*l-1]) 85 | final_x = final_x[:, :, :l, l-1:] 86 | 87 | return final_x 88 | -------------------------------------------------------------------------------- /model/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | 2 | # -------------------------------------------------------- 3 | # Faster R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick 7 | # -------------------------------------------------------- 8 | 9 | 10 | import numpy as np 11 | cimport numpy as np 12 | 13 | assert sizeof(int) == sizeof(np.int32_t) 14 | 15 | cdef extern from "gpu_nms.hpp": 16 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 17 | 18 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 19 | np.int32_t device_id=0): 20 | cdef int boxes_num = dets.shape[0] 21 | cdef int boxes_dim = dets.shape[1] 22 | cdef int 
num_out 23 | cdef np.ndarray[np.int32_t, ndim=1] \ 24 | keep = np.zeros(boxes_num, dtype=np.int32) 25 | cdef np.ndarray[np.float32_t, ndim=1] \ 26 | scores = dets[:, 4] 27 | cdef np.ndarray[np.int_t, ndim=1] \ 28 | order = scores.argsort()[::-1] 29 | cdef np.ndarray[np.float32_t, ndim=2] \ 30 | sorted_dets = dets[order, :] 31 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 32 | keep = keep[:num_out] 33 | return list(order[keep]) 34 | 35 | 36 | -------------------------------------------------------------------------------- /model/losses.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | def calc_iou(a, b): 6 | area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1]) 7 | 8 | iw = torch.min(torch.unsqueeze(a[:, 2], dim=1), b[:, 2]) - torch.max(torch.unsqueeze(a[:, 0], 1), b[:, 0]) 9 | ih = torch.min(torch.unsqueeze(a[:, 3], dim=1), b[:, 3]) - torch.max(torch.unsqueeze(a[:, 1], 1), b[:, 1]) 10 | 11 | iw = torch.clamp(iw, min=0) 12 | ih = torch.clamp(ih, min=0) 13 | 14 | ua = torch.unsqueeze((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), dim=1) + area - iw * ih 15 | 16 | ua = torch.clamp(ua, min=1e-8) 17 | 18 | intersection = iw * ih 19 | 20 | IoU = intersection / ua 21 | 22 | return IoU 23 | 24 | class FocalLoss(nn.Module): 25 | #def __init__(self): 26 | 27 | def forward(self, classifications, regressions, anchors, annotations): 28 | alpha = 0.25 29 | gamma = 2.0 30 | batch_size = classifications.shape[0] 31 | classification_losses = [] 32 | regression_losses = [] 33 | 34 | anchor = anchors[0, :, :] 35 | 36 | anchor_widths = anchor[:, 2] - anchor[:, 0] 37 | anchor_heights = anchor[:, 3] - anchor[:, 1] 38 | anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths 39 | anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights 40 | 41 | for j in range(batch_size): 42 | 43 | classification = classifications[j, :, :] 44 | regression = regressions[j, :, :] 45 | 46 | bbox_annotation = annotations[j, :, :] 47 | bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1] 48 | 49 | if bbox_annotation.shape[0] == 0: 50 | regression_losses.append(torch.tensor(0).float().cuda()) 51 | classification_losses.append(torch.tensor(0).float().cuda()) 52 | 53 | continue 54 | 55 | classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4) 56 | 57 | IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4]) # num_anchors x num_annotations 58 | 59 | IoU_max, IoU_argmax = torch.max(IoU, dim=1) # num_anchors x 1 60 | 61 | #import pdb 62 | #pdb.set_trace() 63 | 64 | # compute the loss for classification 65 | targets = torch.ones(classification.shape) * -1 66 | targets = targets.cuda() 67 | 68 | targets[torch.lt(IoU_max, 0.4), :] = 0 69 | 70 | positive_indices = torch.ge(IoU_max, 0.5) 71 | 72 | num_positive_anchors = positive_indices.sum() 73 | 74 | assigned_annotations = bbox_annotation[IoU_argmax, :] 75 | 76 | targets[positive_indices, :] = 0 77 | targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1 78 | 79 | alpha_factor = torch.ones(targets.shape).cuda() * alpha 80 | 81 | alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor) 82 | focal_weight = torch.where(torch.eq(targets, 1.), 1. 
- classification, classification) 83 | focal_weight = alpha_factor * torch.pow(focal_weight, gamma) 84 | 85 | bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification)) 86 | 87 | # cls_loss = focal_weight * torch.pow(bce, gamma) 88 | cls_loss = focal_weight * bce 89 | 90 | cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape).cuda()) 91 | 92 | classification_losses.append(cls_loss.sum()/torch.clamp(num_positive_anchors.float(), min=1.0)) 93 | 94 | # compute the loss for regression 95 | 96 | if positive_indices.sum() > 0: 97 | assigned_annotations = assigned_annotations[positive_indices, :] 98 | 99 | anchor_widths_pi = anchor_widths[positive_indices] 100 | anchor_heights_pi = anchor_heights[positive_indices] 101 | anchor_ctr_x_pi = anchor_ctr_x[positive_indices] 102 | anchor_ctr_y_pi = anchor_ctr_y[positive_indices] 103 | 104 | gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0] 105 | gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1] 106 | gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths 107 | gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights 108 | 109 | # clip widths to 1 110 | gt_widths = torch.clamp(gt_widths, min=1) 111 | gt_heights = torch.clamp(gt_heights, min=1) 112 | 113 | targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi 114 | targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi 115 | targets_dw = torch.log(gt_widths / anchor_widths_pi) 116 | targets_dh = torch.log(gt_heights / anchor_heights_pi) 117 | 118 | targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh)) 119 | targets = targets.t() 120 | 121 | targets = targets/torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).cuda() 122 | 123 | 124 | negative_indices = 1 - positive_indices 125 | 126 | regression_diff = torch.abs(targets - regression[positive_indices, :]) 127 | 128 | regression_loss = torch.where( 129 | torch.le(regression_diff, 1.0 / 9.0), 130 | 0.5 * 9.0 * torch.pow(regression_diff, 2), 131 | regression_diff - 0.5 / 9.0 132 | ) 133 | regression_losses.append(regression_loss.mean()) 134 | else: 135 | regression_losses.append(torch.tensor(0).float().cuda()) 136 | 137 | return torch.stack(classification_losses).mean(dim=0, keepdim=True), torch.stack(regression_losses).mean(dim=0, keepdim=True) 138 | 139 | 140 | -------------------------------------------------------------------------------- /model/retinanet.py: -------------------------------------------------------------------------------- 1 | """Original from from https://github.com/yhenon/pytorch-retinanet""" 2 | import sys 3 | #sys.path.append('../') 4 | import torch.nn as nn 5 | import torch 6 | import math 7 | import time 8 | import torch.utils.model_zoo as model_zoo 9 | from ..utils.utils import BasicBlock, Bottleneck, BBoxTransform, ClipBoxes 10 | from .BottleneckBlock import BottleneckBlock 11 | from .anchors import Anchors 12 | from .losses import FocalLoss 13 | from ..lib.nms.gpu_nms import gpu_nms 14 | 15 | 16 | def nms(dets, thresh): 17 | """Dispatch to either CPU or GPU NMS implementations.\ 18 | Accept dets as tensor""" 19 | return gpu_nms(dets, thresh) 20 | 21 | 22 | model_urls = { 23 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 24 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 25 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 26 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 27 | 'resnet152': 
'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 28 | } 29 | 30 | 31 | class PyramidFeatures(nn.Module): 32 | def __init__(self, C3_size, C4_size, C5_size, feature_size=256): 33 | super(PyramidFeatures, self).__init__() 34 | 35 | # upsample C5 to get P5 from the FPN paper 36 | self.P5_1 = nn.Conv2d(C5_size, feature_size, kernel_size=1, stride=1, padding=0) 37 | self.P5_upsampled = nn.Upsample(scale_factor=2, mode='nearest') 38 | self.P5_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1) 39 | 40 | # add P5 elementwise to C4 41 | self.P4_1 = nn.Conv2d(C4_size, feature_size, kernel_size=1, stride=1, padding=0) 42 | self.P4_upsampled = nn.Upsample(scale_factor=2, mode='nearest') 43 | self.P4_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1) 44 | 45 | # add P4 elementwise to C3 46 | self.P3_1 = nn.Conv2d(C3_size, feature_size, kernel_size=1, stride=1, padding=0) 47 | self.P3_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1) 48 | 49 | # "P6 is obtained via a 3x3 stride-2 conv on C5" 50 | self.P6 = nn.Conv2d(C5_size, feature_size, kernel_size=3, stride=2, padding=1) 51 | 52 | # "P7 is computed by applying ReLU followed by a 3x3 stride-2 conv on P6" 53 | self.P7_1 = nn.ReLU() 54 | self.P7_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=2, padding=1) 55 | 56 | def forward(self, inputs): 57 | C3, C4, C5 = inputs 58 | 59 | P5_x = self.P5_1(C5) 60 | P5_upsampled_x = self.P5_upsampled(P5_x) 61 | P5_x = self.P5_2(P5_x) 62 | 63 | P4_x = self.P4_1(C4) 64 | P4_x = P5_upsampled_x + P4_x 65 | P4_upsampled_x = self.P4_upsampled(P4_x) 66 | P4_x = self.P4_2(P4_x) 67 | 68 | P3_x = self.P3_1(C3) 69 | P3_x = P3_x + P4_upsampled_x 70 | P3_x = self.P3_2(P3_x) 71 | 72 | P6_x = self.P6(C5) 73 | 74 | P7_x = self.P7_1(P6_x) 75 | P7_x = self.P7_2(P7_x) 76 | 77 | return [P3_x, P4_x, P5_x, P6_x, P7_x] 78 | 79 | 80 | class RegressionModel(nn.Module): 81 | def __init__(self, num_features_in, num_anchors=9, feature_size=256): 82 | super(RegressionModel, self).__init__() 83 | 84 | self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1) 85 | self.act1 = nn.ReLU() 86 | 87 | self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 88 | self.act2 = nn.ReLU() 89 | 90 | self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 91 | self.act3 = nn.ReLU() 92 | 93 | self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 94 | self.act4 = nn.ReLU() 95 | 96 | self.output = nn.Conv2d(feature_size, num_anchors * 4, kernel_size=3, padding=1) 97 | 98 | def forward(self, x): 99 | out = self.conv1(x) 100 | out = self.act1(out) 101 | 102 | out = self.conv2(out) 103 | out = self.act2(out) 104 | 105 | out = self.conv3(out) 106 | out = self.act3(out) 107 | 108 | out = self.conv4(out) 109 | out = self.act4(out) 110 | 111 | out = self.output(out) 112 | 113 | # out is B x C x W x H, with C = 4*num_anchors 114 | out = out.permute(0, 2, 3, 1) 115 | 116 | return out.contiguous().view(out.shape[0], -1, 4) 117 | 118 | 119 | class ClassificationModel(nn.Module): 120 | def __init__(self, num_features_in, num_anchors=9, num_classes=80, prior=0.01, feature_size=256): 121 | super(ClassificationModel, self).__init__() 122 | 123 | self.num_classes = num_classes 124 | self.num_anchors = num_anchors 125 | 126 | self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1) 127 | self.act1 = nn.ReLU() 128 | 129 | self.conv2 = nn.Conv2d(feature_size, 
feature_size, kernel_size=3, padding=1) 130 | self.act2 = nn.ReLU() 131 | 132 | self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 133 | self.act3 = nn.ReLU() 134 | 135 | self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 136 | self.act4 = nn.ReLU() 137 | 138 | self.output = nn.Conv2d(feature_size, num_anchors * num_classes, kernel_size=3, padding=1) 139 | self.output_act = nn.Sigmoid() 140 | 141 | def forward(self, x): 142 | out = self.conv1(x) 143 | out = self.act1(out) 144 | 145 | out = self.conv2(out) 146 | out = self.act2(out) 147 | 148 | out = self.conv3(out) 149 | out = self.act3(out) 150 | 151 | out = self.conv4(out) 152 | out = self.act4(out) 153 | 154 | out = self.output(out) 155 | out = self.output_act(out) 156 | 157 | # out is B x C x W x H, with C = n_classes + n_anchors 158 | out1 = out.permute(0, 2, 3, 1) 159 | 160 | batch_size, width, height, channels = out1.shape 161 | 162 | out2 = out1.view(batch_size, width, height, self.num_anchors, self.num_classes) 163 | 164 | return out2.contiguous().view(x.shape[0], -1, self.num_classes) 165 | 166 | 167 | class ResNet(nn.Module): 168 | 169 | def __init__(self, num_classes, block, layers, attention=False, input_size=None): 170 | self.inplanes = 64 171 | super(ResNet, self).__init__() 172 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 173 | self.bn1 = nn.BatchNorm2d(64) 174 | self.relu = nn.ReLU(inplace=True) 175 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 176 | 177 | dummy = torch.rand((1, *input_size)) 178 | sizes = self.compute_sizes(self.conv1, dummy) 179 | 180 | self.layer1 = self._make_layer(block, 64, layers[0], attention, h=sizes[2], w=sizes[3]) 181 | dummy = torch.rand(sizes) 182 | sizes = self.compute_sizes(self.layer1, dummy) 183 | 184 | self.layer2 = self._make_layer(block, 128, layers[1], attention, stride=2, h=sizes[2], w=sizes[3]) 185 | dummy = torch.rand(sizes) 186 | sizes = self.compute_sizes(self.layer2, dummy) 187 | 188 | self.layer3 = self._make_layer(block, 256, layers[2], attention, stride=2, h=sizes[2], w=sizes[3]) 189 | dummy = torch.rand(sizes) 190 | sizes = self.compute_sizes(self.layer3, dummy) 191 | 192 | self.layer4 = self._make_layer(block, 512, layers[3], attention, stride=2, h=sizes[2], w=sizes[3]) 193 | 194 | if block == BasicBlock: 195 | fpn_sizes = [self.layer2[layers[1] - 1].conv2.out_channels, self.layer3[layers[2] - 1].conv2.out_channels, 196 | self.layer4[layers[3] - 1].conv2.out_channels] 197 | elif block == Bottleneck or block == BottleneckBlock: 198 | fpn_sizes = [self.layer2[layers[1] - 1].conv3.out_channels, self.layer3[layers[2] - 1].conv3.out_channels, 199 | self.layer4[layers[3] - 1].conv3.out_channels] 200 | 201 | self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2]) 202 | 203 | self.regressionModel = RegressionModel(256) 204 | self.classificationModel = ClassificationModel(256, num_classes=num_classes) 205 | 206 | self.anchors = Anchors() 207 | 208 | self.regressBoxes = BBoxTransform() 209 | 210 | self.clipBoxes = ClipBoxes() 211 | 212 | self.focalLoss = FocalLoss() 213 | 214 | for m in self.modules(): 215 | if isinstance(m, nn.Conv2d): 216 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 217 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 218 | elif isinstance(m, nn.BatchNorm2d): 219 | m.weight.data.fill_(1) 220 | m.bias.data.zero_() 221 | 222 | prior = 0.01 223 | 224 | self.classificationModel.output.weight.data.fill_(0) 225 | self.classificationModel.output.bias.data.fill_(-math.log((1.0 - prior) / prior)) 226 | 227 | self.regressionModel.output.weight.data.fill_(0) 228 | self.regressionModel.output.bias.data.fill_(0) 229 | 230 | self.freeze_bn() 231 | 232 | def compute_sizes(self, layer, dummy_input): 233 | dummy_input = layer(dummy_input) 234 | return dummy_input.size() 235 | 236 | def _make_layer(self, block, planes, blocks, attention, stride=1, h=None, w=None): 237 | downsample = None 238 | if stride != 1 or self.inplanes != planes * block.expansion: 239 | downsample = nn.Sequential( 240 | nn.Conv2d(self.inplanes, planes * block.expansion, 241 | kernel_size=1, stride=stride, bias=False), 242 | nn.BatchNorm2d(planes * block.expansion), 243 | ) 244 | 245 | layers = [] 246 | if block == BottleneckBlock: 247 | layers.append(block(self.inplanes, planes, stride, downsample, attention, kappa=0.1, nu=0.05, num_heads=4, H=h, W=w)) #how to determine heights and widths =X 248 | self.inplanes = planes * block.expansion 249 | for i in range(1, blocks): 250 | layers.append(block(self.inplanes, planes)) 251 | 252 | return nn.Sequential(*layers) 253 | 254 | def freeze_bn(self): 255 | '''Freeze BatchNorm layers.''' 256 | for layer in self.modules(): 257 | if isinstance(layer, nn.BatchNorm2d): 258 | layer.eval() 259 | 260 | def forward(self, inputs): 261 | 262 | if self.training: 263 | img_batch, annotations = inputs 264 | else: 265 | img_batch = inputs 266 | 267 | x = self.conv1(img_batch) 268 | x = self.bn1(x) 269 | x = self.relu(x) 270 | x = self.maxpool(x) 271 | 272 | x1 = self.layer1(x) 273 | x2 = self.layer2(x1) 274 | x3 = self.layer3(x2) 275 | x4 = self.layer4(x3) 276 | 277 | features = self.fpn([x2, x3, x4]) 278 | 279 | regression = torch.cat([self.regressionModel(feature) for feature in features], dim=1) 280 | 281 | classification = torch.cat([self.classificationModel(feature) for feature in features], dim=1) 282 | 283 | anchors = self.anchors(img_batch) 284 | 285 | if self.training: 286 | return self.focalLoss(classification, regression, anchors, annotations) 287 | else: 288 | transformed_anchors = self.regressBoxes(anchors, regression) 289 | transformed_anchors = self.clipBoxes(transformed_anchors, img_batch) 290 | 291 | scores = torch.max(classification, dim=2, keepdim=True)[0] 292 | 293 | scores_over_thresh = (scores > 0.05)[0, :, 0] 294 | 295 | if scores_over_thresh.sum() == 0: 296 | # no boxes to NMS, just return 297 | return [torch.zeros(0), torch.zeros(0), torch.zeros(0, 4)] 298 | 299 | classification = classification[:, scores_over_thresh, :] 300 | transformed_anchors = transformed_anchors[:, scores_over_thresh, :] 301 | scores = scores[:, scores_over_thresh, :] 302 | 303 | anchors_nms_idx = nms(torch.cat([transformed_anchors, scores], dim=2)[0, :, :], 0.5) 304 | 305 | nms_scores, nms_class = classification[0, anchors_nms_idx, :].max(dim=1) 306 | 307 | return [nms_scores, nms_class, transformed_anchors[0, anchors_nms_idx, :]] 308 | 309 | 310 | def AttentionRetinaNet(num_classes, input_size): 311 | model = ResNet(num_classes, BottleneckBlock, [3, 4, 6, 3], attention=True, input_size=input_size) 312 | return model 313 | 314 | def resnet18(num_classes, pretrained=False, **kwargs): 315 | """Constructs a ResNet-18 model. 
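The classification bias initialisation above implements the focal-loss trick of starting every anchor at a foreground probability of `prior`. A quick numerical check (pure math, no model required):

```
import math

prior = 0.01
bias = -math.log((1.0 - prior) / prior)            # value written into the output bias
print(round(bias, 3))                              # -4.595
# With zero weights, the head's sigmoid output equals the prior:
print(round(1.0 / (1.0 + math.exp(-bias)), 3))     # 0.01
```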
316 | Args: 317 | pretrained (bool): If True, returns a model pre-trained on ImageNet 318 | """ 319 | model = ResNet(num_classes, BasicBlock, [2, 2, 2, 2], **kwargs) 320 | if pretrained: 321 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'], model_dir='.'), strict=False) 322 | return model 323 | 324 | 325 | def resnet34(num_classes, pretrained=False, **kwargs): 326 | """Constructs a ResNet-34 model. 327 | Args: 328 | pretrained (bool): If True, returns a model pre-trained on ImageNet 329 | """ 330 | model = ResNet(num_classes, BasicBlock, [3, 4, 6, 3], **kwargs) 331 | if pretrained: 332 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34'], model_dir='.'), strict=False) 333 | return model 334 | 335 | 336 | def resnet50(num_classes, pretrained=False, **kwargs): 337 | """Constructs a ResNet-50 model. 338 | Args: 339 | pretrained (bool): If True, returns a model pre-trained on ImageNet 340 | """ 341 | model = ResNet(num_classes, Bottleneck, [3, 4, 6, 3], **kwargs) 342 | if pretrained: 343 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'], model_dir='.'), strict=False) 344 | return model 345 | 346 | def resnet50_attn(num_classes, pretrained=False): 347 | model = ResNet(num_classes, BottleneckBlock, [3, 4, 6, 3], attention=True, input_size=(3, )) 348 | 349 | def resnet101(num_classes, pretrained=False, **kwargs): 350 | """Constructs a ResNet-101 model. 351 | Args: 352 | pretrained (bool): If True, returns a model pre-trained on ImageNet 353 | """ 354 | model = ResNet(num_classes, Bottleneck, [3, 4, 23, 3], **kwargs) 355 | if pretrained: 356 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'], model_dir='.'), strict=False) 357 | return model 358 | 359 | 360 | def resnet152(num_classes, pretrained=False, **kwargs): 361 | """Constructs a ResNet-152 model. 
362 | Args: 363 | pretrained (bool): If True, returns a model pre-trained on ImageNet 364 | """ 365 | model = ResNet(num_classes, BottleneckBlock, [3, 8, 36, 3], attention=True) 366 | if pretrained: 367 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152'], model_dir='.'), strict=False) 368 | return model 369 | -------------------------------------------------------------------------------- /model/wideresnet.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from .attentionConv2d import AttentionConv2d 6 | 7 | 8 | class BasicAttentionBlock(nn.Module): 9 | def __init__(self, in_planes, out_planes, stride, height, width, dk, dv, dropRate=0.0): 10 | super(BasicAttentionBlock, self).__init__() 11 | self.bn1 = nn.BatchNorm2d(in_planes) 12 | self.relu1 = nn.ReLU(inplace=True) 13 | 14 | #self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) 15 | self.conv1 = AttentionConv2d(in_planes, out_planes, height, width, dk, dv, num_heads=8, kernel_size=3, padding=1) 16 | self.bn2 = nn.BatchNorm2d(out_planes) 17 | self.relu2 = nn.ReLU(inplace=True) 18 | self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1, 19 | padding=1, bias=False) 20 | self.droprate = dropRate 21 | self.equalInOut = (in_planes == out_planes) 22 | self.convShortcut = (not self.equalInOut) and nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, 23 | padding=0, bias=False) or None 24 | 25 | def forward(self, x): 26 | if not self.equalInOut: 27 | x = self.relu1(self.bn1(x)) 28 | else: 29 | out = self.relu1(self.bn1(x)) 30 | out = self.relu2(self.bn2(self.conv1(out if self.equalInOut else x))) 31 | if self.droprate > 0: 32 | out = F.dropout(out, p=self.droprate, training=self.training) 33 | out = self.conv2(out) 34 | return torch.add(x if self.equalInOut else self.convShortcut(x), out) 35 | 36 | class NetworkBlock(nn.Module): 37 | def __init__(self, nb_layers, in_planes, out_planes, block, stride, height, width, dropRate=0.0): 38 | super(NetworkBlock, self).__init__() 39 | self.layer = self._make_layer(block, in_planes, out_planes, nb_layers, stride, height, width, dropRate) 40 | 41 | def _make_layer(self, block, in_planes, out_planes, nb_layers, stride, height, width, dropRate): 42 | layers = [] 43 | dk = int(0.1 * out_planes) 44 | dv = int(0.2 * out_planes) 45 | 46 | for i in range(int(nb_layers)): 47 | layers.append(block(i == 0 and in_planes or out_planes, out_planes, i == 0 and stride or 1, height, width, dk, dv, dropRate)) 48 | return nn.Sequential(*layers) 49 | 50 | def forward(self, x): 51 | return self.layer(x) 52 | 53 | class AttentionWideResNet(nn.Module): 54 | def __init__(self, depth, num_classes, widen_factor=1, input_dim=(32, 32), dropRate=0.0): 55 | super(AttentionWideResNet, self).__init__() 56 | nChannels = [16, 16*widen_factor, 32*widen_factor, 64*widen_factor] 57 | 58 | height, width = input_dim 59 | assert((depth - 4) % 6 == 0) 60 | n = (depth - 4) / 6 61 | block = BasicAttentionBlock 62 | # 1st conv before any network block 63 | self.conv1 = nn.Conv2d(3, nChannels[0], kernel_size=3, stride=1, 64 | padding=1, bias=False) 65 | # 1st block 66 | self.block1 = NetworkBlock(n, nChannels[0], nChannels[1], block, 1, height, width, dropRate) 67 | # 2nd block 68 | self.block2 = NetworkBlock(n, nChannels[1], nChannels[2], block, 2, height, width, dropRate) 69 | # 3rd block 70 | self.block3 = NetworkBlock(n, 
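For the wide ResNet being built here, `depth` must satisfy (depth − 4) % 6 == 0, giving n blocks per group and channel widths [16, 16k, 32k, 64k] for widen factor k; each block's attention key/value depths are then derived from the output width. A small sketch of the arithmetic for a WRN-28-10-style configuration (these particular numbers are illustrative, not taken from the repo's config):

```
depth, widen_factor = 28, 10
assert (depth - 4) % 6 == 0
n = (depth - 4) // 6                                   # blocks per NetworkBlock
channels = [16, 16 * widen_factor, 32 * widen_factor, 64 * widen_factor]
dk, dv = int(0.1 * channels[1]), int(0.2 * channels[1])  # as computed in NetworkBlock._make_layer
print(n, channels, dk, dv)                             # 4 [16, 160, 320, 640] 16 32
```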
nChannels[2], nChannels[3], block, 2, height, width, dropRate) 71 | # global average pooling and classifier 72 | self.bn1 = nn.BatchNorm2d(nChannels[3]) 73 | self.relu = nn.ReLU(inplace=True) 74 | self.fc = nn.Linear(nChannels[3], num_classes) 75 | self.nChannels = nChannels[3] 76 | 77 | for m in self.modules(): 78 | if isinstance(m, nn.Conv2d): 79 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 80 | m.weight.data.normal_(0, math.sqrt(2. / n)) 81 | elif isinstance(m, nn.BatchNorm2d): 82 | m.weight.data.fill_(1) 83 | m.bias.data.zero_() 84 | elif isinstance(m, nn.Linear): 85 | m.bias.data.zero_() 86 | 87 | def forward(self, x): 88 | out = self.conv1(x) 89 | out = self.block1(out) 90 | out = self.block2(out) 91 | out = self.block3(out) 92 | out = self.relu(self.bn1(out)) 93 | 94 | out = F.avg_pool2d(out, 32) 95 | out = out.view(-1, self.nChannels) 96 | #print(out.size()) 97 | return self.fc(out) 98 | -------------------------------------------------------------------------------- /pytorch-retinanet/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *.cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # Jupyter Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # SageMath parsed files 79 | *.sage.py 80 | 81 | # dotenv 82 | .env 83 | 84 | # virtualenv 85 | .venv 86 | venv/ 87 | ENV/ 88 | 89 | # Spyder project settings 90 | .spyderproject 91 | .spyproject 92 | 93 | # Rope project settings 94 | .ropeproject 95 | 96 | # mkdocs documentation 97 | /site 98 | 99 | # mypy 100 | .mypy_cache/ 101 | 102 | *.zip 103 | *.pt 104 | -------------------------------------------------------------------------------- /pytorch-retinanet/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /pytorch-retinanet/README.md: -------------------------------------------------------------------------------- 1 | # pytorch-retinanet 2 | 3 | ![img3](https://github.com/yhenon/pytorch-retinanet/blob/master/images/3.jpg) 4 | ![img5](https://github.com/yhenon/pytorch-retinanet/blob/master/images/5.jpg) 5 | 6 | Pytorch implementation of RetinaNet object detection as described in [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002) by Tsung-Yi Lin, Priya Goyal, Ross Girshick, Kaiming He and Piotr Dollár. 7 | 8 | This implementation is primarily designed to be easy to read and simple to modify. 9 | 10 | ## Results 11 | Currently, this repo achieves 33.7% mAP at 600px resolution with a Resnet-50 backbone. The published result is 34.0% mAP. The difference is likely due to the use of Adam optimizer instead of SGD with weight decay. 12 | 13 | ## Installation 14 | 15 | 1) Clone this repo 16 | 17 | 2) Install the required packages: 18 | 19 | ``` 20 | apt-get install tk-dev python-tk 21 | ``` 22 | 23 | 3) Install the python packages: 24 | 25 | ``` 26 | pip install cffi 27 | 28 | pip install pandas 29 | 30 | pip install pycocotools 31 | 32 | pip install cython 33 | 34 | pip install opencv-python 35 | 36 | pip install requests 37 | 38 | ``` 39 | 40 | 4) Build the NMS extension. 41 | 42 | ``` 43 | cd pytorch-retinanet/lib 44 | bash build.sh 45 | cd ../ 46 | ``` 47 | 48 | Note that you may have to edit line 14 of `build.sh` if you want to change which version of python you are building the extension for. 49 | 50 | ## Training 51 | 52 | The network can be trained using the `train.py` script. Currently, two dataloaders are available: COCO and CSV. For training on coco, use 53 | 54 | ``` 55 | python train.py --dataset coco --coco_path ../coco --depth 50 56 | ``` 57 | 58 | For training using a custom dataset, with annotations in CSV format (see below), use 59 | 60 | ``` 61 | python train.py --dataset csv --csv_train --csv_classes --csv_val 62 | ``` 63 | 64 | Note that the --csv_val argument is optional, in which case no validation will be performed. 65 | 66 | ## Pre-trained model 67 | 68 | A pre-trained model is available at: 69 | - https://drive.google.com/open?id=1yLmjq3JtXi841yXWBxst0coAgR26MNBS (this is a pytorch state dict) 70 | - https://drive.google.com/open?id=1hCtM35R_t6T8RJVSd74K4gB-A1MR-TxC (this is a pytorch model serialized via `torch.save()`) 71 | 72 | The state dict model can be loaded using: 73 | 74 | ``` 75 | retinanet = model.resnet50(num_classes=dataset_train.num_classes(),) 76 | retinanet.load_state_dict(torch.load(PATH_TO_WEIGHTS)) 77 | ``` 78 | 79 | The pytorch model can be loaded directly using: 80 | 81 | ``` 82 | retinanet = torch.load(PATH_TO_MODEL) 83 | ``` 84 | 85 | ## Visualization 86 | 87 | To visualize the network detection, use `visualize.py`: 88 | 89 | ``` 90 | python visualize.py --dataset coco --coco_path ../coco --model 91 | ``` 92 | This will visualize bounding boxes on the validation set. 
To visualise with a CSV dataset, use: 93 | 94 | ``` 95 | python visualize.py --dataset csv --csv_classes --csv_val --model 96 | ``` 97 | 98 | ## Model 99 | 100 | The retinanet model uses a resnet backbone. You can set the depth of the resnet model using the --depth argument. Depth must be one of 18, 34, 50, 101 or 152. Note that deeper models are more accurate but are slower and use more memory. 101 | 102 | ## CSV datasets 103 | The `CSVGenerator` provides an easy way to define your own datasets. 104 | It uses two CSV files: one file containing annotations and one file containing a class name to ID mapping. 105 | 106 | ### Annotations format 107 | The CSV file with annotations should contain one annotation per line. 108 | Images with multiple bounding boxes should use one row per bounding box. 109 | Note that indexing for pixel values starts at 0. 110 | The expected format of each line is: 111 | ``` 112 | path/to/image.jpg,x1,y1,x2,y2,class_name 113 | ``` 114 | 115 | Some images may not contain any labeled objects. 116 | To add these images to the dataset as negative examples, 117 | add an annotation where `x1`, `y1`, `x2`, `y2` and `class_name` are all empty: 118 | ``` 119 | path/to/image.jpg,,,,, 120 | ``` 121 | 122 | A full example: 123 | ``` 124 | /data/imgs/img_001.jpg,837,346,981,456,cow 125 | /data/imgs/img_002.jpg,215,312,279,391,cat 126 | /data/imgs/img_002.jpg,22,5,89,84,bird 127 | /data/imgs/img_003.jpg,,,,, 128 | ``` 129 | 130 | This defines a dataset with 3 images. 131 | `img_001.jpg` contains a cow. 132 | `img_002.jpg` contains a cat and a bird. 133 | `img_003.jpg` contains no interesting objects/animals. 134 | 135 | 136 | ### Class mapping format 137 | The class name to ID mapping file should contain one mapping per line. 138 | Each line should use the following format: 139 | ``` 140 | class_name,id 141 | ``` 142 | 143 | Indexing for classes starts at 0. 144 | Do not include a background class as it is implicit. 
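As a concrete sketch, the two files described in this section can be written with Python's `csv` module; the paths and file names below are illustrative. The resulting files are then passed to `train.py` via `--csv_train` and `--csv_classes`.

```
import csv

annotations = [
    ('/data/imgs/img_001.jpg', 837, 346, 981, 456, 'cow'),
    ('/data/imgs/img_003.jpg', '', '', '', '', ''),   # negative example: image with no objects
]
classes = [('cow', 0), ('cat', 1), ('bird', 2)]

with open('annotations.csv', 'w', newline='') as f:
    csv.writer(f).writerows(annotations)

with open('classes.csv', 'w', newline='') as f:
    csv.writer(f).writerows(classes)
```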
145 | 146 | For example: 147 | ``` 148 | cow,0 149 | cat,1 150 | bird,2 151 | ``` 152 | 153 | ## Acknowledgements 154 | 155 | - Significant amounts of code are borrowed from the [keras retinanet implementation](https://github.com/fizyr/keras-retinanet) 156 | - The NMS module used is from the [pytorch faster-rcnn implementation](https://github.com/ruotianluo/pytorch-faster-rcnn) 157 | 158 | ## Examples 159 | 160 | ![img1](https://github.com/yhenon/pytorch-retinanet/blob/master/images/1.jpg) 161 | ![img2](https://github.com/yhenon/pytorch-retinanet/blob/master/images/2.jpg) 162 | ![img4](https://github.com/yhenon/pytorch-retinanet/blob/master/images/4.jpg) 163 | ![img6](https://github.com/yhenon/pytorch-retinanet/blob/master/images/6.jpg) 164 | ![img7](https://github.com/yhenon/pytorch-retinanet/blob/master/images/7.jpg) 165 | ![img8](https://github.com/yhenon/pytorch-retinanet/blob/master/images/8.jpg) 166 | -------------------------------------------------------------------------------- /pytorch-retinanet/anchors.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | 6 | class Anchors(nn.Module): 7 | def __init__(self, pyramid_levels=None, strides=None, sizes=None, ratios=None, scales=None): 8 | super(Anchors, self).__init__() 9 | 10 | if pyramid_levels is None: 11 | self.pyramid_levels = [3, 4, 5, 6, 7] 12 | if strides is None: 13 | self.strides = [2 ** x for x in self.pyramid_levels] 14 | if sizes is None: 15 | self.sizes = [2 ** (x + 2) for x in self.pyramid_levels] 16 | if ratios is None: 17 | self.ratios = np.array([0.5, 1, 2]) 18 | if scales is None: 19 | self.scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]) 20 | 21 | def forward(self, image): 22 | 23 | image_shape = image.shape[2:] 24 | image_shape = np.array(image_shape) 25 | image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in self.pyramid_levels] 26 | 27 | # compute anchors over all pyramid levels 28 | all_anchors = np.zeros((0, 4)).astype(np.float32) 29 | 30 | for idx, p in enumerate(self.pyramid_levels): 31 | anchors = generate_anchors(base_size=self.sizes[idx], ratios=self.ratios, scales=self.scales) 32 | shifted_anchors = shift(image_shapes[idx], self.strides[idx], anchors) 33 | all_anchors = np.append(all_anchors, shifted_anchors, axis=0) 34 | 35 | all_anchors = np.expand_dims(all_anchors, axis=0) 36 | 37 | return torch.from_numpy(all_anchors.astype(np.float32)).cuda() 38 | 39 | def generate_anchors(base_size=16, ratios=None, scales=None): 40 | """ 41 | Generate anchor (reference) windows by enumerating aspect ratios X 42 | scales w.r.t. a reference window. 
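To make the geometry described in this docstring concrete, the sketch below builds the default 3 ratios × 3 scales = 9 anchors for a base size of 32 and checks that each anchor keeps the area of its scale while the aspect ratio varies. The import assumes the function defined here and that the script is run from `pytorch-retinanet/`:

```
import numpy as np
# Assumed import path; generate_anchors is the function defined in this file.
from anchors import generate_anchors

a = generate_anchors(base_size=32)         # 9 anchors as (x1, y1, x2, y2), centred on 0
w, h = a[:, 2] - a[:, 0], a[:, 3] - a[:, 1]
print(np.round(h / w, 2))                  # aspect ratios: 0.5, 0.5, 0.5, 1, 1, 1, 2, 2, 2
print(np.round(w * h).astype(int))         # areas depend only on the scale: (32 * scale)**2
```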
43 | """ 44 | 45 | if ratios is None: 46 | ratios = np.array([0.5, 1, 2]) 47 | 48 | if scales is None: 49 | scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]) 50 | 51 | num_anchors = len(ratios) * len(scales) 52 | 53 | # initialize output anchors 54 | anchors = np.zeros((num_anchors, 4)) 55 | 56 | # scale base_size 57 | anchors[:, 2:] = base_size * np.tile(scales, (2, len(ratios))).T 58 | 59 | # compute areas of anchors 60 | areas = anchors[:, 2] * anchors[:, 3] 61 | 62 | # correct for ratios 63 | anchors[:, 2] = np.sqrt(areas / np.repeat(ratios, len(scales))) 64 | anchors[:, 3] = anchors[:, 2] * np.repeat(ratios, len(scales)) 65 | 66 | # transform from (x_ctr, y_ctr, w, h) -> (x1, y1, x2, y2) 67 | anchors[:, 0::2] -= np.tile(anchors[:, 2] * 0.5, (2, 1)).T 68 | anchors[:, 1::2] -= np.tile(anchors[:, 3] * 0.5, (2, 1)).T 69 | 70 | return anchors 71 | 72 | def compute_shape(image_shape, pyramid_levels): 73 | """Compute shapes based on pyramid levels. 74 | 75 | :param image_shape: 76 | :param pyramid_levels: 77 | :return: 78 | """ 79 | image_shape = np.array(image_shape[:2]) 80 | image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in pyramid_levels] 81 | return image_shapes 82 | 83 | 84 | def anchors_for_shape( 85 | image_shape, 86 | pyramid_levels=None, 87 | ratios=None, 88 | scales=None, 89 | strides=None, 90 | sizes=None, 91 | shapes_callback=None, 92 | ): 93 | 94 | image_shapes = compute_shape(image_shape, pyramid_levels) 95 | 96 | # compute anchors over all pyramid levels 97 | all_anchors = np.zeros((0, 4)) 98 | for idx, p in enumerate(pyramid_levels): 99 | anchors = generate_anchors(base_size=sizes[idx], ratios=ratios, scales=scales) 100 | shifted_anchors = shift(image_shapes[idx], strides[idx], anchors) 101 | all_anchors = np.append(all_anchors, shifted_anchors, axis=0) 102 | 103 | return all_anchors 104 | 105 | 106 | def shift(shape, stride, anchors): 107 | shift_x = (np.arange(0, shape[1]) + 0.5) * stride 108 | shift_y = (np.arange(0, shape[0]) + 0.5) * stride 109 | 110 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 111 | 112 | shifts = np.vstack(( 113 | shift_x.ravel(), shift_y.ravel(), 114 | shift_x.ravel(), shift_y.ravel() 115 | )).transpose() 116 | 117 | # add A anchors (1, A, 4) to 118 | # cell K shifts (K, 1, 4) to get 119 | # shift anchors (K, A, 4) 120 | # reshape to (K*A, 4) shifted anchors 121 | A = anchors.shape[0] 122 | K = shifts.shape[0] 123 | all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))) 124 | all_anchors = all_anchors.reshape((K * A, 4)) 125 | 126 | return all_anchors 127 | 128 | -------------------------------------------------------------------------------- /pytorch-retinanet/attentionConv2d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch import einsum 5 | 6 | 7 | class AttentionConv2d(nn.Module): 8 | def __init__(self, input_dim, output_dim, dk, dv, num_heads, kernel_size, padding, rel_encoding=True, height=None, width=None): 9 | super(AttentionConv2d, self).__init__() 10 | self.input_dim = input_dim 11 | self.output_dim = output_dim 12 | self.dk = dk 13 | self.dv = dv 14 | self.num_heads = num_heads 15 | self.kernel_size = kernel_size 16 | self.dkh = self.dk // self.num_heads 17 | if rel_encoding and not height: 18 | raise("Cannot use relative encoding without specifying input's height and width") 19 | self.H = height 20 | self.W = width 21 | 22 | self.conv_qkv = 
nn.Conv2d(input_dim, 2*dk + dv, 1) 23 | self.conv_attn = nn.Conv2d(dv, dv, 1) 24 | self.conv_out = nn.Conv2d(input_dim, output_dim - dv, kernel_size, padding=padding) 25 | self.softmax = nn.Softmax(dim=-1) 26 | if width is not None: 27 | self.key_rel_w = nn.Parameter(self.dkh**-0.5 + torch.rand(2*width-1, self.dkh), requires_grad=True) 28 | if height is not None: 29 | self.key_rel_h = nn.Parameter(self.dkh**-0.5 + torch.rand(2*height-1, self.dkh), requires_grad=True) 30 | self.relative_encoding = rel_encoding 31 | 32 | 33 | def forward(self, input): 34 | conv_out = self.conv_out(input) 35 | 36 | qkv = self.conv_qkv(input) # batch_size, 2*dk+dv, H, W 37 | 38 | q, k, v = torch.split(qkv, [self.dk, self.dk, self.dv], dim=1) 39 | 40 | batch_size, _, H, W = q.size() 41 | 42 | q = q.view([batch_size, self.num_heads, self.dk // self.num_heads, H*W]) 43 | k = k.view([batch_size, self.num_heads, self.dk // self.num_heads, H*W]) 44 | v = v.view([batch_size, self.num_heads, self.dv // self.num_heads, H*W]) 45 | 46 | q *= self.dkh ** -0.5 47 | logits = einsum('ijkl, ijkm -> ijlm', q, k) 48 | if self.relative_encoding: 49 | h_rel_logits, w_rel_logits = self._relative_logits(q) 50 | logits += h_rel_logits 51 | logits += w_rel_logits 52 | 53 | weights = self.softmax(logits) 54 | attn_out = einsum('ijkl, ijfl -> ijfk', weights, v) 55 | attn_out = attn_out.contiguous().view(batch_size, self.dv, H, W) 56 | attn_out = self.conv_attn(attn_out) 57 | output = torch.cat([conv_out, attn_out], dim=1) 58 | return output 59 | 60 | def _relative_logits(self, q): 61 | b, nh, dkh, _ = q.size() 62 | q = q.view(b, nh, dkh, self.H, self.W) 63 | 64 | rel_logits_w = self._relative_logits1d(q, self.key_rel_w, self.H, self.W, nh, [0, 1, 2, 4, 3, 5]) 65 | rel_logits_h = self._relative_logits1d(q.permute(0, 1, 2, 4, 3), self.key_rel_h, self.W, self.H, nh, [0, 1, 4, 2, 5, 3]) 66 | return rel_logits_h, rel_logits_w 67 | 68 | def _relative_logits1d(self, q, rel_k, H, W, Nh, transpose_mask): 69 | rel_logits = einsum('bhdxy, md -> bhxym', q, rel_k) 70 | 71 | rel_logits = rel_logits.view([-1, Nh*H, W, 2*W-1]) 72 | rel_logits = self._rel_to_abs(rel_logits) 73 | rel_logits = rel_logits.view([-1, Nh, H, W, W]).unsqueeze(dim=3).repeat([1,1,1,H,1,1]) 74 | rel_logits = rel_logits.permute(*transpose_mask) 75 | rel_logits = rel_logits.contiguous().view(-1, Nh, H*W, H*W) 76 | return rel_logits 77 | 78 | def _rel_to_abs(self, x): 79 | b, nh, l, _ = x.size() 80 | 81 | 82 | x = F.pad(x, (0,1), 'constant', 0) 83 | flat_x = x.view([b, nh, l*(2*l)]); 84 | flat_x_padded = F.pad(flat_x, (0, l-1), 'constant', 0) 85 | 86 | final_x = flat_x_padded.view([b, nh, l+1, 2*l-1]) 87 | final_x = final_x[:, :, :l, l-1:] 88 | 89 | return final_x 90 | -------------------------------------------------------------------------------- /pytorch-retinanet/coco_eval.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from pycocotools.coco import COCO 4 | from pycocotools.cocoeval import COCOeval 5 | 6 | import numpy as np 7 | import json 8 | import os 9 | 10 | import torch 11 | 12 | def evaluate_coco(dataset, model, threshold=0.05): 13 | 14 | model.eval() 15 | 16 | with torch.no_grad(): 17 | 18 | # start collecting results 19 | results = [] 20 | image_ids = [] 21 | 22 | for index in range(len(dataset)): 23 | data = dataset[index] 24 | scale = data['scale'] 25 | 26 | # run network 27 | scores, labels, boxes = model(data['img'].permute(2, 0, 1).cuda().float().unsqueeze(dim=0)) 28 | scores = 
scores.cpu() 29 | labels = labels.cpu() 30 | boxes = boxes.cpu() 31 | 32 | # correct boxes for image scale 33 | boxes /= scale 34 | 35 | if boxes.shape[0] > 0: 36 | # change to (x, y, w, h) (MS COCO standard) 37 | boxes[:, 2] -= boxes[:, 0] 38 | boxes[:, 3] -= boxes[:, 1] 39 | 40 | # compute predicted labels and scores 41 | #for box, score, label in zip(boxes[0], scores[0], labels[0]): 42 | for box_id in range(boxes.shape[0]): 43 | score = float(scores[box_id]) 44 | label = int(labels[box_id]) 45 | box = boxes[box_id, :] 46 | 47 | # scores are sorted, so we can break 48 | if score < threshold: 49 | break 50 | 51 | # append detection for each positively labeled class 52 | image_result = { 53 | 'image_id' : dataset.image_ids[index], 54 | 'category_id' : dataset.label_to_coco_label(label), 55 | 'score' : float(score), 56 | 'bbox' : box.tolist(), 57 | } 58 | 59 | # append detection to results 60 | results.append(image_result) 61 | 62 | # append image to list of processed images 63 | image_ids.append(dataset.image_ids[index]) 64 | 65 | # print progress 66 | print('{}/{}'.format(index, len(dataset)), end='\r') 67 | 68 | if not len(results): 69 | return 70 | 71 | # write output 72 | json.dump(results, open('{}_bbox_results.json'.format(dataset.set_name), 'w'), indent=4) 73 | 74 | # load results in COCO evaluation tool 75 | coco_true = dataset.coco 76 | coco_pred = coco_true.loadRes('{}_bbox_results.json'.format(dataset.set_name)) 77 | 78 | # run COCO evaluation 79 | coco_eval = COCOeval(coco_true, coco_pred, 'bbox') 80 | coco_eval.params.imgIds = image_ids 81 | coco_eval.evaluate() 82 | coco_eval.accumulate() 83 | coco_eval.summarize() 84 | 85 | model.train() 86 | 87 | return 88 | -------------------------------------------------------------------------------- /pytorch-retinanet/csv_eval.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | import json 5 | import os 6 | 7 | import torch 8 | 9 | 10 | 11 | def compute_overlap(a, b): 12 | """ 13 | Parameters 14 | ---------- 15 | a: (N, 4) ndarray of float 16 | b: (K, 4) ndarray of float 17 | Returns 18 | ------- 19 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 20 | """ 21 | area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1]) 22 | 23 | iw = np.minimum(np.expand_dims(a[:, 2], axis=1), b[:, 2]) - np.maximum(np.expand_dims(a[:, 0], 1), b[:, 0]) 24 | ih = np.minimum(np.expand_dims(a[:, 3], axis=1), b[:, 3]) - np.maximum(np.expand_dims(a[:, 1], 1), b[:, 1]) 25 | 26 | iw = np.maximum(iw, 0) 27 | ih = np.maximum(ih, 0) 28 | 29 | ua = np.expand_dims((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), axis=1) + area - iw * ih 30 | 31 | ua = np.maximum(ua, np.finfo(float).eps) 32 | 33 | intersection = iw * ih 34 | 35 | return intersection / ua 36 | 37 | 38 | def _compute_ap(recall, precision): 39 | """ Compute the average precision, given the recall and precision curves. 40 | Code originally from https://github.com/rbgirshick/py-faster-rcnn. 41 | # Arguments 42 | recall: The recall curve (list). 43 | precision: The precision curve (list). 44 | # Returns 45 | The average precision as computed in py-faster-rcnn. 
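`compute_overlap` below returns the pairwise IoU matrix between N detection boxes and K ground-truth boxes, all in (x1, y1, x2, y2) form. A tiny worked case, assuming the module is imported from `pytorch-retinanet/`:

```
import numpy as np
# Assumed import path (run from pytorch-retinanet/).
from csv_eval import compute_overlap

a = np.array([[0., 0., 10., 10.]])   # one detection
b = np.array([[5., 0., 15., 10.]])   # one ground-truth box, shifted right by half its width
print(compute_overlap(a, b))         # [[0.3333...]]: intersection 50 / union 150
```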
46 | """ 47 | # correct AP calculation 48 | # first append sentinel values at the end 49 | mrec = np.concatenate(([0.], recall, [1.])) 50 | mpre = np.concatenate(([0.], precision, [0.])) 51 | 52 | # compute the precision envelope 53 | for i in range(mpre.size - 1, 0, -1): 54 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 55 | 56 | # to calculate area under PR curve, look for points 57 | # where X axis (recall) changes value 58 | i = np.where(mrec[1:] != mrec[:-1])[0] 59 | 60 | # and sum (\Delta recall) * prec 61 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 62 | return ap 63 | 64 | 65 | def _get_detections(dataset, retinanet, score_threshold=0.05, max_detections=100, save_path=None): 66 | """ Get the detections from the retinanet using the generator. 67 | The result is a list of lists such that the size is: 68 | all_detections[num_images][num_classes] = detections[num_detections, 4 + num_classes] 69 | # Arguments 70 | dataset : The generator used to run images through the retinanet. 71 | retinanet : The retinanet to run on the images. 72 | score_threshold : The score confidence threshold to use. 73 | max_detections : The maximum number of detections to use per image. 74 | save_path : The path to save the images with visualized detections to. 75 | # Returns 76 | A list of lists containing the detections for each image in the generator. 77 | """ 78 | all_detections = [[None for i in range(dataset.num_classes())] for j in range(len(dataset))] 79 | 80 | retinanet.eval() 81 | 82 | with torch.no_grad(): 83 | 84 | for index in range(len(dataset)): 85 | data = dataset[index] 86 | scale = data['scale'] 87 | 88 | # run network 89 | scores, labels, boxes = retinanet(data['img'].permute(2, 0, 1).cuda().float().unsqueeze(dim=0)) 90 | scores = scores.cpu().numpy() 91 | labels = labels.cpu().numpy() 92 | boxes = boxes.cpu().numpy() 93 | 94 | # correct boxes for image scale 95 | boxes /= scale 96 | 97 | # select indices which have a score above the threshold 98 | indices = np.where(scores > score_threshold)[0] 99 | if indices.shape[0] > 0: 100 | # select those scores 101 | scores = scores[indices] 102 | 103 | # find the order with which to sort the scores 104 | scores_sort = np.argsort(-scores)[:max_detections] 105 | 106 | # select detections 107 | image_boxes = boxes[indices[scores_sort], :] 108 | image_scores = scores[scores_sort] 109 | image_labels = labels[indices[scores_sort]] 110 | image_detections = np.concatenate([image_boxes, np.expand_dims(image_scores, axis=1), np.expand_dims(image_labels, axis=1)], axis=1) 111 | 112 | # copy detections to all_detections 113 | for label in range(dataset.num_classes()): 114 | all_detections[index][label] = image_detections[image_detections[:, -1] == label, :-1] 115 | else: 116 | # copy detections to all_detections 117 | for label in range(dataset.num_classes()): 118 | all_detections[index][label] = np.zeros((0, 5)) 119 | 120 | print('{}/{}'.format(index + 1, len(dataset)), end='\r') 121 | 122 | return all_detections 123 | 124 | 125 | def _get_annotations(generator): 126 | """ Get the ground truth annotations from the generator. 127 | The result is a list of lists such that the size is: 128 | all_detections[num_images][num_classes] = annotations[num_detections, 5] 129 | # Arguments 130 | generator : The generator used to retrieve ground truth annotations. 131 | # Returns 132 | A list of lists containing the annotations for each image in the generator. 
133 | """ 134 | all_annotations = [[None for i in range(generator.num_classes())] for j in range(len(generator))] 135 | 136 | for i in range(len(generator)): 137 | # load the annotations 138 | annotations = generator.load_annotations(i) 139 | 140 | # copy detections to all_annotations 141 | for label in range(generator.num_classes()): 142 | all_annotations[i][label] = annotations[annotations[:, 4] == label, :4].copy() 143 | 144 | print('{}/{}'.format(i + 1, len(generator)), end='\r') 145 | 146 | return all_annotations 147 | 148 | 149 | def evaluate( 150 | generator, 151 | retinanet, 152 | iou_threshold=0.5, 153 | score_threshold=0.05, 154 | max_detections=100, 155 | save_path=None 156 | ): 157 | """ Evaluate a given dataset using a given retinanet. 158 | # Arguments 159 | generator : The generator that represents the dataset to evaluate. 160 | retinanet : The retinanet to evaluate. 161 | iou_threshold : The threshold used to consider when a detection is positive or negative. 162 | score_threshold : The score confidence threshold to use for detections. 163 | max_detections : The maximum number of detections to use per image. 164 | save_path : The path to save images with visualized detections to. 165 | # Returns 166 | A dict mapping class names to mAP scores. 167 | """ 168 | 169 | 170 | 171 | # gather all detections and annotations 172 | 173 | all_detections = _get_detections(generator, retinanet, score_threshold=score_threshold, max_detections=max_detections, save_path=save_path) 174 | all_annotations = _get_annotations(generator) 175 | 176 | average_precisions = {} 177 | 178 | for label in range(generator.num_classes()): 179 | false_positives = np.zeros((0,)) 180 | true_positives = np.zeros((0,)) 181 | scores = np.zeros((0,)) 182 | num_annotations = 0.0 183 | 184 | for i in range(len(generator)): 185 | detections = all_detections[i][label] 186 | annotations = all_annotations[i][label] 187 | num_annotations += annotations.shape[0] 188 | detected_annotations = [] 189 | 190 | for d in detections: 191 | scores = np.append(scores, d[4]) 192 | 193 | if annotations.shape[0] == 0: 194 | false_positives = np.append(false_positives, 1) 195 | true_positives = np.append(true_positives, 0) 196 | continue 197 | 198 | overlaps = compute_overlap(np.expand_dims(d, axis=0), annotations) 199 | assigned_annotation = np.argmax(overlaps, axis=1) 200 | max_overlap = overlaps[0, assigned_annotation] 201 | 202 | if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations: 203 | false_positives = np.append(false_positives, 0) 204 | true_positives = np.append(true_positives, 1) 205 | detected_annotations.append(assigned_annotation) 206 | else: 207 | false_positives = np.append(false_positives, 1) 208 | true_positives = np.append(true_positives, 0) 209 | 210 | # no annotations -> AP for this class is 0 (is this correct?) 
211 | if num_annotations == 0: 212 | average_precisions[label] = 0, 0 213 | continue 214 | 215 | # sort by score 216 | indices = np.argsort(-scores) 217 | false_positives = false_positives[indices] 218 | true_positives = true_positives[indices] 219 | 220 | # compute false positives and true positives 221 | false_positives = np.cumsum(false_positives) 222 | true_positives = np.cumsum(true_positives) 223 | 224 | # compute recall and precision 225 | recall = true_positives / num_annotations 226 | precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps) 227 | 228 | # compute average precision 229 | average_precision = _compute_ap(recall, precision) 230 | average_precisions[label] = average_precision, num_annotations 231 | 232 | print('\nmAP:') 233 | for label in range(generator.num_classes()): 234 | label_name = generator.label_to_name(label) 235 | print('{}: {}'.format(label_name, average_precisions[label][0])) 236 | 237 | return average_precisions 238 | 239 | -------------------------------------------------------------------------------- /pytorch-retinanet/dataloader.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import sys 3 | import os 4 | import torch 5 | import numpy as np 6 | import random 7 | import csv 8 | 9 | from torch.utils.data import Dataset, DataLoader 10 | from torchvision import transforms, utils 11 | from torch.utils.data.sampler import Sampler 12 | 13 | from pycocotools.coco import COCO 14 | 15 | import skimage.io 16 | import skimage.transform 17 | import skimage.color 18 | import skimage 19 | 20 | from PIL import Image 21 | 22 | 23 | class CocoDataset(Dataset): 24 | """Coco dataset.""" 25 | 26 | def __init__(self, root_dir, set_name='train2017', transform=None): 27 | """ 28 | Args: 29 | root_dir (string): COCO directory. 30 | transform (callable, optional): Optional transform to be applied 31 | on a sample. 
32 | """ 33 | self.root_dir = root_dir 34 | self.set_name = set_name 35 | self.transform = transform 36 | 37 | self.coco = COCO(os.path.join(self.root_dir, 'annotations', 'instances_' + self.set_name + '.json')) 38 | self.image_ids = self.coco.getImgIds() 39 | 40 | self.load_classes() 41 | 42 | def load_classes(self): 43 | # load class names (name -> label) 44 | categories = self.coco.loadCats(self.coco.getCatIds()) 45 | categories.sort(key=lambda x: x['id']) 46 | 47 | self.classes = {} 48 | self.coco_labels = {} 49 | self.coco_labels_inverse = {} 50 | for c in categories: 51 | self.coco_labels[len(self.classes)] = c['id'] 52 | self.coco_labels_inverse[c['id']] = len(self.classes) 53 | self.classes[c['name']] = len(self.classes) 54 | 55 | # also load the reverse (label -> name) 56 | self.labels = {} 57 | for key, value in self.classes.items(): 58 | self.labels[value] = key 59 | 60 | def __len__(self): 61 | return len(self.image_ids) 62 | 63 | def __getitem__(self, idx): 64 | 65 | img = self.load_image(idx) 66 | annot = self.load_annotations(idx) 67 | sample = {'img': img, 'annot': annot} 68 | if self.transform: 69 | sample = self.transform(sample) 70 | 71 | return sample 72 | 73 | def load_image(self, image_index): 74 | image_info = self.coco.loadImgs(self.image_ids[image_index])[0] 75 | path = os.path.join(self.root_dir, 'images', self.set_name, image_info['file_name']) 76 | img = skimage.io.imread(path) 77 | 78 | if len(img.shape) == 2: 79 | img = skimage.color.gray2rgb(img) 80 | 81 | return img.astype(np.float32)/255.0 82 | 83 | def load_annotations(self, image_index): 84 | # get ground truth annotations 85 | annotations_ids = self.coco.getAnnIds(imgIds=self.image_ids[image_index], iscrowd=False) 86 | annotations = np.zeros((0, 5)) 87 | 88 | # some images appear to miss annotations (like image with id 257034) 89 | if len(annotations_ids) == 0: 90 | return annotations 91 | 92 | # parse annotations 93 | coco_annotations = self.coco.loadAnns(annotations_ids) 94 | for idx, a in enumerate(coco_annotations): 95 | 96 | # some annotations have basically no width / height, skip them 97 | if a['bbox'][2] < 1 or a['bbox'][3] < 1: 98 | continue 99 | 100 | annotation = np.zeros((1, 5)) 101 | annotation[0, :4] = a['bbox'] 102 | annotation[0, 4] = self.coco_label_to_label(a['category_id']) 103 | annotations = np.append(annotations, annotation, axis=0) 104 | 105 | # transform from [x, y, w, h] to [x1, y1, x2, y2] 106 | annotations[:, 2] = annotations[:, 0] + annotations[:, 2] 107 | annotations[:, 3] = annotations[:, 1] + annotations[:, 3] 108 | 109 | return annotations 110 | 111 | def coco_label_to_label(self, coco_label): 112 | return self.coco_labels_inverse[coco_label] 113 | 114 | 115 | def label_to_coco_label(self, label): 116 | return self.coco_labels[label] 117 | 118 | def image_aspect_ratio(self, image_index): 119 | image = self.coco.loadImgs(self.image_ids[image_index])[0] 120 | return float(image['width']) / float(image['height']) 121 | 122 | def num_classes(self): 123 | return 80 124 | 125 | 126 | class CSVDataset(Dataset): 127 | """CSV dataset.""" 128 | 129 | def __init__(self, train_file, class_list, transform=None): 130 | """ 131 | Args: 132 | train_file (string): CSV file with training annotations 133 | annotations (string): CSV file with class list 134 | test_file (string, optional): CSV file with testing annotations 135 | """ 136 | self.train_file = train_file 137 | self.class_list = class_list 138 | self.transform = transform 139 | 140 | # parse the provided class file 141 | try: 142 | 
with self._open_for_csv(self.class_list) as file: 143 | self.classes = self.load_classes(csv.reader(file, delimiter=',')) 144 | except ValueError as e: 145 | raise_from(ValueError('invalid CSV class file: {}: {}'.format(self.class_list, e)), None) 146 | 147 | self.labels = {} 148 | for key, value in self.classes.items(): 149 | self.labels[value] = key 150 | 151 | # csv with img_path, x1, y1, x2, y2, class_name 152 | try: 153 | with self._open_for_csv(self.train_file) as file: 154 | self.image_data = self._read_annotations(csv.reader(file, delimiter=','), self.classes) 155 | except ValueError as e: 156 | raise_from(ValueError('invalid CSV annotations file: {}: {}'.format(self.train_file, e)), None) 157 | self.image_names = list(self.image_data.keys()) 158 | 159 | def _parse(self, value, function, fmt): 160 | """ 161 | Parse a string into a value, and format a nice ValueError if it fails. 162 | Returns `function(value)`. 163 | Any `ValueError` raised is catched and a new `ValueError` is raised 164 | with message `fmt.format(e)`, where `e` is the caught `ValueError`. 165 | """ 166 | try: 167 | return function(value) 168 | except ValueError as e: 169 | raise_from(ValueError(fmt.format(e)), None) 170 | 171 | def _open_for_csv(self, path): 172 | """ 173 | Open a file with flags suitable for csv.reader. 174 | This is different for python2 it means with mode 'rb', 175 | for python3 this means 'r' with "universal newlines". 176 | """ 177 | if sys.version_info[0] < 3: 178 | return open(path, 'rb') 179 | else: 180 | return open(path, 'r', newline='') 181 | 182 | 183 | def load_classes(self, csv_reader): 184 | result = {} 185 | 186 | for line, row in enumerate(csv_reader): 187 | line += 1 188 | 189 | try: 190 | class_name, class_id = row 191 | except ValueError: 192 | raise_from(ValueError('line {}: format should be \'class_name,class_id\''.format(line)), None) 193 | class_id = self._parse(class_id, int, 'line {}: malformed class ID: {{}}'.format(line)) 194 | 195 | if class_name in result: 196 | raise ValueError('line {}: duplicate class name: \'{}\''.format(line, class_name)) 197 | result[class_name] = class_id 198 | return result 199 | 200 | 201 | def __len__(self): 202 | return len(self.image_names) 203 | 204 | def __getitem__(self, idx): 205 | 206 | img = self.load_image(idx) 207 | annot = self.load_annotations(idx) 208 | sample = {'img': img, 'annot': annot} 209 | if self.transform: 210 | sample = self.transform(sample) 211 | 212 | return sample 213 | 214 | def load_image(self, image_index): 215 | img = skimage.io.imread(self.image_names[image_index]) 216 | 217 | if len(img.shape) == 2: 218 | img = skimage.color.gray2rgb(img) 219 | 220 | return img.astype(np.float32)/255.0 221 | 222 | def load_annotations(self, image_index): 223 | # get ground truth annotations 224 | annotation_list = self.image_data[self.image_names[image_index]] 225 | annotations = np.zeros((0, 5)) 226 | 227 | # some images appear to miss annotations (like image with id 257034) 228 | if len(annotation_list) == 0: 229 | return annotations 230 | 231 | # parse annotations 232 | for idx, a in enumerate(annotation_list): 233 | # some annotations have basically no width / height, skip them 234 | x1 = a['x1'] 235 | x2 = a['x2'] 236 | y1 = a['y1'] 237 | y2 = a['y2'] 238 | 239 | if (x2-x1) < 1 or (y2-y1) < 1: 240 | continue 241 | 242 | annotation = np.zeros((1, 5)) 243 | 244 | annotation[0, 0] = x1 245 | annotation[0, 1] = y1 246 | annotation[0, 2] = x2 247 | annotation[0, 3] = y2 248 | 249 | annotation[0, 4] = 
self.name_to_label(a['class']) 250 | annotations = np.append(annotations, annotation, axis=0) 251 | 252 | return annotations 253 | 254 | def _read_annotations(self, csv_reader, classes): 255 | result = {} 256 | for line, row in enumerate(csv_reader): 257 | line += 1 258 | 259 | try: 260 | img_file, x1, y1, x2, y2, class_name = row[:6] 261 | except ValueError: 262 | raise_from(ValueError('line {}: format should be \'img_file,x1,y1,x2,y2,class_name\' or \'img_file,,,,,\''.format(line)), None) 263 | 264 | if img_file not in result: 265 | result[img_file] = [] 266 | 267 | # If a row contains only an image path, it's an image without annotations. 268 | if (x1, y1, x2, y2, class_name) == ('', '', '', '', ''): 269 | continue 270 | 271 | x1 = self._parse(x1, int, 'line {}: malformed x1: {{}}'.format(line)) 272 | y1 = self._parse(y1, int, 'line {}: malformed y1: {{}}'.format(line)) 273 | x2 = self._parse(x2, int, 'line {}: malformed x2: {{}}'.format(line)) 274 | y2 = self._parse(y2, int, 'line {}: malformed y2: {{}}'.format(line)) 275 | 276 | # Check that the bounding box is valid. 277 | if x2 <= x1: 278 | raise ValueError('line {}: x2 ({}) must be higher than x1 ({})'.format(line, x2, x1)) 279 | if y2 <= y1: 280 | raise ValueError('line {}: y2 ({}) must be higher than y1 ({})'.format(line, y2, y1)) 281 | 282 | # check if the current class name is correctly present 283 | if class_name not in classes: 284 | raise ValueError('line {}: unknown class name: \'{}\' (classes: {})'.format(line, class_name, classes)) 285 | 286 | result[img_file].append({'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'class': class_name}) 287 | return result 288 | 289 | def name_to_label(self, name): 290 | return self.classes[name] 291 | 292 | def label_to_name(self, label): 293 | return self.labels[label] 294 | 295 | def num_classes(self): 296 | return max(self.classes.values()) + 1 297 | 298 | def image_aspect_ratio(self, image_index): 299 | image = Image.open(self.image_names[image_index]) 300 | return float(image.width) / float(image.height) 301 | 302 | 303 | def collater(data): 304 | 305 | imgs = [s['img'] for s in data] 306 | annots = [s['annot'] for s in data] 307 | scales = [s['scale'] for s in data] 308 | 309 | widths = [int(s.shape[0]) for s in imgs] 310 | heights = [int(s.shape[1]) for s in imgs] 311 | batch_size = len(imgs) 312 | 313 | max_width = np.array(widths).max() 314 | max_height = np.array(heights).max() 315 | 316 | padded_imgs = torch.zeros(batch_size, max_width, max_height, 3) 317 | 318 | for i in range(batch_size): 319 | img = imgs[i] 320 | padded_imgs[i, :int(img.shape[0]), :int(img.shape[1]), :] = img 321 | 322 | max_num_annots = max(annot.shape[0] for annot in annots) 323 | 324 | if max_num_annots > 0: 325 | 326 | annot_padded = torch.ones((len(annots), max_num_annots, 5)) * -1 327 | 328 | if max_num_annots > 0: 329 | for idx, annot in enumerate(annots): 330 | #print(annot.shape) 331 | if annot.shape[0] > 0: 332 | annot_padded[idx, :annot.shape[0], :] = annot 333 | else: 334 | annot_padded = torch.ones((len(annots), 1, 5)) * -1 335 | 336 | 337 | padded_imgs = padded_imgs.permute(0, 3, 1, 2) 338 | 339 | return {'img': padded_imgs, 'annot': annot_padded, 'scale': scales} 340 | 341 | class Resizer(object): 342 | """Convert ndarrays in sample to Tensors.""" 343 | 344 | def __call__(self, sample, min_side=608, max_side=1024): 345 | image, annots = sample['img'], sample['annot'] 346 | 347 | rows, cols, cns = image.shape 348 | 349 | smallest_side = min(rows, cols) 350 | 351 | # rescale the image so the smallest 
side is min_side 352 | scale = min_side / smallest_side 353 | 354 | # check if the largest side is now greater than max_side, which can happen 355 | # when images have a large aspect ratio 356 | largest_side = max(rows, cols) 357 | 358 | if largest_side * scale > max_side: 359 | scale = max_side / largest_side 360 | 361 | # resize the image with the computed scale 362 | image = skimage.transform.resize(image, (int(round(rows*scale)), int(round((cols*scale))))) 363 | rows, cols, cns = image.shape 364 | 365 | pad_w = 32 - rows%32 366 | pad_h = 32 - cols%32 367 | 368 | new_image = np.zeros((rows + pad_w, cols + pad_h, cns)).astype(np.float32) 369 | new_image[:rows, :cols, :] = image.astype(np.float32) 370 | 371 | annots[:, :4] *= scale 372 | 373 | return {'img': torch.from_numpy(new_image), 'annot': torch.from_numpy(annots), 'scale': scale} 374 | 375 | 376 | class Augmenter(object): 377 | """Convert ndarrays in sample to Tensors.""" 378 | 379 | def __call__(self, sample, flip_x=0.5): 380 | 381 | if np.random.rand() < flip_x: 382 | image, annots = sample['img'], sample['annot'] 383 | image = image[:, ::-1, :] 384 | 385 | rows, cols, channels = image.shape 386 | 387 | x1 = annots[:, 0].copy() 388 | x2 = annots[:, 2].copy() 389 | 390 | x_tmp = x1.copy() 391 | 392 | annots[:, 0] = cols - x2 393 | annots[:, 2] = cols - x_tmp 394 | 395 | sample = {'img': image, 'annot': annots} 396 | 397 | return sample 398 | 399 | 400 | class Normalizer(object): 401 | 402 | def __init__(self): 403 | self.mean = np.array([[[0.485, 0.456, 0.406]]]) 404 | self.std = np.array([[[0.229, 0.224, 0.225]]]) 405 | 406 | def __call__(self, sample): 407 | 408 | image, annots = sample['img'], sample['annot'] 409 | 410 | return {'img':((image.astype(np.float32)-self.mean)/self.std), 'annot': annots} 411 | 412 | class UnNormalizer(object): 413 | def __init__(self, mean=None, std=None): 414 | if mean == None: 415 | self.mean = [0.485, 0.456, 0.406] 416 | else: 417 | self.mean = mean 418 | if std == None: 419 | self.std = [0.229, 0.224, 0.225] 420 | else: 421 | self.std = std 422 | 423 | def __call__(self, tensor): 424 | """ 425 | Args: 426 | tensor (Tensor): Tensor image of size (C, H, W) to be normalized. 427 | Returns: 428 | Tensor: Normalized image. 
429 | """ 430 | for t, m, s in zip(tensor, self.mean, self.std): 431 | t.mul_(s).add_(m) 432 | return tensor 433 | 434 | 435 | class AspectRatioBasedSampler(Sampler): 436 | 437 | def __init__(self, data_source, batch_size, drop_last): 438 | self.data_source = data_source 439 | self.batch_size = batch_size 440 | self.drop_last = drop_last 441 | self.groups = self.group_images() 442 | 443 | def __iter__(self): 444 | random.shuffle(self.groups) 445 | for group in self.groups: 446 | yield group 447 | 448 | def __len__(self): 449 | if self.drop_last: 450 | return len(self.data_source) // self.batch_size 451 | else: 452 | return (len(self.data_source) + self.batch_size - 1) // self.batch_size 453 | 454 | def group_images(self): 455 | # determine the order of the images 456 | order = list(range(len(self.data_source))) 457 | order.sort(key=lambda x: self.data_source.image_aspect_ratio(x)) 458 | 459 | # divide into groups, one group = one batch 460 | return [[order[x % len(order)] for x in range(i, i + self.batch_size)] for i in range(0, len(order), self.batch_size)] 461 | -------------------------------------------------------------------------------- /pytorch-retinanet/images/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sebastiani/pytorch-attention-augmented-convolution/4f0eb899714f22a88a1b6a602ee2dfb20f59a4b6/pytorch-retinanet/images/1.jpg -------------------------------------------------------------------------------- /pytorch-retinanet/images/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sebastiani/pytorch-attention-augmented-convolution/4f0eb899714f22a88a1b6a602ee2dfb20f59a4b6/pytorch-retinanet/images/3.jpg -------------------------------------------------------------------------------- /pytorch-retinanet/images/4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sebastiani/pytorch-attention-augmented-convolution/4f0eb899714f22a88a1b6a602ee2dfb20f59a4b6/pytorch-retinanet/images/4.jpg -------------------------------------------------------------------------------- /pytorch-retinanet/images/5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sebastiani/pytorch-attention-augmented-convolution/4f0eb899714f22a88a1b6a602ee2dfb20f59a4b6/pytorch-retinanet/images/5.jpg -------------------------------------------------------------------------------- /pytorch-retinanet/images/6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sebastiani/pytorch-attention-augmented-convolution/4f0eb899714f22a88a1b6a602ee2dfb20f59a4b6/pytorch-retinanet/images/6.jpg -------------------------------------------------------------------------------- /pytorch-retinanet/images/7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sebastiani/pytorch-attention-augmented-convolution/4f0eb899714f22a88a1b6a602ee2dfb20f59a4b6/pytorch-retinanet/images/7.jpg -------------------------------------------------------------------------------- /pytorch-retinanet/images/8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sebastiani/pytorch-attention-augmented-convolution/4f0eb899714f22a88a1b6a602ee2dfb20f59a4b6/pytorch-retinanet/images/8.jpg 
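A minimal sketch of how the dataloader components listed above (CSVDataset, the Resizer/Augmenter/Normalizer transforms, collater and AspectRatioBasedSampler) fit together; it mirrors the loader construction in pytorch-retinanet/train.py further below. The CSV file names and the batch size here are placeholders, not files shipped with this repository.

from torch.utils.data import DataLoader
from torchvision import transforms

from dataloader import (CSVDataset, collater, Resizer, Augmenter,
                        Normalizer, AspectRatioBasedSampler)

# annotations.csv rows: img_path,x1,y1,x2,y2,class_name
# classes.csv rows:     class_name,class_id
dataset = CSVDataset(train_file='annotations.csv',
                     class_list='classes.csv',
                     transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))

# put images of similar aspect ratio into the same batch; collater then zero-pads
# the images to a common size and pads the annotation tensors with -1
sampler = AspectRatioBasedSampler(dataset, batch_size=2, drop_last=False)
loader = DataLoader(dataset, num_workers=3, collate_fn=collater, batch_sampler=sampler)

batch = next(iter(loader))
imgs, annots, scales = batch['img'], batch['annot'], batch['scale']
# imgs: B x 3 x H x W float tensor; annots: B x max_boxes x 5 (x1, y1, x2, y2, class index)

Sorting the dataset by aspect ratio before batching keeps the padding added by collater small, which is why the sampler is passed as batch_sampler rather than letting DataLoader form batches in arbitrary order.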
-------------------------------------------------------------------------------- /pytorch-retinanet/lib/README.md: -------------------------------------------------------------------------------- 1 | # NMS 2 | the comparison of nms in speed 3 | 4 | method 1: 5 | thresh=0.7, time wastes:0.0287 6 | thresh=0.8, time wastes:0.1057 7 | thresh=0.9, time wastes:0.4204 8 | 9 | method 2: 10 | thresh=0.7, time wastes:0.0272 11 | thresh=0.8, time wastes:0.1038 12 | thresh=0.9, time wastes:0.4184 13 | 14 | method 3: 15 | thresh=0.7, time wastes:0.0019 16 | thresh=0.8, time wastes:0.0028 17 | thresh=0.9, time wastes:0.0036 18 | 19 | method 4: 20 | thresh=0.7, time wastes:0.0120 21 | thresh=0.8, time wastes:0.0063 22 | thresh=0.9, time wastes:0.0071 23 | 24 | Reference: 25 | py-faster-rcnn: https://github.com/rbgirshick/py-faster-rcnn/tree/master/lib/nms 26 | -------------------------------------------------------------------------------- /pytorch-retinanet/lib/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sebastiani/pytorch-attention-augmented-convolution/4f0eb899714f22a88a1b6a602ee2dfb20f59a4b6/pytorch-retinanet/lib/nms/__init__.py -------------------------------------------------------------------------------- /pytorch-retinanet/lib/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /pytorch-retinanet/lib/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | 2 | # -------------------------------------------------------- 3 | # Faster R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick 7 | # -------------------------------------------------------- 8 | 9 | 10 | import numpy as np 11 | cimport numpy as np 12 | 13 | assert sizeof(int) == sizeof(np.int32_t) 14 | 15 | cdef extern from "gpu_nms.hpp": 16 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 17 | 18 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 19 | np.int32_t device_id=0): 20 | dets = dets.numpy() 21 | cdef int boxes_num = dets.shape[0] 22 | cdef int boxes_dim = dets.shape[1] 23 | cdef int num_out 24 | cdef np.ndarray[np.int32_t, ndim=1] \ 25 | keep = np.zeros(boxes_num, dtype=np.int32) 26 | cdef np.ndarray[np.float32_t, ndim=1] \ 27 | scores = dets[:, 4] 28 | cdef np.ndarray[np.int_t, ndim=1] \ 29 | order = scores.argsort()[::-1] 30 | cdef np.ndarray[np.float32_t, ndim=2] \ 31 | sorted_dets = dets[order, :] 32 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 33 | keep = keep[:num_out] 34 | return list(order[keep]) 35 | 36 | 37 | -------------------------------------------------------------------------------- /pytorch-retinanet/lib/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | 9 | #include "gpu_nms.hpp" 10 | #include 11 
| #include 12 | 13 | #define CUDA_CHECK(condition) \ 14 | /* Code block avoids redefinition of cudaError_t error */ \ 15 | do { \ 16 | cudaError_t error = condition; \ 17 | if (error != cudaSuccess) { \ 18 | std::cout << cudaGetErrorString(error) << std::endl; \ 19 | } \ 20 | } while (0) 21 | 22 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 23 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 24 | 25 | __device__ inline float devIoU(float const * const a, float const * const b) { 26 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 27 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 28 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 29 | float interS = width * height; 30 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 31 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 32 | return interS / (Sa + Sb - interS); 33 | } 34 | 35 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 36 | const float *dev_boxes, unsigned long long *dev_mask) { 37 | const int row_start = blockIdx.y; 38 | const int col_start = blockIdx.x; 39 | 40 | // if (row_start > col_start) return; 41 | 42 | const int row_size = 43 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 44 | const int col_size = 45 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 46 | 47 | __shared__ float block_boxes[threadsPerBlock * 5]; 48 | if (threadIdx.x < col_size) { 49 | block_boxes[threadIdx.x * 5 + 0] = 50 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 51 | block_boxes[threadIdx.x * 5 + 1] = 52 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 53 | block_boxes[threadIdx.x * 5 + 2] = 54 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 55 | block_boxes[threadIdx.x * 5 + 3] = 56 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 57 | block_boxes[threadIdx.x * 5 + 4] = 58 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 59 | } 60 | __syncthreads(); 61 | 62 | if (threadIdx.x < row_size) { 63 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 64 | const float *cur_box = dev_boxes + cur_box_idx * 5; 65 | int i = 0; 66 | unsigned long long t = 0; 67 | int start = 0; 68 | if (row_start == col_start) { 69 | start = threadIdx.x + 1; 70 | } 71 | for (i = start; i < col_size; i++) { 72 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 73 | t |= 1ULL << i; 74 | } 75 | } 76 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 77 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 78 | } 79 | } 80 | 81 | void _set_device(int device_id) { 82 | int current_device; 83 | CUDA_CHECK(cudaGetDevice(¤t_device)); 84 | if (current_device == device_id) { 85 | return; 86 | } 87 | // The call to cudaSetDevice must come before any calls to Get, which 88 | // may perform initialization using the GPU. 
89 | CUDA_CHECK(cudaSetDevice(device_id)); 90 | } 91 | 92 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 93 | int boxes_dim, float nms_overlap_thresh, int device_id) { 94 | _set_device(device_id); 95 | 96 | float* boxes_dev = NULL; 97 | unsigned long long* mask_dev = NULL; 98 | 99 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 100 | 101 | CUDA_CHECK(cudaMalloc(&boxes_dev, 102 | boxes_num * boxes_dim * sizeof(float))); 103 | CUDA_CHECK(cudaMemcpy(boxes_dev, 104 | boxes_host, 105 | boxes_num * boxes_dim * sizeof(float), 106 | cudaMemcpyHostToDevice)); 107 | 108 | CUDA_CHECK(cudaMalloc(&mask_dev, 109 | boxes_num * col_blocks * sizeof(unsigned long long))); 110 | 111 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 112 | DIVUP(boxes_num, threadsPerBlock)); 113 | dim3 threads(threadsPerBlock); 114 | nms_kernel<<>>(boxes_num, 115 | nms_overlap_thresh, 116 | boxes_dev, 117 | mask_dev); 118 | 119 | std::vector mask_host(boxes_num * col_blocks); 120 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 121 | mask_dev, 122 | sizeof(unsigned long long) * boxes_num * col_blocks, 123 | cudaMemcpyDeviceToHost)); 124 | 125 | std::vector remv(col_blocks); 126 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 127 | 128 | int num_to_keep = 0; 129 | for (int i = 0; i < boxes_num; i++) { 130 | int nblock = i / threadsPerBlock; 131 | int inblock = i % threadsPerBlock; 132 | 133 | if (!(remv[nblock] & (1ULL << inblock))) { 134 | keep_out[num_to_keep++] = i; 135 | unsigned long long *p = &mask_host[0] + i * col_blocks; 136 | for (int j = nblock; j < col_blocks; j++) { 137 | remv[j] |= p[j]; 138 | } 139 | } 140 | } 141 | *num_out = num_to_keep; 142 | 143 | CUDA_CHECK(cudaFree(boxes_dev)); 144 | CUDA_CHECK(cudaFree(mask_dev)); 145 | } 146 | -------------------------------------------------------------------------------- /pytorch-retinanet/lib/nms/nums_py.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Mon May 7 21:45:37 2018 5 | 6 | @author: lps 7 | """ 8 | import numpy as np 9 | 10 | 11 | boxes=np.array([[100,100,210,210,0.72], 12 | [250,250,420,420,0.8], 13 | [220,220,320,330,0.92], 14 | [100,100,210,210,0.72], 15 | [230,240,325,330,0.81], 16 | [220,230,315,340,0.9]]) 17 | 18 | 19 | def py_cpu_nms(dets, thresh): 20 | # dets:(m,5) thresh:scaler 21 | 22 | x1 = dets[:,0] 23 | y1 = dets[:,1] 24 | x2 = dets[:,2] 25 | y2 = dets[:,3] 26 | 27 | areas = (y2-y1+1) * (x2-x1+1) 28 | scores = dets[:,4] 29 | keep = [] 30 | 31 | index = scores.argsort()[::-1] 32 | 33 | while index.size >0: 34 | 35 | i = index[0] # every time the first is the biggst, and add it directly 36 | keep.append(i) 37 | 38 | x11 = np.maximum(x1[i], x1[index[1:]]) # calculate the points of overlap 39 | y11 = np.maximum(y1[i], y1[index[1:]]) 40 | x22 = np.minimum(x2[i], x2[index[1:]]) 41 | y22 = np.minimum(y2[i], y2[index[1:]]) 42 | 43 | w = np.maximum(0, x22-x11+1) # the weights of overlap 44 | h = np.maximum(0, y22-y11+1) # the height of overlap 45 | 46 | overlaps = w*h 47 | 48 | ious = overlaps / (areas[i]+areas[index[1:]] - overlaps) 49 | 50 | idx = np.where(ious<=thresh)[0] 51 | 52 | index = index[idx+1] # because index start from 1 53 | 54 | return keep 55 | 56 | 57 | import matplotlib.pyplot as plt 58 | def plot_bbox(dets, c='k'): 59 | 60 | x1 = dets[:,0] 61 | y1 = dets[:,1] 62 | x2 = dets[:,2] 63 | y2 = dets[:,3] 64 | 65 | 66 | plt.plot([x1,x2], [y1,y1], c) 67 | plt.plot([x1,x1], [y1,y2], 
c) 68 | plt.plot([x1,x2], [y2,y2], c) 69 | plt.plot([x2,x2], [y1,y2], c) 70 | plt.title("after nms") 71 | 72 | #plot_bbox(boxes,'k') # before nms 73 | # 74 | #keep = py_cpu_nms(boxes, thresh=0.7) 75 | #plot_bbox(boxes[keep], 'r')# after nms 76 | # 77 | 78 | -------------------------------------------------------------------------------- /pytorch-retinanet/lib/nms/nums_py1.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | boxes=np.array([[100,100,210,210,0.72], 5 | [250,250,420,420,0.8], 6 | [220,220,320,330,0.92], 7 | [100,100,210,210,0.72], 8 | [230,240,325,330,0.81], 9 | [220,230,315,340,0.9]]) 10 | 11 | 12 | def py_cpu_nms(dets, thresh): 13 | # dets:(m,5) thresh:scaler 14 | 15 | x1 = dets[:,0] 16 | y1 = dets[:,1] 17 | x2 = dets[:,2] 18 | y2 = dets[:,3] 19 | 20 | areas = (y2-y1+1) * (x2-x1+1) 21 | scores = dets[:,4] 22 | keep = [] 23 | 24 | index = scores.argsort()[::-1] 25 | 26 | j=0 27 | while index.size >0: 28 | 29 | j = j+1 30 | i = index[0] # every time the first is the biggst, and add it directly 31 | keep.append(i) 32 | 33 | x11 = np.maximum(x1[i], x1[index[1:]]) # calculate the points of overlap 34 | y11 = np.maximum(y1[i], y1[index[1:]]) 35 | x22 = np.minimum(x2[i], x2[index[1:]]) 36 | y22 = np.minimum(y2[i], y2[index[1:]]) 37 | 38 | w = np.maximum(0, x22-x11+1) # the weights of overlap 39 | h = np.maximum(0, y22-y11+1) # the height of overlap 40 | 41 | overlaps = w*h 42 | 43 | ious = overlaps / (areas[i]+areas[index[1:]] - overlaps) 44 | 45 | idx = np.where(ious<=thresh)[0] 46 | 47 | index = index[idx+1] # because index starts with 1 48 | 49 | return keep,j 50 | 51 | import matplotlib.pyplot as plt 52 | def plot_bbox(dets, c='k'): 53 | 54 | x1 = dets[:,0] 55 | y1 = dets[:,1] 56 | x2 = dets[:,2] 57 | y2 = dets[:,3] 58 | 59 | plt.plot([x1,x2], [y1,y1], c) 60 | plt.plot([x1,x1], [y1,y2], c) 61 | plt.plot([x1,x2], [y2,y2], c) 62 | plt.plot([x2,x2], [y1,y2], c) 63 | 64 | #plot_bbox(boxes,'k') # before nms 65 | 66 | #keep = py_cpu_nms(boxes, thresh=0.7) 67 | #plot_bbox(boxes[keep], 'r')# after nms 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /pytorch-retinanet/lib/nms/nums_py2.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | # 4 | #boxes=np.array([[100,100,210,210,0.72], 5 | # [250,250,420,420,0.8], 6 | # [220,220,320,330,0.92], 7 | # [100,100,210,210,0.72], 8 | # [230,240,325,330,0.81], 9 | # [220,230,315,340,0.9]]) 10 | # 11 | 12 | 13 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 14 | return a if a >= b else b 15 | 16 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 17 | return a if a <= b else b 18 | 19 | def py_cpu_nms(np.ndarray[np.float32_t,ndim=2] dets, np.float thresh): 20 | # dets:(m,5) thresh:scaler 21 | 22 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:,0] 23 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:,1] 24 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:,2] 25 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:,3] 26 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 27 | 28 | cdef np.ndarray[np.float32_t, ndim=1] areas = (y2-y1+1) * (x2-x1+1) 29 | cdef np.ndarray[np.int_t, ndim=1] index = scores.argsort()[::-1] # can be rewriten 30 | keep = [] 31 | 32 | cdef int ndets = dets.shape[0] 33 | cdef np.ndarray[np.int_t, ndim=1] suppressed = np.zeros(ndets, dtype=np.int) 34 | 35 | cdef int _i, _j 36 | 37 | 
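# statically typed loop indices and box scalars keep the two nested suppression loops below at C speed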
cdef int i, j 38 | 39 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 40 | cdef np.float32_t w, h 41 | cdef np.float32_t overlap, ious 42 | 43 | j=0 44 | 45 | for _i in range(ndets): 46 | i = index[_i] 47 | 48 | if suppressed[i] == 1: 49 | continue 50 | keep.append(i) 51 | 52 | ix1 = x1[i] 53 | iy1 = y1[i] 54 | ix2 = x2[i] 55 | iy2 = y2[i] 56 | 57 | iarea = areas[i] 58 | 59 | for _j in range(_i+1, ndets): 60 | j = index[_j] 61 | if suppressed[j] == 1: 62 | continue 63 | xx1 = max(ix1, x1[j]) 64 | yy1 = max(iy1, y1[j]) 65 | xx2 = max(ix2, x2[j]) 66 | yy2 = max(iy2, y2[j]) 67 | 68 | w = max(0.0, xx2-xx1+1) 69 | h = max(0.0, yy2-yy1+1) 70 | 71 | overlap = w*h 72 | ious = overlap / (iarea + areas[j] - overlap) 73 | if ious>thresh: 74 | suppressed[j] = 1 75 | 76 | return keep 77 | 78 | import matplotlib.pyplot as plt 79 | def plot_bbox(dets, c='k'): 80 | 81 | x1 = dets[:,0] 82 | y1 = dets[:,1] 83 | x2 = dets[:,2] 84 | y2 = dets[:,3] 85 | 86 | plt.plot([x1,x2], [y1,y1], c) 87 | plt.plot([x1,x1], [y1,y2], c) 88 | plt.plot([x1,x2], [y2,y2], c) 89 | plt.plot([x2,x2], [y1,y2], c) 90 | 91 | 92 | #plot_bbox(boxes,'k') # before nms 93 | # 94 | #keep = py_cpu_nms(boxes, thresh=0.7) 95 | #plot_bbox(boxes[keep], 'r')# after nms 96 | 97 | 98 | -------------------------------------------------------------------------------- /pytorch-retinanet/lib/nms/setup1.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | 4 | setup( 5 | name = 'nms_module', 6 | ext_modules = cythonize('nums_py1.pyx'), 7 | ) 8 | -------------------------------------------------------------------------------- /pytorch-retinanet/lib/nms/setup2.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | 4 | setup( 5 | name = 'nms_module', 6 | ext_modules = cythonize('nums_py2.pyx'), 7 | ) 8 | -------------------------------------------------------------------------------- /pytorch-retinanet/lib/setup3.py: -------------------------------------------------------------------------------- 1 | 2 | # -------------------------------------------------------- 3 | # Faster R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick 7 | # -------------------------------------------------------- 8 | 9 | 10 | from distutils.core import setup 11 | from Cython.Build import cythonize 12 | from distutils.extension import Extension 13 | from Cython.Distutils import build_ext 14 | import subprocess 15 | import numpy as np 16 | import os 17 | from os.path import join as pjoin 18 | 19 | 20 | def find_in_path(name, path): 21 | "Find a file in a search path" 22 | # Adapted fom 23 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 24 | for dir in path.split(os.pathsep): 25 | binpath = pjoin(dir, name) 26 | if os.path.exists(binpath): 27 | return os.path.abspath(binpath) 28 | return None 29 | 30 | def locate_cuda(): 31 | """Locate the CUDA environment on the system 32 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 33 | and values giving the absolute path to each directory. 34 | Starts by looking for the CUDAHOME env variable. If not found, everything 35 | is based on finding 'nvcc' in the PATH. 
36 | """ 37 | 38 | # first check if the CUDAHOME env variable is in use 39 | if 'CUDAHOME' in os.environ: 40 | home = os.environ['CUDAHOME'] 41 | nvcc = pjoin(home, 'bin', 'nvcc') 42 | else: 43 | # otherwise, search the PATH for NVCC 44 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 45 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 46 | if nvcc is None: 47 | raise EnvironmentError('The nvcc binary could not be ' 48 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 49 | home = os.path.dirname(os.path.dirname(nvcc)) 50 | 51 | cudaconfig = {'home':home, 'nvcc':nvcc, 52 | 'include': pjoin(home, 'include'), 53 | 'lib64': pjoin(home, 'lib64')} 54 | for k, v in cudaconfig.items(): 55 | if not os.path.exists(v): 56 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 57 | 58 | return cudaconfig 59 | CUDA = locate_cuda() 60 | 61 | try: 62 | numpy_include = np.get_include() 63 | except AttributeError: 64 | numpy_include = np.get_numpy_include() 65 | 66 | 67 | def customize_compiler_for_nvcc(self): 68 | """inject deep into distutils to customize how the dispatch 69 | to gcc/nvcc works. 70 | If you subclass UnixCCompiler, it's not trivial to get your subclass 71 | injected in, and still have the right customizations (i.e. 72 | distutils.sysconfig.customize_compiler) run on it. So instead of going 73 | the OO route, I have this. Note, it's kindof like a wierd functional 74 | subclassing going on.""" 75 | 76 | # tell the compiler it can processes .cu 77 | self.src_extensions.append('.cu') 78 | 79 | # save references to the default compiler_so and _comple methods 80 | default_compiler_so = self.compiler_so 81 | super = self._compile 82 | 83 | # now redefine the _compile method. This gets executed for each 84 | # object but distutils doesn't have the ability to change compilers 85 | # based on source extension: we add it. 
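# extra_postargs arrives here as a dict keyed by compiler ('gcc' / 'nvcc'), matching the
# extra_compile_args dict passed to the Extension below; _compile picks the list that
# matches the source file being compiled.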
86 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 87 | if os.path.splitext(src)[1] == '.cu': 88 | # use the cuda for .cu files 89 | self.set_executable('compiler_so', CUDA['nvcc']) 90 | # use only a subset of the extra_postargs, which are 1-1 translated 91 | # from the extra_compile_args in the Extension class 92 | postargs = extra_postargs['nvcc'] 93 | else: 94 | postargs = extra_postargs['gcc'] 95 | 96 | super(obj, src, ext, cc_args, postargs, pp_opts) 97 | # reset the default compiler_so, which we might have changed for cuda 98 | self.compiler_so = default_compiler_so 99 | 100 | # inject our redefined _compile method into the class 101 | self._compile = _compile 102 | 103 | 104 | # run the customize_compiler 105 | class custom_build_ext(build_ext): 106 | def build_extensions(self): 107 | customize_compiler_for_nvcc(self.compiler) 108 | build_ext.build_extensions(self) 109 | 110 | ext_modules = [Extension('nms.gpu_nms', 111 | ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'], 112 | library_dirs=[CUDA['lib64']], 113 | libraries=['cudart'], 114 | language='c++', 115 | runtime_library_dirs=[CUDA['lib64']], 116 | # this syntax is specific to this build system 117 | # we're only going to use certain compiler args with nvcc and not with 118 | # gcc the implementation of this trick is in customize_compiler() below 119 | extra_compile_args={'gcc': ["-Wno-unused-function"], 120 | 'nvcc': ['-arch=sm_35', 121 | '--ptxas-options=-v', 122 | '-c', 123 | '--compiler-options', 124 | "'-fPIC'"]}, 125 | include_dirs = [numpy_include, CUDA['include']] 126 | )] 127 | 128 | setup( 129 | name='fast_rcnn', 130 | ext_modules=ext_modules, 131 | # inject our custom trigger 132 | cmdclass={'build_ext': custom_build_ext}, 133 | ) 134 | 135 | -------------------------------------------------------------------------------- /pytorch-retinanet/lib/test_num.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | #from nms.nums_py2 import py_cpu_nms # for cpu 4 | from nms.gpu_nms import gpu_nms # for gpu 5 | 6 | 7 | np.random.seed( 1 ) # keep fixed 8 | num_rois = 6000 9 | minxy = np.random.randint(50,145,size=(num_rois ,2)) 10 | maxxy = np.random.randint(150,200,size=(num_rois ,2)) 11 | score = 0.8*np.random.random_sample((num_rois ,1))+0.2 12 | 13 | boxes_new = np.concatenate((minxy,maxxy,score), axis=1).astype(np.float32) 14 | 15 | def nms_test_time(boxes_new): 16 | 17 | thresh = [0.7,0.8,0.9] 18 | T = 50 19 | for i in range(len(thresh)): 20 | since = time.time() 21 | for t in range(T): 22 | 23 | # keep = py_cpu_nms(boxes_new, thresh=thresh[i]) # for cpu 24 | keep = gpu_nms(boxes_new, thresh=thresh[i]) # for gpu 25 | print("thresh={:.1f}, time wastes:{:.4f}".format(thresh[i], (time.time()-since)/T)) 26 | 27 | return keep 28 | 29 | 30 | if __name__ =="__main__": 31 | nms_test_time(boxes_new) 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /pytorch-retinanet/losses.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | def calc_iou(a, b): 6 | area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1]) 7 | 8 | iw = torch.min(torch.unsqueeze(a[:, 2], dim=1), b[:, 2]) - torch.max(torch.unsqueeze(a[:, 0], 1), b[:, 0]) 9 | ih = torch.min(torch.unsqueeze(a[:, 3], dim=1), b[:, 3]) - torch.max(torch.unsqueeze(a[:, 1], 1), b[:, 1]) 10 | 11 | iw = torch.clamp(iw, min=0) 12 | ih = torch.clamp(ih, min=0) 13 | 
14 | ua = torch.unsqueeze((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), dim=1) + area - iw * ih 15 | 16 | ua = torch.clamp(ua, min=1e-8) 17 | 18 | intersection = iw * ih 19 | 20 | IoU = intersection / ua 21 | 22 | return IoU 23 | 24 | class FocalLoss(nn.Module): 25 | #def __init__(self): 26 | 27 | def forward(self, classifications, regressions, anchors, annotations): 28 | alpha = 0.25 29 | gamma = 2.0 30 | batch_size = classifications.shape[0] 31 | classification_losses = [] 32 | regression_losses = [] 33 | 34 | anchor = anchors[0, :, :] 35 | 36 | anchor_widths = anchor[:, 2] - anchor[:, 0] 37 | anchor_heights = anchor[:, 3] - anchor[:, 1] 38 | anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths 39 | anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights 40 | 41 | for j in range(batch_size): 42 | 43 | classification = classifications[j, :, :] 44 | regression = regressions[j, :, :] 45 | 46 | bbox_annotation = annotations[j, :, :] 47 | bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1] 48 | 49 | if bbox_annotation.shape[0] == 0: 50 | regression_losses.append(torch.tensor(0).float().cuda()) 51 | classification_losses.append(torch.tensor(0).float().cuda()) 52 | 53 | continue 54 | 55 | classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4) 56 | 57 | IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4]) # num_anchors x num_annotations 58 | 59 | IoU_max, IoU_argmax = torch.max(IoU, dim=1) # num_anchors x 1 60 | 61 | #import pdb 62 | #pdb.set_trace() 63 | 64 | # compute the loss for classification 65 | targets = torch.ones(classification.shape) * -1 66 | targets = targets.cuda() 67 | 68 | targets[torch.lt(IoU_max, 0.4), :] = 0 69 | 70 | positive_indices = torch.ge(IoU_max, 0.5) 71 | 72 | num_positive_anchors = positive_indices.sum() 73 | 74 | assigned_annotations = bbox_annotation[IoU_argmax, :] 75 | 76 | targets[positive_indices, :] = 0 77 | targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1 78 | 79 | alpha_factor = torch.ones(targets.shape).cuda() * alpha 80 | 81 | alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor) 82 | focal_weight = torch.where(torch.eq(targets, 1.), 1. 
- classification, classification) 83 | focal_weight = alpha_factor * torch.pow(focal_weight, gamma) 84 | 85 | bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification)) 86 | 87 | # cls_loss = focal_weight * torch.pow(bce, gamma) 88 | cls_loss = focal_weight * bce 89 | 90 | cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape).cuda()) 91 | 92 | classification_losses.append(cls_loss.sum()/torch.clamp(num_positive_anchors.float(), min=1.0)) 93 | 94 | # compute the loss for regression 95 | 96 | if positive_indices.sum() > 0: 97 | assigned_annotations = assigned_annotations[positive_indices, :] 98 | 99 | anchor_widths_pi = anchor_widths[positive_indices] 100 | anchor_heights_pi = anchor_heights[positive_indices] 101 | anchor_ctr_x_pi = anchor_ctr_x[positive_indices] 102 | anchor_ctr_y_pi = anchor_ctr_y[positive_indices] 103 | 104 | gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0] 105 | gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1] 106 | gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths 107 | gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights 108 | 109 | # clip widths to 1 110 | gt_widths = torch.clamp(gt_widths, min=1) 111 | gt_heights = torch.clamp(gt_heights, min=1) 112 | 113 | targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi 114 | targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi 115 | targets_dw = torch.log(gt_widths / anchor_widths_pi) 116 | targets_dh = torch.log(gt_heights / anchor_heights_pi) 117 | 118 | targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh)) 119 | targets = targets.t() 120 | 121 | targets = targets/torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).cuda() 122 | 123 | 124 | negative_indices = 1 - positive_indices 125 | 126 | regression_diff = torch.abs(targets - regression[positive_indices, :]) 127 | 128 | regression_loss = torch.where( 129 | torch.le(regression_diff, 1.0 / 9.0), 130 | 0.5 * 9.0 * torch.pow(regression_diff, 2), 131 | regression_diff - 0.5 / 9.0 132 | ) 133 | regression_losses.append(regression_loss.mean()) 134 | else: 135 | regression_losses.append(torch.tensor(0).float().cuda()) 136 | 137 | return torch.stack(classification_losses).mean(dim=0, keepdim=True), torch.stack(regression_losses).mean(dim=0, keepdim=True) 138 | 139 | 140 | -------------------------------------------------------------------------------- /pytorch-retinanet/model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import math 4 | import time 5 | import torch.utils.model_zoo as model_zoo 6 | from utils import BasicBlock, Bottleneck, BBoxTransform, ClipBoxes, AttentionBottleneck 7 | from anchors import Anchors 8 | import losses 9 | from lib.nms.gpu_nms import gpu_nms 10 | 11 | 12 | model_urls = { 13 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 14 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 15 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 16 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 17 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 18 | } 19 | 20 | class PyramidFeatures(nn.Module): 21 | def __init__(self, C3_size, C4_size, C5_size, feature_size=256): 22 | super(PyramidFeatures, self).__init__() 23 | 24 | # upsample C5 to get P5 from the FPN paper 25 | self.P5_1 = nn.Conv2d(C5_size, feature_size, 
kernel_size=1, stride=1, padding=0) 26 | self.P5_upsampled = nn.Upsample(scale_factor=2, mode='nearest') 27 | self.P5_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1) 28 | 29 | # add P5 elementwise to C4 30 | self.P4_1 = nn.Conv2d(C4_size, feature_size, kernel_size=1, stride=1, padding=0) 31 | self.P4_upsampled = nn.Upsample(scale_factor=2, mode='nearest') 32 | self.P4_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1) 33 | 34 | # add P4 elementwise to C3 35 | self.P3_1 = nn.Conv2d(C3_size, feature_size, kernel_size=1, stride=1, padding=0) 36 | self.P3_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1) 37 | 38 | # "P6 is obtained via a 3x3 stride-2 conv on C5" 39 | self.P6 = nn.Conv2d(C5_size, feature_size, kernel_size=3, stride=2, padding=1) 40 | 41 | # "P7 is computed by applying ReLU followed by a 3x3 stride-2 conv on P6" 42 | self.P7_1 = nn.ReLU() 43 | self.P7_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=2, padding=1) 44 | 45 | def forward(self, inputs): 46 | 47 | C3, C4, C5 = inputs 48 | 49 | P5_x = self.P5_1(C5) 50 | P5_upsampled_x = self.P5_upsampled(P5_x) 51 | P5_x = self.P5_2(P5_x) 52 | 53 | P4_x = self.P4_1(C4) 54 | P4_x = P5_upsampled_x + P4_x 55 | P4_upsampled_x = self.P4_upsampled(P4_x) 56 | P4_x = self.P4_2(P4_x) 57 | 58 | P3_x = self.P3_1(C3) 59 | P3_x = P3_x + P4_upsampled_x 60 | P3_x = self.P3_2(P3_x) 61 | 62 | P6_x = self.P6(C5) 63 | 64 | P7_x = self.P7_1(P6_x) 65 | P7_x = self.P7_2(P7_x) 66 | 67 | return [P3_x, P4_x, P5_x, P6_x, P7_x] 68 | 69 | 70 | class RegressionModel(nn.Module): 71 | def __init__(self, num_features_in, num_anchors=9, feature_size=256): 72 | super(RegressionModel, self).__init__() 73 | 74 | self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1) 75 | self.act1 = nn.ReLU() 76 | 77 | self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 78 | self.act2 = nn.ReLU() 79 | 80 | self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 81 | self.act3 = nn.ReLU() 82 | 83 | self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 84 | self.act4 = nn.ReLU() 85 | 86 | self.output = nn.Conv2d(feature_size, num_anchors*4, kernel_size=3, padding=1) 87 | 88 | def forward(self, x): 89 | 90 | out = self.conv1(x) 91 | out = self.act1(out) 92 | 93 | out = self.conv2(out) 94 | out = self.act2(out) 95 | 96 | out = self.conv3(out) 97 | out = self.act3(out) 98 | 99 | out = self.conv4(out) 100 | out = self.act4(out) 101 | 102 | out = self.output(out) 103 | 104 | # out is B x C x W x H, with C = 4*num_anchors 105 | out = out.permute(0, 2, 3, 1) 106 | 107 | return out.contiguous().view(out.shape[0], -1, 4) 108 | 109 | class ClassificationModel(nn.Module): 110 | def __init__(self, num_features_in, num_anchors=9, num_classes=80, prior=0.01, feature_size=256): 111 | super(ClassificationModel, self).__init__() 112 | 113 | self.num_classes = num_classes 114 | self.num_anchors = num_anchors 115 | 116 | self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1) 117 | self.act1 = nn.ReLU() 118 | 119 | self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 120 | self.act2 = nn.ReLU() 121 | 122 | self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 123 | self.act3 = nn.ReLU() 124 | 125 | self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 126 | self.act4 = nn.ReLU() 127 | 128 | self.output = 
nn.Conv2d(feature_size, num_anchors*num_classes, kernel_size=3, padding=1) 129 | self.output_act = nn.Sigmoid() 130 | 131 | def forward(self, x): 132 | 133 | out = self.conv1(x) 134 | out = self.act1(out) 135 | 136 | out = self.conv2(out) 137 | out = self.act2(out) 138 | 139 | out = self.conv3(out) 140 | out = self.act3(out) 141 | 142 | out = self.conv4(out) 143 | out = self.act4(out) 144 | 145 | out = self.output(out) 146 | out = self.output_act(out) 147 | 148 | # out is B x C x W x H, with C = n_classes + n_anchors 149 | out1 = out.permute(0, 2, 3, 1) 150 | 151 | batch_size, width, height, channels = out1.shape 152 | 153 | out2 = out1.view(batch_size, width, height, self.num_anchors, self.num_classes) 154 | 155 | return out2.contiguous().view(x.shape[0], -1, self.num_classes) 156 | 157 | class ResNet(nn.Module): 158 | 159 | def __init__(self, num_classes, block, layers): 160 | self.inplanes = 64 161 | super(ResNet, self).__init__() 162 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 163 | self.bn1 = nn.BatchNorm2d(64) 164 | self.relu = nn.ReLU(inplace=True) 165 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 166 | self.layer1 = self._make_layer(block, 64, layers[0]) 167 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 168 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 169 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 170 | 171 | 172 | if block == BasicBlock: 173 | fpn_sizes = [self.layer2[layers[1]-1].conv2.out_channels, self.layer3[layers[2]-1].conv2.out_channels, self.layer4[layers[3]-1].conv2.out_channels] 174 | elif block == Bottleneck or block == AttentionBottleneck: 175 | fpn_sizes = [self.layer2[layers[1]-1].conv3.out_channels, self.layer3[layers[2]-1].conv3.out_channels, self.layer4[layers[3]-1].conv3.out_channels] 176 | 177 | self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2]) 178 | 179 | self.regressionModel = RegressionModel(256) 180 | self.classificationModel = ClassificationModel(256, num_classes=num_classes) 181 | 182 | self.anchors = Anchors() 183 | 184 | self.regressBoxes = BBoxTransform() 185 | 186 | self.clipBoxes = ClipBoxes() 187 | 188 | self.focalLoss = losses.FocalLoss() 189 | 190 | for m in self.modules(): 191 | if isinstance(m, nn.Conv2d): 192 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 193 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 194 | elif isinstance(m, nn.BatchNorm2d): 195 | m.weight.data.fill_(1) 196 | m.bias.data.zero_() 197 | 198 | prior = 0.01 199 | 200 | self.classificationModel.output.weight.data.fill_(0) 201 | self.classificationModel.output.bias.data.fill_(-math.log((1.0-prior)/prior)) 202 | 203 | self.regressionModel.output.weight.data.fill_(0) 204 | self.regressionModel.output.bias.data.fill_(0) 205 | 206 | self.freeze_bn() 207 | 208 | def _make_layer(self, block, planes, blocks, stride=1): 209 | downsample = None 210 | if stride != 1 or self.inplanes != planes * block.expansion: 211 | downsample = nn.Sequential( 212 | nn.Conv2d(self.inplanes, planes * block.expansion, 213 | kernel_size=1, stride=stride, bias=False), 214 | nn.BatchNorm2d(planes * block.expansion), 215 | ) 216 | 217 | layers = [] 218 | layers.append(block(self.inplanes, planes, stride, downsample)) 219 | self.inplanes = planes * block.expansion 220 | for i in range(1, blocks): 221 | layers.append(block(self.inplanes, planes)) 222 | 223 | return nn.Sequential(*layers) 224 | 225 | def freeze_bn(self): 226 | '''Freeze BatchNorm layers.''' 227 | for layer in self.modules(): 228 | if isinstance(layer, nn.BatchNorm2d): 229 | layer.eval() 230 | 231 | def forward(self, inputs): 232 | 233 | if self.training: 234 | img_batch, annotations = inputs 235 | else: 236 | img_batch = inputs 237 | 238 | x = self.conv1(img_batch) 239 | x = self.bn1(x) 240 | x = self.relu(x) 241 | x = self.maxpool(x) 242 | 243 | x1 = self.layer1(x) 244 | x2 = self.layer2(x1) 245 | x3 = self.layer3(x2) 246 | x4 = self.layer4(x3) 247 | 248 | features = self.fpn([x2, x3, x4]) 249 | 250 | regression = torch.cat([self.regressionModel(feature) for feature in features], dim=1) 251 | 252 | classification = torch.cat([self.classificationModel(feature) for feature in features], dim=1) 253 | 254 | anchors = self.anchors(img_batch) 255 | 256 | if self.training: 257 | #return self.focalLoss(classification, regression, anchors, annotations) 258 | return classification, regression, anchors, annotations 259 | else: 260 | transformed_anchors = self.regressBoxes(anchors, regression) 261 | transformed_anchors = self.clipBoxes(transformed_anchors, img_batch) 262 | 263 | scores = torch.max(classification, dim=2, keepdim=True)[0] 264 | 265 | scores_over_thresh = (scores>0.05)[0, :, 0] 266 | 267 | if scores_over_thresh.sum() == 0: 268 | # no boxes to NMS, just return 269 | return [torch.zeros(0), torch.zeros(0), torch.zeros(0, 4)] 270 | 271 | classification = classification[:, scores_over_thresh, :] 272 | transformed_anchors = transformed_anchors[:, scores_over_thresh, :] 273 | scores = scores[:, scores_over_thresh, :] 274 | dets = torch.cat([transformed_anchors, scores], dim=2)[0, :, :] 275 | anchors_nms_idx = self.nms(dets.cpu().numpy(), 0.5) 276 | 277 | nms_scores, nms_class = classification[0, anchors_nms_idx, :].max(dim=1) 278 | 279 | return [nms_scores, nms_class, transformed_anchors[0, anchors_nms_idx, :]] 280 | 281 | def nms(self, dets, thresh): 282 | """Dispatch to either CPU or GPU NMS implementations. 283 | Accept dets as tensor""" 284 | return gpu_nms(dets, thresh) 285 | 286 | 287 | 288 | def resnet18(num_classes, pretrained=False, **kwargs): 289 | """Constructs a ResNet-18 model. 
290 | Args: 291 | pretrained (bool): If True, returns a model pre-trained on ImageNet 292 | """ 293 | model = ResNet(num_classes, BasicBlock, [2, 2, 2, 2], **kwargs) 294 | if pretrained: 295 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'], model_dir='.'), strict=False) 296 | return model 297 | 298 | 299 | def resnet34(num_classes, pretrained=False, **kwargs): 300 | """Constructs a ResNet-34 model. 301 | Args: 302 | pretrained (bool): If True, returns a model pre-trained on ImageNet 303 | """ 304 | model = ResNet(num_classes, BasicBlock, [3, 4, 6, 3], **kwargs) 305 | if pretrained: 306 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34'], model_dir='.'), strict=False) 307 | return model 308 | 309 | 310 | def resnet50(num_classes, pretrained=False, **kwargs): 311 | """Constructs a ResNet-50 model. 312 | Args: 313 | pretrained (bool): If True, returns a model pre-trained on ImageNet 314 | """ 315 | model = ResNet(num_classes, Bottleneck, [3, 4, 6, 3], **kwargs) 316 | if pretrained: 317 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'], model_dir='.'), strict=False) 318 | return model 319 | 320 | 321 | def attention_resnet50(num_classes, pretrained=False, **kwargs): 322 | """ 323 | :param num_classes: 324 | :param pretrained: 325 | :param kwargs: 326 | :return: 327 | """ 328 | 329 | model = ResNet(num_classes, AttentionBottleneck, [3, 4, 6, 3], **kwargs) 330 | return model 331 | 332 | def resnet101(num_classes, pretrained=False, **kwargs): 333 | """Constructs a ResNet-101 model. 334 | Args: 335 | pretrained (bool): If True, returns a model pre-trained on ImageNet 336 | """ 337 | model = ResNet(num_classes, Bottleneck, [3, 4, 23, 3], **kwargs) 338 | if pretrained: 339 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'], model_dir='.'), strict=False) 340 | return model 341 | 342 | 343 | def resnet152(num_classes, pretrained=False, **kwargs): 344 | """Constructs a ResNet-152 model. 
345 | Args: 346 | pretrained (bool): If True, returns a model pre-trained on ImageNet 347 | """ 348 | model = ResNet(num_classes, Bottleneck, [3, 8, 36, 3], **kwargs) 349 | if pretrained: 350 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152'], model_dir='.'), strict=False) 351 | return model 352 | -------------------------------------------------------------------------------- /pytorch-retinanet/oid_dataset.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | 3 | import csv 4 | import json 5 | import os 6 | import warnings 7 | 8 | import numpy as np 9 | import skimage 10 | import skimage.color 11 | import skimage.io 12 | import skimage.transform 13 | from PIL import Image 14 | from torch.utils.data import Dataset 15 | 16 | 17 | def get_labels(metadata_dir, version='v4'): 18 | if version == 'v4' or version == 'challenge2018': 19 | csv_file = 'class-descriptions-boxable.csv' if version == 'v4' else 'challenge-2018-class-descriptions-500.csv' 20 | 21 | boxable_classes_descriptions = os.path.join(metadata_dir, csv_file) 22 | id_to_labels = {} 23 | cls_index = {} 24 | 25 | i = 0 26 | with open(boxable_classes_descriptions) as f: 27 | for row in csv.reader(f): 28 | # make sure the csv row is not empty (usually the last one) 29 | if len(row): 30 | label = row[0] 31 | description = row[1].replace("\"", "").replace("'", "").replace('`', '') 32 | 33 | id_to_labels[i] = description 34 | cls_index[label] = i 35 | 36 | i += 1 37 | else: 38 | trainable_classes_path = os.path.join(metadata_dir, 'classes-bbox-trainable.txt') 39 | description_path = os.path.join(metadata_dir, 'class-descriptions.csv') 40 | 41 | description_table = {} 42 | with open(description_path) as f: 43 | for row in csv.reader(f): 44 | # make sure the csv row is not empty (usually the last one) 45 | if len(row): 46 | description_table[row[0]] = row[1].replace("\"", "").replace("'", "").replace('`', '') 47 | 48 | with open(trainable_classes_path, 'rb') as f: 49 | trainable_classes = f.read().split('\n') 50 | 51 | id_to_labels = dict([(i, description_table[c]) for i, c in enumerate(trainable_classes)]) 52 | cls_index = dict([(c, i) for i, c in enumerate(trainable_classes)]) 53 | 54 | return id_to_labels, cls_index 55 | 56 | 57 | def generate_images_annotations_json(main_dir, metadata_dir, subset, cls_index, version='v4'): 58 | validation_image_ids = {} 59 | 60 | if version == 'v4': 61 | annotations_path = os.path.join(metadata_dir, subset, '{}-annotations-bbox.csv'.format(subset)) 62 | elif version == 'challenge2018': 63 | validation_image_ids_path = os.path.join(metadata_dir, 'challenge-2018-image-ids-valset-od.csv') 64 | 65 | with open(validation_image_ids_path, 'r') as csv_file: 66 | reader = csv.DictReader(csv_file, fieldnames=['ImageID']) 67 | reader.next() 68 | for line, row in enumerate(reader): 69 | image_id = row['ImageID'] 70 | validation_image_ids[image_id] = True 71 | 72 | annotations_path = os.path.join(metadata_dir, 'challenge-2018-train-annotations-bbox.csv') 73 | else: 74 | annotations_path = os.path.join(metadata_dir, subset, 'annotations-human-bbox.csv') 75 | 76 | fieldnames = ['ImageID', 'Source', 'LabelName', 'Confidence', 77 | 'XMin', 'XMax', 'YMin', 'YMax', 78 | 'IsOccluded', 'IsTruncated', 'IsGroupOf', 'IsDepiction', 'IsInside'] 79 | 80 | id_annotations = dict() 81 | with open(annotations_path, 'r') as csv_file: 82 | reader = csv.DictReader(csv_file, fieldnames=fieldnames) 83 | next(reader) 84 | 85 | 
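# cache (width, height) per image so each file is opened at most once while converting the normalized box coordinates to pixels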
images_sizes = {} 86 | for line, row in enumerate(reader): 87 | frame = row['ImageID'] 88 | 89 | if version == 'challenge2018': 90 | if subset == 'train': 91 | if frame in validation_image_ids: 92 | continue 93 | elif subset == 'validation': 94 | if frame not in validation_image_ids: 95 | continue 96 | else: 97 | raise NotImplementedError('This generator handles only the train and validation subsets') 98 | 99 | class_name = row['LabelName'] 100 | 101 | if class_name not in cls_index: 102 | continue 103 | 104 | cls_id = cls_index[class_name] 105 | 106 | if version == 'challenge2018': 107 | # We recommend participants to use the provided subset of the training set as a validation set. 108 | # This is preferable over using the V4 val/test sets, as the training set is more densely annotated. 109 | img_path = os.path.join(main_dir, 'images', 'train', frame + '.jpg') 110 | else: 111 | img_path = os.path.join(main_dir, 'images', subset, frame + '.jpg') 112 | 113 | if frame in images_sizes: 114 | width, height = images_sizes[frame] 115 | else: 116 | try: 117 | with Image.open(img_path) as img: 118 | width, height = img.width, img.height 119 | images_sizes[frame] = (width, height) 120 | except Exception as ex: 121 | if version == 'challenge2018': 122 | raise ex 123 | continue 124 | 125 | x1 = float(row['XMin']) 126 | x2 = float(row['XMax']) 127 | y1 = float(row['YMin']) 128 | y2 = float(row['YMax']) 129 | 130 | x1_int = int(round(x1 * width)) 131 | x2_int = int(round(x2 * width)) 132 | y1_int = int(round(y1 * height)) 133 | y2_int = int(round(y2 * height)) 134 | 135 | # Check that the bounding box is valid. 136 | if x2 <= x1: 137 | raise ValueError('line {}: x2 ({}) must be higher than x1 ({})'.format(line, x2, x1)) 138 | if y2 <= y1: 139 | raise ValueError('line {}: y2 ({}) must be higher than y1 ({})'.format(line, y2, y1)) 140 | 141 | if y2_int == y1_int: 142 | warnings.warn('filtering line {}: rounding y2 ({}) and y1 ({}) makes them equal'.format(line, y2, y1)) 143 | continue 144 | 145 | if x2_int == x1_int: 146 | warnings.warn('filtering line {}: rounding x2 ({}) and x1 ({}) makes them equal'.format(line, x2, x1)) 147 | continue 148 | 149 | img_id = row['ImageID'] 150 | annotation = {'cls_id': cls_id, 'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2} 151 | 152 | if img_id in id_annotations: 153 | annotations = id_annotations[img_id] 154 | annotations['boxes'].append(annotation) 155 | else: 156 | id_annotations[img_id] = {'w': width, 'h': height, 'boxes': [annotation]} 157 | return id_annotations 158 | 159 | 160 | class OidDataset(Dataset): 161 | """Oid dataset.""" 162 | 163 | def __init__(self, main_dir, subset, version='v4', annotation_cache_dir='.', transform=None): 164 | if version == 'v4': 165 | metadata = '2018_04' 166 | elif version == 'challenge2018': 167 | metadata = 'challenge2018' 168 | elif version == 'v3': 169 | metadata = '2017_11' 170 | else: 171 | raise NotImplementedError('There is currently no implementation for versions older than v3') 172 | 173 | self.transform = transform 174 | 175 | if version == 'challenge2018': 176 | self.base_dir = os.path.join(main_dir, 'images', 'train') 177 | else: 178 | self.base_dir = os.path.join(main_dir, 'images', subset) 179 | 180 | metadata_dir = os.path.join(main_dir, metadata) 181 | annotation_cache_json = os.path.join(annotation_cache_dir, subset + '.json') 182 | 183 | self.id_to_labels, cls_index = get_labels(metadata_dir, version=version) 184 | 185 | if os.path.exists(annotation_cache_json): 186 | with open(annotation_cache_json, 'r') as f: 187 | 
self.annotations = json.loads(f.read()) 188 | else: 189 | self.annotations = generate_images_annotations_json(main_dir, metadata_dir, subset, cls_index, 190 | version=version) 191 | json.dump(self.annotations, open(annotation_cache_json, "w")) 192 | 193 | self.id_to_image_id = dict([(i, k) for i, k in enumerate(self.annotations)]) 194 | 195 | # (label -> name) 196 | self.labels = self.id_to_labels 197 | 198 | def __len__(self): 199 | return len(self.annotations) 200 | 201 | def __getitem__(self, idx): 202 | 203 | img = self.load_image(idx) 204 | annot = self.load_annotations(idx) 205 | sample = {'img': img, 'annot': annot} 206 | if self.transform: 207 | sample = self.transform(sample) 208 | 209 | return sample 210 | 211 | def image_path(self, image_index): 212 | path = os.path.join(self.base_dir, self.id_to_image_id[image_index] + '.jpg') 213 | return path 214 | 215 | def load_image(self, image_index): 216 | path = self.image_path(image_index) 217 | img = skimage.io.imread(path) 218 | 219 | if len(img.shape) == 1: 220 | img = img[0] 221 | 222 | if len(img.shape) == 2: 223 | img = skimage.color.gray2rgb(img) 224 | 225 | try: 226 | return img.astype(np.float32) / 255.0 227 | except Exception: 228 | print (path) 229 | exit(0) 230 | 231 | def load_annotations(self, image_index): 232 | # get ground truth annotations 233 | image_annotations = self.annotations[self.id_to_image_id[image_index]] 234 | 235 | labels = image_annotations['boxes'] 236 | height, width = image_annotations['h'], image_annotations['w'] 237 | 238 | boxes = np.zeros((len(labels), 5)) 239 | for idx, ann in enumerate(labels): 240 | cls_id = ann['cls_id'] 241 | x1 = ann['x1'] * width 242 | x2 = ann['x2'] * width 243 | y1 = ann['y1'] * height 244 | y2 = ann['y2'] * height 245 | 246 | boxes[idx, 0] = x1 247 | boxes[idx, 1] = y1 248 | boxes[idx, 2] = x2 249 | boxes[idx, 3] = y2 250 | boxes[idx, 4] = cls_id 251 | 252 | return boxes 253 | 254 | def image_aspect_ratio(self, image_index): 255 | img_annotations = self.annotations[self.id_to_image_id[image_index]] 256 | height, width = img_annotations['h'], img_annotations['w'] 257 | return float(width) / float(height) 258 | 259 | def num_classes(self): 260 | return len(self.id_to_labels) 261 | -------------------------------------------------------------------------------- /pytorch-retinanet/train.py: -------------------------------------------------------------------------------- 1 | import time 2 | import os 3 | import copy 4 | import argparse 5 | import pdb 6 | import collections 7 | import sys 8 | 9 | import numpy as np 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.optim as optim 14 | from torch.optim import lr_scheduler 15 | from torch.autograd import Variable 16 | from torchvision import datasets, models, transforms 17 | import torchvision 18 | 19 | import model 20 | from anchors import Anchors 21 | import losses 22 | from dataloader import CocoDataset, CSVDataset, collater, Resizer, AspectRatioBasedSampler, Augmenter, UnNormalizer, Normalizer 23 | from torch.utils.data import Dataset, DataLoader 24 | 25 | import coco_eval 26 | import csv_eval 27 | 28 | #assert torch.__version__.split('.')[1] == '4' 29 | 30 | print('CUDA available: {}'.format(torch.cuda.is_available())) 31 | 32 | 33 | def main(args=None): 34 | 35 | parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.') 36 | 37 | parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.') 38 | parser.add_argument('--coco_path', help='Path 
to COCO directory') 39 | parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)') 40 | parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)') 41 | parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)') 42 | 43 | parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50) 44 | parser.add_argument('--epochs', help='Number of epochs', type=int, default=100) 45 | parser.add_argument('--attention', help='use attention version', action='store_true') 46 | 47 | parser = parser.parse_args(args) 48 | 49 | # Create the data loaders 50 | if parser.dataset == 'coco': 51 | 52 | if parser.coco_path is None: 53 | raise ValueError('Must provide --coco_path when training on COCO,') 54 | 55 | dataset_train = CocoDataset(parser.coco_path, set_name='train2017', transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()])) 56 | dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose([Normalizer(), Resizer()])) 57 | 58 | elif parser.dataset == 'csv': 59 | 60 | if parser.csv_train is None: 61 | raise ValueError('Must provide --csv_train when training on COCO,') 62 | 63 | if parser.csv_classes is None: 64 | raise ValueError('Must provide --csv_classes when training on COCO,') 65 | 66 | 67 | dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()])) 68 | 69 | if parser.csv_val is None: 70 | dataset_val = None 71 | print('No validation annotations provided.') 72 | else: 73 | dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()])) 74 | 75 | else: 76 | raise ValueError('Dataset type not understood (must be csv or coco), exiting.') 77 | 78 | sampler = AspectRatioBasedSampler(dataset_train, batch_size=1, drop_last=False) 79 | dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler) 80 | 81 | if dataset_val is not None: 82 | sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False) 83 | dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val) 84 | 85 | # Create the model 86 | if parser.depth == 18: 87 | retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True) 88 | elif parser.depth == 34: 89 | retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True) 90 | elif parser.depth == 50: 91 | if parser.attention: 92 | retinanet = model.attention_resnet50(num_classes=dataset_train.num_classes(), pretrained=True) 93 | else: 94 | retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True) 95 | elif parser.depth == 101: 96 | retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True) 97 | elif parser.depth == 152: 98 | retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True) 99 | else: 100 | raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152') 101 | 102 | use_gpu = True 103 | 104 | if use_gpu: 105 | retinanet = retinanet.cuda() 106 | 107 | retinanet = torch.nn.DataParallel(retinanet).cuda() 108 | 109 | retinanet.training = True 110 | 111 | optimizer = optim.Adam(retinanet.parameters(), lr=1e-5) 112 | 113 | scheduler = 
optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True) 114 | 115 | loss_hist = collections.deque(maxlen=500) 116 | 117 | retinanet.train() 118 | retinanet.module.freeze_bn() 119 | 120 | print('Num training images: {}'.format(len(dataset_train))) 121 | 122 | focalLoss = losses.FocalLoss() 123 | 124 | for epoch_num in range(parser.epochs): 125 | 126 | retinanet.train() 127 | retinanet.module.freeze_bn() 128 | 129 | epoch_loss = [] 130 | 131 | for iter_num, data in enumerate(dataloader_train): 132 | try: 133 | optimizer.zero_grad() 134 | 135 | #classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']]) 136 | classification, regression, anchors, annotations = retinanet([data['img'].cuda().float(), data['annot']]) 137 | classification_loss, regression_loss = focalLoss(classification, regression, anchors, annotations) 138 | 139 | classification_loss = classification_loss.mean() 140 | regression_loss = regression_loss.mean() 141 | 142 | loss = classification_loss + regression_loss 143 | 144 | if bool(loss == 0): 145 | continue 146 | 147 | loss.backward() 148 | 149 | torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1) 150 | 151 | optimizer.step() 152 | 153 | loss_hist.append(float(loss)) 154 | 155 | epoch_loss.append(float(loss)) 156 | 157 | print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist))) 158 | 159 | del classification_loss 160 | del regression_loss 161 | except Exception as e: 162 | print(e) 163 | continue 164 | 165 | if parser.dataset == 'coco': 166 | 167 | print('Evaluating dataset') 168 | 169 | coco_eval.evaluate_coco(dataset_val, retinanet) 170 | 171 | elif parser.dataset == 'csv' and parser.csv_val is not None: 172 | 173 | print('Evaluating dataset') 174 | 175 | mAP = csv_eval.evaluate(dataset_val, retinanet) 176 | 177 | 178 | scheduler.step(np.mean(epoch_loss)) 179 | 180 | torch.save(retinanet.module, '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num)) 181 | 182 | retinanet.eval() 183 | 184 | torch.save(retinanet, 'model_final.pt'.format(epoch_num)) 185 | 186 | if __name__ == '__main__': 187 | main() 188 | -------------------------------------------------------------------------------- /pytorch-retinanet/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from attentionConv2d import AttentionConv2d 5 | 6 | 7 | def conv3x3(in_planes, out_planes, stride=1): 8 | """3x3 convolution with padding""" 9 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 10 | padding=1, bias=False) 11 | 12 | class BasicBlock(nn.Module): 13 | expansion = 1 14 | 15 | def __init__(self, inplanes, planes, stride=1, downsample=None): 16 | super(BasicBlock, self).__init__() 17 | self.conv1 = conv3x3(inplanes, planes, stride) 18 | self.bn1 = nn.BatchNorm2d(planes) 19 | self.relu = nn.ReLU(inplace=True) 20 | self.conv2 = conv3x3(planes, planes) 21 | self.bn2 = nn.BatchNorm2d(planes) 22 | self.downsample = downsample 23 | self.stride = stride 24 | 25 | def forward(self, x): 26 | residual = x 27 | 28 | out = self.conv1(x) 29 | out = self.bn1(out) 30 | out = self.relu(out) 31 | 32 | out = self.conv2(out) 33 | out = self.bn2(out) 34 | 35 | if self.downsample is not None: 36 | residual = self.downsample(x) 37 | 38 | out += residual 39 | out = self.relu(out) 40 | 41 | 
return out 42 | 43 | 44 | class Bottleneck(nn.Module): 45 | expansion = 4 46 | 47 | def __init__(self, inplanes, planes, stride=1, downsample=None): 48 | super(Bottleneck, self).__init__() 49 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 50 | self.bn1 = nn.BatchNorm2d(planes) 51 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 52 | padding=1, bias=False) 53 | self.bn2 = nn.BatchNorm2d(planes) 54 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 55 | self.bn3 = nn.BatchNorm2d(planes * 4) 56 | self.relu = nn.ReLU(inplace=True) 57 | self.downsample = downsample 58 | self.stride = stride 59 | 60 | def forward(self, x): 61 | residual = x 62 | 63 | out = self.conv1(x) 64 | out = self.bn1(out) 65 | out = self.relu(out) 66 | 67 | out = self.conv2(out) 68 | out = self.bn2(out) 69 | out = self.relu(out) 70 | 71 | out = self.conv3(out) 72 | out = self.bn3(out) 73 | 74 | if self.downsample is not None: 75 | residual = self.downsample(x) 76 | 77 | out += residual 78 | out = self.relu(out) 79 | 80 | return out 81 | 82 | class AttentionBottleneck(nn.Module): 83 | expansion = 4 84 | def __init__(self, inplanes, planes, stride=1, downsample=None): 85 | super(AttentionBottleneck, self).__init__() 86 | n = np.log(planes) // np.log(2) 87 | 88 | dk = int(planes // 2 ** (n-1)) 89 | dv = int(planes // 2 ** (n-2)) 90 | self.conv1 = AttentionConv2d(inplanes, planes, dk, dv, num_heads=4, kernel_size=1, padding=0, 91 | rel_encoding=False, height=None, width=None) 92 | self.bn1 = nn.BatchNorm2d(planes) 93 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 94 | self.bn2 = nn.BatchNorm2d(planes) 95 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 96 | self.bn3 = nn.BatchNorm2d(planes * 4) 97 | self.relu = nn.ReLU(inplace=True) 98 | self.downsample = downsample 99 | self.stride = stride 100 | 101 | def forward(self, x): 102 | residual = x 103 | 104 | out = self.conv1(x) 105 | out = self.bn1(out) 106 | out = self.relu(out) 107 | 108 | out = self.conv2(out) 109 | out = self.bn2(out) 110 | out = self.relu(out) 111 | 112 | out = self.conv3(out) 113 | out = self.bn3(out) 114 | 115 | if self.downsample is not None: 116 | residual = self.downsample(x) 117 | 118 | out += residual 119 | out = self.relu(out) 120 | return out 121 | 122 | class BBoxTransform(nn.Module): 123 | 124 | def __init__(self, mean=None, std=None): 125 | super(BBoxTransform, self).__init__() 126 | if mean is None: 127 | self.mean = torch.from_numpy(np.array([0, 0, 0, 0]).astype(np.float32)).cuda() 128 | else: 129 | self.mean = mean 130 | if std is None: 131 | self.std = torch.from_numpy(np.array([0.1, 0.1, 0.2, 0.2]).astype(np.float32)).cuda() 132 | else: 133 | self.std = std 134 | 135 | def forward(self, boxes, deltas): 136 | 137 | widths = boxes[:, :, 2] - boxes[:, :, 0] 138 | heights = boxes[:, :, 3] - boxes[:, :, 1] 139 | ctr_x = boxes[:, :, 0] + 0.5 * widths 140 | ctr_y = boxes[:, :, 1] + 0.5 * heights 141 | 142 | dx = deltas[:, :, 0] * self.std[0] + self.mean[0] 143 | dy = deltas[:, :, 1] * self.std[1] + self.mean[1] 144 | dw = deltas[:, :, 2] * self.std[2] + self.mean[2] 145 | dh = deltas[:, :, 3] * self.std[3] + self.mean[3] 146 | 147 | pred_ctr_x = ctr_x + dx * widths 148 | pred_ctr_y = ctr_y + dy * heights 149 | pred_w = torch.exp(dw) * widths 150 | pred_h = torch.exp(dh) * heights 151 | 152 | pred_boxes_x1 = pred_ctr_x - 0.5 * pred_w 153 | pred_boxes_y1 = pred_ctr_y - 0.5 * pred_h 154 | pred_boxes_x2 = pred_ctr_x + 
0.5 * pred_w 155 | pred_boxes_y2 = pred_ctr_y + 0.5 * pred_h 156 | 157 | pred_boxes = torch.stack([pred_boxes_x1, pred_boxes_y1, pred_boxes_x2, pred_boxes_y2], dim=2) 158 | 159 | return pred_boxes 160 | 161 | 162 | class ClipBoxes(nn.Module): 163 | 164 | def __init__(self, width=None, height=None): 165 | super(ClipBoxes, self).__init__() 166 | 167 | def forward(self, boxes, img): 168 | 169 | batch_size, num_channels, height, width = img.shape 170 | 171 | boxes[:, :, 0] = torch.clamp(boxes[:, :, 0], min=0) 172 | boxes[:, :, 1] = torch.clamp(boxes[:, :, 1], min=0) 173 | 174 | boxes[:, :, 2] = torch.clamp(boxes[:, :, 2], max=width) 175 | boxes[:, :, 3] = torch.clamp(boxes[:, :, 3], max=height) 176 | 177 | return boxes 178 | -------------------------------------------------------------------------------- /pytorch-retinanet/visualize.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torchvision 3 | import time 4 | import os 5 | import copy 6 | import pdb 7 | import time 8 | import argparse 9 | 10 | import sys 11 | import cv2 12 | 13 | import torch 14 | from torch.utils.data import Dataset, DataLoader 15 | from torchvision import datasets, models, transforms 16 | 17 | from dataloader import CocoDataset, CSVDataset, collater, Resizer, AspectRatioBasedSampler, Augmenter, UnNormalizer, Normalizer 18 | 19 | 20 | assert torch.__version__.split('.')[1] == '4' 21 | 22 | print('CUDA available: {}'.format(torch.cuda.is_available())) 23 | 24 | 25 | def main(args=None): 26 | parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.') 27 | 28 | parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.') 29 | parser.add_argument('--coco_path', help='Path to COCO directory') 30 | parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)') 31 | parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)') 32 | 33 | parser.add_argument('--model', help='Path to model (.pt) file.') 34 | 35 | parser = parser.parse_args(args) 36 | 37 | if parser.dataset == 'coco': 38 | dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose([Normalizer(), Resizer()])) 39 | elif parser.dataset == 'csv': 40 | dataset_val = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()])) 41 | else: 42 | raise ValueError('Dataset type not understood (must be csv or coco), exiting.') 43 | 44 | sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False) 45 | dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val) 46 | 47 | retinanet = torch.load(parser.model) 48 | 49 | use_gpu = True 50 | 51 | if use_gpu: 52 | retinanet = retinanet.cuda() 53 | 54 | retinanet.eval() 55 | 56 | unnormalize = UnNormalizer() 57 | 58 | def draw_caption(image, box, caption): 59 | 60 | b = np.array(box).astype(int) 61 | cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2) 62 | cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1) 63 | 64 | for idx, data in enumerate(dataloader_val): 65 | 66 | with torch.no_grad(): 67 | st = time.time() 68 | scores, classification, transformed_anchors = retinanet(data['img'].cuda().float()) 69 | print('Elapsed time: {}'.format(time.time()-st)) 70 | idxs = 
np.where(scores>0.5) 71 | img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy() 72 | 73 | img[img<0] = 0 74 | img[img>255] = 255 75 | 76 | img = np.transpose(img, (1, 2, 0)) 77 | 78 | img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB) 79 | 80 | for j in range(idxs[0].shape[0]): 81 | bbox = transformed_anchors[idxs[0][j], :] 82 | x1 = int(bbox[0]) 83 | y1 = int(bbox[1]) 84 | x2 = int(bbox[2]) 85 | y2 = int(bbox[3]) 86 | label_name = dataset_val.labels[int(classification[idxs[0][j]])] 87 | draw_caption(img, (x1, y1, x2, y2), label_name) 88 | 89 | cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2) 90 | print(label_name) 91 | 92 | cv2.imshow('img', img) 93 | cv2.waitKey(0) 94 | 95 | 96 | 97 | if __name__ == '__main__': 98 | main() -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.utils.data as data 3 | import torch.optim as optim 4 | 5 | from ignite.engine import Events, create_supervised_evaluator, create_supervised_trainer 6 | from ignite.metrics import Loss, Accuracy 7 | from ignite.contrib.handlers.param_scheduler import CosineAnnealingScheduler 8 | from ignite.handlers.checkpoint import ModelCheckpoint 9 | from torchvision.datasets import CIFAR100 10 | from dataloaders import CocoDetection 11 | from torchvision.transforms import Compose, RandomCrop, RandomHorizontalFlip, Normalize, ToTensor 12 | from .model.wideresnet import AttentionWideResNet 13 | from .model.retinanet import AttentionRetinaNet 14 | from tqdm import tqdm 15 | 16 | from tensorboardX import SummaryWriter 17 | from .utils.utils import Resizer, Augmenter 18 | import argparse 19 | import json 20 | 21 | HOME_PREFIX = '/home/se26956/projects/IRP/pytorch-attention-augmented-convolution/' 22 | 23 | parser = argparse.ArgumentParser() 24 | parser.add_argument("--config", type=str, help="config path") 25 | 26 | args = parser.parse_args() 27 | 28 | 29 | def create_summary_writer(model, data_loader, log_dir): 30 | writer = SummaryWriter(log_dir=log_dir) 31 | data_loader_iter = iter(data_loader) 32 | x, y = next(data_loader_iter) 33 | 34 | try: 35 | writer.add_graph(model, x) 36 | except Exception as e: 37 | print("Failed to save model graph: {}".format(e)) 38 | return writer 39 | 40 | def get_data_loaders(batch_size): 41 | normalize = Normalize(mean=[0.49137254, 0.48235294, 0.4466667], 42 | std=[0.247058823, 0.24352941, 0.2615686]) 43 | train_transforms = Compose([ 44 | RandomCrop(32), 45 | RandomHorizontalFlip(), 46 | ToTensor(), 47 | normalize 48 | ]) 49 | 50 | test_transform = Compose([ 51 | ToTensor(), 52 | normalize 53 | ]) 54 | 55 | train_dataset = CIFAR100('./data', train=True, download=True, transform=train_transforms) 56 | val_dataset = CIFAR100('./data', train=False, download=True, transform=test_transform) 57 | 58 | train_loader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True) 59 | val_loader = data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True) 60 | 61 | return train_loader, val_loader 62 | 63 | def get_COCO_loaders(batch_size): 64 | normalize = Normalize( 65 | mean=[0.485, 0.456, 0.406], 66 | std=[0.229, 0.224, 0.225] 67 | ) 68 | 69 | 70 | train_transforms = Compose([ 71 | RandomHorizontalFlip(p=0.5), 72 | ToTensor(), 73 | normalize 74 | ]) 75 | 76 | test_transform = Compose([ 77 | ToTensor(), 78 | normalize 79 | ]) 80 | 81 | train_dataset = 
CocoDetection(HOME_PREFIX+'data/coco_detection/train2017', 82 | HOME_PREFIX+'data/coco_detection/annotations/instances_train2017.json', 83 | img_and_target_transform=Resizer(), 84 | transform=train_transforms) 85 | 86 | test_dataset = CocoDetection(HOME_PREFIX+'data/coco_detection/val', 87 | HOME_PREFIX+'data/coco_detection/annotations/instances_val2017.json', 88 | transform=test_transform) 89 | 90 | train_loader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True) 91 | test_loader = data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True) 92 | 93 | for input, label in train_loader: 94 | print(input.size()) 95 | 96 | return train_loader, test_loader 97 | 98 | 99 | def run(config): 100 | 101 | if config['model'] == 'AttentionWideResNet': 102 | train_loader, val_loader = get_data_loaders(config['batch_size']) 103 | model = AttentionWideResNet(28, 100, 10, (32, 32), 0.0) 104 | elif config['model'] == 'AttentionRetinaNet': 105 | train_loader, val_loader = get_COCO_loaders(config['batch_size']) 106 | model = AttentionRetinaNet(num_classes=80, input_size=(5,3)) 107 | writer = create_summary_writer(model, train_loader, config["tb_logdir"]) 108 | model.cuda() 109 | 110 | log_interval = config['log_interval'] 111 | epochs = config['epochs'] 112 | model = nn.DataParallel(model) 113 | 114 | optimizer = optim.SGD(model.parameters(), lr=config['lr'], momentum=config['momentum']) 115 | scheduler = CosineAnnealingScheduler(optimizer, 'lr', 0.1, 0.001, len(train_loader)) 116 | 117 | loss_fn = nn.CrossEntropyLoss().cuda() 118 | 119 | trainer = create_supervised_trainer(model, optimizer, loss_fn, device='cuda') 120 | trainer.add_event_handler(Events.ITERATION_STARTED, scheduler) 121 | trainer_saver = ModelCheckpoint( 122 | config['checkpoint_dir'], 123 | filename_prefix="model_ckpt", 124 | save_interval=1000, 125 | n_saved=10, 126 | atomic=True, 127 | save_as_state_dict=True, 128 | create_dir=True 129 | ) 130 | trainer.add_event_handler(Events.ITERATION_COMPLETED, 131 | trainer_saver, 132 | { 133 | "model": model, 134 | }) 135 | evaluator = create_supervised_evaluator(model, 136 | metrics={"accuracy": Accuracy(), 137 | 'CE': Loss(loss_fn)}, 138 | device="cuda") 139 | 140 | desc = "ITERATION - loss: {:.2f}" 141 | pbar = tqdm( 142 | initial=0, leave=False, total=len(train_loader), 143 | desc=desc.format(0) 144 | ) 145 | 146 | @trainer.on(Events.ITERATION_COMPLETED) 147 | def log_training_loss(engine): 148 | iter = (engine.state.iteration - 1) % len(train_loader) + 1 149 | 150 | if iter % log_interval == 0: 151 | pbar.desc = desc.format(engine.state.output) 152 | pbar.update(log_interval) 153 | 154 | writer.add_scalar("training/loss", engine.state.output, engine.state.iteration) 155 | 156 | @trainer.on(Events.EPOCH_COMPLETED) 157 | def log_training_results(engine): 158 | pbar.refresh() 159 | evaluator.run(train_loader) 160 | metrics = evaluator.state.metrics 161 | avg_accuracy = metrics['accuracy'] 162 | avg_CE = metrics['CE'] 163 | tqdm.write( 164 | "Training Results - Epoch: {} Avg accuracy: {:.2f} Avg loss: {:.2f}".format(engine.state.epoch, 165 | avg_accuracy, 166 | avg_CE) 167 | ) 168 | writer.add_scalar("training/avg_loss", avg_CE, engine.state.epoch) 169 | writer.add_scalar("training/avg_accuracy", avg_accuracy, engine.state.epoch) 170 | 171 | @trainer.on(Events.EPOCH_COMPLETED) 172 | def log_validation_results(engine): 173 | evaluator.run(val_loader) 174 | metrics = evaluator.state.metrics 175 | avg_accuracy = metrics['accuracy'] 176 | avg_CE = metrics['CE'] 177 | tqdm.write( 
178 | "Validation Results - Epoch: {} Avg accuracy {:.2f} Avg loss: {:.2f}".format(engine.state.epoch, 179 | avg_accuracy, 180 | avg_CE) 181 | ) 182 | pbar.n = pbar.last_print_n = 0 183 | 184 | writer.add_scalar("valdation/avg_loss", avg_CE, engine.state.epoch) 185 | writer.add_scalar("valdation/avg_accuracy", avg_accuracy, engine.state.epoch) 186 | 187 | trainer.run(train_loader, max_epochs=epochs) 188 | pbar.close() 189 | writer.close() 190 | 191 | 192 | config_file = args.config 193 | with open(config_file, 'rb') as infile: 194 | config = json.load(infile) 195 | 196 | run(config) 197 | 198 | 199 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | """Original from https://github.com/yhenon/pytorch-retinanet""" 2 | import torch 3 | import torch.nn as nn 4 | import numpy as np 5 | 6 | 7 | import skimage.io 8 | import skimage.transform 9 | import skimage.color 10 | import skimage 11 | 12 | from PIL import Image 13 | 14 | 15 | def comptue_dim(dim, padding, kernel_size, stride): 16 | return np.floor((dim + 2*padding - kernel_size) / stride) + 1 17 | 18 | def conv3x3(in_planes, out_planes, stride=1): 19 | """3x3 convolution with padding""" 20 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 21 | padding=1, bias=False) 22 | 23 | 24 | class BasicBlock(nn.Module): 25 | expansion = 1 26 | 27 | def __init__(self, inplanes, planes, stride=1, downsample=None): 28 | super(BasicBlock, self).__init__() 29 | self.conv1 = conv3x3(inplanes, planes, stride) 30 | self.bn1 = nn.BatchNorm2d(planes) 31 | self.relu = nn.ReLU(inplace=True) 32 | self.conv2 = conv3x3(planes, planes) 33 | self.bn2 = nn.BatchNorm2d(planes) 34 | self.downsample = downsample 35 | self.stride = stride 36 | 37 | def forward(self, x): 38 | residual = x 39 | 40 | out = self.conv1(x) 41 | out = self.bn1(out) 42 | out = self.relu(out) 43 | 44 | out = self.conv2(out) 45 | out = self.bn2(out) 46 | 47 | if self.downsample is not None: 48 | residual = self.downsample(x) 49 | 50 | out += residual 51 | out = self.relu(out) 52 | 53 | return out 54 | 55 | 56 | class Bottleneck(nn.Module): 57 | expansion = 4 58 | 59 | def __init__(self, inplanes, planes, stride=1, downsample=None): 60 | super(Bottleneck, self).__init__() 61 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 62 | self.bn1 = nn.BatchNorm2d(planes) 63 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 64 | padding=1, bias=False) 65 | self.bn2 = nn.BatchNorm2d(planes) 66 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 67 | self.bn3 = nn.BatchNorm2d(planes * 4) 68 | self.relu = nn.ReLU(inplace=True) 69 | self.downsample = downsample 70 | self.stride = stride 71 | 72 | def forward(self, x): 73 | residual = x 74 | 75 | out = self.conv1(x) 76 | out = self.bn1(out) 77 | out = self.relu(out) 78 | 79 | out = self.conv2(out) 80 | out = self.bn2(out) 81 | out = self.relu(out) 82 | 83 | out = self.conv3(out) 84 | out = self.bn3(out) 85 | 86 | if self.downsample is not None: 87 | residual = self.downsample(x) 88 | 89 | out += residual 90 | out = self.relu(out) 91 | 92 | return out 93 | 94 | 95 | class BBoxTransform(nn.Module): 96 | 97 | def __init__(self, mean=None, std=None): 98 | super(BBoxTransform, self).__init__() 99 | if mean is None: 100 | self.mean = torch.from_numpy(np.array([0, 0, 0, 0]).astype(np.float32)).cuda() 101 | else: 102 | self.mean = mean 103 | if std is None: 104 
| self.std = torch.from_numpy(np.array([0.1, 0.1, 0.2, 0.2]).astype(np.float32)).cuda() 105 | else: 106 | self.std = std 107 | 108 | def forward(self, boxes, deltas): 109 | 110 | widths = boxes[:, :, 2] - boxes[:, :, 0] 111 | heights = boxes[:, :, 3] - boxes[:, :, 1] 112 | ctr_x = boxes[:, :, 0] + 0.5 * widths 113 | ctr_y = boxes[:, :, 1] + 0.5 * heights 114 | 115 | dx = deltas[:, :, 0] * self.std[0] + self.mean[0] 116 | dy = deltas[:, :, 1] * self.std[1] + self.mean[1] 117 | dw = deltas[:, :, 2] * self.std[2] + self.mean[2] 118 | dh = deltas[:, :, 3] * self.std[3] + self.mean[3] 119 | 120 | pred_ctr_x = ctr_x + dx * widths 121 | pred_ctr_y = ctr_y + dy * heights 122 | pred_w = torch.exp(dw) * widths 123 | pred_h = torch.exp(dh) * heights 124 | 125 | pred_boxes_x1 = pred_ctr_x - 0.5 * pred_w 126 | pred_boxes_y1 = pred_ctr_y - 0.5 * pred_h 127 | pred_boxes_x2 = pred_ctr_x + 0.5 * pred_w 128 | pred_boxes_y2 = pred_ctr_y + 0.5 * pred_h 129 | 130 | pred_boxes = torch.stack([pred_boxes_x1, pred_boxes_y1, pred_boxes_x2, pred_boxes_y2], dim=2) 131 | 132 | return pred_boxes 133 | 134 | 135 | class ClipBoxes(nn.Module): 136 | 137 | def __init__(self, width=None, height=None): 138 | super(ClipBoxes, self).__init__() 139 | 140 | def forward(self, boxes, img): 141 | batch_size, num_channels, height, width = img.shape 142 | 143 | boxes[:, :, 0] = torch.clamp(boxes[:, :, 0], min=0) 144 | boxes[:, :, 1] = torch.clamp(boxes[:, :, 1], min=0) 145 | 146 | boxes[:, :, 2] = torch.clamp(boxes[:, :, 2], max=width) 147 | boxes[:, :, 3] = torch.clamp(boxes[:, :, 3], max=height) 148 | 149 | return boxes 150 | 151 | class Resizer(object): 152 | """Convert ndarrays in sample to Tensors.""" 153 | 154 | def __call__(self, image, annots, min_side=608, max_side=1024): 155 | 156 | image = np.array(image) 157 | annots = np.array([[*annot['bbox'], annot['category_id']] for annot in annots]) 158 | 159 | rows, cols, cns = image.shape 160 | 161 | smallest_side = min(rows, cols) 162 | 163 | # rescale the image so the smallest side is min_side 164 | scale = min_side / smallest_side 165 | 166 | # check if the largest side is now greater than max_side, which can happen 167 | # when images have a large aspect ratio 168 | largest_side = max(rows, cols) 169 | 170 | if largest_side * scale > max_side: 171 | scale = max_side / largest_side 172 | 173 | # resize the image with the computed scale 174 | image = skimage.transform.resize(image, (int(round(rows * scale)), int(round((cols * scale))))) 175 | rows, cols, cns = image.shape 176 | 177 | pad_w = 32 - rows % 32 178 | pad_h = 32 - cols % 32 179 | 180 | new_image = np.zeros((rows + pad_w, cols + pad_h, cns)).astype(np.float32) 181 | new_image[:rows, :cols, :] = image.astype(np.float32) 182 | 183 | annots[:, 4] = annots[:, 4] * scale 184 | 185 | 186 | return Image.fromarray(np.uint8(new_image)), torch.from_numpy(annots), scale 187 | 188 | 189 | class Augmenter(object): 190 | """Convert ndarrays in sample to Tensors.""" 191 | 192 | def __call__(self, sample, flip_x=0.5): 193 | if np.random.rand() < flip_x: 194 | image, annots = sample['img'], sample['annot'] 195 | image = image[:, ::-1, :] 196 | 197 | rows, cols, channels = image.shape 198 | 199 | x1 = annots[:, 0].copy() 200 | x2 = annots[:, 2].copy() 201 | 202 | x_tmp = x1.copy() 203 | 204 | annots[:, 0] = cols - x2 205 | annots[:, 2] = cols - x_tmp 206 | 207 | sample = {'img': image, 'annot': annots} 208 | 209 | return sample 210 | 211 | 212 | class Normalizer(object): 213 | 214 | def __init__(self): 215 | self.mean = 
np.array([[[0.485, 0.456, 0.406]]])
216 |         self.std = np.array([[[0.229, 0.224, 0.225]]])
217 | 
218 |     def __call__(self, sample):
219 |         image, annots = sample['img'], sample['annot']
220 | 
221 |         return {'img': ((image.astype(np.float32) - self.mean) / self.std), 'annot': annots}
222 | 
223 | 
224 | class UnNormalizer(object):
225 |     def __init__(self, mean=None, std=None):
226 |         if mean is None:
227 |             self.mean = [0.485, 0.456, 0.406]
228 |         else:
229 |             self.mean = mean
230 |         if std is None:
231 |             self.std = [0.229, 0.224, 0.225]
232 |         else:
233 |             self.std = std
234 | 
235 |     def __call__(self, tensor):
236 |         """
237 |         Args:
238 |             tensor (Tensor): Tensor image of size (C, H, W) that was normalized with ImageNet statistics.
239 |         Returns:
240 |             Tensor: Un-normalized image (modified in place).
241 |         """
242 |         for t, m, s in zip(tensor, self.mean, self.std):
243 |             t.mul_(s).add_(m)
244 |         return tensor
245 | 
--------------------------------------------------------------------------------
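
As a closing illustration, here is a minimal round-trip sketch for the `Normalizer` and `UnNormalizer` transforms defined in `utils/utils.py` above. The import path, the synthetic image, and the tensor shapes are assumptions made for the example; only the two classes themselves come from the file.

```python
# Sketch only: a random 608x608 RGB image stands in for a real dataloader sample.
import numpy as np
import torch

from utils.utils import Normalizer, UnNormalizer  # assumed import path (repo root on sys.path)

normalizer = Normalizer()
unnormalizer = UnNormalizer()

sample = {
    'img': np.random.rand(608, 608, 3).astype(np.float32),  # HxWx3 image in [0, 1]
    'annot': np.zeros((0, 5), dtype=np.float32),             # annotations are passed through untouched
}

# Normalizer works on HxWxC numpy images: (img - mean) / std per channel.
normalized = normalizer(sample)

# UnNormalizer works on CxHxW tensors and modifies them in place (t * std + mean),
# so permute to channel-first and clone before undoing the normalization.
img_chw = torch.from_numpy(normalized['img']).permute(2, 0, 1).clone()
restored = unnormalizer(img_chw)

print(np.allclose(restored.permute(1, 2, 0).numpy(), sample['img'], atol=1e-5))  # expect True
```

This in-place inverse is exactly what `visualize.py` relies on when it rescales the un-normalized image by 255 before drawing detections.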
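
`utils/utils.py` (and its twin `pytorch-retinanet/utils.py`) also defines `BBoxTransform` and `ClipBoxes`, which decode the (dx, dy, dw, dh) regression deltas predicted by the network back into pixel boxes around the anchors and then clamp them to the image. Below is a small hand-worked sketch of that decoding; the anchor, the deltas, and the image size are made-up values, and mean/std are passed in explicitly so the default `.cuda()` constants are not required.

```python
import math

import torch

from utils.utils import BBoxTransform, ClipBoxes  # assumed import path

# Passing mean/std keeps everything on the CPU for this sketch.
decode = BBoxTransform(mean=torch.zeros(4), std=torch.tensor([0.1, 0.1, 0.2, 0.2]))
clip = ClipBoxes()

anchors = torch.tensor([[[100.0, 100.0, 200.0, 200.0]]])         # (batch, num_anchors, 4) as x1, y1, x2, y2
deltas = torch.tensor([[[0.0, 0.0, math.log(2.0) / 0.2, 0.0]]])  # dw scaled so exp(dw * std) doubles the width
img = torch.zeros(1, 3, 224, 224)                                # only the shape is used for clipping

boxes = decode(anchors, deltas)  # centre unchanged, width doubled -> (50, 100, 250, 200)
boxes = clip(boxes, img)         # x2 clamped to the 224-pixel image width -> (50, 100, 224, 200)
print(boxes)
```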