├── .gitignore ├── README.md ├── data ├── __init__.py ├── coco.py ├── scripts │ ├── COCO2017.sh │ ├── VOC2007.sh │ └── VOC2012.sh ├── transforms.py └── voc.py ├── eval.py ├── evaluator ├── coco_evaluator.py └── voc_evaluator.py ├── models ├── conv.py ├── fcos.py ├── fcos_rt.py └── resnet.py ├── test.py ├── train.py ├── train_fcos.sh ├── train_fcos_rt.sh └── utils ├── __init__.py ├── box_ops.py ├── com_flops_params.py ├── create_labels.py ├── distributed_utils.py ├── loss.py └── misc.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pt 2 | *.pth 3 | *.pkl 4 | *.txt 5 | __pycache__ 6 | .vscode -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Update: 2022-04-11 2 | I have reproduced FCOS. If you are still interested at `FCOS`, please try the following project: 3 | 4 | https://github.com/yjh0410/DetLAB 5 | 6 | You can delete this my old FCOS project. 7 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | from .voc import VOCDetection, VOC_CLASSES 2 | from .coco import COCODataset, coco_class_labels, coco_class_index 3 | -------------------------------------------------------------------------------- /data/coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import random 4 | 5 | import torch 6 | from torch.utils.data import Dataset 7 | import cv2 8 | from pycocotools.coco import COCO 9 | 10 | 11 | coco_class_labels = ('background', 12 | 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 13 | 'boat', 'traffic light', 'fire hydrant', 'street sign', 'stop sign', 14 | 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 15 | 'elephant', 'bear', 'zebra', 'giraffe', 'hat', 'backpack', 'umbrella', 16 | 'shoe', 'eye glasses', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 17 | 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 18 | 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'plate', 'wine glass', 19 | 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 20 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 21 | 'couch', 'potted plant', 'bed', 'mirror', 'dining table', 'window', 'desk', 22 | 'toilet', 'door', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 23 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'blender', 'book', 24 | 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush') 25 | 26 | coco_class_index = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 27 | 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 28 | 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 29 | 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] 30 | 31 | 32 | class COCODataset(Dataset): 33 | """ 34 | COCO dataset class. 35 | """ 36 | def __init__(self, 37 | data_dir=None, 38 | image_set='train2017', 39 | transform=None): 40 | """ 41 | COCO dataset initialization. Annotation data are read into memory by COCO API. 42 | Args: 43 | data_dir (str): dataset root directory 44 | json_file (str): COCO json file name 45 | name (str): COCO data name (e.g. 
'train2017' or 'val2017') 46 | img_size (int): target image size after pre-processing 47 | min_size (int): bounding boxes smaller than this are ignored 48 | debug (bool): if True, only one data id is selected from the dataset 49 | """ 50 | if image_set == 'train2017': 51 | self.json_file = 'instances_train2017.json' 52 | elif image_set == 'val2017': 53 | self.json_file = 'instances_val2017.json' 54 | elif image_set == 'test2017': 55 | self.json_file == 'image_info_test-dev2017.json' 56 | self.image_set = image_set 57 | self.data_dir = data_dir 58 | self.coco = COCO(os.path.join(self.data_dir, 'annotations', self.json_file)) 59 | self.ids = self.coco.getImgIds() 60 | self.class_ids = sorted(self.coco.getCatIds()) 61 | # augmentation 62 | self.transform = transform 63 | 64 | 65 | def __len__(self): 66 | return len(self.ids) 67 | 68 | 69 | def __getitem__(self, index): 70 | img, target = self.pull_item(index) 71 | return img, target 72 | 73 | 74 | def pull_image(self, index): 75 | id_ = self.ids[index] 76 | img_file = os.path.join(self.data_dir, self.image_set, 77 | '{:012}'.format(id_) + '.jpg') 78 | img = cv2.imread(img_file) 79 | 80 | if self.json_file == 'instances_val5k.json' and img is None: 81 | img_file = os.path.join(self.data_dir, 'train2017', 82 | '{:012}'.format(id_) + '.jpg') 83 | img = cv2.imread(img_file) 84 | 85 | return img, id_ 86 | 87 | 88 | def pull_anno(self, index): 89 | id_ = self.ids[index] 90 | 91 | anno_ids = self.coco.getAnnIds(imgIds=[int(id_)], iscrowd=None) 92 | annotations = self.coco.loadAnns(anno_ids) 93 | 94 | target = [] 95 | for anno in annotations: 96 | if 'bbox' in anno: 97 | xmin = np.max((0, anno['bbox'][0])) 98 | ymin = np.max((0, anno['bbox'][1])) 99 | xmax = xmin + anno['bbox'][2] 100 | ymax = ymin + anno['bbox'][3] 101 | 102 | if anno['area'] > 0 and xmax >= xmin and ymax >= ymin: 103 | label_ind = anno['category_id'] 104 | cls_id = self.class_ids.index(label_ind) 105 | 106 | target.append([xmin, ymin, xmax, ymax, cls_id]) # [xmin, ymin, xmax, ymax, label_ind] 107 | else: 108 | print('No bbox !!') 109 | return target 110 | 111 | 112 | def load_image_annotation(self, index): 113 | anno_ids = self.coco.getAnnIds(imgIds=[int(index)], iscrowd=None) 114 | annotations = self.coco.loadAnns(anno_ids) 115 | 116 | # load an image 117 | img_file = os.path.join(self.data_dir, self.image_set, 118 | '{:012}'.format(index) + '.jpg') 119 | img = cv2.imread(img_file) 120 | 121 | if self.json_file == 'instances_val5k.json' and img is None: 122 | img_file = os.path.join(self.data_dir, 'train2017', 123 | '{:012}'.format(index) + '.jpg') 124 | img = cv2.imread(img_file) 125 | 126 | assert img is not None 127 | 128 | height, width, channels = img.shape 129 | 130 | # load an annotation 131 | annotation = [] 132 | for anno in annotations: 133 | if 'bbox' in anno and anno['area'] > 0: 134 | xmin = np.max((0, anno['bbox'][0])) 135 | ymin = np.max((0, anno['bbox'][1])) 136 | xmax = np.min((width - 1, xmin + np.max((0, anno['bbox'][2] - 1)))) 137 | ymax = np.min((height - 1, ymin + np.max((0, anno['bbox'][3] - 1)))) 138 | if xmax > xmin and ymax > ymin: 139 | label_ind = anno['category_id'] 140 | cls_id = self.class_ids.index(label_ind) 141 | 142 | annotation.append([xmin, ymin, xmax, ymax, cls_id]) # [xmin, ymin, xmax, ymax, label_ind] 143 | else: 144 | print('No bbox !!!') 145 | # end here . 
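# Each entry appended above is [xmin, ymin, xmax, ymax, cls_id]: the box is
# clipped to the image bounds, and cls_id is the contiguous index in 0..79
# returned by self.class_ids.index(category_id), not the raw COCO category id
# (1..90); coco_class_index maps the contiguous index back to the COCO id when
# class names are looked up or detection results are written.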
146 | 147 | return img, annotation, height, width 148 | 149 | 150 | def pull_item(self, index): 151 | id_ = self.ids[index] 152 | img, anno, height, width = self.load_image_annotation(id_) 153 | # check anno 154 | if len(anno) == 0: 155 | anno = np.zeros([1, 5]) 156 | else: 157 | anno = np.array(anno) 158 | 159 | # transform 160 | target = {'boxes': anno[:, :4], 161 | 'labels': anno[:, 4], 162 | 'orig_size': [height, width]} 163 | img, target = self.transform(img, target) 164 | 165 | return img, target 166 | 167 | 168 | if __name__ == "__main__": 169 | from transforms import TrainTransforms, ValTransforms 170 | img_size = 512 171 | dataset = COCODataset( 172 | data_dir='/mnt/share/ssd2/dataset/COCO', 173 | transform=TrainTransforms(img_size), 174 | image_set='train') 175 | 176 | np.random.seed(0) 177 | class_colors = [(np.random.randint(255), 178 | np.random.randint(255), 179 | np.random.randint(255)) for _ in range(80)] 180 | rgb_mean = np.array(dataset.transform.mean) 181 | rgb_std = np.array(dataset.transform.std) 182 | print('Data length: ', len(dataset)) 183 | for i in range(1000): 184 | # load an image 185 | img, target = dataset.pull_item(i) 186 | img = img.permute(1,2,0).numpy() 187 | img = (img*rgb_std + rgb_mean) * 255 188 | # from rgb to bgr 189 | img = img[:, :, (2, 1, 0)] 190 | img = img.astype(np.uint8).copy() 191 | # load a target 192 | cls_gt = target['labels'].tolist() 193 | box_gt = target['boxes'].tolist() 194 | for i in range(len(cls_gt)): 195 | cls_id = int(cls_gt[i]) 196 | cx, cy, bw, bh = box_gt[i] 197 | x1 = int((cx - bw / 2) * img_size) 198 | y1 = int((cy - bh / 2) * img_size) 199 | x2 = int((cx + bw / 2) * img_size) 200 | y2 = int((cy + bh / 2) * img_size) 201 | img = cv2.rectangle(img, (x1, y1), (x2, y2), (0,0,255), 2) 202 | color = class_colors[cls_id] 203 | cls_name = coco_class_labels[coco_class_index[cls_id]] 204 | mess = '%s' % (cls_name) 205 | cv2.putText(img, mess, (int(x1), int(y1 - 5)), 0, 0.5, color, 1, lineType=cv2.LINE_AA) 206 | cv2.imshow('gt', img) 207 | cv2.waitKey(0) 208 | -------------------------------------------------------------------------------- /data/scripts/COCO2017.sh: -------------------------------------------------------------------------------- 1 | mkdir COCO 2 | cd COCO 3 | 4 | wget http://images.cocodataset.org/zips/train2017.zip 5 | wget http://images.cocodataset.org/zips/val2017.zip 6 | wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip 7 | 8 | unzip train2017.zip 9 | unzip val2017.zip 10 | unzip annotations_trainval2017.zip 11 | 12 | rm -f train2017.zip 13 | rm -f val2017.zip 14 | rm -f annotations_trainval2017.zip 15 | -------------------------------------------------------------------------------- /data/scripts/VOC2007.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ellis Brown 3 | 4 | start=`date +%s` 5 | 6 | # handle optional download dir 7 | if [ -z "$1" ] 8 | then 9 | # navigate to ~/data 10 | echo "navigating to ~/data/ ..." 11 | mkdir -p ~/data 12 | cd ~/data/ 13 | else 14 | # check if is valid directory 15 | if [ ! -d $1 ]; then 16 | echo $1 "is not a valid directory" 17 | exit 0 18 | fi 19 | echo "navigating to" $1 "..." 20 | cd $1 21 | fi 22 | 23 | echo "Downloading VOC2007 trainval ..." 24 | # Download the data. 25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar 26 | echo "Downloading VOC2007 test data ..." 
27 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar 28 | echo "Done downloading." 29 | 30 | # Extract data 31 | echo "Extracting trainval ..." 32 | tar -xvf VOCtrainval_06-Nov-2007.tar 33 | echo "Extracting test ..." 34 | tar -xvf VOCtest_06-Nov-2007.tar 35 | echo "removing tars ..." 36 | rm VOCtrainval_06-Nov-2007.tar 37 | rm VOCtest_06-Nov-2007.tar 38 | 39 | end=`date +%s` 40 | runtime=$((end-start)) 41 | 42 | echo "Completed in" $runtime "seconds" -------------------------------------------------------------------------------- /data/scripts/VOC2012.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ellis Brown 3 | 4 | start=`date +%s` 5 | 6 | # handle optional download dir 7 | if [ -z "$1" ] 8 | then 9 | # navigate to ~/data 10 | echo "navigating to ~/data/ ..." 11 | mkdir -p ~/data 12 | cd ~/data/ 13 | else 14 | # check if is valid directory 15 | if [ ! -d $1 ]; then 16 | echo $1 "is not a valid directory" 17 | exit 0 18 | fi 19 | echo "navigating to" $1 "..." 20 | cd $1 21 | fi 22 | 23 | echo "Downloading VOC2012 trainval ..." 24 | # Download the data. 25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 26 | echo "Done downloading." 27 | 28 | 29 | # Extract data 30 | echo "Extracting trainval ..." 31 | tar -xvf VOCtrainval_11-May-2012.tar 32 | echo "removing tar ..." 33 | rm VOCtrainval_11-May-2012.tar 34 | 35 | end=`date +%s` 36 | runtime=$((end-start)) 37 | 38 | echo "Completed in" $runtime "seconds" -------------------------------------------------------------------------------- /data/transforms.py: -------------------------------------------------------------------------------- 1 | import random 2 | import torch 3 | import torchvision.transforms.functional as F 4 | 5 | 6 | 7 | class Compose(object): 8 | """Composes several augmentations together. 9 | Args: 10 | transforms (List[Transform]): list of transforms to compose. 
11 | Example: 12 | >>> augmentations.Compose([ 13 | >>> transforms.CenterCrop(10), 14 | >>> transforms.ToTensor(), 15 | >>> ]) 16 | """ 17 | 18 | def __init__(self, transforms): 19 | self.transforms = transforms 20 | 21 | def __call__(self, image, target=None): 22 | for t in self.transforms: 23 | image, target = t(image, target) 24 | return image, target 25 | 26 | 27 | class ToTensor(object): 28 | def __call__(self, image, target=None): 29 | # to rgb 30 | image = image[..., (2, 1, 0)] 31 | image = F.to_tensor(image) 32 | if target is not None: 33 | target["boxes"] = torch.as_tensor(target["boxes"]).float() 34 | target["labels"] = torch.as_tensor(target["labels"]).long() 35 | return image, target 36 | 37 | 38 | class Normalize(object): 39 | def __init__(self, mean, std): 40 | self.mean = mean 41 | self.std = std 42 | 43 | def __call__(self, image, target=None): 44 | image = F.normalize(image, mean=self.mean, std=self.std) 45 | if target is not None: 46 | h, w = target["orig_size"] 47 | if "boxes" in target: 48 | boxes = target["boxes"].clone() 49 | # normalize 50 | boxes = boxes / torch.tensor([w, h, w, h], dtype=torch.float32) 51 | # [x1, y1, x2, y2] -> [cx, cy, w, h] 52 | boxes_ = boxes.clone() 53 | boxes_[:, :2] = (boxes[:, 2:] + boxes[:, :2]) / 2.0 54 | boxes_[:, 2:] = boxes[:, 2:] - boxes[:, :2] 55 | target["boxes"] = boxes_ 56 | 57 | return image, target 58 | 59 | 60 | class Resize(object): 61 | def __init__(self, size=640): 62 | self.size = size 63 | 64 | def __call__(self, image, target=None): 65 | # resize 66 | size = (self.size, self.size) 67 | image = F.resize(image, size) 68 | 69 | return image, target 70 | 71 | 72 | class RandomHorizontalFlip(object): 73 | def __init__(self, p=0.5): 74 | self.p = p 75 | 76 | def __call__(self, image, target=None): 77 | if random.random() < self.p: 78 | image = F.hflip(image) 79 | if target is not None: 80 | h, w = target["orig_size"] 81 | if "boxes" in target: 82 | boxes = target["boxes"].clone() 83 | boxes[..., [0, 2]] = w - boxes[..., [2, 0]] 84 | target["boxes"] = boxes 85 | 86 | return image, target 87 | 88 | 89 | # TrainTransform 90 | class TrainTransforms(object): 91 | def __init__(self, size=512, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)): 92 | self.size = size 93 | self.mean = mean 94 | self.std = std 95 | self.transforms = Compose([ 96 | ToTensor(), 97 | RandomHorizontalFlip(), 98 | Resize(size), 99 | Normalize(mean, std) 100 | ]) 101 | 102 | def __call__(self, image, target): 103 | return self.transforms(image, target) 104 | 105 | 106 | # ValTransform 107 | class ValTransforms(object): 108 | def __init__(self, size=512, mean=(0.406, 0.456, 0.485), std=(0.225, 0.224, 0.229)): 109 | self.size = size 110 | self.mean = mean 111 | self.std = std 112 | self.transforms = Compose([ 113 | ToTensor(), 114 | Resize(size), 115 | Normalize(mean, std) 116 | ]) 117 | 118 | 119 | def __call__(self, image, target=None): 120 | return self.transforms(image, target) 121 | -------------------------------------------------------------------------------- /data/voc.py: -------------------------------------------------------------------------------- 1 | """VOC Dataset Classes 2 | 3 | Original author: Francisco Massa 4 | https://github.com/fmassa/vision/blob/voc_dataset/torchvision/datasets/voc.py 5 | 6 | Updated by: Ellis Brown, Max deGroot 7 | """ 8 | import os.path as osp 9 | import sys 10 | import torch 11 | import torch.utils.data as data 12 | import cv2 13 | import numpy as np 14 | import random 15 | 16 | import xml.etree.ElementTree as ET 17 
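The transforms above are what both datasets use to turn a raw BGR `cv2` image and absolute `[x1, y1, x2, y2]` boxes into a normalized RGB tensor plus normalized `[cx, cy, w, h]` targets. A minimal usage sketch, assuming the repository root is on `PYTHONPATH` so `data.transforms` is importable; the image and box values are made up for illustration:

```python
import numpy as np
from data.transforms import TrainTransforms

# dummy 300x400 BGR image, as cv2.imread would return it
image = np.random.randint(0, 255, size=(300, 400, 3), dtype=np.uint8)
target = {
    'boxes': np.array([[50., 30., 250., 200.]]),  # absolute [x1, y1, x2, y2]
    'labels': np.array([7]),
    'orig_size': [300, 400],                      # [height, width]
}

transform = TrainTransforms(size=512)
img_t, target_t = transform(image, target)

print(img_t.shape)        # torch.Size([3, 512, 512]), RGB, ImageNet-normalized
print(target_t['boxes'])  # normalized [cx, cy, w, h], e.g. tensor([[0.3750, 0.3833, 0.5000, 0.5667]])
                          # (cx becomes 0.6250 when the random horizontal flip fires)
```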
| 18 | 19 | VOC_CLASSES = ( # always index 0 20 | 'aeroplane', 'bicycle', 'bird', 'boat', 21 | 'bottle', 'bus', 'car', 'cat', 'chair', 22 | 'cow', 'diningtable', 'dog', 'horse', 23 | 'motorbike', 'person', 'pottedplant', 24 | 'sheep', 'sofa', 'train', 'tvmonitor') 25 | 26 | 27 | 28 | class VOCAnnotationTransform(object): 29 | """Transforms a VOC annotation into a Tensor of bbox coords and label index 30 | Initilized with a dictionary lookup of classnames to indexes 31 | 32 | Arguments: 33 | class_to_ind (dict, optional): dictionary lookup of classnames -> indexes 34 | (default: alphabetic indexing of VOC's 20 classes) 35 | keep_difficult (bool, optional): keep difficult instances or not 36 | (default: False) 37 | height (int): height 38 | width (int): width 39 | """ 40 | 41 | def __init__(self, class_to_ind=None, keep_difficult=False): 42 | self.class_to_ind = class_to_ind or dict( 43 | zip(VOC_CLASSES, range(len(VOC_CLASSES)))) 44 | self.keep_difficult = keep_difficult 45 | 46 | def __call__(self, target): 47 | """ 48 | Arguments: 49 | target (annotation) : the target annotation to be made usable 50 | will be an ET.Element 51 | Returns: 52 | a list containing lists of bounding boxes [bbox coords, class name] 53 | """ 54 | res = [] 55 | for obj in target.iter('object'): 56 | difficult = int(obj.find('difficult').text) == 1 57 | if not self.keep_difficult and difficult: 58 | continue 59 | name = obj.find('name').text.lower().strip() 60 | bbox = obj.find('bndbox') 61 | 62 | pts = ['xmin', 'ymin', 'xmax', 'ymax'] 63 | bndbox = [] 64 | for i, pt in enumerate(pts): 65 | cur_pt = int(bbox.find(pt).text) - 1 66 | # scale height or width 67 | cur_pt = cur_pt if i % 2 == 0 else cur_pt 68 | bndbox.append(cur_pt) 69 | label_idx = self.class_to_ind[name] 70 | bndbox.append(label_idx) 71 | res += [bndbox] # [x1, y1, x2, y2, label_ind] 72 | # img_id = target.find('filename').text[:-4] 73 | 74 | return res # [[x1, y1, x2, y2, label_ind], ... ] 75 | 76 | 77 | class VOCDetection(data.Dataset): 78 | """VOC Detection Dataset Object 79 | 80 | input is image, target is annotation 81 | 82 | Arguments: 83 | root (string): filepath to VOCdevkit folder. 84 | image_set (string): imageset to use (eg. 
'train', 'val', 'test') 85 | transform (callable, optional): transformation to perform on the 86 | input image 87 | target_transform (callable, optional): transformation to perform on the 88 | target `annotation` 89 | (eg: take in caption string, return tensor of word indices) 90 | dataset_name (string, optional): which dataset to load 91 | (default: 'VOC2007') 92 | """ 93 | 94 | def __init__(self, 95 | data_dir=None, 96 | image_sets=[('2007', 'trainval'), ('2012', 'trainval')], 97 | transform=None, 98 | target_transform=VOCAnnotationTransform() 99 | ): 100 | self.root = data_dir 101 | self.image_set = image_sets 102 | self.target_transform = target_transform 103 | self._annopath = osp.join('%s', 'Annotations', '%s.xml') 104 | self._imgpath = osp.join('%s', 'JPEGImages', '%s.jpg') 105 | self.ids = list() 106 | for (year, name) in image_sets: 107 | rootpath = osp.join(self.root, 'VOC' + year) 108 | for line in open(osp.join(rootpath, 'ImageSets', 'Main', name + '.txt')): 109 | self.ids.append((rootpath, line.strip())) 110 | # augmentation 111 | self.transform = transform 112 | 113 | 114 | def __getitem__(self, index): 115 | img, target = self.pull_item(index) 116 | return img, target 117 | 118 | 119 | def __len__(self): 120 | return len(self.ids) 121 | 122 | 123 | def load_image_annotation(self, img_id): 124 | # load an image 125 | img = cv2.imread(self._imgpath % img_id) 126 | height, width, channels = img.shape 127 | # load an annotation 128 | anno = ET.parse(self._annopath % img_id).getroot() 129 | if self.target_transform is not None: 130 | anno = self.target_transform(anno) 131 | 132 | return img, anno, height, width 133 | 134 | 135 | def pull_item(self, index): 136 | img_id = self.ids[index] 137 | img, anno, height, width = self.load_image_annotation(img_id) 138 | if len(anno) == 0: 139 | anno = np.zeros([1, 5]) 140 | else: 141 | anno = np.array(anno) 142 | 143 | # transform 144 | target = {'boxes': anno[:, :4], 145 | 'labels': anno[:, 4], 146 | 'orig_size': [height, width]} 147 | img, target = self.transform(img, target) 148 | 149 | return img, target 150 | 151 | 152 | def pull_image(self, index): 153 | '''Returns the original image object at index in PIL form 154 | 155 | Note: not using self.__getitem__(), as any transformations passed in 156 | could mess up this functionality. 157 | 158 | Argument: 159 | index (int): index of img to show 160 | Return: 161 | PIL img 162 | ''' 163 | img_id = self.ids[index] 164 | return cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR), img_id 165 | 166 | 167 | def pull_anno(self, index): 168 | '''Returns the original annotation of image at index 169 | 170 | Note: not using self.__getitem__(), as any transformations passed in 171 | could mess up this functionality. 
172 | 173 | Argument: 174 | index (int): index of img to get annotation of 175 | Return: 176 | list: [img_id, [(label, bbox coords),...]] 177 | eg: ('001718', [('dog', (96, 13, 438, 332))]) 178 | ''' 179 | img_id = self.ids[index] 180 | anno = ET.parse(self._annopath % img_id).getroot() 181 | gt = self.target_transform(anno, 1, 1) 182 | return img_id[1], gt 183 | 184 | 185 | if __name__ == "__main__": 186 | from transforms import TrainTransforms, ValTransforms 187 | img_size = 512 188 | dataset = VOCDetection( 189 | data_dir='/mnt/share/ssd2/dataset/VOCdevkit', 190 | transform=TrainTransforms(img_size)) 191 | 192 | np.random.seed(0) 193 | class_colors = [(np.random.randint(255), 194 | np.random.randint(255), 195 | np.random.randint(255)) for _ in range(20)] 196 | rgb_mean = np.array(dataset.transform.mean) 197 | rgb_std = np.array(dataset.transform.std) 198 | print('Data length: ', len(dataset)) 199 | for i in range(1000): 200 | # load an image 201 | img, target = dataset.pull_item(i) 202 | img = img.permute(1,2,0).numpy() 203 | img = (img*rgb_std + rgb_mean) * 255 204 | # from rgb to bgr 205 | img = img[:, :, (2, 1, 0)] 206 | img = img.astype(np.uint8).copy() 207 | # load a target 208 | cls_gt = target['labels'].tolist() 209 | box_gt = target['boxes'].tolist() 210 | for i in range(len(cls_gt)): 211 | cls_id = int(cls_gt[i]) 212 | cx, cy, bw, bh = box_gt[i] 213 | x1 = int((cx - bw / 2) * img_size) 214 | y1 = int((cy - bh / 2) * img_size) 215 | x2 = int((cx + bw / 2) * img_size) 216 | y2 = int((cy + bh / 2) * img_size) 217 | img = cv2.rectangle(img, (x1, y1), (x2, y2), (0,0,255), 2) 218 | cls_name = VOC_CLASSES[cls_id] 219 | mess = '%s' % (cls_name) 220 | color = class_colors[cls_id] 221 | cv2.putText(img, mess, (int(x1), int(y1 - 5)), 0, 0.5, color, 1, lineType=cv2.LINE_AA) 222 | cv2.imshow('gt', img) 223 | cv2.waitKey(0) 224 | -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import torch 5 | 6 | from evaluator.voc_evaluator import VOCAPIEvaluator 7 | from evaluator.coco_evaluator import COCOAPIEvaluator 8 | 9 | from data.transforms import ValTransforms 10 | 11 | from utils.misc import TestTimeAugmentation 12 | 13 | 14 | parser = argparse.ArgumentParser(description='FCOS-RT Evaluation') 15 | # basic 16 | parser.add_argument('-size', '--img_size', default=512, type=int, 17 | help='img_size') 18 | parser.add_argument('--cuda', action='store_true', default=False, 19 | help='Use cuda') 20 | # model 21 | parser.add_argument('-v', '--version', default='fcos_rt', 22 | help='fcos_rt') 23 | parser.add_argument('--trained_model', type=str, 24 | default='weights/', 25 | help='Trained state_dict file path to open') 26 | parser.add_argument('--conf_thresh', default=0.001, type=float, 27 | help='NMS threshold') 28 | parser.add_argument('--nms_thresh', default=0.6, type=float, 29 | help='NMS threshold') 30 | # dataset 31 | parser.add_argument('--root', default='/mnt/share/ssd2/dataset', 32 | help='data root') 33 | parser.add_argument('-d', '--dataset', default='coco-val', 34 | help='voc, coco-val, coco-test.') 35 | # TTA 36 | parser.add_argument('-tta', '--test_aug', action='store_true', default=False, 37 | help='use test augmentation.') 38 | 39 | args = parser.parse_args() 40 | 41 | 42 | def voc_test(model, data_dir, device, img_size): 43 | evaluator = VOCAPIEvaluator(data_root=data_dir, 44 | img_size=img_size, 45 | device=device, 46 | 
transform=ValTransforms(img_size), 47 | display=True 48 | ) 49 | 50 | # VOC evaluation 51 | evaluator.evaluate(model) 52 | 53 | 54 | def coco_test(model, data_dir, device, img_size, test=False): 55 | if test: 56 | # test-dev 57 | print('test on test-dev 2017') 58 | evaluator = COCOAPIEvaluator( 59 | data_dir=data_dir, 60 | img_size=img_size, 61 | device=device, 62 | testset=True, 63 | transform=ValTransforms(img_size) 64 | ) 65 | 66 | else: 67 | # eval 68 | evaluator = COCOAPIEvaluator( 69 | data_dir=data_dir, 70 | img_size=img_size, 71 | device=device, 72 | testset=False, 73 | transform=ValTransforms(img_size) 74 | ) 75 | 76 | # COCO evaluation 77 | evaluator.evaluate(model) 78 | 79 | 80 | if __name__ == '__main__': 81 | # dataset 82 | if args.dataset == 'voc': 83 | print('eval on voc ...') 84 | num_classes = 20 85 | data_dir = os.path.join(args.root, 'VOCdevkit') 86 | elif args.dataset == 'coco-val': 87 | print('eval on coco-val ...') 88 | num_classes = 80 89 | data_dir = os.path.join(args.root, 'COCO') 90 | elif args.dataset == 'coco-test': 91 | print('eval on coco-test-dev ...') 92 | num_classes = 80 93 | data_dir = os.path.join(args.root, 'COCO') 94 | else: 95 | print('unknow dataset !! we only support voc, coco-val, coco-test !!!') 96 | exit(0) 97 | 98 | # cuda 99 | if args.cuda: 100 | print('use cuda') 101 | torch.backends.cudnn.benchmark = True 102 | device = torch.device("cuda") 103 | else: 104 | device = torch.device("cpu") 105 | 106 | # input size 107 | input_size = args.input_size 108 | 109 | # model 110 | model_name = args.version 111 | print('Model: ', model_name) 112 | 113 | # load model and config file 114 | if model_name == 'fcos_rt': 115 | from models.fcos_rt import FCOS_RT 116 | backbone = args.backbone 117 | # model 118 | model = FCOS_RT(device=device, 119 | img_size=input_size, 120 | num_classes=num_classes, 121 | trainable=False, 122 | conf_thresh=args.conf_thresh, 123 | nms_thresh=args.nms_thresh, 124 | bk=backbone) 125 | else: 126 | print('Unknown model name...') 127 | exit(0) 128 | 129 | 130 | # load weight 131 | model.load_state_dict(torch.load(args.trained_model, map_location=device)) 132 | model.to(device).eval() 133 | print('Finished loading model!') 134 | 135 | # TTA 136 | test_aug = TestTimeAugmentation(num_classes=num_classes) if args.test_aug else None 137 | 138 | # evaluation 139 | with torch.no_grad(): 140 | if args.dataset == 'voc': 141 | voc_test(model, data_dir, device, args.img_size) 142 | elif args.dataset == 'coco-val': 143 | coco_test(model, data_dir, device, args.img_size, test=False) 144 | elif args.dataset == 'coco-test': 145 | coco_test(model, data_dir, device, args.img_size, test=True) 146 | -------------------------------------------------------------------------------- /evaluator/coco_evaluator.py: -------------------------------------------------------------------------------- 1 | import json 2 | import tempfile 3 | import torch 4 | from data.coco import * 5 | from pycocotools.cocoeval import COCOeval 6 | 7 | 8 | class COCOAPIEvaluator(): 9 | """ 10 | COCO AP Evaluation class. 11 | All the data in the val2017 dataset are processed \ 12 | and evaluated by COCO API. 13 | """ 14 | def __init__(self, data_dir, device, testset=False, transform=None): 15 | """ 16 | Args: 17 | data_dir (str): dataset root directory 18 | img_size (int): image size after preprocess. images are resized \ 19 | to squares whose shape is (img_size, img_size). 
20 | confthre (float): 21 | confidence threshold ranging from 0 to 1, \ 22 | which is defined in the config file. 23 | nmsthre (float): 24 | IoU threshold of non-max supression ranging from 0 to 1. 25 | """ 26 | self.testset = testset 27 | self.dataset = COCODataset( 28 | data_dir=data_dir, 29 | image_set='val2017' if not testset else 'test2017', 30 | transform=None) 31 | self.transform = transform 32 | self.device = device 33 | 34 | self.map = 0. 35 | self.ap50_95 = 0. 36 | self.ap50 = 0. 37 | 38 | def evaluate(self, model): 39 | """ 40 | COCO average precision (AP) Evaluation. Iterate inference on the test dataset 41 | and the results are evaluated by COCO API. 42 | Args: 43 | model : model object 44 | Returns: 45 | ap50_95 (float) : calculated COCO AP for IoU=50:95 46 | ap50 (float) : calculated COCO AP for IoU=50 47 | """ 48 | model.eval() 49 | ids = [] 50 | data_dict = [] 51 | num_images = len(self.dataset) 52 | print('total number of images: %d' % (num_images)) 53 | 54 | # start testing 55 | for index in range(num_images): # all the data in val2017 56 | if index % 500 == 0: 57 | print('[Eval: %d / %d]'%(index, num_images)) 58 | 59 | # load an image 60 | img, id_ = self.dataset.pull_image(index) 61 | h, w, _ = img.shape 62 | scale = np.array([[w, h, w, h]]) 63 | 64 | # preprocess 65 | x = self.transform(img)[0] 66 | x = x.unsqueeze(0).to(self.device) 67 | 68 | id_ = int(id_) 69 | ids.append(id_) 70 | # inference 71 | with torch.no_grad(): 72 | outputs = model(x) 73 | bboxes, scores, cls_inds = outputs 74 | # rescale 75 | bboxes *= scale 76 | 77 | for i, box in enumerate(bboxes): 78 | x1 = float(box[0]) 79 | y1 = float(box[1]) 80 | x2 = float(box[2]) 81 | y2 = float(box[3]) 82 | label = self.dataset.class_ids[int(cls_inds[i])] 83 | 84 | bbox = [x1, y1, x2 - x1, y2 - y1] 85 | score = float(scores[i]) # object score * class score 86 | A = {"image_id": id_, "category_id": label, "bbox": bbox, 87 | "score": score} # COCO json format 88 | data_dict.append(A) 89 | 90 | annType = ['segm', 'bbox', 'keypoints'] 91 | 92 | # Evaluate the Dt (detection) json comparing with the ground truth 93 | if len(data_dict) > 0: 94 | print('evaluating ......') 95 | cocoGt = self.dataset.coco 96 | # workaround: temporarily write data to json file because pycocotools can't process dict in py36. 
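# Each element of data_dict is a standard COCO detection-result record, which is
# what pycocotools' loadRes() consumes, e.g.
#   {"image_id": 139, "category_id": 62, "bbox": [x, y, w, h], "score": 0.87}
# where category_id is the original COCO id (1..90) recovered from
# self.dataset.class_ids and bbox is [xmin, ymin, width, height] in pixels of
# the original image.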
97 | if self.testset: 98 | json.dump(data_dict, open('coco_test-dev.json', 'w')) 99 | cocoDt = cocoGt.loadRes('coco_test-dev.json') 100 | return -1, -1 101 | else: 102 | _, tmp = tempfile.mkstemp() 103 | json.dump(data_dict, open(tmp, 'w')) 104 | cocoDt = cocoGt.loadRes(tmp) 105 | cocoEval = COCOeval(self.dataset.coco, cocoDt, annType[1]) 106 | cocoEval.params.imgIds = ids 107 | cocoEval.evaluate() 108 | cocoEval.accumulate() 109 | cocoEval.summarize() 110 | 111 | ap50_95, ap50 = cocoEval.stats[0], cocoEval.stats[1] 112 | print('ap50_95 : ', ap50_95) 113 | print('ap50 : ', ap50) 114 | self.map = ap50_95 115 | self.ap50_95 = ap50_95 116 | self.ap50 = ap50 117 | 118 | return ap50, ap50_95 119 | else: 120 | return 0, 0 121 | 122 | -------------------------------------------------------------------------------- /evaluator/voc_evaluator.py: -------------------------------------------------------------------------------- 1 | """Adapted from: 2 | @longcw faster_rcnn_pytorch: https://github.com/longcw/faster_rcnn_pytorch 3 | @rbgirshick py-faster-rcnn https://github.com/rbgirshick/py-faster-rcnn 4 | Licensed under The MIT License [see LICENSE for details] 5 | """ 6 | 7 | from data.voc import VOCDetection, VOC_CLASSES 8 | import sys 9 | import os 10 | import time 11 | import numpy as np 12 | import pickle 13 | import xml.etree.ElementTree as ET 14 | 15 | 16 | class VOCAPIEvaluator(): 17 | """ VOC AP Evaluation class """ 18 | def __init__(self, 19 | data_dir, 20 | device, 21 | transform, 22 | set_type='test', 23 | year='2007', 24 | display=False): 25 | self.data_dir = data_dir 26 | self.device = device 27 | self.transform = transform 28 | self.labelmap = VOC_CLASSES 29 | self.set_type = set_type 30 | self.year = year 31 | self.display = display 32 | 33 | # path 34 | self.devkit_path = os.path.join(data_dir, 'VOC' + year) 35 | self.annopath = os.path.join(data_dir, 'VOC2007', 'Annotations', '%s.xml') 36 | self.imgpath = os.path.join(data_dir, 'VOC2007', 'JPEGImages', '%s.jpg') 37 | self.imgsetpath = os.path.join(data_dir, 'VOC2007', 'ImageSets', 'Main', set_type+'.txt') 38 | self.output_dir = self.get_output_dir('voc_eval/', self.set_type) 39 | 40 | # dataset 41 | self.dataset = VOCDetection(data_dir=data_dir, 42 | image_sets=[('2007', set_type)], 43 | transform=transform) 44 | 45 | def evaluate(self, net): 46 | net.eval() 47 | num_images = len(self.dataset) 48 | # all detections are collected into: 49 | # all_boxes[cls][image] = N x 5 array of detections in 50 | # (x1, y1, x2, y2, score) 51 | self.all_boxes = [[[] for _ in range(num_images)] 52 | for _ in range(len(self.labelmap))] 53 | 54 | # timers 55 | det_file = os.path.join(self.output_dir, 'detections.pkl') 56 | 57 | for i in range(num_images): 58 | im, _ = self.dataset.pull_image(i) 59 | h, w, _ = im.shape 60 | scale = np.array([[w, h, w, h]]) 61 | 62 | # preprocess 63 | x = self.transform(im)[0] 64 | x = x.unsqueeze(0).to(self.device) 65 | 66 | t0 = time.time() 67 | # forward 68 | bboxes, scores, cls_inds = net(x) 69 | detect_time = time.time() - t0 70 | # rescale 71 | bboxes *= scale 72 | 73 | for j in range(len(self.labelmap)): 74 | inds = np.where(cls_inds == j)[0] 75 | if len(inds) == 0: 76 | self.all_boxes[j][i] = np.empty([0, 5], dtype=np.float32) 77 | continue 78 | c_bboxes = bboxes[inds] 79 | c_scores = scores[inds] 80 | c_dets = np.hstack((c_bboxes, 81 | c_scores[:, np.newaxis])).astype(np.float32, 82 | copy=False) 83 | self.all_boxes[j][i] = c_dets 84 | 85 | if i % 500 == 0: 86 | print('im_detect: {:d}/{:d} {:.3f}s'.format(i + 1, 
num_images, detect_time)) 87 | 88 | with open(det_file, 'wb') as f: 89 | pickle.dump(self.all_boxes, f, pickle.HIGHEST_PROTOCOL) 90 | 91 | print('Evaluating detections') 92 | self.evaluate_detections(self.all_boxes) 93 | 94 | print('Mean AP: ', self.map) 95 | 96 | 97 | def parse_rec(self, filename): 98 | """ Parse a PASCAL VOC xml file """ 99 | tree = ET.parse(filename) 100 | objects = [] 101 | for obj in tree.findall('object'): 102 | obj_struct = {} 103 | obj_struct['name'] = obj.find('name').text 104 | obj_struct['pose'] = obj.find('pose').text 105 | obj_struct['truncated'] = int(obj.find('truncated').text) 106 | obj_struct['difficult'] = int(obj.find('difficult').text) 107 | bbox = obj.find('bndbox') 108 | obj_struct['bbox'] = [int(bbox.find('xmin').text), 109 | int(bbox.find('ymin').text), 110 | int(bbox.find('xmax').text), 111 | int(bbox.find('ymax').text)] 112 | objects.append(obj_struct) 113 | 114 | return objects 115 | 116 | 117 | def get_output_dir(self, name, phase): 118 | """Return the directory where experimental artifacts are placed. 119 | If the directory does not exist, it is created. 120 | A canonical path is built using the name from an imdb and a network 121 | (if not None). 122 | """ 123 | filedir = os.path.join(name, phase) 124 | if not os.path.exists(filedir): 125 | os.makedirs(filedir) 126 | return filedir 127 | 128 | 129 | def get_voc_results_file_template(self, cls): 130 | # VOCdevkit/VOC2007/results/det_test_aeroplane.txt 131 | filename = 'det_' + self.set_type + '_%s.txt' % (cls) 132 | filedir = os.path.join(self.devkit_path, 'results') 133 | if not os.path.exists(filedir): 134 | os.makedirs(filedir) 135 | path = os.path.join(filedir, filename) 136 | return path 137 | 138 | 139 | def write_voc_results_file(self, all_boxes): 140 | for cls_ind, cls in enumerate(self.labelmap): 141 | if self.display: 142 | print('Writing {:s} VOC results file'.format(cls)) 143 | filename = self.get_voc_results_file_template(cls) 144 | with open(filename, 'wt') as f: 145 | for im_ind, index in enumerate(self.dataset.ids): 146 | dets = all_boxes[cls_ind][im_ind] 147 | if dets == []: 148 | continue 149 | # the VOCdevkit expects 1-based indices 150 | for k in range(dets.shape[0]): 151 | f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'. 152 | format(index[1], dets[k, -1], 153 | dets[k, 0] + 1, dets[k, 1] + 1, 154 | dets[k, 2] + 1, dets[k, 3] + 1)) 155 | 156 | 157 | def do_python_eval(self, use_07=True): 158 | cachedir = os.path.join(self.devkit_path, 'annotations_cache') 159 | aps = [] 160 | # The PASCAL VOC metric changed in 2010 161 | use_07_metric = use_07 162 | print('VOC07 metric? 
' + ('Yes' if use_07_metric else 'No')) 163 | if not os.path.isdir(self.output_dir): 164 | os.mkdir(self.output_dir) 165 | for i, cls in enumerate(self.labelmap): 166 | filename = self.get_voc_results_file_template(cls) 167 | rec, prec, ap = self.voc_eval(detpath=filename, 168 | classname=cls, 169 | cachedir=cachedir, 170 | ovthresh=0.5, 171 | use_07_metric=use_07_metric 172 | ) 173 | aps += [ap] 174 | print('AP for {} = {:.4f}'.format(cls, ap)) 175 | with open(os.path.join(self.output_dir, cls + '_pr.pkl'), 'wb') as f: 176 | pickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f) 177 | if self.display: 178 | self.map = np.mean(aps) 179 | print('Mean AP = {:.4f}'.format(np.mean(aps))) 180 | print('~~~~~~~~') 181 | print('Results:') 182 | for ap in aps: 183 | print('{:.3f}'.format(ap)) 184 | print('{:.3f}'.format(np.mean(aps))) 185 | print('~~~~~~~~') 186 | print('') 187 | print('--------------------------------------------------------------') 188 | print('Results computed with the **unofficial** Python eval code.') 189 | print('Results should be very close to the official MATLAB eval code.') 190 | print('--------------------------------------------------------------') 191 | else: 192 | self.map = np.mean(aps) 193 | print('Mean AP = {:.4f}'.format(np.mean(aps))) 194 | 195 | 196 | def voc_ap(self, rec, prec, use_07_metric=True): 197 | """ ap = voc_ap(rec, prec, [use_07_metric]) 198 | Compute VOC AP given precision and recall. 199 | If use_07_metric is true, uses the 200 | VOC 07 11 point method (default:True). 201 | """ 202 | if use_07_metric: 203 | # 11 point metric 204 | ap = 0. 205 | for t in np.arange(0., 1.1, 0.1): 206 | if np.sum(rec >= t) == 0: 207 | p = 0 208 | else: 209 | p = np.max(prec[rec >= t]) 210 | ap = ap + p / 11. 211 | else: 212 | # correct AP calculation 213 | # first append sentinel values at the end 214 | mrec = np.concatenate(([0.], rec, [1.])) 215 | mpre = np.concatenate(([0.], prec, [0.])) 216 | 217 | # compute the precision envelope 218 | for i in range(mpre.size - 1, 0, -1): 219 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 220 | 221 | # to calculate area under PR curve, look for points 222 | # where X axis (recall) changes value 223 | i = np.where(mrec[1:] != mrec[:-1])[0] 224 | 225 | # and sum (\Delta recall) * prec 226 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 227 | return ap 228 | 229 | 230 | def voc_eval(self, detpath, classname, cachedir, ovthresh=0.5, use_07_metric=True): 231 | if not os.path.isdir(cachedir): 232 | os.mkdir(cachedir) 233 | cachefile = os.path.join(cachedir, 'annots.pkl') 234 | # read list of images 235 | with open(self.imgsetpath, 'r') as f: 236 | lines = f.readlines() 237 | imagenames = [x.strip() for x in lines] 238 | if not os.path.isfile(cachefile): 239 | # load annots 240 | recs = {} 241 | for i, imagename in enumerate(imagenames): 242 | recs[imagename] = self.parse_rec(self.annopath % (imagename)) 243 | if i % 100 == 0 and self.display: 244 | print('Reading annotation for {:d}/{:d}'.format( 245 | i + 1, len(imagenames))) 246 | # save 247 | if self.display: 248 | print('Saving cached annotations to {:s}'.format(cachefile)) 249 | with open(cachefile, 'wb') as f: 250 | pickle.dump(recs, f) 251 | else: 252 | # load 253 | with open(cachefile, 'rb') as f: 254 | recs = pickle.load(f) 255 | 256 | # extract gt objects for this class 257 | class_recs = {} 258 | npos = 0 259 | for imagename in imagenames: 260 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 261 | bbox = np.array([x['bbox'] for x in R]) 262 | difficult 
= np.array([x['difficult'] for x in R]).astype(np.bool) 263 | det = [False] * len(R) 264 | npos = npos + sum(~difficult) 265 | class_recs[imagename] = {'bbox': bbox, 266 | 'difficult': difficult, 267 | 'det': det} 268 | 269 | # read dets 270 | detfile = detpath.format(classname) 271 | with open(detfile, 'r') as f: 272 | lines = f.readlines() 273 | if any(lines) == 1: 274 | 275 | splitlines = [x.strip().split(' ') for x in lines] 276 | image_ids = [x[0] for x in splitlines] 277 | confidence = np.array([float(x[1]) for x in splitlines]) 278 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 279 | 280 | # sort by confidence 281 | sorted_ind = np.argsort(-confidence) 282 | sorted_scores = np.sort(-confidence) 283 | BB = BB[sorted_ind, :] 284 | image_ids = [image_ids[x] for x in sorted_ind] 285 | 286 | # go down dets and mark TPs and FPs 287 | nd = len(image_ids) 288 | tp = np.zeros(nd) 289 | fp = np.zeros(nd) 290 | for d in range(nd): 291 | R = class_recs[image_ids[d]] 292 | bb = BB[d, :].astype(float) 293 | ovmax = -np.inf 294 | BBGT = R['bbox'].astype(float) 295 | if BBGT.size > 0: 296 | # compute overlaps 297 | # intersection 298 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 299 | iymin = np.maximum(BBGT[:, 1], bb[1]) 300 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 301 | iymax = np.minimum(BBGT[:, 3], bb[3]) 302 | iw = np.maximum(ixmax - ixmin, 0.) 303 | ih = np.maximum(iymax - iymin, 0.) 304 | inters = iw * ih 305 | uni = ((bb[2] - bb[0]) * (bb[3] - bb[1]) + 306 | (BBGT[:, 2] - BBGT[:, 0]) * 307 | (BBGT[:, 3] - BBGT[:, 1]) - inters) 308 | overlaps = inters / uni 309 | ovmax = np.max(overlaps) 310 | jmax = np.argmax(overlaps) 311 | 312 | if ovmax > ovthresh: 313 | if not R['difficult'][jmax]: 314 | if not R['det'][jmax]: 315 | tp[d] = 1. 316 | R['det'][jmax] = 1 317 | else: 318 | fp[d] = 1. 319 | else: 320 | fp[d] = 1. 321 | 322 | # compute precision recall 323 | fp = np.cumsum(fp) 324 | tp = np.cumsum(tp) 325 | rec = tp / float(npos) 326 | # avoid divide by zero in case the first detection matches a difficult 327 | # ground truth 328 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 329 | ap = self.voc_ap(rec, prec, use_07_metric) 330 | else: 331 | rec = -1. 332 | prec = -1. 333 | ap = -1. 
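# Recap of the matching above: detections are visited in descending score order;
# a detection is a true positive when its best IoU with a non-difficult,
# not-yet-matched ground-truth box exceeds ovthresh, a duplicate match or an IoU
# below the threshold counts as a false positive, and matches to "difficult"
# objects are ignored. rec/prec are the cumulative recall/precision curves, and
# voc_ap() reduces them to a single AP: the VOC07 metric averages the precision
# sampled at the 11 recall points 0.0, 0.1, ..., 1.0, while the newer metric
# integrates the area under the monotone precision envelope.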
334 | 335 | return rec, prec, ap 336 | 337 | 338 | def evaluate_detections(self, box_list): 339 | self.write_voc_results_file(box_list) 340 | self.do_python_eval() 341 | 342 | 343 | if __name__ == '__main__': 344 | pass -------------------------------------------------------------------------------- /models/conv.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class Conv(nn.Module): 5 | def __init__(self, in_ch, out_ch, k=1, p=0, s=1, d=1, g=1, act=True, bias=False): 6 | super(Conv, self).__init__() 7 | if act: 8 | self.convs = nn.Sequential( 9 | nn.Conv2d(in_ch, out_ch, k, stride=s, padding=p, dilation=d, groups=g, bias=bias), 10 | nn.BatchNorm2d(out_ch), 11 | nn.ReLU(inplace=True) 12 | ) 13 | else: 14 | self.convs = nn.Sequential( 15 | nn.Conv2d(in_ch, out_ch, k, stride=s, padding=p, dilation=d, groups=g, bias=bias), 16 | nn.BatchNorm2d(out_ch) 17 | ) 18 | self.init_weight() 19 | 20 | def init_weight(self): 21 | for m in self.modules(): 22 | if isinstance(m, nn.Conv2d): 23 | nn.init.normal_(m.weight, mean=0, std=0.01) 24 | if hasattr(m, 'bias') and m.bias is not None: 25 | nn.init.constant_(m.bias, 0) 26 | 27 | def forward(self, x): 28 | return self.convs(x) 29 | -------------------------------------------------------------------------------- /models/fcos.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | from .resnet import build_backbone 7 | from .conv import Conv 8 | 9 | from utils import box_ops 10 | from utils import loss 11 | 12 | 13 | class FCOS(nn.Module): 14 | def __init__(self, 15 | device, 16 | img_size, 17 | num_classes=80, 18 | trainable=False, 19 | conf_thresh=0.05, 20 | nms_thresh=0.5, 21 | bk='r18', 22 | freeze_bn=False): 23 | super(FCOS, self).__init__() 24 | self.device = device 25 | self.img_size = img_size 26 | self.num_classes = num_classes 27 | self.trainable = trainable 28 | self.conf_thresh = conf_thresh 29 | self.nms_thresh = nms_thresh 30 | self.freeze_bn = freeze_bn 31 | self.strides = [8, 16, 32, 64, 128] 32 | self.grid_cell = self.create_grid(img_size) 33 | 34 | # backbone 35 | self.backbone, feature_channels = build_backbone(pretrained=trainable, freeze=trainable, model=bk) 36 | c3, c4, c5 = feature_channels 37 | 38 | # latter layers 39 | self.latter_1 = nn.Conv2d(c3, 256, kernel_size=1) 40 | self.latter_2 = nn.Conv2d(c4, 256, kernel_size=1) 41 | self.latter_3 = nn.Conv2d(c5, 256, kernel_size=1) 42 | self.latter_4 = nn.Conv2d(256, 256, kernel_size=3, padding=1, stride=2) 43 | self.latter_5 = nn.Conv2d(256, 256, kernel_size=3, padding=1, stride=2) 44 | 45 | # smooth layers 46 | self.smooth_1 = nn.Conv2d(256, 256, kernel_size=3, padding=1) 47 | self.smooth_2 = nn.Conv2d(256, 256, kernel_size=3, padding=1) 48 | self.smooth_3 = nn.Conv2d(256, 256, kernel_size=3, padding=1) 49 | 50 | # head 51 | self.cls_head = nn.Sequential( 52 | Conv(256, 256, k=3, p=1), 53 | Conv(256, 256, k=3, p=1), 54 | Conv(256, 256, k=3, p=1), 55 | Conv(256, 256, k=3, p=1) 56 | ) 57 | self.reg_head = nn.Sequential( 58 | Conv(256, 256, k=3, p=1), 59 | Conv(256, 256, k=3, p=1), 60 | Conv(256, 256, k=3, p=1), 61 | Conv(256, 256, k=3, p=1) 62 | ) 63 | 64 | # det 65 | self.cls_det = nn.Conv2d(256, self.num_classes, kernel_size=1) 66 | self.reg_det = nn.Conv2d(256, 4, kernel_size=1) 67 | self.ctn_det = nn.Conv2d(256, 1, kernel_size=1) 68 | 69 | # init weight 70 | self.init_weight() 
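# Layout of the model built above: latter_1..latter_3 are the 1x1 lateral convs
# of a standard FPN over the backbone's C3-C5 features, smooth_1..smooth_3 are
# the 3x3 output convs, and latter_4/latter_5 are stride-2 3x3 convs that add P6
# and P7, so the five pyramid levels P3-P7 line up with
# self.strides = [8, 16, 32, 64, 128]. cls_head and reg_head are stacks of four
# 3x3 conv blocks shared across all levels; cls_det, reg_det and ctn_det are 1x1
# convs producing class scores, the 4-d box offsets and centerness. In forward()
# a box is decoded per grid cell as
#   x1y1 = (cell_xy - exp(offsets[:2])) * stride
#   x2y2 = (cell_xy + exp(offsets[2:])) * stride
# e.g. a cell at (10, 8) on the stride-8 level with exp(offsets) = [3, 2, 4, 5]
# decodes to the box (56, 48, 112, 104) in input-image pixels.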
71 | 72 | 73 | def init_weight(self): 74 | for m in [self.latter_1, self.latter_2, self.latter_3, self.latter_4, self.latter_5]: 75 | if isinstance(m, nn.Conv2d): 76 | nn.init.normal_(m.weight, mean=0, std=0.01) 77 | if hasattr(m, 'bias') and m.bias is not None: 78 | nn.init.constant_(m.bias, 0) 79 | 80 | for m in [self.smooth_1, self.smooth_2, self.smooth_3]: 81 | if isinstance(m, nn.Conv2d): 82 | nn.init.normal_(m.weight, mean=0, std=0.01) 83 | if hasattr(m, 'bias') and m.bias is not None: 84 | nn.init.constant_(m.bias, 0) 85 | 86 | # init weight of cls_pred 87 | init_prob = 0.01 88 | bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob)) 89 | nn.init.constant_(self.cls_det.bias, bias_value) 90 | 91 | 92 | def create_grid(self, img_size): 93 | total_grid_xy = [] 94 | w = h = img_size 95 | for s in self.strides: 96 | # generate grid cells 97 | ws, hs = w // s, h // s 98 | grid_y, grid_x = torch.meshgrid([torch.arange(hs), torch.arange(ws)]) 99 | # [H, W, 2] -> [HW, 2] 100 | grid_xy = torch.stack([grid_x, grid_y], dim=-1).float().view(-1, 2) 101 | # [1, H*W, 2] 102 | grid_xy = grid_xy[None, :, :].to(self.device) 103 | 104 | total_grid_xy.append(grid_xy) 105 | 106 | return total_grid_xy 107 | 108 | 109 | def set_grid(self, img_size): 110 | self.img_size = img_size 111 | self.grid_cell = self.create_grid(img_size) 112 | 113 | 114 | def nms(self, dets, scores): 115 | """"Pure Python NMS baseline.""" 116 | x1 = dets[:, 0] #xmin 117 | y1 = dets[:, 1] #ymin 118 | x2 = dets[:, 2] #xmax 119 | y2 = dets[:, 3] #ymax 120 | 121 | areas = (x2 - x1) * (y2 - y1) 122 | order = scores.argsort()[::-1] 123 | 124 | keep = [] 125 | while order.size > 0: 126 | i = order[0] 127 | keep.append(i) 128 | # compute iou 129 | xx1 = np.maximum(x1[i], x1[order[1:]]) 130 | yy1 = np.maximum(y1[i], y1[order[1:]]) 131 | xx2 = np.minimum(x2[i], x2[order[1:]]) 132 | yy2 = np.minimum(y2[i], y2[order[1:]]) 133 | 134 | w = np.maximum(1e-28, xx2 - xx1) 135 | h = np.maximum(1e-28, yy2 - yy1) 136 | inter = w * h 137 | 138 | ovr = inter / (areas[i] + areas[order[1:]] - inter + 1e-10) 139 | #reserve all the boundingbox whose ovr less than thresh 140 | inds = np.where(ovr <= self.nms_thresh)[0] 141 | order = order[inds + 1] 142 | 143 | return keep 144 | 145 | 146 | def postprocess(self, bboxes, scores): 147 | """ 148 | bboxes: (HxW, 4), bsize = 1 149 | scores: (HxW, num_classes), bsize = 1 150 | """ 151 | 152 | cls_inds = np.argmax(scores, axis=1) 153 | scores = scores[(np.arange(scores.shape[0]), cls_inds)] 154 | 155 | # threshold 156 | keep = np.where(scores >= self.conf_thresh) 157 | bboxes = bboxes[keep] 158 | scores = scores[keep] 159 | cls_inds = cls_inds[keep] 160 | 161 | # NMS 162 | keep = np.zeros(len(bboxes), dtype=np.int) 163 | for i in range(self.num_classes): 164 | inds = np.where(cls_inds == i)[0] 165 | if len(inds) == 0: 166 | continue 167 | c_bboxes = bboxes[inds] 168 | c_scores = scores[inds] 169 | c_keep = self.nms(c_bboxes, c_scores) 170 | keep[inds[c_keep]] = 1 171 | 172 | keep = np.where(keep > 0) 173 | bboxes = bboxes[keep] 174 | scores = scores[keep] 175 | cls_inds = cls_inds[keep] 176 | 177 | return bboxes, scores, cls_inds 178 | 179 | 180 | def forward(self, x, targets=None): 181 | B = x.size(0) 182 | C = self.num_classes 183 | # backbone 184 | c3, c4, c5 = self.backbone(x) 185 | 186 | # fpn 187 | p5 = self.latter_3(c5) 188 | p5_up = F.interpolate(p5, scale_factor=2) 189 | p5 = self.smooth_3(p5) 190 | 191 | p4 = self.latter_2(c4) + p5_up 192 | p4_up = F.interpolate(p4, scale_factor=2) 193 | p4 = 
self.smooth_2(p4) 194 | 195 | p3 = self.smooth_1(self.latter_1(c3) + p4_up) 196 | # p5 -> p6, p6 -> p7 197 | p6 = self.latter_4(p5) 198 | p7 = self.latter_5(p6) 199 | 200 | features = [p3, p4, p5, p6, p7] 201 | 202 | cls_pred = [] 203 | reg_pred = [] 204 | ctn_pred = [] 205 | # head 206 | for i, p in enumerate(features): 207 | cls_feat = self.cls_head(p) 208 | reg_feat = self.reg_head(p) 209 | # [B, C, H, W] -> [B, H*W, C] 210 | cls_pred_i = self.cls_det(cls_feat).permute(0, 2, 3, 1).reshape(B, -1, C) 211 | # [B, 4, H, W] -> [B, H*W, 4] 212 | reg_pred_i = self.reg_det(reg_feat).permute(0, 2, 3, 1).reshape(B, -1, 4) 213 | x1y1_pred_i = (self.grid_cell[i] - reg_pred_i[..., :2].exp()) * self.strides[i] # x1y1 214 | x2y2_pred_i = (self.grid_cell[i] + reg_pred_i[..., 2:].exp()) * self.strides[i] # x2y2 215 | box_pred_i = torch.cat([x1y1_pred_i, x2y2_pred_i], dim=-1) 216 | # [B, 1, H, W] -> [B, H*W, 1] 217 | ctn_det_i = self.ctn_det(reg_feat).permute(0, 2, 3, 1).reshape(B, -1, 1) 218 | 219 | cls_pred.append(cls_pred_i) 220 | reg_pred.append(box_pred_i) 221 | ctn_pred.append(ctn_det_i) 222 | 223 | cls_pred = torch.cat(cls_pred, dim=1) # [B, N, C] 224 | reg_pred = torch.cat(reg_pred, dim=1) # [B, N, 4] 225 | ctn_pred = torch.cat(ctn_pred, dim=1) # [B, N, 1] 226 | 227 | # train 228 | if self.trainable: 229 | # compute giou between pred bboxes and gt bboxes 230 | x1y1x2y2_pred = (reg_pred / self.img_size).reshape(-1, 4) 231 | x1y1x2y2_gt = targets[:, :, -5:-1].reshape(-1, 4) 232 | 233 | # giou 234 | giou_pred = box_ops.giou_score(x1y1x2y2_pred, x1y1x2y2_gt, batch_size=B) 235 | 236 | # compute loss 237 | cls_loss, reg_loss, ctn_loss, total_loss = loss.loss( 238 | pred_cls=cls_pred, 239 | pred_giou=giou_pred, 240 | pred_ctn=ctn_pred, 241 | label=targets, 242 | num_classes=self.num_classes 243 | ) 244 | 245 | return cls_loss, reg_loss, ctn_loss, total_loss 246 | 247 | # test 248 | else: 249 | with torch.no_grad(): 250 | # batch size = 1 251 | scores = torch.sqrt(cls_pred.sigmoid() * ctn_pred.sigmoid())[0] 252 | bboxes = torch.clamp(reg_pred / self.img_size, 0, 1)[0] 253 | 254 | # to cpu 255 | scores = scores.cpu().numpy() 256 | bboxes = bboxes.cpu().numpy() 257 | 258 | # postprocess 259 | bboxes, scores, cls_inds = self.postprocess(bboxes, scores) 260 | 261 | return bboxes, scores, cls_inds 262 | -------------------------------------------------------------------------------- /models/fcos_rt.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | from .resnet import build_backbone 7 | from .conv import Conv 8 | 9 | from utils import box_ops 10 | from utils import loss 11 | 12 | 13 | class FCOS_RT(nn.Module): 14 | def __init__(self, 15 | device, 16 | img_size=640, 17 | num_classes=80, 18 | trainable=False, 19 | conf_thresh=0.03, 20 | nms_thresh=0.6, 21 | bk='r18'): 22 | super(FCOS_RT, self).__init__() 23 | self.device = device 24 | self.img_size = img_size 25 | self.num_classes = num_classes 26 | self.trainable = trainable 27 | self.conf_thresh = conf_thresh 28 | self.nms_thresh = nms_thresh 29 | self.strides = [8, 16, 32] 30 | self.grid_cell = self.create_grid(img_size) 31 | 32 | # backbone 33 | self.backbone, feature_channels = build_backbone(pretrained=trainable, freeze=trainable, model=bk) 34 | c3, c4, c5 = feature_channels 35 | 36 | # latter layers 37 | self.latter_1 = nn.Conv2d(c3, 256, kernel_size=1) 38 | self.latter_2 = nn.Conv2d(c4, 256, kernel_size=1) 39 | 
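# Only three lateral 1x1 convs are built here: FCOS_RT keeps the P3-P5 pyramid
# (self.strides = [8, 16, 32]) and, unlike FCOS above, has no latter_4/latter_5
# convs and hence no P6/P7 levels; the heads and the exp-based box decoding are
# otherwise identical, which is what makes this the lighter real-time variant.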
self.latter_3 = nn.Conv2d(c5, 256, kernel_size=1) 40 | 41 | # smooth layers 42 | self.smooth_1 = nn.Conv2d(256, 256, kernel_size=3, padding=1) 43 | self.smooth_2 = nn.Conv2d(256, 256, kernel_size=3, padding=1) 44 | self.smooth_3 = nn.Conv2d(256, 256, kernel_size=3, padding=1) 45 | 46 | # head 47 | self.cls_head = nn.Sequential( 48 | Conv(256, 256, k=3, p=1), 49 | Conv(256, 256, k=3, p=1), 50 | Conv(256, 256, k=3, p=1), 51 | Conv(256, 256, k=3, p=1) 52 | ) 53 | self.reg_head = nn.Sequential( 54 | Conv(256, 256, k=3, p=1), 55 | Conv(256, 256, k=3, p=1), 56 | Conv(256, 256, k=3, p=1), 57 | Conv(256, 256, k=3, p=1) 58 | ) 59 | 60 | # det 61 | self.cls_det = nn.Conv2d(256, self.num_classes, kernel_size=1) 62 | self.reg_det = nn.Conv2d(256, 4, kernel_size=1) 63 | self.ctn_det = nn.Conv2d(256, 1, kernel_size=1) 64 | 65 | # init weight 66 | self.init_weight() 67 | 68 | 69 | def init_weight(self): 70 | for m in [self.latter_1, self.latter_2, self.latter_3]: 71 | if isinstance(m, nn.Conv2d): 72 | nn.init.normal_(m.weight, mean=0, std=0.01) 73 | if hasattr(m, 'bias') and m.bias is not None: 74 | nn.init.constant_(m.bias, 0) 75 | 76 | for m in [self.smooth_1, self.smooth_2, self.smooth_3]: 77 | if isinstance(m, nn.Conv2d): 78 | nn.init.normal_(m.weight, mean=0, std=0.01) 79 | if hasattr(m, 'bias') and m.bias is not None: 80 | nn.init.constant_(m.bias, 0) 81 | 82 | # init weight of cls_pred 83 | init_prob = 0.01 84 | bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob)) 85 | nn.init.constant_(self.cls_det.bias, bias_value) 86 | 87 | 88 | def create_grid(self, img_size): 89 | total_grid_xy = [] 90 | w = h = img_size 91 | for s in self.strides: 92 | # generate grid cells 93 | ws, hs = w // s, h // s 94 | grid_y, grid_x = torch.meshgrid([torch.arange(hs), torch.arange(ws)]) 95 | # [H, W, 2] -> [HW, 2] 96 | grid_xy = torch.stack([grid_x, grid_y], dim=-1).float().view(-1, 2) 97 | # [1, H*W, 2] 98 | grid_xy = grid_xy[None, :, :].to(self.device) 99 | 100 | total_grid_xy.append(grid_xy) 101 | 102 | return total_grid_xy 103 | 104 | 105 | def set_grid(self, img_size): 106 | self.img_size = img_size 107 | self.grid_cell = self.create_grid(img_size) 108 | 109 | 110 | def nms(self, dets, scores): 111 | """"Pure Python NMS baseline.""" 112 | x1 = dets[:, 0] #xmin 113 | y1 = dets[:, 1] #ymin 114 | x2 = dets[:, 2] #xmax 115 | y2 = dets[:, 3] #ymax 116 | 117 | areas = (x2 - x1) * (y2 - y1) 118 | order = scores.argsort()[::-1] 119 | 120 | keep = [] 121 | while order.size > 0: 122 | i = order[0] 123 | keep.append(i) 124 | # compute iou 125 | xx1 = np.maximum(x1[i], x1[order[1:]]) 126 | yy1 = np.maximum(y1[i], y1[order[1:]]) 127 | xx2 = np.minimum(x2[i], x2[order[1:]]) 128 | yy2 = np.minimum(y2[i], y2[order[1:]]) 129 | 130 | w = np.maximum(1e-28, xx2 - xx1) 131 | h = np.maximum(1e-28, yy2 - yy1) 132 | inter = w * h 133 | 134 | ovr = inter / (areas[i] + areas[order[1:]] - inter + 1e-10) 135 | #reserve all the boundingbox whose ovr less than thresh 136 | inds = np.where(ovr <= self.nms_thresh)[0] 137 | order = order[inds + 1] 138 | 139 | return keep 140 | 141 | 142 | def postprocess(self, bboxes, scores): 143 | """ 144 | bboxes: (HxW, 4), bsize = 1 145 | scores: (HxW, num_classes), bsize = 1 146 | """ 147 | 148 | cls_inds = np.argmax(scores, axis=1) 149 | scores = scores[(np.arange(scores.shape[0]), cls_inds)] 150 | 151 | # threshold 152 | keep = np.where(scores >= self.conf_thresh) 153 | bboxes = bboxes[keep] 154 | scores = scores[keep] 155 | cls_inds = cls_inds[keep] 156 | 157 | # NMS 158 | keep = 
np.zeros(len(bboxes), dtype=np.int) 159 | for i in range(self.num_classes): 160 | inds = np.where(cls_inds == i)[0] 161 | if len(inds) == 0: 162 | continue 163 | c_bboxes = bboxes[inds] 164 | c_scores = scores[inds] 165 | c_keep = self.nms(c_bboxes, c_scores) 166 | keep[inds[c_keep]] = 1 167 | 168 | keep = np.where(keep > 0) 169 | bboxes = bboxes[keep] 170 | scores = scores[keep] 171 | cls_inds = cls_inds[keep] 172 | 173 | return bboxes, scores, cls_inds 174 | 175 | 176 | def forward(self, x, targets=None): 177 | B = x.size(0) 178 | C = self.num_classes 179 | # backbone 180 | c3, c4, c5 = self.backbone(x) 181 | 182 | # fpn 183 | p5 = self.latter_3(c5) 184 | p5_up = F.interpolate(p5, scale_factor=2) 185 | p5 = self.smooth_3(p5) 186 | 187 | p4 = self.latter_2(c4) + p5_up 188 | p4_up = F.interpolate(p4, scale_factor=2) 189 | p4 = self.smooth_2(p4) 190 | 191 | p3 = self.smooth_1(self.latter_1(c3) + p4_up) 192 | 193 | features = [p3, p4, p5] 194 | 195 | cls_pred = [] 196 | reg_pred = [] 197 | ctn_pred = [] 198 | # head 199 | for i, p in enumerate(features): 200 | cls_feat = self.cls_head(p) 201 | reg_feat = self.reg_head(p) 202 | # [B, C, H, W] -> [B, H*W, C] 203 | cls_pred_i = self.cls_det(cls_feat).permute(0, 2, 3, 1).contiguous().view(B, -1, C) 204 | # [B, 4, H, W] -> [B, H*W, 4] 205 | reg_pred_i = self.reg_det(reg_feat).permute(0, 2, 3, 1).contiguous().view(B, -1, 4) 206 | x1y1_pred_i = (self.grid_cell[i] - reg_pred_i[..., :2].exp()) * self.strides[i] # x1y1 207 | x2y2_pred_i = (self.grid_cell[i] + reg_pred_i[..., 2:].exp()) * self.strides[i] # x2y2 208 | box_pred_i = torch.cat([x1y1_pred_i, x2y2_pred_i], dim=-1) 209 | # [B, 1, H, W] -> [B, H*W, 1] 210 | ctn_det_i = self.ctn_det(reg_feat).permute(0, 2, 3, 1).contiguous().view(B, -1, 1) 211 | 212 | cls_pred.append(cls_pred_i) 213 | reg_pred.append(box_pred_i) 214 | ctn_pred.append(ctn_det_i) 215 | 216 | cls_pred = torch.cat(cls_pred, dim=1) # [B, N, C] 217 | reg_pred = torch.cat(reg_pred, dim=1) # [B, N, 4] 218 | ctn_pred = torch.cat(ctn_pred, dim=1) # [B, N, 1] 219 | 220 | # train 221 | if self.trainable: 222 | # compute giou between pred bboxes and gt bboxes 223 | x1y1x2y2_pred = (reg_pred / self.img_size).view(-1, 4) 224 | x1y1x2y2_gt = targets[:, :, -5:-1].view(-1, 4) 225 | 226 | # giou 227 | giou_pred = box_ops.giou_score(x1y1x2y2_pred, x1y1x2y2_gt, batch_size=B) 228 | 229 | # compute loss 230 | cls_loss, reg_loss, ctn_loss, total_loss = loss.loss( 231 | pred_cls=cls_pred, 232 | pred_giou=giou_pred, 233 | pred_ctn=ctn_pred, 234 | target=targets, 235 | num_classes=self.num_classes) 236 | 237 | return cls_loss, reg_loss, ctn_loss, total_loss 238 | 239 | # test 240 | else: 241 | with torch.no_grad(): 242 | # batch size = 1 243 | scores = torch.sqrt(cls_pred.sigmoid() * ctn_pred.sigmoid())[0] 244 | bboxes = torch.clamp(reg_pred / self.img_size, 0, 1)[0] 245 | 246 | # to cpu 247 | scores = scores.cpu().numpy() 248 | bboxes = bboxes.cpu().numpy() 249 | 250 | # postprocess 251 | bboxes, scores, cls_inds = self.postprocess(bboxes, scores) 252 | 253 | return bboxes, scores, cls_inds 254 | -------------------------------------------------------------------------------- /models/resnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.utils.model_zoo as model_zoo 4 | 5 | 6 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 7 | 'resnet152'] 8 | 9 | 10 | model_urls = { 11 | 'resnet18': 
'https://download.pytorch.org/models/resnet18-5c106cde.pth', 12 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 13 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 14 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 15 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 16 | } 17 | 18 | 19 | def conv3x3(in_planes, out_planes, stride=1): 20 | """3x3 convolution with padding""" 21 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 22 | padding=1, bias=False) 23 | 24 | def conv1x1(in_planes, out_planes, stride=1): 25 | """1x1 convolution""" 26 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 27 | 28 | class BasicBlock(nn.Module): 29 | expansion = 1 30 | 31 | def __init__(self, inplanes, planes, stride=1, downsample=None): 32 | super(BasicBlock, self).__init__() 33 | self.conv1 = conv3x3(inplanes, planes, stride) 34 | self.bn1 = nn.BatchNorm2d(planes) 35 | self.relu = nn.ReLU(inplace=True) 36 | self.conv2 = conv3x3(planes, planes) 37 | self.bn2 = nn.BatchNorm2d(planes) 38 | self.downsample = downsample 39 | self.stride = stride 40 | 41 | def forward(self, x): 42 | identity = x 43 | 44 | out = self.conv1(x) 45 | out = self.bn1(out) 46 | out = self.relu(out) 47 | 48 | out = self.conv2(out) 49 | out = self.bn2(out) 50 | 51 | if self.downsample is not None: 52 | identity = self.downsample(x) 53 | 54 | out += identity 55 | out = self.relu(out) 56 | 57 | return out 58 | 59 | class Bottleneck(nn.Module): 60 | expansion = 4 61 | 62 | def __init__(self, inplanes, planes, stride=1, downsample=None): 63 | super(Bottleneck, self).__init__() 64 | self.conv1 = conv1x1(inplanes, planes) 65 | self.bn1 = nn.BatchNorm2d(planes) 66 | self.conv2 = conv3x3(planes, planes, stride) 67 | self.bn2 = nn.BatchNorm2d(planes) 68 | self.conv3 = conv1x1(planes, planes * self.expansion) 69 | self.bn3 = nn.BatchNorm2d(planes * self.expansion) 70 | self.relu = nn.ReLU(inplace=True) 71 | self.downsample = downsample 72 | self.stride = stride 73 | 74 | def forward(self, x): 75 | identity = x 76 | 77 | out = self.conv1(x) 78 | out = self.bn1(out) 79 | out = self.relu(out) 80 | 81 | out = self.conv2(out) 82 | out = self.bn2(out) 83 | out = self.relu(out) 84 | 85 | out = self.conv3(out) 86 | out = self.bn3(out) 87 | 88 | if self.downsample is not None: 89 | identity = self.downsample(x) 90 | 91 | out += identity 92 | out = self.relu(out) 93 | 94 | return out 95 | 96 | class ResNet(nn.Module): 97 | 98 | def __init__(self, block, layers, zero_init_residual=False): 99 | super(ResNet, self).__init__() 100 | self.inplanes = 64 101 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 102 | bias=False) 103 | self.bn1 = nn.BatchNorm2d(64) 104 | self.relu = nn.ReLU(inplace=True) 105 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 106 | self.layer1 = self._make_layer(block, 64, layers[0]) 107 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 108 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 109 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 110 | 111 | for m in self.modules(): 112 | if isinstance(m, nn.Conv2d): 113 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 114 | elif isinstance(m, nn.BatchNorm2d): 115 | nn.init.constant_(m.weight, 1) 116 | nn.init.constant_(m.bias, 0) 117 | 118 | # Zero-initialize the last BN in each residual branch, 119 | # so that the residual 
branch starts with zeros, and each residual block behaves like an identity. 120 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 121 | if zero_init_residual: 122 | for m in self.modules(): 123 | if isinstance(m, Bottleneck): 124 | nn.init.constant_(m.bn3.weight, 0) 125 | elif isinstance(m, BasicBlock): 126 | nn.init.constant_(m.bn2.weight, 0) 127 | 128 | 129 | def _make_layer(self, block, planes, blocks, stride=1): 130 | downsample = None 131 | if stride != 1 or self.inplanes != planes * block.expansion: 132 | downsample = nn.Sequential( 133 | conv1x1(self.inplanes, planes * block.expansion, stride), 134 | nn.BatchNorm2d(planes * block.expansion), 135 | ) 136 | 137 | layers = [] 138 | layers.append(block(self.inplanes, planes, stride, downsample)) 139 | self.inplanes = planes * block.expansion 140 | for _ in range(1, blocks): 141 | layers.append(block(self.inplanes, planes)) 142 | 143 | return nn.Sequential(*layers) 144 | 145 | def freeze_bn(self): 146 | '''Freeze BatchNorm layers.''' 147 | for m in self.modules(): 148 | if isinstance(m, nn.BatchNorm2d): 149 | m.eval() 150 | 151 | def freeze_stage(self): 152 | # freeze stage = 1 153 | for p in self.conv1.parameters(): 154 | p.requires_grad = False 155 | for p in self.bn1.parameters(): 156 | p.requires_grad = False 157 | # for p in self.layer1.parameters(): 158 | # p.requires_grad = False 159 | 160 | def forward(self, x): 161 | c1 = self.conv1(x) 162 | c1 = self.bn1(c1) 163 | c1 = self.relu(c1) 164 | c1 = self.maxpool(c1) 165 | 166 | c2 = self.layer1(c1) 167 | c3 = self.layer2(c2) 168 | c4 = self.layer3(c3) 169 | c5 = self.layer4(c4) 170 | 171 | return c3, c4, c5 172 | 173 | def resnet18(pretrained=False, freeze_bn=False, **kwargs): 174 | """Constructs a ResNet-18 model. 175 | 176 | Args: 177 | pretrained (bool): If True, returns a model pre-trained on ImageNet 178 | """ 179 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 180 | if pretrained: 181 | # strict = False as we don't need fc layer params. 182 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18']), strict=False) 183 | # freeze bn 184 | if freeze_bn: 185 | print('freeze bn ...') 186 | model.freeze_bn() 187 | print('freeze stage 1') 188 | model.freeze_stage() 189 | 190 | return model 191 | 192 | def resnet34(pretrained=False, freeze_bn=False, **kwargs): 193 | """Constructs a ResNet-34 model. 194 | 195 | Args: 196 | pretrained (bool): If True, returns a model pre-trained on ImageNet 197 | """ 198 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 199 | if pretrained: 200 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34']), strict=False) 201 | # freeze bn 202 | if freeze_bn: 203 | print('freeze bn ...') 204 | model.freeze_bn() 205 | print('freeze stage 1') 206 | model.freeze_stage() 207 | 208 | return model 209 | 210 | def resnet50(pretrained=False, freeze_bn=False, **kwargs): 211 | """Constructs a ResNet-50 model. 212 | 213 | Args: 214 | pretrained (bool): If True, returns a model pre-trained on ImageNet 215 | """ 216 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 217 | if pretrained: 218 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50']), strict=False) 219 | # freeze bn 220 | if freeze_bn: 221 | print('freeze bn ...') 222 | model.freeze_bn() 223 | print('freeze stage 1') 224 | model.freeze_stage() 225 | 226 | return model 227 | 228 | def resnet101(pretrained=False, freeze_bn=False, **kwargs): 229 | """Constructs a ResNet-101 model. 
230 | 231 |     Args: 232 |         pretrained (bool): If True, returns a model pre-trained on ImageNet 233 |     """ 234 |     model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 235 |     if pretrained: 236 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet101']), strict=False) 237 |     # freeze bn 238 |     if freeze_bn: 239 |         print('freeze bn ...') 240 |         model.freeze_bn() 241 |         print('freeze stage 1') 242 |         model.freeze_stage() 243 | 244 |     return model 245 | 246 | def resnet152(pretrained=False, freeze_bn=False, **kwargs): 247 |     """Constructs a ResNet-152 model. 248 | 249 |     Args: 250 |         pretrained (bool): If True, returns a model pre-trained on ImageNet 251 |     """ 252 |     model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) 253 |     if pretrained: 254 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet152']), strict=False)  # strict=False: the pretrained fc layer is not part of this backbone 255 |     # freeze bn 256 |     if freeze_bn: 257 |         print('freeze bn ...') 258 |         model.freeze_bn() 259 |         print('freeze stage 1') 260 |         model.freeze_stage() 261 | 262 |     return model 263 | 264 | 265 | def build_backbone(pretrained=False, freeze=False, model='r18'): 266 |     if model == 'r18': 267 |         return resnet18(pretrained=pretrained, freeze_bn=freeze), [128, 256, 512] 268 |     elif model == 'r34': 269 |         return resnet34(pretrained=pretrained, freeze_bn=freeze), [128, 256, 512] 270 |     elif model == 'r50': 271 |         return resnet50(pretrained=pretrained, freeze_bn=freeze), [512, 1024, 2048] 272 |     elif model == 'r101': 273 |         return resnet101(pretrained=pretrained, freeze_bn=freeze), [512, 1024, 2048] 274 | 275 | 276 | if __name__=='__main__': 277 |     #model = torchvision.models.resnet50() 278 |     print("found ", torch.cuda.device_count(), " GPU(s)") 279 |     device = torch.device("cuda") 280 |     model = resnet101().to(device) 281 |     print(model) 282 | 283 |     input = torch.randn(1, 3, 512, 512).to(device) 284 |     output = model(input) -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import cv2 3 | import os 4 | import time 5 | import numpy as np 6 | import torch 7 | import torch.backends.cudnn as cudnn 8 | 9 | from data.voc import VOC_CLASSES, VOCDetection 10 | from data.coco import coco_class_index, coco_class_labels, COCODataset 11 | # from data import config  # unused import; data/config.py is not present in this repo 12 | from data.transforms import ValTransforms 13 | 14 | from utils.misc import TestTimeAugmentation 15 | 16 | 17 | 18 | parser = argparse.ArgumentParser(description='FCOS-RT Detection') 19 | # basic 20 | parser.add_argument('-size', '--img_size', default=512, type=int, 21 |                     help='img_size') 22 | parser.add_argument('--show', action='store_true', default=False, 23 |                     help='show the visualization results.') 24 | parser.add_argument('-vs', '--visual_threshold', default=0.5, type=float, 25 |                     help='Final confidence threshold') 26 | parser.add_argument('--cuda', action='store_true', default=False, 27 |                     help='use cuda.') 28 | parser.add_argument('--save_folder', default='det_results/', type=str, 29 |                     help='Dir to save results') 30 | # model 31 | parser.add_argument('-v', '--version', default='fcos_rt', 32 |                     help='fcos_rt') 33 | parser.add_argument('-bk', '--backbone', default='r18', 34 |                     help='r18, r50, r101') 35 | parser.add_argument('--trained_model', default='weight/', 36 |                     type=str, help='Trained state_dict file path to open') 37 | parser.add_argument('--conf_thresh', default=0.1, type=float, 38 |                     help='confidence threshold') 39 | parser.add_argument('--nms_thresh', default=0.45, type=float, 40 |                     help='NMS threshold') 41 | # 
dataset 42 | parser.add_argument('--root', default='/mnt/share/ssd2/dataset', 43 |                     help='data root') 44 | parser.add_argument('-d', '--dataset', default='coco', 45 |                     help='coco.') 46 | # TTA 47 | parser.add_argument('-tta', '--test_aug', action='store_true', default=False, 48 |                     help='use test augmentation.') 49 | 50 | args = parser.parse_args() 51 | 52 | 53 | def plot_bbox_labels(img, bbox, label=None, cls_color=None, text_scale=0.4): 54 |     x1, y1, x2, y2 = bbox 55 |     x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) 56 |     t_size = cv2.getTextSize(label, 0, fontScale=1, thickness=2)[0] 57 |     # plot bbox 58 |     cv2.rectangle(img, (x1, y1), (x2, y2), cls_color, 2) 59 |     60 |     if label is not None: 61 |         # plot title bbox 62 |         cv2.rectangle(img, (x1, y1-t_size[1]), (int(x1 + t_size[0] * text_scale), y1), cls_color, -1) 63 |         # put the text on the title bbox 64 |         cv2.putText(img, label, (int(x1), int(y1 - 5)), 0, text_scale, (0, 0, 0), 1, lineType=cv2.LINE_AA) 65 | 66 |     return img 67 | 68 | 69 | def visualize(img, 70 |               bboxes, 71 |               scores, 72 |               cls_inds, 73 |               vis_thresh, 74 |               class_colors, 75 |               class_names, 76 |               class_indexs=None, 77 |               dataset_name='voc'): 78 |     ts = 0.4 79 |     for i, bbox in enumerate(bboxes): 80 |         if scores[i] > vis_thresh: 81 |             cls_id = int(cls_inds[i]) 82 |             if dataset_name == 'coco': 83 |                 cls_color = class_colors[cls_id] 84 |                 cls_id = class_indexs[cls_id] 85 |             else: 86 |                 cls_color = class_colors[cls_id] 87 | 88 |             if len(class_names) > 1: 89 |                 mess = '%s: %.2f' % (class_names[cls_id], scores[i]) 90 |             else: 91 |                 cls_color = [255, 0, 0] 92 |                 mess = None 93 |             img = plot_bbox_labels(img, bbox, mess, cls_color, text_scale=ts) 94 | 95 |     return img 96 | 97 | 98 | def test(args, 99 |          net, 100 |          device, 101 |          dataset, 102 |          transforms=None, 103 |          vis_thresh=0.4, 104 |          class_colors=None, 105 |          class_names=None, 106 |          class_indexs=None, 107 |          show=False, 108 |          test_aug=None, 109 |          dataset_name='coco'): 110 |     num_images = len(dataset) 111 |     save_path = os.path.join('det_results/', args.dataset, args.version) 112 |     os.makedirs(save_path, exist_ok=True) 113 | 114 |     for index in range(num_images): 115 |         print('Testing image {:d}/{:d}....'.format(index+1, num_images)) 116 |         image, _ = dataset.pull_image(index) 117 | 118 |         h, w, _ = image.shape 119 |         scale = np.array([[w, h, w, h]]) 120 | 121 |         # prepare 122 |         x = transforms(image)[0] 123 |         x = x.unsqueeze(0).to(device) 124 | 125 |         t0 = time.time() 126 |         # forward 127 |         # test augmentation: 128 |         if test_aug is not None: 129 |             bboxes, scores, cls_inds = test_aug(x, net) 130 |         else: 131 |             # inference 132 |             bboxes, scores, cls_inds = net(x) 133 |         print("detection time used ", time.time() - t0, "s") 134 | 135 |         # rescale 136 |         bboxes *= scale 137 | 138 |         # vis detection 139 |         img_processed = visualize( 140 |             img=image, 141 |             bboxes=bboxes, 142 |             scores=scores, 143 |             cls_inds=cls_inds, 144 |             vis_thresh=vis_thresh, 145 |             class_colors=class_colors, 146 |             class_names=class_names, 147 |             class_indexs=class_indexs, 148 |             dataset_name=dataset_name 149 |         ) 150 |         if show: 151 |             cv2.imshow('detection', img_processed) 152 |             cv2.waitKey(0) 153 |         # save result 154 |         cv2.imwrite(os.path.join(save_path, str(index).zfill(6) +'.jpg'), img_processed) 155 | 156 | 157 | if __name__ == '__main__': 158 |     # cuda 159 |     if args.cuda: 160 |         print('use cuda') 161 |         cudnn.benchmark = True 162 |         device = torch.device("cuda") 163 |     else: 164 |         device = torch.device("cpu") 165 | 166 |     # input size 167 |     input_size = args.img_size 168 | 169 |     # dataset and evaluator 170 |     if 
args.dataset == 'voc': 171 |         data_dir = os.path.join(args.root, 'VOCdevkit') 172 |         class_names = VOC_CLASSES 173 |         class_indexs = None 174 |         num_classes = 20 175 |         dataset = VOCDetection( 176 |                         data_dir=data_dir, 177 |                         image_sets=[('2007', 'test')]) 178 | 179 |     elif args.dataset == 'coco': 180 |         data_dir = os.path.join(args.root, 'COCO') 181 |         class_names = coco_class_labels 182 |         class_indexs = coco_class_index 183 |         num_classes = 80 184 |         dataset = COCODataset( 185 |                     data_dir=data_dir, 186 |                     image_set='val2017') 187 | 188 |     class_colors = [(np.random.randint(255), 189 |                      np.random.randint(255), 190 |                      np.random.randint(255)) for _ in range(num_classes)] 191 | 192 |     # model 193 |     model_name = args.version 194 |     print('Model: ', model_name) 195 | 196 |     # load model and config file 197 |     if model_name == 'fcos_rt': 198 |         from models.fcos_rt import FCOS_RT 199 |         backbone = args.backbone 200 | 201 |     else: 202 |         print('Unknown model name...') 203 |         exit(0) 204 | 205 |     # model 206 |     model = FCOS_RT(device=device, 207 |                     img_size=input_size, 208 |                     num_classes=num_classes, 209 |                     trainable=False, 210 |                     conf_thresh=args.conf_thresh, 211 |                     nms_thresh=args.nms_thresh, 212 |                     bk=backbone) 213 | 214 | 215 |     # load weight 216 |     model.load_state_dict(torch.load(args.trained_model, map_location=device), strict=False) 217 |     model.to(device).eval() 218 |     print('Finished loading model!') 219 | 220 |     # TTA 221 |     test_aug = TestTimeAugmentation(num_classes=num_classes) if args.test_aug else None 222 | 223 |     # run 224 |     test(args=args, 225 |          net=model, 226 |          device=device, 227 |          dataset=dataset, 228 |          transforms=ValTransforms(args.img_size), 229 |          vis_thresh=args.visual_threshold, 230 |          class_colors=class_colors, 231 |          class_names=class_names, 232 |          class_indexs=class_indexs, 233 |          show=args.show, 234 |          test_aug=test_aug, 235 |          dataset_name=args.dataset) 236 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import os 4 | import argparse 5 | import time 6 | import random 7 | import numpy as np 8 | import cv2 9 | 10 | import torch 11 | import torch.optim as optim 12 | import torch.backends.cudnn as cudnn 13 | import torch.distributed as dist 14 | from torch.nn.parallel import DistributedDataParallel as DDP 15 | 16 | from data.voc import VOCDetection 17 | from data.coco import COCODataset 18 | from data.transforms import TrainTransforms, ValTransforms 19 | 20 | from utils import distributed_utils 21 | from utils import create_labels 22 | from utils.misc import ModelEMA, detection_collate 23 | from utils.com_flops_params import FLOPs_and_Params 24 | 25 | from evaluator.coco_evaluator import COCOAPIEvaluator 26 | from evaluator.voc_evaluator import VOCAPIEvaluator 27 | 28 | 29 | def parse_args(): 30 |     parser = argparse.ArgumentParser(description='FCOS-RT Detection') 31 |     # basic 32 |     parser.add_argument('--cuda', action='store_true', default=False, 33 |                         help='use cuda.') 34 |     parser.add_argument('--batch_size', default=16, type=int, 35 |                         help='Batch size for training') 36 |     parser.add_argument('--img_size', default=512, type=int, 37 |                         help='input image size for training') 38 |     parser.add_argument('--max_epoch', type=int, default=12, 39 |                         help='max training epoch (scaled by --schedule)') 40 |     parser.add_argument('--lr_epoch', nargs='+', default=[8, 10], type=int, 41 |                         help='lr epoch to decay') 42 |     parser.add_argument('--lr', default=0.01, type=float, 43 |                         help='learning rate') 
44 |     parser.add_argument('--schedule', default=1, type=int, 45 |                         help='Schedule for training: 1x, 2x, 3x, 4x.') 46 |     parser.add_argument('--start_iter', type=int, default=0, 47 |                         help='start iteration to train') 48 |     parser.add_argument('-r', '--resume', default=None, type=str, 49 |                         help='keep training') 50 |     parser.add_argument('--num_workers', default=8, type=int, 51 |                         help='Number of workers used in dataloading') 52 |     parser.add_argument('--num_gpu', default=1, type=int, 53 |                         help='Number of GPUs.') 54 |     parser.add_argument('--start_epoch', type=int, 55 |                         default=0, help='the start epoch to train') 56 |     parser.add_argument('--eval_epoch', type=int, 57 |                         default=2, help='interval between evaluations') 58 |     parser.add_argument('--tfboard', action='store_true', default=False, 59 |                         help='use tensorboard') 60 |     parser.add_argument('--save_folder', default='weights/', type=str, 61 |                         help='directory to save model checkpoints') 62 |     parser.add_argument('--vis', action='store_true', default=False, 63 |                         help='visualize target.') 64 | 65 |     # model 66 |     parser.add_argument('-v', '--version', default='fcos_rt', 67 |                         help='fcos_rt, fcos') 68 |     parser.add_argument('-bk', '--backbone', default='r18', 69 |                         help='r18, r50, r101') 70 | 71 |     # dataset 72 |     parser.add_argument('--root', default='/mnt/share/ssd2/dataset', 73 |                         help='data root') 74 |     parser.add_argument('-d', '--dataset', default='coco', 75 |                         help='voc, coco.') 76 | 77 |     # train trick 78 |     parser.add_argument('--ema', action='store_true', default=False, 79 |                         help='use ema training trick') 80 |     parser.add_argument('--multi_scale', action='store_true', default=False, 81 |                         help='use multi scale training trick') 82 |     parser.add_argument('--no_warmup', action='store_true', default=False, 83 |                         help='do not use warmup') 84 |     parser.add_argument('--wp_epoch', type=int, 85 |                         default=1, help='warm-up epoch') 86 | 87 |     # train DDP 88 |     parser.add_argument('-dist', '--distributed', action='store_true', default=False, 89 |                         help='distributed training') 90 |     parser.add_argument('--local_rank', type=int, default=0, 91 |                         help='local_rank') 92 |     parser.add_argument('--sybn', action='store_true', default=False, 93 |                         help='use SyncBatchNorm.') 94 | 95 | 96 |     return parser.parse_args() 97 | 98 | 99 | def train(): 100 |     args = parse_args() 101 |     print("Setting Arguments.. 
: ", args) 102 | print("----------------------------------------------------------") 103 | # model name 104 | model_name = args.version 105 | print('Model: ', model_name) 106 | 107 | # config 108 | if args.version == 'fcos_rt': 109 | scale_range = [[0, 64], [64, 128], [128, 1e5]] 110 | elif args.version == 'fcos': 111 | scale_range = [[0, 64], [64, 128], [128, 256], [256, 512], [512, 1e5]] 112 | 113 | # set distributed 114 | local_rank = 0 115 | if args.distributed: 116 | dist.init_process_group(backend="nccl", init_method="env://") 117 | local_rank = torch.distributed.get_rank() 118 | print(local_rank) 119 | torch.cuda.set_device(local_rank) 120 | 121 | # cuda 122 | if args.cuda: 123 | print('use cuda') 124 | cudnn.benchmark = True 125 | device = torch.device("cuda") 126 | else: 127 | device = torch.device("cpu") 128 | 129 | # path to save model 130 | path_to_save = os.path.join(args.save_folder, args.dataset, args.version) 131 | os.makedirs(path_to_save, exist_ok=True) 132 | 133 | # input size 134 | train_size = args.img_size 135 | val_size = args.img_size 136 | 137 | # EMA trick 138 | if args.ema: 139 | print('use EMA trick ...') 140 | 141 | # dataset and evaluator 142 | dataset, evaluator, num_classes = build_dataset(args, train_size, val_size, device) 143 | # dataloader 144 | dataloader = build_dataloader(args, dataset, detection_collate) 145 | 146 | print('Training model on:', args.dataset) 147 | print('The dataset size:', len(dataset)) 148 | print("----------------------------------------------------------") 149 | 150 | # buile model and config file 151 | if model_name == 'fcos_rt': 152 | from models.fcos_rt import FCOS_RT 153 | backbone = args.backbone 154 | # model 155 | net = FCOS_RT(device=device, 156 | img_size=train_size, 157 | num_classes=num_classes, 158 | trainable=True, 159 | bk=backbone 160 | ) 161 | 162 | elif model_name == 'fcos': 163 | from models.fcos import FCOS 164 | backbone = args.backbone 165 | # model 166 | net = FCOS(device=device, 167 | img_size=train_size, 168 | num_classes=num_classes, 169 | trainable=True, 170 | bk=backbone 171 | ) 172 | else: 173 | print('Unknown model name...') 174 | exit(0) 175 | 176 | model = net 177 | model = model.to(device).train() 178 | 179 | # SyncBatchNorm 180 | if args.distributed and args.sybn and args.cuda and args.num_gpu > 1: 181 | print('use SyncBatchNorm ...') 182 | model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) 183 | 184 | if local_rank == 0: 185 | # compute FLOPs and Params 186 | model.trainable = False 187 | model = model.eval() 188 | FLOPs_and_Params(model=model, size=train_size) 189 | model.trainable = True 190 | model = model.train() 191 | 192 | # keep training 193 | if args.resume is not None: 194 | print('keep training model: %s' % (args.resume)) 195 | if args.distributed: 196 | model.module.load_state_dict(torch.load(args.resume, map_location=device)) 197 | else: 198 | model.load_state_dict(torch.load(args.resume, map_location=device)) 199 | 200 | # EMA 201 | ema = ModelEMA(model) if args.ema else None 202 | 203 | # use tfboard 204 | tblogger = None 205 | if args.tfboard: 206 | print('use tensorboard') 207 | from torch.utils.tensorboard import SummaryWriter 208 | c_time = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())) 209 | log_path = os.path.join('log/', args.dataset, c_time) 210 | os.makedirs(log_path, exist_ok=True) 211 | 212 | tblogger = SummaryWriter(log_path) 213 | 214 | # basic 215 | batch_size = args.batch_size 216 | warmup = not args.no_warmup 217 | max_epoch = args.max_epoch 
* args.schedule 218 | lr_epoch = [e * args.schedule for e in args.lr_epoch] 219 | epoch_size = len(dataset) // (batch_size * args.num_gpu) 220 | print('Schedule: %dx' % args.schedule) 221 | print('Max epoch: ', max_epoch) 222 | print('Lr step:', lr_epoch) 223 | 224 | # build optimizer 225 | base_lr = args.lr 226 | tmp_lr = base_lr 227 | optimizer = optim.SGD(model.parameters(), 228 | lr=tmp_lr, 229 | momentum=0.9, 230 | weight_decay=1e-4 231 | ) 232 | 233 | best_map = 0. 234 | t0 = time.time() 235 | epoch = 0 236 | # start to train 237 | for epoch in range(args.start_epoch, max_epoch): 238 | # set epoch if DDP 239 | if args.distributed: 240 | dataloader.sampler.set_epoch(epoch) 241 | 242 | # use step lr 243 | if epoch in lr_epoch: 244 | tmp_lr = tmp_lr * 0.1 245 | set_lr(optimizer, tmp_lr) 246 | 247 | # load a batch 248 | for iter_i, (images, targets) in enumerate(dataloader): 249 | ni = iter_i + epoch * epoch_size 250 | # warmup 251 | if epoch < args.wp_epoch and warmup: 252 | nw = args.wp_epoch * epoch_size 253 | tmp_lr = base_lr * pow(ni / nw, 4) 254 | set_lr(optimizer, tmp_lr) 255 | 256 | elif epoch == args.wp_epoch and iter_i == 0 and warmup: 257 | # warmup is over 258 | warmup = False 259 | tmp_lr = base_lr 260 | set_lr(optimizer, tmp_lr) 261 | 262 | # multi-scale trick 263 | if iter_i % 10 == 0 and iter_i > 0 and args.multi_scale: 264 | # randomly choose a new size 265 | train_size = random.randint(10, args.img_size // 32) * 32 266 | model.set_grid(train_size) 267 | if args.multi_scale: 268 | # interpolate 269 | images = torch.nn.functional.interpolate( 270 | input=images, 271 | size=train_size, 272 | mode='bilinear', 273 | align_corners=False) 274 | 275 | # make labels 276 | if args.vis: 277 | vis_data(images, targets, train_size) 278 | continue 279 | targets = create_labels.gt_creator( 280 | img_size=train_size, 281 | num_classes=num_classes, 282 | strides=net.strides, 283 | scale_range=scale_range, 284 | targets=targets) 285 | 286 | # to device 287 | images = images.to(device) 288 | targets = targets.to(device) 289 | 290 | # forward 291 | cls_loss, reg_loss, ctn_loss, total_loss = model(images, targets=targets) 292 | 293 | loss_dict = dict( 294 | cls_loss=cls_loss, 295 | reg_loss=reg_loss, 296 | ctn_loss=ctn_loss, 297 | total_loss=total_loss 298 | ) 299 | loss_dict_reduced = distributed_utils.reduce_loss_dict(loss_dict) 300 | 301 | # check NAN 302 | if torch.isnan(total_loss): 303 | continue 304 | 305 | # backprop 306 | total_loss.backward() 307 | optimizer.step() 308 | optimizer.zero_grad() 309 | 310 | # ema 311 | if args.ema: 312 | ema.update(model) 313 | 314 | # display 315 | if iter_i % 10 == 0: 316 | if args.tfboard: 317 | # viz loss 318 | tblogger.add_scalar('cls loss', loss_dict_reduced['cls_loss'].item(), iter_i) 319 | tblogger.add_scalar('reg loss', loss_dict_reduced['reg_loss'].item(), iter_i) 320 | tblogger.add_scalar('ctn loss', loss_dict_reduced['ctn_loss'].item(), iter_i) 321 | 322 | t1 = time.time() 323 | print('[Epoch %d/%d][Iter %d/%d][lr %.6f][Loss: cls %.2f || reg %.2f || ctn %.2f || size %d || time: %.2f]' 324 | % (epoch+1, 325 | max_epoch, 326 | iter_i, 327 | epoch_size, 328 | tmp_lr, 329 | loss_dict_reduced['cls_loss'].item(), 330 | loss_dict_reduced['reg_loss'].item(), 331 | loss_dict_reduced['ctn_loss'].item(), 332 | train_size, 333 | t1-t0), 334 | flush=True) 335 | 336 | t0 = time.time() 337 | # update iter_i 338 | iter_i += 1 339 | 340 | # evaluate 341 | if (epoch + 1) % args.eval_epoch == 0 or epoch + 1 == max_epoch: 342 | if args.ema: 343 | model_eval 
= ema.ema 344 | else: 345 | model_eval = model.module if args.distributed else model 346 | 347 | # check evaluator 348 | if evaluator is None: 349 | print('No evaluator ... save model and go on training.') 350 | print('Saving state, epoch:', epoch + 1) 351 | if local_rank == 0: 352 | torch.save(model_eval.state_dict(), os.path.join(path_to_save, 353 | args.version + '_' + args.backbone + '_' + repr(epoch + 1) + '.pth')) 354 | else: 355 | print('eval ...') 356 | 357 | # set eval mode 358 | model_eval.trainable = False 359 | model_eval.set_grid(val_size) 360 | model_eval.eval() 361 | 362 | # we only do evaluation on local_rank-0. 363 | if local_rank == 0: 364 | # evaluate 365 | evaluator.evaluate(model_eval) 366 | 367 | cur_map = evaluator.map 368 | if cur_map > best_map: 369 | # update best-map 370 | best_map = cur_map 371 | # save model 372 | print('Saving state, epoch:', epoch + 1) 373 | torch.save(model_eval.state_dict(), os.path.join(path_to_save, 374 | args.version + '_' + args.backbone + '_' + repr(epoch + 1) + '_' + str(round(best_map, 2)) + '.pth')) 375 | 376 | if args.tfboard: 377 | if args.dataset == 'voc': 378 | tblogger.add_scalar('07test/mAP', evaluator.map, epoch) 379 | elif args.dataset == 'coco': 380 | tblogger.add_scalar('val/AP50_95', evaluator.ap50_95, epoch) 381 | tblogger.add_scalar('val/AP50', evaluator.ap50, epoch) 382 | 383 | # wait for all processes to synchronize 384 | if args.distributed: 385 | dist.barrier() 386 | 387 | # set train mode. 388 | model_eval.trainable = True 389 | model_eval.set_grid(train_size) 390 | model_eval.train() 391 | 392 | if args.tfboard: 393 | tblogger.close() 394 | 395 | 396 | def build_dataset(args, train_size, val_size, device): 397 | if args.dataset == 'voc': 398 | data_dir = os.path.join(args.root, 'VOCdevkit') 399 | num_classes = 20 400 | dataset = VOCDetection( 401 | data_dir=data_dir, 402 | transform=TrainTransforms(train_size)) 403 | 404 | evaluator = VOCAPIEvaluator( 405 | data_dir=data_dir, 406 | device=device, 407 | transform=ValTransforms(val_size)) 408 | 409 | elif args.dataset == 'coco': 410 | data_dir = os.path.join(args.root, 'COCO') 411 | num_classes = 80 412 | dataset = COCODataset( 413 | data_dir=data_dir, 414 | transform=TrainTransforms(train_size)) 415 | 416 | evaluator = COCOAPIEvaluator( 417 | data_dir=data_dir, 418 | device=device, 419 | transform=ValTransforms(val_size)) 420 | 421 | else: 422 | print('unknow dataset !! 
Only support voc and coco !!') 423 | exit(0) 424 | 425 | return dataset, evaluator, num_classes 426 | 427 | 428 | def build_dataloader(args, dataset, collate_fn=None): 429 | # distributed 430 | if args.distributed and args.num_gpu > 1: 431 | # dataloader 432 | dataloader = torch.utils.data.DataLoader( 433 | dataset=dataset, 434 | batch_size=args.batch_size, 435 | collate_fn=collate_fn, 436 | num_workers=args.num_workers, 437 | pin_memory=True, 438 | sampler=torch.utils.data.distributed.DistributedSampler(dataset) 439 | ) 440 | 441 | else: 442 | # dataloader 443 | dataloader = torch.utils.data.DataLoader( 444 | dataset=dataset, 445 | shuffle=True, 446 | batch_size=args.batch_size, 447 | collate_fn=collate_fn, 448 | num_workers=args.num_workers, 449 | pin_memory=True 450 | ) 451 | return dataloader 452 | 453 | 454 | def set_lr(optimizer, lr): 455 | for param_group in optimizer.param_groups: 456 | param_group['lr'] = lr 457 | 458 | 459 | def vis_data(images, targets, input_size, num_classes): 460 | B = images.size(0) 461 | # vis data 462 | mean=(0.406, 0.456, 0.485) 463 | std=(0.225, 0.224, 0.229) 464 | mean = np.array(mean, dtype=np.float32) 465 | std = np.array(std, dtype=np.float32) 466 | 467 | for bi in range(B): 468 | img = images[bi].permute(1, 2, 0).cpu().numpy()[:, :, ::-1] 469 | img = ((img * std + mean)*255).astype(np.uint8) 470 | cv2.imwrite('1.jpg', img) 471 | 472 | img_ = cv2.imread('1.jpg') 473 | target_i = targets[bi] # [N, C] 474 | bboxes = target_i['boxes'] 475 | labels = target_i['labels'] 476 | for box, cls_id in zip(bboxes, labels): 477 | xmin, ymin, xmax, ymax = box 478 | cls_id = int(cls_id) 479 | xmin *= input_size 480 | ymin *= input_size 481 | xmax *= input_size 482 | ymax *= input_size 483 | cv2.rectangle(img_, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 0, 255), 2) 484 | 485 | cv2.imshow('img', img_) 486 | cv2.waitKey(0) 487 | 488 | 489 | if __name__ == '__main__': 490 | train() 491 | -------------------------------------------------------------------------------- /train_fcos.sh: -------------------------------------------------------------------------------- 1 | python train.py \ 2 | --cuda \ 3 | -v fcos \ 4 | -bk r50 \ 5 | --img_size 640 \ 6 | --lr 0.01 \ 7 | --batch_size 16 \ 8 | --schedule 1 \ 9 | --no_warmup 10 | -------------------------------------------------------------------------------- /train_fcos_rt.sh: -------------------------------------------------------------------------------- 1 | python train.py \ 2 | --cuda \ 3 | -d coco \ 4 | -v fcos_rt \ 5 | -bk r50 \ 6 | --img_size 512 \ 7 | --lr 0.01 \ 8 | --batch_size 16 \ 9 | --schedule 4 \ 10 | --multi_scale 11 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/FCOS-RT_PyTorch/d81eb389e12f6e05ae75bfa69a56447b8fa2a02f/utils/__init__.py -------------------------------------------------------------------------------- /utils/box_ops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def iou_score(bboxes_a, bboxes_b, batch_size): 4 | """ 5 | Input:\n 6 | bboxes_a : [B*N, 4] = [x1, y1, x2, y2] \n 7 | bboxes_b : [B*N, 4] = [x1, y1, x2, y2] \n 8 | 9 | Output:\n 10 | iou : [B, N] = [iou, ...] 
\n 11 | """ 12 | tl = torch.max(bboxes_a[:, :2], bboxes_b[:, :2]) 13 | br = torch.min(bboxes_a[:, 2:], bboxes_b[:, 2:]) 14 | area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], 1) 15 | area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], 1) 16 | 17 | en = (tl < br).type(tl.type()).prod(dim=1) 18 | area_i = torch.prod(br - tl, 1) * en # * ((tl < br).all()) 19 | iou = area_i / (area_a + area_b - area_i + 1e-14) 20 | 21 | return iou.view(batch_size, -1) 22 | 23 | 24 | def giou_score(bboxes_a, bboxes_b, batch_size): 25 | """ 26 | bbox_1 : [B*N, 4] = [x1, y1, x2, y2] 27 | bbox_2 : [B*N, 4] = [x1, y1, x2, y2] 28 | """ 29 | # iou 30 | tl = torch.max(bboxes_a[:, :2], bboxes_b[:, :2]) 31 | br = torch.min(bboxes_a[:, 2:], bboxes_b[:, 2:]) 32 | area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], 1) 33 | area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], 1) 34 | 35 | en = (tl < br).type(tl.type()).prod(dim=1) 36 | area_i = torch.prod(br - tl, 1) * en # * ((tl < br).all()) 37 | iou = (area_i / (area_a + area_b - area_i + 1e-14)).clamp(0) 38 | 39 | # giou 40 | tl = torch.min(bboxes_a[:, :2], bboxes_b[:, :2]) 41 | br = torch.max(bboxes_a[:, 2:], bboxes_b[:, 2:]) 42 | en = (tl < br).type(tl.type()).prod(dim=1) 43 | area_c = torch.prod(br - tl, 1) * en # * ((tl < br).all()) 44 | 45 | giou = (iou - (area_c - area_i) / (area_c + 1e-14)) 46 | 47 | return giou.view(batch_size, -1) 48 | 49 | 50 | if __name__ == '__main__': 51 | box1 = torch.tensor([[10, 10, 20, 20]]) 52 | box2 = torch.tensor([[15, 15, 25, 25]]) 53 | iou = iou_score(box1, box2) 54 | print(iou) 55 | giou = giou_score(box1, box2) 56 | print(giou) 57 | -------------------------------------------------------------------------------- /utils/com_flops_params.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from thop import profile 3 | 4 | 5 | 6 | 7 | def FLOPs_and_Params(model, size): 8 | device = model.device 9 | x = torch.randn(1, 3, size, size).to(device) 10 | model.trainable = False 11 | model.eval() 12 | 13 | flops, params = profile(model, inputs=(x, )) 14 | print('FLOPs : ', flops / 1e9, ' B') 15 | print('Params : ', params / 1e6, ' M') 16 | 17 | model.trainable = True 18 | model.train() 19 | 20 | 21 | if __name__ == "__main__": 22 | pass 23 | -------------------------------------------------------------------------------- /utils/create_labels.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def gt_creator(img_size, num_classes, strides, scale_range, targets): 6 | batch_size = len(targets) 7 | w = h = img_size 8 | gt_tensor = [] 9 | 10 | # empty gt tensor 11 | for s in strides: 12 | gt_tensor.append(np.zeros([batch_size, h//s, w//s, num_classes + 4 + 1])) 13 | 14 | # generate gt datas 15 | for bi in range(batch_size): 16 | target = targets[bi] 17 | bboxes = target['boxes'].tolist() 18 | labels = target['labels'].tolist() 19 | for box, cls_id in zip(bboxes, labels): 20 | x1, y1, x2, y2 = box 21 | cls_id = int(cls_id) 22 | 23 | # compute the center, width and height 24 | xc = (x2 + x1) / 2 * w 25 | yc = (y2 + y1) / 2 * h 26 | bw = (x2 - x1) * w 27 | bh = (y2 - y1) * h 28 | 29 | if bw < 1. 
or bh < 1.: 30 |                 # print('A dirty data !!!') 31 |                 continue 32 | 33 |             for si, s in enumerate(strides): 34 |                 hs, ws = h // s, w // s 35 |                 x1_s, x2_s = x1 * ws, x2 * ws 36 |                 y1_s, y2_s = y1 * hs, y2 * hs 37 |                 xc_s = xc / s 38 |                 yc_s = yc / s 39 |                 sr = scale_range[si] 40 | 41 |                 gridx = int(xc_s) 42 |                 gridy = int(yc_s) 43 | 44 |                 # By default, we only consider the 3x3 neighborhood of the center point 45 |                 for i in range(gridx - 1, gridx + 2): 46 |                     for j in range(gridy - 1, gridy + 2): 47 |                         if (j >= 0 and j < gt_tensor[si].shape[1]) and (i >= 0 and i < gt_tensor[si].shape[2]): 48 |                             t = j - y1_s 49 |                             b = y2_s - j 50 |                             l = i - x1_s 51 |                             r = x2_s - i 52 |                             if min(t, b, l, r) > 0: 53 |                                 if max(t, b, l, r) >= (sr[0]/s) and max(t, b, l, r) < (sr[1]/s): 54 |                                     gt_tensor[si][bi, j, i, cls_id] = 1.0 55 |                                     gt_tensor[si][bi, j, i, num_classes:num_classes + 4] = np.array([x1, y1, x2, y2]) 56 |                                     gt_tensor[si][bi, j, i, num_classes + 4] = np.sqrt(min(l, r) / max(l, r) * \ 57 |                                                                                 min(t, b) / max(t, b)) 58 | 59 |     gt_tensor = [gt.reshape(batch_size, -1, num_classes + 4 + 1) for gt in gt_tensor] 60 |     gt_tensor = np.concatenate(gt_tensor, axis=1) 61 | 62 |     return torch.from_numpy(gt_tensor).float() 63 | 64 | 65 | if __name__ == "__main__": 66 |     pass -------------------------------------------------------------------------------- /utils/distributed_utils.py: -------------------------------------------------------------------------------- 1 | # from github: https://github.com/ruinmessi/ASFF/blob/master/utils/distributed_util.py 2 | 3 | import time  # needed by synchronize() below 4 | import torch 5 | 6 | def get_world_size(): 7 |     if not torch.distributed.is_initialized(): 8 |         return 1 9 |     return torch.distributed.get_world_size() 10 | 11 | 12 | def get_rank(): 13 |     if not torch.distributed.is_initialized(): 14 |         return 0 15 |     return torch.distributed.get_rank() 16 | 17 | 18 | def is_main_process(): 19 |     if not torch.distributed.is_initialized(): 20 |         return True 21 |     return torch.distributed.get_rank() == 0 22 | 23 | 24 | def synchronize(): 25 |     """ 26 |     Helper function to synchronize between multiple processes when 27 |     using distributed training 28 |     """ 29 |     if not torch.distributed.is_initialized(): 30 |         return 31 |     world_size = torch.distributed.get_world_size() 32 |     rank = torch.distributed.get_rank() 33 |     if world_size == 1: 34 |         return 35 | 36 |     def _send_and_wait(r): 37 |         if rank == r: 38 |             tensor = torch.tensor(0, device="cuda") 39 |         else: 40 |             tensor = torch.tensor(1, device="cuda") 41 |         torch.distributed.broadcast(tensor, r) 42 |         while tensor.item() == 1: 43 |             time.sleep(1) 44 | 45 |     _send_and_wait(0) 46 |     # now sync on the main process 47 |     _send_and_wait(1) 48 | 49 | 50 | def reduce_loss_dict(loss_dict): 51 |     """ 52 |     Reduce the loss dictionary from all processes so that process with rank 53 |     0 has the averaged results. Returns a dict with the same fields as 54 |     loss_dict, after reduction. 
55 | """ 56 | world_size = get_world_size() 57 | if world_size < 2: 58 | return loss_dict 59 | with torch.no_grad(): 60 | loss_names = [] 61 | all_losses = [] 62 | for k in sorted(loss_dict.keys()): 63 | loss_names.append(k) 64 | all_losses.append(loss_dict[k]) 65 | all_losses = torch.stack(all_losses, dim=0) 66 | torch.distributed.reduce(all_losses, dst=0) 67 | if torch.distributed.get_rank() == 0: 68 | # only main process gets accumulated, so only divide by 69 | # world_size in this case 70 | all_losses /= world_size 71 | reduced_losses = {k: v for k, v in zip(loss_names, all_losses)} 72 | return reduced_losses -------------------------------------------------------------------------------- /utils/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class FocalWithLogitsLoss(nn.Module): 7 | def __init__(self, reduction='mean', gamma=2.0, alpha=0.25): 8 | super(FocalWithLogitsLoss, self).__init__() 9 | self.reduction = reduction 10 | self.gamma = gamma 11 | self.alpha = alpha 12 | 13 | def forward(self, logits, targets): 14 | p = torch.sigmoid(logits) 15 | ce_loss = F.binary_cross_entropy_with_logits(input=logits, 16 | target=targets, 17 | reduction="none" 18 | ) 19 | p_t = p * targets + (1.0 - p) * (1.0 - targets) 20 | loss = ce_loss * ((1.0 - p_t) ** self.gamma) 21 | 22 | if self.alpha >= 0: 23 | alpha_t = self.alpha * targets + (1.0 - self.alpha) * (1.0 - targets) 24 | loss = alpha_t * loss 25 | 26 | if self.reduction == "mean": 27 | batch_size = logits.size(0) 28 | pos_inds = (targets == 1.0).float() 29 | # [B, H*W, C] -> [B,] 30 | num_pos = pos_inds.sum([1, 2]).clamp(1) 31 | loss = loss.sum([1, 2]) 32 | 33 | loss = (loss / num_pos).sum() / batch_size 34 | 35 | elif self.reduction == "sum": 36 | loss = torch.sum(loss) 37 | 38 | return loss 39 | 40 | 41 | def loss(pred_cls, pred_giou, pred_ctn, target, num_classes): 42 | # create loss_f 43 | cls_loss_function = FocalWithLogitsLoss(reduction='mean') 44 | ctn_loss_function = nn.BCELoss(reduction='none') 45 | 46 | # groundtruth 47 | gt_cls = target[..., :num_classes] 48 | gt_ctn = target[..., -1] 49 | gt_pos = (gt_ctn > 0.).float() 50 | num_pos = gt_pos.sum(-1, keepdim=True).clamp(1) 51 | 52 | batch_size = pred_cls.size(0) 53 | # cls loss 54 | cls_loss = cls_loss_function(pred_cls, gt_cls) 55 | 56 | # reg loss 57 | reg_loss = ((1. 
- pred_giou) * gt_pos / num_pos).sum() / batch_size 58 | 59 | # ctn loss 60 | ctn_loss = (ctn_loss_function(pred_ctn[..., 0].sigmoid(), gt_ctn) * gt_pos / num_pos).sum() / batch_size 61 | 62 | # total loss 63 | total_loss = cls_loss + reg_loss + ctn_loss 64 | 65 | return cls_loss, reg_loss, ctn_loss, total_loss 66 | 67 | 68 | if __name__ == "__main__": 69 | pass -------------------------------------------------------------------------------- /utils/misc.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import math 5 | from copy import deepcopy 6 | 7 | 8 | def nms(dets, scores, nms_thresh=0.4): 9 | """"Pure Python NMS baseline.""" 10 | x1 = dets[:, 0] #xmin 11 | y1 = dets[:, 1] #ymin 12 | x2 = dets[:, 2] #xmax 13 | y2 = dets[:, 3] #ymax 14 | 15 | areas = (x2 - x1) * (y2 - y1) 16 | order = scores.argsort()[::-1] 17 | 18 | keep = [] 19 | while order.size > 0: 20 | i = order[0] 21 | keep.append(i) 22 | xx1 = np.maximum(x1[i], x1[order[1:]]) 23 | yy1 = np.maximum(y1[i], y1[order[1:]]) 24 | xx2 = np.minimum(x2[i], x2[order[1:]]) 25 | yy2 = np.minimum(y2[i], y2[order[1:]]) 26 | 27 | w = np.maximum(1e-28, xx2 - xx1) 28 | h = np.maximum(1e-28, yy2 - yy1) 29 | inter = w * h 30 | 31 | # Cross Area / (bbox + particular area - Cross Area) 32 | ovr = inter / (areas[i] + areas[order[1:]] - inter + 1e-10) 33 | #reserve all the boundingbox whose ovr less than thresh 34 | inds = np.where(ovr <= nms_thresh)[0] 35 | order = order[inds + 1] 36 | 37 | return keep 38 | 39 | 40 | def is_parallel(model): 41 | # Returns True if model is of type DP or DDP 42 | return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) 43 | 44 | 45 | def detection_collate(batch): 46 | """Custom collate fn for dealing with batches of images that have a different 47 | number of associated object annotations (bounding boxes). 48 | 49 | Arguments: 50 | batch: (tuple) A tuple of tensor images and lists of annotations 51 | 52 | Return: 53 | A tuple containing: 54 | 1) (tensor) batch of images stacked on their 0 dim 55 | 2) (list of tensors) annotations for a given image are stacked on 56 | 0 dim 57 | """ 58 | targets = [] 59 | imgs = [] 60 | for sample in batch: 61 | imgs.append(sample[0]) 62 | targets.append(sample[1]) 63 | return torch.stack(imgs, 0), targets 64 | 65 | 66 | # Model EMA 67 | class ModelEMA(object): 68 | def __init__(self, model, decay=0.9999, updates=0): 69 | # create EMA 70 | self.ema = deepcopy(model.module if is_parallel(model) else model).eval() # FP32 EMA 71 | self.updates = updates 72 | self.decay = lambda x: decay * (1 - math.exp(-x / 2000.)) 73 | for p in self.ema.parameters(): 74 | p.requires_grad_(False) 75 | 76 | def update(self, model): 77 | # Update EMA parameters 78 | with torch.no_grad(): 79 | self.updates += 1 80 | d = self.decay(self.updates) 81 | 82 | msd = model.module.state_dict() if is_parallel(model) else model.state_dict() # model state_dict 83 | for k, v in self.ema.state_dict().items(): 84 | if v.dtype.is_floating_point: 85 | v *= d 86 | v += (1. 
- d) * msd[k].detach() 87 | 88 | 89 | # test augmentation 90 | class TestTimeAugmentation(object): 91 |     def __init__(self, num_classes=80, nms_thresh=0.4, scale_range=[320, 640, 32]): 92 |         self.nms = nms 93 |         self.num_classes = num_classes 94 |         self.nms_thresh = nms_thresh 95 |         self.scales = np.arange(scale_range[0], scale_range[1]+1, scale_range[2]) 96 | 97 |     def __call__(self, x, model): 98 |         # x: Tensor -> [B, C, H, W] 99 |         bboxes_list = [] 100 |         scores_list = [] 101 |         labels_list = [] 102 | 103 |         # multi scale 104 |         for s in self.scales: 105 |             if x.size(-1) == s and x.size(-2) == s: 106 |                 x_scale = x 107 |             else: 108 |                 x_scale = torch.nn.functional.interpolate( 109 |                                     input=x, 110 |                                     size=(s, s), 111 |                                     mode='bilinear', 112 |                                     align_corners=False) 113 |             model.set_grid(s) 114 |             bboxes, scores, labels = model(x_scale) 115 |             bboxes_list.append(bboxes) 116 |             scores_list.append(scores) 117 |             labels_list.append(labels) 118 | 119 |             # Flip 120 |             x_flip = torch.flip(x_scale, [-1]) 121 |             bboxes, scores, labels = model(x_flip) 122 |             bboxes = bboxes.copy() 123 |             bboxes[:, 0::2] = 1.0 - bboxes[:, 2::-2] 124 |             bboxes_list.append(bboxes) 125 |             scores_list.append(scores) 126 |             labels_list.append(labels) 127 | 128 |         bboxes = np.concatenate(bboxes_list) 129 |         scores = np.concatenate(scores_list) 130 |         labels = np.concatenate(labels_list) 131 | 132 |         # nms 133 |         keep = np.zeros(len(bboxes), dtype=int) 134 |         for i in range(self.num_classes): 135 |             inds = np.where(labels == i)[0] 136 |             if len(inds) == 0: 137 |                 continue 138 |             c_bboxes = bboxes[inds] 139 |             c_scores = scores[inds] 140 |             c_keep = self.nms(c_bboxes, c_scores, self.nms_thresh) 141 |             keep[inds[c_keep]] = 1 142 | 143 |         keep = np.where(keep > 0) 144 |         bboxes = bboxes[keep] 145 |         scores = scores[keep] 146 |         labels = labels[keep] 147 | 148 |         return bboxes, scores, labels 149 | --------------------------------------------------------------------------------
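A minimal usage sketch (not part of the repository) for the pure-Python nms defined in utils/misc.py above. It assumes the project root is on PYTHONPATH; the toy boxes, scores, and threshold are made up for illustration, and boxes use the [xmin, ymin, xmax, ymax] layout expected by the detector's post-processing:

import numpy as np
from utils.misc import nms

# two heavily overlapping boxes and one isolated box, already sorted by score
dets = np.array([[10., 10., 60., 60.],
                 [12., 12., 62., 62.],
                 [100., 100., 150., 150.]])
scores = np.array([0.9, 0.8, 0.7])

# indices of the boxes kept after class-agnostic suppression at IoU 0.4
keep = nms(dets, scores, nms_thresh=0.4)
print(keep)  # keeps boxes 0 and 2; the lower-scoring duplicate of box 0 is suppressed

TestTimeAugmentation.__call__ above runs this routine once per class and keeps the surviving indices; FCOS_RT.postprocess follows the same per-class pattern with its own copy of the function.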