├── .github ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE.md └── PULL_REQUEST_TEMPLATE.md ├── .gitignore ├── README.md ├── VGG.py ├── data.py ├── data └── README.md ├── functions ├── __init__.py └── loss │ └── multitask.py ├── load_model.py ├── models └── vggrpn.py ├── mscoco.py ├── test ├── __init__.py ├── test_anchorutils.py ├── test_basic_anchor_creation.py ├── test_cupyutils.py └── test_iou.py ├── train.py └── utils ├── __init__.py ├── anchorutils.py ├── cupyutils.py ├── imgutils.py ├── iouutils.py └── profiler.py /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hvy/chainer-faster-rcnn/cf76a654024dab181d1e96735c443ca4ef5d973a/.github/CONTRIBUTING.md -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hvy/chainer-faster-rcnn/cf76a654024dab181d1e96735c443ca4ef5d973a/.github/ISSUE_TEMPLATE.md -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # *Title* 2 | 3 | ## Description 4 | 5 | *Description* 6 | 7 | ## Tasks 8 | 9 | - [ ] *First task* 10 | - [ ] *Second task* 11 | - [ ] *Third task* 12 | 13 | ## Review Request 14 | 15 | *Reviewer* 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.swp 3 | *.model 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # A Faster R-CNN Implementation using Chainer 2 | 3 | An experimental repository in progress with Chainer code to run the Faster R-CNN. The method was originally proposed by Shaoqing Ren et al. in June, 2015 and is one of the best performing object detection and classification algorithms in terms of speed performance and accuracy, at the time of writing this in April 2016. 4 | 5 | ## References 6 | 7 | - [Original paper, arXiv](http://arxiv.org/abs/1506.01497) 8 | - [Original MATLAB implementaion, GitHub](https://github.com/ShaoqingRen/faster_rcnn) 9 | - [Python implementation (~10% slower), GitHub](https://github.com/rbgirshick/py-faster-rcnn) 10 | 11 | ## Faster R-CNN with Chainer 12 | 13 | ### Challenges 14 | 15 | - Implement the forward and backward methods for the Multi-Task Loss using Chainer. 16 | - First, make it work. Then optimize for performance in terms of speed. 17 | - Consider implementing the CPU version first if it seems easier. 18 | - Data caching, reuse on GPU for performance reasons. 19 | - E.g. initial anchor creation. 20 | 21 | ### Dataset 22 | 23 | Dataset being used at the moment is MSCOCO which can be downloaded [here](http://mscoco.org/dataset/#download). 24 | 25 | ### Test 26 | 27 | Tests are implemented using `unittest`. To run a test, run the following command from the project root. 28 | 29 | ```bash 30 | python -m test. 31 | ``` 32 | -------------------------------------------------------------------------------- /VGG.py: -------------------------------------------------------------------------------- 1 | import chainer 2 | from chainer import Variable 3 | import chainer.links as L 4 | import chainer.functions as F 5 | 6 | 7 | class VGG(chainer.Chain): 8 | 9 | """ 10 | VGGNet 11 | - It takes (224, 224, 3) sized image as imput 12 | """ 13 | 14 | def __init__(self): 15 | super(VGG, self).__init__( 16 | conv1_1 = L.Convolution2D(3, 64, 3, stride=1, pad=1), 17 | conv1_2 = L.Convolution2D(64, 64, 3, stride=1, pad=1), 18 | 19 | conv2_1 = L.Convolution2D(64, 128, 3, stride=1, pad=1), 20 | conv2_2 = L.Convolution2D(128, 128, 3, stride=1, pad=1), 21 | 22 | conv3_1 = L.Convolution2D(128, 256, 3, stride=1, pad=1), 23 | conv3_2 = L.Convolution2D(256, 256, 3, stride=1, pad=1), 24 | conv3_3 = L.Convolution2D(256, 256, 3, stride=1, pad=1), 25 | 26 | conv4_1 = L.Convolution2D(256, 512, 3, stride=1, pad=1), 27 | conv4_2 = L.Convolution2D(512, 512, 3, stride=1, pad=1), 28 | conv4_3 = L.Convolution2D(512, 512, 3, stride=1, pad=1), 29 | 30 | conv5_1 = L.Convolution2D(512, 512, 3, stride=1, pad=1), 31 | conv5_2 = L.Convolution2D(512, 512, 3, stride=1, pad=1), 32 | conv5_3 = L.Convolution2D(512, 512, 3, stride=1, pad=1), 33 | 34 | fc6=L.Linear(25088, 4096), 35 | fc7=L.Linear(4096, 4096), 36 | fc8=L.Linear(4096, 1000) 37 | ) 38 | self.train = False 39 | 40 | def __call__(self, x, t): 41 | h = F.relu(self.conv1_1(x)) 42 | h = F.relu(self.conv1_2(h)) 43 | h = F.max_pooling_2d(h, 2, stride=2) 44 | 45 | h = F.relu(self.conv2_1(h)) 46 | h = F.relu(self.conv2_2(h)) 47 | h = F.max_pooling_2d(h, 2, stride=2) 48 | 49 | h = F.relu(self.conv3_1(h)) 50 | h = F.relu(self.conv3_2(h)) 51 | h = F.relu(self.conv3_3(h)) 52 | h = F.max_pooling_2d(h, 2, stride=2) 53 | 54 | h = F.relu(self.conv4_1(h)) 55 | h = F.relu(self.conv4_2(h)) 56 | h = F.relu(self.conv4_3(h)) 57 | h = F.max_pooling_2d(h, 2, stride=2) 58 | 59 | h = F.relu(self.conv5_1(h)) 60 | h = F.relu(self.conv5_2(h)) 61 | h = F.relu(self.conv5_3(h)) 62 | h = F.max_pooling_2d(h, 2, stride=2) 63 | 64 | h = F.dropout(F.relu(self.fc6(h)), train=self.train, ratio=0.5) 65 | h = F.dropout(F.relu(self.fc7(h)), train=self.train, ratio=0.5) 66 | h = self.fc8(h) 67 | 68 | if self.train: 69 | self.loss = F.softmax_cross_entropy(h, t) 70 | self.acc = F.accuracy(h, t) 71 | return self.loss 72 | else: 73 | self.pred = F.softmax(h) 74 | return self.pred 75 | 76 | -------------------------------------------------------------------------------- /data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | import six 5 | 6 | # NOTE: Not tested with most recent code. 7 | 8 | # Dataset base directory, set to development directory for quick testing 9 | basedir = "./data/coco/samples/" 10 | 11 | 12 | def create_from_mscoco(filename, shape, size): 13 | """Save an MSCOCO dataset representational file to disk with the name 14 | specified by the parameter. 15 | """ 16 | if not os.path.exists(filename): 17 | print('Processing MSCOCO to create {}, this might take a while...' 18 | .format(filename)) 19 | data_train, target_train = load_reshape(1, shape, size) 20 | 21 | coco = {} 22 | coco['data'] = np.append(data_train, target_train, axis=0) 23 | 24 | with open(filename, 'wb') as output: 25 | six.moves.cPickle.dump(coco, output, -1) 26 | else: 27 | print('Found {}'.format(filename)) 28 | 29 | 30 | def load_reshape(num, shape, size): 31 | """Load the MNIST gzip files from disk and reshape the data.""" 32 | shape = tuple([num, *shape]) 33 | data = np.zeros(num * size, dtype=np.uint8).reshape(shape) 34 | target = None 35 | 36 | # TODO 37 | # target = np.zeros(num, dtype=np.uint8).reshape((num, )) 38 | 39 | # For each file (.jpg) in the directory 40 | for file in os.listdir(basedir): 41 | with open(os.path.join(basedir, file), 'r') as image: 42 | # image.read(16) 43 | print(image) 44 | image.read() 45 | for i in six.moves.range(num): 46 | print(i) 47 | # target[i] = ord(f_labels.read(1)) 48 | for j in six.moves.range(shape[1]): # For each pixel in width 49 | print(j) 50 | for k in six.moves.range(shape[2]): 51 | # For each pixel in height 52 | print(k) 53 | data[i, 0, j, k] = ord(image.read(1)) 54 | print(data) 55 | 56 | # with gzip.open(images, 'rb') as f_images,\ 57 | # gzip.open(labels, 'rb') as f_labels: 58 | # f_images.read(16) 59 | # f_labels.read(8) 60 | 61 | return data, target 62 | 63 | 64 | if __name__ == '__main__': 65 | """Go through all predownloaded images of the MSCOCO dataset, pack them 66 | into pickled files so that they can be easily read durint training. Notice 67 | that the large size of the file (estimated to be around 10-15GB) assumes 68 | that the hardware running this script has sufficient amounts of memory. 69 | 70 | The data is reshaped in to 2-dimensional data with annotations with 3 71 | channels representing the RGC color channels. 72 | """ 73 | 74 | pklfilename = 'mscoco.pkl' 75 | if os.path.exists(pklfilename): 76 | print('The MSCOCO dataset is already reshaped and processed') 77 | sys.exit() 78 | 79 | print('Preparing the MSCOCO dataset...') 80 | 81 | width = 640 82 | height = 480 83 | num_channels = 3 84 | size = width * height * num_channels 85 | dim = (num_channels, width, height) 86 | create_from_mscoco(pklfilename, dim, size) 87 | print('Done preparing the MSCOCO dataset') 88 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | # Dataset 2 | 3 | Add the datasets here for training, validation and testing. E.g. create a directory here called `coco` with the following structure 4 | 5 | ``` 6 | coco/ 7 | annotations/ 8 | captions_train2014.json 9 | captions_val2014.json 10 | ... 11 | images/ 12 | test/ # Used for development 13 | COCO_train2014_000000000009.jpg # Example 14 | ... 15 | test2014/ 16 | train2014/ 17 | val2014/ 18 | sample/ # Used for development 19 | COCO_train2014_000000000009.jpg # Example 20 | ... 21 | ``` 22 | -------------------------------------------------------------------------------- /functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hvy/chainer-faster-rcnn/cf76a654024dab181d1e96735c443ca4ef5d973a/functions/__init__.py -------------------------------------------------------------------------------- /functions/loss/multitask.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from chainer import cuda 4 | from chainer import function 5 | from chainer.utils import type_check 6 | 7 | from utils import cupyutils 8 | from utils import iouutils 9 | from utils import imgutils 10 | from utils import anchorutils 11 | 12 | 13 | def coord_param(box, anchor): 14 | """ 15 | x, y, w, h 16 | """ 17 | xp = cuda.get_array_module(box, anchor) 18 | xy = (box[0:2] - anchor[0:2]) / anchor[2:4] 19 | wh = xp.log(box[2:4] / anchor[2:4]) 20 | return xp.concatenate([xy, wh]) 21 | 22 | 23 | def loss_cls(p, p_start): 24 | raise NotImplementedError('loss_cls') 25 | 26 | 27 | def loss_bbox(t, t_star): 28 | sum(smooth_l1(t_star - t)) 29 | 30 | 31 | def smooth_l1(xs): 32 | xp = cuda.get_array_module(xs) 33 | return [0.5 * x * x if x < 1 else x - 0.5 for x in xp.abs(xs)] 34 | 35 | 36 | class MultiTask(function.Function): 37 | 38 | def __init__(self, lbd, spatial_scale, use_cudnn=True): 39 | self.lbd = lbd # The lambda parameter mentioned in the Faster R-CNN paper 40 | self.spatial_scale = spatial_scale 41 | self.use_cudnn = use_cudnn 42 | 43 | # Generate generic anchors 44 | # NOTE: Commented away since the anchors are now passed as an argument 45 | # self.anchors = generate_anchors() 46 | 47 | def check_type_forward(self, in_types): 48 | # TODO: Also check the dimensions of the given anchors 49 | type_check.expect( 50 | in_types.size() == 4, 51 | in_types[0].shape[2] == in_types[1].shape[2], # e.g. 40 52 | in_types[0].shape[3] == in_types[1].shape[3], # e.g. 27 53 | in_types[2].shape[1] == 4 # Anchor dimensions 54 | ) 55 | 56 | def forward_cpu(self, inputs): 57 | raise NotImplementedError('forward_cpu') 58 | 59 | def forward_gpu(self, inputs): 60 | # Parse the input 61 | xp = cuda.get_array_module(*inputs) 62 | cls, bbox, anchors, gt_boxes = inputs 63 | width, height = cls.shape[-2:] 64 | 65 | print('---------- FORWARD ----------') 66 | print('Lambda: {}'.format(self.lbd)) 67 | print('Spatial scale: {}'.format(self.spatial_scale)) 68 | print('bbox.shape: {}'.format(bbox.shape)) 69 | print('cls.shape: {}'.format(cls.shape)) 70 | print('Downsampled width: {}'.format(width)) 71 | print('Downsampled height: {}'.format(height)) 72 | print('Ground truth boxes (target): {}'.format(gt_boxes)) 73 | print('Inside Anchors: {}'.format(anchors.shape)) 74 | 75 | feat_stride = 1 / self.spatial_scale 76 | print('feat_stride: {}'.format(feat_stride)) 77 | 78 | # Bounding box labels, 1 is positive, 0 is negative, -1 is ignored 79 | labels = xp.empty((len(anchors), ), dtype=xp.float32) 80 | labels.fill(-1) 81 | 82 | overlaps = iouutils.ious(anchors, gt_boxes) 83 | 84 | print('IOUs of Inside Anchors and Ground Truth Boxes') 85 | print(overlaps) 86 | print('IOU Overlap Non-Zero Counts: {}'.format(xp.count_nonzero(overlaps))) 87 | print('Ground Truth Boxes Shape: {}'.format(overlaps.shape)) 88 | 89 | # TODO: Continue here... 90 | 91 | # Select the ground truth box with highest IOU for each anchor 92 | argmax_overlaps = overlaps.argmax(axis=1) 93 | print('Highest IOU Ground Truth Index for each Anchor') 94 | print(argmax_overlaps) 95 | print(' #non-zeros: {}'.format(xp.count_nonzero(argmax_overlaps))) 96 | max_overlaps = overlaps.take((xp.arange(len(anchors)), argmax_overlaps)) # TODO Or use None to index all elements? 97 | print('Top Overlaps') 98 | print(max_overlaps) 99 | 100 | return (1,1) # Always a tuple, e.g. y, for all methods 101 | 102 | def backward_cpu(self, inputs, grad_outputs): 103 | raise NotImplementedError('backward_cpu') 104 | 105 | def backward_gpu(self, inputs, grad_outputs): 106 | raise NotImplementedError('backward_gpu') 107 | 108 | 109 | def multitask(cls, bbox, anchors, t, lbd=10, spatial_scale=0.0625, use_cudnn=True): # 0.0625 = 1/16, e.g. 4 max pooling layers with size and stride of 2. 110 | return MultiTask(lbd, spatial_scale, use_cudnn)(cls, bbox, anchors, t) 111 | -------------------------------------------------------------------------------- /load_model.py: -------------------------------------------------------------------------------- 1 | from chainer import serializers 2 | from chainer import Variable 3 | from VGG import VGG 4 | from vggrpn import VGGRPN 5 | 6 | 7 | def load_model(filename, model): 8 | print('Loading model {:s}'.format(filename)) 9 | serializers.load_hdf5(filename, model) 10 | print('Successfully loaded model') 11 | return model 12 | 13 | 14 | def save_model(filename, model): 15 | print('Saving model {:s}'.format(filename)) 16 | serializers.save_hdf5(filename, model) 17 | print('Successfully saved model') 18 | 19 | 20 | if __name__ == '__main__': 21 | """Makes a copy of a (trained) VGG model to a VGG RPN model 22 | and saved it to a file named 'VGGRPN.model' 23 | """ 24 | # Create an empty VGG model (w random weights) 25 | vgg = VGG() 26 | 27 | # Load the parameter data (weights and biases) from file 28 | vgg = load_model('VGG.model', vgg) 29 | 30 | # Create an empty VGG RPN model (w random weights) 31 | vgg_rpn = VGGRPN() 32 | 33 | # Copy the shared parameters from the VGG model to the VGG RPN model 34 | for attr in vgg.namedlinks(skipself=True): 35 | print('Copying layer {:s}'.format(attr[0][1:])) 36 | layer = getattr(vgg_rpn, attr[0][1:]) 37 | layer.W = Variable(attr[1].W.data) 38 | layer.b = Variable(attr[1].b.data) 39 | setattr(vgg_rpn, attr[0], layer) 40 | 41 | # Save the new VGG RPN model to file 42 | save_model('vggrpn.model', vgg_rpn) 43 | 44 | print('Done') 45 | -------------------------------------------------------------------------------- /models/vggrpn.py: -------------------------------------------------------------------------------- 1 | import chainer 2 | import chainer.links as L 3 | import chainer.functions as F 4 | from functions.loss.multitask import multitask 5 | 6 | F.multitask = multitask 7 | 8 | 9 | class VGGRPN(chainer.Chain): 10 | """VGGNet 11 | - It takes (224, 224, 3) sized image as input 12 | """ 13 | 14 | def __init__(self): 15 | super(VGGRPN, self).__init__( 16 | conv1_1=L.Convolution2D(3, 64, 3, stride=1, pad=1), 17 | conv1_2=L.Convolution2D(64, 64, 3, stride=1, pad=1), 18 | 19 | conv2_1=L.Convolution2D(64, 128, 3, stride=1, pad=1), 20 | conv2_2=L.Convolution2D(128, 128, 3, stride=1, pad=1), 21 | 22 | conv3_1=L.Convolution2D(128, 256, 3, stride=1, pad=1), 23 | conv3_2=L.Convolution2D(256, 256, 3, stride=1, pad=1), 24 | conv3_3=L.Convolution2D(256, 256, 3, stride=1, pad=1), 25 | 26 | conv4_1=L.Convolution2D(256, 512, 3, stride=1, pad=1), 27 | conv4_2=L.Convolution2D(512, 512, 3, stride=1, pad=1), 28 | conv4_3=L.Convolution2D(512, 512, 3, stride=1, pad=1), 29 | 30 | conv5_1=L.Convolution2D(512, 512, 3, stride=1, pad=1), 31 | conv5_2=L.Convolution2D(512, 512, 3, stride=1, pad=1), 32 | conv5_3=L.Convolution2D(512, 512, 3, stride=1, pad=1), 33 | 34 | # RPN. See models/coco/VGG16/faster_rcnn_end2end/train.prototext 35 | # for reference 36 | rpn_conv=L.Convolution2D(512, 512, 3, stride=1, pad=1), 37 | 38 | # RPN Classification (foreground/background) Sibling 39 | # (Kernel size = 1, a linear mapping) 40 | # 18 = 2 (bg/fg) * 9 (anchors) 41 | rpn_cls=L.Convolution2D(512, 18, 1, stride=1, pad=0), 42 | 43 | # RPN Bounding Box Prediction Sibling 44 | # (Kernel size = 1, a linear mapping) 45 | # 36 = 4 (x, y, w, h) * 9 (anchors) 46 | rpn_bbox=L.Convolution2D(512, 36, 1, stride=1, pad=0), 47 | 48 | # TODO: Remove the following layers if they aren't used 49 | fc6=L.Linear(25088, 4096), 50 | fc7=L.Linear(4096, 4096), 51 | fc8=L.Linear(4096, 1000) 52 | ) 53 | self.train = True 54 | self.k = 9 # Number of achors 55 | 56 | def __call__(self, x, t, anchors): 57 | h = F.relu(self.conv1_1(x)) 58 | h = F.relu(self.conv1_2(h)) 59 | h = F.max_pooling_2d(h, 2, stride=2) 60 | 61 | h = F.relu(self.conv2_1(h)) 62 | h = F.relu(self.conv2_2(h)) 63 | h = F.max_pooling_2d(h, 2, stride=2) 64 | 65 | h = F.relu(self.conv3_1(h)) 66 | h = F.relu(self.conv3_2(h)) 67 | h = F.relu(self.conv3_3(h)) 68 | h = F.max_pooling_2d(h, 2, stride=2) 69 | 70 | h = F.relu(self.conv4_1(h)) 71 | h = F.relu(self.conv4_2(h)) 72 | h = F.relu(self.conv4_3(h)) 73 | h = F.max_pooling_2d(h, 2, stride=2) 74 | 75 | h = F.relu(self.conv5_1(h)) 76 | h = F.relu(self.conv5_2(h)) 77 | h = F.relu(self.conv5_3(h)) 78 | # h = F.max_pooling_2d(h, 2, stride=2) 79 | 80 | # RPN 81 | # h = F.relu(self.rpn_conv(h)) 82 | h = F.relu(self.rpn_conv(h)) 83 | 84 | # h = F.dropout(F.relu(self.fc7(h)), train=self.train, ratio=0.5) 85 | # h = self.fc8(h) 86 | 87 | if self.train: 88 | print('self.train == True') 89 | # TODO Need to compute the loss and the acc here 90 | # TODO Create a new loss function class (multitask.py, 91 | # function.Function) and set self.loss = F.multitask(h, t) 92 | 93 | cls = F.softmax(self.rpn_cls(h)) 94 | 95 | print('Computed cls softmax') 96 | 97 | # TODO For now, create a target t by copying the values from the 98 | # cls to test the smooth L1 loss 99 | # t = Variable(cls.data.copy()) 100 | # self.smooth_l1_loss = huber_loss(cls, t, delta=1) 101 | # print('Smooth L1 loss shape: {}'.format(self.smooth_l1_loss 102 | # .data.shape)) 103 | 104 | bbox = self.rpn_bbox(h) 105 | 106 | self.loss = F.multitask(cls, bbox, anchors, t) 107 | 108 | return self.loss 109 | 110 | # self.loss = F.softmax_cross_entropy(h, t) 111 | # self.acc = F.accuracy(h, t) 112 | # return self.loss 113 | else: 114 | print('self.train == False') 115 | self.pred = self.rpn_cls(h), self.rpn_bbox(h) 116 | # self.pred = F.softmax(h) 117 | return self.pred 118 | -------------------------------------------------------------------------------- /mscoco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | import json 5 | from utils import profiler 6 | 7 | 8 | def _filename_from_id(id, group='train'): 9 | if isinstance(id, int): 10 | id = str(id) 11 | padding = '0' * (12 - len(id)) 12 | return 'COCO_{}2014_{}.jpg'.format(group, padding + id) 13 | 14 | 15 | class MSCOCO: 16 | 17 | def __init__(self): 18 | self.files = {} 19 | self.annotations = {} 20 | 21 | def load_annotations(self, path): 22 | with open(path) as jsonfile: 23 | decoded = json.load(jsonfile) 24 | for annotation in decoded['annotations']: 25 | filename = _filename_from_id(annotation['image_id']) 26 | if filename in self.annotations: 27 | self.annotations[filename].append(annotation) 28 | else: 29 | self.annotations[filename] = [annotation] 30 | 31 | def load_images(self, path): 32 | for f in os.listdir(path): 33 | if os.path.isfile(os.path.join(path, f)): 34 | self.files[f] = cv2.imread(os.path.join(path, f)) 35 | 36 | def _load_images(self, path): 37 | """ Use later. Not used at the moment since we want to use dictionaries a 38 | nd not lists. 39 | """ 40 | raise NotImplementedError('_load_images') 41 | 42 | def image(self, id, group='train', normalized=True): 43 | filename = _filename_from_id(id, group) 44 | image = self.files[filename] 45 | height, width, channels = image.shape 46 | 47 | # Reshape to Chainer preferred format, 48 | # i.e. (batch_size, n_chanels, width, height) 49 | image = np.array([image]).astype(np.float32) \ 50 | .reshape((1, channels, width, height)) 51 | 52 | if normalized: 53 | image /= 255 54 | 55 | annotation = self.annotations[filename] 56 | return image, annotation 57 | 58 | 59 | if __name__ == '__main__': 60 | # NOTE: Run a local test when it is run as main 61 | print('Memory usage (before): {} MB' 62 | .format(profiler.memory_usage(format='mb'))) 63 | coco = MSCOCO() 64 | print('Loading images...') 65 | coco.load_images('./data/coco/images/test') 66 | print('Done loading images') 67 | print('Loading annotations...') 68 | coco.load_annotations('./data/coco/annotations/233833_annotations.json') 69 | print('Done loading annotations') 70 | print('Memory usage (after): {} MB' 71 | .format(profiler.memory_usage(format='mb'))) 72 | image, annotations = coco.image('233833') 73 | print(image.shape) 74 | print(annotations) 75 | bboxs = [a['bbox'] for a in annotations] 76 | print(bboxs) 77 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hvy/chainer-faster-rcnn/cf76a654024dab181d1e96735c443ca4ef5d973a/test/__init__.py -------------------------------------------------------------------------------- /test/test_anchorutils.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | import chainer 5 | from chainer.cuda import cupy as cp 6 | 7 | from utils import imgutils 8 | from utils import anchorutils 9 | 10 | 11 | class TestAnchorUtils(unittest.TestCase): 12 | 13 | def setUp(self): 14 | self.gpu = True 15 | self.write_result_img = True 16 | 17 | def test_anchor_generation(self): 18 | gpu = self.gpu 19 | write_result_img = self.write_result_img 20 | 21 | img_width = 640 22 | img_height = 427 23 | feat_stride = 16 # 2^4 as in VGG16 24 | 25 | # Generate the anchors, for the given image properties 26 | # This should only be done once for our dataset for performance reasons 27 | anchors_inside = anchorutils.generate_inside_anchors( 28 | img_width, img_height, feat_stride=feat_stride, 29 | allowed_offset=None, gpu=gpu) 30 | 31 | # Assert that the returned list of anchors is either on the CPU 32 | # or the GPU depending on the given parameters 33 | arr_module = cp.get_array_module(anchors_inside) 34 | if gpu: 35 | self.assertTrue(arr_module == cp) 36 | self.assertTrue(isinstance(anchors_inside, chainer.cuda.ndarray)) 37 | else: 38 | self.assertTrue(arr_module == np) 39 | self.assertTrue(isinstance(anchors_inside, numpy.ndarray)) 40 | 41 | print('Anchors inside: {}'.format(len(anchors_inside))) 42 | 43 | img_area = img_width * img_height 44 | for anchor in anchors_inside: 45 | x1 = anchor[0] 46 | y1 = anchor[1] 47 | x2 = anchor[2] 48 | y2 = anchor[3] 49 | 50 | self.assertTrue(x1 >= 0) 51 | self.assertTrue(y1 >= 0) 52 | self.assertTrue(x2 < img_width) 53 | self.assertTrue(y2 < img_height) 54 | 55 | area = (x2 - x1) * (y2 - y1) 56 | 57 | self.assertTrue(area > 0) 58 | self.assertTrue(area <= img_area) 59 | 60 | # Save the image to disk if specified in the test 61 | if write_result_img: 62 | print('Saving image to disk...') 63 | img = imgutils.draw_empty(img_width, img_height) 64 | for anchor in anchors_inside: 65 | imgutils.draw_box(img, anchor[0], anchor[1], anchor[2], 66 | anchor[3]) 67 | imgutils.write_img('test_anchor_generation.jpg', img) 68 | 69 | 70 | if __name__ == '__main__': 71 | unittest.main() 72 | -------------------------------------------------------------------------------- /test/test_basic_anchor_creation.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from utils import imgutils 4 | from utils import anchorutils 5 | 6 | 7 | class BasicAnchorCreationTest(unittest.TestCase): 8 | 9 | def setUp(self): 10 | self.gpu = True 11 | self.anchorsize_err_tolerance = 0.2 12 | self.filename = 'anchors_basic.jpg' 13 | 14 | def test_anchor_sizes(self): 15 | """ Generate the basic 9 anchors and compute their sizes to make sure 16 | that the sizes of the anchors aren't too small or too big. 17 | """ 18 | gpu = self.gpu 19 | anchorsize_err_tolerance = self.anchorsize_err_tolerance 20 | 21 | base_size = 16 22 | ratios = [0.5, 1, 2] 23 | scales = [8, 16, 32] 24 | 25 | # Possible sizes (areas), e.g. 128^2, 256^2, 512^2 26 | original_sizes = [(base_size * scale) ** 2 for scale in scales] 27 | 28 | anchors = anchorutils.generate_anchors(base_size, ratios, scales, 29 | gpu=gpu) 30 | 31 | for anchor in anchors: 32 | x1, y1, x2, y2 = anchor 33 | size = (x2 - x1) * (y2 - y1) 34 | 35 | # Compute the difference between the sizes of the generated anchor 36 | # and the original anchor sizes 37 | diffs = [abs(original_size - size) 38 | for original_size in original_sizes] 39 | min_idx, min_val = min(enumerate(diffs), key=lambda p: p[1]) 40 | 41 | self.assertTrue(min_val < original_sizes[min_idx] * 42 | anchorsize_err_tolerance) 43 | 44 | def test_draw_anchors(self): 45 | """ Generate the basic 9 anchors and draw and then save them to a 46 | file. 47 | """ 48 | gpu = self.gpu 49 | filename = self.filename 50 | 51 | img_width = 1024 52 | img_height = 1024 53 | img = imgutils.draw_empty(img_width, img_height) 54 | 55 | anchors = anchorutils.generate_anchors(gpu=gpu) 56 | 57 | for anchor in anchors: 58 | x1, y1, x2, y2 = anchor 59 | 60 | # Shift the anchor center to the image center 61 | x_shift = 0.5 * img_width 62 | y_shift = 0.5 * img_height 63 | x1 += x_shift 64 | y1 += y_shift 65 | x2 += x_shift 66 | y2 += y_shift 67 | 68 | anchor_color = imgutils.rnd_color() 69 | 70 | imgutils.draw_box(img, x1, y1, x2, y2, color=anchor_color) 71 | 72 | imgutils.write_img(filename, img) 73 | 74 | 75 | if __name__ == '__main__': 76 | unittest.main() 77 | -------------------------------------------------------------------------------- /test/test_cupyutils.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | import time 5 | from chainer.cuda import cupy as cp 6 | from utils import cupyutils 7 | 8 | 9 | class TestCupyutils(unittest.TestCase): 10 | 11 | def _arrays_with_module(self, xp, arr_len, arr_step, n_arr): 12 | return [xp.arange(0, arr_len, arr_step) for a in range(n_arr)] 13 | 14 | def test_meshgrid_performance(self): 15 | arr_len = 1000 16 | arr_step = 0.5 17 | n_arr = 50 # Should be an even number 18 | 19 | # NumPy, CPU 20 | arrs = self._arrays_with_module(np, arr_len, arr_step, n_arr) 21 | start_time = time.time() 22 | 23 | meshed_np = [] 24 | for i in range(0, len(arrs), 2): 25 | meshed = np.meshgrid(arrs[i], arrs[i+1]) 26 | self.assertTrue(cp.get_array_module(meshed, np)) 27 | meshed_np.append(meshed) 28 | 29 | running_time = time.time() - start_time 30 | print('Time with NumPy: {} s'.format(round(running_time, 2))) 31 | 32 | # CuPy, GPU 33 | arrs = self._arrays_with_module(cp, arr_len, arr_step, n_arr) 34 | start_time = time.time() 35 | 36 | meshed_cp = [] 37 | for i in range(0, len(arrs), 2): 38 | meshed = cupyutils.meshgrid(arrs[i], arrs[i+1]) 39 | self.assertTrue(cp.get_array_module(meshed, cp)) 40 | meshed_cp.append(meshed) 41 | 42 | running_time = time.time() - start_time 43 | print('Time with CuPy: {} s'.format(round(running_time, 2))) 44 | 45 | def test_meshgrid_acc(self): 46 | arr_len = 10 47 | arr_step = 0.2 48 | n_arr = 6 # Should be an even number 49 | 50 | # NumPy, CPU 51 | arrs = self._arrays_with_module(np, arr_len, arr_step, n_arr) 52 | 53 | meshed_np = [] 54 | for i in range(0, len(arrs), 2): 55 | meshed = np.meshgrid(arrs[i], arrs[i+1]) 56 | self.assertTrue(cp.get_array_module(meshed, np)) 57 | meshed_np.append(meshed) 58 | 59 | # Cuda, GPU 60 | arrs = self._arrays_with_module(cp, arr_len, arr_step, n_arr) 61 | 62 | meshed_cp = [] 63 | for i in range(0, len(arrs), 2): 64 | meshed = cupyutils.meshgrid(arrs[i], arrs[i+1]) 65 | self.assertTrue(cp.get_array_module(meshed, cp)) 66 | meshed_cp.append(meshed) 67 | 68 | print('Comparing NumPy and CuPy values') 69 | self.assertEqual(len(meshed_np), len(meshed_cp)) # n_arr / 2 70 | for m_cp, m_np in zip(meshed_cp, meshed_np): 71 | self.assertEqual(len(m_cp), len(m_np)) # 2 72 | for cs, ns in zip(m_cp[0], m_np[0]): 73 | self.assertEqual(cs.shape, ns.shape) 74 | for c, n in zip(cs, ns): 75 | self.assertEqual(c, n) 76 | for cs, ns in zip(m_cp[1], m_np[1]): 77 | self.assertEqual(cs.shape, ns.shape) 78 | for c, n in zip(cs, ns): 79 | self.assertEqual(c, n) 80 | 81 | 82 | if __name__ == '__main__': 83 | unittest.main() 84 | -------------------------------------------------------------------------------- /test/test_iou.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import time 4 | import math 5 | import numpy as np 6 | from chainer.cuda import cupy as cp 7 | from utils import iouutils 8 | 9 | 10 | class TestIouUtils(unittest.TestCase): 11 | 12 | def setUp(self): 13 | anchors = cp.arange(1000, dtype=cp.float32) 14 | gt_boxes = anchors * 1 15 | anchors = anchors.reshape(250, 4) 16 | gt_boxes = gt_boxes.reshape(250, 4)[:10] 17 | 18 | self.cpu_anchors = cp.asnumpy(anchors) 19 | self.cpu_gt_boxes = cp.asnumpy(gt_boxes) 20 | self.gpu_anchors = anchors 21 | self.gpu_gt_boxes = gt_boxes 22 | 23 | def test_acc_gpu(self): 24 | boxes = cp.array([[0, 0, 10, 10], [2, 3, 4, 5]], dtype=cp.float32) 25 | query_boxes = cp.array([[0, 0, 10, 10], [2, 2, 15, 15], [5, 5, 15, 15], [20, 20, 22, 30]], dtype=cp.float32) 26 | ious = iouutils.ious(boxes, query_boxes) 27 | print(ious) 28 | print(ious.shape) 29 | 30 | def test_speed_gpu(self): 31 | a = cp.array([[1,2,1,2], [3,4,3,4], [5,5,5,5]], dtype=cp.float32) 32 | b = cp.array([[10,2,10,2], [1,1,11,1]], dtype=cp.float32) 33 | ans = iouutils.ious(a, b) 34 | print(ans) 35 | print(ans.shape) 36 | 37 | # def test_gpu_speed(self): 38 | # # TODO 39 | # print('GPU') 40 | # anchors = self.gpu_anchors 41 | # gt_boxes = self.gpu_gt_boxes 42 | # assert cp.get_array_module(anchors, gt_boxes).__name__ == 'cupy' 43 | # # ious = bboxutils.ious_gpu(anchors, gt_boxes) 44 | # start = time.clock() 45 | # for gt_box in gt_boxes: 46 | # for anchor in anchors: 47 | # iou = bboxutils.iou_gpu(anchor, gt_box) 48 | # # print(iou) 49 | # end = time.clock() 50 | # print('GPU Time: {} s'.format(end - start)) 51 | 52 | # def test_gpu_acc(self): 53 | # anchor = cp.array([10, 10, 20, 40], dtype=cp.float32) 54 | # gt_box = cp.array([15, 15, 35, 45], dtype=cp.float32) 55 | # # iou = bboxutils.iou_gpu(anchor, gt_box) 56 | # # assert math.isclose(iou, 0.1613, rel_tol=1e-2) 57 | # # print('IOU {}'.format(iou)) 58 | 59 | # anchors = cp.vstack((anchor, (anchor + 10), anchor)) 60 | # gt_bboxs = cp.vstack((gt_box, (anchor + 1.0))) 61 | 62 | # for g in gt_bboxs: 63 | # for a in anchors: 64 | # print('----------------') 65 | # print(g) 66 | # print(a) 67 | # print(bboxutils.iou_cpu(a, g)) 68 | 69 | # ious = bboxutils.ious_gpu(anchors, gt_bboxs) 70 | # print('IOUS {}'.format(ious.shape)) 71 | # for iou in ious: 72 | # print(iou) 73 | 74 | 75 | if __name__ == '__main__': 76 | unittest.main() 77 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import chainer 2 | from chainer import Variable, optimizers, serializers 3 | from models.vggrpn import VGGRPN 4 | from mscoco import MSCOCO 5 | from utils import anchorutils 6 | 7 | # TODO: Hardcoded to use the GPU 8 | xp = chainer.cuda.cupy 9 | 10 | 11 | def load_model(filename, model): 12 | """Load the model with the file data.""" 13 | print('Loading pretrained model...') 14 | try: 15 | serializers.load_hdf5(filename, model) 16 | print('Successfully loaded pretrained model') 17 | except OSError as err: 18 | print('OS error: {}'.format(err)) 19 | print('Could not find a pretrained model. \ 20 | Proceeding with a randomly initialized model.') 21 | 22 | 23 | if __name__ == "__main__": 24 | print("Training the RPN...") 25 | model = VGGRPN() 26 | 27 | # TODO: Skip the model loading during test. 28 | # load_model('vggrpn.model', model) 29 | 30 | model.to_gpu() 31 | optimizer = optimizers.SGD() 32 | optimizer.setup(model) 33 | 34 | # Load a sample image, the image with id 233833 35 | coco = MSCOCO() 36 | coco.load_images('./data/coco/images/test') 37 | coco.load_annotations('./data/coco/annotations/233833_annotations.json') 38 | 39 | # Get the image and annotation data from the MSCOCO wrapper 40 | image, annotations = coco.image('233833') 41 | 42 | # Preprocess the annotation (ground truth boxes) 43 | gtboxes = [] 44 | for annotation in annotations: 45 | x1, y1, w, h = annotation['bbox'] 46 | gtbox = [x1, y1, x1 + w, y1 + h] 47 | gtboxes.append(gtbox) 48 | gtboxes = xp.array(gtboxes, dtype=xp.float32) 49 | 50 | print('Ground Truth Boxes') 51 | print(gtboxes) 52 | 53 | print(image.shape) 54 | img_width = image.shape[2] 55 | img_height = image.shape[3] 56 | 57 | # Optimization 58 | # Generate anchors once, assuming that the dimensions are the same, 59 | # reuse those anchors throughout the training 60 | anchors = anchorutils.generate_inside_anchors( 61 | img_width, img_height, feat_stride=16, allowed_offset=None, 62 | gpu=True) 63 | print('Anchors inside image with dimensions ({w}, {h}): {num_anchors}' 64 | .format(w=img_width, h=img_height, num_anchors=len(anchors))) 65 | print('Anchor array module: {}'.format(xp.get_array_module(anchors))) 66 | 67 | # Start the training 68 | for epoch in range(1): 69 | print('Epoch: {epoch}'.format(epoch=epoch)) 70 | 71 | # TODO: All data used in during the epoch should be transferred 72 | # to the GPU here to for performance resasons. 73 | 74 | for i in range(1): 75 | # Mini batch 76 | # x = Variable(xp.asarray(x_train)) 77 | # t = Variable(xp.asarray(y_train[indexes[i : i + batchsize]])) 78 | x = Variable(xp.asarray(image)) 79 | print("image.shape: {}".format(x.data.shape)) 80 | t = Variable(gtboxes) 81 | model.zerograds() 82 | # TODO: Make sure we need to reinitialize the anchors here as 83 | # Chainer variables, otherwise do it once in the beginning 84 | anchors_var = Variable(anchors) 85 | loss = model(x, t, anchors_var) 86 | print('Loss: {}'.format(loss)) 87 | # optimizer.update(model, x, t) 88 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hvy/chainer-faster-rcnn/cf76a654024dab181d1e96735c443ca4ef5d973a/utils/__init__.py -------------------------------------------------------------------------------- /utils/anchorutils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from chainer.cuda import cupy as cp 3 | from utils import cupyutils 4 | 5 | xp = None 6 | 7 | 8 | def generate_inside_anchors(width, height, feat_stride=16, allowed_offset=None, 9 | gpu=True): 10 | """Return a set of anchors for a given image dimension. 11 | For performance improvement, anchors should be generated once and be 12 | reused. However, it assumes that the dimensions are the same during the 13 | whole training and the testing process. 14 | """ 15 | # TODO: Allow anchors to be slighly outside the image and still be included 16 | global xp 17 | xp = cp if gpu else np 18 | 19 | anchors = generate_anchors(gpu=gpu) 20 | 21 | # Apply the 9 anchors to all positions on the filter map 22 | feat_map_width = width / feat_stride 23 | feat_map_height = height / feat_stride 24 | shift_x = xp.arange(0, feat_map_width) * feat_stride 25 | shift_y = xp.arange(0, feat_map_height) * feat_stride 26 | 27 | if gpu: 28 | shift_x, shift_y = cupyutils.meshgrid(shift_x, shift_y) 29 | else: 30 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 31 | 32 | shifts = xp.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), 33 | shift_y.ravel())).transpose() 34 | 35 | A = len(anchors) # 9 36 | K = shifts.shape[0] 37 | all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)) 38 | .transpose((1, 0, 2))) 39 | shifts = shifts.reshape((1, K, 4)).transpose((1, 0, 2)) 40 | all_anchors = all_anchors.reshape((K * A, 4)) 41 | 42 | # TODO 43 | # In the original paper, they get the indices here, e.g. 44 | # inds_inside = ... and then the anchors 45 | # Following list comprehension works but is very slow, replace it with 46 | # something faste maybe cupy.copy, 47 | # xp.copyto(all_anchors, all_anchors.dtype(0), where=...? 48 | anchors_inside = [] 49 | for a in all_anchors: 50 | if _anchor_inside(a, width, height): 51 | anchors_inside.append(a) 52 | 53 | final_anchors = xp.asarray(anchors_inside, dtype=xp.float32) 54 | 55 | return final_anchors 56 | 57 | 58 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], scales=[8, 16, 32], 59 | gpu=True): 60 | """Generate anchor (reference) windows by enumerating aspect ratios X 61 | scales wrt a reference (0, 0, 15, 15) window.""" 62 | global xp 63 | xp = cp if gpu else np 64 | 65 | ratios = xp.array(ratios) 66 | scales = xp.array(scales) 67 | base_anchor = xp.array([0, 0, base_size - 1, base_size - 1]) 68 | 69 | ratio_anchors = _ratio_enum(base_anchor, ratios) 70 | 71 | anchors = xp.vstack([_scale_enum(ratio_anchors[i, :], scales) 72 | for i in range(ratio_anchors.shape[0])]) 73 | 74 | return anchors 75 | 76 | 77 | def _whctrs(anchor): 78 | """Return width, height, x center, and y center for an anchor (window).""" 79 | w = anchor[2] - anchor[0] + 1 80 | h = anchor[3] - anchor[1] + 1 81 | x_ctr = anchor[0] + 0.5 * (w - 1) 82 | y_ctr = anchor[1] + 0.5 * (h - 1) 83 | return w, h, x_ctr, y_ctr 84 | 85 | 86 | def _mkanchors(ws, hs, x_ctr, y_ctr): 87 | """Given a vector of widths (ws) and heights (hs) around a center 88 | (x_ctr, y_ctr), output a set of anchors (windows). 89 | """ 90 | ws = ws[:, xp.newaxis] 91 | hs = hs[:, xp.newaxis] 92 | anchors = xp.hstack((x_ctr - 0.5 * (ws - 1), 93 | y_ctr - 0.5 * (hs - 1), 94 | x_ctr + 0.5 * (ws - 1), 95 | y_ctr + 0.5 * (hs - 1))) 96 | return anchors 97 | 98 | 99 | def _ratio_enum(anchor, ratios): 100 | """Enumerate a set of anchors for each aspect ratio wrt an anchor.""" 101 | w, h, x_ctr, y_ctr = _whctrs(anchor) 102 | size = w * h 103 | size_ratios = size / ratios 104 | ws = xp.ceil(xp.sqrt(size_ratios)) 105 | hs = xp.ceil(ws * ratios) 106 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 107 | return anchors 108 | 109 | 110 | def _scale_enum(anchor, scales): 111 | """Enumerate a set of anchors for each scale wrt an anchor.""" 112 | w, h, x_ctr, y_ctr = _whctrs(anchor) 113 | ws = w * scales 114 | hs = h * scales 115 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 116 | return anchors 117 | 118 | 119 | def _anchor_inside(anchor, img_width, img_height): 120 | """Return True if the given anchor is completely inside the given image. 121 | """ 122 | return ((anchor[0] >= 0) & (anchor[1] >= 0) & (anchor[2] < img_width) & 123 | (anchor[3] < img_height)) 124 | -------------------------------------------------------------------------------- /utils/cupyutils.py: -------------------------------------------------------------------------------- 1 | from chainer.cuda import cupy 2 | 3 | 4 | def meshgrid(*xi): 5 | """Simplified implementation of numpy.meshgrid using cupy.""" 6 | s0 = (1, 1) 7 | x, y = xi 8 | 9 | output = [cupy.asanyarray(x_tmp).reshape(s0[:i] + (-1,) + s0[i + 1::]) 10 | for i, x_tmp in enumerate(xi)] 11 | # TODO: Alternatives to list comprehension? 12 | shape = [x.size for x in output] 13 | 14 | # Switch first and second axis 15 | fst_new_shape = (1, len(x)) 16 | snd_new_shape = (len(y), 1) 17 | output[0] = output[0].reshape((fst_new_shape)) 18 | output[1] = output[1].reshape((snd_new_shape)) 19 | shape[0], shape[1] = shape[1], shape[0] 20 | 21 | mult_fact = cupy.ones(shape, dtype=int) 22 | 23 | # TODO: Alternatives to list comprehension? 24 | return [x * mult_fact for x in output] 25 | -------------------------------------------------------------------------------- /utils/imgutils.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import cv2 as cv 4 | 5 | 6 | # Basic NumPy colors 7 | blue = (255, 0, 0) 8 | green = (0, 255, 0) 9 | red = (0, 0, 255) 10 | black = (0, 0, 0) 11 | white = (255, 255, 255) 12 | 13 | 14 | def draw_empty(width, height): 15 | """Create an empty black image with the given width and height.""" 16 | num_channels = 3 17 | # Note the order of the width and height below 18 | return np.zeros((height, width, num_channels), np.uint8) 19 | 20 | 21 | def draw_line(img, x1, y1, x2, y2, color=white, thickness=1): 22 | """Draw a line on the given image.""" 23 | x1, y1, x2, y2 = map(_to_int, [x1, y1, x2, y2]) 24 | cv.line(img, (x1, y1), (x2, y2), color, thickness) 25 | return img 26 | 27 | 28 | def draw_box(img, x1, y1, x2, y2, color=None, thickness=1): 29 | """Draw a rectangle on the given image.""" 30 | if color is None: 31 | color = rnd_color() 32 | x1, y1, x2, y2 = map(_to_int, [x1, y1, x2, y2]) 33 | cv.rectangle(img, (x1, y1), (x2, y2), color, thickness) 34 | return img 35 | 36 | 37 | def write_img(filename, img): 38 | """Save the given image to a file.""" 39 | print('imgutil: Saving {} with shape {}'.format(filename, img.shape)) 40 | cv.imwrite(filename, img) 41 | 42 | 43 | def rnd_color(): 44 | """Return a random RGB color.""" 45 | r = random.randint(0, 255) 46 | g = random.randint(0, 255) 47 | b = random.randint(0, 255) 48 | return r, g, b 49 | 50 | 51 | def _to_int(x): 52 | """Try to parse the given value as an int if it already isn't.""" 53 | return x if isinstance(x, int) else int(x) 54 | -------------------------------------------------------------------------------- /utils/iouutils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from chainer.cuda import cupy as cp 3 | 4 | # TODO Hardcoded to use GPU 5 | xp = cp 6 | 7 | 8 | def ious_slow(anchors, gt_boxes): 9 | """Return a set IOU(Intersection-Over-Union)s for ground-truth box for 10 | each anchor. Naive implementation. 11 | """ 12 | return [iou_naive(anchor, gt_box) for anchor in anchors 13 | for gt_box in gt_boxes] 14 | 15 | 16 | def iou_naive(anchor, gt_box): 17 | """Return the IOU for the given anchor ground-truth box pair. 18 | Naive/Slow. 19 | """ 20 | anchor_x1 = anchor[0] 21 | anchor_y1 = anchor[1] 22 | anchor_x2 = anchor[2] 23 | anchor_y2 = anchor[3] 24 | 25 | gt_box_x1 = gt_box[0] 26 | gt_box_y1 = gt_box[1] 27 | gt_box_x2 = gt_box[2] 28 | gt_box_y2 = gt_box[3] 29 | 30 | area_intersection = (max(0, min(anchor_x2, gt_box_x2) - 31 | max(anchor_x1, gt_box_x1)) * 32 | max(0, min(anchor_y2, gt_box_y2) - 33 | max(anchor_y1, gt_box_y1))) 34 | 35 | area_anchor = (anchor_x2 - anchor_x1) * (anchor_y2 - anchor_y1) 36 | area_gt_box = (gt_box_x2 - gt_box_x1) * (gt_box_y2 - gt_box_y1) 37 | 38 | area_union = area_anchor + area_gt_box - area_intersection 39 | 40 | return area_intersection / area_union 41 | 42 | 43 | def ious(boxes, query_boxes): 44 | """Return a set IOU(Intersection-Over-Union)s for ground-truth box for 45 | each anchor. Naive implementation. 46 | """ 47 | # TODO: Improve speed, e.g. test range() instead of enumerate() 48 | overlaps = xp.zeros((boxes.shape[0], query_boxes.shape[0]), 49 | dtype=np.float32) 50 | 51 | for q_i, q in enumerate(query_boxes): 52 | q_area = (q[2] - q[0] + 1) * (q[3] - q[1] + 1) # Area of the query box 53 | for b_i, b in enumerate(boxes): 54 | iw = min(b[2], q[2]) - max(b[0], q[0]) + 1 55 | if iw > 0: 56 | ih = min(b[3], q[3]) - max(b[1], q[1]) + 1 57 | if ih > 0: 58 | ua = ((b[2] - b[0] + 1) * (b[3] - b[1] + 1) + 59 | q_area - iw * ih) 60 | overlaps[b_i, q_i] = iw * ih / ua 61 | 62 | return overlaps 63 | 64 | 65 | def iou_gpu_0(anchor, gt_box): 66 | """Compute the intersection over union rate for the given anchor and a 67 | gt_box. Not very fast, but works... 68 | """ 69 | return cp.ElementwiseKernel( 70 | 'raw float32 anchor, raw float32 gt_box', 71 | 'float32 iou', 72 | ''' 73 | float inters = max(0.0, min(anchor[2], gt_box[2]) - 74 | max(anchor[0], gt_box[0])) * 75 | max(0.0, min(anchor[3], gt_box[3]) - 76 | max(anchor[1], gt_box[1])); 77 | float anchor_area = (anchor[2] - anchor[0]) * 78 | (anchor[3] - anchor[1]); 79 | float gt_area = (gt_box[2] - gt_box[0]) * (gt_box[3] - gt_box[1]); 80 | float union_area = anchor_area + gt_area - inters; 81 | 82 | iou = inters / union_area; 83 | ''', 'intersection_over_union' 84 | )(anchor, gt_box, size=1) # Is size=1 fine? 85 | 86 | 87 | def ious_gpu_1(boxes, query_boxes): 88 | """Kernel function IOU computation.""" 89 | # TODO: Fix, does not work. Not using ElementwiseKernel correct. 90 | n_boxes = boxes.shape[0] 91 | n_query_boxes = query_boxes.shape[0] 92 | 93 | print(n_boxes) 94 | print(n_query_boxes) 95 | print(boxes) 96 | print(query_boxes) 97 | 98 | ious = cp.zeros((n_query_boxes, n_boxes), dtype=cp.float32) 99 | 100 | print(ious) 101 | 102 | cp.ElementwiseKernel( 103 | '''raw float32 boxes, float32 query_boxes, raw int32 num_boxes, 104 | raw int32 num_query_boxes 105 | ''', 106 | 'raw float32 ious', 107 | ''' 108 | for (int q = 0; q < num_query_boxes; ++q) { 109 | float box_area = (query_boxes[q, 2] - query_boxes[q, 0] + 1.0) * 110 | (query_boxes[q, 3] - query_boxes[q, 1] + 1.0); 111 | ious[q, 0] = q; 112 | for (int b = 0; b < num_boxes; ++b) { 113 | float iw = min(boxes[b, 2], query_boxes[q, 2]) - 114 | max(boxes[b, 0], query_boxes[q, 0]) + 1.0; 115 | if (iw > 0.0) { 116 | float ih = min(boxes[b, 3], query_boxes[q, 3]) - 117 | max(boxes[b, 1], query_boxes[q, 1]) + 1.0; 118 | if (ih > 0.0) { 119 | float ua = (boxes[b, 2] - boxes[b, 0] + 1.0) * 120 | (boxes[b, 3] - boxes[b, 1] + 1.0) + 121 | box_area - (iw * ih); 122 | // ious[q, b] = q; 123 | //ious[q, b] = (iw * ih) / ua; 124 | } 125 | } else { 126 | ious[q, b] = -1.1; 127 | } 128 | } 129 | } 130 | ''', 131 | 'intersecion_over_unions' 132 | )(boxes, query_boxes, n_boxes, n_query_boxes, ious, size=1) 133 | return ious 134 | 135 | 136 | def ious_gpu_2(boxes, query_boxes): 137 | """Kernel function IOU computation.""" 138 | # TODO: Fix, does not work. Not using ElementwiseKernel correct. 139 | n_boxes = boxes.shape[0] 140 | n_query_boxes = query_boxes.shape[0] 141 | 142 | print(n_boxes) 143 | print(n_query_boxes) 144 | print(boxes) 145 | print(query_boxes) 146 | 147 | ious = cp.zeros((n_query_boxes, n_boxes), dtype=cp.float32) 148 | 149 | print(ious) 150 | 151 | cp.ElementwiseKernel( 152 | '''raw float32 boxes, raw float32 query_boxes, raw int32 num_boxes, 153 | raw int32 num_query_boxes 154 | ''', 155 | 'raw float32 ious', 156 | ''' 157 | for (int q = 0; q < num_query_boxes; ++q) { 158 | float box_area = (query_boxes[q, 2] - query_boxes[q, 0] + 1.0) * 159 | (query_boxes[q, 3] - query_boxes[q, 1] + 1.0); 160 | ious[q, 0] = q; 161 | for (int b = 0; b < num_boxes; ++b) { 162 | float iw = min(boxes[b, 2], query_boxes[q, 2]) - 163 | max(boxes[b, 0], query_boxes[q, 0]) + 1.0; 164 | if (iw > 0.0) { 165 | float ih = min(boxes[b, 3], query_boxes[q, 3]) - 166 | max(boxes[b, 1], query_boxes[q, 1]) + 1.0; 167 | if (ih > 0.0) { 168 | float ua = (boxes[b, 2] - boxes[b, 0] + 1.0) * 169 | (boxes[b, 3] - boxes[b, 1] + 1.0) + 170 | box_area - (iw * ih); 171 | // ious[q, b] = q; 172 | //ious[q, b] = (iw * ih) / ua; 173 | } 174 | } else { 175 | ious[q, b] = -1.1; 176 | } 177 | } 178 | } 179 | ''', 180 | 'intersecion_over_unions' 181 | )(boxes, query_boxes, n_boxes, n_query_boxes, ious, size=1) 182 | return ious 183 | 184 | 185 | # def test_kernel(a, b): 186 | # print('a.shape {}'.format(a.shape)) 187 | # print('b.shape {}'.format(b.shape)) 188 | # print('a {}'.format(a)) 189 | # print('b {}'.format(b)) 190 | # ans = cp.zeros((a.shape[0]* b.shape[0]), dtype=cp.float32) 191 | # size = a.shape[0] * b.shape[0] 192 | # print('size {}'.format(size)) 193 | # return cp.ElementwiseKernel( 194 | # 'raw T a, raw T b', 195 | # 'T ans', 196 | # ''' 197 | # int a_idx = i % n_boxes, 198 | # ans = i; 199 | # // ans = a[i, 0, 0] + b[0, i, 0]; 200 | # ''', 201 | # 'testing' 202 | # )(a, b, ans, size=size) 203 | 204 | # def test_kernel(a, b): 205 | # print('a.shape {}'.format(a.shape)) 206 | # print('b.shape {}'.format(b.shape)) 207 | # print('a {}'.format(a)) 208 | # print('b {}'.format(b)) 209 | # ans = cp.zeros((b.shape[0] * a.shape[0]), dtype=cp.float32) 210 | # size = a.shape[0] * b.shape[0] 211 | # 212 | # print('size {}'.format(size)) 213 | # # TODO Seems like a and b are ravel()-ed so index with [i] instead of [i, j] 214 | # cp.ElementwiseKernel( 215 | # 'raw float32 boxes, raw float32 qboxes, raw int32 n_b, raw int32 n_qb', 216 | # 'T iou', 217 | # ''' 218 | # int q = i % n_qb; 219 | # int b = i % n_b; 220 | # 221 | # 222 | # //float box_area = (qboxes[(q * n_q) + 2] - qboxes[q * n_q] + 1.0) * (qboxes[(q * n_q) + 3] - qboxes[(q * n_q) + 1] + 1.0); 223 | # float box_area = -1.0; 224 | # float iw = min(boxes[b, 2], qboxes[q, 2]) - max(boxes[b, 0], qboxes[q, 0]) + 1.0; 225 | # if (iw > 0.0) { 226 | # float ih = min(boxes[b, 3], qboxes[q, 3]) - max(boxes[b, 1], qboxes[q, 1]) + 1.0; 227 | # if (ih > 0.0) { 228 | # float ua = (boxes[b, 2] - boxes[b, 0] + 1.0) * (boxes[b, 3] - boxes[b, 1] + 1.0) + 229 | # box_area - (iw * ih); 230 | # iou = (iw * ih) / ua; 231 | # } 232 | # } 233 | # int ind = q * n_q; 234 | # iou = qboxes[ind]; 235 | # ''', 236 | # 'testing' 237 | # )(a, b, a.shape[0], b.shape[0], ans, size=size) 238 | # return ans 239 | -------------------------------------------------------------------------------- /utils/profiler.py: -------------------------------------------------------------------------------- 1 | import os 2 | import psutil 3 | 4 | 5 | def memory_usage(format='bytes'): 6 | """Returns the memory usage in the given format.""" 7 | process = psutil.Process(os.getpid()) 8 | usage_bytes = process.memory_full_info().uss 9 | 10 | for child_process in process.children(recursive=True): 11 | usage_bytes += child_process.memory_full_info().uss 12 | 13 | if format == 'bytes': 14 | return usage_bytes 15 | elif format == 'kb' or format == 'kilobytes': 16 | return usage_bytes / 1000 17 | elif format == 'mb' or format == 'megabytes': 18 | return usage_bytes / 1000000 19 | elif format == 'gb' or format == 'gigabytes': 20 | return usage_bytes / 1000000000 21 | 22 | raise ValueError('Invalid format') 23 | --------------------------------------------------------------------------------