├── README.md ├── chapter1 └── model-evaluation │ ├── README.md │ ├── conf │ ├── arial.ttf │ └── conf.yaml │ ├── data │ ├── detections │ │ └── 1.txt │ ├── groundtruths │ │ └── 1.txt │ └── results │ │ ├── class1.png │ │ └── class2.png │ ├── evaluation.ipynb │ ├── evaluation.py │ └── lib │ ├── Evaluator.py │ ├── Evaluator.pyc │ ├── __pycache__ │ ├── Evaluator.cpython-36.pyc │ ├── detection.cpython-36.pyc │ └── utils.cpython-36.pyc │ ├── detection.py │ ├── detection.pyc │ ├── utils.py │ └── utils.pyc ├── chapter2 ├── mlp.py ├── perception.py ├── perception_sequential.py └── visdom.py ├── chapter3 ├── densenet_block.py ├── detnet_bottleneck.py ├── fpn.py ├── inceptionv1.py ├── inceptionv2.py ├── resnet_bottleneck.py └── vgg.py ├── chapter4 └── faster-rcnn-pytorch │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── _init_paths.py │ ├── cfgs │ ├── res101.yml │ ├── res101_ls.yml │ ├── res50.yml │ └── vgg16.yml │ ├── demo.py │ ├── images │ ├── img1.jpg │ ├── img1_det.jpg │ ├── img1_det_res101.jpg │ ├── img2.jpg │ ├── img2_det.jpg │ ├── img2_det_res101.jpg │ ├── img3.jpg │ ├── img3_det.jpg │ ├── img3_det_res101.jpg │ ├── img4.jpg │ ├── img4_det.jpg │ └── img4_det_res101.jpg │ ├── lib │ ├── datasets │ │ ├── VOCdevkit-matlab-wrapper │ │ │ ├── get_voc_opts.m │ │ │ ├── voc_eval.m │ │ │ └── xVOCap.m │ │ ├── __init__.py │ │ ├── coco.py │ │ ├── ds_utils.py │ │ ├── factory.py │ │ ├── imagenet.py │ │ ├── imdb.py │ │ ├── pascal_voc.py │ │ ├── pascal_voc_rbg.py │ │ ├── tools │ │ │ └── mcg_munge.py │ │ ├── vg.py │ │ ├── vg_eval.py │ │ └── voc_eval.py │ ├── make.sh │ ├── model │ │ ├── __init__.py │ │ ├── faster_rcnn │ │ │ ├── __init__.py │ │ │ ├── faster_rcnn.py │ │ │ ├── resnet.py │ │ │ └── vgg16.py │ │ ├── nms │ │ │ ├── .gitignore │ │ │ ├── __init__.py │ │ │ ├── _ext │ │ │ │ ├── __init__.py │ │ │ │ └── nms │ │ │ │ │ └── __init__.py │ │ │ ├── build.py │ │ │ ├── make.sh │ │ │ ├── nms_cpu.py │ │ │ ├── nms_gpu.py │ │ │ ├── nms_kernel.cu │ │ │ ├── nms_wrapper.py │ │ │ └── src │ │ │ │ ├── nms_cuda.c │ │ │ │ ├── nms_cuda.h │ │ │ │ ├── nms_cuda_kernel.cu │ │ │ │ └── nms_cuda_kernel.h │ │ ├── roi_align │ │ │ ├── __init__.py │ │ │ ├── _ext │ │ │ │ ├── __init__.py │ │ │ │ └── roi_align │ │ │ │ │ └── __init__.py │ │ │ ├── build.py │ │ │ ├── functions │ │ │ │ ├── __init__.py │ │ │ │ └── roi_align.py │ │ │ ├── make.sh │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ └── roi_align.py │ │ │ └── src │ │ │ │ ├── roi_align.c │ │ │ │ ├── roi_align.h │ │ │ │ ├── roi_align_cuda.c │ │ │ │ ├── roi_align_cuda.h │ │ │ │ ├── roi_align_kernel.cu │ │ │ │ └── roi_align_kernel.h │ │ ├── roi_crop │ │ │ ├── __init__.py │ │ │ ├── _ext │ │ │ │ ├── __init__.py │ │ │ │ ├── crop_resize │ │ │ │ │ └── __init__.py │ │ │ │ └── roi_crop │ │ │ │ │ └── __init__.py │ │ │ ├── build.py │ │ │ ├── functions │ │ │ │ ├── __init__.py │ │ │ │ ├── crop_resize.py │ │ │ │ ├── gridgen.py │ │ │ │ └── roi_crop.py │ │ │ ├── make.sh │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ ├── gridgen.py │ │ │ │ └── roi_crop.py │ │ │ └── src │ │ │ │ ├── roi_crop.c │ │ │ │ ├── roi_crop.h │ │ │ │ ├── roi_crop_cuda.c │ │ │ │ ├── roi_crop_cuda.h │ │ │ │ ├── roi_crop_cuda_kernel.cu │ │ │ │ └── roi_crop_cuda_kernel.h │ │ ├── roi_pooling │ │ │ ├── __init__.py │ │ │ ├── _ext │ │ │ │ ├── __init__.py │ │ │ │ └── roi_pooling │ │ │ │ │ └── __init__.py │ │ │ ├── build.py │ │ │ ├── functions │ │ │ │ ├── __init__.py │ │ │ │ └── roi_pool.py │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ └── roi_pool.py │ │ │ └── src │ │ │ │ ├── roi_pooling.c │ │ │ │ ├── roi_pooling.h │ │ │ │ ├── 
roi_pooling_cuda.c │ │ │ │ ├── roi_pooling_cuda.h │ │ │ │ ├── roi_pooling_kernel.cu │ │ │ │ └── roi_pooling_kernel.h │ │ ├── rpn │ │ │ ├── __init__.py │ │ │ ├── anchor_target_layer.py │ │ │ ├── bbox_transform.py │ │ │ ├── generate_anchors.py │ │ │ ├── proposal_layer.py │ │ │ ├── proposal_target_layer_cascade.py │ │ │ └── rpn.py │ │ └── utils │ │ │ ├── .gitignore │ │ │ ├── __init__.py │ │ │ ├── bbox.pyx │ │ │ ├── blob.py │ │ │ ├── config.py │ │ │ └── net_utils.py │ ├── pycocotools │ │ ├── UPSTREAM_REV │ │ ├── __init__.py │ │ ├── _mask.pyx │ │ ├── coco.py │ │ ├── cocoeval.py │ │ ├── license.txt │ │ ├── mask.py │ │ ├── maskApi.c │ │ └── maskApi.h │ ├── roi_data_layer │ │ ├── __init__.py │ │ ├── minibatch.py │ │ ├── roibatchLoader.py │ │ └── roidb.py │ └── setup.py │ ├── requirements.txt │ ├── test_net.py │ └── trainval_net.py ├── chapter5 ├── dssd-pytorch │ ├── arm.py │ └── tcb.py └── ssd-pytorch │ ├── .gitattributes │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── data │ ├── __init__.py │ ├── config.py │ ├── example.jpg │ ├── scripts │ │ ├── COCO2014.sh │ │ ├── VOC2007.sh │ │ └── VOC2012.sh │ └── voc0712.py │ ├── demo │ ├── __init__.py │ ├── demo.ipynb │ └── live.py │ ├── doc │ ├── SSD.jpg │ ├── detection_example.png │ ├── detection_example2.png │ ├── detection_examples.png │ └── ssd.png │ ├── eval.py │ ├── layers │ ├── __init__.py │ ├── box_utils.py │ ├── functions │ │ ├── __init__.py │ │ ├── detection.py │ │ └── prior_box.py │ └── modules │ │ ├── __init__.py │ │ ├── l2norm.py │ │ └── multibox_loss.py │ ├── ssd.py │ ├── test.py │ ├── train.py │ └── utils │ ├── __init__.py │ └── augmentations.py ├── chapter6 └── yolov2-pytorch │ ├── README.md │ ├── cfgs │ ├── __init__.py │ ├── config.py │ ├── config_voc.py │ └── exps │ │ ├── __init__.py │ │ ├── darknet19_exp1.py │ │ └── darknet19_exp2.py │ ├── darknet.py │ ├── datasets │ ├── __init__.py │ ├── imdb.py │ ├── pascal_voc.py │ └── voc_eval.py │ ├── demo.py │ ├── demo │ ├── 2007_000039.jpg │ ├── dog.jpg │ ├── eagle.jpg │ ├── giraffe.jpg │ ├── horses.jpg │ ├── out │ │ ├── 2007_000039.jpg │ │ ├── dog.jpg │ │ ├── eagle.jpg │ │ ├── giraffe.jpg │ │ ├── horses.jpg │ │ ├── person.jpg │ │ ├── ragged-edge-london-office-6.jpg │ │ └── scream.jpg │ ├── person.jpg │ ├── ragged-edge-london-office-6.jpg │ └── scream.jpg │ ├── layers │ ├── __init__.py │ ├── reorg │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── reorg_layer │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── reorg_layer.py │ │ └── src │ │ │ ├── reorg_cpu.c │ │ │ ├── reorg_cpu.h │ │ │ ├── reorg_cuda.c │ │ │ ├── reorg_cuda.h │ │ │ ├── reorg_cuda_kernel.cu │ │ │ └── reorg_cuda_kernel.h │ └── roi_pooling │ │ ├── __init__.py │ │ ├── _ext │ │ ├── __init__.py │ │ └── roi_pooling │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── roi_pool.py │ │ ├── roi_pool_py.py │ │ └── src │ │ ├── cuda │ │ ├── roi_pooling_kernel.cu │ │ └── roi_pooling_kernel.h │ │ ├── roi_pooling.c │ │ ├── roi_pooling.h │ │ ├── roi_pooling_cuda.c │ │ └── roi_pooling_cuda.h │ ├── make.sh │ ├── requirements.txt │ ├── test.py │ ├── train.py │ └── utils │ ├── __init__.py │ ├── bbox.c │ ├── bbox.pyx │ ├── build.py │ ├── im_transform.py │ ├── network.py │ ├── nms │ ├── .gitignore │ ├── __init__.py │ ├── cpu_nms.pyx │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── nms_kernel.cu │ └── py_cpu_nms.py │ ├── nms_wrapper.py │ ├── pycocotools │ ├── UPSTREAM_REV │ ├── __init__.py │ ├── _mask.c │ ├── _mask.pyx │ ├── coco.py │ ├── cocoeval.py │ ├── license.txt │ ├── mask.py │ ├── maskApi.c │ └── maskApi.h │ ├── timer.py │ ├── yolo.c │ ├── yolo.py 
│ └── yolo.pyx ├── chapter7 ├── mobilenet_v1.py ├── mobilenet_v2.py ├── mobilenet_v2_block.py ├── shufflenet_v1.py └── squeezenet_fire.py ├── chapter8 ├── nms.py └── retinanet.py └── reference └── README.md

/README.md:
--------------------------------------------------------------------------------
1 | # Deep Learning with PyTorch: Object Detection in Practice (深度学习之PyTorch物体检测实战)
2 | 
3 | ### About
4 | * This is the companion code for the book 《深度学习之PyTorch物体检测实战》 (Deep Learning with PyTorch: Object Detection in Practice), to be published by China Machine Press at the end of 2019.
5 | * Object detection is a topic that rewards hands-on practice, so readers are strongly encouraged to download this code and study it alongside the book.
6 | * Because of the large engineering effort involved, the implementations of the book's three major networks (Faster RCNN, SSD, and YOLO) are adapted from other authors' implementations, with annotations added and trimmed down to minimal working modules wherever possible; readers may pick and choose according to their own needs.
7 | * If you hit any problem while running the code, feel free to open an issue here, or look for an answer in the issue trackers of the upstream repos.
8 | * Enjoy Coding!
9 | 
10 | ### Environment
11 | * PyTorch: 0.4.0
12 | * Python: 3.6
13 | * CUDA: 9.0
14 | 
15 | ### Contents
16 | -------------------
17 | * chapter1: A brief look at object detection and PyTorch
18 | * chapter2: PyTorch basics
19 | * chapter3: Backbone networks
20 | * chapter4: Classic two-stage detector: Faster RCNN
21 | * chapter5: Single-stage multi-layer detector: SSD
22 | * chapter6: Classic single-stage detector: YOLO
23 | * chapter7: Model acceleration with lightweight networks
24 | * chapter8: Object detection details
25 | * chapter9: Hard problems in object detection
26 | * chapter10: The future of object detection
27 | 
28 | 
--------------------------------------------------------------------------------
/chapter1/model-evaluation/README.md:
--------------------------------------------------------------------------------
1 | ## How to run
2 | * Install Jupyter Notebook so the notebooks can be opened in a browser for visualization
3 | 
4 | ## 1. Model evaluation
5 | * Edit the paths in evaluation.ipynb, then run it
6 | 
7 | ## 2. Visualizing model bad cases
8 | * Edit the paths in badcase.ipynb, then run it
9 | 
10 | ## 3. Saving model inference results
11 | * Edit the paths in inference.ipynb, then run it
--------------------------------------------------------------------------------
/chapter1/model-evaluation/conf/arial.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter1/model-evaluation/conf/arial.ttf
--------------------------------------------------------------------------------
/chapter1/model-evaluation/conf/conf.yaml:
--------------------------------------------------------------------------------
1 | colors: ['#F0080F','#0A00D7','#95090A','#700FD4','#00a0F0','#00050C', '#30b404','#F00F00','#008377','#E0203B',
2 |          '#F00500','#008000','#0000FF','#F0F0F0','#7C0C00','#E0F00F','#00B000','#000FF0','#ADD806', '#320032',
3 |          '#48000C', '#C00085','#F5000A', '#00E4B5','#0000E6','#0070D6','#D00003','#DD0000','#FF0000','#2E0057',]
4 | 
5 | iouThreshold: 0.5
6 | 
7 | gtFormat: 'xyrb'
8 | detFormat: 'xyrb'
9 | gtCoordinates: 'abs'
10 | detCoordinates: 'abs'
11 | 
12 | showPlot: 'True'
13 | 
14 | 
15 | 
16 | 
17 | 
18 | 
19 | 
20 | 
--------------------------------------------------------------------------------
/chapter1/model-evaluation/data/detections/1.txt:
--------------------------------------------------------------------------------
1 | class1 12 58 53 96 0.87
2 | class1 51 88 152 191 0.98
3 | class2 345 898 431 945 0.67
4 | class2 597 346 674 415 0.45
5 | class1 243 546 298 583 0.83
6 | class2 99 345 150 426 0.96
7 | 
8 | 
--------------------------------------------------------------------------------
/chapter1/model-evaluation/data/groundtruths/1.txt:
--------------------------------------------------------------------------------
1 | class1 14 56 50 100
2 | class1 50 90 150 189
3 | class2 345 894 432 940
4 | class1 458 657 580 742
5 | class2 590 354 675 420
--------------------------------------------------------------------------------
/chapter1/model-evaluation/data/results/class1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter1/model-evaluation/data/results/class1.png -------------------------------------------------------------------------------- /chapter1/model-evaluation/data/results/class2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter1/model-evaluation/data/results/class2.png -------------------------------------------------------------------------------- /chapter1/model-evaluation/evaluation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import yaml 4 | sys.path.insert(0, os.path.join(os.getcwd(), 'lib')) 5 | from detection import detections, plot_save_result 6 | 7 | conf_path = './conf/conf.yaml' 8 | with open(conf_path, 'r', encoding='utf-8') as f: 9 | data=f.read() 10 | cfg = yaml.load(data) 11 | 12 | gtFolder = 'data/groundtruths' 13 | detFolder = 'data/detections' 14 | savePath = 'data/results' 15 | 16 | results, classes = detections(cfg, gtFolder, detFolder, savePath) 17 | plot_save_result(cfg, results, classes, savePath) 18 | -------------------------------------------------------------------------------- /chapter1/model-evaluation/lib/Evaluator.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter1/model-evaluation/lib/Evaluator.pyc -------------------------------------------------------------------------------- /chapter1/model-evaluation/lib/__pycache__/Evaluator.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter1/model-evaluation/lib/__pycache__/Evaluator.cpython-36.pyc -------------------------------------------------------------------------------- /chapter1/model-evaluation/lib/__pycache__/detection.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter1/model-evaluation/lib/__pycache__/detection.cpython-36.pyc -------------------------------------------------------------------------------- /chapter1/model-evaluation/lib/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter1/model-evaluation/lib/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /chapter1/model-evaluation/lib/detection.py: -------------------------------------------------------------------------------- 1 | import os 2 | from Evaluator import * 3 | import pdb 4 | 5 | def getGTBoxes(cfg, GTFolder): 6 | 7 | files = os.listdir(GTFolder) 8 | files.sort() 9 | 10 | classes = [] 11 | num_pos = {} 12 | gt_boxes = {} 13 | for f in files: 14 | nameOfImage = f.replace(".txt", "") 15 | fh1 = open(os.path.join(GTFolder, f), "r") 16 | 17 | for line in fh1: 18 | line = line.replace("\n", "") 19 | if line.replace(' ', '') == '': 20 | continue 21 | splitLine = line.split(" ") 22 | 23 | 
cls = splitLine[0]  # class label
24 |             left = float(splitLine[1])
25 |             top = float(splitLine[2])
26 |             right = float(splitLine[3])
27 |             bottom = float(splitLine[4])
28 |             one_box = [left, top, right, bottom, 0]
29 | 
30 |             if cls not in classes:
31 |                 classes.append(cls)
32 |                 gt_boxes[cls] = {}
33 |                 num_pos[cls] = 0
34 | 
35 |             num_pos[cls] += 1
36 | 
37 |             if nameOfImage not in gt_boxes[cls]:
38 |                 gt_boxes[cls][nameOfImage] = []
39 |             gt_boxes[cls][nameOfImage].append(one_box)
40 | 
41 |         fh1.close()
42 |     return gt_boxes, classes, num_pos
43 | 
44 | def getDetBoxes(cfg, DetFolder):
45 | 
46 |     files = os.listdir(DetFolder)
47 |     files.sort()
48 | 
49 |     det_boxes = {}
50 |     for f in files:
51 |         nameOfImage = f.replace(".txt", "")
52 |         fh1 = open(os.path.join(DetFolder, f), "r")
53 | 
54 |         for line in fh1:
55 |             line = line.replace("\n", "")
56 |             if line.replace(' ', '') == '':
57 |                 continue
58 |             splitLine = line.split(" ")
59 | 
60 |             cls = splitLine[0]  # class label
61 |             left = float(splitLine[1])
62 |             top = float(splitLine[2])
63 |             right = float(splitLine[3])
64 |             bottom = float(splitLine[4])
65 |             score = float(splitLine[5])
66 |             one_box = [left, top, right, bottom, score, nameOfImage]
67 | 
68 |             if cls not in det_boxes:
69 |                 det_boxes[cls] = []
70 |             det_boxes[cls].append(one_box)
71 | 
72 |         fh1.close()
73 |     return det_boxes
74 | 
75 | def detections(cfg,
76 |                gtFolder,
77 |                detFolder,
78 |                savePath,
79 |                show_process=True):
80 | 
81 | 
82 |     gt_boxes, classes, num_pos = getGTBoxes(cfg, gtFolder)
83 |     det_boxes = getDetBoxes(cfg, detFolder)
84 | 
85 |     evaluator = Evaluator()
86 | 
87 |     return evaluator.GetPascalVOCMetrics(cfg, classes, gt_boxes, num_pos, det_boxes)
88 | 
89 | def plot_save_result(cfg, results, classes, savePath):
90 | 
91 | 
92 |     plt.rcParams['savefig.dpi'] = 80
93 |     plt.rcParams['figure.dpi'] = 130
94 | 
95 |     acc_AP = 0
96 |     validClasses = 0
97 |     fig_index = 0
98 | 
99 |     for cls_index, result in enumerate(results):
100 |         if result is None:
101 |             raise IOError('Error: Class %d could not be found.' % cls_index)  # fixed: `classId` was undefined
102 | 
103 |         cls = result['class']
104 |         precision = result['precision']
105 |         recall = result['recall']
106 |         average_precision = result['AP']
107 |         acc_AP = acc_AP + average_precision
108 |         mpre = result['interpolated precision']
109 |         mrec = result['interpolated recall']
110 |         npos = result['total positives']
111 |         total_tp = result['total TP']
112 |         total_fp = result['total FP']
113 | 
114 |         fig_index += 1
115 |         plt.figure(fig_index)
116 |         plt.plot(recall, precision, cfg['colors'][cls_index], label='Precision')
117 |         plt.xlabel('recall')
118 |         plt.ylabel('precision')
119 |         ap_str = "{0:.2f}%".format(average_precision * 100)
120 |         plt.title('Precision x Recall curve \nClass: %s, AP: %s' % (str(cls), ap_str))
121 |         plt.legend(shadow=True)
122 |         plt.grid()
123 |         plt.savefig(os.path.join(savePath, cls + '.png'))
124 |         plt.show()
125 |         plt.pause(0.05)
126 | 
127 | 
128 |     mAP = acc_AP / fig_index
129 |     mAP_str = "{0:.2f}%".format(mAP * 100)
130 |     print('mAP: %s' % mAP_str)
131 | 
--------------------------------------------------------------------------------
/chapter1/model-evaluation/lib/detection.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter1/model-evaluation/lib/detection.pyc
--------------------------------------------------------------------------------
/chapter1/model-evaluation/lib/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter1/model-evaluation/lib/utils.py
--------------------------------------------------------------------------------
/chapter1/model-evaluation/lib/utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter1/model-evaluation/lib/utils.pyc
--------------------------------------------------------------------------------
/chapter2/mlp.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | 
3 | class MLP(nn.Module):
4 |     def __init__(self, in_dim, hid_dim1, hid_dim2, out_dim):
5 |         super(MLP, self).__init__()
6 |         self.layer = nn.Sequential(
7 |             nn.Linear(in_dim, hid_dim1),
8 |             nn.ReLU(),
9 |             nn.Linear(hid_dim1, hid_dim2),
10 |             nn.ReLU(),
11 |             nn.Linear(hid_dim2, out_dim),
12 |             nn.ReLU()
13 |         )
14 |     def forward(self, x):
15 |         x = self.layer(x)
16 |         return x
17 | 
--------------------------------------------------------------------------------
/chapter2/perception.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | 
4 | class Linear(nn.Module):
5 |     def __init__(self, in_dim, out_dim):
6 |         super(Linear, self).__init__()
7 |         self.w = nn.Parameter(torch.randn(in_dim, out_dim))
8 |         self.b = nn.Parameter(torch.randn(out_dim))
9 | 
10 |     def forward(self, x):
11 |         x = x.matmul(self.w)
12 |         y = x + self.b.expand_as(x)
13 |         return y
14 | 
15 | class Perception(nn.Module):
16 |     def __init__(self, in_dim, hid_dim, out_dim):
17 |         super(Perception, self).__init__()
18 |         self.layer1 = Linear(in_dim, hid_dim)
19 |         self.layer2 = Linear(hid_dim, out_dim)
20 |     def forward(self, x):
21 |         x = self.layer1(x)
22 |         y = torch.sigmoid(x)
23 |         y = self.layer2(y)
24 |         y = torch.sigmoid(y)
25 | return y 26 | -------------------------------------------------------------------------------- /chapter2/perception_sequential.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | class Perception(nn.Module): 4 | def __init__(self, in_dim, hid_dim, out_dim): 5 | super(Perception, self).__init__() 6 | self.layer = nn.Sequential( 7 | nn.Linear(in_dim, hid_dim), 8 | nn.Sigmoid(), 9 | nn.Linear(hid_dim, out_dim), 10 | nn.Sigmoid() 11 | ) 12 | def forward(self, x): 13 | y = self.layer(x) 14 | return y 15 | -------------------------------------------------------------------------------- /chapter2/visdom.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import visdom 3 | 4 | vis = visdom.Visdom(env='first') 5 | vis.text('first visdom', win='text1') 6 | vis.text('hello PyTorch', win='text1', append=True) 7 | 8 | for i in range(20): 9 | vis.line(X=torch.FloatTensor([i]), Y=torch.FloatTensor([-i**2+20*i+1]), opts={'title': 'y=-x^2+20x+1'}, win='loss', update='append') 10 | 11 | vis.image(torch.randn(3, 256, 256), win='random_image') 12 | -------------------------------------------------------------------------------- /chapter3/densenet_block.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | 5 | class Bottleneck(nn.Module): 6 | def __init__(self, nChannels, growthRate): 7 | super(Bottleneck, self).__init__() 8 | interChannels = 4*growthRate 9 | self.bn1 = nn.BatchNorm2d(nChannels) 10 | self.conv1 = nn.Conv2d(nChannels, interChannels, kernel_size=1, 11 | bias=False) 12 | self.bn2 = nn.BatchNorm2d(interChannels) 13 | self.conv2 = nn.Conv2d(interChannels, growthRate, kernel_size=3, 14 | padding=1, bias=False) 15 | def forward(self, x): 16 | out = self.conv1(F.relu(self.bn1(x))) 17 | out = self.conv2(F.relu(self.bn2(out))) 18 | out = torch.cat((x, out), 1) 19 | return out 20 | 21 | class Denseblock(nn.Module): 22 | def __init__(self, nChannels, growthRate, nDenseBlocks): 23 | super(Denseblock, self).__init__() 24 | layers = [] 25 | for i in range(int(nDenseBlocks)): 26 | layers.append(Bottleneck(nChannels, growthRate)) 27 | nChannels += growthRate 28 | self.denseblock = nn.Sequential(*layers) 29 | def forward(self, x): 30 | return self.denseblock(x) 31 | 32 | -------------------------------------------------------------------------------- /chapter3/detnet_bottleneck.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | class DetBottleneck(nn.Module): 3 | 4 | def __init__(self, inplanes, planes, stride=1, extra=False): 5 | super(DetBottleneck, self).__init__() 6 | self.bottleneck = nn.Sequential( 7 | nn.Conv2d(inplanes, planes, 1, bias=False), 8 | nn.BatchNorm2d(planes), 9 | nn.ReLU(inplace=True), 10 | nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=2, 11 | dilation=2, bias=False), 12 | nn.BatchNorm2d(planes), 13 | nn.ReLU(inplace=True), 14 | nn.Conv2d(planes, planes, 1, bias=False), 15 | nn.BatchNorm2d(planes), 16 | ) 17 | self.relu = nn.ReLU(inplace=True) 18 | self.extra = extra 19 | if self.extra: 20 | self.extra_conv = nn.Sequential( 21 | nn.Conv2d(inplanes, planes, 1, bias=False), 22 | nn.BatchNorm2d(planes) 23 | ) 24 | 25 | def forward(self, x): 26 | if self.extra: 27 | identity = self.extra_conv(x) 28 | else: 29 | identity = x 30 | out = self.bottleneck(x) 31 | out += identity 32 | out = 
self.relu(out) 33 | return out 34 | 35 | -------------------------------------------------------------------------------- /chapter3/fpn.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | import math 4 | 5 | class Bottleneck(nn.Module): 6 | expansion = 4 7 | def __init__(self, in_planes, planes, stride=1, downsample=None): 8 | super(Bottleneck, self).__init__() 9 | self.bottleneck = nn.Sequential( 10 | nn.Conv2d(in_planes, planes, 1, bias=False), 11 | nn.BatchNorm2d(planes), 12 | nn.ReLU(inplace=True), 13 | nn.Conv2d(planes, planes, 3, stride, 1, bias=False), 14 | nn.BatchNorm2d(planes), 15 | nn.ReLU(inplace=True), 16 | nn.Conv2d(planes, self.expansion * planes, 1, bias=False), 17 | nn.BatchNorm2d(self.expansion * planes), 18 | ) 19 | self.relu = nn.ReLU(inplace=True) 20 | self.downsample = downsample 21 | def forward(self, x): 22 | identity = x 23 | out = self.bottleneck(x) 24 | if self.downsample is not None: 25 | identity = self.downsample(x) 26 | out += identity 27 | out = self.relu(out) 28 | return out 29 | 30 | class FPN(nn.Module): 31 | def __init__(self, layers): 32 | super(FPN, self).__init__() 33 | self.inplanes = 64 34 | self.conv1 = nn.Conv2d(3, 64, 7, 2, 3, bias=False) 35 | self.bn1 = nn.BatchNorm2d(64) 36 | self.relu = nn.ReLU(inplace=True) 37 | self.maxpool = nn.MaxPool2d(3, 2, 1) 38 | 39 | self.layer1 = self._make_layer(64, layers[0]) 40 | self.layer2 = self._make_layer(128, layers[1], 2) 41 | self.layer3 = self._make_layer(256, layers[2], 2) 42 | self.layer4 = self._make_layer(512, layers[3], 2) 43 | self.toplayer = nn.Conv2d(2048, 256, 1, 1, 0) 44 | 45 | self.smooth1 = nn.Conv2d(256, 256, 3, 1, 1) 46 | self.smooth2 = nn.Conv2d(256, 256, 3, 1, 1) 47 | self.smooth3 = nn.Conv2d(256, 256, 3, 1, 1) 48 | 49 | self.latlayer1 = nn.Conv2d(1024, 256, 1, 1, 0) 50 | self.latlayer2 = nn.Conv2d( 512, 256, 1, 1, 0) 51 | self.latlayer3 = nn.Conv2d( 256, 256, 1, 1, 0) 52 | 53 | def _make_layer(self, planes, blocks, stride=1): 54 | downsample = None 55 | if stride != 1 or self.inplanes != Bottleneck.expansion * planes: 56 | downsample = nn.Sequential( 57 | nn.Conv2d(self.inplanes, Bottleneck.expansion * planes, 1, stride, bias=False), 58 | nn.BatchNorm2d(Bottleneck.expansion * planes) 59 | ) 60 | layers = [] 61 | layers.append(Bottleneck(self.inplanes, planes, stride, downsample)) 62 | self.inplanes = planes * Bottleneck.expansion 63 | for i in range(1, blocks): 64 | layers.append(Bottleneck(self.inplanes, planes)) 65 | return nn.Sequential(*layers) 66 | 67 | def _upsample_add(self, x, y): 68 | _,_,H,W = y.shape 69 | return F.upsample(x, size=(H,W), mode='bilinear') + y 70 | 71 | def forward(self, x): 72 | 73 | c1 = self.maxpool(self.relu(self.bn1(self.conv1(x)))) 74 | c2 = self.layer1(c1) 75 | c3 = self.layer2(c2) 76 | c4 = self.layer3(c3) 77 | c5 = self.layer4(c4) 78 | 79 | p5 = self.toplayer(c5) 80 | p4 = self._upsample_add(p5, self.latlayer1(c4)) 81 | p3 = self._upsample_add(p4, self.latlayer2(c3)) 82 | p2 = self._upsample_add(p3, self.latlayer3(c2)) 83 | 84 | p4 = self.smooth1(p4) 85 | p3 = self.smooth2(p3) 86 | p2 = self.smooth3(p2) 87 | return p2, p3, p4, p5 88 | 89 | -------------------------------------------------------------------------------- /chapter3/inceptionv1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | class BasicConv2d(nn.Module): 5 | def __init__(self, 
in_channels, out_channels, kernel_size, padding=0):
6 |         super(BasicConv2d, self).__init__()
7 |         self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, padding=padding)
8 |     def forward(self, x):
9 |         x = self.conv(x)
10 |         return F.relu(x, inplace=True)
11 | 
12 | class Inceptionv1(nn.Module):
13 |     def __init__(self, in_dim, hid_1_1, hid_2_1, hid_2_3, hid_3_1, out_3_5, out_4_1):
14 |         super(Inceptionv1, self).__init__()
15 |         self.branch1x1 = BasicConv2d(in_dim, hid_1_1, 1)
16 |         self.branch3x3 = nn.Sequential(
17 |             BasicConv2d(in_dim, hid_2_1, 1),
18 |             BasicConv2d(hid_2_1, hid_2_3, 3, padding=1)
19 |         )
20 |         self.branch5x5 = nn.Sequential(
21 |             BasicConv2d(in_dim, hid_3_1, 1),
22 |             BasicConv2d(hid_3_1, out_3_5, 5, padding=2)
23 |         )
24 |         self.branch_pool = nn.Sequential(
25 |             nn.MaxPool2d(3, stride=1, padding=1),
26 |             BasicConv2d(in_dim, out_4_1, 1)
27 |         )
28 |     def forward(self, x):
29 |         b1 = self.branch1x1(x)
30 |         b2 = self.branch3x3(x)
31 |         b3 = self.branch5x5(x)
32 |         b4 = self.branch_pool(x)
33 |         output = torch.cat((b1, b2, b3, b4), dim=1)
34 |         return output
35 | 
36 | 
--------------------------------------------------------------------------------
/chapter3/inceptionv2.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | import torch.nn.functional as F
4 | 
5 | class BasicConv2d(nn.Module):
6 |     def __init__(self, in_channels, out_channels, kernel_size, padding=0):
7 |         super(BasicConv2d, self).__init__()
8 |         self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, padding=padding)
9 |         self.bn = nn.BatchNorm2d(out_channels, eps=0.001)
10 |     def forward(self, x):
11 |         x = self.conv(x)
12 |         x = self.bn(x)
13 |         return F.relu(x, inplace=True)
14 | 
15 | class Inceptionv2(nn.Module):
16 |     def __init__(self):
17 |         super(Inceptionv2, self).__init__()
18 |         self.branch1 = BasicConv2d(192, 96, 1, 0)
19 |         self.branch2 = nn.Sequential(
20 |             BasicConv2d(192, 48, 1, 0),
21 |             BasicConv2d(48, 64, 3, 1)
22 |         )
23 |         self.branch3 = nn.Sequential(
24 |             BasicConv2d(192, 64, 1, 0),
25 |             BasicConv2d(64, 96, 3, 1),
26 |             BasicConv2d(96, 96, 3, 1)
27 |         )
28 |         self.branch4 = nn.Sequential(
29 |             nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),
30 |             BasicConv2d(192, 64, 1, 0)
31 |         )
32 |     def forward(self, x):
33 |         x0 = self.branch1(x)
34 |         x1 = self.branch2(x)
35 |         x2 = self.branch3(x)
36 |         x3 = self.branch4(x)
37 |         out = torch.cat((x0, x1, x2, x3), 1)
38 |         return out
39 | 
40 | 
--------------------------------------------------------------------------------
/chapter3/resnet_bottleneck.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | 
3 | class Bottleneck(nn.Module):
4 |     def __init__(self, in_dim, out_dim, stride=1):
5 |         super(Bottleneck, self).__init__()
6 |         self.bottleneck = nn.Sequential(
7 |             nn.Conv2d(in_dim, in_dim, 1, bias=False),
8 |             nn.BatchNorm2d(in_dim),
9 |             nn.ReLU(inplace=True),
10 |             nn.Conv2d(in_dim, in_dim, 3, stride, 1, bias=False),
11 |             nn.BatchNorm2d(in_dim),
12 |             nn.ReLU(inplace=True),
13 |             nn.Conv2d(in_dim, out_dim, 1, bias=False),
14 |             nn.BatchNorm2d(out_dim),
15 |         )
16 |         self.relu = nn.ReLU(inplace=True)
17 |         self.downsample = nn.Sequential(
18 |             nn.Conv2d(in_dim, out_dim, 1, stride),  # stride must match the main path so the shapes agree
19 |             nn.BatchNorm2d(out_dim),
20 |         )
21 | 
22 |     def forward(self, x):
23 |         # the 1x1 projection shortcut is applied unconditionally in this snippet
24 |         out = self.bottleneck(x)
25 |         identity = self.downsample(x)
26 |         out += identity
27 |         out = self.relu(out)
28 |         return out
29 | 
30 | 
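A quick way to sanity-check the chapter3 blocks above is a forward pass on a dummy batch. A minimal sketch, assuming it is run from the chapter3 directory so that `from fpn import FPN` resolves; the stage depths [3, 4, 6, 3] follow the ResNet-50 convention:

```
import torch
from fpn import FPN

net = FPN([3, 4, 6, 3])                  # ResNet-50 style stage depths
x = torch.randn(1, 3, 224, 224)          # dummy image batch
p2, p3, p4, p5 = net(x)
# every pyramid level carries 256 channels; spatial strides are 4, 8, 16, 32:
# (1, 256, 56, 56), (1, 256, 28, 28), (1, 256, 14, 14), (1, 256, 7, 7)
print([tuple(p.shape) for p in (p2, p3, p4, p5)])
```

The same pattern (build the module, feed a random tensor, print the output shape) works for Inceptionv1, Inceptionv2, Bottleneck, and the Denseblock as well.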
-------------------------------------------------------------------------------- /chapter3/vgg.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | class VGG(nn.Module): 3 | def __init__(self, num_classes=1000): 4 | super(VGG, self).__init__() 5 | layers = [] 6 | in_dim = 3 7 | out_dim = 64 8 | for i in range(13): 9 | layers += [nn.Conv2d(in_dim, out_dim, 3, 1, 1), nn.ReLU(inplace=True)] 10 | in_dim = out_dim 11 | if i==1 or i==3 or i==6 or i==9 or i==12: 12 | layers += [nn.MaxPool2d(2, 2)] 13 | if i!=9: 14 | out_dim*=2 15 | self.features = nn.Sequential(*layers) 16 | self.classifier = nn.Sequential( 17 | nn.Linear(512 * 7 * 7, 4096), 18 | nn.ReLU(True), 19 | nn.Dropout(), 20 | nn.Linear(4096, 4096), 21 | nn.ReLU(True), 22 | nn.Dropout(), 23 | nn.Linear(4096, num_classes), 24 | ) 25 | def forward(self, x): 26 | x = self.features(x) 27 | x = x.view(x.size(0), -1) 28 | x = self.classifier(x) 29 | return x 30 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/.gitignore: -------------------------------------------------------------------------------- 1 | data/* 2 | 3 | # READ THIS BEFORE YOU REFACTOR ME 4 | # 5 | # setup.py uses the list of patterns in this file to decide 6 | # what to delete, but it's not 100% sound. So, for example, 7 | # if you delete aten/build/ because it's redundant with build/, 8 | # aten/build/ will stop being cleaned. So be careful when 9 | # refactoring this file! 10 | 11 | ## PyTorch 12 | 13 | .mypy_cache 14 | *.pyc 15 | */*.pyc 16 | */*.so* 17 | */**/__pycache__ 18 | */**/*.dylib* 19 | */**/*.pyc 20 | */**/*.pyd 21 | */**/*.so* 22 | */**/**/*.pyc 23 | */**/**/**/*.pyc 24 | */**/**/**/**/*.pyc 25 | aten/build/ 26 | aten/src/ATen/Config.h 27 | aten/src/ATen/cuda/CUDAConfig.h 28 | build/ 29 | dist/ 30 | docs/src/**/* 31 | test/.coverage 32 | test/cpp/api/mnist 33 | test/data/gpu_tensors.pt 34 | test/data/legacy_modules.t7 35 | test/data/legacy_serialized.pt 36 | test/data/linear.pt 37 | test/htmlcov 38 | third_party/build/ 39 | tools/shared/_utils_internal.py 40 | torch.egg-info/ 41 | torch/csrc/autograd/generated/* 42 | torch/csrc/cudnn/cuDNN.cpp 43 | torch/csrc/generated 44 | torch/csrc/generic/TensorMethods.cpp 45 | torch/csrc/jit/generated/* 46 | torch/csrc/nn/THCUNN.cpp 47 | torch/csrc/nn/THCUNN.cwrap 48 | torch/csrc/nn/THNN_generic.cpp 49 | torch/csrc/nn/THNN_generic.cwrap 50 | torch/csrc/nn/THNN_generic.h 51 | torch/csrc/nn/THNN.cpp 52 | torch/csrc/nn/THNN.cwrap 53 | torch/lib/*.a* 54 | torch/lib/*.dll* 55 | torch/lib/*.dylib* 56 | torch/lib/*.h 57 | torch/lib/*.lib 58 | torch/lib/*.so* 59 | torch/lib/build 60 | torch/lib/cmake 61 | torch/lib/include 62 | torch/lib/pkgconfig 63 | torch/lib/protoc 64 | torch/lib/tmp_install 65 | torch/lib/torch_shm_manager 66 | torch/version.py 67 | 68 | # IPython notebook checkpoints 69 | .ipynb_checkpoints 70 | 71 | # Editor temporaries 72 | *.swn 73 | *.swo 74 | *.swp 75 | *.swm 76 | *~ 77 | 78 | # macOS dir files 79 | .DS_Store 80 | 81 | # Symbolic files 82 | tools/shared/cwrap_common.py 83 | 84 | # Ninja files 85 | .ninja_deps 86 | .ninja_log 87 | compile_commands.json 88 | *.egg-info/ 89 | docs/source/scripts/activation_images/ 90 | 91 | ## General 92 | 93 | # Compiled Object files 94 | *.slo 95 | *.lo 96 | *.o 97 | *.cuo 98 | *.obj 99 | 100 | # Compiled Dynamic libraries 101 | *.so 102 | *.dylib 103 | *.dll 104 | 105 | # Compiled Static libraries 106 | *.lai 107 | *.la 108 | *.a 109 | *.lib 110 | 111 
| # Compiled protocol buffers 112 | *.pb.h 113 | *.pb.cc 114 | *_pb2.py 115 | 116 | # Compiled python 117 | *.pyc 118 | *.pyd 119 | 120 | # Compiled MATLAB 121 | *.mex* 122 | 123 | # IPython notebook checkpoints 124 | .ipynb_checkpoints 125 | 126 | # Editor temporaries 127 | *.swn 128 | *.swo 129 | *.swp 130 | *~ 131 | 132 | # Sublime Text settings 133 | *.sublime-workspace 134 | *.sublime-project 135 | 136 | # Eclipse Project settings 137 | *.*project 138 | .settings 139 | 140 | # QtCreator files 141 | *.user 142 | 143 | # PyCharm files 144 | .idea 145 | 146 | # Visual Studio Code files 147 | .vscode 148 | .vs 149 | 150 | # OSX dir files 151 | .DS_Store 152 | 153 | ## Caffe2 154 | 155 | # build, distribute, and bins (+ python proto bindings) 156 | build 157 | build_host_protoc 158 | build_android 159 | build_ios 160 | /build_* 161 | .build_debug/* 162 | .build_release/* 163 | distribute/* 164 | *.testbin 165 | *.bin 166 | cmake_build 167 | .cmake_build 168 | gen 169 | .setuptools-cmake-build 170 | .pytest_cache 171 | aten/build/* 172 | 173 | # Bram 174 | plsdontbreak 175 | 176 | # Generated documentation 177 | docs/_site 178 | docs/gathered 179 | _site 180 | doxygen 181 | docs/dev 182 | 183 | # LevelDB files 184 | *.sst 185 | *.ldb 186 | LOCK 187 | LOG* 188 | CURRENT 189 | MANIFEST-* 190 | 191 | # generated version file 192 | caffe2/version.py 193 | 194 | # setup.py intermediates 195 | .eggs 196 | caffe2.egg-info 197 | 198 | # Atom/Watchman required file 199 | .watchmanconfig 200 | 201 | # cython generated files 202 | lib/model/utils/bbox.c 203 | lib/pycocotools/_mask.c -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Jianwei Yang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 
--------------------------------------------------------------------------------
/chapter4/faster-rcnn-pytorch/README.md:
--------------------------------------------------------------------------------
1 | # Chapter 4: Faster RCNN
2 | 
3 | ## Introduction
4 | 
5 | This code is based mainly on [jwyang/faster-rcnn.pytorch](https://github.com/jwyang/faster-rcnn.pytorch), a PyTorch re-implementation of Faster RCNN. If you run into problems while studying it, check [jwyang's issue tracker](https://github.com/jwyang/faster-rcnn.pytorch/issues) to see whether a solution already exists.
6 | 
7 | ## Preparation
8 | 
9 | First clone the book's code to your machine:
10 | ```
11 | git clone https://github.com/dongdonghy/Detection-PyTorch-Notebook.git
12 | ```
13 | 
14 | Then change into this project's directory:
15 | ```
16 | cd Detection-PyTorch-Notebook/chapter4/faster-rcnn-pytorch
17 | ```
18 | 
19 | ### Dependencies
20 | 
21 | * Python 2.7 or 3.6
22 | * PyTorch 0.4.0
23 | * CUDA 8.0 or higher
24 | 
25 | ### Data preparation
26 | 
27 | * **PASCAL_VOC 07+12**: For the VOC datasets, follow the instructions in [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn#beyond-the-demo-installation-for-training-and-testing-models) to prepare them, and create a symlink in the data folder.
28 | 
29 | * **COCO**: For the COCO dataset, follow the instructions in [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn#beyond-the-demo-installation-for-training-and-testing-models) to prepare it, and create a symlink in the data folder.
30 | 
31 | ### Pretrained weights
32 | 
33 | The author provides two different sets of pretrained weights, VGG and ResNet101:
34 | 
35 | * VGG16: [Dropbox](https://www.dropbox.com/s/s3brpk0bdq60nyb/vgg16_caffe.pth?dl=0), [VT Server](https://filebox.ece.vt.edu/~jw2yang/faster-rcnn/pretrained-base-models/vgg16_caffe.pth)
36 | 
37 | * ResNet101: [Dropbox](https://www.dropbox.com/s/iev3tkbz5wyyuz9/resnet101_caffe.pth?dl=0), [VT Server](https://filebox.ece.vt.edu/~jw2yang/faster-rcnn/pretrained-base-models/resnet101_caffe.pth)
38 | 
39 | Download the corresponding pretrained weights and put them under the data/pretrained_model folder. Experiments showed that the Caffe-converted pretrained weights give higher accuracy, so the Caffe weights are used here.
40 | 
41 | ### Compilation
42 | 
43 | Modules such as NMS, RoI Pooling and RoI Align depend on custom CUDA C code, so this part has to be compiled separately. First, in lib/make.sh, set CUDA_ARCH to the arch matching your own GPU; the mapping is:
44 | 
45 | | GPU model | Architecture |
46 | | ------------- | ------------- |
47 | | TitanX (Maxwell/Pascal) | sm_52 |
48 | | GTX 960M | sm_50 |
49 | | GTX 1080 (Ti) | sm_61 |
50 | | Grid K520 (AWS g2.2xlarge) | sm_30 |
51 | | Tesla K80 (AWS p2.xlarge) | sm_37 |
52 | 
53 | For more details on arch values, see NVIDIA's official pages: [cuda-gpus](https://developer.nvidia.com/cuda-gpus) or [sm-architectures](http://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/)
54 | 
55 | Install the Python dependencies with pip:
56 | ```
57 | pip install -r requirements.txt
58 | ```
59 | 
60 | Compile the CUDA-dependent libraries:
61 | 
62 | ```
63 | cd lib
64 | sh make.sh
65 | ```
66 | 
67 | ## Training
68 | 
69 | Train Faster RCNN with the command below. By default it uses the VOC dataset and the VGG16 pretrained model:
70 | ```
71 | CUDA_VISIBLE_DEVICES=0 python trainval_net.py
72 | ```
73 | This runs with the default arguments in trainval_net.py; most hyperparameters can be adjusted to your situation, and the batch size and worker number should be chosen according to your GPU capacity.
74 | 
75 | ## Testing
76 | 
77 | To evaluate a trained model on the test set, run:
78 | ```
79 | python test_net.py --dataset pascal_voc --net vgg16 \
80 |                    --checksession $SESSION --checkepoch $EPOCH --checkpoint $CHECKPOINT \
81 |                    --cuda
82 | ```
83 | Set SESSION, EPOCH and CHECKPOINT to the model checkpoint you want to test.
84 | 
85 | 
--------------------------------------------------------------------------------
/chapter4/faster-rcnn-pytorch/_init_paths.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | import sys
3 | 
4 | def add_path(path):
5 |     if path not in sys.path:
6 |         sys.path.insert(0, path)
7 | 
8 | this_dir = osp.dirname(__file__)
9 | 
10 | # Add lib to PYTHONPATH
11 | lib_path = osp.join(this_dir, 'lib')
12 | add_path(lib_path)
13 | 
14 | coco_path = osp.join(this_dir, 'data', 'coco', 'PythonAPI')
15 | add_path(coco_path)
16 | 
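`_init_paths.py` above is imported purely for its side effect of putting `lib/` on `sys.path`. A minimal sketch of the pattern the entry scripts (trainval_net.py, test_net.py) rely on; the exact import lines below are illustrative, not copied from those scripts:

```
import _init_paths  # noqa: F401  (side-effect import: prepends lib/ to sys.path)

# resolvable only after _init_paths has run, via lib/model/faster_rcnn/vgg16.py
from model.faster_rcnn.vgg16 import vgg16
```

Any script placed in the project root can use the same two-line preamble to reach the packages under lib/ (model, datasets, roi_data_layer, pycocotools).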
-------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/cfgs/res101.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res101 2 | TRAIN: 3 | HAS_RPN: True 4 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 5 | RPN_POSITIVE_OVERLAP: 0.7 6 | RPN_BATCHSIZE: 256 7 | PROPOSAL_METHOD: gt 8 | BG_THRESH_LO: 0.0 9 | DISPLAY: 20 10 | BATCH_SIZE: 128 11 | WEIGHT_DECAY: 0.0001 12 | DOUBLE_BIAS: False 13 | LEARNING_RATE: 0.001 14 | TEST: 15 | HAS_RPN: True 16 | POOLING_SIZE: 7 17 | POOLING_MODE: align 18 | CROP_RESIZE_WITH_MAX_POOL: False 19 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/cfgs/res101_ls.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res101 2 | TRAIN: 3 | HAS_RPN: True 4 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 5 | RPN_POSITIVE_OVERLAP: 0.7 6 | RPN_BATCHSIZE: 256 7 | PROPOSAL_METHOD: gt 8 | BG_THRESH_LO: 0.0 9 | DISPLAY: 20 10 | BATCH_SIZE: 128 11 | WEIGHT_DECAY: 0.0001 12 | SCALES: [800] 13 | DOUBLE_BIAS: False 14 | LEARNING_RATE: 0.001 15 | TEST: 16 | HAS_RPN: True 17 | SCALES: [800] 18 | MAX_SIZE: 1200 19 | RPN_POST_NMS_TOP_N: 1000 20 | POOLING_SIZE: 7 21 | POOLING_MODE: align 22 | CROP_RESIZE_WITH_MAX_POOL: False 23 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/cfgs/res50.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res50 2 | TRAIN: 3 | HAS_RPN: True 4 | # IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | WEIGHT_DECAY: 0.0001 13 | DOUBLE_BIAS: False 14 | SNAPSHOT_PREFIX: res50_faster_rcnn 15 | TEST: 16 | HAS_RPN: True 17 | POOLING_MODE: crop 18 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/cfgs/vgg16.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: vgg16 2 | TRAIN: 3 | HAS_RPN: True 4 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 5 | RPN_POSITIVE_OVERLAP: 0.7 6 | RPN_BATCHSIZE: 256 7 | PROPOSAL_METHOD: gt 8 | BG_THRESH_LO: 0.0 9 | BATCH_SIZE: 256 10 | LEARNING_RATE: 0.01 11 | TEST: 12 | HAS_RPN: True 13 | POOLING_MODE: align 14 | CROP_RESIZE_WITH_MAX_POOL: False 15 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/images/img1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/images/img1.jpg -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/images/img1_det.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/images/img1_det.jpg -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/images/img1_det_res101.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/images/img1_det_res101.jpg -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/images/img2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/images/img2.jpg -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/images/img2_det.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/images/img2_det.jpg -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/images/img2_det_res101.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/images/img2_det_res101.jpg -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/images/img3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/images/img3.jpg -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/images/img3_det.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/images/img3_det.jpg -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/images/img3_det_res101.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/images/img3_det_res101.jpg -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/images/img4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/images/img4.jpg -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/images/img4_det.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/images/img4_det.jpg -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/images/img4_det_res101.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/images/img4_det_res101.jpg -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m: -------------------------------------------------------------------------------- 1 | function VOCopts = get_voc_opts(path) 2 | 3 | tmp = pwd; 4 | cd(path); 5 | try 6 | addpath('VOCcode'); 7 | VOCinit; 8 | catch 9 | rmpath('VOCcode'); 10 | cd(tmp); 11 | error(sprintf('VOCcode directory not found under %s', path)); 12 | end 13 | rmpath('VOCcode'); 14 | cd(tmp); 15 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m: -------------------------------------------------------------------------------- 1 | function res = voc_eval(path, comp_id, test_set, output_dir) 2 | 3 | VOCopts = get_voc_opts(path); 4 | VOCopts.testset = test_set; 5 | 6 | for i = 1:length(VOCopts.classes) 7 | cls = VOCopts.classes{i}; 8 | res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir); 9 | end 10 | 11 | fprintf('\n~~~~~~~~~~~~~~~~~~~~\n'); 12 | fprintf('Results:\n'); 13 | aps = [res(:).ap]'; 14 | fprintf('%.1f\n', aps * 100); 15 | fprintf('%.1f\n', mean(aps) * 100); 16 | fprintf('~~~~~~~~~~~~~~~~~~~~\n'); 17 | 18 | function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir) 19 | 20 | test_set = VOCopts.testset; 21 | year = VOCopts.dataset(4:end); 22 | 23 | addpath(fullfile(VOCopts.datadir, 'VOCcode')); 24 | 25 | res_fn = sprintf(VOCopts.detrespath, comp_id, cls); 26 | 27 | recall = []; 28 | prec = []; 29 | ap = 0; 30 | ap_auc = 0; 31 | 32 | do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test'); 33 | if do_eval 34 | % Bug in VOCevaldet requires that tic has been called first 35 | tic; 36 | [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true); 37 | ap_auc = xVOCap(recall, prec); 38 | 39 | % force plot limits 40 | ylim([0 1]); 41 | xlim([0 1]); 42 | 43 | print(gcf, '-djpeg', '-r0', ... 44 | [output_dir '/' cls '_pr.jpg']); 45 | end 46 | fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc); 47 | 48 | res.recall = recall; 49 | res.prec = prec; 50 | res.ap = ap; 51 | res.ap_auc = ap_auc; 52 | 53 | save([output_dir '/' cls '_pr.mat'], ... 
54 | 'res', 'recall', 'prec', 'ap', 'ap_auc'); 55 | 56 | rmpath(fullfile(VOCopts.datadir, 'VOCcode')); 57 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m: -------------------------------------------------------------------------------- 1 | function ap = xVOCap(rec,prec) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | mrec=[0 ; rec ; 1]; 5 | mpre=[0 ; prec ; 0]; 6 | for i=numel(mpre)-1:-1:1 7 | mpre(i)=max(mpre(i),mpre(i+1)); 8 | end 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1; 10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 11 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/datasets/ds_utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | 12 | 13 | def unique_boxes(boxes, scale=1.0): 14 | """Return indices of unique boxes.""" 15 | v = np.array([1, 1e3, 1e6, 1e9]) 16 | hashes = np.round(boxes * scale).dot(v) 17 | _, index = np.unique(hashes, return_index=True) 18 | return np.sort(index) 19 | 20 | 21 | def xywh_to_xyxy(boxes): 22 | """Convert [x y w h] box format to [x1 y1 x2 y2] format.""" 23 | return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1)) 24 | 25 | 26 | def xyxy_to_xywh(boxes): 27 | """Convert [x1 y1 x2 y2] box format to [x y w h] format.""" 28 | return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1)) 29 | 30 | 31 | def validate_boxes(boxes, width=0, height=0): 32 | """Check that a set of boxes are valid.""" 33 | x1 = boxes[:, 0] 34 | y1 = boxes[:, 1] 35 | x2 = boxes[:, 2] 36 | y2 = boxes[:, 3] 37 | assert (x1 >= 0).all() 38 | assert (y1 >= 0).all() 39 | assert (x2 >= x1).all() 40 | assert (y2 >= y1).all() 41 | assert (x2 < width).all() 42 | assert (y2 < height).all() 43 | 44 | 45 | def filter_small_boxes(boxes, min_size): 46 | w = boxes[:, 2] - boxes[:, 0] 47 | h = boxes[:, 3] - boxes[:, 1] 48 | keep = np.where((w >= min_size) & (h > min_size))[0] 49 | return keep 50 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/datasets/factory.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Factory method for easily getting imdbs by name.""" 9 | from __future__ import absolute_import 10 | from __future__ 
import division 11 | from __future__ import print_function 12 | 13 | __sets = {} 14 | from datasets.pascal_voc import pascal_voc 15 | from datasets.coco import coco 16 | from datasets.imagenet import imagenet 17 | from datasets.vg import vg 18 | 19 | import numpy as np 20 | 21 | # Set up voc__ 22 | for year in ['2007', '2012']: 23 | for split in ['train', 'val', 'trainval', 'test']: 24 | name = 'voc_{}_{}'.format(year, split) 25 | __sets[name] = (lambda split=split, year=year: pascal_voc(split, year)) 26 | 27 | # Set up coco_2014_ 28 | for year in ['2014']: 29 | for split in ['train', 'val', 'minival', 'valminusminival', 'trainval']: 30 | name = 'coco_{}_{}'.format(year, split) 31 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 32 | 33 | # Set up coco_2014_cap_ 34 | for year in ['2014']: 35 | for split in ['train', 'val', 'capval', 'valminuscapval', 'trainval']: 36 | name = 'coco_{}_{}'.format(year, split) 37 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 38 | 39 | # Set up coco_2015_ 40 | for year in ['2015']: 41 | for split in ['test', 'test-dev']: 42 | name = 'coco_{}_{}'.format(year, split) 43 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 44 | 45 | # Set up vg_ 46 | # for version in ['1600-400-20']: 47 | # for split in ['minitrain', 'train', 'minival', 'val', 'test']: 48 | # name = 'vg_{}_{}'.format(version,split) 49 | # __sets[name] = (lambda split=split, version=version: vg(version, split)) 50 | for version in ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20']: 51 | for split in ['minitrain', 'smalltrain', 'train', 'minival', 'smallval', 'val', 'test']: 52 | name = 'vg_{}_{}'.format(version,split) 53 | __sets[name] = (lambda split=split, version=version: vg(version, split)) 54 | 55 | # set up image net. 56 | for split in ['train', 'val', 'val1', 'val2', 'test']: 57 | name = 'imagenet_{}'.format(split) 58 | devkit_path = 'data/imagenet/ILSVRC/devkit' 59 | data_path = 'data/imagenet/ILSVRC' 60 | __sets[name] = (lambda split=split, devkit_path=devkit_path, data_path=data_path: imagenet(split,devkit_path,data_path)) 61 | 62 | def get_imdb(name): 63 | """Get an imdb (image database) by name.""" 64 | if name not in __sets: 65 | raise KeyError('Unknown dataset: {}'.format(name)) 66 | return __sets[name]() 67 | 68 | 69 | def list_imdbs(): 70 | """List all registered imdbs.""" 71 | return list(__sets.keys()) 72 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/datasets/tools/mcg_munge.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import sys 4 | 5 | """Hacky tool to convert file system layout of MCG boxes downloaded from 6 | http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/ 7 | so that it's consistent with those computed by Jan Hosang (see: 8 | http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal- 9 | computing/research/object-recognition-and-scene-understanding/how- 10 | good-are-detection-proposals-really/) 11 | 12 | NB: Boxes from the MCG website are in (y1, x1, y2, x2) order. 13 | Boxes from Hosang et al. are in (x1, y1, x2, y2) order. 
14 | """
15 | 
16 | def munge(src_dir):
17 |     # stored as: ./MCG-COCO-val2014-boxes/COCO_val2014_000000193401.mat
18 |     # want: ./MCG/mat/COCO_val2014_0/COCO_val2014_000000141/COCO_val2014_000000141334.mat
19 | 
20 |     files = os.listdir(src_dir)
21 |     for fn in files:
22 |         base, ext = os.path.splitext(fn)
23 |         # first 14 chars / first 22 chars / all chars + .mat
24 |         # COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat
25 |         first = base[:14]
26 |         second = base[:22]
27 |         dst_dir = os.path.join('MCG', 'mat', first, second)
28 |         if not os.path.exists(dst_dir):
29 |             os.makedirs(dst_dir)
30 |         src = os.path.join(src_dir, fn)
31 |         dst = os.path.join(dst_dir, fn)
32 |         print('MV: {} -> {}'.format(src, dst))
33 |         os.rename(src, dst)
34 | 
35 | if __name__ == '__main__':
36 |     # src_dir should look something like:
37 |     # src_dir = 'MCG-COCO-val2014-boxes'
38 |     src_dir = sys.argv[1]
39 |     munge(src_dir)
40 | 
--------------------------------------------------------------------------------
/chapter4/faster-rcnn-pytorch/lib/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | # CUDA_PATH=/usr/local/cuda/
4 | 
5 | export CUDA_PATH=/usr/local/cuda/
6 | # You may also want to add the following
7 | # export C_INCLUDE_PATH=/opt/cuda/include
8 | 
9 | export CXXFLAGS="-std=c++11"
10 | export CFLAGS="-std=c99"
11 | 
12 | python3 setup.py build_ext --inplace
13 | rm -rf build
14 | 
15 | CUDA_ARCH="-gencode arch=compute_61,code=sm_61 "
16 | 
17 | # compile NMS
18 | cd model/nms/src
19 | echo "Compiling nms kernels by nvcc..."
20 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu \
21 |      -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
22 | 
23 | cd ../
24 | python3 build.py
25 | 
26 | # compile roi_pooling
27 | cd ../../
28 | cd model/roi_pooling/src
29 | echo "Compiling roi pooling kernels by nvcc..."
30 | nvcc -c -o roi_pooling.cu.o roi_pooling_kernel.cu \
31 |      -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
32 | cd ../
33 | python3 build.py
34 | 
35 | # compile roi_align
36 | cd ../../
37 | cd model/roi_align/src
38 | echo "Compiling roi align kernels by nvcc..."
39 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu \
40 |      -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
41 | cd ../
42 | python3 build.py
43 | 
44 | # compile roi_crop
45 | cd ../../
46 | cd model/roi_crop/src
47 | echo "Compiling roi crop kernels by nvcc..."
48 | nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu \ 49 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 50 | cd ../ 51 | python3 build.py 52 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/faster_rcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/faster_rcnn/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/faster_rcnn/vgg16.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | import math 14 | import torchvision.models as models 15 | from model.faster_rcnn.faster_rcnn import _fasterRCNN 16 | import pdb 17 | 18 | class vgg16(_fasterRCNN): 19 | def __init__(self, classes, pretrained=False, class_agnostic=False): 20 | self.model_path = 'data/pretrained_model/vgg16_caffe.pth' 21 | self.dout_base_model = 512 22 | self.pretrained = pretrained 23 | self.class_agnostic = class_agnostic 24 | 25 | _fasterRCNN.__init__(self, classes, class_agnostic) 26 | 27 | def _init_modules(self): 28 | vgg = models.vgg16() 29 | if self.pretrained: 30 | print("Loading pretrained weights from %s" %(self.model_path)) 31 | state_dict = torch.load(self.model_path) 32 | vgg.load_state_dict({k:v for k,v in state_dict.items() if k in vgg.state_dict()}) 33 | 34 | vgg.classifier = nn.Sequential(*list(vgg.classifier._modules.values())[:-1]) # drop VGG's final fc layer so the classifier ends at fc7 35 | 36 | # not using the last maxpool layer 37 | self.RCNN_base = nn.Sequential(*list(vgg.features._modules.values())[:-1]) 38 | 39 | # Fix the layers before conv3: 40 | for layer in range(10): 41 | for p in self.RCNN_base[layer].parameters(): p.requires_grad = False 42 | 43 | # self.RCNN_base = _RCNN_base(vgg.features, self.classes, self.dout_base_model) 44 | 45 | self.RCNN_top = vgg.classifier 46 | 47 | # classification and bbox regression heads on top of the fc7 features 48 | self.RCNN_cls_score = nn.Linear(4096, self.n_classes) 49 | 50 | if self.class_agnostic: 51 | self.RCNN_bbox_pred = nn.Linear(4096, 4) 52 | else: 53 | self.RCNN_bbox_pred = nn.Linear(4096, 4 * self.n_classes) 54 | 55 | def _head_to_tail(self, pool5): 56 | 57 | pool5_flat = pool5.view(pool5.size(0), -1) 58 | fc7 = self.RCNN_top(pool5_flat) 59 | 60 | return fc7 61 | 62 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/nms/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | 
-------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/nms/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/nms/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/nms/_ext/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/nms/_ext/nms/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._nms import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/nms/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | #this_file = os.path.dirname(__file__) 7 | 8 | sources = [] 9 | headers = [] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/nms_cuda.c'] 16 | headers += ['src/nms_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/nms_cuda_kernel.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | print(extra_objects) 25 | 26 | ffi = create_extension( 27 | '_ext.nms', 28 | headers=headers, 29 | sources=sources, 30 | define_macros=defines, 31 | relative_to=__file__, 32 | with_cuda=with_cuda, 33 | extra_objects=extra_objects 34 | ) 35 | 36 | if __name__ == '__main__': 37 | ffi.build() 38 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/nms/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd src 6 | echo "Compiling nms kernels by nvcc..." 
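# The nvcc step below produces nms_cuda_kernel.cu.o, which build.py links
# into the _ext.nms FFI extension via extra_objects, so this script must be
# run before nms_gpu can be imported. -arch=sm_52 targets Maxwell GPUs and
# may need raising to match newer hardware.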
7 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 8 | 9 | cd ../ 10 | python build.py 11 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/nms/nms_cpu.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | import torch 5 | 6 | def nms_cpu(dets, thresh): 7 | dets = dets.numpy() 8 | x1 = dets[:, 0] 9 | y1 = dets[:, 1] 10 | x2 = dets[:, 2] 11 | y2 = dets[:, 3] 12 | scores = dets[:, 4] 13 | 14 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) # box areas (inclusive pixel coordinates, hence the +1) 15 | order = scores.argsort()[::-1] # indices sorted by descending score 16 | 17 | keep = [] 18 | while order.size > 0: 19 | i = order.item(0) 20 | keep.append(i) 21 | xx1 = np.maximum(x1[i], x1[order[1:]]) 22 | yy1 = np.maximum(y1[i], y1[order[1:]]) 23 | xx2 = np.minimum(x2[i], x2[order[1:]]) 24 | yy2 = np.minimum(y2[i], y2[order[1:]]) 25 | 26 | w = np.maximum(0.0, xx2 - xx1 + 1) 27 | h = np.maximum(0.0, yy2 - yy1 + 1) 28 | inter = w * h 29 | ovr = inter / (areas[i] + areas[order[1:]] - inter) # IoU with the current highest-scoring box 30 | 31 | inds = np.where(ovr <= thresh)[0] 32 | order = order[inds + 1] # keep boxes below the overlap threshold; +1 skips the box just kept 33 | 34 | return torch.IntTensor(keep) 35 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/nms/nms_gpu.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import torch 3 | import numpy as np 4 | from ._ext import nms 5 | import pdb 6 | 7 | def nms_gpu(dets, thresh): 8 | keep = dets.new(dets.size(0), 1).zero_().int() 9 | num_out = dets.new(1).zero_().int() 10 | nms.nms_cuda(keep, dets, num_out, thresh) 11 | keep = keep[:num_out[0]] 12 | return keep 13 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | import torch 8 | from model.utils.config import cfg 9 | if torch.cuda.is_available(): 10 | from model.nms.nms_gpu import nms_gpu 11 | from model.nms.nms_cpu import nms_cpu 12 | 13 | def nms(dets, thresh, force_cpu=False): 14 | """Dispatch to either CPU or GPU NMS implementations.""" 15 | if dets.shape[0] == 0: 16 | return [] 17 | # ---numpy version--- 18 | # original: return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 19 | # ---pytorch version--- 20 | 21 | return nms_cpu(dets, thresh) if force_cpu else nms_gpu(dets, thresh) 22 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/nms/src/nms_cuda.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include <stdio.h> 3 | #include "nms_cuda_kernel.h" 4 | 5 | // this symbol will be resolved automatically from PyTorch libs 6 | extern THCState *state; 7 | 8 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host, 9 | THCudaIntTensor *num_out, float nms_overlap_thresh) { 10 | 11 | nms_cuda_compute(THCudaIntTensor_data(state, keep_out), 12 | THCudaIntTensor_data(state, num_out), 13 | THCudaTensor_data(state, boxes_host), 14 | THCudaTensor_size(state, boxes_host, 0), 15 | 
THCudaTensor_size(state, boxes_host, 1), 16 | nms_overlap_thresh); 17 | 18 | return 1; 19 | } 20 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/nms/src/nms_cuda.h: -------------------------------------------------------------------------------- 1 | // int nms_cuda(THCudaTensor *keep_out, THCudaTensor *num_out, 2 | // THCudaTensor *boxes_host, THCudaTensor *nms_overlap_thresh); 3 | 4 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host, 5 | THCudaIntTensor *num_out, float nms_overlap_thresh); 6 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/nms/src/nms_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num, 6 | int boxes_dim, float nms_overlap_thresh); 7 | 8 | #ifdef __cplusplus 9 | } 10 | #endif 11 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_align/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/roi_align/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_align/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/roi_align/_ext/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_align/_ext/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_align import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_align/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | sources = ['src/roi_align.c'] 7 | headers = ['src/roi_align.h'] 8 | extra_objects = [] 9 | #sources = [] 10 | #headers = [] 11 | defines = [] 12 | with_cuda = False 13 | 14 | this_file = os.path.dirname(os.path.realpath(__file__)) 15 | print(this_file) 16 | 17 | if torch.cuda.is_available(): 18 | print('Including CUDA code.') 19 | sources += ['src/roi_align_cuda.c'] 20 | headers += ['src/roi_align_cuda.h'] 21 | defines += [('WITH_CUDA', None)] 22 | with_cuda = True 23 | 24 | extra_objects = ['src/roi_align_kernel.cu.o'] 25 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 26 | 27 | ffi = create_extension( 28 | '_ext.roi_align', 29 | headers=headers, 30 | 
sources=sources, 31 | define_macros=defines, 32 | relative_to=__file__, 33 | with_cuda=with_cuda, 34 | extra_objects=extra_objects 35 | ) 36 | 37 | if __name__ == '__main__': 38 | ffi.build() 39 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_align/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/roi_align/functions/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_align/functions/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from .._ext import roi_align 4 | 5 | 6 | # TODO use save_for_backward instead 7 | class RoIAlignFunction(Function): 8 | def __init__(self, aligned_height, aligned_width, spatial_scale): 9 | self.aligned_width = int(aligned_width) 10 | self.aligned_height = int(aligned_height) 11 | self.spatial_scale = float(spatial_scale) 12 | self.rois = None 13 | self.feature_size = None 14 | 15 | def forward(self, features, rois): 16 | self.rois = rois 17 | self.feature_size = features.size() 18 | 19 | batch_size, num_channels, data_height, data_width = features.size() 20 | num_rois = rois.size(0) 21 | 22 | output = features.new(num_rois, num_channels, self.aligned_height, self.aligned_width).zero_() 23 | if features.is_cuda: 24 | roi_align.roi_align_forward_cuda(self.aligned_height, 25 | self.aligned_width, 26 | self.spatial_scale, features, 27 | rois, output) 28 | else: 29 | roi_align.roi_align_forward(self.aligned_height, 30 | self.aligned_width, 31 | self.spatial_scale, features, 32 | rois, output) 33 | # raise NotImplementedError 34 | 35 | return output 36 | 37 | def backward(self, grad_output): 38 | assert(self.feature_size is not None and grad_output.is_cuda) 39 | 40 | batch_size, num_channels, data_height, data_width = self.feature_size 41 | 42 | grad_input = self.rois.new(batch_size, num_channels, data_height, 43 | data_width).zero_() 44 | roi_align.roi_align_backward_cuda(self.aligned_height, 45 | self.aligned_width, 46 | self.spatial_scale, grad_output, 47 | self.rois, grad_input) 48 | 49 | # print grad_input 50 | 51 | return grad_input, None 52 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_align/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd src 6 | echo "Compiling roi align kernels by nvcc..." 
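# -arch=sm_52 below differs from the sm_61 set in lib/make.sh's CUDA_ARCH;
# both should be set to the compute capability of the GPU the extension
# will actually run on.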
7 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 8 | 9 | cd ../ 10 | python build.py 11 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_align/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/roi_align/modules/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_align/modules/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from torch.nn.functional import avg_pool2d, max_pool2d 3 | from ..functions.roi_align import RoIAlignFunction 4 | 5 | 6 | class RoIAlign(Module): 7 | def __init__(self, aligned_height, aligned_width, spatial_scale): 8 | super(RoIAlign, self).__init__() 9 | 10 | self.aligned_width = int(aligned_width) 11 | self.aligned_height = int(aligned_height) 12 | self.spatial_scale = float(spatial_scale) 13 | 14 | def forward(self, features, rois): 15 | return RoIAlignFunction(self.aligned_height, self.aligned_width, 16 | self.spatial_scale)(features, rois) 17 | 18 | class RoIAlignAvg(Module): 19 | def __init__(self, aligned_height, aligned_width, spatial_scale): 20 | super(RoIAlignAvg, self).__init__() 21 | 22 | self.aligned_width = int(aligned_width) 23 | self.aligned_height = int(aligned_height) 24 | self.spatial_scale = float(spatial_scale) 25 | 26 | def forward(self, features, rois): 27 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, 28 | self.spatial_scale)(features, rois) 29 | return avg_pool2d(x, kernel_size=2, stride=1) # average the (h+1)x(w+1) grid back down to h x w 30 | 31 | class RoIAlignMax(Module): 32 | def __init__(self, aligned_height, aligned_width, spatial_scale): 33 | super(RoIAlignMax, self).__init__() 34 | 35 | self.aligned_width = int(aligned_width) 36 | self.aligned_height = int(aligned_height) 37 | self.spatial_scale = float(spatial_scale) 38 | 39 | def forward(self, features, rois): 40 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, 41 | self.spatial_scale)(features, rois) 42 | return max_pool2d(x, kernel_size=2, stride=1) 43 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_align/src/roi_align.h: -------------------------------------------------------------------------------- 1 | int roi_align_forward(int aligned_height, int aligned_width, float spatial_scale, 2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output); 3 | 4 | int roi_align_backward(int aligned_height, int aligned_width, float spatial_scale, 5 | THFloatTensor * top_grad, THFloatTensor * rois, THFloatTensor * bottom_grad); 6 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_align/src/roi_align_cuda.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include <math.h> 3 | #include "roi_align_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 
| float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | 16 | // Number of ROIs 17 | int num_rois = THCudaTensor_size(state, rois, 0); 18 | int size_rois = THCudaTensor_size(state, rois, 1); 19 | if (size_rois != 5) 20 | { 21 | return 0; 22 | } 23 | 24 | // data height 25 | int data_height = THCudaTensor_size(state, features, 2); 26 | // data width 27 | int data_width = THCudaTensor_size(state, features, 3); 28 | // Number of channels 29 | int num_channels = THCudaTensor_size(state, features, 1); 30 | 31 | cudaStream_t stream = THCState_getCurrentStream(state); 32 | 33 | ROIAlignForwardLaucher( 34 | data_flat, spatial_scale, num_rois, data_height, 35 | data_width, num_channels, aligned_height, 36 | aligned_width, rois_flat, 37 | output_flat, stream); 38 | 39 | return 1; 40 | } 41 | 42 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 43 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 44 | { 45 | // Grab the input tensor 46 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 47 | float * rois_flat = THCudaTensor_data(state, rois); 48 | 49 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 50 | 51 | // Number of ROIs 52 | int num_rois = THCudaTensor_size(state, rois, 0); 53 | int size_rois = THCudaTensor_size(state, rois, 1); 54 | if (size_rois != 5) 55 | { 56 | return 0; 57 | } 58 | 59 | // batch size 60 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 61 | // data height 62 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 63 | // data width 64 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 65 | // Number of channels 66 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 67 | 68 | cudaStream_t stream = THCState_getCurrentStream(state); 69 | ROIAlignBackwardLaucher( 70 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 71 | data_width, num_channels, aligned_height, 72 | aligned_width, rois_flat, 73 | bottom_grad_flat, stream); 74 | 75 | return 1; 76 | } 77 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_align/src/roi_align_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output); 3 | 4 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad); 6 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_align/src/roi_align_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_ALIGN_KERNEL 2 | #define _ROI_ALIGN_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | __global__ void ROIAlignForward(const int nthreads, const float* bottom_data, 9 | const float spatial_scale, const int height, const int width, 10 | const int channels, const int aligned_height, const int aligned_width, 11 | const float* bottom_rois, float* top_data); 12 | 13 | int ROIAlignForwardLaucher( 14 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 15 | const int width, const int channels, const int aligned_height, 16 | const int 
aligned_width, const float* bottom_rois, 17 | float* top_data, cudaStream_t stream); 18 | 19 | __global__ void ROIAlignBackward(const int nthreads, const float* top_diff, 20 | const float spatial_scale, const int height, const int width, 21 | const int channels, const int aligned_height, const int aligned_width, 22 | float* bottom_diff, const float* bottom_rois); 23 | 24 | int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 25 | const int height, const int width, const int channels, const int aligned_height, 26 | const int aligned_width, const float* bottom_rois, 27 | float* bottom_diff, cudaStream_t stream); 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | 33 | #endif 34 | 35 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/roi_crop/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/roi_crop/_ext/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/_ext/crop_resize/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._crop_resize import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | locals[symbol] = _wrap_function(fn, _ffi) 10 | __all__.append(symbol) 11 | 12 | _import_symbols(locals()) 13 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/_ext/roi_crop/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_crop import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | #this_file = os.path.dirname(__file__) 7 | 8 | sources = ['src/roi_crop.c'] 9 | headers = ['src/roi_crop.h'] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/roi_crop_cuda.c'] 16 | headers += ['src/roi_crop_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | 
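# torch.utils.ffi was removed in PyTorch 1.0 (superseded by
# torch.utils.cpp_extension), so this build script only works on the old
# 0.x releases; the .cu.o object listed below must already have been
# produced by make.sh before running it.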
print(this_file) 22 | extra_objects = ['src/roi_crop_cuda_kernel.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | 25 | ffi = create_extension( 26 | '_ext.roi_crop', 27 | headers=headers, 28 | sources=sources, 29 | define_macros=defines, 30 | relative_to=__file__, 31 | with_cuda=with_cuda, 32 | extra_objects=extra_objects 33 | ) 34 | 35 | if __name__ == '__main__': 36 | ffi.build() 37 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/roi_crop/functions/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/functions/crop_resize.py: -------------------------------------------------------------------------------- 1 | # functions/add.py 2 | import torch 3 | from torch.autograd import Function 4 | from .._ext import roi_crop 5 | from cffi import FFI 6 | ffi = FFI() 7 | 8 | class RoICropFunction(Function): 9 | def forward(self, input1, input2): 10 | self.input1 = input1 11 | self.input2 = input2 12 | self.device_c = ffi.new("int *") 13 | output = torch.zeros(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2]) 14 | #print('device %d' % torch.cuda.current_device()) 15 | if input1.is_cuda: 16 | self.device = torch.cuda.current_device() 17 | else: 18 | self.device = -1 19 | self.device_c[0] = self.device 20 | if not input1.is_cuda: 21 | roi_crop.BilinearSamplerBHWD_updateOutput(input1, input2, output) 22 | else: 23 | output = output.cuda(self.device) 24 | roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output) 25 | return output 26 | 27 | def backward(self, grad_output): 28 | grad_input1 = torch.zeros(self.input1.size()) 29 | grad_input2 = torch.zeros(self.input2.size()) 30 | #print('backward device %d' % self.device) 31 | if not grad_output.is_cuda: 32 | roi_crop.BilinearSamplerBHWD_updateGradInput(self.input1, self.input2, grad_input1, grad_input2, grad_output) 33 | else: 34 | grad_input1 = grad_input1.cuda(self.device) 35 | grad_input2 = grad_input2.cuda(self.device) 36 | roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(self.input1, self.input2, grad_input1, grad_input2, grad_output) 37 | return grad_input1, grad_input2 38 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/functions/gridgen.py: -------------------------------------------------------------------------------- 1 | # functions/add.py 2 | import torch 3 | from torch.autograd import Function 4 | import numpy as np 5 | 6 | 7 | class AffineGridGenFunction(Function): 8 | def __init__(self, height, width,lr=1): 9 | super(AffineGridGenFunction, self).__init__() 10 | self.lr = lr 11 | self.height, self.width = height, width 12 | self.grid = np.zeros( [self.height, self.width, 3], dtype=np.float32) 13 | self.grid[:,:,0] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.height)), 0), repeats = self.width, axis = 0).T, 0) 14 | self.grid[:,:,1] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.width)), 0), repeats = self.height, axis = 0), 0) 15 | # self.grid[:,:,0] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 
2.0/(self.height - 1)), 0), repeats = self.width, axis = 0).T, 0) 16 | # self.grid[:,:,1] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.width - 1)), 0), repeats = self.height, axis = 0), 0) 17 | self.grid[:,:,2] = np.ones([self.height, width]) 18 | self.grid = torch.from_numpy(self.grid.astype(np.float32)) 19 | #print(self.grid) 20 | 21 | def forward(self, input1): 22 | self.input1 = input1 23 | output = input1.new(torch.Size([input1.size(0)]) + self.grid.size()).zero_() 24 | self.batchgrid = input1.new(torch.Size([input1.size(0)]) + self.grid.size()).zero_() 25 | for i in range(input1.size(0)): 26 | self.batchgrid[i] = self.grid # copy the base grid into each batch slot 27 | 28 | # if input1.is_cuda: 29 | # self.batchgrid = self.batchgrid.cuda() 30 | # output = output.cuda() 31 | 32 | for i in range(input1.size(0)): 33 | output = torch.bmm(self.batchgrid.view(-1, self.height*self.width, 3), torch.transpose(input1, 1, 2)).view(-1, self.height, self.width, 2) 34 | 35 | return output 36 | 37 | def backward(self, grad_output): 38 | 39 | grad_input1 = self.input1.new(self.input1.size()).zero_() 40 | 41 | # if grad_output.is_cuda: 42 | # self.batchgrid = self.batchgrid.cuda() 43 | # grad_input1 = grad_input1.cuda() 44 | 45 | grad_input1 = torch.baddbmm(grad_input1, torch.transpose(grad_output.view(-1, self.height*self.width, 2), 1,2), self.batchgrid.view(-1, self.height*self.width, 3)) 46 | return grad_input1 47 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/functions/roi_crop.py: -------------------------------------------------------------------------------- 1 | # functions/add.py 2 | import torch 3 | from torch.autograd import Function 4 | from .._ext import roi_crop 5 | import pdb 6 | 7 | class RoICropFunction(Function): 8 | def forward(self, input1, input2): 9 | self.input1 = input1.clone() 10 | self.input2 = input2.clone() 11 | output = input2.new(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2]).zero_() 12 | assert output.get_device() == input1.get_device(), "output and input1 must be on the same device" 13 | assert output.get_device() == input2.get_device(), "output and input2 must be on the same device" 14 | roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output) 15 | return output 16 | 17 | def backward(self, grad_output): 18 | grad_input1 = self.input1.new(self.input1.size()).zero_() 19 | grad_input2 = self.input2.new(self.input2.size()).zero_() 20 | roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(self.input1, self.input2, grad_input1, grad_input2, grad_output) 21 | return grad_input1, grad_input2 22 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd src 6 | echo "Compiling roi crop kernels by nvcc..." 
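# CUDA_PATH above is assigned but never referenced below; nvcc is resolved
# from PATH, so the CUDA toolkit's bin directory must be on PATH when this
# script runs.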
7 | nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 8 | 9 | cd ../ 10 | python build.py 11 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/roi_crop/modules/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/modules/roi_crop.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_crop import RoICropFunction 3 | 4 | class _RoICrop(Module): 5 | def __init__(self, layout = 'BHWD'): 6 | super(_RoICrop, self).__init__() 7 | def forward(self, input1, input2): 8 | return RoICropFunction()(input1, input2) 9 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/src/roi_crop.h: -------------------------------------------------------------------------------- 1 | int BilinearSamplerBHWD_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output); 2 | 3 | int BilinearSamplerBHWD_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages, 4 | THFloatTensor *gradGrids, THFloatTensor *gradOutput); 5 | 6 | 7 | 8 | int BilinearSamplerBCHW_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output); 9 | 10 | int BilinearSamplerBCHW_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages, 11 | THFloatTensor *gradGrids, THFloatTensor *gradOutput); 12 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/src/roi_crop_cuda.h: -------------------------------------------------------------------------------- 1 | // Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW) 2 | // we assume BHWD format in inputImages 3 | // we assume BHW(YX) format on grids 4 | 5 | int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *output); 6 | 7 | int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *gradInputImages, 8 | THCudaTensor *gradGrids, THCudaTensor *gradOutput); 9 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/src/roi_crop_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | 6 | int BilinearSamplerBHWD_updateOutput_cuda_kernel(/*output->size[3]*/int oc, 7 | /*output->size[2]*/int ow, 8 | /*output->size[1]*/int oh, 9 | /*output->size[0]*/int ob, 10 | /*THCudaTensor_size(state, inputImages, 3)*/int ic, 11 | /*THCudaTensor_size(state, inputImages, 1)*/int ih, 12 | /*THCudaTensor_size(state, inputImages, 2)*/int iw, 13 | /*THCudaTensor_size(state, inputImages, 0)*/int ib, 14 | /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw, 15 | /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw, 16 | /*THCudaTensor *output*/float *output, 
int osb, int osc, int osh, int osw, 17 | /*THCState_getCurrentStream(state)*/cudaStream_t stream); 18 | 19 | int BilinearSamplerBHWD_updateGradInput_cuda_kernel(/*gradOutput->size[3]*/int goc, 20 | /*gradOutput->size[2]*/int gow, 21 | /*gradOutput->size[1]*/int goh, 22 | /*gradOutput->size[0]*/int gob, 23 | /*THCudaTensor_size(state, inputImages, 3)*/int ic, 24 | /*THCudaTensor_size(state, inputImages, 1)*/int ih, 25 | /*THCudaTensor_size(state, inputImages, 2)*/int iw, 26 | /*THCudaTensor_size(state, inputImages, 0)*/int ib, 27 | /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw, 28 | /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw, 29 | /*THCudaTensor *gradInputImages*/float *gradInputImages, int gisb, int gisc, int gish, int gisw, 30 | /*THCudaTensor *gradGrids*/float *gradGrids, int ggsb, int ggsc, int ggsh, int ggsw, 31 | /*THCudaTensor *gradOutput*/float *gradOutput, int gosb, int gosc, int gosh, int gosw, 32 | /*THCState_getCurrentStream(state)*/cudaStream_t stream); 33 | 34 | 35 | #ifdef __cplusplus 36 | } 37 | #endif 38 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/_ext/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/_ext/roi_pooling/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_pooling import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | 7 | sources = ['src/roi_pooling.c'] 8 | headers = ['src/roi_pooling.h'] 9 | extra_objects = [] 10 | defines = [] 11 | with_cuda = False 12 | 13 | this_file = os.path.dirname(os.path.realpath(__file__)) 14 | print(this_file) 15 | 16 | if torch.cuda.is_available(): 17 | print('Including CUDA code.') 18 | sources += ['src/roi_pooling_cuda.c'] 19 | headers += ['src/roi_pooling_cuda.h'] 20 | defines += [('WITH_CUDA', None)] 21 | with_cuda = True 22 | extra_objects = ['src/roi_pooling.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | 25 | ffi = create_extension( 26 | '_ext.roi_pooling', 27 | 
headers=headers, 28 | sources=sources, 29 | define_macros=defines, 30 | relative_to=__file__, 31 | with_cuda=with_cuda, 32 | extra_objects=extra_objects 33 | ) 34 | 35 | if __name__ == '__main__': 36 | ffi.build() 37 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/functions/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/functions/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from .._ext import roi_pooling 4 | import pdb 5 | 6 | class RoIPoolFunction(Function): 7 | def __init__(ctx, pooled_height, pooled_width, spatial_scale): 8 | ctx.pooled_width = pooled_width 9 | ctx.pooled_height = pooled_height 10 | ctx.spatial_scale = spatial_scale 11 | ctx.feature_size = None 12 | 13 | def forward(ctx, features, rois): 14 | ctx.feature_size = features.size() 15 | batch_size, num_channels, data_height, data_width = ctx.feature_size 16 | num_rois = rois.size(0) 17 | output = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_() 18 | ctx.argmax = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_().int() 19 | ctx.rois = rois 20 | if not features.is_cuda: 21 | _features = features.permute(0, 2, 3, 1) 22 | roi_pooling.roi_pooling_forward(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, 23 | _features, rois, output) 24 | else: 25 | roi_pooling.roi_pooling_forward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, 26 | features, rois, output, ctx.argmax) 27 | 28 | return output 29 | 30 | def backward(ctx, grad_output): 31 | assert(ctx.feature_size is not None and grad_output.is_cuda) 32 | batch_size, num_channels, data_height, data_width = ctx.feature_size 33 | grad_input = grad_output.new(batch_size, num_channels, data_height, data_width).zero_() 34 | 35 | roi_pooling.roi_pooling_backward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, 36 | grad_output, ctx.rois, grad_input, ctx.argmax) 37 | 38 | return grad_input, None 39 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/modules/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/modules/roi_pool.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_pool import RoIPoolFunction 3 | 4 | 5 | class _RoIPooling(Module): 6 | def __init__(self, pooled_height, pooled_width, spatial_scale): 7 | super(_RoIPooling, self).__init__() 8 | 9 | self.pooled_width = int(pooled_width) 10 | self.pooled_height = int(pooled_height) 11 | self.spatial_scale = float(spatial_scale) 12 | 13 | def 
forward(self, features, rois): 14 | return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois) 15 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/src/roi_pooling.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output); -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/src/roi_pooling_cuda.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include <math.h> 3 | #include "roi_pooling_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 16 | 17 | // Number of ROIs 18 | int num_rois = THCudaTensor_size(state, rois, 0); 19 | int size_rois = THCudaTensor_size(state, rois, 1); 20 | if (size_rois != 5) 21 | { 22 | return 0; 23 | } 24 | 25 | // batch size 26 | // int batch_size = THCudaTensor_size(state, features, 0); 27 | // if (batch_size != 1) 28 | // { 29 | // return 0; 30 | // } 31 | // data height 32 | int data_height = THCudaTensor_size(state, features, 2); 33 | // data width 34 | int data_width = THCudaTensor_size(state, features, 3); 35 | // Number of channels 36 | int num_channels = THCudaTensor_size(state, features, 1); 37 | 38 | cudaStream_t stream = THCState_getCurrentStream(state); 39 | 40 | ROIPoolForwardLaucher( 41 | data_flat, spatial_scale, num_rois, data_height, 42 | data_width, num_channels, pooled_height, 43 | pooled_width, rois_flat, 44 | output_flat, argmax_flat, stream); 45 | 46 | return 1; 47 | } 48 | 49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 50 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax) 51 | { 52 | // Grab the input tensor 53 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 54 | float * rois_flat = THCudaTensor_data(state, rois); 55 | 56 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 57 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 58 | 59 | // Number of ROIs 60 | int num_rois = THCudaTensor_size(state, rois, 0); 61 | int size_rois = THCudaTensor_size(state, rois, 1); 62 | if (size_rois != 5) 63 | { 64 | return 0; 65 | } 66 | 67 | // batch size 68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 69 | // if (batch_size != 1) 70 | // { 71 | // return 0; 72 | // } 73 | // data height 74 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 75 | // data width 76 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 77 | // Number of channels 78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 79 | 80 | cudaStream_t stream = THCState_getCurrentStream(state); 81 | ROIPoolBackwardLaucher( 82 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 83 | data_width, num_channels, 
pooled_height, 84 | pooled_width, rois_flat, 85 | bottom_grad_flat, argmax_flat, stream); 86 | 87 | return 1; 88 | } 89 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/src/roi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax); 3 | 4 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax); -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/src/roi_pooling_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_POOLING_KERNEL 2 | #define _ROI_POOLING_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int ROIPoolForwardLaucher( 9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 10 | const int width, const int channels, const int pooled_height, 11 | const int pooled_width, const float* bottom_rois, 12 | float* top_data, int* argmax_data, cudaStream_t stream); 13 | 14 | 15 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 16 | const int height, const int width, const int channels, const int pooled_height, 17 | const int pooled_width, const float* bottom_rois, 18 | float* bottom_diff, const int* argmax_data, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/rpn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/rpn/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/rpn/generate_anchors.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | # -------------------------------------------------------- 3 | # Faster R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick and Sean Bell 7 | # -------------------------------------------------------- 8 | 9 | import numpy as np 10 | import pdb 11 | 12 | # Verify that we compute the same anchors as Shaoqing's matlab implementation: 13 | # 14 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat 15 | # >> anchors 16 | # 17 | # anchors = 18 | # 19 | # -83 -39 100 56 20 | # -175 -87 192 104 21 | # -359 -183 376 200 22 | # -55 -55 72 72 23 | # -119 -119 136 136 24 | # -247 -247 264 264 25 | # -35 -79 52 96 26 | # -79 -167 96 184 27 | # -167 -343 184 360 28 | 29 | #array([[ -83., -39., 100., 56.], 30 | # [-175., -87., 192., 104.], 31 | # [-359., -183., 376., 200.], 32 | # [ -55., -55., 72., 72.], 33 | # [-119., -119., 136., 136.], 34 | # [-247., -247., 264., 264.], 35 | # [ -35., -79., 52., 96.], 36 | # [ -79., -167., 96., 184.], 37 | 
# [-167., -343., 184., 360.]]) 38 | 39 | try: 40 | xrange # Python 2 41 | except NameError: 42 | xrange = range # Python 3 43 | 44 | 45 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 46 | scales=2**np.arange(3, 6)): 47 | """ 48 | Generate anchor (reference) windows by enumerating aspect ratios X 49 | scales wrt a reference (0, 0, 15, 15) window. 50 | """ 51 | 52 | # First create a base anchor of [0, 0, 15, 15] 53 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 54 | # Vary the base anchor's width and height to generate anchors with three aspect ratios 55 | ratio_anchors = _ratio_enum(base_anchor, ratios) 56 | # Then scale those anchors to obtain the final 9 anchors 57 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 58 | for i in xrange(ratio_anchors.shape[0])]) 59 | # Return the anchors corresponding to the feature map size 60 | return anchors 61 | 62 | def _whctrs(anchor): 63 | """ 64 | Return width, height, x center, and y center for an anchor (window). 65 | """ 66 | 67 | w = anchor[2] - anchor[0] + 1 68 | h = anchor[3] - anchor[1] + 1 69 | x_ctr = anchor[0] + 0.5 * (w - 1) 70 | y_ctr = anchor[1] + 0.5 * (h - 1) 71 | return w, h, x_ctr, y_ctr 72 | 73 | def _mkanchors(ws, hs, x_ctr, y_ctr): 74 | """ 75 | Given a vector of widths (ws) and heights (hs) around a center 76 | (x_ctr, y_ctr), output a set of anchors (windows). 77 | """ 78 | 79 | ws = ws[:, np.newaxis] 80 | hs = hs[:, np.newaxis] 81 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 82 | y_ctr - 0.5 * (hs - 1), 83 | x_ctr + 0.5 * (ws - 1), 84 | y_ctr + 0.5 * (hs - 1))) 85 | return anchors 86 | 87 | def _ratio_enum(anchor, ratios): 88 | """ 89 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 90 | """ 91 | 92 | w, h, x_ctr, y_ctr = _whctrs(anchor) 93 | size = w * h 94 | size_ratios = size / ratios 95 | ws = np.round(np.sqrt(size_ratios)) 96 | hs = np.round(ws * ratios) 97 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 98 | return anchors 99 | 100 | def _scale_enum(anchor, scales): 101 | """ 102 | Enumerate a set of anchors for each scale wrt an anchor. 
103 | """ 104 | 105 | w, h, x_ctr, y_ctr = _whctrs(anchor) 106 | ws = w * scales 107 | hs = h * scales 108 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 109 | return anchors 110 | 111 | if __name__ == '__main__': 112 | import time 113 | t = time.time() 114 | a = generate_anchors() 115 | print(time.time() - t) 116 | print(a) 117 | from IPython import embed; embed() 118 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/utils/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/utils/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/utils/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps(np.ndarray[DTYPE_t, ndim=2] boxes, 16 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 17 | return bbox_overlaps_c(boxes, query_boxes) 18 | 19 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_overlaps_c( 20 | np.ndarray[DTYPE_t, ndim=2] boxes, 21 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 22 | """ 23 | Parameters 24 | ---------- 25 | boxes: (N, 4) ndarray of float 26 | query_boxes: (K, 4) ndarray of float 27 | Returns 28 | ------- 29 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 30 | """ 31 | cdef unsigned int N = boxes.shape[0] 32 | cdef unsigned int K = query_boxes.shape[0] 33 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 34 | cdef DTYPE_t iw, ih, box_area 35 | cdef DTYPE_t ua 36 | cdef unsigned int k, n 37 | for k in range(K): 38 | box_area = ( 39 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 40 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 41 | ) 42 | for n in range(N): 43 | iw = ( 44 | min(boxes[n, 2], query_boxes[k, 2]) - 45 | max(boxes[n, 0], query_boxes[k, 0]) + 1 46 | ) 47 | if iw > 0: 48 | ih = ( 49 | min(boxes[n, 3], query_boxes[k, 3]) - 50 | max(boxes[n, 1], query_boxes[k, 1]) + 1 51 | ) 52 | if ih > 0: 53 | ua = float( 54 | (boxes[n, 2] - boxes[n, 0] + 1) * 55 | (boxes[n, 3] - boxes[n, 1] + 1) + 56 | box_area - iw * ih 57 | ) 58 | overlaps[n, k] = iw * ih / ua 59 | return overlaps 60 | 61 | 62 | def bbox_intersections( 63 | np.ndarray[DTYPE_t, ndim=2] boxes, 64 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 65 | return bbox_intersections_c(boxes, query_boxes) 66 | 67 | 68 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_intersections_c( 69 | np.ndarray[DTYPE_t, ndim=2] boxes, 70 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 71 | """ 72 | For each query box compute the intersection ratio covered by boxes 73 | ---------- 74 | Parameters 75 | ---------- 76 | boxes: (N, 4) ndarray of float 77 | query_boxes: (K, 4) ndarray 
of float 78 | Returns 79 | ------- 80 | overlaps: (N, K) ndarray of intersec between boxes and query_boxes 81 | """ 82 | cdef unsigned int N = boxes.shape[0] 83 | cdef unsigned int K = query_boxes.shape[0] 84 | cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) 85 | cdef DTYPE_t iw, ih, box_area 86 | cdef DTYPE_t ua 87 | cdef unsigned int k, n 88 | for k in range(K): 89 | box_area = ( 90 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 91 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 92 | ) 93 | for n in range(N): 94 | iw = ( 95 | min(boxes[n, 2], query_boxes[k, 2]) - 96 | max(boxes[n, 0], query_boxes[k, 0]) + 1 97 | ) 98 | if iw > 0: 99 | ih = ( 100 | min(boxes[n, 3], query_boxes[k, 3]) - 101 | max(boxes[n, 1], query_boxes[k, 1]) + 1 102 | ) 103 | if ih > 0: 104 | intersec[n, k] = iw * ih / box_area 105 | return intersec -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/utils/blob.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Blob helper functions.""" 9 | 10 | import numpy as np 11 | # from scipy.misc import imread, imresize 12 | import cv2 13 | 14 | try: 15 | xrange # Python 2 16 | except NameError: 17 | xrange = range # Python 3 18 | 19 | 20 | def im_list_to_blob(ims): 21 | """Convert a list of images into a network input. 22 | 23 | Assumes images are already prepared (means subtracted, BGR order, ...). 24 | """ 25 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 26 | num_images = len(ims) 27 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 28 | dtype=np.float32) 29 | for i in xrange(num_images): 30 | im = ims[i] 31 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 32 | 33 | return blob 34 | 35 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 36 | """Mean subtract and scale an image for use in a blob.""" 37 | 38 | im = im.astype(np.float32, copy=False) 39 | im -= pixel_means 40 | # im = im[:, :, ::-1] 41 | im_shape = im.shape 42 | im_size_min = np.min(im_shape[0:2]) 43 | im_size_max = np.max(im_shape[0:2]) 44 | im_scale = float(target_size) / float(im_size_min) 45 | # Prevent the biggest axis from being more than MAX_SIZE 46 | # if np.round(im_scale * im_size_max) > max_size: 47 | # im_scale = float(max_size) / float(im_size_max) 48 | # im = imresize(im, im_scale) 49 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 50 | interpolation=cv2.INTER_LINEAR) 51 | 52 | return im, im_scale 53 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/pycocotools/UPSTREAM_REV: -------------------------------------------------------------------------------- 1 | https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574 2 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/pycocotools/license.txt: 
-------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of the FreeBSD Project. 27 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | from . import _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 
28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criterion. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criterion above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | encode = _mask.encode 77 | decode = _mask.decode 78 | iou = _mask.iou 79 | merge = _mask.merge 80 | area = _mask.area 81 | toBbox = _mask.toBbox 82 | frPyObjects = _mask.frPyObjects -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/pycocotools/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | #include <stdbool.h> 9 | 10 | typedef unsigned int uint; 11 | typedef unsigned long siz; 12 | typedef unsigned char byte; 13 | typedef double* BB; 14 | typedef struct { siz h, w, m; uint *cnts; } RLE; 15 | 16 | // Initialize/destroy RLE. 
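// (Conventions used throughout this header: h and w are the mask height and
// width; in the array functions n is the number of masks, while for
// rleIou/bbIou, m and n count detections and ground truths respectively.)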
17 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 18 | void rleFree( RLE *R ); 19 | 20 | // Initialize/destroy RLE array. 21 | void rlesInit( RLE **R, siz n ); 22 | void rlesFree( RLE **R, siz n ); 23 | 24 | // Encode binary masks using RLE. 25 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 26 | 27 | // Decode binary masks encoded via RLE. 28 | void rleDecode( const RLE *R, byte *mask, siz n ); 29 | 30 | // Compute union or intersection of encoded masks. 31 | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect ); 32 | 33 | // Compute area of encoded masks. 34 | void rleArea( const RLE *R, siz n, uint *a ); 35 | 36 | // Compute intersection over union between masks. 37 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 38 | 39 | // Compute intersection over union between bounding boxes. 40 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 41 | 42 | // Get bounding boxes surrounding encoded masks. 43 | void rleToBbox( const RLE *R, BB bb, siz n ); 44 | 45 | // Convert bounding boxes to encoded masks. 46 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 47 | 48 | // Convert polygon to encoded mask. 49 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 50 | 51 | // Get compressed string representation of encoded mask. 52 | char* rleToString( const RLE *R ); 53 | 54 | // Convert from compressed string representation of encoded mask. 55 | void rleFrString( RLE *R, char *s, siz h, siz w ); 56 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/roi_data_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/roi_data_layer/minibatch.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | 8 | """Compute minibatch blobs for training a Fast R-CNN network.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import numpy as np 14 | import numpy.random as npr 15 | from imageio import imread 16 | from model.utils.config import cfg 17 | from model.utils.blob import prep_im_for_blob, im_list_to_blob 18 | import pdb 19 | def get_minibatch(roidb, num_classes): 20 | """Given a roidb, construct a minibatch sampled from it.""" 21 | num_images = len(roidb) 22 | # Sample random scales to use for each image in this batch 23 | random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES), 24 | size=num_images) 25 | assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ 26 | 'num_images ({}) must divide BATCH_SIZE ({})'. 
\ 27 | format(num_images, cfg.TRAIN.BATCH_SIZE) 28 | 29 | # Get the input image blob, formatted for caffe 30 | im_blob, im_scales = _get_image_blob(roidb, random_scale_inds) 31 | 32 | blobs = {'data': im_blob} 33 | 34 | assert len(im_scales) == 1, "Single batch only" 35 | assert len(roidb) == 1, "Single batch only" 36 | 37 | # gt boxes: (x1, y1, x2, y2, cls) 38 | if cfg.TRAIN.USE_ALL_GT: 39 | # Include all ground truth boxes 40 | gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0] 41 | else: 42 | # For the COCO ground truth boxes, exclude the ones that are ''iscrowd'' 43 | gt_inds = np.where((roidb[0]['gt_classes'] != 0) & np.all(roidb[0]['gt_overlaps'].toarray() > -1.0, axis=1))[0] 44 | gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32) 45 | gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0] 46 | gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds] 47 | blobs['gt_boxes'] = gt_boxes 48 | blobs['im_info'] = np.array( 49 | [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], 50 | dtype=np.float32) 51 | 52 | blobs['img_id'] = roidb[0]['img_id'] 53 | 54 | return blobs 55 | 56 | def _get_image_blob(roidb, scale_inds): 57 | """Builds an input blob from the images in the roidb at the specified 58 | scales. 59 | """ 60 | num_images = len(roidb) 61 | 62 | processed_ims = [] 63 | im_scales = [] 64 | for i in range(num_images): 65 | #im = cv2.imread(roidb[i]['image']) 66 | im = imread(roidb[i]['image']) 67 | 68 | if len(im.shape) == 2: 69 | im = im[:,:,np.newaxis] 70 | im = np.concatenate((im,im,im), axis=2) 71 | # flip the channels, since the original model used cv2 72 | # (rgb -> bgr) 73 | im = im[:,:,::-1] 74 | 75 | if roidb[i]['flipped']: 76 | im = im[:, ::-1, :] 77 | target_size = cfg.TRAIN.SCALES[scale_inds[i]] 78 | im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, 79 | cfg.TRAIN.MAX_SIZE) 80 | im_scales.append(im_scale) 81 | processed_ims.append(im) 82 | 83 | # Create a blob to hold the input images 84 | blob = im_list_to_blob(processed_ims) 85 | 86 | return blob, im_scales 87 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/requirements.txt: -------------------------------------------------------------------------------- 1 | cython 2 | cffi 3 | opencv-python 4 | scipy 5 | msgpack 6 | easydict 7 | matplotlib 8 | pyyaml 9 | tensorboardX 10 | imageio 11 | -------------------------------------------------------------------------------- /chapter5/dssd-pytorch/arm.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | def arm_multibox(vgg, extra_layers, cfg): 5 | # cfg[k] is the number of anchors per location: each gets 4 location 6 | # offsets and 2 objectness scores, predicted by 3x3 convolutions. 7 | arm_loc_layers = [] 8 | arm_conf_layers = [] 9 | vgg_source = [21, 28, -2] 10 | 11 | for k, v in enumerate(vgg_source): 12 | arm_loc_layers += [nn.Conv2d(vgg[v].out_channels, 13 | cfg[k] * 4, kernel_size=3, padding=1)] 14 | arm_conf_layers += [nn.Conv2d(vgg[v].out_channels, 15 | cfg[k] * 2, kernel_size=3, padding=1)] 16 | 17 | for k, v in enumerate(extra_layers[1::2], 3): 18 | arm_loc_layers += [nn.Conv2d(v.out_channels, cfg[k] 19 | * 4, kernel_size=3, padding=1)] 20 | arm_conf_layers += [nn.Conv2d(v.out_channels, cfg[k] 21 | * 2, kernel_size=3, padding=1)] 22 | 23 | return (arm_loc_layers, arm_conf_layers) 24 | 25 | -------------------------------------------------------------------------------- /chapter5/dssd-pytorch/tcb.py: -------------------------------------------------------------------------------- 1 | def add_tcb(cfg): 2 | feature_scale_layers = [] 3 | feature_upsample_layers = [] 4 | feature_pred_layers = 
[] 5 | 6 | for k, v in enumerate(cfg): 7 | feature_scale_layers += [nn.Conv2d(cfg[k], 256, 3, padding=1), 8 | nn.ReLU(inplace=True), 9 | nn.Conv2d(256, 256, 3, padding=1) 10 | ] 11 | feature_pred_layers += [nn.ReLU(inplace=True), 12 | nn.Conv2d(256, 256, 3, padding=1), 13 | nn.ReLU(inplace=True) 14 | ] 15 | 16 | if k != len(cfg) - 1: 17 | feature_upsample_layers += [nn.ConvTranspose2d(256, 256, 4, 2)] 18 | 19 | return (feature_scale_layers, feature_upsample_layers, feature_pred_layers) 20 | 21 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/.gitattributes: -------------------------------------------------------------------------------- 1 | *.ipynb linguist-language=Python 2 | .ipynb_checkpoints/* linguist-documentation 3 | dev.ipynb linguist-documentation 4 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | 91 | # atom remote-sync package 92 | .remote-sync.json 93 | 94 | # weights 95 | weights/ 96 | 97 | #DS_Store 98 | .DS_Store 99 | 100 | # dev stuff 101 | eval/ 102 | eval.ipynb 103 | dev.ipynb 104 | .vscode/ 105 | 106 | # not ready 107 | videos/ 108 | templates/ 109 | data/ssd_dataloader.py 110 | data/datasets/ 111 | doc/visualize.py 112 | read_results.py 113 | ssd300_120000/ 114 | demos/live 115 | webdemo.py 116 | test_data_aug.py 117 | 118 | # attributes 119 | 120 | # pycharm 121 | .idea/ 122 | 123 | # temp checkout soln 124 | data/datasets/ 125 | data/ssd_dataloader.py 126 | 127 | # pylint 128 | .pylintrc -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Max deGroot, Ellis Brown 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software 
without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/README.md: -------------------------------------------------------------------------------- 1 | # Chapter 5: SSD 2 | 3 | ## Introduction 4 | 5 | This code is largely based on the PyTorch reimplementation [amdegroot/ssd.pytorch](https://github.com/amdegroot/ssd.pytorch). If you run into problems while working through it, check [amdegroot's issue tracker](https://github.com/amdegroot/ssd.pytorch/issues) for existing solutions. 6 | 7 | ## Dataset 8 | The code supports both the COCO and PASCAL VOC datasets. Taking VOC2012 as an example, the script below downloads it automatically; you can also place the dataset in the corresponding folder by hand. 9 | ```Shell 10 | # The default data path is data/VOCdevkit 11 | sh data/scripts/VOC2012.sh 12 | ``` 13 | 14 | ## Training 15 | * Download the pretrained VGG weights with the commands below and put them in the weights folder created by default; alternatively, download them manually from the URL and place them in the weights folder. 16 | ```Shell 17 | mkdir weights 18 | cd weights 19 | wget https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth 20 | ``` 21 | 22 | * Train the model with: 23 | ```Shell 24 | python train.py 25 | ``` 26 | Adjust the hyperparameters in the script as needed. 27 | 28 | ## Inference 29 | Run inference with the following script: 30 | ```Shell 31 | python eval.py 32 | ``` 33 | Adjust the hyperparameters in the script as needed. 34 | 35 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .voc0712 import VOCDetection, VOCAnnotationTransform, VOC_CLASSES, VOC_ROOT 2 | 3 | #from .coco import COCODetection, COCOAnnotationTransform, COCO_CLASSES, COCO_ROOT, get_label_map 4 | from .config import * 5 | import torch 6 | import cv2 7 | import numpy as np 8 | 9 | def detection_collate(batch): 10 | """Custom collate fn for dealing with batches of images that have a different 11 | number of associated object annotations (bounding boxes). 
12 | 13 | Arguments: 14 | batch: (tuple) A tuple of tensor images and lists of annotations 15 | 16 | Return: 17 | A tuple containing: 18 | 1) (tensor) batch of images stacked on their 0 dim 19 | 2) (list of tensors) annotations for a given image are stacked on 20 | 0 dim 21 | """ 22 | targets = [] 23 | imgs = [] 24 | for sample in batch: 25 | imgs.append(sample[0]) 26 | targets.append(torch.FloatTensor(sample[1])) 27 | return torch.stack(imgs, 0), targets 28 | 29 | 30 | def base_transform(image, size, mean): 31 | x = cv2.resize(image, (size, size)).astype(np.float32) 32 | x -= mean 33 | x = x.astype(np.float32) 34 | return x 35 | 36 | 37 | class BaseTransform: 38 | def __init__(self, size, mean): 39 | self.size = size 40 | self.mean = np.array(mean, dtype=np.float32) 41 | 42 | def __call__(self, image, boxes=None, labels=None): 43 | return base_transform(image, self.size, self.mean), boxes, labels 44 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/data/config.py: -------------------------------------------------------------------------------- 1 | # config.py 2 | import os.path 3 | 4 | # gets home dir cross platform 5 | HOME = os.path.expanduser("~") 6 | 7 | # for making bounding boxes pretty 8 | COLORS = ((255, 0, 0, 128), (0, 255, 0, 128), (0, 0, 255, 128), 9 | (0, 255, 255, 128), (255, 0, 255, 128), (255, 255, 0, 128)) 10 | 11 | MEANS = (104, 117, 123) 12 | 13 | # SSD300 CONFIGS 14 | voc = { 15 | 'num_classes': 21, 16 | 'lr_steps': (80000, 100000, 120000), 17 | 'max_iter': 120000, 18 | 'feature_maps': [38, 19, 10, 5, 3, 1], 19 | 'min_dim': 300, 20 | 'steps': [8, 16, 32, 64, 100, 300], 21 | 'min_sizes': [30, 60, 111, 162, 213, 264], 22 | 'max_sizes': [60, 111, 162, 213, 264, 315], 23 | 'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]], 24 | 'variance': [0.1, 0.2], 25 | 'clip': True, 26 | 'name': 'VOC', 27 | } 28 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/data/example.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter5/ssd-pytorch/data/example.jpg -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/data/scripts/COCO2014.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | start=`date +%s` 4 | 5 | # handle optional download dir 6 | if [ -z "$1" ] 7 | then 8 | # navigate to ~/data 9 | echo "navigating to ~/data/ ..." 10 | mkdir -p ~/data 11 | cd ~/data/ 12 | mkdir -p ./coco 13 | cd ./coco 14 | mkdir -p ./images 15 | mkdir -p ./annotations 16 | else 17 | # check if specified dir is valid 18 | if [ ! -d $1 ]; then 19 | echo $1 " is not a valid directory" 20 | exit 0 21 | fi 22 | echo "navigating to " $1 " ..." 23 | cd $1 24 | fi 25 | 26 | if [ ! -d images ] 27 | then 28 | mkdir -p ./images 29 | fi 30 | 31 | # Download the image data. 32 | cd ./images 33 | echo "Downloading MSCOCO train images ..." 34 | curl -LO http://images.cocodataset.org/zips/train2014.zip 35 | echo "Downloading MSCOCO val images ..." 36 | curl -LO http://images.cocodataset.org/zips/val2014.zip 37 | 38 | cd ../ 39 | if [ ! -d annotations ] 40 | then 41 | mkdir -p ./annotations 42 | fi 43 | 44 | # Download the annotation data. 45 | cd ./annotations 46 | echo "Downloading MSCOCO train/val annotations ..." 
47 | curl -LO http://images.cocodataset.org/annotations/annotations_trainval2014.zip 48 | echo "Finished downloading. Now extracting ..." 49 | 50 | # Unzip data 51 | echo "Extracting train images ..." 52 | unzip ../images/train2014.zip -d ../images 53 | echo "Extracting val images ..." 54 | unzip ../images/val2014.zip -d ../images 55 | echo "Extracting annotations ..." 56 | unzip ./annotations_trainval2014.zip 57 | 58 | echo "Removing zip files ..." 59 | rm ../images/train2014.zip 60 | rm ../images/val2014.zip 61 | rm ./annotations_trainval2014.zip 62 | 63 | echo "Creating trainval35k dataset..." 64 | 65 | # Download annotations json 66 | echo "Downloading trainval35k annotations from S3" 67 | curl -LO https://s3.amazonaws.com/amdegroot-datasets/instances_trainval35k.json.zip 68 | 69 | # combine train and val 70 | echo "Combining train and val images" 71 | mkdir ../images/trainval35k 72 | cd ../images/train2014 73 | find -maxdepth 1 -name '*.jpg' -exec cp -t ../trainval35k {} + # dir too large for cp 74 | cd ../val2014 75 | find -maxdepth 1 -name '*.jpg' -exec cp -t ../trainval35k {} + 76 | 77 | 78 | end=`date +%s` 79 | runtime=$((end-start)) 80 | 81 | echo "Completed in " $runtime " seconds" 82 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/data/scripts/VOC2007.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ellis Brown 3 | 4 | start=`date +%s` 5 | 6 | # handle optional download dir 7 | if [ -z "$1" ] 8 | then 9 | # navigate to ~/data 10 | echo "navigating to ~/data/ ..." 11 | mkdir -p ~/data 12 | cd ~/data/ 13 | else 14 | # check if is valid directory 15 | if [ ! -d $1 ]; then 16 | echo $1 "is not a valid directory" 17 | exit 0 18 | fi 19 | echo "navigating to" $1 "..." 20 | cd $1 21 | fi 22 | 23 | echo "Downloading VOC2007 trainval ..." 24 | # Download the data. 25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar 26 | echo "Downloading VOC2007 test data ..." 27 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar 28 | echo "Done downloading." 29 | 30 | # Extract data 31 | echo "Extracting trainval ..." 32 | tar -xvf VOCtrainval_06-Nov-2007.tar 33 | echo "Extracting test ..." 34 | tar -xvf VOCtest_06-Nov-2007.tar 35 | echo "removing tars ..." 36 | rm VOCtrainval_06-Nov-2007.tar 37 | rm VOCtest_06-Nov-2007.tar 38 | 39 | end=`date +%s` 40 | runtime=$((end-start)) 41 | 42 | echo "Completed in" $runtime "seconds" -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/data/scripts/VOC2012.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ellis Brown 3 | 4 | start=`date +%s` 5 | 6 | # handle optional download dir 7 | if [ -z "$1" ] 8 | then 9 | # navigate to ~/data 10 | echo "navigating to ~/data/ ..." 11 | mkdir -p ~/data 12 | cd ~/data/ 13 | else 14 | # check if is valid directory 15 | if [ ! -d $1 ]; then 16 | echo $1 "is not a valid directory" 17 | exit 0 18 | fi 19 | echo "navigating to" $1 "..." 20 | cd $1 21 | fi 22 | 23 | echo "Downloading VOC2012 trainval ..." 24 | # Download the data. 25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 26 | echo "Done downloading." 27 | 28 | 29 | # Extract data 30 | echo "Extracting trainval ..." 31 | tar -xvf VOCtrainval_11-May-2012.tar 32 | echo "removing tar ..." 
33 | rm VOCtrainval_11-May-2012.tar 34 | 35 | end=`date +%s` 36 | runtime=$((end-start)) 37 | 38 | echo "Completed in" $runtime "seconds" -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/demo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter5/ssd-pytorch/demo/__init__.py -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/demo/live.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | from torch.autograd import Variable 4 | import cv2 5 | import time 6 | from imutils.video import FPS, WebcamVideoStream 7 | import argparse 8 | 9 | parser = argparse.ArgumentParser(description='Single Shot MultiBox Detection') 10 | parser.add_argument('--weights', default='weights/ssd_300_VOC0712.pth', 11 | type=str, help='Trained state_dict file path') 12 | parser.add_argument('--cuda', default=False, type=bool, 13 | help='Use cuda in live demo') 14 | args = parser.parse_args() 15 | 16 | COLORS = [(255, 0, 0), (0, 255, 0), (0, 0, 255)] 17 | FONT = cv2.FONT_HERSHEY_SIMPLEX 18 | 19 | 20 | def cv2_demo(net, transform): 21 | def predict(frame): 22 | height, width = frame.shape[:2] 23 | x = torch.from_numpy(transform(frame)[0]).permute(2, 0, 1) 24 | x = Variable(x.unsqueeze(0)) 25 | y = net(x) # forward pass 26 | detections = y.data 27 | # scale each detection back up to the image 28 | scale = torch.Tensor([width, height, width, height]) 29 | for i in range(detections.size(1)): 30 | j = 0 31 | while detections[0, i, j, 0] >= 0.6: 32 | pt = (detections[0, i, j, 1:] * scale).cpu().numpy() 33 | cv2.rectangle(frame, 34 | (int(pt[0]), int(pt[1])), 35 | (int(pt[2]), int(pt[3])), 36 | COLORS[i % 3], 2) 37 | cv2.putText(frame, labelmap[i - 1], (int(pt[0]), int(pt[1])), 38 | FONT, 2, (255, 255, 255), 2, cv2.LINE_AA) 39 | j += 1 40 | return frame 41 | 42 | # start video stream thread, allow buffer to fill 43 | print("[INFO] starting threaded video stream...") 44 | stream = WebcamVideoStream(src=0).start() # default camera 45 | time.sleep(1.0) 46 | # start fps timer 47 | # loop over frames from the video file stream 48 | while True: 49 | # grab next frame 50 | frame = stream.read() 51 | key = cv2.waitKey(1) & 0xFF 52 | 53 | # update FPS counter 54 | fps.update() 55 | frame = predict(frame) 56 | 57 | # keybindings for display 58 | if key == ord('p'): # pause 59 | while True: 60 | key2 = cv2.waitKey(1) or 0xff 61 | cv2.imshow('frame', frame) 62 | if key2 == ord('p'): # resume 63 | break 64 | cv2.imshow('frame', frame) 65 | if key == 27: # exit 66 | break 67 | 68 | 69 | if __name__ == '__main__': 70 | import sys 71 | from os import path 72 | sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) 73 | 74 | from data import BaseTransform, VOC_CLASSES as labelmap 75 | from ssd import build_ssd 76 | 77 | net = build_ssd('test', 300, 21) # initialize SSD 78 | net.load_state_dict(torch.load(args.weights)) 79 | transform = BaseTransform(net.size, (104/256.0, 117/256.0, 123/256.0)) 80 | 81 | fps = FPS().start() 82 | cv2_demo(net.eval(), transform) 83 | # stop the timer and display FPS information 84 | fps.stop() 85 | 86 | print("[INFO] elasped time: {:.2f}".format(fps.elapsed())) 87 | print("[INFO] approx. 
FPS: {:.2f}".format(fps.fps())) 88 | 89 | # cleanup 90 | cv2.destroyAllWindows() 91 | stream.stop() 92 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/doc/SSD.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter5/ssd-pytorch/doc/SSD.jpg -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/doc/detection_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter5/ssd-pytorch/doc/detection_example.png -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/doc/detection_example2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter5/ssd-pytorch/doc/detection_example2.png -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/doc/detection_examples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter5/ssd-pytorch/doc/detection_examples.png -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/doc/ssd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter5/ssd-pytorch/doc/ssd.png -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions import * 2 | from .modules import * 3 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/layers/functions/__init__.py: -------------------------------------------------------------------------------- 1 | from .detection import Detect 2 | from .prior_box import PriorBox 3 | 4 | 5 | __all__ = ['Detect', 'PriorBox'] 6 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/layers/functions/detection.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from ..box_utils import decode, nms 4 | from data import voc as cfg 5 | 6 | 7 | class Detect(Function): 8 | """At test time, Detect is the final layer of SSD. Decode location preds, 9 | apply non-maximum suppression to location predictions based on conf 10 | scores and threshold to a top_k number of output predictions for both 11 | confidence score and locations. 12 | """ 13 | def __init__(self, num_classes, bkg_label, top_k, conf_thresh, nms_thresh): 14 | self.num_classes = num_classes 15 | self.background_label = bkg_label 16 | self.top_k = top_k 17 | # Parameters used in nms. 
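# top_k caps the number of detections kept per class, conf_thresh filters
# low-confidence predictions before NMS, and nms_thresh is the IoU overlap
# above which lower-scoring boxes are suppressed (see forward below).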
18 | self.nms_thresh = nms_thresh 19 | if nms_thresh <= 0: 20 | raise ValueError('nms_threshold must be non negative.') 21 | self.conf_thresh = conf_thresh 22 | self.variance = cfg['variance'] 23 | 24 | def forward(self, loc_data, conf_data, prior_data): 25 | """ 26 | Args: 27 | loc_data: (tensor) Loc preds from loc layers 28 | Shape: [batch,num_priors*4] 29 | conf_data: (tensor) Shape: Conf preds from conf layers 30 | Shape: [batch*num_priors,num_classes] 31 | prior_data: (tensor) Prior boxes and variances from priorbox layers 32 | Shape: [1,num_priors,4] 33 | """ 34 | num = loc_data.size(0) # batch size 35 | num_priors = prior_data.size(0) 36 | output = torch.zeros(num, self.num_classes, self.top_k, 5) 37 | conf_preds = conf_data.view(num, num_priors, 38 | self.num_classes).transpose(2, 1) 39 | 40 | # Decode predictions into bboxes. 41 | for i in range(num): 42 | decoded_boxes = decode(loc_data[i], prior_data, self.variance) 43 | # For each class, perform nms 44 | conf_scores = conf_preds[i].clone() 45 | 46 | for cl in range(1, self.num_classes): 47 | c_mask = conf_scores[cl].gt(self.conf_thresh) 48 | scores = conf_scores[cl][c_mask] 49 | if scores.dim() == 0: 50 | continue 51 | l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes) 52 | boxes = decoded_boxes[l_mask].view(-1, 4) 53 | # idx of highest scoring and non-overlapping boxes per class 54 | ids, count = nms(boxes, scores, self.nms_thresh, self.top_k) 55 | output[i, cl, :count] = \ 56 | torch.cat((scores[ids[:count]].unsqueeze(1), 57 | boxes[ids[:count]]), 1) 58 | flt = output.contiguous().view(num, -1, 5) 59 | _, idx = flt[:, :, 0].sort(1, descending=True) 60 | _, rank = idx.sort(1) 61 | flt[(rank < self.top_k).unsqueeze(-1).expand_as(flt)].fill_(0) 62 | return output 63 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/layers/functions/prior_box.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from math import sqrt as sqrt 3 | from itertools import product as product 4 | import torch 5 | 6 | 7 | class PriorBox(object): 8 | """Compute priorbox coordinates in center-offset form for each source 9 | feature map. 
10 | """ 11 | def __init__(self, cfg): 12 | super(PriorBox, self).__init__() 13 | self.image_size = cfg['min_dim'] 14 | # number of priors for feature map location (either 4 or 6) 15 | self.num_priors = len(cfg['aspect_ratios']) 16 | self.variance = cfg['variance'] or [0.1] 17 | self.feature_maps = cfg['feature_maps'] 18 | self.min_sizes = cfg['min_sizes'] 19 | self.max_sizes = cfg['max_sizes'] 20 | self.steps = cfg['steps'] 21 | self.aspect_ratios = cfg['aspect_ratios'] 22 | self.clip = cfg['clip'] 23 | self.version = cfg['name'] 24 | for v in self.variance: 25 | if v <= 0: 26 | raise ValueError('Variances must be greater than 0') 27 | 28 | # 生成所有的PriorBox,需要每一个特征图的信息 29 | def forward(self): 30 | mean = [] 31 | for k, f in enumerate(self.feature_maps): 32 | for i, j in product(range(f), repeat=2): 33 | # f_k为每个特征图的尺寸 34 | f_k = self.image_size / self.steps[k] 35 | # 求取每个box的中心坐标 36 | cx = (j + 0.5) / f_k 37 | cy = (i + 0.5) / f_k 38 | 39 | # 对应{S_k, S_k}大小的PriorBox 40 | s_k = self.min_sizes[k]/self.image_size 41 | mean += [cx, cy, s_k, s_k] 42 | 43 | # 对应{√(S_k S_(k+1) ), √(S_k S_(k+1) )}大小的PriorBox 44 | s_k_prime = sqrt(s_k * (self.max_sizes[k]/self.image_size)) 45 | mean += [cx, cy, s_k_prime, s_k_prime] 46 | 47 | # 剩余的比例为2、1/2、3、1/3的PriorBox 48 | for ar in self.aspect_ratios[k]: 49 | mean += [cx, cy, s_k*sqrt(ar), s_k/sqrt(ar)] 50 | mean += [cx, cy, s_k/sqrt(ar), s_k*sqrt(ar)] 51 | # back to torch land 52 | output = torch.Tensor(mean).view(-1, 4) 53 | if self.clip: 54 | output.clamp_(max=1, min=0) 55 | return output 56 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/layers/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .l2norm import L2Norm 2 | from .multibox_loss import MultiBoxLoss 3 | 4 | __all__ = ['L2Norm', 'MultiBoxLoss'] 5 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/layers/modules/l2norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Function 4 | from torch.autograd import Variable 5 | import torch.nn.init as init 6 | 7 | class L2Norm(nn.Module): 8 | def __init__(self,n_channels, scale): 9 | super(L2Norm,self).__init__() 10 | self.n_channels = n_channels 11 | self.gamma = scale or None 12 | self.eps = 1e-10 13 | self.weight = nn.Parameter(torch.Tensor(self.n_channels)) 14 | self.reset_parameters() 15 | 16 | def reset_parameters(self): 17 | init.constant(self.weight,self.gamma) 18 | 19 | def forward(self, x): 20 | norm = x.pow(2).sum(dim=1, keepdim=True).sqrt()+self.eps 21 | #x /= norm 22 | x = torch.div(x,norm) 23 | out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x 24 | return out 25 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/test.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import sys 3 | import os 4 | import argparse 5 | import torch 6 | import torch.nn as nn 7 | import torch.backends.cudnn as cudnn 8 | import torchvision.transforms as transforms 9 | from torch.autograd import Variable 10 | from data import VOC_ROOT, VOC_CLASSES as labelmap 11 | from PIL import Image 12 | from data import VOCAnnotationTransform, VOCDetection, BaseTransform, VOC_CLASSES 13 | import torch.utils.data as data 14 | from ssd import build_ssd 
15 | 16 | parser = argparse.ArgumentParser(description='Single Shot MultiBox Detection') 17 | parser.add_argument('--trained_model', default='weights/ssd_300_VOC0712.pth', 18 | type=str, help='Trained state_dict file path to open') 19 | parser.add_argument('--save_folder', default='eval/', type=str, 20 | help='Dir to save results') 21 | parser.add_argument('--visual_threshold', default=0.6, type=float, 22 | help='Final confidence threshold') 23 | parser.add_argument('--cuda', default=True, type=bool, 24 | help='Use cuda to train model') 25 | parser.add_argument('--voc_root', default=VOC_ROOT, help='Location of VOC root directory') 26 | parser.add_argument('-f', default=None, type=str, help="Dummy arg so we can load in Jupyter Notebooks") 27 | args = parser.parse_args() 28 | 29 | if args.cuda and torch.cuda.is_available(): 30 | torch.set_default_tensor_type('torch.cuda.FloatTensor') 31 | else: 32 | torch.set_default_tensor_type('torch.FloatTensor') 33 | 34 | if not os.path.exists(args.save_folder): 35 | os.mkdir(args.save_folder) 36 | 37 | 38 | def test_net(save_folder, net, cuda, testset, transform, thresh): 39 | # dump predictions and assoc. ground truth to text file for now 40 | filename = save_folder+'test1.txt' 41 | num_images = len(testset) 42 | for i in range(num_images): 43 | print('Testing image {:d}/{:d}....'.format(i+1, num_images)) 44 | img = testset.pull_image(i) 45 | img_id, annotation = testset.pull_anno(i) 46 | x = torch.from_numpy(transform(img)[0]).permute(2, 0, 1) 47 | x = Variable(x.unsqueeze(0)) 48 | 49 | with open(filename, mode='a') as f: 50 | f.write('\nGROUND TRUTH FOR: '+img_id+'\n') 51 | for box in annotation: 52 | f.write('label: '+' || '.join(str(b) for b in box)+'\n') 53 | if cuda: 54 | x = x.cuda() 55 | 56 | y = net(x) # forward pass 57 | detections = y.data 58 | # scale each detection back up to the image 59 | scale = torch.Tensor([img.shape[1], img.shape[0], 60 | img.shape[1], img.shape[0]]) 61 | pred_num = 0 62 | for i in range(detections.size(1)): 63 | j = 0 64 | while detections[0, i, j, 0] >= 0.6: 65 | if pred_num == 0: 66 | with open(filename, mode='a') as f: 67 | f.write('PREDICTIONS: '+'\n') 68 | score = detections[0, i, j, 0] 69 | label_name = labelmap[i-1] 70 | pt = (detections[0, i, j, 1:]*scale).cpu().numpy() 71 | coords = (pt[0], pt[1], pt[2], pt[3]) 72 | pred_num += 1 73 | with open(filename, mode='a') as f: 74 | f.write(str(pred_num)+' label: '+label_name+' score: ' + 75 | str(score) + ' '+' || '.join(str(c) for c in coords) + '\n') 76 | j += 1 77 | 78 | 79 | def test_voc(): 80 | # load net 81 | num_classes = len(VOC_CLASSES) + 1 # +1 background 82 | net = build_ssd('test', 300, num_classes) # initialize SSD 83 | net.load_state_dict(torch.load(args.trained_model)) 84 | net.eval() 85 | print('Finished loading model!') 86 | # load data 87 | testset = VOCDetection(args.voc_root, [('2007', 'test')], None, VOCAnnotationTransform()) 88 | if args.cuda: 89 | net = net.cuda() 90 | cudnn.benchmark = True 91 | # evaluation 92 | test_net(args.save_folder, net, args.cuda, testset, 93 | BaseTransform(net.size, (104, 117, 123)), 94 | thresh=args.visual_threshold) 95 | 96 | if __name__ == '__main__': 97 | test_voc() 98 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .augmentations import SSDAugmentation -------------------------------------------------------------------------------- 
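As a quick sanity check on the PriorBox generation above: with the SSD300 VOC settings from data/config.py, the loops in prior_box.py emit the well-known total of 8732 priors. A minimal sketch in plain Python (the values are copied from the voc dict in data/config.py):

```python
feature_maps = [38, 19, 10, 5, 3, 1]
aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]

# Each location gets one {s_k, s_k} box, one {sqrt(s_k * s_(k+1))} box, and
# two boxes (ar and 1/ar) for every entry in aspect_ratios[k].
total = sum(f * f * (2 + 2 * len(ar))
            for f, ar in zip(feature_maps, aspect_ratios))
print(total)  # 8732
```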
/chapter6/yolov2-pytorch/README.md: -------------------------------------------------------------------------------- 1 | # Chapter 6: YOLO v2 2 | 3 | ## Introduction 4 | 5 | This code is largely based on the PyTorch reimplementation [longcw/yolo2-pytorch](https://github.com/longcw/yolo2-pytorch). If you run into problems while working through it, check [longcw's issue tracker](https://github.com/longcw/yolo2-pytorch/issues) for existing solutions. 6 | 7 | ## Preparation 8 | 9 | ### 1 Compilation 10 | * Compile the reorg module; adjust the arch setting in make.sh for your GPU (see chapter 4 for details). 11 | ```bash 12 | cd yolo2-pytorch 13 | ./make.sh 14 | ``` 15 | ### 2 Dataset 16 | * Taking VOC2012 as an example, symlink the dataset into the data folder: 17 | 18 | ```bash 19 | cd yolo2-pytorch 20 | mkdir data 21 | cd data 22 | ln -s "your VOCdevkit path" VOCdevkit2012 23 | ``` 24 | ### 3 Pretrained weights 25 | * Download the pretrained weights [darknet19](https://drive.google.com/file/d/0B4pXCfnYmG1WRG52enNpcV80aDg/view?usp=sharing) 26 | * Then edit the weight path in `yolo2-pytorch/cfgs/exps/darknet19_exp1.py`. 27 | 28 | ## Training 29 | * Run the following command: 30 | ```bash 31 | python train.py 32 | ``` 33 | 34 | ## Inference 35 | 36 | * Edit the trained_model path in `yolo2-pytorch/cfgs/config.py`. 37 | ```bash 38 | mkdir output 39 | python test.py 40 | ``` 41 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/cfgs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/cfgs/__init__.py -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/cfgs/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from .config_voc import * # noqa 3 | from .exps.darknet19_exp1 import * # noqa 4 | 5 | 6 | def mkdir(path, max_depth=3): 7 | parent, child = os.path.split(path) 8 | if not os.path.exists(parent) and max_depth > 1: 9 | mkdir(parent, max_depth-1) 10 | 11 | if not os.path.exists(path): 12 | os.mkdir(path) 13 | 14 | 15 | # input and output size 16 | ############################ 17 | multi_scale_inp_size = [np.array([320, 320], dtype=np.int), 18 | np.array([352, 352], dtype=np.int), 19 | np.array([384, 384], dtype=np.int), 20 | np.array([416, 416], dtype=np.int), 21 | np.array([448, 448], dtype=np.int), 22 | np.array([480, 480], dtype=np.int), 23 | np.array([512, 512], dtype=np.int), 24 | np.array([544, 544], dtype=np.int), 25 | np.array([576, 576], dtype=np.int), 26 | # np.array([608, 608], dtype=np.int), 27 | ] # w, h 28 | multi_scale_out_size = [multi_scale_inp_size[0] / 32, 29 | multi_scale_inp_size[1] / 32, 30 | multi_scale_inp_size[2] / 32, 31 | multi_scale_inp_size[3] / 32, 32 | multi_scale_inp_size[4] / 32, 33 | multi_scale_inp_size[5] / 32, 34 | multi_scale_inp_size[6] / 32, 35 | multi_scale_inp_size[7] / 32, 36 | multi_scale_inp_size[8] / 32, 37 | # multi_scale_inp_size[9] / 32, 38 | ] # w, h 39 | inp_size = np.array([416, 416], dtype=np.int) # w, h 40 | out_size = inp_size / 32 41 | 42 | 43 | # for display 44 | ############################ 45 | def _to_color(indx, base): 46 | """ return (b, r, g) tuple""" 47 | base2 = base * base 48 | b = 2 - indx / base2 49 | r = 2 - (indx % base2) / base 50 | g = 2 - (indx % base2) % base 51 | return b * 127, r * 127, g * 127 52 | 53 | 54 | base = int(np.ceil(pow(num_classes, 1. 
/ 3))) 55 | colors = [_to_color(x, base) for x in range(num_classes)] 56 | 57 | 58 | # detection config 59 | ############################ 60 | thresh = 0.3 61 | 62 | 63 | # dir config 64 | ############################ 65 | ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) 66 | DATA_DIR = os.path.join(ROOT_DIR, 'data') 67 | MODEL_DIR = os.path.join(ROOT_DIR, 'models') 68 | TRAIN_DIR = os.path.join(MODEL_DIR, 'training') 69 | TEST_DIR = os.path.join(MODEL_DIR, 'testing') 70 | 71 | trained_model = os.path.join(MODEL_DIR, h5_fname) 72 | pretrained_model = os.path.join(MODEL_DIR, pretrained_fname) 73 | train_output_dir = os.path.join(TRAIN_DIR, exp_name) 74 | test_output_dir = os.path.join(TEST_DIR, imdb_test, h5_fname) 75 | mkdir(train_output_dir, max_depth=3) 76 | mkdir(test_output_dir, max_depth=4) 77 | 78 | rand_seed = 1024 79 | use_tensorboard = True 80 | 81 | log_interval = 50 82 | disp_interval = 10 83 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/cfgs/config_voc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | # trained model 5 | h5_fname = 'yolo-voc.weights.h5' 6 | 7 | # VOC 8 | label_names = ('aeroplane', 'bicycle', 'bird', 'boat', 9 | 'bottle', 'bus', 'car', 'cat', 'chair', 10 | 'cow', 'diningtable', 'dog', 'horse', 11 | 'motorbike', 'person', 'pottedplant', 12 | 'sheep', 'sofa', 'train', 'tvmonitor') 13 | num_classes = len(label_names) 14 | 15 | anchors = np.asarray([(1.08, 1.19), (3.42, 4.41), 16 | (6.63, 11.38), (9.42, 5.11), (16.62, 10.52)], 17 | dtype=np.float) 18 | num_anchors = len(anchors) 19 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/cfgs/exps/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/cfgs/exps/__init__.py -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/cfgs/exps/darknet19_exp1.py: -------------------------------------------------------------------------------- 1 | exp_name = 'darknet19_voc07trainval_exp3' 2 | 3 | pretrained_fname = 'weights/darknet19.weights.npz' 4 | 5 | start_step = 0 6 | lr_decay_epochs = {60, 90} 7 | lr_decay = 1./10 8 | 9 | max_epoch = 160 10 | 11 | weight_decay = 0.0005 12 | momentum = 0.9 13 | init_learning_rate = 1e-3 14 | 15 | # for training yolo2 16 | object_scale = 5. 17 | noobject_scale = 1. 18 | class_scale = 1. 19 | coord_scale = 1. 20 | iou_thresh = 0.6 21 | 22 | # dataset 23 | imdb_train = 'voc_2012_trainval' 24 | imdb_test = 'voc_2012_test' 25 | batch_size = 1 26 | train_batch_size = 16 27 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/cfgs/exps/darknet19_exp2.py: -------------------------------------------------------------------------------- 1 | exp_name = 'darknet19_voc12trainval_exp1' 2 | 3 | pretrained_fname = 'darknet19.weights.npz' 4 | 5 | start_step = 0 6 | lr_decay_epochs = {60, 90} 7 | lr_decay = 1./10 8 | 9 | max_epoch = 160 10 | 11 | weight_decay = 0.0005 12 | momentum = 0.9 13 | init_learning_rate = 1e-3 14 | 15 | # for training yolo2 16 | object_scale = 5. 17 | noobject_scale = 1. 18 | class_scale = 1. 19 | coord_scale = 1. 
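# The *_scale factors above weight the object, no-object, class, and
# coordinate terms of the YOLO v2 loss; iou_thresh is the IoU threshold used
# when matching predicted boxes to ground truth in that loss.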
20 | iou_thresh = 0.6 21 | 22 | # dataset 23 | imdb_train = 'voc_2012_trainval' 24 | imdb_test = 'voc_2012_test' 25 | batch_size = 1 26 | train_batch_size = 16 27 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/datasets/__init__.py -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | from torch.multiprocessing import Pool 5 | 6 | from darknet import Darknet19 7 | import utils.yolo as yolo_utils 8 | import utils.network as net_utils 9 | from utils.timer import Timer 10 | import cfgs.config as cfg 11 | 12 | # This prevents deadlocks in the data loader, caused by 13 | # some incompatibility between pytorch and cv2 multiprocessing. 14 | # See https://github.com/pytorch/pytorch/issues/1355. 15 | cv2.setNumThreads(0) 16 | 17 | 18 | def preprocess(fname): 19 | # return fname 20 | image = cv2.imread(fname) 21 | im_data = np.expand_dims( 22 | yolo_utils.preprocess_test((image, None, cfg.multi_scale_inp_size), 0)[0], 0) 23 | return image, im_data 24 | 25 | 26 | # hyper-parameters 27 | # npz_fname = 'models/yolo-voc.weights.npz' 28 | # h5_fname = 'models/yolo-voc.weights.h5' 29 | trained_model = cfg.trained_model 30 | # trained_model = os.path.join( 31 | # cfg.train_output_dir, 'darknet19_voc07trainval_exp3_158.h5') 32 | thresh = 0.5 33 | im_path = 'demo' 34 | # --- 35 | 36 | net = Darknet19() 37 | net_utils.load_net(trained_model, net) 38 | # net.load_from_npz(npz_fname) 39 | # net_utils.save_net(h5_fname, net) 40 | net.cuda() 41 | net.eval() 42 | print('load model succ...') 43 | 44 | t_det = Timer() 45 | t_total = Timer() 46 | im_fnames = sorted((fname 47 | for fname in os.listdir(im_path) 48 | if os.path.splitext(fname)[-1] == '.jpg')) 49 | im_fnames = (os.path.join(im_path, fname) for fname in im_fnames) 50 | pool = Pool(processes=1) 51 | 52 | for i, (image, im_data) in enumerate(pool.imap( 53 | preprocess, im_fnames, chunksize=1)): 54 | t_total.tic() 55 | im_data = net_utils.np_to_variable( 56 | im_data, is_cuda=True, volatile=True).permute(0, 3, 1, 2) 57 | t_det.tic() 58 | bbox_pred, iou_pred, prob_pred = net(im_data) 59 | det_time = t_det.toc() 60 | # to numpy 61 | bbox_pred = bbox_pred.data.cpu().numpy() 62 | iou_pred = iou_pred.data.cpu().numpy() 63 | prob_pred = prob_pred.data.cpu().numpy() 64 | 65 | # print bbox_pred.shape, iou_pred.shape, prob_pred.shape 66 | 67 | bboxes, scores, cls_inds = yolo_utils.postprocess( 68 | bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh) 69 | 70 | im2show = yolo_utils.draw_detection(image, bboxes, scores, cls_inds, cfg) 71 | 72 | if im2show.shape[0] > 1100: 73 | im2show = cv2.resize(im2show, 74 | (int(1000. * 75 | float(im2show.shape[1]) / im2show.shape[0]), 76 | 1000)) 77 | cv2.imshow('test', im2show) 78 | 79 | total_time = t_total.toc() 80 | # wait_time = max(int(60 - total_time * 1000), 1) 81 | cv2.waitKey(0) 82 | 83 | if i % 1 == 0: 84 | format_str = 'frame: %d, ' \ 85 | '(detection: %.1f Hz, %.1f ms) ' \ 86 | '(total: %.1f Hz, %.1f ms)' 87 | print((format_str % ( 88 | i, 89 | 1. / det_time, det_time * 1000, 90 | 1. 
/ total_time, total_time * 1000))) 91 | 92 | t_total.clear() 93 | t_det.clear() 94 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/2007_000039.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/2007_000039.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/dog.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/eagle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/eagle.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/giraffe.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/giraffe.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/horses.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/horses.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/out/2007_000039.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/out/2007_000039.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/out/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/out/dog.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/out/eagle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/out/eagle.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/out/giraffe.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/out/giraffe.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/out/horses.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/out/horses.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/out/person.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/out/person.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/out/ragged-edge-london-office-6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/out/ragged-edge-london-office-6.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/out/scream.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/out/scream.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/person.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/person.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/ragged-edge-london-office-6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/ragged-edge-london-office-6.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/scream.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/scream.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/layers/__init__.py -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/reorg/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/layers/reorg/__init__.py -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/reorg/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/layers/reorg/_ext/__init__.py 
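The reorg ("passthrough") layer that layers/reorg implements in C/CUDA below is YOLOv2's space-to-depth rearrangement: with stride 2, a (N, C, H, W) feature map becomes (N, 4C, H/2, W/2), so the fine-grained 26x26 features can be concatenated with the deeper 13x13 map. A minimal pure-PyTorch sketch of the same shape transform follows; this is a reference illustration only, not part of the repo, and the darknet kernels below use their own channel ordering, so it matches in shape rather than element order.

import torch

def reorg_sketch(x, stride=2):
    # illustration only -- not from the repo; darknet's kernel orders channels differently
    # (N, C, H, W) -> (N, C*stride*stride, H//stride, W//stride)
    n, c, h, w = x.size()
    x = x.view(n, c, h // stride, stride, w // stride, stride)
    x = x.permute(0, 3, 5, 1, 2, 4).contiguous()
    return x.view(n, c * stride * stride, h // stride, w // stride)

out = reorg_sketch(torch.randn(1, 64, 26, 26))  # -> torch.Size([1, 256, 13, 13])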
-------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/reorg/_ext/reorg_layer/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._reorg_layer import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/reorg/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/reorg_cpu.c'] 7 | headers = ['src/reorg_cpu.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/reorg_cuda.c'] 14 | headers += ['src/reorg_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = os.path.dirname(os.path.realpath(__file__)) 19 | # print(this_file) 20 | extra_objects = ['src/reorg_cuda_kernel.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | 23 | ffi = create_extension( 24 | '_ext.reorg_layer', 25 | headers=headers, 26 | sources=sources, 27 | define_macros=defines, 28 | relative_to=__file__, 29 | with_cuda=with_cuda, 30 | extra_objects=extra_objects 31 | ) 32 | 33 | if __name__ == '__main__': 34 | ffi.build() 35 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/reorg/reorg_layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from ._ext import reorg_layer 4 | 5 | 6 | class ReorgFunction(Function): 7 | def __init__(self, stride=2): 8 | self.stride = stride 9 | 10 | def forward(self, x): 11 | stride = self.stride 12 | 13 | bsize, c, h, w = x.size() 14 | out_w, out_h, out_c = int(w / stride), int(h / stride), c * (stride * stride) # noqa 15 | out = torch.FloatTensor(bsize, out_c, out_h, out_w) 16 | 17 | if x.is_cuda: 18 | out = out.cuda() 19 | reorg_layer.reorg_cuda(x, out_w, out_h, out_c, bsize, 20 | stride, 0, out) 21 | else: 22 | reorg_layer.reorg_cpu(x, out_w, out_h, out_c, bsize, 23 | stride, 0, out) 24 | 25 | return out 26 | 27 | def backward(self, grad_top): 28 | stride = self.stride 29 | bsize, c, h, w = grad_top.size() 30 | 31 | out_w, out_h, out_c = w * stride, h * stride, c / (stride * stride) 32 | grad_bottom = torch.FloatTensor(bsize, int(out_c), out_h, out_w) 33 | 34 | # rev_stride = 1. 
/ stride # reverse 35 | if grad_top.is_cuda: 36 | grad_bottom = grad_bottom.cuda() 37 | reorg_layer.reorg_cuda(grad_top, w, h, c, bsize, 38 | stride, 1, grad_bottom) 39 | else: 40 | reorg_layer.reorg_cpu(grad_top, w, h, c, bsize, 41 | stride, 1, grad_bottom) 42 | 43 | return grad_bottom 44 | 45 | 46 | class ReorgLayer(torch.nn.Module): 47 | def __init__(self, stride): 48 | super(ReorgLayer, self).__init__() 49 | 50 | self.stride = stride 51 | 52 | def forward(self, x): 53 | x = ReorgFunction(self.stride)(x) 54 | return x 55 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/reorg/src/reorg_cpu.c: -------------------------------------------------------------------------------- 1 | #include <TH/TH.h> 2 | 3 | int reorg_cpu(THFloatTensor *x_tensor, int w, int h, int c, int batch, int stride, int forward, THFloatTensor *out_tensor) 4 | { 5 | // Grab the tensor 6 | float * x = THFloatTensor_data(x_tensor); 7 | float * out = THFloatTensor_data(out_tensor); 8 | 9 | // https://github.com/pjreddie/darknet/blob/master/src/blas.c 10 | int b,i,j,k; 11 | int out_c = c/(stride*stride); 12 | 13 | for(b = 0; b < batch; ++b){ 14 | for(k = 0; k < c; ++k){ 15 | for(j = 0; j < h; ++j){ 16 | for(i = 0; i < w; ++i){ 17 | int in_index = i + w*(j + h*(k + c*b)); 18 | int c2 = k % out_c; 19 | int offset = k / out_c; 20 | int w2 = i*stride + offset % stride; 21 | int h2 = j*stride + offset / stride; 22 | int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b)); 23 | if(forward) out[out_index] = x[in_index]; 24 | else out[in_index] = x[out_index]; 25 | } 26 | } 27 | } 28 | } 29 | 30 | return 1; 31 | } -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/reorg/src/reorg_cpu.h: -------------------------------------------------------------------------------- 1 | int reorg_cpu(THFloatTensor *x_tensor, int w, int h, int c, int batch, int stride, int forward, THFloatTensor *out_tensor); -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/reorg/src/reorg_cuda.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include "reorg_cuda_kernel.h" 3 | 4 | extern THCState *state; 5 | 6 | int reorg_cuda(THCudaTensor *x_tensor, int w, int h, int c, int batch, int stride, int forward, THCudaTensor *out_tensor) 7 | { 8 | float * x = THCudaTensor_data(state, x_tensor); 9 | float * out = THCudaTensor_data(state, out_tensor); 10 | 11 | cudaStream_t stream = THCState_getCurrentStream(state); 12 | reorg_ongpu(x, w, h, c, batch, stride, forward, out, stream); 13 | 14 | return 1; 15 | } -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/reorg/src/reorg_cuda.h: -------------------------------------------------------------------------------- 1 | int reorg_cuda(THCudaTensor *x_tensor, int w, int h, int c, int batch, int stride, int forward, THCudaTensor *out_tensor); -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/reorg/src/reorg_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | #include <stdio.h> 6 | #include <math.h> 7 | #include <float.h> 8 | #include "reorg_cuda_kernel.h" 9 | 10 | #define BLOCK 512 11 | 12 | dim3 cuda_gridsize(int n) 13 | { 14 | int k = (n-1) / BLOCK + 1; 15 | int x = k; 16 | int y = 1; 17 | if(x > 
65535){ 18 | x = ceil(sqrt(k)); 19 | y = (n-1)/(x*BLOCK) + 1; 20 | } 21 | dim3 d(x, y, 1); 22 | //printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK); 23 | return d; 24 | } 25 | 26 | __global__ void reorg_kernel(int N, float *x, int w, int h, int c, int batch, int stride, int forward, float *out) 27 | { 28 | int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 29 | if(i >= N) return; 30 | int in_index = i; 31 | int in_w = i%w; 32 | i = i/w; 33 | int in_h = i%h; 34 | i = i/h; 35 | int in_c = i%c; 36 | i = i/c; 37 | int b = i%batch; 38 | 39 | int out_c = c/(stride*stride); 40 | 41 | int c2 = in_c % out_c; 42 | int offset = in_c / out_c; 43 | int w2 = in_w*stride + offset % stride; 44 | int h2 = in_h*stride + offset / stride; 45 | //printf("%d\n", offset); 46 | int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b)); 47 | 48 | // printf("%d %d %d\n", w2, h2, c2); 49 | //printf("%d %d\n", in_index, out_index); 50 | //if(out_index >= N || out_index < 0) printf("bad bad bad \n"); 51 | 52 | if(forward) out[out_index] = x[in_index]; 53 | else out[in_index] = x[out_index]; 54 | //if(forward) out[1] = x[1]; 55 | //else out[0] = x[0]; 56 | } 57 | 58 | void reorg_ongpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out, cudaStream_t stream) 59 | { 60 | int size = w*h*c*batch; 61 | cudaError_t err; 62 | 63 | reorg_kernel<<<cuda_gridsize(size), BLOCK, 0, stream>>>(size, x, w, h, c, batch, stride, forward, out); 64 | 65 | err = cudaGetLastError(); 66 | if(cudaSuccess != err) 67 | { 68 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 69 | exit( -1 ); 70 | } 71 | } 72 | 73 | 74 | 75 | #ifdef __cplusplus 76 | } 77 | #endif 78 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/reorg/src/reorg_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _REORG_CUDA_KERNEL 2 | #define _REORG_CUDA_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void reorg_ongpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out, cudaStream_t stream); 9 | 10 | 11 | #ifdef __cplusplus 12 | } 13 | #endif 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/roi_pooling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/layers/roi_pooling/__init__.py -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/roi_pooling/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/layers/roi_pooling/_ext/__init__.py -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/roi_pooling/_ext/roi_pooling/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_pooling import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 |
locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/roi_pooling/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/roi_pooling.c'] 7 | headers = ['src/roi_pooling.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/roi_pooling_cuda.c'] 14 | headers += ['src/roi_pooling_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = os.path.dirname(os.path.realpath(__file__)) 19 | print(this_file) 20 | extra_objects = ['src/cuda/roi_pooling_kernel.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | 23 | ffi = create_extension( 24 | '_ext.roi_pooling', 25 | headers=headers, 26 | sources=sources, 27 | define_macros=defines, 28 | relative_to=__file__, 29 | with_cuda=with_cuda, 30 | extra_objects=extra_objects 31 | ) 32 | 33 | if __name__ == '__main__': 34 | ffi.build() 35 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/roi_pooling/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from ._ext import roi_pooling 4 | 5 | 6 | class RoIPoolFunction(Function): 7 | def __init__(self, pooled_height, pooled_width, spatial_scale): 8 | self.pooled_width = int(pooled_width) 9 | self.pooled_height = int(pooled_height) 10 | self.spatial_scale = float(spatial_scale) 11 | self.output = None 12 | self.argmax = None 13 | self.rois = None 14 | self.feature_size = None 15 | 16 | def forward(self, features, rois): 17 | batch_size, num_channels, data_height, data_width = features.size() 18 | num_rois = rois.size()[0] 19 | output = torch.zeros(num_rois, num_channels, 20 | self.pooled_height, self.pooled_width) 21 | argmax = torch.IntTensor(num_rois, num_channels, 22 | self.pooled_height, self.pooled_width).zero_() 23 | 24 | if not features.is_cuda: 25 | _features = features.permute(0, 2, 3, 1) 26 | roi_pooling.roi_pooling_forward(self.pooled_height, 27 | self.pooled_width, 28 | self.spatial_scale, 29 | _features, 30 | rois, 31 | output) 32 | # output = output.cuda() 33 | else: 34 | output = output.cuda() 35 | argmax = argmax.cuda() 36 | roi_pooling.roi_pooling_forward_cuda(self.pooled_height, 37 | self.pooled_width, 38 | self.spatial_scale, 39 | features, 40 | rois, 41 | output, 42 | argmax) 43 | self.output = output 44 | self.argmax = argmax 45 | self.rois = rois 46 | self.feature_size = features.size() 47 | 48 | return output 49 | 50 | def backward(self, grad_output): 51 | assert(self.feature_size is not None and grad_output.is_cuda) 52 | 53 | batch_size, num_channels, data_height, data_width = self.feature_size 54 | 55 | grad_input = torch.zeros(batch_size, num_channels, 56 | data_height, data_width).cuda() 57 | roi_pooling.roi_pooling_backward_cuda(self.pooled_height, 58 | self.pooled_width, 59 | self.spatial_scale, 60 | grad_output, 61 | self.rois, 62 | grad_input, 63 | self.argmax) 64 | 65 | # print grad_input 66 | 67 | return grad_input, None 68 | 69 | 70 | class RoIPool(torch.nn.Module): 71 | def __init__(self, pooled_height, pooled_width, spatial_scale): 72 | super(RoIPool, self).__init__() 73 
| 74 | self.pooled_width = int(pooled_width) 75 | self.pooled_height = int(pooled_height) 76 | self.spatial_scale = float(spatial_scale) 77 | 78 | def forward(self, features, rois): 79 | return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois) # noqa 80 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/roi_pooling/roi_pool_py.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import numpy as np 5 | 6 | 7 | class RoIPool(nn.Module): 8 | def __init__(self, pooled_height, pooled_width, spatial_scale): 9 | super(RoIPool, self).__init__() 10 | self.pooled_width = int(pooled_width) 11 | self.pooled_height = int(pooled_height) 12 | self.spatial_scale = float(spatial_scale) 13 | 14 | def forward(self, features, rois): 15 | batch_size, num_channels, data_height, data_width = features.size() 16 | num_rois = rois.size()[0] 17 | outputs = Variable(torch.zeros(num_rois, num_channels, 18 | self.pooled_height, 19 | self.pooled_width)).cuda() 20 | 21 | for roi_ind, roi in enumerate(rois): 22 | batch_ind = int(roi[0].data[0]) 23 | roi_start_w, roi_start_h, roi_end_w, roi_end_h = np.round( 24 | roi[1:].data.cpu().numpy() * self.spatial_scale).astype(int) 25 | roi_width = max(roi_end_w - roi_start_w + 1, 1) 26 | roi_height = max(roi_end_h - roi_start_h + 1, 1) 27 | bin_size_w = float(roi_width) / float(self.pooled_width) 28 | bin_size_h = float(roi_height) / float(self.pooled_height) 29 | 30 | for ph in range(self.pooled_height): 31 | hstart = int(np.floor(ph * bin_size_h)) 32 | hend = int(np.ceil((ph + 1) * bin_size_h)) 33 | hstart = min(data_height, max(0, hstart + roi_start_h)) 34 | hend = min(data_height, max(0, hend + roi_start_h)) 35 | for pw in range(self.pooled_width): 36 | wstart = int(np.floor(pw * bin_size_w)) 37 | wend = int(np.ceil((pw + 1) * bin_size_w)) 38 | wstart = min(data_width, max(0, wstart + roi_start_w)) 39 | wend = min(data_width, max(0, wend + roi_start_w)) 40 | 41 | is_empty = (hend <= hstart) or(wend <= wstart) 42 | if is_empty: 43 | outputs[roi_ind, :, ph, pw] = 0 44 | else: 45 | data = features[batch_ind] 46 | outputs[roi_ind, :, ph, pw] = torch.max( 47 | torch.max(data[:, hstart:hend, wstart:wend], 1)[0], 2)[0].view(-1) # noqa 48 | 49 | return outputs 50 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/roi_pooling/src/cuda/roi_pooling_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_POOLING_KERNEL 2 | #define _ROI_POOLING_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int ROIPoolForwardLaucher( 9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 10 | const int width, const int channels, const int pooled_height, 11 | const int pooled_width, const float* bottom_rois, 12 | float* top_data, int* argmax_data, cudaStream_t stream); 13 | 14 | 15 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 16 | const int height, const int width, const int channels, const int pooled_height, 17 | const int pooled_width, const float* bottom_rois, 18 | float* bottom_diff, const int* argmax_data, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | 26 | 
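Both RoI pooling front ends above (roi_pool.py and the pure-Python roi_pool_py.py) consume rois as an (R, 5) tensor laid out as [batch_index, x1, y1, x2, y2] in input-image coordinates, with spatial_scale mapping those coordinates onto the feature map (1/16 for a 16x-downsampled backbone). A usage sketch under those assumptions, in the old pre-0.4 Variable style this repo uses; the import path and all tensor sizes here are illustrative:

import torch
from torch.autograd import Variable
from layers.roi_pooling.roi_pool_py import RoIPool  # path as laid out in this repo

pool = RoIPool(pooled_height=7, pooled_width=7, spatial_scale=1.0 / 16)
features = Variable(torch.randn(1, 512, 38, 50))            # backbone feature map
rois = Variable(torch.FloatTensor([[0, 64, 64, 320, 320],   # [batch_idx, x1, y1, x2, y2]
                                   [0, 16, 32, 240, 400]]))
output = pool(features, rois)                               # -> (2, 512, 7, 7)

Note that roi_pool_py.py allocates its output with .cuda(), so even this "CPU" fallback expects a CUDA build; on a CPU-only machine that call is the first thing to patch.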
-------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/roi_pooling/src/roi_pooling.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output); -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/roi_pooling/src/roi_pooling_cuda.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include <math.h> 3 | #include "cuda/roi_pooling_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 16 | 17 | // Number of ROIs 18 | int num_rois = THCudaTensor_size(state, rois, 0); 19 | int size_rois = THCudaTensor_size(state, rois, 1); 20 | if (size_rois != 5) 21 | { 22 | return 0; 23 | } 24 | 25 | // batch size 26 | int batch_size = THCudaTensor_size(state, features, 0); 27 | if (batch_size != 1) 28 | { 29 | return 0; 30 | } 31 | // data height 32 | int data_height = THCudaTensor_size(state, features, 2); 33 | // data width 34 | int data_width = THCudaTensor_size(state, features, 3); 35 | // Number of channels 36 | int num_channels = THCudaTensor_size(state, features, 1); 37 | 38 | cudaStream_t stream = THCState_getCurrentStream(state); 39 | 40 | ROIPoolForwardLaucher( 41 | data_flat, spatial_scale, num_rois, data_height, 42 | data_width, num_channels, pooled_height, 43 | pooled_width, rois_flat, 44 | output_flat, argmax_flat, stream); 45 | 46 | return 1; 47 | } 48 | 49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 50 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax) 51 | { 52 | // Grab the input tensor 53 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 54 | float * rois_flat = THCudaTensor_data(state, rois); 55 | 56 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 57 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 58 | 59 | // Number of ROIs 60 | int num_rois = THCudaTensor_size(state, rois, 0); 61 | int size_rois = THCudaTensor_size(state, rois, 1); 62 | if (size_rois != 5) 63 | { 64 | return 0; 65 | } 66 | 67 | // batch size 68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 69 | if (batch_size != 1) 70 | { 71 | return 0; 72 | } 73 | // data height 74 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 75 | // data width 76 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 77 | // Number of channels 78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 79 | 80 | cudaStream_t stream = THCState_getCurrentStream(state); 81 | ROIPoolBackwardLaucher( 82 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 83 | data_width, num_channels, pooled_height, 84 | pooled_width, rois_flat, 85 | bottom_grad_flat, argmax_flat, stream); 86 | 87 | return 1; 88 | }
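Two details worth noting in the CUDA entry points above: they reject rois tensors whose second dimension is not 5, and feature batches larger than 1, so every RoI must carry a batch index even though only batch 0 is ever used here. A quick worked example of the RoI-to-feature-map binning arithmetic these kernels share with roi_pool_py.py (the numbers are illustrative only):

# spatial_scale maps image coordinates onto the feature map
spatial_scale = 1.0 / 16                 # backbone downsamples 16x
roi = [0, 64, 48, 303, 207]              # [batch_idx, x1, y1, x2, y2] on the image
x1, y1, x2, y2 = [int(round(v * spatial_scale)) for v in roi[1:]]  # -> 4, 3, 19, 13
roi_w = max(x2 - x1 + 1, 1)              # 16 feature-map columns
roi_h = max(y2 - y1 + 1, 1)              # 11 feature-map rows
bin_w = float(roi_w) / 7                 # ~2.29: each of the 7x7 output bins
bin_h = float(roi_h) / 7                 # ~1.57  max-pools a cell of this size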
-------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/roi_pooling/src/roi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax); 3 | 4 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax); -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd utils 6 | python build.py build_ext --inplace 7 | cd ../ 8 | 9 | cd layers/reorg/src 10 | echo "Compiling reorg layer kernels by nvcc..." 11 | nvcc -c -o reorg_cuda_kernel.cu.o reorg_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_61 12 | cd ../ 13 | python build.py 14 | cd ../ 15 | 16 | cd roi_pooling/src/cuda 17 | echo "Compiling roi_pooling kernels by nvcc..." 18 | nvcc -c -o roi_pooling_kernel.cu.o roi_pooling_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_61 19 | cd ../../ 20 | python build.py 21 | cd ../ 22 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/requirements.txt: -------------------------------------------------------------------------------- 1 | Cython==0.27.3 2 | opencv-python==3.3.0.10 3 | h5py==2.7.1 4 | pycrayon==0.5 -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/utils/__init__.py -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/im_transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | 5 | def imcv2_recolor(im, a=.1): 6 | # t = [np.random.uniform()] 7 | # t += [np.random.uniform()] 8 | # t += [np.random.uniform()] 9 | # t = np.array(t) * 2. - 1. 10 | t = np.random.uniform(-1, 1, 3) 11 | 12 | # random amplify each channel 13 | im = im.astype(np.float) 14 | im *= (1 + t * a) 15 | mx = 255. * (1 + a) 16 | up = np.random.uniform(-1, 1) 17 | im = np.power(im / mx, 1. + up * .5) 18 | # return np.array(im * 255., np.uint8) 19 | return im 20 | 21 | 22 | def imcv2_affine_trans(im): 23 | # Scale and translate 24 | h, w, c = im.shape 25 | scale = np.random.uniform() / 10. + 1. 26 | max_offx = (scale - 1.) * w 27 | max_offy = (scale - 1.) 
* h 28 | offx = int(np.random.uniform() * max_offx) 29 | offy = int(np.random.uniform() * max_offy) 30 | 31 | im = cv2.resize(im, (0, 0), fx=scale, fy=scale) 32 | im = im[offy: (offy + h), offx: (offx + w)] 33 | flip = np.random.uniform() > 0.5 34 | if flip: 35 | im = cv2.flip(im, 1) 36 | 37 | return im, [scale, [offx, offy], flip] 38 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/nms/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/utils/nms/__init__.py -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/nms/gpu_nms.hpp: 
-------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/nms/py_cpu_nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | 11 | def py_cpu_nms(dets, thresh): 12 | """Pure Python NMS baseline.""" 13 | x1 = dets[:, 0] 14 | y1 = dets[:, 1] 15 | x2 = dets[:, 2] 16 | y2 = dets[:, 3] 17 | scores = dets[:, 4] 18 | 19 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 20 | order = scores.argsort()[::-1] 21 | 22 | keep = [] 23 | while order.size > 0: 24 | i = order[0] 25 | keep.append(i) 26 | xx1 = np.maximum(x1[i], x1[order[1:]]) 27 | yy1 = np.maximum(y1[i], y1[order[1:]]) 28 | xx2 = np.minimum(x2[i], x2[order[1:]]) 29 | yy2 = np.minimum(y2[i], y2[order[1:]]) 30 | 31 | w = np.maximum(0.0, xx2 - xx1 + 1) 32 | h = np.maximum(0.0, yy2 - yy1 + 1) 33 | inter = w * h 34 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 35 | 36 | inds = np.where(ovr <= thresh)[0] 37 | order = order[inds + 1] 38 | 39 | return keep 40 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from .nms.cpu_nms import cpu_nms 9 | from .nms.gpu_nms import gpu_nms 10 | 11 | 12 | # def nms(dets, thresh, force_cpu=False): 13 | # """Dispatch to 
either CPU or GPU NMS implementations.""" 14 | # 15 | # if dets.shape[0] == 0: 16 | # return [] 17 | # if cfg.USE_GPU_NMS and not force_cpu: 18 | # return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 19 | # else: 20 | # return cpu_nms(dets, thresh) 21 | 22 | 23 | def nms(dets, thresh, force_cpu=False): 24 | """Dispatch to either CPU or GPU NMS implementations.""" 25 | 26 | if dets.shape[0] == 0: 27 | return [] 28 | if force_cpu: 29 | return cpu_nms(dets, thresh) 30 | return gpu_nms(dets, thresh) 31 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/pycocotools/UPSTREAM_REV: -------------------------------------------------------------------------------- 1 | https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574 2 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/pycocotools/license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of the FreeBSD Project. 27 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | from . import _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. 
For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, 38 | # and uncompressed RLE to encoded RLE mask. 39 | # 40 | # Usage: 41 | # Rs = encode( masks ) 42 | # masks = decode( Rs ) 43 | # R = merge( Rs, intersect=false ) 44 | # o = iou( dt, gt, iscrowd ) 45 | # a = area( Rs ) 46 | # bbs = toBbox( Rs ) 47 | # Rs = frPyObjects( [pyObjects], h, w ) 48 | # 49 | # In the API the following formats are used: 50 | # Rs - [dict] Run-length encoding of binary masks 51 | # R - dict Run-length encoding of binary mask 52 | # masks - [hxwxn] Binary mask(s) 53 | # (must have type np.ndarray(dtype=uint8) in column-major order) 54 | # iscrowd - [nx1] list of np.ndarray. 55 | # 1 indicates corresponding gt image has crowd region to ignore 56 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 57 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 58 | # dt,gt - May be either bounding boxes or encoded masks 59 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 60 | # 61 | # Finally, a note about the intersection over union (iou) computation. 62 | # The standard iou of a ground truth (gt) and detected (dt) object is 63 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 64 | # For "crowd" regions, we use a modified criteria. If a gt object is 65 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 66 | # Choosing gt' in the crowd gt that best matches the dt can be done using 67 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 68 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 69 | # For crowd gt regions we use this modified criteria above for the iou. 70 | # 71 | # To compile run "python setup.py build_ext --inplace" 72 | # Please do not contact us for help with compiling. 73 | # 74 | # Microsoft COCO Toolbox. 
version 2.0 75 | # Data, paper, and tutorials available at: http://mscoco.org/ 76 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 77 | # Licensed under the Simplified BSD License [see coco/license.txt] 78 | 79 | encode = _mask.encode 80 | decode = _mask.decode 81 | iou = _mask.iou 82 | merge = _mask.merge 83 | area = _mask.area 84 | toBbox = _mask.toBbox 85 | frPyObjects = _mask.frPyObjects 86 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/pycocotools/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | #include <stdbool.h> 9 | 10 | typedef unsigned int uint; 11 | typedef unsigned long siz; 12 | typedef unsigned char byte; 13 | typedef double* BB; 14 | typedef struct { siz h, w, m; uint *cnts; } RLE; 15 | 16 | // Initialize/destroy RLE. 17 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 18 | void rleFree( RLE *R ); 19 | 20 | // Initialize/destroy RLE array. 21 | void rlesInit( RLE **R, siz n ); 22 | void rlesFree( RLE **R, siz n ); 23 | 24 | // Encode binary masks using RLE. 25 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 26 | 27 | // Decode binary masks encoded via RLE. 28 | void rleDecode( const RLE *R, byte *mask, siz n ); 29 | 30 | // Compute union or intersection of encoded masks. 31 | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect ); 32 | 33 | // Compute area of encoded masks. 34 | void rleArea( const RLE *R, siz n, uint *a ); 35 | 36 | // Compute intersection over union between masks. 37 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 38 | 39 | // Compute intersection over union between bounding boxes. 40 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 41 | 42 | // Get bounding boxes surrounding encoded masks. 43 | void rleToBbox( const RLE *R, BB bb, siz n ); 44 | 45 | // Convert bounding boxes to encoded masks. 46 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 47 | 48 | // Convert polygon to encoded mask. 49 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 50 | 51 | // Get compressed string representation of encoded mask. 52 | char* rleToString( const RLE *R ); 53 | 54 | // Convert from compressed string representation of encoded mask. 55 | void rleFrString( RLE *R, char *s, siz h, siz w ); 56 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0. 
19 | 20 | def tic(self): 21 | # using time.time instead of time.clock because time.clock 22 | # does not normalize for multithreading 23 | self.start_time = time.time() 24 | 25 | def toc(self, average=True): 26 | self.diff = time.time() - self.start_time 27 | self.total_time += self.diff 28 | self.calls += 1 29 | self.average_time = self.total_time / self.calls 30 | if average: 31 | return self.average_time 32 | else: 33 | return self.diff 34 | 35 | def clear(self): 36 | self.total_time = 0. 37 | self.calls = 0 38 | self.start_time = 0. 39 | self.diff = 0. 40 | self.average_time = 0. 41 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/yolo.pyx: -------------------------------------------------------------------------------- 1 | cimport cython 2 | import numpy as np 3 | cimport numpy as np 4 | 5 | DTYPE = np.float 6 | ctypedef np.float_t DTYPE_t 7 | 8 | cdef extern from "math.h": 9 | double abs(double m) 10 | double log(double x) 11 | 12 | 13 | def yolo_to_bbox( 14 | np.ndarray[DTYPE_t, ndim=4] bbox_pred, 15 | np.ndarray[DTYPE_t, ndim=2] anchors, int H, int W): 16 | return yolo_to_bbox_c(bbox_pred, anchors, H, W) 17 | 18 | cdef yolo_to_bbox_c( 19 | np.ndarray[DTYPE_t, ndim=4] bbox_pred, 20 | np.ndarray[DTYPE_t, ndim=2] anchors, int H, int W): 21 | """ 22 | Parameters 23 | ---------- 24 | bbox_pred: (bsize, HxW, num_anchors, 4) ndarray of float (sig(tx), sig(ty), exp(tw), exp(th)) 25 | anchors: (num_anchors, 2) (pw, ph) 26 | Returns 27 | ------- 28 | bbox_out: (HxWxnum_anchors, 4) ndarray of bbox (x1, y1, x2, y2) rescaled to (0, 1) 29 | """ 30 | cdef unsigned int bsize = bbox_pred.shape[0] 31 | cdef unsigned int num_anchors = anchors.shape[0] 32 | cdef np.ndarray[DTYPE_t, ndim=4] bbox_out = np.zeros((bsize, H*W, num_anchors, 4), dtype=DTYPE) 33 | 34 | cdef DTYPE_t cx, cy, bw, bh 35 | cdef unsigned int row, col, a, ind 36 | for b in range(bsize): 37 | for row in range(H): 38 | for col in range(W): 39 | ind = row * W + col 40 | for a in range(num_anchors): 41 | cx = (bbox_pred[b, ind, a, 0] + col) / W 42 | cy = (bbox_pred[b, ind, a, 1] + row) / H 43 | bw = bbox_pred[b, ind, a, 2] * anchors[a][0] / W * 0.5 44 | bh = bbox_pred[b, ind, a, 3] * anchors[a][1] / H * 0.5 45 | 46 | bbox_out[b, ind, a, 0] = cx - bw 47 | bbox_out[b, ind, a, 1] = cy - bh 48 | bbox_out[b, ind, a, 2] = cx + bw 49 | bbox_out[b, ind, a, 3] = cy + bh 50 | 51 | return bbox_out -------------------------------------------------------------------------------- /chapter7/mobilenet_v1.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | class MobileNet(nn.Module): 4 | def __init__(self): 5 | super(MobileNet, self).__init__() 6 | 7 | def conv_bn(dim_in, dim_out, stride): 8 | return nn.Sequential( 9 | nn.Conv2d(dim_in, dim_out, 3, stride, 1, bias=False), 10 | nn.BatchNorm2d(dim_out), 11 | nn.ReLU(inplace=True) 12 | ) 13 | 14 | def conv_dw(dim_in, dim_out, stride): 15 | return nn.Sequential( 16 | nn.Conv2d(dim_in, dim_in, 3, stride, 1, groups=dim_in, bias=False), 17 | nn.BatchNorm2d(dim_in), 18 | nn.ReLU(inplace=True), 19 | nn.Conv2d(dim_in, dim_out, 1, 1, 0, bias=False), 20 | nn.BatchNorm2d(dim_out), 21 | nn.ReLU(inplace=True), 22 | ) 23 | self.model = nn.Sequential( 24 | conv_bn( 3, 32, 2), 25 | conv_dw( 32, 64, 1), 26 | conv_dw( 64, 128, 2), 27 | conv_dw(128, 128, 1), 28 | conv_dw(128, 256, 2), 29 | conv_dw(256, 256, 1), 30 | conv_dw(256, 512, 2), 31 | conv_dw(512, 512, 1), 32 | conv_dw(512, 512, 
1), 33 | conv_dw(512, 512, 1), 34 | conv_dw(512, 512, 1), 35 | conv_dw(512, 512, 1), 36 | conv_dw(512, 1024, 2), 37 | conv_dw(1024, 1024, 1), 38 | nn.AvgPool2d(7), 39 | ) 40 | self.fc = nn.Linear(1024, 1000) 41 | 42 | def forward(self, x): 43 | x = self.model(x) 44 | x = x.view(-1, 1024) 45 | x = self.fc(x) 46 | return x 47 | 48 | -------------------------------------------------------------------------------- /chapter7/mobilenet_v2_block.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | class InvertedResidual(nn.Module): 4 | 5 | def __init__(self, inp, oup, stride, expand_ratio): 6 | super(InvertedResidual, self).__init__() 7 | self.stride = stride 8 | # the identity shortcut is only valid when the block keeps its shape 9 | self.use_res_connect = stride == 1 and inp == oup 10 | hidden_dim = round(inp * expand_ratio) 11 | self.conv = nn.Sequential( 12 | nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False), 13 | nn.BatchNorm2d(hidden_dim), 14 | nn.ReLU6(inplace=True), 15 | nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), 16 | nn.BatchNorm2d(hidden_dim), 17 | nn.ReLU6(inplace=True), 18 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 19 | nn.BatchNorm2d(oup), 20 | ) 21 | 22 | def forward(self, x): 23 | if self.use_res_connect: 24 | return x + self.conv(x) 25 | return self.conv(x) 26 | -------------------------------------------------------------------------------- /chapter7/shufflenet_v1.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | # conv3x3 and ShuffleUnit are assumed to be defined alongside this snippet 5 | 6 | class ShuffleNet(nn.Module): 7 | 8 | def __init__(self, groups=3, in_channels=3, num_classes=1000): 9 | super(ShuffleNet, self).__init__() 10 | self.groups = groups 11 | self.stage_repeats = [3, 7, 3] 12 | self.in_channels = in_channels 13 | self.num_classes = num_classes 14 | self.stage_out_channels = [-1, 24, 240, 480, 960] 15 | 16 | self.conv1 = conv3x3(self.in_channels, 17 | self.stage_out_channels[1], # stage 1 18 | stride=2) 19 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 20 | self.stage2 = self._make_stage(2) 21 | self.stage3 = self._make_stage(3) 22 | self.stage4 = self._make_stage(4) 23 | num_inputs = self.stage_out_channels[-1] 24 | self.fc = nn.Linear(num_inputs, self.num_classes) 25 | 26 | def _make_stage(self, stage): 27 | modules = OrderedDict() 28 | stage_name = "ShuffleUnit_Stage{}".format(stage) 29 | grouped_conv = stage > 2 30 | first_module = ShuffleUnit( 31 | self.stage_out_channels[stage-1], 32 | self.stage_out_channels[stage], 33 | groups=self.groups, 34 | grouped_conv=grouped_conv, 35 | combine='concat' 36 | ) 37 | modules[stage_name+"_0"] = first_module 38 | for i in range(self.stage_repeats[stage-2]): 39 | name = stage_name + "_{}".format(i+1) 40 | module = ShuffleUnit( 41 | self.stage_out_channels[stage], 42 | self.stage_out_channels[stage], 43 | groups=self.groups, 44 | grouped_conv=True, 45 | combine='add' 46 | ) 47 | modules[name] = module 48 | return nn.Sequential(modules) 49 | 50 | def forward(self, x): 51 | x = self.conv1(x) 52 | x = self.maxpool(x) 53 | x = self.stage2(x) 54 | x = self.stage3(x) 55 | x = self.stage4(x) 56 | x = F.avg_pool2d(x, x.data.size()[-2:]) 57 | x = x.view(x.size(0), -1) 58 | x = self.fc(x) 59 | return F.log_softmax(x, dim=1) 60 | 61 | -------------------------------------------------------------------------------- /chapter7/squeezenet_fire.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | class Fire(nn.Module): 5 | 6 | def __init__(self, inplanes, squeeze_planes, expand_planes): 7 | super(Fire, self).__init__() 8 | self.conv1 = nn.Conv2d(inplanes, 
squeeze_planes, kernel_size=1, stride=1) 9 | self.bn1 = nn.BatchNorm2d(squeeze_planes) 10 | self.relu1 = nn.ReLU(inplace=True) 11 | self.conv2 = nn.Conv2d(squeeze_planes, expand_planes, kernel_size=1, stride=1) 12 | self.bn2 = nn.BatchNorm2d(expand_planes) 13 | self.conv3 = nn.Conv2d(squeeze_planes, expand_planes, kernel_size=3, stride=1, padding=1) 14 | self.bn3 = nn.BatchNorm2d(expand_planes) 15 | self.relu2 = nn.ReLU(inplace=True) 16 | 17 | def forward(self, x): 18 | x = self.conv1(x) 19 | x = self.bn1(x) 20 | x = self.relu1(x) 21 | out1 = self.conv2(x) 22 | out1 = self.bn2(out1) 23 | out2 = self.conv3(x) 24 | out2 = self.bn3(out2) 25 | out = torch.cat([out1, out2], 1) 26 | out = self.relu2(out) 27 | return out 28 | 29 | -------------------------------------------------------------------------------- /chapter8/nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def nms(bboxes, scores, thresh=0.5): 4 | 5 | x1 = bboxes[:,0] 6 | y1 = bboxes[:,1] 7 | x2 = bboxes[:,2] 8 | y2 = bboxes[:,3] 9 | areas = (x2-x1+1)*(y2-y1+1) 10 | _, order = scores.sort(0, descending=True) 11 | keep = [] 12 | 13 | while order.numel() > 0: 14 | if order.numel() == 1: 15 | i = order.item() 16 | keep.append(i) 17 | break 18 | else: 19 | i = order[0].item() 20 | keep.append(i) 21 | xx1 = x1[order[1:]].clamp(min=x1[i]) 22 | yy1 = y1[order[1:]].clamp(min=y1[i]) 23 | xx2 = x2[order[1:]].clamp(max=x2[i]) 24 | yy2 = y2[order[1:]].clamp(max=y2[i]) 25 | inter = (xx2-xx1).clamp(min=0) * (yy2-yy1).clamp(min=0) 26 | iou = inter / (areas[i]+areas[order[1:]]-inter) 27 | idx = (iou <= thresh).nonzero().squeeze() 28 | if idx.numel() == 0: 29 | break 30 | order = order[idx+1] 31 | 32 | return torch.LongTensor(keep) 33 | 34 | -------------------------------------------------------------------------------- /chapter8/retinanet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | # FPN50 (a ResNet-50 feature pyramid backbone) is assumed to be defined elsewhere in this chapter 4 | 5 | class RetinaNet(nn.Module): 6 | 7 | num_anchors = 9 8 | 9 | def __init__(self, num_classes=20): 10 | super(RetinaNet, self).__init__() 11 | self.fpn = FPN50() 12 | self.num_classes = num_classes 13 | self.loc_head = self._make_head(self.num_anchors*4) 14 | self.cls_head = self._make_head(self.num_anchors*self.num_classes) 15 | 16 | def forward(self, x): 17 | fms = self.fpn(x) 18 | loc_preds = [] 19 | cls_preds = [] 20 | for fm in fms: 21 | loc_pred = self.loc_head(fm) 22 | cls_pred = self.cls_head(fm) 23 | loc_pred = loc_pred.permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, 4) 24 | cls_pred = cls_pred.permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, self.num_classes) 25 | loc_preds.append(loc_pred) 26 | cls_preds.append(cls_pred) 27 | return torch.cat(loc_preds, 1), torch.cat(cls_preds, 1) 28 | 29 | def _make_head(self, out_planes): 30 | layers = [] 31 | for _ in range(4): 32 | layers.append(nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)) 33 | layers.append(nn.ReLU(True)) 34 | layers.append(nn.Conv2d(256, out_planes, kernel_size=3, stride=1, padding=1)) 35 | return nn.Sequential(*layers) 36 | 37 | def freeze_bn(self): 38 | # freeze BatchNorm statistics, useful when fine-tuning with small batches 39 | for layer in self.modules(): 40 | if isinstance(layer, nn.BatchNorm2d): 41 | layer.eval() 42 | 43 | --------------------------------------------------------------------------------
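A small sanity check for the nms function in chapter8/nms.py, as a usage sketch; the import path is assumed, and boxes are [x1, y1, x2, y2] with the +1 area convention used above:

import torch
from nms import nms  # assuming chapter8/nms.py is on the path

bboxes = torch.FloatTensor([[10, 10, 50, 50],
                            [12, 12, 52, 52],     # IoU ~0.83 with box 0
                            [100, 100, 140, 140]])
scores = torch.FloatTensor([0.9, 0.8, 0.7])
keep = nms(bboxes, scores, thresh=0.5)
print(keep)  # -> [0, 2]: the overlapping, lower-scored box is suppressed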