├── README.md ├── chapter1 └── model-evaluation │ ├── README.md │ ├── conf │ ├── arial.ttf │ └── conf.yaml │ ├── data │ ├── detections │ │ └── 1.txt │ ├── groundtruths │ │ └── 1.txt │ └── results │ │ ├── class1.png │ │ └── class2.png │ ├── evaluation.ipynb │ ├── evaluation.py │ └── lib │ ├── Evaluator.py │ ├── Evaluator.pyc │ ├── __pycache__ │ ├── Evaluator.cpython-36.pyc │ ├── detection.cpython-36.pyc │ └── utils.cpython-36.pyc │ ├── detection.py │ ├── detection.pyc │ ├── utils.py │ └── utils.pyc ├── chapter2 ├── mlp.py ├── perception.py ├── perception_sequential.py └── visdom.py ├── chapter3 ├── densenet_block.py ├── detnet_bottleneck.py ├── fpn.py ├── inceptionv1.py ├── inceptionv2.py ├── resnet_bottleneck.py └── vgg.py ├── chapter4 └── faster-rcnn-pytorch │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── _init_paths.py │ ├── cfgs │ ├── res101.yml │ ├── res101_ls.yml │ ├── res50.yml │ └── vgg16.yml │ ├── demo.py │ ├── images │ ├── img1.jpg │ ├── img1_det.jpg │ ├── img1_det_res101.jpg │ ├── img2.jpg │ ├── img2_det.jpg │ ├── img2_det_res101.jpg │ ├── img3.jpg │ ├── img3_det.jpg │ ├── img3_det_res101.jpg │ ├── img4.jpg │ ├── img4_det.jpg │ └── img4_det_res101.jpg │ ├── lib │ ├── datasets │ │ ├── VOCdevkit-matlab-wrapper │ │ │ ├── get_voc_opts.m │ │ │ ├── voc_eval.m │ │ │ └── xVOCap.m │ │ ├── __init__.py │ │ ├── coco.py │ │ ├── ds_utils.py │ │ ├── factory.py │ │ ├── imagenet.py │ │ ├── imdb.py │ │ ├── pascal_voc.py │ │ ├── pascal_voc_rbg.py │ │ ├── tools │ │ │ └── mcg_munge.py │ │ ├── vg.py │ │ ├── vg_eval.py │ │ └── voc_eval.py │ ├── make.sh │ ├── model │ │ ├── __init__.py │ │ ├── faster_rcnn │ │ │ ├── __init__.py │ │ │ ├── faster_rcnn.py │ │ │ ├── resnet.py │ │ │ └── vgg16.py │ │ ├── nms │ │ │ ├── .gitignore │ │ │ ├── __init__.py │ │ │ ├── _ext │ │ │ │ ├── __init__.py │ │ │ │ └── nms │ │ │ │ │ └── __init__.py │ │ │ ├── build.py │ │ │ ├── make.sh │ │ │ ├── nms_cpu.py │ │ │ ├── nms_gpu.py │ │ │ ├── nms_kernel.cu │ │ │ ├── nms_wrapper.py │ │ │ └── src │ │ │ │ ├── nms_cuda.c │ │ │ │ ├── nms_cuda.h │ │ │ │ ├── nms_cuda_kernel.cu │ │ │ │ └── nms_cuda_kernel.h │ │ ├── roi_align │ │ │ ├── __init__.py │ │ │ ├── _ext │ │ │ │ ├── __init__.py │ │ │ │ └── roi_align │ │ │ │ │ └── __init__.py │ │ │ ├── build.py │ │ │ ├── functions │ │ │ │ ├── __init__.py │ │ │ │ └── roi_align.py │ │ │ ├── make.sh │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ └── roi_align.py │ │ │ └── src │ │ │ │ ├── roi_align.c │ │ │ │ ├── roi_align.h │ │ │ │ ├── roi_align_cuda.c │ │ │ │ ├── roi_align_cuda.h │ │ │ │ ├── roi_align_kernel.cu │ │ │ │ └── roi_align_kernel.h │ │ ├── roi_crop │ │ │ ├── __init__.py │ │ │ ├── _ext │ │ │ │ ├── __init__.py │ │ │ │ ├── crop_resize │ │ │ │ │ └── __init__.py │ │ │ │ └── roi_crop │ │ │ │ │ └── __init__.py │ │ │ ├── build.py │ │ │ ├── functions │ │ │ │ ├── __init__.py │ │ │ │ ├── crop_resize.py │ │ │ │ ├── gridgen.py │ │ │ │ └── roi_crop.py │ │ │ ├── make.sh │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ ├── gridgen.py │ │ │ │ └── roi_crop.py │ │ │ └── src │ │ │ │ ├── roi_crop.c │ │ │ │ ├── roi_crop.h │ │ │ │ ├── roi_crop_cuda.c │ │ │ │ ├── roi_crop_cuda.h │ │ │ │ ├── roi_crop_cuda_kernel.cu │ │ │ │ └── roi_crop_cuda_kernel.h │ │ ├── roi_pooling │ │ │ ├── __init__.py │ │ │ ├── _ext │ │ │ │ ├── __init__.py │ │ │ │ └── roi_pooling │ │ │ │ │ └── __init__.py │ │ │ ├── build.py │ │ │ ├── functions │ │ │ │ ├── __init__.py │ │ │ │ └── roi_pool.py │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ └── roi_pool.py │ │ │ └── src │ │ │ │ ├── roi_pooling.c │ │ │ │ ├── roi_pooling.h │ │ │ │ ├── 
roi_pooling_cuda.c │ │ │ │ ├── roi_pooling_cuda.h │ │ │ │ ├── roi_pooling_kernel.cu │ │ │ │ └── roi_pooling_kernel.h │ │ ├── rpn │ │ │ ├── __init__.py │ │ │ ├── anchor_target_layer.py │ │ │ ├── bbox_transform.py │ │ │ ├── generate_anchors.py │ │ │ ├── proposal_layer.py │ │ │ ├── proposal_target_layer_cascade.py │ │ │ └── rpn.py │ │ └── utils │ │ │ ├── .gitignore │ │ │ ├── __init__.py │ │ │ ├── bbox.pyx │ │ │ ├── blob.py │ │ │ ├── config.py │ │ │ └── net_utils.py │ ├── pycocotools │ │ ├── UPSTREAM_REV │ │ ├── __init__.py │ │ ├── _mask.pyx │ │ ├── coco.py │ │ ├── cocoeval.py │ │ ├── license.txt │ │ ├── mask.py │ │ ├── maskApi.c │ │ └── maskApi.h │ ├── roi_data_layer │ │ ├── __init__.py │ │ ├── minibatch.py │ │ ├── roibatchLoader.py │ │ └── roidb.py │ └── setup.py │ ├── requirements.txt │ ├── test_net.py │ └── trainval_net.py ├── chapter5 ├── dssd-pytorch │ ├── arm.py │ └── tcb.py └── ssd-pytorch │ ├── .gitattributes │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── data │ ├── __init__.py │ ├── config.py │ ├── example.jpg │ ├── scripts │ │ ├── COCO2014.sh │ │ ├── VOC2007.sh │ │ └── VOC2012.sh │ └── voc0712.py │ ├── demo │ ├── __init__.py │ ├── demo.ipynb │ └── live.py │ ├── doc │ ├── SSD.jpg │ ├── detection_example.png │ ├── detection_example2.png │ ├── detection_examples.png │ └── ssd.png │ ├── eval.py │ ├── layers │ ├── __init__.py │ ├── box_utils.py │ ├── functions │ │ ├── __init__.py │ │ ├── detection.py │ │ └── prior_box.py │ └── modules │ │ ├── __init__.py │ │ ├── l2norm.py │ │ └── multibox_loss.py │ ├── ssd.py │ ├── test.py │ ├── train.py │ └── utils │ ├── __init__.py │ └── augmentations.py ├── chapter6 └── yolov2-pytorch │ ├── README.md │ ├── cfgs │ ├── __init__.py │ ├── config.py │ ├── config_voc.py │ └── exps │ │ ├── __init__.py │ │ ├── darknet19_exp1.py │ │ └── darknet19_exp2.py │ ├── darknet.py │ ├── datasets │ ├── __init__.py │ ├── imdb.py │ ├── pascal_voc.py │ └── voc_eval.py │ ├── demo.py │ ├── demo │ ├── 2007_000039.jpg │ ├── dog.jpg │ ├── eagle.jpg │ ├── giraffe.jpg │ ├── horses.jpg │ ├── out │ │ ├── 2007_000039.jpg │ │ ├── dog.jpg │ │ ├── eagle.jpg │ │ ├── giraffe.jpg │ │ ├── horses.jpg │ │ ├── person.jpg │ │ ├── ragged-edge-london-office-6.jpg │ │ └── scream.jpg │ ├── person.jpg │ ├── ragged-edge-london-office-6.jpg │ └── scream.jpg │ ├── layers │ ├── __init__.py │ ├── reorg │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── reorg_layer │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── reorg_layer.py │ │ └── src │ │ │ ├── reorg_cpu.c │ │ │ ├── reorg_cpu.h │ │ │ ├── reorg_cuda.c │ │ │ ├── reorg_cuda.h │ │ │ ├── reorg_cuda_kernel.cu │ │ │ └── reorg_cuda_kernel.h │ └── roi_pooling │ │ ├── __init__.py │ │ ├── _ext │ │ ├── __init__.py │ │ └── roi_pooling │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── roi_pool.py │ │ ├── roi_pool_py.py │ │ └── src │ │ ├── cuda │ │ ├── roi_pooling_kernel.cu │ │ └── roi_pooling_kernel.h │ │ ├── roi_pooling.c │ │ ├── roi_pooling.h │ │ ├── roi_pooling_cuda.c │ │ └── roi_pooling_cuda.h │ ├── make.sh │ ├── requirements.txt │ ├── test.py │ ├── train.py │ └── utils │ ├── __init__.py │ ├── bbox.c │ ├── bbox.pyx │ ├── build.py │ ├── im_transform.py │ ├── network.py │ ├── nms │ ├── .gitignore │ ├── __init__.py │ ├── cpu_nms.pyx │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── nms_kernel.cu │ └── py_cpu_nms.py │ ├── nms_wrapper.py │ ├── pycocotools │ ├── UPSTREAM_REV │ ├── __init__.py │ ├── _mask.c │ ├── _mask.pyx │ ├── coco.py │ ├── cocoeval.py │ ├── license.txt │ ├── mask.py │ ├── maskApi.c │ └── maskApi.h │ ├── timer.py │ ├── yolo.c │ ├── yolo.py 
│ └── yolo.pyx ├── chapter7 ├── mobilenet_v1.py ├── mobilenet_v2.py ├── mobilenet_v2_block.py ├── shufflenet_v1.py └── squeezenet_fire.py ├── chapter8 ├── nms.py └── retinanet.py └── reference └── README.md

/README.md:
--------------------------------------------------------------------------------
1 | # Deep Learning with PyTorch: Object Detection in Practice (深度学习之PyTorch物体检测实战)
2 | 
3 | ### About
4 | * This is the companion code for the book 《深度学习之PyTorch物体检测实战》 (Deep Learning with PyTorch: Object Detection in Practice), to be published by China Machine Press at the end of 2019.
5 | * Object detection is a topic that rewards hands-on practice, so readers are strongly encouraged to download this code and study it alongside the book.
6 | * Because of the large engineering effort involved, the implementations of the book's three major networks (Faster RCNN, SSD, and YOLO) are adapted from other authors' implementations, with annotations added and trimmed down to minimal working modules wherever possible; readers may pick and choose according to their own needs.
7 | * If you hit any problem while running the code, feel free to open an issue here, or look for an answer in the issue trackers of the upstream repos.
8 | * Enjoy Coding!
9 | 
10 | ### Environment
11 | * PyTorch: 0.4.0
12 | * Python: 3.6
13 | * CUDA: 9.0
14 | 
15 | ### Contents
16 | -------------------
17 | * chapter1: A brief look at object detection and PyTorch
18 | * chapter2: PyTorch basics
19 | * chapter3: Backbone networks
20 | * chapter4: Classic two-stage detector: Faster RCNN
21 | * chapter5: Single-stage multi-layer detector: SSD
22 | * chapter6: Classic single-stage detector: YOLO
23 | * chapter7: Model acceleration with lightweight networks
24 | * chapter8: Object detection details
25 | * chapter9: Hard problems in object detection
26 | * chapter10: The future of object detection
27 | 
28 | 
--------------------------------------------------------------------------------
/chapter1/model-evaluation/README.md:
--------------------------------------------------------------------------------
1 | ## How to run
2 | * Install Jupyter Notebook so the notebooks can be opened in a browser for visualization
3 | 
4 | ## 1. Model evaluation
5 | * Edit the paths in evaluation.ipynb, then run it
6 | 
7 | ## 2. Visualizing model bad cases
8 | * Edit the paths in badcase.ipynb, then run it
9 | 
10 | ## 3. Saving model inference results
11 | * Edit the paths in inference.ipynb, then run it
--------------------------------------------------------------------------------
/chapter1/model-evaluation/conf/arial.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter1/model-evaluation/conf/arial.ttf
--------------------------------------------------------------------------------
/chapter1/model-evaluation/conf/conf.yaml:
--------------------------------------------------------------------------------
1 | colors: ['#F0080F','#0A00D7','#95090A','#700FD4','#00a0F0','#00050C', '#30b404','#F00F00','#008377','#E0203B',
2 |          '#F00500','#008000','#0000FF','#F0F0F0','#7C0C00','#E0F00F','#00B000','#000FF0','#ADD806', '#320032',
3 |          '#48000C', '#C00085','#F5000A', '#00E4B5','#0000E6','#0070D6','#D00003','#DD0000','#FF0000','#2E0057',]
4 | 
5 | iouThreshold: 0.5
6 | 
7 | gtFormat: 'xyrb'
8 | detFormat: 'xyrb'
9 | gtCoordinates: 'abs'
10 | detCoordinates: 'abs'
11 | 
12 | showPlot: 'True'
13 | 
14 | 
15 | 
16 | 
17 | 
18 | 
19 | 
20 | 
--------------------------------------------------------------------------------
/chapter1/model-evaluation/data/detections/1.txt:
--------------------------------------------------------------------------------
1 | class1 12 58 53 96 0.87
2 | class1 51 88 152 191 0.98
3 | class2 345 898 431 945 0.67
4 | class2 597 346 674 415 0.45
5 | class1 243 546 298 583 0.83
6 | class2 99 345 150 426 0.96
7 | 
8 | 
--------------------------------------------------------------------------------
/chapter1/model-evaluation/data/groundtruths/1.txt:
--------------------------------------------------------------------------------
1 | class1 14 56 50 100
2 | class1 50 90 150 189
3 | class2 345 894 432 940
4 | class1 458 657 580 742
5 | class2 590 354 675 420
--------------------------------------------------------------------------------
/chapter1/model-evaluation/data/results/class1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter1/model-evaluation/data/results/class1.png -------------------------------------------------------------------------------- /chapter1/model-evaluation/data/results/class2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter1/model-evaluation/data/results/class2.png -------------------------------------------------------------------------------- /chapter1/model-evaluation/evaluation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import yaml 4 | sys.path.insert(0, os.path.join(os.getcwd(), 'lib')) 5 | from detection import detections, plot_save_result 6 | 7 | conf_path = './conf/conf.yaml' 8 | with open(conf_path, 'r', encoding='utf-8') as f: 9 | data=f.read() 10 | cfg = yaml.load(data) 11 | 12 | gtFolder = 'data/groundtruths' 13 | detFolder = 'data/detections' 14 | savePath = 'data/results' 15 | 16 | results, classes = detections(cfg, gtFolder, detFolder, savePath) 17 | plot_save_result(cfg, results, classes, savePath) 18 | -------------------------------------------------------------------------------- /chapter1/model-evaluation/lib/Evaluator.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter1/model-evaluation/lib/Evaluator.pyc -------------------------------------------------------------------------------- /chapter1/model-evaluation/lib/__pycache__/Evaluator.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter1/model-evaluation/lib/__pycache__/Evaluator.cpython-36.pyc -------------------------------------------------------------------------------- /chapter1/model-evaluation/lib/__pycache__/detection.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter1/model-evaluation/lib/__pycache__/detection.cpython-36.pyc -------------------------------------------------------------------------------- /chapter1/model-evaluation/lib/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter1/model-evaluation/lib/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /chapter1/model-evaluation/lib/detection.py: -------------------------------------------------------------------------------- 1 | import os 2 | from Evaluator import * 3 | import pdb 4 | 5 | def getGTBoxes(cfg, GTFolder): 6 | 7 | files = os.listdir(GTFolder) 8 | files.sort() 9 | 10 | classes = [] 11 | num_pos = {} 12 | gt_boxes = {} 13 | for f in files: 14 | nameOfImage = f.replace(".txt", "") 15 | fh1 = open(os.path.join(GTFolder, f), "r") 16 | 17 | for line in fh1: 18 | line = line.replace("\n", "") 19 | if line.replace(' ', '') == '': 20 | continue 21 | splitLine = line.split(" ") 22 | 23 | 
cls = splitLine[0]  # class label
24 |             left = float(splitLine[1])
25 |             top = float(splitLine[2])
26 |             right = float(splitLine[3])
27 |             bottom = float(splitLine[4])
28 |             one_box = [left, top, right, bottom, 0]
29 | 
30 |             if cls not in classes:
31 |                 classes.append(cls)
32 |                 gt_boxes[cls] = {}
33 |                 num_pos[cls] = 0
34 | 
35 |             num_pos[cls] += 1
36 | 
37 |             if nameOfImage not in gt_boxes[cls]:
38 |                 gt_boxes[cls][nameOfImage] = []
39 |             gt_boxes[cls][nameOfImage].append(one_box)
40 | 
41 |         fh1.close()
42 |     return gt_boxes, classes, num_pos
43 | 
44 | def getDetBoxes(cfg, DetFolder):
45 | 
46 |     files = os.listdir(DetFolder)
47 |     files.sort()
48 | 
49 |     det_boxes = {}
50 |     for f in files:
51 |         nameOfImage = f.replace(".txt", "")
52 |         fh1 = open(os.path.join(DetFolder, f), "r")
53 | 
54 |         for line in fh1:
55 |             line = line.replace("\n", "")
56 |             if line.replace(' ', '') == '':
57 |                 continue
58 |             splitLine = line.split(" ")
59 | 
60 |             cls = splitLine[0]  # class label
61 |             left = float(splitLine[1])
62 |             top = float(splitLine[2])
63 |             right = float(splitLine[3])
64 |             bottom = float(splitLine[4])
65 |             score = float(splitLine[5])
66 |             one_box = [left, top, right, bottom, score, nameOfImage]
67 | 
68 |             if cls not in det_boxes:
69 |                 det_boxes[cls] = []
70 |             det_boxes[cls].append(one_box)
71 | 
72 |         fh1.close()
73 |     return det_boxes
74 | 
75 | def detections(cfg,
76 |                gtFolder,
77 |                detFolder,
78 |                savePath,
79 |                show_process=True):
80 | 
81 | 
82 |     gt_boxes, classes, num_pos = getGTBoxes(cfg, gtFolder)
83 |     det_boxes = getDetBoxes(cfg, detFolder)
84 | 
85 |     evaluator = Evaluator()
86 | 
87 |     return evaluator.GetPascalVOCMetrics(cfg, classes, gt_boxes, num_pos, det_boxes)
88 | 
89 | def plot_save_result(cfg, results, classes, savePath):
90 | 
91 | 
92 |     plt.rcParams['savefig.dpi'] = 80
93 |     plt.rcParams['figure.dpi'] = 130
94 | 
95 |     acc_AP = 0
96 |     validClasses = 0
97 |     fig_index = 0
98 | 
99 |     for cls_index, result in enumerate(results):
100 |         if result is None:
101 |             raise IOError('Error: Class %d could not be found.' % cls_index)  # fixed: `classId` was undefined
102 | 
103 |         cls = result['class']
104 |         precision = result['precision']
105 |         recall = result['recall']
106 |         average_precision = result['AP']
107 |         acc_AP = acc_AP + average_precision
108 |         mpre = result['interpolated precision']
109 |         mrec = result['interpolated recall']
110 |         npos = result['total positives']
111 |         total_tp = result['total TP']
112 |         total_fp = result['total FP']
113 | 
114 |         fig_index += 1
115 |         plt.figure(fig_index)
116 |         plt.plot(recall, precision, cfg['colors'][cls_index], label='Precision')
117 |         plt.xlabel('recall')
118 |         plt.ylabel('precision')
119 |         ap_str = "{0:.2f}%".format(average_precision * 100)
120 |         plt.title('Precision x Recall curve \nClass: %s, AP: %s' % (str(cls), ap_str))
121 |         plt.legend(shadow=True)
122 |         plt.grid()
123 |         plt.savefig(os.path.join(savePath, cls + '.png'))
124 |         plt.show()
125 |         plt.pause(0.05)
126 | 
127 | 
128 |     mAP = acc_AP / fig_index
129 |     mAP_str = "{0:.2f}%".format(mAP * 100)
130 |     print('mAP: %s' % mAP_str)
131 | 
--------------------------------------------------------------------------------
/chapter1/model-evaluation/lib/detection.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter1/model-evaluation/lib/detection.pyc
--------------------------------------------------------------------------------
/chapter1/model-evaluation/lib/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter1/model-evaluation/lib/utils.py
--------------------------------------------------------------------------------
/chapter1/model-evaluation/lib/utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter1/model-evaluation/lib/utils.pyc
--------------------------------------------------------------------------------
/chapter2/mlp.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | 
3 | class MLP(nn.Module):
4 |     def __init__(self, in_dim, hid_dim1, hid_dim2, out_dim):
5 |         super(MLP, self).__init__()
6 |         self.layer = nn.Sequential(
7 |             nn.Linear(in_dim, hid_dim1),
8 |             nn.ReLU(),
9 |             nn.Linear(hid_dim1, hid_dim2),
10 |             nn.ReLU(),
11 |             nn.Linear(hid_dim2, out_dim),
12 |             nn.ReLU()
13 |         )
14 |     def forward(self, x):
15 |         x = self.layer(x)
16 |         return x
17 | 
--------------------------------------------------------------------------------
/chapter2/perception.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | 
4 | class Linear(nn.Module):
5 |     def __init__(self, in_dim, out_dim):
6 |         super(Linear, self).__init__()
7 |         self.w = nn.Parameter(torch.randn(in_dim, out_dim))
8 |         self.b = nn.Parameter(torch.randn(out_dim))
9 | 
10 |     def forward(self, x):
11 |         x = x.matmul(self.w)
12 |         y = x + self.b.expand_as(x)
13 |         return y
14 | 
15 | class Perception(nn.Module):
16 |     def __init__(self, in_dim, hid_dim, out_dim):
17 |         super(Perception, self).__init__()
18 |         self.layer1 = Linear(in_dim, hid_dim)
19 |         self.layer2 = Linear(hid_dim, out_dim)
20 |     def forward(self, x):
21 |         x = self.layer1(x)
22 |         y = torch.sigmoid(x)
23 |         y = self.layer2(y)
24 |         y = torch.sigmoid(y)
25 | return y 26 | -------------------------------------------------------------------------------- /chapter2/perception_sequential.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | class Perception(nn.Module): 4 | def __init__(self, in_dim, hid_dim, out_dim): 5 | super(Perception, self).__init__() 6 | self.layer = nn.Sequential( 7 | nn.Linear(in_dim, hid_dim), 8 | nn.Sigmoid(), 9 | nn.Linear(hid_dim, out_dim), 10 | nn.Sigmoid() 11 | ) 12 | def forward(self, x): 13 | y = self.layer(x) 14 | return y 15 | -------------------------------------------------------------------------------- /chapter2/visdom.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import visdom 3 | 4 | vis = visdom.Visdom(env='first') 5 | vis.text('first visdom', win='text1') 6 | vis.text('hello PyTorch', win='text1', append=True) 7 | 8 | for i in range(20): 9 | vis.line(X=torch.FloatTensor([i]), Y=torch.FloatTensor([-i**2+20*i+1]), opts={'title': 'y=-x^2+20x+1'}, win='loss', update='append') 10 | 11 | vis.image(torch.randn(3, 256, 256), win='random_image') 12 | -------------------------------------------------------------------------------- /chapter3/densenet_block.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | 5 | class Bottleneck(nn.Module): 6 | def __init__(self, nChannels, growthRate): 7 | super(Bottleneck, self).__init__() 8 | interChannels = 4*growthRate 9 | self.bn1 = nn.BatchNorm2d(nChannels) 10 | self.conv1 = nn.Conv2d(nChannels, interChannels, kernel_size=1, 11 | bias=False) 12 | self.bn2 = nn.BatchNorm2d(interChannels) 13 | self.conv2 = nn.Conv2d(interChannels, growthRate, kernel_size=3, 14 | padding=1, bias=False) 15 | def forward(self, x): 16 | out = self.conv1(F.relu(self.bn1(x))) 17 | out = self.conv2(F.relu(self.bn2(out))) 18 | out = torch.cat((x, out), 1) 19 | return out 20 | 21 | class Denseblock(nn.Module): 22 | def __init__(self, nChannels, growthRate, nDenseBlocks): 23 | super(Denseblock, self).__init__() 24 | layers = [] 25 | for i in range(int(nDenseBlocks)): 26 | layers.append(Bottleneck(nChannels, growthRate)) 27 | nChannels += growthRate 28 | self.denseblock = nn.Sequential(*layers) 29 | def forward(self, x): 30 | return self.denseblock(x) 31 | 32 | -------------------------------------------------------------------------------- /chapter3/detnet_bottleneck.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | class DetBottleneck(nn.Module): 3 | 4 | def __init__(self, inplanes, planes, stride=1, extra=False): 5 | super(DetBottleneck, self).__init__() 6 | self.bottleneck = nn.Sequential( 7 | nn.Conv2d(inplanes, planes, 1, bias=False), 8 | nn.BatchNorm2d(planes), 9 | nn.ReLU(inplace=True), 10 | nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=2, 11 | dilation=2, bias=False), 12 | nn.BatchNorm2d(planes), 13 | nn.ReLU(inplace=True), 14 | nn.Conv2d(planes, planes, 1, bias=False), 15 | nn.BatchNorm2d(planes), 16 | ) 17 | self.relu = nn.ReLU(inplace=True) 18 | self.extra = extra 19 | if self.extra: 20 | self.extra_conv = nn.Sequential( 21 | nn.Conv2d(inplanes, planes, 1, bias=False), 22 | nn.BatchNorm2d(planes) 23 | ) 24 | 25 | def forward(self, x): 26 | if self.extra: 27 | identity = self.extra_conv(x) 28 | else: 29 | identity = x 30 | out = self.bottleneck(x) 31 | out += identity 32 | out = 
self.relu(out) 33 | return out 34 | 35 | -------------------------------------------------------------------------------- /chapter3/fpn.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | import math 4 | 5 | class Bottleneck(nn.Module): 6 | expansion = 4 7 | def __init__(self, in_planes, planes, stride=1, downsample=None): 8 | super(Bottleneck, self).__init__() 9 | self.bottleneck = nn.Sequential( 10 | nn.Conv2d(in_planes, planes, 1, bias=False), 11 | nn.BatchNorm2d(planes), 12 | nn.ReLU(inplace=True), 13 | nn.Conv2d(planes, planes, 3, stride, 1, bias=False), 14 | nn.BatchNorm2d(planes), 15 | nn.ReLU(inplace=True), 16 | nn.Conv2d(planes, self.expansion * planes, 1, bias=False), 17 | nn.BatchNorm2d(self.expansion * planes), 18 | ) 19 | self.relu = nn.ReLU(inplace=True) 20 | self.downsample = downsample 21 | def forward(self, x): 22 | identity = x 23 | out = self.bottleneck(x) 24 | if self.downsample is not None: 25 | identity = self.downsample(x) 26 | out += identity 27 | out = self.relu(out) 28 | return out 29 | 30 | class FPN(nn.Module): 31 | def __init__(self, layers): 32 | super(FPN, self).__init__() 33 | self.inplanes = 64 34 | self.conv1 = nn.Conv2d(3, 64, 7, 2, 3, bias=False) 35 | self.bn1 = nn.BatchNorm2d(64) 36 | self.relu = nn.ReLU(inplace=True) 37 | self.maxpool = nn.MaxPool2d(3, 2, 1) 38 | 39 | self.layer1 = self._make_layer(64, layers[0]) 40 | self.layer2 = self._make_layer(128, layers[1], 2) 41 | self.layer3 = self._make_layer(256, layers[2], 2) 42 | self.layer4 = self._make_layer(512, layers[3], 2) 43 | self.toplayer = nn.Conv2d(2048, 256, 1, 1, 0) 44 | 45 | self.smooth1 = nn.Conv2d(256, 256, 3, 1, 1) 46 | self.smooth2 = nn.Conv2d(256, 256, 3, 1, 1) 47 | self.smooth3 = nn.Conv2d(256, 256, 3, 1, 1) 48 | 49 | self.latlayer1 = nn.Conv2d(1024, 256, 1, 1, 0) 50 | self.latlayer2 = nn.Conv2d( 512, 256, 1, 1, 0) 51 | self.latlayer3 = nn.Conv2d( 256, 256, 1, 1, 0) 52 | 53 | def _make_layer(self, planes, blocks, stride=1): 54 | downsample = None 55 | if stride != 1 or self.inplanes != Bottleneck.expansion * planes: 56 | downsample = nn.Sequential( 57 | nn.Conv2d(self.inplanes, Bottleneck.expansion * planes, 1, stride, bias=False), 58 | nn.BatchNorm2d(Bottleneck.expansion * planes) 59 | ) 60 | layers = [] 61 | layers.append(Bottleneck(self.inplanes, planes, stride, downsample)) 62 | self.inplanes = planes * Bottleneck.expansion 63 | for i in range(1, blocks): 64 | layers.append(Bottleneck(self.inplanes, planes)) 65 | return nn.Sequential(*layers) 66 | 67 | def _upsample_add(self, x, y): 68 | _,_,H,W = y.shape 69 | return F.upsample(x, size=(H,W), mode='bilinear') + y 70 | 71 | def forward(self, x): 72 | 73 | c1 = self.maxpool(self.relu(self.bn1(self.conv1(x)))) 74 | c2 = self.layer1(c1) 75 | c3 = self.layer2(c2) 76 | c4 = self.layer3(c3) 77 | c5 = self.layer4(c4) 78 | 79 | p5 = self.toplayer(c5) 80 | p4 = self._upsample_add(p5, self.latlayer1(c4)) 81 | p3 = self._upsample_add(p4, self.latlayer2(c3)) 82 | p2 = self._upsample_add(p3, self.latlayer3(c2)) 83 | 84 | p4 = self.smooth1(p4) 85 | p3 = self.smooth2(p3) 86 | p2 = self.smooth3(p2) 87 | return p2, p3, p4, p5 88 | 89 | -------------------------------------------------------------------------------- /chapter3/inceptionv1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | class BasicConv2d(nn.Module): 5 | def __init__(self, 
in_channels, out_channels, kernel_size, padding=0):
6 |         super(BasicConv2d, self).__init__()
7 |         self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, padding=padding)
8 |     def forward(self, x):
9 |         x = self.conv(x)
10 |         return F.relu(x, inplace=True)
11 | 
12 | class Inceptionv1(nn.Module):
13 |     def __init__(self, in_dim, hid_1_1, hid_2_1, hid_2_3, hid_3_1, out_3_5, out_4_1):
14 |         super(Inceptionv1, self).__init__()
15 |         self.branch1x1 = BasicConv2d(in_dim, hid_1_1, 1)
16 |         self.branch3x3 = nn.Sequential(
17 |             BasicConv2d(in_dim, hid_2_1, 1),
18 |             BasicConv2d(hid_2_1, hid_2_3, 3, padding=1)
19 |         )
20 |         self.branch5x5 = nn.Sequential(
21 |             BasicConv2d(in_dim, hid_3_1, 1),
22 |             BasicConv2d(hid_3_1, out_3_5, 5, padding=2)
23 |         )
24 |         self.branch_pool = nn.Sequential(
25 |             nn.MaxPool2d(3, stride=1, padding=1),
26 |             BasicConv2d(in_dim, out_4_1, 1)
27 |         )
28 |     def forward(self, x):
29 |         b1 = self.branch1x1(x)
30 |         b2 = self.branch3x3(x)
31 |         b3 = self.branch5x5(x)
32 |         b4 = self.branch_pool(x)
33 |         output = torch.cat((b1, b2, b3, b4), dim=1)
34 |         return output
35 | 
36 | 
--------------------------------------------------------------------------------
/chapter3/inceptionv2.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | import torch.nn.functional as F
4 | 
5 | class BasicConv2d(nn.Module):
6 |     def __init__(self, in_channels, out_channels, kernel_size, padding=0):
7 |         super(BasicConv2d, self).__init__()
8 |         self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, padding=padding)
9 |         self.bn = nn.BatchNorm2d(out_channels, eps=0.001)
10 |     def forward(self, x):
11 |         x = self.conv(x)
12 |         x = self.bn(x)
13 |         return F.relu(x, inplace=True)
14 | 
15 | class Inceptionv2(nn.Module):
16 |     def __init__(self):
17 |         super(Inceptionv2, self).__init__()
18 |         self.branch1 = BasicConv2d(192, 96, 1, 0)
19 |         self.branch2 = nn.Sequential(
20 |             BasicConv2d(192, 48, 1, 0),
21 |             BasicConv2d(48, 64, 3, 1)
22 |         )
23 |         self.branch3 = nn.Sequential(
24 |             BasicConv2d(192, 64, 1, 0),
25 |             BasicConv2d(64, 96, 3, 1),
26 |             BasicConv2d(96, 96, 3, 1)
27 |         )
28 |         self.branch4 = nn.Sequential(
29 |             nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),
30 |             BasicConv2d(192, 64, 1, 0)
31 |         )
32 |     def forward(self, x):
33 |         x0 = self.branch1(x)
34 |         x1 = self.branch2(x)
35 |         x2 = self.branch3(x)
36 |         x3 = self.branch4(x)
37 |         out = torch.cat((x0, x1, x2, x3), 1)
38 |         return out
39 | 
40 | 
--------------------------------------------------------------------------------
/chapter3/resnet_bottleneck.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | 
3 | class Bottleneck(nn.Module):
4 |     def __init__(self, in_dim, out_dim, stride=1):
5 |         super(Bottleneck, self).__init__()
6 |         self.bottleneck = nn.Sequential(
7 |             nn.Conv2d(in_dim, in_dim, 1, bias=False),
8 |             nn.BatchNorm2d(in_dim),
9 |             nn.ReLU(inplace=True),
10 |             nn.Conv2d(in_dim, in_dim, 3, stride, 1, bias=False),
11 |             nn.BatchNorm2d(in_dim),
12 |             nn.ReLU(inplace=True),
13 |             nn.Conv2d(in_dim, out_dim, 1, bias=False),
14 |             nn.BatchNorm2d(out_dim),
15 |         )
16 |         self.relu = nn.ReLU(inplace=True)
17 |         self.downsample = nn.Sequential(
18 |             nn.Conv2d(in_dim, out_dim, 1, stride),  # stride must match the main path so the shapes agree
19 |             nn.BatchNorm2d(out_dim),
20 |         )
21 | 
22 |     def forward(self, x):
23 |         # the 1x1 projection shortcut is applied unconditionally in this snippet
24 |         out = self.bottleneck(x)
25 |         identity = self.downsample(x)
26 |         out += identity
27 |         out = self.relu(out)
28 |         return out
29 | 
30 | 
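A quick way to sanity-check the chapter3 blocks above is a forward pass on a dummy batch. A minimal sketch, assuming it is run from the chapter3 directory so that `from fpn import FPN` resolves; the stage depths [3, 4, 6, 3] follow the ResNet-50 convention:

```
import torch
from fpn import FPN

net = FPN([3, 4, 6, 3])                  # ResNet-50 style stage depths
x = torch.randn(1, 3, 224, 224)          # dummy image batch
p2, p3, p4, p5 = net(x)
# every pyramid level carries 256 channels; spatial strides are 4, 8, 16, 32:
# (1, 256, 56, 56), (1, 256, 28, 28), (1, 256, 14, 14), (1, 256, 7, 7)
print([tuple(p.shape) for p in (p2, p3, p4, p5)])
```

The same pattern (build the module, feed a random tensor, print the output shape) works for Inceptionv1, Inceptionv2, Bottleneck, and the Denseblock as well.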
-------------------------------------------------------------------------------- /chapter3/vgg.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | class VGG(nn.Module): 3 | def __init__(self, num_classes=1000): 4 | super(VGG, self).__init__() 5 | layers = [] 6 | in_dim = 3 7 | out_dim = 64 8 | for i in range(13): 9 | layers += [nn.Conv2d(in_dim, out_dim, 3, 1, 1), nn.ReLU(inplace=True)] 10 | in_dim = out_dim 11 | if i==1 or i==3 or i==6 or i==9 or i==12: 12 | layers += [nn.MaxPool2d(2, 2)] 13 | if i!=9: 14 | out_dim*=2 15 | self.features = nn.Sequential(*layers) 16 | self.classifier = nn.Sequential( 17 | nn.Linear(512 * 7 * 7, 4096), 18 | nn.ReLU(True), 19 | nn.Dropout(), 20 | nn.Linear(4096, 4096), 21 | nn.ReLU(True), 22 | nn.Dropout(), 23 | nn.Linear(4096, num_classes), 24 | ) 25 | def forward(self, x): 26 | x = self.features(x) 27 | x = x.view(x.size(0), -1) 28 | x = self.classifier(x) 29 | return x 30 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/.gitignore: -------------------------------------------------------------------------------- 1 | data/* 2 | 3 | # READ THIS BEFORE YOU REFACTOR ME 4 | # 5 | # setup.py uses the list of patterns in this file to decide 6 | # what to delete, but it's not 100% sound. So, for example, 7 | # if you delete aten/build/ because it's redundant with build/, 8 | # aten/build/ will stop being cleaned. So be careful when 9 | # refactoring this file! 10 | 11 | ## PyTorch 12 | 13 | .mypy_cache 14 | *.pyc 15 | */*.pyc 16 | */*.so* 17 | */**/__pycache__ 18 | */**/*.dylib* 19 | */**/*.pyc 20 | */**/*.pyd 21 | */**/*.so* 22 | */**/**/*.pyc 23 | */**/**/**/*.pyc 24 | */**/**/**/**/*.pyc 25 | aten/build/ 26 | aten/src/ATen/Config.h 27 | aten/src/ATen/cuda/CUDAConfig.h 28 | build/ 29 | dist/ 30 | docs/src/**/* 31 | test/.coverage 32 | test/cpp/api/mnist 33 | test/data/gpu_tensors.pt 34 | test/data/legacy_modules.t7 35 | test/data/legacy_serialized.pt 36 | test/data/linear.pt 37 | test/htmlcov 38 | third_party/build/ 39 | tools/shared/_utils_internal.py 40 | torch.egg-info/ 41 | torch/csrc/autograd/generated/* 42 | torch/csrc/cudnn/cuDNN.cpp 43 | torch/csrc/generated 44 | torch/csrc/generic/TensorMethods.cpp 45 | torch/csrc/jit/generated/* 46 | torch/csrc/nn/THCUNN.cpp 47 | torch/csrc/nn/THCUNN.cwrap 48 | torch/csrc/nn/THNN_generic.cpp 49 | torch/csrc/nn/THNN_generic.cwrap 50 | torch/csrc/nn/THNN_generic.h 51 | torch/csrc/nn/THNN.cpp 52 | torch/csrc/nn/THNN.cwrap 53 | torch/lib/*.a* 54 | torch/lib/*.dll* 55 | torch/lib/*.dylib* 56 | torch/lib/*.h 57 | torch/lib/*.lib 58 | torch/lib/*.so* 59 | torch/lib/build 60 | torch/lib/cmake 61 | torch/lib/include 62 | torch/lib/pkgconfig 63 | torch/lib/protoc 64 | torch/lib/tmp_install 65 | torch/lib/torch_shm_manager 66 | torch/version.py 67 | 68 | # IPython notebook checkpoints 69 | .ipynb_checkpoints 70 | 71 | # Editor temporaries 72 | *.swn 73 | *.swo 74 | *.swp 75 | *.swm 76 | *~ 77 | 78 | # macOS dir files 79 | .DS_Store 80 | 81 | # Symbolic files 82 | tools/shared/cwrap_common.py 83 | 84 | # Ninja files 85 | .ninja_deps 86 | .ninja_log 87 | compile_commands.json 88 | *.egg-info/ 89 | docs/source/scripts/activation_images/ 90 | 91 | ## General 92 | 93 | # Compiled Object files 94 | *.slo 95 | *.lo 96 | *.o 97 | *.cuo 98 | *.obj 99 | 100 | # Compiled Dynamic libraries 101 | *.so 102 | *.dylib 103 | *.dll 104 | 105 | # Compiled Static libraries 106 | *.lai 107 | *.la 108 | *.a 109 | *.lib 110 | 111 
| # Compiled protocol buffers 112 | *.pb.h 113 | *.pb.cc 114 | *_pb2.py 115 | 116 | # Compiled python 117 | *.pyc 118 | *.pyd 119 | 120 | # Compiled MATLAB 121 | *.mex* 122 | 123 | # IPython notebook checkpoints 124 | .ipynb_checkpoints 125 | 126 | # Editor temporaries 127 | *.swn 128 | *.swo 129 | *.swp 130 | *~ 131 | 132 | # Sublime Text settings 133 | *.sublime-workspace 134 | *.sublime-project 135 | 136 | # Eclipse Project settings 137 | *.*project 138 | .settings 139 | 140 | # QtCreator files 141 | *.user 142 | 143 | # PyCharm files 144 | .idea 145 | 146 | # Visual Studio Code files 147 | .vscode 148 | .vs 149 | 150 | # OSX dir files 151 | .DS_Store 152 | 153 | ## Caffe2 154 | 155 | # build, distribute, and bins (+ python proto bindings) 156 | build 157 | build_host_protoc 158 | build_android 159 | build_ios 160 | /build_* 161 | .build_debug/* 162 | .build_release/* 163 | distribute/* 164 | *.testbin 165 | *.bin 166 | cmake_build 167 | .cmake_build 168 | gen 169 | .setuptools-cmake-build 170 | .pytest_cache 171 | aten/build/* 172 | 173 | # Bram 174 | plsdontbreak 175 | 176 | # Generated documentation 177 | docs/_site 178 | docs/gathered 179 | _site 180 | doxygen 181 | docs/dev 182 | 183 | # LevelDB files 184 | *.sst 185 | *.ldb 186 | LOCK 187 | LOG* 188 | CURRENT 189 | MANIFEST-* 190 | 191 | # generated version file 192 | caffe2/version.py 193 | 194 | # setup.py intermediates 195 | .eggs 196 | caffe2.egg-info 197 | 198 | # Atom/Watchman required file 199 | .watchmanconfig 200 | 201 | # cython generated files 202 | lib/model/utils/bbox.c 203 | lib/pycocotools/_mask.c -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Jianwei Yang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 
--------------------------------------------------------------------------------
/chapter4/faster-rcnn-pytorch/README.md:
--------------------------------------------------------------------------------
1 | # Chapter 4: Faster RCNN
2 | 
3 | ## Introduction
4 | 
5 | This code is based mainly on [jwyang/faster-rcnn.pytorch](https://github.com/jwyang/faster-rcnn.pytorch), a PyTorch re-implementation of Faster RCNN. If you run into problems while studying it, check [jwyang's issue tracker](https://github.com/jwyang/faster-rcnn.pytorch/issues) to see whether a solution already exists.
6 | 
7 | ## Preparation
8 | 
9 | First clone the book's code to your machine:
10 | ```
11 | git clone https://github.com/dongdonghy/Detection-PyTorch-Notebook.git
12 | ```
13 | 
14 | Then change into this project's directory:
15 | ```
16 | cd Detection-PyTorch-Notebook/chapter4/faster-rcnn-pytorch
17 | ```
18 | 
19 | ### Dependencies
20 | 
21 | * Python 2.7 or 3.6
22 | * PyTorch 0.4.0
23 | * CUDA 8.0 or higher
24 | 
25 | ### Data preparation
26 | 
27 | * **PASCAL_VOC 07+12**: For the VOC datasets, follow the instructions in [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn#beyond-the-demo-installation-for-training-and-testing-models) to prepare them, and create a symlink in the data folder.
28 | 
29 | * **COCO**: For the COCO dataset, follow the instructions in [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn#beyond-the-demo-installation-for-training-and-testing-models) to prepare it, and create a symlink in the data folder.
30 | 
31 | ### Pretrained weights
32 | 
33 | The author provides two different sets of pretrained weights, VGG and ResNet101:
34 | 
35 | * VGG16: [Dropbox](https://www.dropbox.com/s/s3brpk0bdq60nyb/vgg16_caffe.pth?dl=0), [VT Server](https://filebox.ece.vt.edu/~jw2yang/faster-rcnn/pretrained-base-models/vgg16_caffe.pth)
36 | 
37 | * ResNet101: [Dropbox](https://www.dropbox.com/s/iev3tkbz5wyyuz9/resnet101_caffe.pth?dl=0), [VT Server](https://filebox.ece.vt.edu/~jw2yang/faster-rcnn/pretrained-base-models/resnet101_caffe.pth)
38 | 
39 | Download the corresponding pretrained weights and put them under the data/pretrained_model folder. Experiments showed that the Caffe-converted pretrained weights give higher accuracy, so the Caffe weights are used here.
40 | 
41 | ### Compilation
42 | 
43 | Modules such as NMS, RoI Pooling and RoI Align depend on custom CUDA C code, so this part has to be compiled separately. First, in lib/make.sh, set CUDA_ARCH to the arch matching your own GPU; the mapping is:
44 | 
45 | | GPU model | Architecture |
46 | | ------------- | ------------- |
47 | | TitanX (Maxwell/Pascal) | sm_52 |
48 | | GTX 960M | sm_50 |
49 | | GTX 1080 (Ti) | sm_61 |
50 | | Grid K520 (AWS g2.2xlarge) | sm_30 |
51 | | Tesla K80 (AWS p2.xlarge) | sm_37 |
52 | 
53 | For more details on arch values, see NVIDIA's official pages: [cuda-gpus](https://developer.nvidia.com/cuda-gpus) or [sm-architectures](http://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/)
54 | 
55 | Install the Python dependencies with pip:
56 | ```
57 | pip install -r requirements.txt
58 | ```
59 | 
60 | Compile the CUDA-dependent libraries:
61 | 
62 | ```
63 | cd lib
64 | sh make.sh
65 | ```
66 | 
67 | ## Training
68 | 
69 | Train Faster RCNN with the command below. By default it uses the VOC dataset and the VGG16 pretrained model:
70 | ```
71 | CUDA_VISIBLE_DEVICES=0 python trainval_net.py
72 | ```
73 | This runs with the default arguments in trainval_net.py; most hyperparameters can be adjusted to your situation, and the batch size and worker number should be chosen according to your GPU capacity.
74 | 
75 | ## Testing
76 | 
77 | To evaluate a trained model on the test set, run:
78 | ```
79 | python test_net.py --dataset pascal_voc --net vgg16 \
80 |                    --checksession $SESSION --checkepoch $EPOCH --checkpoint $CHECKPOINT \
81 |                    --cuda
82 | ```
83 | Set SESSION, EPOCH and CHECKPOINT to the model checkpoint you want to test.
84 | 
85 | 
--------------------------------------------------------------------------------
/chapter4/faster-rcnn-pytorch/_init_paths.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | import sys
3 | 
4 | def add_path(path):
5 |     if path not in sys.path:
6 |         sys.path.insert(0, path)
7 | 
8 | this_dir = osp.dirname(__file__)
9 | 
10 | # Add lib to PYTHONPATH
11 | lib_path = osp.join(this_dir, 'lib')
12 | add_path(lib_path)
13 | 
14 | coco_path = osp.join(this_dir, 'data', 'coco', 'PythonAPI')
15 | add_path(coco_path)
16 | 
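`_init_paths.py` above is imported purely for its side effect of putting `lib/` on `sys.path`. A minimal sketch of the pattern the entry scripts (trainval_net.py, test_net.py) rely on; the exact import lines below are illustrative, not copied from those scripts:

```
import _init_paths  # noqa: F401  (side-effect import: prepends lib/ to sys.path)

# resolvable only after _init_paths has run, via lib/model/faster_rcnn/vgg16.py
from model.faster_rcnn.vgg16 import vgg16
```

Any script placed in the project root can use the same two-line preamble to reach the packages under lib/ (model, datasets, roi_data_layer, pycocotools).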
-------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/cfgs/res101.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res101 2 | TRAIN: 3 | HAS_RPN: True 4 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 5 | RPN_POSITIVE_OVERLAP: 0.7 6 | RPN_BATCHSIZE: 256 7 | PROPOSAL_METHOD: gt 8 | BG_THRESH_LO: 0.0 9 | DISPLAY: 20 10 | BATCH_SIZE: 128 11 | WEIGHT_DECAY: 0.0001 12 | DOUBLE_BIAS: False 13 | LEARNING_RATE: 0.001 14 | TEST: 15 | HAS_RPN: True 16 | POOLING_SIZE: 7 17 | POOLING_MODE: align 18 | CROP_RESIZE_WITH_MAX_POOL: False 19 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/cfgs/res101_ls.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res101 2 | TRAIN: 3 | HAS_RPN: True 4 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 5 | RPN_POSITIVE_OVERLAP: 0.7 6 | RPN_BATCHSIZE: 256 7 | PROPOSAL_METHOD: gt 8 | BG_THRESH_LO: 0.0 9 | DISPLAY: 20 10 | BATCH_SIZE: 128 11 | WEIGHT_DECAY: 0.0001 12 | SCALES: [800] 13 | DOUBLE_BIAS: False 14 | LEARNING_RATE: 0.001 15 | TEST: 16 | HAS_RPN: True 17 | SCALES: [800] 18 | MAX_SIZE: 1200 19 | RPN_POST_NMS_TOP_N: 1000 20 | POOLING_SIZE: 7 21 | POOLING_MODE: align 22 | CROP_RESIZE_WITH_MAX_POOL: False 23 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/cfgs/res50.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res50 2 | TRAIN: 3 | HAS_RPN: True 4 | # IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | WEIGHT_DECAY: 0.0001 13 | DOUBLE_BIAS: False 14 | SNAPSHOT_PREFIX: res50_faster_rcnn 15 | TEST: 16 | HAS_RPN: True 17 | POOLING_MODE: crop 18 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/cfgs/vgg16.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: vgg16 2 | TRAIN: 3 | HAS_RPN: True 4 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 5 | RPN_POSITIVE_OVERLAP: 0.7 6 | RPN_BATCHSIZE: 256 7 | PROPOSAL_METHOD: gt 8 | BG_THRESH_LO: 0.0 9 | BATCH_SIZE: 256 10 | LEARNING_RATE: 0.01 11 | TEST: 12 | HAS_RPN: True 13 | POOLING_MODE: align 14 | CROP_RESIZE_WITH_MAX_POOL: False 15 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/images/img1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/images/img1.jpg -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/images/img1_det.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/images/img1_det.jpg -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/images/img1_det_res101.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/images/img1_det_res101.jpg -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/images/img2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/images/img2.jpg -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/images/img2_det.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/images/img2_det.jpg -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/images/img2_det_res101.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/images/img2_det_res101.jpg -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/images/img3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/images/img3.jpg -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/images/img3_det.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/images/img3_det.jpg -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/images/img3_det_res101.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/images/img3_det_res101.jpg -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/images/img4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/images/img4.jpg -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/images/img4_det.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/images/img4_det.jpg -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/images/img4_det_res101.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/images/img4_det_res101.jpg -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m: -------------------------------------------------------------------------------- 1 | function VOCopts = get_voc_opts(path) 2 | 3 | tmp = pwd; 4 | cd(path); 5 | try 6 | addpath('VOCcode'); 7 | VOCinit; 8 | catch 9 | rmpath('VOCcode'); 10 | cd(tmp); 11 | error(sprintf('VOCcode directory not found under %s', path)); 12 | end 13 | rmpath('VOCcode'); 14 | cd(tmp); 15 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m: -------------------------------------------------------------------------------- 1 | function res = voc_eval(path, comp_id, test_set, output_dir) 2 | 3 | VOCopts = get_voc_opts(path); 4 | VOCopts.testset = test_set; 5 | 6 | for i = 1:length(VOCopts.classes) 7 | cls = VOCopts.classes{i}; 8 | res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir); 9 | end 10 | 11 | fprintf('\n~~~~~~~~~~~~~~~~~~~~\n'); 12 | fprintf('Results:\n'); 13 | aps = [res(:).ap]'; 14 | fprintf('%.1f\n', aps * 100); 15 | fprintf('%.1f\n', mean(aps) * 100); 16 | fprintf('~~~~~~~~~~~~~~~~~~~~\n'); 17 | 18 | function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir) 19 | 20 | test_set = VOCopts.testset; 21 | year = VOCopts.dataset(4:end); 22 | 23 | addpath(fullfile(VOCopts.datadir, 'VOCcode')); 24 | 25 | res_fn = sprintf(VOCopts.detrespath, comp_id, cls); 26 | 27 | recall = []; 28 | prec = []; 29 | ap = 0; 30 | ap_auc = 0; 31 | 32 | do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test'); 33 | if do_eval 34 | % Bug in VOCevaldet requires that tic has been called first 35 | tic; 36 | [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true); 37 | ap_auc = xVOCap(recall, prec); 38 | 39 | % force plot limits 40 | ylim([0 1]); 41 | xlim([0 1]); 42 | 43 | print(gcf, '-djpeg', '-r0', ... 44 | [output_dir '/' cls '_pr.jpg']); 45 | end 46 | fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc); 47 | 48 | res.recall = recall; 49 | res.prec = prec; 50 | res.ap = ap; 51 | res.ap_auc = ap_auc; 52 | 53 | save([output_dir '/' cls '_pr.mat'], ... 
54 | 'res', 'recall', 'prec', 'ap', 'ap_auc'); 55 | 56 | rmpath(fullfile(VOCopts.datadir, 'VOCcode')); 57 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m: -------------------------------------------------------------------------------- 1 | function ap = xVOCap(rec,prec) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | mrec=[0 ; rec ; 1]; 5 | mpre=[0 ; prec ; 0]; 6 | for i=numel(mpre)-1:-1:1 7 | mpre(i)=max(mpre(i),mpre(i+1)); 8 | end 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1; 10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 11 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/datasets/ds_utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | 12 | 13 | def unique_boxes(boxes, scale=1.0): 14 | """Return indices of unique boxes.""" 15 | v = np.array([1, 1e3, 1e6, 1e9]) 16 | hashes = np.round(boxes * scale).dot(v) 17 | _, index = np.unique(hashes, return_index=True) 18 | return np.sort(index) 19 | 20 | 21 | def xywh_to_xyxy(boxes): 22 | """Convert [x y w h] box format to [x1 y1 x2 y2] format.""" 23 | return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1)) 24 | 25 | 26 | def xyxy_to_xywh(boxes): 27 | """Convert [x1 y1 x2 y2] box format to [x y w h] format.""" 28 | return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1)) 29 | 30 | 31 | def validate_boxes(boxes, width=0, height=0): 32 | """Check that a set of boxes are valid.""" 33 | x1 = boxes[:, 0] 34 | y1 = boxes[:, 1] 35 | x2 = boxes[:, 2] 36 | y2 = boxes[:, 3] 37 | assert (x1 >= 0).all() 38 | assert (y1 >= 0).all() 39 | assert (x2 >= x1).all() 40 | assert (y2 >= y1).all() 41 | assert (x2 < width).all() 42 | assert (y2 < height).all() 43 | 44 | 45 | def filter_small_boxes(boxes, min_size): 46 | w = boxes[:, 2] - boxes[:, 0] 47 | h = boxes[:, 3] - boxes[:, 1] 48 | keep = np.where((w >= min_size) & (h > min_size))[0] 49 | return keep 50 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/datasets/factory.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Factory method for easily getting imdbs by name.""" 9 | from __future__ import absolute_import 10 | from __future__ 
import division 11 | from __future__ import print_function 12 | 13 | __sets = {} 14 | from datasets.pascal_voc import pascal_voc 15 | from datasets.coco import coco 16 | from datasets.imagenet import imagenet 17 | from datasets.vg import vg 18 | 19 | import numpy as np 20 | 21 | # Set up voc__ 22 | for year in ['2007', '2012']: 23 | for split in ['train', 'val', 'trainval', 'test']: 24 | name = 'voc_{}_{}'.format(year, split) 25 | __sets[name] = (lambda split=split, year=year: pascal_voc(split, year)) 26 | 27 | # Set up coco_2014_ 28 | for year in ['2014']: 29 | for split in ['train', 'val', 'minival', 'valminusminival', 'trainval']: 30 | name = 'coco_{}_{}'.format(year, split) 31 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 32 | 33 | # Set up coco_2014_cap_ 34 | for year in ['2014']: 35 | for split in ['train', 'val', 'capval', 'valminuscapval', 'trainval']: 36 | name = 'coco_{}_{}'.format(year, split) 37 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 38 | 39 | # Set up coco_2015_ 40 | for year in ['2015']: 41 | for split in ['test', 'test-dev']: 42 | name = 'coco_{}_{}'.format(year, split) 43 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 44 | 45 | # Set up vg_ 46 | # for version in ['1600-400-20']: 47 | # for split in ['minitrain', 'train', 'minival', 'val', 'test']: 48 | # name = 'vg_{}_{}'.format(version,split) 49 | # __sets[name] = (lambda split=split, version=version: vg(version, split)) 50 | for version in ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20']: 51 | for split in ['minitrain', 'smalltrain', 'train', 'minival', 'smallval', 'val', 'test']: 52 | name = 'vg_{}_{}'.format(version,split) 53 | __sets[name] = (lambda split=split, version=version: vg(version, split)) 54 | 55 | # set up image net. 56 | for split in ['train', 'val', 'val1', 'val2', 'test']: 57 | name = 'imagenet_{}'.format(split) 58 | devkit_path = 'data/imagenet/ILSVRC/devkit' 59 | data_path = 'data/imagenet/ILSVRC' 60 | __sets[name] = (lambda split=split, devkit_path=devkit_path, data_path=data_path: imagenet(split,devkit_path,data_path)) 61 | 62 | def get_imdb(name): 63 | """Get an imdb (image database) by name.""" 64 | if name not in __sets: 65 | raise KeyError('Unknown dataset: {}'.format(name)) 66 | return __sets[name]() 67 | 68 | 69 | def list_imdbs(): 70 | """List all registered imdbs.""" 71 | return list(__sets.keys()) 72 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/datasets/tools/mcg_munge.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import sys 4 | 5 | """Hacky tool to convert file system layout of MCG boxes downloaded from 6 | http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/ 7 | so that it's consistent with those computed by Jan Hosang (see: 8 | http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal- 9 | computing/research/object-recognition-and-scene-understanding/how- 10 | good-are-detection-proposals-really/) 11 | 12 | NB: Boxes from the MCG website are in (y1, x1, y2, x2) order. 13 | Boxes from Hosang et al. are in (x1, y1, x2, y2) order. 
14 | """
15 | 
16 | def munge(src_dir):
17 |     # stored as: ./MCG-COCO-val2014-boxes/COCO_val2014_000000193401.mat
18 |     # want: ./MCG/mat/COCO_val2014_0/COCO_val2014_000000141/COCO_val2014_000000141334.mat
19 | 
20 |     files = os.listdir(src_dir)
21 |     for fn in files:
22 |         base, ext = os.path.splitext(fn)
23 |         # first 14 chars / first 22 chars / all chars + .mat
24 |         # COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat
25 |         first = base[:14]
26 |         second = base[:22]
27 |         dst_dir = os.path.join('MCG', 'mat', first, second)
28 |         if not os.path.exists(dst_dir):
29 |             os.makedirs(dst_dir)
30 |         src = os.path.join(src_dir, fn)
31 |         dst = os.path.join(dst_dir, fn)
32 |         print('MV: {} -> {}'.format(src, dst))
33 |         os.rename(src, dst)
34 | 
35 | if __name__ == '__main__':
36 |     # src_dir should look something like:
37 |     # src_dir = 'MCG-COCO-val2014-boxes'
38 |     src_dir = sys.argv[1]
39 |     munge(src_dir)
40 | 
--------------------------------------------------------------------------------
/chapter4/faster-rcnn-pytorch/lib/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | # CUDA_PATH=/usr/local/cuda/
4 | 
5 | export CUDA_PATH=/usr/local/cuda/
6 | # You may also want to add the following
7 | # export C_INCLUDE_PATH=/opt/cuda/include
8 | 
9 | export CXXFLAGS="-std=c++11"
10 | export CFLAGS="-std=c99"
11 | 
12 | python3 setup.py build_ext --inplace
13 | rm -rf build
14 | 
15 | CUDA_ARCH="-gencode arch=compute_61,code=sm_61 "
16 | 
17 | # compile NMS
18 | cd model/nms/src
19 | echo "Compiling nms kernels by nvcc..."
20 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu \
21 |      -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
22 | 
23 | cd ../
24 | python3 build.py
25 | 
26 | # compile roi_pooling
27 | cd ../../
28 | cd model/roi_pooling/src
29 | echo "Compiling roi pooling kernels by nvcc..."
30 | nvcc -c -o roi_pooling.cu.o roi_pooling_kernel.cu \
31 |      -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
32 | cd ../
33 | python3 build.py
34 | 
35 | # compile roi_align
36 | cd ../../
37 | cd model/roi_align/src
38 | echo "Compiling roi align kernels by nvcc..."
39 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu \
40 |      -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
41 | cd ../
42 | python3 build.py
43 | 
44 | # compile roi_crop
45 | cd ../../
46 | cd model/roi_crop/src
47 | echo "Compiling roi crop kernels by nvcc..."
48 | nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu \ 49 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 50 | cd ../ 51 | python3 build.py 52 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/faster_rcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/faster_rcnn/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/faster_rcnn/vgg16.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | import math 14 | import torchvision.models as models 15 | from model.faster_rcnn.faster_rcnn import _fasterRCNN 16 | import pdb 17 | 18 | class vgg16(_fasterRCNN): 19 | def __init__(self, classes, pretrained=False, class_agnostic=False): 20 | self.model_path = 'data/pretrained_model/vgg16_caffe.pth' 21 | self.dout_base_model = 512 22 | self.pretrained = pretrained 23 | self.class_agnostic = class_agnostic 24 | 25 | _fasterRCNN.__init__(self, classes, class_agnostic) 26 | 27 | def _init_modules(self): 28 | vgg = models.vgg16() 29 | if self.pretrained: 30 | print("Loading pretrained weights from %s" %(self.model_path)) 31 | state_dict = torch.load(self.model_path) 32 | vgg.load_state_dict({k:v for k,v in state_dict.items() if k in vgg.state_dict()}) 33 | 34 | vgg.classifier = nn.Sequential(*list(vgg.classifier._modules.values())[:-1]) # drop VGG's final fc layer so the classifier ends at fc7 35 | 36 | # not using the last maxpool layer 37 | self.RCNN_base = nn.Sequential(*list(vgg.features._modules.values())[:-1]) 38 | 39 | # Fix the layers before conv3: 40 | for layer in range(10): 41 | for p in self.RCNN_base[layer].parameters(): p.requires_grad = False 42 | 43 | # self.RCNN_base = _RCNN_base(vgg.features, self.classes, self.dout_base_model) 44 | 45 | self.RCNN_top = vgg.classifier 46 | 47 | # classification and bbox regression heads on top of the fc7 features 48 | self.RCNN_cls_score = nn.Linear(4096, self.n_classes) 49 | 50 | if self.class_agnostic: 51 | self.RCNN_bbox_pred = nn.Linear(4096, 4) 52 | else: 53 | self.RCNN_bbox_pred = nn.Linear(4096, 4 * self.n_classes) 54 | 55 | def _head_to_tail(self, pool5): 56 | 57 | pool5_flat = pool5.view(pool5.size(0), -1) 58 | fc7 = self.RCNN_top(pool5_flat) 59 | 60 | return fc7 61 | 62 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/nms/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | 
-------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/nms/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/nms/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/nms/_ext/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/nms/_ext/nms/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._nms import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/nms/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | #this_file = os.path.dirname(__file__) 7 | 8 | sources = [] 9 | headers = [] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/nms_cuda.c'] 16 | headers += ['src/nms_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/nms_cuda_kernel.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | print(extra_objects) 25 | 26 | ffi = create_extension( 27 | '_ext.nms', 28 | headers=headers, 29 | sources=sources, 30 | define_macros=defines, 31 | relative_to=__file__, 32 | with_cuda=with_cuda, 33 | extra_objects=extra_objects 34 | ) 35 | 36 | if __name__ == '__main__': 37 | ffi.build() 38 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/nms/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd src 6 | echo "Compiling nms kernels by nvcc..." 
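# The nvcc step below produces nms_cuda_kernel.cu.o, which build.py links
# into the _ext.nms FFI extension via extra_objects, so this script must be
# run before nms_gpu can be imported. -arch=sm_52 targets Maxwell GPUs and
# may need raising to match newer hardware.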
7 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 8 | 9 | cd ../ 10 | python build.py 11 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/nms/nms_cpu.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | import torch 5 | 6 | def nms_cpu(dets, thresh): 7 | dets = dets.numpy() 8 | x1 = dets[:, 0] 9 | y1 = dets[:, 1] 10 | x2 = dets[:, 2] 11 | y2 = dets[:, 3] 12 | scores = dets[:, 4] 13 | 14 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) # box areas (inclusive pixel coordinates, hence the +1) 15 | order = scores.argsort()[::-1] # indices sorted by descending score 16 | 17 | keep = [] 18 | while order.size > 0: 19 | i = order.item(0) 20 | keep.append(i) 21 | xx1 = np.maximum(x1[i], x1[order[1:]]) 22 | yy1 = np.maximum(y1[i], y1[order[1:]]) 23 | xx2 = np.minimum(x2[i], x2[order[1:]]) 24 | yy2 = np.minimum(y2[i], y2[order[1:]]) 25 | 26 | w = np.maximum(0.0, xx2 - xx1 + 1) 27 | h = np.maximum(0.0, yy2 - yy1 + 1) 28 | inter = w * h 29 | ovr = inter / (areas[i] + areas[order[1:]] - inter) # IoU with the current highest-scoring box 30 | 31 | inds = np.where(ovr <= thresh)[0] 32 | order = order[inds + 1] # keep boxes below the overlap threshold; +1 skips the box just kept 33 | 34 | return torch.IntTensor(keep) 35 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/nms/nms_gpu.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import torch 3 | import numpy as np 4 | from ._ext import nms 5 | import pdb 6 | 7 | def nms_gpu(dets, thresh): 8 | keep = dets.new(dets.size(0), 1).zero_().int() 9 | num_out = dets.new(1).zero_().int() 10 | nms.nms_cuda(keep, dets, num_out, thresh) 11 | keep = keep[:num_out[0]] 12 | return keep 13 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | import torch 8 | from model.utils.config import cfg 9 | if torch.cuda.is_available(): 10 | from model.nms.nms_gpu import nms_gpu 11 | from model.nms.nms_cpu import nms_cpu 12 | 13 | def nms(dets, thresh, force_cpu=False): 14 | """Dispatch to either CPU or GPU NMS implementations.""" 15 | if dets.shape[0] == 0: 16 | return [] 17 | # ---numpy version--- 18 | # original: return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 19 | # ---pytorch version--- 20 | 21 | return nms_cpu(dets, thresh) if force_cpu else nms_gpu(dets, thresh) 22 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/nms/src/nms_cuda.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include <stdio.h> 3 | #include "nms_cuda_kernel.h" 4 | 5 | // this symbol will be resolved automatically from PyTorch libs 6 | extern THCState *state; 7 | 8 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host, 9 | THCudaIntTensor *num_out, float nms_overlap_thresh) { 10 | 11 | nms_cuda_compute(THCudaIntTensor_data(state, keep_out), 12 | THCudaIntTensor_data(state, num_out), 13 | THCudaTensor_data(state, boxes_host), 14 | THCudaTensor_size(state, boxes_host, 0), 15 | 
THCudaTensor_size(state, boxes_host, 1), 16 | nms_overlap_thresh); 17 | 18 | return 1; 19 | } 20 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/nms/src/nms_cuda.h: -------------------------------------------------------------------------------- 1 | // int nms_cuda(THCudaTensor *keep_out, THCudaTensor *num_out, 2 | // THCudaTensor *boxes_host, THCudaTensor *nms_overlap_thresh); 3 | 4 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host, 5 | THCudaIntTensor *num_out, float nms_overlap_thresh); 6 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/nms/src/nms_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num, 6 | int boxes_dim, float nms_overlap_thresh); 7 | 8 | #ifdef __cplusplus 9 | } 10 | #endif 11 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_align/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/roi_align/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_align/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/roi_align/_ext/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_align/_ext/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_align import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_align/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | sources = ['src/roi_align.c'] 7 | headers = ['src/roi_align.h'] 8 | extra_objects = [] 9 | #sources = [] 10 | #headers = [] 11 | defines = [] 12 | with_cuda = False 13 | 14 | this_file = os.path.dirname(os.path.realpath(__file__)) 15 | print(this_file) 16 | 17 | if torch.cuda.is_available(): 18 | print('Including CUDA code.') 19 | sources += ['src/roi_align_cuda.c'] 20 | headers += ['src/roi_align_cuda.h'] 21 | defines += [('WITH_CUDA', None)] 22 | with_cuda = True 23 | 24 | extra_objects = ['src/roi_align_kernel.cu.o'] 25 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 26 | 27 | ffi = create_extension( 28 | '_ext.roi_align', 29 | headers=headers, 30 | 
sources=sources, 31 | define_macros=defines, 32 | relative_to=__file__, 33 | with_cuda=with_cuda, 34 | extra_objects=extra_objects 35 | ) 36 | 37 | if __name__ == '__main__': 38 | ffi.build() 39 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_align/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/roi_align/functions/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_align/functions/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from .._ext import roi_align 4 | 5 | 6 | # TODO use save_for_backward instead 7 | class RoIAlignFunction(Function): 8 | def __init__(self, aligned_height, aligned_width, spatial_scale): 9 | self.aligned_width = int(aligned_width) 10 | self.aligned_height = int(aligned_height) 11 | self.spatial_scale = float(spatial_scale) 12 | self.rois = None 13 | self.feature_size = None 14 | 15 | def forward(self, features, rois): 16 | self.rois = rois 17 | self.feature_size = features.size() 18 | 19 | batch_size, num_channels, data_height, data_width = features.size() 20 | num_rois = rois.size(0) 21 | 22 | output = features.new(num_rois, num_channels, self.aligned_height, self.aligned_width).zero_() 23 | if features.is_cuda: 24 | roi_align.roi_align_forward_cuda(self.aligned_height, 25 | self.aligned_width, 26 | self.spatial_scale, features, 27 | rois, output) 28 | else: 29 | roi_align.roi_align_forward(self.aligned_height, 30 | self.aligned_width, 31 | self.spatial_scale, features, 32 | rois, output) 33 | # raise NotImplementedError 34 | 35 | return output 36 | 37 | def backward(self, grad_output): 38 | assert(self.feature_size is not None and grad_output.is_cuda) 39 | 40 | batch_size, num_channels, data_height, data_width = self.feature_size 41 | 42 | grad_input = self.rois.new(batch_size, num_channels, data_height, 43 | data_width).zero_() 44 | roi_align.roi_align_backward_cuda(self.aligned_height, 45 | self.aligned_width, 46 | self.spatial_scale, grad_output, 47 | self.rois, grad_input) 48 | 49 | # print grad_input 50 | 51 | return grad_input, None 52 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_align/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd src 6 | echo "Compiling roi align kernels by nvcc..." 
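# -arch=sm_52 below differs from the sm_61 set in lib/make.sh's CUDA_ARCH;
# both should be set to the compute capability of the GPU the extension
# will actually run on.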
7 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 8 | 9 | cd ../ 10 | python build.py 11 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_align/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/roi_align/modules/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_align/modules/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from torch.nn.functional import avg_pool2d, max_pool2d 3 | from ..functions.roi_align import RoIAlignFunction 4 | 5 | 6 | class RoIAlign(Module): 7 | def __init__(self, aligned_height, aligned_width, spatial_scale): 8 | super(RoIAlign, self).__init__() 9 | 10 | self.aligned_width = int(aligned_width) 11 | self.aligned_height = int(aligned_height) 12 | self.spatial_scale = float(spatial_scale) 13 | 14 | def forward(self, features, rois): 15 | return RoIAlignFunction(self.aligned_height, self.aligned_width, 16 | self.spatial_scale)(features, rois) 17 | 18 | class RoIAlignAvg(Module): 19 | def __init__(self, aligned_height, aligned_width, spatial_scale): 20 | super(RoIAlignAvg, self).__init__() 21 | 22 | self.aligned_width = int(aligned_width) 23 | self.aligned_height = int(aligned_height) 24 | self.spatial_scale = float(spatial_scale) 25 | 26 | def forward(self, features, rois): 27 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, 28 | self.spatial_scale)(features, rois) 29 | return avg_pool2d(x, kernel_size=2, stride=1) # average the (h+1)x(w+1) grid back down to h x w 30 | 31 | class RoIAlignMax(Module): 32 | def __init__(self, aligned_height, aligned_width, spatial_scale): 33 | super(RoIAlignMax, self).__init__() 34 | 35 | self.aligned_width = int(aligned_width) 36 | self.aligned_height = int(aligned_height) 37 | self.spatial_scale = float(spatial_scale) 38 | 39 | def forward(self, features, rois): 40 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, 41 | self.spatial_scale)(features, rois) 42 | return max_pool2d(x, kernel_size=2, stride=1) 43 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_align/src/roi_align.h: -------------------------------------------------------------------------------- 1 | int roi_align_forward(int aligned_height, int aligned_width, float spatial_scale, 2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output); 3 | 4 | int roi_align_backward(int aligned_height, int aligned_width, float spatial_scale, 5 | THFloatTensor * top_grad, THFloatTensor * rois, THFloatTensor * bottom_grad); 6 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_align/src/roi_align_cuda.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include <math.h> 3 | #include "roi_align_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 
| float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | 16 | // Number of ROIs 17 | int num_rois = THCudaTensor_size(state, rois, 0); 18 | int size_rois = THCudaTensor_size(state, rois, 1); 19 | if (size_rois != 5) 20 | { 21 | return 0; 22 | } 23 | 24 | // data height 25 | int data_height = THCudaTensor_size(state, features, 2); 26 | // data width 27 | int data_width = THCudaTensor_size(state, features, 3); 28 | // Number of channels 29 | int num_channels = THCudaTensor_size(state, features, 1); 30 | 31 | cudaStream_t stream = THCState_getCurrentStream(state); 32 | 33 | ROIAlignForwardLaucher( 34 | data_flat, spatial_scale, num_rois, data_height, 35 | data_width, num_channels, aligned_height, 36 | aligned_width, rois_flat, 37 | output_flat, stream); 38 | 39 | return 1; 40 | } 41 | 42 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 43 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 44 | { 45 | // Grab the input tensor 46 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 47 | float * rois_flat = THCudaTensor_data(state, rois); 48 | 49 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 50 | 51 | // Number of ROIs 52 | int num_rois = THCudaTensor_size(state, rois, 0); 53 | int size_rois = THCudaTensor_size(state, rois, 1); 54 | if (size_rois != 5) 55 | { 56 | return 0; 57 | } 58 | 59 | // batch size 60 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 61 | // data height 62 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 63 | // data width 64 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 65 | // Number of channels 66 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 67 | 68 | cudaStream_t stream = THCState_getCurrentStream(state); 69 | ROIAlignBackwardLaucher( 70 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 71 | data_width, num_channels, aligned_height, 72 | aligned_width, rois_flat, 73 | bottom_grad_flat, stream); 74 | 75 | return 1; 76 | } 77 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_align/src/roi_align_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output); 3 | 4 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad); 6 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_align/src/roi_align_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_ALIGN_KERNEL 2 | #define _ROI_ALIGN_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | __global__ void ROIAlignForward(const int nthreads, const float* bottom_data, 9 | const float spatial_scale, const int height, const int width, 10 | const int channels, const int aligned_height, const int aligned_width, 11 | const float* bottom_rois, float* top_data); 12 | 13 | int ROIAlignForwardLaucher( 14 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 15 | const int width, const int channels, const int aligned_height, 16 | const int 
aligned_width, const float* bottom_rois, 17 | float* top_data, cudaStream_t stream); 18 | 19 | __global__ void ROIAlignBackward(const int nthreads, const float* top_diff, 20 | const float spatial_scale, const int height, const int width, 21 | const int channels, const int aligned_height, const int aligned_width, 22 | float* bottom_diff, const float* bottom_rois); 23 | 24 | int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 25 | const int height, const int width, const int channels, const int aligned_height, 26 | const int aligned_width, const float* bottom_rois, 27 | float* bottom_diff, cudaStream_t stream); 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | 33 | #endif 34 | 35 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/roi_crop/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/roi_crop/_ext/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/_ext/crop_resize/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._crop_resize import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | locals[symbol] = _wrap_function(fn, _ffi) 10 | __all__.append(symbol) 11 | 12 | _import_symbols(locals()) 13 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/_ext/roi_crop/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_crop import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | #this_file = os.path.dirname(__file__) 7 | 8 | sources = ['src/roi_crop.c'] 9 | headers = ['src/roi_crop.h'] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/roi_crop_cuda.c'] 16 | headers += ['src/roi_crop_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | 
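# torch.utils.ffi was removed in PyTorch 1.0 (superseded by
# torch.utils.cpp_extension), so this build script only works on the old
# 0.x releases; the .cu.o object listed below must already have been
# produced by make.sh before running it.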
print(this_file) 22 | extra_objects = ['src/roi_crop_cuda_kernel.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | 25 | ffi = create_extension( 26 | '_ext.roi_crop', 27 | headers=headers, 28 | sources=sources, 29 | define_macros=defines, 30 | relative_to=__file__, 31 | with_cuda=with_cuda, 32 | extra_objects=extra_objects 33 | ) 34 | 35 | if __name__ == '__main__': 36 | ffi.build() 37 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/roi_crop/functions/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/functions/crop_resize.py: -------------------------------------------------------------------------------- 1 | # functions/add.py 2 | import torch 3 | from torch.autograd import Function 4 | from .._ext import roi_crop 5 | from cffi import FFI 6 | ffi = FFI() 7 | 8 | class RoICropFunction(Function): 9 | def forward(self, input1, input2): 10 | self.input1 = input1 11 | self.input2 = input2 12 | self.device_c = ffi.new("int *") 13 | output = torch.zeros(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2]) 14 | #print('device %d' % torch.cuda.current_device()) 15 | if input1.is_cuda: 16 | self.device = torch.cuda.current_device() 17 | else: 18 | self.device = -1 19 | self.device_c[0] = self.device 20 | if not input1.is_cuda: 21 | roi_crop.BilinearSamplerBHWD_updateOutput(input1, input2, output) 22 | else: 23 | output = output.cuda(self.device) 24 | roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output) 25 | return output 26 | 27 | def backward(self, grad_output): 28 | grad_input1 = torch.zeros(self.input1.size()) 29 | grad_input2 = torch.zeros(self.input2.size()) 30 | #print('backward device %d' % self.device) 31 | if not grad_output.is_cuda: 32 | roi_crop.BilinearSamplerBHWD_updateGradInput(self.input1, self.input2, grad_input1, grad_input2, grad_output) 33 | else: 34 | grad_input1 = grad_input1.cuda(self.device) 35 | grad_input2 = grad_input2.cuda(self.device) 36 | roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(self.input1, self.input2, grad_input1, grad_input2, grad_output) 37 | return grad_input1, grad_input2 38 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/functions/gridgen.py: -------------------------------------------------------------------------------- 1 | # functions/add.py 2 | import torch 3 | from torch.autograd import Function 4 | import numpy as np 5 | 6 | 7 | class AffineGridGenFunction(Function): 8 | def __init__(self, height, width,lr=1): 9 | super(AffineGridGenFunction, self).__init__() 10 | self.lr = lr 11 | self.height, self.width = height, width 12 | self.grid = np.zeros( [self.height, self.width, 3], dtype=np.float32) 13 | self.grid[:,:,0] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.height)), 0), repeats = self.width, axis = 0).T, 0) 14 | self.grid[:,:,1] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.width)), 0), repeats = self.height, axis = 0), 0) 15 | # self.grid[:,:,0] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 
2.0/(self.height - 1)), 0), repeats = self.width, axis = 0).T, 0) 16 | # self.grid[:,:,1] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.width - 1)), 0), repeats = self.height, axis = 0), 0) 17 | self.grid[:,:,2] = np.ones([self.height, width]) 18 | self.grid = torch.from_numpy(self.grid.astype(np.float32)) 19 | #print(self.grid) 20 | 21 | def forward(self, input1): 22 | self.input1 = input1 23 | output = input1.new(torch.Size([input1.size(0)]) + self.grid.size()).zero_() 24 | self.batchgrid = input1.new(torch.Size([input1.size(0)]) + self.grid.size()).zero_() 25 | for i in range(input1.size(0)): 26 | self.batchgrid[i] = self.grid # copy the base grid into each batch slot 27 | 28 | # if input1.is_cuda: 29 | # self.batchgrid = self.batchgrid.cuda() 30 | # output = output.cuda() 31 | 32 | for i in range(input1.size(0)): 33 | output = torch.bmm(self.batchgrid.view(-1, self.height*self.width, 3), torch.transpose(input1, 1, 2)).view(-1, self.height, self.width, 2) 34 | 35 | return output 36 | 37 | def backward(self, grad_output): 38 | 39 | grad_input1 = self.input1.new(self.input1.size()).zero_() 40 | 41 | # if grad_output.is_cuda: 42 | # self.batchgrid = self.batchgrid.cuda() 43 | # grad_input1 = grad_input1.cuda() 44 | 45 | grad_input1 = torch.baddbmm(grad_input1, torch.transpose(grad_output.view(-1, self.height*self.width, 2), 1,2), self.batchgrid.view(-1, self.height*self.width, 3)) 46 | return grad_input1 47 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/functions/roi_crop.py: -------------------------------------------------------------------------------- 1 | # functions/add.py 2 | import torch 3 | from torch.autograd import Function 4 | from .._ext import roi_crop 5 | import pdb 6 | 7 | class RoICropFunction(Function): 8 | def forward(self, input1, input2): 9 | self.input1 = input1.clone() 10 | self.input2 = input2.clone() 11 | output = input2.new(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2]).zero_() 12 | assert output.get_device() == input1.get_device(), "output and input1 must be on the same device" 13 | assert output.get_device() == input2.get_device(), "output and input2 must be on the same device" 14 | roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output) 15 | return output 16 | 17 | def backward(self, grad_output): 18 | grad_input1 = self.input1.new(self.input1.size()).zero_() 19 | grad_input2 = self.input2.new(self.input2.size()).zero_() 20 | roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(self.input1, self.input2, grad_input1, grad_input2, grad_output) 21 | return grad_input1, grad_input2 22 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd src 6 | echo "Compiling roi crop kernels by nvcc..." 
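# CUDA_PATH above is assigned but never referenced below; nvcc is resolved
# from PATH, so the CUDA toolkit's bin directory must be on PATH when this
# script runs.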
7 | nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 8 | 9 | cd ../ 10 | python build.py 11 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/roi_crop/modules/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/modules/roi_crop.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_crop import RoICropFunction 3 | 4 | class _RoICrop(Module): 5 | def __init__(self, layout = 'BHWD'): 6 | super(_RoICrop, self).__init__() 7 | def forward(self, input1, input2): 8 | return RoICropFunction()(input1, input2) 9 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/src/roi_crop.h: -------------------------------------------------------------------------------- 1 | int BilinearSamplerBHWD_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output); 2 | 3 | int BilinearSamplerBHWD_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages, 4 | THFloatTensor *gradGrids, THFloatTensor *gradOutput); 5 | 6 | 7 | 8 | int BilinearSamplerBCHW_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output); 9 | 10 | int BilinearSamplerBCHW_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages, 11 | THFloatTensor *gradGrids, THFloatTensor *gradOutput); 12 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/src/roi_crop_cuda.h: -------------------------------------------------------------------------------- 1 | // Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW) 2 | // we assume BHWD format in inputImages 3 | // we assume BHW(YX) format on grids 4 | 5 | int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *output); 6 | 7 | int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *gradInputImages, 8 | THCudaTensor *gradGrids, THCudaTensor *gradOutput); 9 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_crop/src/roi_crop_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | 6 | int BilinearSamplerBHWD_updateOutput_cuda_kernel(/*output->size[3]*/int oc, 7 | /*output->size[2]*/int ow, 8 | /*output->size[1]*/int oh, 9 | /*output->size[0]*/int ob, 10 | /*THCudaTensor_size(state, inputImages, 3)*/int ic, 11 | /*THCudaTensor_size(state, inputImages, 1)*/int ih, 12 | /*THCudaTensor_size(state, inputImages, 2)*/int iw, 13 | /*THCudaTensor_size(state, inputImages, 0)*/int ib, 14 | /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw, 15 | /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw, 16 | /*THCudaTensor *output*/float *output, 
int osb, int osc, int osh, int osw, 17 | /*THCState_getCurrentStream(state)*/cudaStream_t stream); 18 | 19 | int BilinearSamplerBHWD_updateGradInput_cuda_kernel(/*gradOutput->size[3]*/int goc, 20 | /*gradOutput->size[2]*/int gow, 21 | /*gradOutput->size[1]*/int goh, 22 | /*gradOutput->size[0]*/int gob, 23 | /*THCudaTensor_size(state, inputImages, 3)*/int ic, 24 | /*THCudaTensor_size(state, inputImages, 1)*/int ih, 25 | /*THCudaTensor_size(state, inputImages, 2)*/int iw, 26 | /*THCudaTensor_size(state, inputImages, 0)*/int ib, 27 | /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw, 28 | /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw, 29 | /*THCudaTensor *gradInputImages*/float *gradInputImages, int gisb, int gisc, int gish, int gisw, 30 | /*THCudaTensor *gradGrids*/float *gradGrids, int ggsb, int ggsc, int ggsh, int ggsw, 31 | /*THCudaTensor *gradOutput*/float *gradOutput, int gosb, int gosc, int gosh, int gosw, 32 | /*THCState_getCurrentStream(state)*/cudaStream_t stream); 33 | 34 | 35 | #ifdef __cplusplus 36 | } 37 | #endif 38 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/_ext/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/_ext/roi_pooling/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_pooling import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | 7 | sources = ['src/roi_pooling.c'] 8 | headers = ['src/roi_pooling.h'] 9 | extra_objects = [] 10 | defines = [] 11 | with_cuda = False 12 | 13 | this_file = os.path.dirname(os.path.realpath(__file__)) 14 | print(this_file) 15 | 16 | if torch.cuda.is_available(): 17 | print('Including CUDA code.') 18 | sources += ['src/roi_pooling_cuda.c'] 19 | headers += ['src/roi_pooling_cuda.h'] 20 | defines += [('WITH_CUDA', None)] 21 | with_cuda = True 22 | extra_objects = ['src/roi_pooling.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | 25 | ffi = create_extension( 26 | '_ext.roi_pooling', 27 | 
headers=headers, 28 | sources=sources, 29 | define_macros=defines, 30 | relative_to=__file__, 31 | with_cuda=with_cuda, 32 | extra_objects=extra_objects 33 | ) 34 | 35 | if __name__ == '__main__': 36 | ffi.build() 37 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/functions/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/functions/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from .._ext import roi_pooling 4 | import pdb 5 | 6 | class RoIPoolFunction(Function): 7 | def __init__(ctx, pooled_height, pooled_width, spatial_scale): 8 | ctx.pooled_width = pooled_width 9 | ctx.pooled_height = pooled_height 10 | ctx.spatial_scale = spatial_scale 11 | ctx.feature_size = None 12 | 13 | def forward(ctx, features, rois): 14 | ctx.feature_size = features.size() 15 | batch_size, num_channels, data_height, data_width = ctx.feature_size 16 | num_rois = rois.size(0) 17 | output = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_() 18 | ctx.argmax = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_().int() 19 | ctx.rois = rois 20 | if not features.is_cuda: 21 | _features = features.permute(0, 2, 3, 1) 22 | roi_pooling.roi_pooling_forward(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, 23 | _features, rois, output) 24 | else: 25 | roi_pooling.roi_pooling_forward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, 26 | features, rois, output, ctx.argmax) 27 | 28 | return output 29 | 30 | def backward(ctx, grad_output): 31 | assert(ctx.feature_size is not None and grad_output.is_cuda) 32 | batch_size, num_channels, data_height, data_width = ctx.feature_size 33 | grad_input = grad_output.new(batch_size, num_channels, data_height, data_width).zero_() 34 | 35 | roi_pooling.roi_pooling_backward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, 36 | grad_output, ctx.rois, grad_input, ctx.argmax) 37 | 38 | return grad_input, None 39 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/modules/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/modules/roi_pool.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_pool import RoIPoolFunction 3 | 4 | 5 | class _RoIPooling(Module): 6 | def __init__(self, pooled_height, pooled_width, spatial_scale): 7 | super(_RoIPooling, self).__init__() 8 | 9 | self.pooled_width = int(pooled_width) 10 | self.pooled_height = int(pooled_height) 11 | self.spatial_scale = float(spatial_scale) 12 | 13 | def 
forward(self, features, rois): 14 | return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois) 15 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/src/roi_pooling.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output); -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/src/roi_pooling_cuda.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include <math.h> 3 | #include "roi_pooling_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 16 | 17 | // Number of ROIs 18 | int num_rois = THCudaTensor_size(state, rois, 0); 19 | int size_rois = THCudaTensor_size(state, rois, 1); 20 | if (size_rois != 5) 21 | { 22 | return 0; 23 | } 24 | 25 | // batch size 26 | // int batch_size = THCudaTensor_size(state, features, 0); 27 | // if (batch_size != 1) 28 | // { 29 | // return 0; 30 | // } 31 | // data height 32 | int data_height = THCudaTensor_size(state, features, 2); 33 | // data width 34 | int data_width = THCudaTensor_size(state, features, 3); 35 | // Number of channels 36 | int num_channels = THCudaTensor_size(state, features, 1); 37 | 38 | cudaStream_t stream = THCState_getCurrentStream(state); 39 | 40 | ROIPoolForwardLaucher( 41 | data_flat, spatial_scale, num_rois, data_height, 42 | data_width, num_channels, pooled_height, 43 | pooled_width, rois_flat, 44 | output_flat, argmax_flat, stream); 45 | 46 | return 1; 47 | } 48 | 49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 50 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax) 51 | { 52 | // Grab the input tensor 53 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 54 | float * rois_flat = THCudaTensor_data(state, rois); 55 | 56 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 57 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 58 | 59 | // Number of ROIs 60 | int num_rois = THCudaTensor_size(state, rois, 0); 61 | int size_rois = THCudaTensor_size(state, rois, 1); 62 | if (size_rois != 5) 63 | { 64 | return 0; 65 | } 66 | 67 | // batch size 68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 69 | // if (batch_size != 1) 70 | // { 71 | // return 0; 72 | // } 73 | // data height 74 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 75 | // data width 76 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 77 | // Number of channels 78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 79 | 80 | cudaStream_t stream = THCState_getCurrentStream(state); 81 | ROIPoolBackwardLaucher( 82 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 83 | data_width, num_channels, 
pooled_height, 84 | pooled_width, rois_flat, 85 | bottom_grad_flat, argmax_flat, stream); 86 | 87 | return 1; 88 | } 89 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/src/roi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax); 3 | 4 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax); -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/roi_pooling/src/roi_pooling_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_POOLING_KERNEL 2 | #define _ROI_POOLING_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int ROIPoolForwardLaucher( 9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 10 | const int width, const int channels, const int pooled_height, 11 | const int pooled_width, const float* bottom_rois, 12 | float* top_data, int* argmax_data, cudaStream_t stream); 13 | 14 | 15 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 16 | const int height, const int width, const int channels, const int pooled_height, 17 | const int pooled_width, const float* bottom_rois, 18 | float* bottom_diff, const int* argmax_data, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/rpn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/rpn/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/rpn/generate_anchors.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | # -------------------------------------------------------- 3 | # Faster R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick and Sean Bell 7 | # -------------------------------------------------------- 8 | 9 | import numpy as np 10 | import pdb 11 | 12 | # Verify that we compute the same anchors as Shaoqing's matlab implementation: 13 | # 14 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat 15 | # >> anchors 16 | # 17 | # anchors = 18 | # 19 | # -83 -39 100 56 20 | # -175 -87 192 104 21 | # -359 -183 376 200 22 | # -55 -55 72 72 23 | # -119 -119 136 136 24 | # -247 -247 264 264 25 | # -35 -79 52 96 26 | # -79 -167 96 184 27 | # -167 -343 184 360 28 | 29 | #array([[ -83., -39., 100., 56.], 30 | # [-175., -87., 192., 104.], 31 | # [-359., -183., 376., 200.], 32 | # [ -55., -55., 72., 72.], 33 | # [-119., -119., 136., 136.], 34 | # [-247., -247., 264., 264.], 35 | # [ -35., -79., 52., 96.], 36 | # [ -79., -167., 96., 184.], 37 | 
# [-167., -343., 184., 360.]]) 38 | 39 | try: 40 | xrange # Python 2 41 | except NameError: 42 | xrange = range # Python 3 43 | 44 | 45 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 46 | scales=2**np.arange(3, 6)): 47 | """ 48 | Generate anchor (reference) windows by enumerating aspect ratios X 49 | scales wrt a reference (0, 0, 15, 15) window. 50 | """ 51 | 52 | # First create a base anchor of [0, 0, 15, 15] 53 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 54 | # Vary the base anchor's width and height to generate anchors with three aspect ratios 55 | ratio_anchors = _ratio_enum(base_anchor, ratios) 56 | # Then scale those anchors to obtain the final 9 anchors 57 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 58 | for i in xrange(ratio_anchors.shape[0])]) 59 | # Return the anchors corresponding to the feature map size 60 | return anchors 61 | 62 | def _whctrs(anchor): 63 | """ 64 | Return width, height, x center, and y center for an anchor (window). 65 | """ 66 | 67 | w = anchor[2] - anchor[0] + 1 68 | h = anchor[3] - anchor[1] + 1 69 | x_ctr = anchor[0] + 0.5 * (w - 1) 70 | y_ctr = anchor[1] + 0.5 * (h - 1) 71 | return w, h, x_ctr, y_ctr 72 | 73 | def _mkanchors(ws, hs, x_ctr, y_ctr): 74 | """ 75 | Given a vector of widths (ws) and heights (hs) around a center 76 | (x_ctr, y_ctr), output a set of anchors (windows). 77 | """ 78 | 79 | ws = ws[:, np.newaxis] 80 | hs = hs[:, np.newaxis] 81 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 82 | y_ctr - 0.5 * (hs - 1), 83 | x_ctr + 0.5 * (ws - 1), 84 | y_ctr + 0.5 * (hs - 1))) 85 | return anchors 86 | 87 | def _ratio_enum(anchor, ratios): 88 | """ 89 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 90 | """ 91 | 92 | w, h, x_ctr, y_ctr = _whctrs(anchor) 93 | size = w * h 94 | size_ratios = size / ratios 95 | ws = np.round(np.sqrt(size_ratios)) 96 | hs = np.round(ws * ratios) 97 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 98 | return anchors 99 | 100 | def _scale_enum(anchor, scales): 101 | """ 102 | Enumerate a set of anchors for each scale wrt an anchor. 
103 | """ 104 | 105 | w, h, x_ctr, y_ctr = _whctrs(anchor) 106 | ws = w * scales 107 | hs = h * scales 108 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 109 | return anchors 110 | 111 | if __name__ == '__main__': 112 | import time 113 | t = time.time() 114 | a = generate_anchors() 115 | print(time.time() - t) 116 | print(a) 117 | from IPython import embed; embed() 118 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/utils/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter4/faster-rcnn-pytorch/lib/model/utils/__init__.py -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/utils/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps(np.ndarray[DTYPE_t, ndim=2] boxes, 16 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 17 | return bbox_overlaps_c(boxes, query_boxes) 18 | 19 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_overlaps_c( 20 | np.ndarray[DTYPE_t, ndim=2] boxes, 21 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 22 | """ 23 | Parameters 24 | ---------- 25 | boxes: (N, 4) ndarray of float 26 | query_boxes: (K, 4) ndarray of float 27 | Returns 28 | ------- 29 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 30 | """ 31 | cdef unsigned int N = boxes.shape[0] 32 | cdef unsigned int K = query_boxes.shape[0] 33 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 34 | cdef DTYPE_t iw, ih, box_area 35 | cdef DTYPE_t ua 36 | cdef unsigned int k, n 37 | for k in range(K): 38 | box_area = ( 39 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 40 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 41 | ) 42 | for n in range(N): 43 | iw = ( 44 | min(boxes[n, 2], query_boxes[k, 2]) - 45 | max(boxes[n, 0], query_boxes[k, 0]) + 1 46 | ) 47 | if iw > 0: 48 | ih = ( 49 | min(boxes[n, 3], query_boxes[k, 3]) - 50 | max(boxes[n, 1], query_boxes[k, 1]) + 1 51 | ) 52 | if ih > 0: 53 | ua = float( 54 | (boxes[n, 2] - boxes[n, 0] + 1) * 55 | (boxes[n, 3] - boxes[n, 1] + 1) + 56 | box_area - iw * ih 57 | ) 58 | overlaps[n, k] = iw * ih / ua 59 | return overlaps 60 | 61 | 62 | def bbox_intersections( 63 | np.ndarray[DTYPE_t, ndim=2] boxes, 64 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 65 | return bbox_intersections_c(boxes, query_boxes) 66 | 67 | 68 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_intersections_c( 69 | np.ndarray[DTYPE_t, ndim=2] boxes, 70 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 71 | """ 72 | For each query box compute the intersection ratio covered by boxes 73 | ---------- 74 | Parameters 75 | ---------- 76 | boxes: (N, 4) ndarray of float 77 | query_boxes: (K, 4) ndarray 
of float 78 | Returns 79 | ------- 80 | overlaps: (N, K) ndarray of intersec between boxes and query_boxes 81 | """ 82 | cdef unsigned int N = boxes.shape[0] 83 | cdef unsigned int K = query_boxes.shape[0] 84 | cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) 85 | cdef DTYPE_t iw, ih, box_area 86 | cdef DTYPE_t ua 87 | cdef unsigned int k, n 88 | for k in range(K): 89 | box_area = ( 90 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 91 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 92 | ) 93 | for n in range(N): 94 | iw = ( 95 | min(boxes[n, 2], query_boxes[k, 2]) - 96 | max(boxes[n, 0], query_boxes[k, 0]) + 1 97 | ) 98 | if iw > 0: 99 | ih = ( 100 | min(boxes[n, 3], query_boxes[k, 3]) - 101 | max(boxes[n, 1], query_boxes[k, 1]) + 1 102 | ) 103 | if ih > 0: 104 | intersec[n, k] = iw * ih / box_area 105 | return intersec -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/model/utils/blob.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Blob helper functions.""" 9 | 10 | import numpy as np 11 | # from scipy.misc import imread, imresize 12 | import cv2 13 | 14 | try: 15 | xrange # Python 2 16 | except NameError: 17 | xrange = range # Python 3 18 | 19 | 20 | def im_list_to_blob(ims): 21 | """Convert a list of images into a network input. 22 | 23 | Assumes images are already prepared (means subtracted, BGR order, ...). 24 | """ 25 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 26 | num_images = len(ims) 27 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 28 | dtype=np.float32) 29 | for i in xrange(num_images): 30 | im = ims[i] 31 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 32 | 33 | return blob 34 | 35 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 36 | """Mean subtract and scale an image for use in a blob.""" 37 | 38 | im = im.astype(np.float32, copy=False) 39 | im -= pixel_means 40 | # im = im[:, :, ::-1] 41 | im_shape = im.shape 42 | im_size_min = np.min(im_shape[0:2]) 43 | im_size_max = np.max(im_shape[0:2]) 44 | im_scale = float(target_size) / float(im_size_min) 45 | # Prevent the biggest axis from being more than MAX_SIZE 46 | # if np.round(im_scale * im_size_max) > max_size: 47 | # im_scale = float(max_size) / float(im_size_max) 48 | # im = imresize(im, im_scale) 49 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 50 | interpolation=cv2.INTER_LINEAR) 51 | 52 | return im, im_scale 53 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/pycocotools/UPSTREAM_REV: -------------------------------------------------------------------------------- 1 | https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574 2 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/pycocotools/license.txt: 
-------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of the FreeBSD Project. 27 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | from . import _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 
28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criterion. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criterion above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | encode = _mask.encode 77 | decode = _mask.decode 78 | iou = _mask.iou 79 | merge = _mask.merge 80 | area = _mask.area 81 | toBbox = _mask.toBbox 82 | frPyObjects = _mask.frPyObjects -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/pycocotools/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | #include <stdbool.h> 9 | 10 | typedef unsigned int uint; 11 | typedef unsigned long siz; 12 | typedef unsigned char byte; 13 | typedef double* BB; 14 | typedef struct { siz h, w, m; uint *cnts; } RLE; 15 | 16 | // Initialize/destroy RLE. 
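// (Conventions used throughout this header: h and w are the mask height and
// width; in the array functions n is the number of masks, while for
// rleIou/bbIou, m and n count detections and ground truths respectively.)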
17 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 18 | void rleFree( RLE *R ); 19 | 20 | // Initialize/destroy RLE array. 21 | void rlesInit( RLE **R, siz n ); 22 | void rlesFree( RLE **R, siz n ); 23 | 24 | // Encode binary masks using RLE. 25 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 26 | 27 | // Decode binary masks encoded via RLE. 28 | void rleDecode( const RLE *R, byte *mask, siz n ); 29 | 30 | // Compute union or intersection of encoded masks. 31 | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect ); 32 | 33 | // Compute area of encoded masks. 34 | void rleArea( const RLE *R, siz n, uint *a ); 35 | 36 | // Compute intersection over union between masks. 37 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 38 | 39 | // Compute intersection over union between bounding boxes. 40 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 41 | 42 | // Get bounding boxes surrounding encoded masks. 43 | void rleToBbox( const RLE *R, BB bb, siz n ); 44 | 45 | // Convert bounding boxes to encoded masks. 46 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 47 | 48 | // Convert polygon to encoded mask. 49 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 50 | 51 | // Get compressed string representation of encoded mask. 52 | char* rleToString( const RLE *R ); 53 | 54 | // Convert from compressed string representation of encoded mask. 55 | void rleFrString( RLE *R, char *s, siz h, siz w ); 56 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/roi_data_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/lib/roi_data_layer/minibatch.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | 8 | """Compute minibatch blobs for training a Fast R-CNN network.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import numpy as np 14 | import numpy.random as npr 15 | from imageio import imread 16 | from model.utils.config import cfg 17 | from model.utils.blob import prep_im_for_blob, im_list_to_blob 18 | import pdb 19 | def get_minibatch(roidb, num_classes): 20 | """Given a roidb, construct a minibatch sampled from it.""" 21 | num_images = len(roidb) 22 | # Sample random scales to use for each image in this batch 23 | random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES), 24 | size=num_images) 25 | assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ 26 | 'num_images ({}) must divide BATCH_SIZE ({})'. 
\ 27 | format(num_images, cfg.TRAIN.BATCH_SIZE) 28 | 29 | # Get the input image blob, formatted for caffe 30 | im_blob, im_scales = _get_image_blob(roidb, random_scale_inds) 31 | 32 | blobs = {'data': im_blob} 33 | 34 | assert len(im_scales) == 1, "Single batch only" 35 | assert len(roidb) == 1, "Single batch only" 36 | 37 | # gt boxes: (x1, y1, x2, y2, cls) 38 | if cfg.TRAIN.USE_ALL_GT: 39 | # Include all ground truth boxes 40 | gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0] 41 | else: 42 | # For the COCO ground truth boxes, exclude the ones that are ''iscrowd'' 43 | gt_inds = np.where((roidb[0]['gt_classes'] != 0) & np.all(roidb[0]['gt_overlaps'].toarray() > -1.0, axis=1))[0] 44 | gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32) 45 | gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0] 46 | gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds] 47 | blobs['gt_boxes'] = gt_boxes 48 | blobs['im_info'] = np.array( 49 | [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], 50 | dtype=np.float32) 51 | 52 | blobs['img_id'] = roidb[0]['img_id'] 53 | 54 | return blobs 55 | 56 | def _get_image_blob(roidb, scale_inds): 57 | """Builds an input blob from the images in the roidb at the specified 58 | scales. 59 | """ 60 | num_images = len(roidb) 61 | 62 | processed_ims = [] 63 | im_scales = [] 64 | for i in range(num_images): 65 | #im = cv2.imread(roidb[i]['image']) 66 | im = imread(roidb[i]['image']) 67 | 68 | if len(im.shape) == 2: 69 | im = im[:,:,np.newaxis] 70 | im = np.concatenate((im,im,im), axis=2) 71 | # flip the channels, since the original model used cv2 72 | # (rgb -> bgr) 73 | im = im[:,:,::-1] 74 | 75 | if roidb[i]['flipped']: 76 | im = im[:, ::-1, :] 77 | target_size = cfg.TRAIN.SCALES[scale_inds[i]] 78 | im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, 79 | cfg.TRAIN.MAX_SIZE) 80 | im_scales.append(im_scale) 81 | processed_ims.append(im) 82 | 83 | # Create a blob to hold the input images 84 | blob = im_list_to_blob(processed_ims) 85 | 86 | return blob, im_scales 87 | -------------------------------------------------------------------------------- /chapter4/faster-rcnn-pytorch/requirements.txt: -------------------------------------------------------------------------------- 1 | cython 2 | cffi 3 | opencv-python 4 | scipy 5 | msgpack 6 | easydict 7 | matplotlib 8 | pyyaml 9 | tensorboardX 10 | imageio 11 | -------------------------------------------------------------------------------- /chapter5/dssd-pytorch/arm.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | def arm_multibox(vgg, extra_layers, cfg): 5 | # cfg[k] is the number of anchors per location: each gets 4 location 6 | # offsets and 2 objectness scores, predicted by 3x3 convolutions. 7 | arm_loc_layers = [] 8 | arm_conf_layers = [] 9 | vgg_source = [21, 28, -2] 10 | 11 | for k, v in enumerate(vgg_source): 12 | arm_loc_layers += [nn.Conv2d(vgg[v].out_channels, 13 | cfg[k] * 4, kernel_size=3, padding=1)] 14 | arm_conf_layers += [nn.Conv2d(vgg[v].out_channels, 15 | cfg[k] * 2, kernel_size=3, padding=1)] 16 | 17 | for k, v in enumerate(extra_layers[1::2], 3): 18 | arm_loc_layers += [nn.Conv2d(v.out_channels, cfg[k] 19 | * 4, kernel_size=3, padding=1)] 20 | arm_conf_layers += [nn.Conv2d(v.out_channels, cfg[k] 21 | * 2, kernel_size=3, padding=1)] 22 | 23 | return (arm_loc_layers, arm_conf_layers) 24 | 25 | -------------------------------------------------------------------------------- /chapter5/dssd-pytorch/tcb.py: -------------------------------------------------------------------------------- 1 | def add_tcb(cfg): 2 | feature_scale_layers = [] 3 | feature_upsample_layers = [] 4 | feature_pred_layers = 
[] 5 | 6 | for k, v in enumerate(cfg): 7 | feature_scale_layers += [nn.Conv2d(cfg[k], 256, 3, padding=1), 8 | nn.ReLU(inplace=True), 9 | nn.Conv2d(256, 256, 3, padding=1) 10 | ] 11 | feature_pred_layers += [nn.ReLU(inplace=True), 12 | nn.Conv2d(256, 256, 3, padding=1), 13 | nn.ReLU(inplace=True) 14 | ] 15 | 16 | if k != len(cfg) - 1: 17 | feature_upsample_layers += [nn.ConvTranspose2d(256, 256, 4, 2)] 18 | 19 | return (feature_scale_layers, feature_upsample_layers, feature_pred_layers) 20 | 21 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/.gitattributes: -------------------------------------------------------------------------------- 1 | *.ipynb linguist-language=Python 2 | .ipynb_checkpoints/* linguist-documentation 3 | dev.ipynb linguist-documentation 4 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | 91 | # atom remote-sync package 92 | .remote-sync.json 93 | 94 | # weights 95 | weights/ 96 | 97 | #DS_Store 98 | .DS_Store 99 | 100 | # dev stuff 101 | eval/ 102 | eval.ipynb 103 | dev.ipynb 104 | .vscode/ 105 | 106 | # not ready 107 | videos/ 108 | templates/ 109 | data/ssd_dataloader.py 110 | data/datasets/ 111 | doc/visualize.py 112 | read_results.py 113 | ssd300_120000/ 114 | demos/live 115 | webdemo.py 116 | test_data_aug.py 117 | 118 | # attributes 119 | 120 | # pycharm 121 | .idea/ 122 | 123 | # temp checkout soln 124 | data/datasets/ 125 | data/ssd_dataloader.py 126 | 127 | # pylint 128 | .pylintrc -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Max deGroot, Ellis Brown 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software 
without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/README.md: -------------------------------------------------------------------------------- 1 | # Chapter 5: SSD 2 | 3 | ## Introduction 4 | 5 | This code is largely based on the PyTorch reimplementation [amdegroot/ssd.pytorch](https://github.com/amdegroot/ssd.pytorch). If you run into problems while working through it, check [amdegroot's issue tracker](https://github.com/amdegroot/ssd.pytorch/issues) for existing solutions. 6 | 7 | ## Dataset 8 | The code supports both the COCO and PASCAL VOC datasets. Taking VOC2012 as an example, the script below downloads it automatically; you can also place the dataset in the corresponding folder by hand. 9 | ```Shell 10 | # The default data path is data/VOCdevkit 11 | sh data/scripts/VOC2012.sh 12 | ``` 13 | 14 | ## Training 15 | * Download the pretrained VGG weights with the commands below and put them in the weights folder created by default; alternatively, download them manually from the URL and place them in the weights folder. 16 | ```Shell 17 | mkdir weights 18 | cd weights 19 | wget https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth 20 | ``` 21 | 22 | * Train the model with: 23 | ```Shell 24 | python train.py 25 | ``` 26 | Adjust the hyperparameters in the script as needed. 27 | 28 | ## Inference 29 | Run inference with the following script: 30 | ```Shell 31 | python eval.py 32 | ``` 33 | Adjust the hyperparameters in the script as needed. 34 | 35 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .voc0712 import VOCDetection, VOCAnnotationTransform, VOC_CLASSES, VOC_ROOT 2 | 3 | #from .coco import COCODetection, COCOAnnotationTransform, COCO_CLASSES, COCO_ROOT, get_label_map 4 | from .config import * 5 | import torch 6 | import cv2 7 | import numpy as np 8 | 9 | def detection_collate(batch): 10 | """Custom collate fn for dealing with batches of images that have a different 11 | number of associated object annotations (bounding boxes). 
12 | 13 | Arguments: 14 | batch: (tuple) A tuple of tensor images and lists of annotations 15 | 16 | Return: 17 | A tuple containing: 18 | 1) (tensor) batch of images stacked on their 0 dim 19 | 2) (list of tensors) annotations for a given image are stacked on 20 | 0 dim 21 | """ 22 | targets = [] 23 | imgs = [] 24 | for sample in batch: 25 | imgs.append(sample[0]) 26 | targets.append(torch.FloatTensor(sample[1])) 27 | return torch.stack(imgs, 0), targets 28 | 29 | 30 | def base_transform(image, size, mean): 31 | x = cv2.resize(image, (size, size)).astype(np.float32) 32 | x -= mean 33 | x = x.astype(np.float32) 34 | return x 35 | 36 | 37 | class BaseTransform: 38 | def __init__(self, size, mean): 39 | self.size = size 40 | self.mean = np.array(mean, dtype=np.float32) 41 | 42 | def __call__(self, image, boxes=None, labels=None): 43 | return base_transform(image, self.size, self.mean), boxes, labels 44 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/data/config.py: -------------------------------------------------------------------------------- 1 | # config.py 2 | import os.path 3 | 4 | # gets home dir cross platform 5 | HOME = os.path.expanduser("~") 6 | 7 | # for making bounding boxes pretty 8 | COLORS = ((255, 0, 0, 128), (0, 255, 0, 128), (0, 0, 255, 128), 9 | (0, 255, 255, 128), (255, 0, 255, 128), (255, 255, 0, 128)) 10 | 11 | MEANS = (104, 117, 123) 12 | 13 | # SSD300 CONFIGS 14 | voc = { 15 | 'num_classes': 21, 16 | 'lr_steps': (80000, 100000, 120000), 17 | 'max_iter': 120000, 18 | 'feature_maps': [38, 19, 10, 5, 3, 1], 19 | 'min_dim': 300, 20 | 'steps': [8, 16, 32, 64, 100, 300], 21 | 'min_sizes': [30, 60, 111, 162, 213, 264], 22 | 'max_sizes': [60, 111, 162, 213, 264, 315], 23 | 'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]], 24 | 'variance': [0.1, 0.2], 25 | 'clip': True, 26 | 'name': 'VOC', 27 | } 28 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/data/example.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter5/ssd-pytorch/data/example.jpg -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/data/scripts/COCO2014.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | start=`date +%s` 4 | 5 | # handle optional download dir 6 | if [ -z "$1" ] 7 | then 8 | # navigate to ~/data 9 | echo "navigating to ~/data/ ..." 10 | mkdir -p ~/data 11 | cd ~/data/ 12 | mkdir -p ./coco 13 | cd ./coco 14 | mkdir -p ./images 15 | mkdir -p ./annotations 16 | else 17 | # check if specified dir is valid 18 | if [ ! -d $1 ]; then 19 | echo $1 " is not a valid directory" 20 | exit 0 21 | fi 22 | echo "navigating to " $1 " ..." 23 | cd $1 24 | fi 25 | 26 | if [ ! -d images ] 27 | then 28 | mkdir -p ./images 29 | fi 30 | 31 | # Download the image data. 32 | cd ./images 33 | echo "Downloading MSCOCO train images ..." 34 | curl -LO http://images.cocodataset.org/zips/train2014.zip 35 | echo "Downloading MSCOCO val images ..." 36 | curl -LO http://images.cocodataset.org/zips/val2014.zip 37 | 38 | cd ../ 39 | if [ ! -d annotations ] 40 | then 41 | mkdir -p ./annotations 42 | fi 43 | 44 | # Download the annotation data. 45 | cd ./annotations 46 | echo "Downloading MSCOCO train/val annotations ..." 
47 | curl -LO http://images.cocodataset.org/annotations/annotations_trainval2014.zip 48 | echo "Finished downloading. Now extracting ..." 49 | 50 | # Unzip data 51 | echo "Extracting train images ..." 52 | unzip ../images/train2014.zip -d ../images 53 | echo "Extracting val images ..." 54 | unzip ../images/val2014.zip -d ../images 55 | echo "Extracting annotations ..." 56 | unzip ./annotations_trainval2014.zip 57 | 58 | echo "Removing zip files ..." 59 | rm ../images/train2014.zip 60 | rm ../images/val2014.zip 61 | rm ./annotations_trainval2014.zip 62 | 63 | echo "Creating trainval35k dataset..." 64 | 65 | # Download annotations json 66 | echo "Downloading trainval35k annotations from S3" 67 | curl -LO https://s3.amazonaws.com/amdegroot-datasets/instances_trainval35k.json.zip 68 | 69 | # combine train and val 70 | echo "Combining train and val images" 71 | mkdir ../images/trainval35k 72 | cd ../images/train2014 73 | find -maxdepth 1 -name '*.jpg' -exec cp -t ../trainval35k {} + # dir too large for cp 74 | cd ../val2014 75 | find -maxdepth 1 -name '*.jpg' -exec cp -t ../trainval35k {} + 76 | 77 | 78 | end=`date +%s` 79 | runtime=$((end-start)) 80 | 81 | echo "Completed in " $runtime " seconds" 82 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/data/scripts/VOC2007.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ellis Brown 3 | 4 | start=`date +%s` 5 | 6 | # handle optional download dir 7 | if [ -z "$1" ] 8 | then 9 | # navigate to ~/data 10 | echo "navigating to ~/data/ ..." 11 | mkdir -p ~/data 12 | cd ~/data/ 13 | else 14 | # check if is valid directory 15 | if [ ! -d $1 ]; then 16 | echo $1 "is not a valid directory" 17 | exit 0 18 | fi 19 | echo "navigating to" $1 "..." 20 | cd $1 21 | fi 22 | 23 | echo "Downloading VOC2007 trainval ..." 24 | # Download the data. 25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar 26 | echo "Downloading VOC2007 test data ..." 27 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar 28 | echo "Done downloading." 29 | 30 | # Extract data 31 | echo "Extracting trainval ..." 32 | tar -xvf VOCtrainval_06-Nov-2007.tar 33 | echo "Extracting test ..." 34 | tar -xvf VOCtest_06-Nov-2007.tar 35 | echo "removing tars ..." 36 | rm VOCtrainval_06-Nov-2007.tar 37 | rm VOCtest_06-Nov-2007.tar 38 | 39 | end=`date +%s` 40 | runtime=$((end-start)) 41 | 42 | echo "Completed in" $runtime "seconds" -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/data/scripts/VOC2012.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ellis Brown 3 | 4 | start=`date +%s` 5 | 6 | # handle optional download dir 7 | if [ -z "$1" ] 8 | then 9 | # navigate to ~/data 10 | echo "navigating to ~/data/ ..." 11 | mkdir -p ~/data 12 | cd ~/data/ 13 | else 14 | # check if is valid directory 15 | if [ ! -d $1 ]; then 16 | echo $1 "is not a valid directory" 17 | exit 0 18 | fi 19 | echo "navigating to" $1 "..." 20 | cd $1 21 | fi 22 | 23 | echo "Downloading VOC2012 trainval ..." 24 | # Download the data. 25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 26 | echo "Done downloading." 27 | 28 | 29 | # Extract data 30 | echo "Extracting trainval ..." 31 | tar -xvf VOCtrainval_11-May-2012.tar 32 | echo "removing tar ..." 
33 | rm VOCtrainval_11-May-2012.tar 34 | 35 | end=`date +%s` 36 | runtime=$((end-start)) 37 | 38 | echo "Completed in" $runtime "seconds" -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/demo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter5/ssd-pytorch/demo/__init__.py -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/demo/live.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | from torch.autograd import Variable 4 | import cv2 5 | import time 6 | from imutils.video import FPS, WebcamVideoStream 7 | import argparse 8 | 9 | parser = argparse.ArgumentParser(description='Single Shot MultiBox Detection') 10 | parser.add_argument('--weights', default='weights/ssd_300_VOC0712.pth', 11 | type=str, help='Trained state_dict file path') 12 | parser.add_argument('--cuda', default=False, type=bool, 13 | help='Use cuda in live demo') 14 | args = parser.parse_args() 15 | 16 | COLORS = [(255, 0, 0), (0, 255, 0), (0, 0, 255)] 17 | FONT = cv2.FONT_HERSHEY_SIMPLEX 18 | 19 | 20 | def cv2_demo(net, transform): 21 | def predict(frame): 22 | height, width = frame.shape[:2] 23 | x = torch.from_numpy(transform(frame)[0]).permute(2, 0, 1) 24 | x = Variable(x.unsqueeze(0)) 25 | y = net(x) # forward pass 26 | detections = y.data 27 | # scale each detection back up to the image 28 | scale = torch.Tensor([width, height, width, height]) 29 | for i in range(detections.size(1)): 30 | j = 0 31 | while detections[0, i, j, 0] >= 0.6: 32 | pt = (detections[0, i, j, 1:] * scale).cpu().numpy() 33 | cv2.rectangle(frame, 34 | (int(pt[0]), int(pt[1])), 35 | (int(pt[2]), int(pt[3])), 36 | COLORS[i % 3], 2) 37 | cv2.putText(frame, labelmap[i - 1], (int(pt[0]), int(pt[1])), 38 | FONT, 2, (255, 255, 255), 2, cv2.LINE_AA) 39 | j += 1 40 | return frame 41 | 42 | # start video stream thread, allow buffer to fill 43 | print("[INFO] starting threaded video stream...") 44 | stream = WebcamVideoStream(src=0).start() # default camera 45 | time.sleep(1.0) 46 | # start fps timer 47 | # loop over frames from the video file stream 48 | while True: 49 | # grab next frame 50 | frame = stream.read() 51 | key = cv2.waitKey(1) & 0xFF 52 | 53 | # update FPS counter 54 | fps.update() 55 | frame = predict(frame) 56 | 57 | # keybindings for display 58 | if key == ord('p'): # pause 59 | while True: 60 | key2 = cv2.waitKey(1) or 0xff 61 | cv2.imshow('frame', frame) 62 | if key2 == ord('p'): # resume 63 | break 64 | cv2.imshow('frame', frame) 65 | if key == 27: # exit 66 | break 67 | 68 | 69 | if __name__ == '__main__': 70 | import sys 71 | from os import path 72 | sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) 73 | 74 | from data import BaseTransform, VOC_CLASSES as labelmap 75 | from ssd import build_ssd 76 | 77 | net = build_ssd('test', 300, 21) # initialize SSD 78 | net.load_state_dict(torch.load(args.weights)) 79 | transform = BaseTransform(net.size, (104/256.0, 117/256.0, 123/256.0)) 80 | 81 | fps = FPS().start() 82 | cv2_demo(net.eval(), transform) 83 | # stop the timer and display FPS information 84 | fps.stop() 85 | 86 | print("[INFO] elasped time: {:.2f}".format(fps.elapsed())) 87 | print("[INFO] approx. 
FPS: {:.2f}".format(fps.fps())) 88 | 89 | # cleanup 90 | cv2.destroyAllWindows() 91 | stream.stop() 92 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/doc/SSD.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter5/ssd-pytorch/doc/SSD.jpg -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/doc/detection_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter5/ssd-pytorch/doc/detection_example.png -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/doc/detection_example2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter5/ssd-pytorch/doc/detection_example2.png -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/doc/detection_examples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter5/ssd-pytorch/doc/detection_examples.png -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/doc/ssd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter5/ssd-pytorch/doc/ssd.png -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions import * 2 | from .modules import * 3 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/layers/functions/__init__.py: -------------------------------------------------------------------------------- 1 | from .detection import Detect 2 | from .prior_box import PriorBox 3 | 4 | 5 | __all__ = ['Detect', 'PriorBox'] 6 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/layers/functions/detection.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from ..box_utils import decode, nms 4 | from data import voc as cfg 5 | 6 | 7 | class Detect(Function): 8 | """At test time, Detect is the final layer of SSD. Decode location preds, 9 | apply non-maximum suppression to location predictions based on conf 10 | scores and threshold to a top_k number of output predictions for both 11 | confidence score and locations. 12 | """ 13 | def __init__(self, num_classes, bkg_label, top_k, conf_thresh, nms_thresh): 14 | self.num_classes = num_classes 15 | self.background_label = bkg_label 16 | self.top_k = top_k 17 | # Parameters used in nms. 
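# top_k caps the number of detections kept per class, conf_thresh filters
# low-confidence predictions before NMS, and nms_thresh is the IoU overlap
# above which lower-scoring boxes are suppressed (see forward below).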
18 | self.nms_thresh = nms_thresh 19 | if nms_thresh <= 0: 20 | raise ValueError('nms_threshold must be non negative.') 21 | self.conf_thresh = conf_thresh 22 | self.variance = cfg['variance'] 23 | 24 | def forward(self, loc_data, conf_data, prior_data): 25 | """ 26 | Args: 27 | loc_data: (tensor) Loc preds from loc layers 28 | Shape: [batch,num_priors*4] 29 | conf_data: (tensor) Shape: Conf preds from conf layers 30 | Shape: [batch*num_priors,num_classes] 31 | prior_data: (tensor) Prior boxes and variances from priorbox layers 32 | Shape: [1,num_priors,4] 33 | """ 34 | num = loc_data.size(0) # batch size 35 | num_priors = prior_data.size(0) 36 | output = torch.zeros(num, self.num_classes, self.top_k, 5) 37 | conf_preds = conf_data.view(num, num_priors, 38 | self.num_classes).transpose(2, 1) 39 | 40 | # Decode predictions into bboxes. 41 | for i in range(num): 42 | decoded_boxes = decode(loc_data[i], prior_data, self.variance) 43 | # For each class, perform nms 44 | conf_scores = conf_preds[i].clone() 45 | 46 | for cl in range(1, self.num_classes): 47 | c_mask = conf_scores[cl].gt(self.conf_thresh) 48 | scores = conf_scores[cl][c_mask] 49 | if scores.dim() == 0: 50 | continue 51 | l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes) 52 | boxes = decoded_boxes[l_mask].view(-1, 4) 53 | # idx of highest scoring and non-overlapping boxes per class 54 | ids, count = nms(boxes, scores, self.nms_thresh, self.top_k) 55 | output[i, cl, :count] = \ 56 | torch.cat((scores[ids[:count]].unsqueeze(1), 57 | boxes[ids[:count]]), 1) 58 | flt = output.contiguous().view(num, -1, 5) 59 | _, idx = flt[:, :, 0].sort(1, descending=True) 60 | _, rank = idx.sort(1) 61 | flt[(rank < self.top_k).unsqueeze(-1).expand_as(flt)].fill_(0) 62 | return output 63 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/layers/functions/prior_box.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from math import sqrt as sqrt 3 | from itertools import product as product 4 | import torch 5 | 6 | 7 | class PriorBox(object): 8 | """Compute priorbox coordinates in center-offset form for each source 9 | feature map. 
10 | """ 11 | def __init__(self, cfg): 12 | super(PriorBox, self).__init__() 13 | self.image_size = cfg['min_dim'] 14 | # number of priors for feature map location (either 4 or 6) 15 | self.num_priors = len(cfg['aspect_ratios']) 16 | self.variance = cfg['variance'] or [0.1] 17 | self.feature_maps = cfg['feature_maps'] 18 | self.min_sizes = cfg['min_sizes'] 19 | self.max_sizes = cfg['max_sizes'] 20 | self.steps = cfg['steps'] 21 | self.aspect_ratios = cfg['aspect_ratios'] 22 | self.clip = cfg['clip'] 23 | self.version = cfg['name'] 24 | for v in self.variance: 25 | if v <= 0: 26 | raise ValueError('Variances must be greater than 0') 27 | 28 | # 生成所有的PriorBox,需要每一个特征图的信息 29 | def forward(self): 30 | mean = [] 31 | for k, f in enumerate(self.feature_maps): 32 | for i, j in product(range(f), repeat=2): 33 | # f_k为每个特征图的尺寸 34 | f_k = self.image_size / self.steps[k] 35 | # 求取每个box的中心坐标 36 | cx = (j + 0.5) / f_k 37 | cy = (i + 0.5) / f_k 38 | 39 | # 对应{S_k, S_k}大小的PriorBox 40 | s_k = self.min_sizes[k]/self.image_size 41 | mean += [cx, cy, s_k, s_k] 42 | 43 | # 对应{√(S_k S_(k+1) ), √(S_k S_(k+1) )}大小的PriorBox 44 | s_k_prime = sqrt(s_k * (self.max_sizes[k]/self.image_size)) 45 | mean += [cx, cy, s_k_prime, s_k_prime] 46 | 47 | # 剩余的比例为2、1/2、3、1/3的PriorBox 48 | for ar in self.aspect_ratios[k]: 49 | mean += [cx, cy, s_k*sqrt(ar), s_k/sqrt(ar)] 50 | mean += [cx, cy, s_k/sqrt(ar), s_k*sqrt(ar)] 51 | # back to torch land 52 | output = torch.Tensor(mean).view(-1, 4) 53 | if self.clip: 54 | output.clamp_(max=1, min=0) 55 | return output 56 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/layers/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .l2norm import L2Norm 2 | from .multibox_loss import MultiBoxLoss 3 | 4 | __all__ = ['L2Norm', 'MultiBoxLoss'] 5 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/layers/modules/l2norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Function 4 | from torch.autograd import Variable 5 | import torch.nn.init as init 6 | 7 | class L2Norm(nn.Module): 8 | def __init__(self,n_channels, scale): 9 | super(L2Norm,self).__init__() 10 | self.n_channels = n_channels 11 | self.gamma = scale or None 12 | self.eps = 1e-10 13 | self.weight = nn.Parameter(torch.Tensor(self.n_channels)) 14 | self.reset_parameters() 15 | 16 | def reset_parameters(self): 17 | init.constant(self.weight,self.gamma) 18 | 19 | def forward(self, x): 20 | norm = x.pow(2).sum(dim=1, keepdim=True).sqrt()+self.eps 21 | #x /= norm 22 | x = torch.div(x,norm) 23 | out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x 24 | return out 25 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/test.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import sys 3 | import os 4 | import argparse 5 | import torch 6 | import torch.nn as nn 7 | import torch.backends.cudnn as cudnn 8 | import torchvision.transforms as transforms 9 | from torch.autograd import Variable 10 | from data import VOC_ROOT, VOC_CLASSES as labelmap 11 | from PIL import Image 12 | from data import VOCAnnotationTransform, VOCDetection, BaseTransform, VOC_CLASSES 13 | import torch.utils.data as data 14 | from ssd import build_ssd 
15 | 16 | parser = argparse.ArgumentParser(description='Single Shot MultiBox Detection') 17 | parser.add_argument('--trained_model', default='weights/ssd_300_VOC0712.pth', 18 | type=str, help='Trained state_dict file path to open') 19 | parser.add_argument('--save_folder', default='eval/', type=str, 20 | help='Dir to save results') 21 | parser.add_argument('--visual_threshold', default=0.6, type=float, 22 | help='Final confidence threshold') 23 | parser.add_argument('--cuda', default=True, type=bool, 24 | help='Use cuda to train model') 25 | parser.add_argument('--voc_root', default=VOC_ROOT, help='Location of VOC root directory') 26 | parser.add_argument('-f', default=None, type=str, help="Dummy arg so we can load in Jupyter Notebooks") 27 | args = parser.parse_args() 28 | 29 | if args.cuda and torch.cuda.is_available(): 30 | torch.set_default_tensor_type('torch.cuda.FloatTensor') 31 | else: 32 | torch.set_default_tensor_type('torch.FloatTensor') 33 | 34 | if not os.path.exists(args.save_folder): 35 | os.mkdir(args.save_folder) 36 | 37 | 38 | def test_net(save_folder, net, cuda, testset, transform, thresh): 39 | # dump predictions and assoc. ground truth to text file for now 40 | filename = save_folder+'test1.txt' 41 | num_images = len(testset) 42 | for i in range(num_images): 43 | print('Testing image {:d}/{:d}....'.format(i+1, num_images)) 44 | img = testset.pull_image(i) 45 | img_id, annotation = testset.pull_anno(i) 46 | x = torch.from_numpy(transform(img)[0]).permute(2, 0, 1) 47 | x = Variable(x.unsqueeze(0)) 48 | 49 | with open(filename, mode='a') as f: 50 | f.write('\nGROUND TRUTH FOR: '+img_id+'\n') 51 | for box in annotation: 52 | f.write('label: '+' || '.join(str(b) for b in box)+'\n') 53 | if cuda: 54 | x = x.cuda() 55 | 56 | y = net(x) # forward pass 57 | detections = y.data 58 | # scale each detection back up to the image 59 | scale = torch.Tensor([img.shape[1], img.shape[0], 60 | img.shape[1], img.shape[0]]) 61 | pred_num = 0 62 | for i in range(detections.size(1)): 63 | j = 0 64 | while detections[0, i, j, 0] >= 0.6: 65 | if pred_num == 0: 66 | with open(filename, mode='a') as f: 67 | f.write('PREDICTIONS: '+'\n') 68 | score = detections[0, i, j, 0] 69 | label_name = labelmap[i-1] 70 | pt = (detections[0, i, j, 1:]*scale).cpu().numpy() 71 | coords = (pt[0], pt[1], pt[2], pt[3]) 72 | pred_num += 1 73 | with open(filename, mode='a') as f: 74 | f.write(str(pred_num)+' label: '+label_name+' score: ' + 75 | str(score) + ' '+' || '.join(str(c) for c in coords) + '\n') 76 | j += 1 77 | 78 | 79 | def test_voc(): 80 | # load net 81 | num_classes = len(VOC_CLASSES) + 1 # +1 background 82 | net = build_ssd('test', 300, num_classes) # initialize SSD 83 | net.load_state_dict(torch.load(args.trained_model)) 84 | net.eval() 85 | print('Finished loading model!') 86 | # load data 87 | testset = VOCDetection(args.voc_root, [('2007', 'test')], None, VOCAnnotationTransform()) 88 | if args.cuda: 89 | net = net.cuda() 90 | cudnn.benchmark = True 91 | # evaluation 92 | test_net(args.save_folder, net, args.cuda, testset, 93 | BaseTransform(net.size, (104, 117, 123)), 94 | thresh=args.visual_threshold) 95 | 96 | if __name__ == '__main__': 97 | test_voc() 98 | -------------------------------------------------------------------------------- /chapter5/ssd-pytorch/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .augmentations import SSDAugmentation -------------------------------------------------------------------------------- 
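As a quick sanity check on the PriorBox generation above: with the SSD300 VOC settings from data/config.py, the loops in prior_box.py emit the well-known total of 8732 priors. A minimal sketch in plain Python (the values are copied from the voc dict in data/config.py):

```python
feature_maps = [38, 19, 10, 5, 3, 1]
aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]

# Each location gets one {s_k, s_k} box, one {sqrt(s_k * s_(k+1))} box, and
# two boxes (ar and 1/ar) for every entry in aspect_ratios[k].
total = sum(f * f * (2 + 2 * len(ar))
            for f, ar in zip(feature_maps, aspect_ratios))
print(total)  # 8732
```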
/chapter6/yolov2-pytorch/README.md: -------------------------------------------------------------------------------- 1 | # Chapter 6: YOLO v2 2 | 3 | ## Introduction 4 | 5 | This code is largely based on the PyTorch reimplementation [longcw/yolo2-pytorch](https://github.com/longcw/yolo2-pytorch). If you run into problems while working through it, check [longcw's issue tracker](https://github.com/longcw/yolo2-pytorch/issues) for existing solutions. 6 | 7 | ## Preparation 8 | 9 | ### 1 Compilation 10 | * Compile the reorg module; adjust the arch setting in make.sh for your GPU (see chapter 4 for details). 11 | ```bash 12 | cd yolo2-pytorch 13 | ./make.sh 14 | ``` 15 | ### 2 Dataset 16 | * Taking VOC2012 as an example, symlink the dataset into the data folder: 17 | 18 | ```bash 19 | cd yolo2-pytorch 20 | mkdir data 21 | cd data 22 | ln -s "your VOCdevkit path" VOCdevkit2012 23 | ``` 24 | ### 3 Pretrained weights 25 | * Download the pretrained weights [darknet19](https://drive.google.com/file/d/0B4pXCfnYmG1WRG52enNpcV80aDg/view?usp=sharing) 26 | * Then edit the weight path in `yolo2-pytorch/cfgs/exps/darknet19_exp1.py`. 27 | 28 | ## Training 29 | * Run the following command: 30 | ```bash 31 | python train.py 32 | ``` 33 | 34 | ## Inference 35 | 36 | * Edit the trained_model path in `yolo2-pytorch/cfgs/config.py`. 37 | ```bash 38 | mkdir output 39 | python test.py 40 | ``` 41 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/cfgs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/cfgs/__init__.py -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/cfgs/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from .config_voc import * # noqa 3 | from .exps.darknet19_exp1 import * # noqa 4 | 5 | 6 | def mkdir(path, max_depth=3): 7 | parent, child = os.path.split(path) 8 | if not os.path.exists(parent) and max_depth > 1: 9 | mkdir(parent, max_depth-1) 10 | 11 | if not os.path.exists(path): 12 | os.mkdir(path) 13 | 14 | 15 | # input and output size 16 | ############################ 17 | multi_scale_inp_size = [np.array([320, 320], dtype=np.int), 18 | np.array([352, 352], dtype=np.int), 19 | np.array([384, 384], dtype=np.int), 20 | np.array([416, 416], dtype=np.int), 21 | np.array([448, 448], dtype=np.int), 22 | np.array([480, 480], dtype=np.int), 23 | np.array([512, 512], dtype=np.int), 24 | np.array([544, 544], dtype=np.int), 25 | np.array([576, 576], dtype=np.int), 26 | # np.array([608, 608], dtype=np.int), 27 | ] # w, h 28 | multi_scale_out_size = [multi_scale_inp_size[0] / 32, 29 | multi_scale_inp_size[1] / 32, 30 | multi_scale_inp_size[2] / 32, 31 | multi_scale_inp_size[3] / 32, 32 | multi_scale_inp_size[4] / 32, 33 | multi_scale_inp_size[5] / 32, 34 | multi_scale_inp_size[6] / 32, 35 | multi_scale_inp_size[7] / 32, 36 | multi_scale_inp_size[8] / 32, 37 | # multi_scale_inp_size[9] / 32, 38 | ] # w, h 39 | inp_size = np.array([416, 416], dtype=np.int) # w, h 40 | out_size = inp_size / 32 41 | 42 | 43 | # for display 44 | ############################ 45 | def _to_color(indx, base): 46 | """ return (b, r, g) tuple""" 47 | base2 = base * base 48 | b = 2 - indx / base2 49 | r = 2 - (indx % base2) / base 50 | g = 2 - (indx % base2) % base 51 | return b * 127, r * 127, g * 127 52 | 53 | 54 | base = int(np.ceil(pow(num_classes, 1. 
/ 3))) 55 | colors = [_to_color(x, base) for x in range(num_classes)] 56 | 57 | 58 | # detection config 59 | ############################ 60 | thresh = 0.3 61 | 62 | 63 | # dir config 64 | ############################ 65 | ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) 66 | DATA_DIR = os.path.join(ROOT_DIR, 'data') 67 | MODEL_DIR = os.path.join(ROOT_DIR, 'models') 68 | TRAIN_DIR = os.path.join(MODEL_DIR, 'training') 69 | TEST_DIR = os.path.join(MODEL_DIR, 'testing') 70 | 71 | trained_model = os.path.join(MODEL_DIR, h5_fname) 72 | pretrained_model = os.path.join(MODEL_DIR, pretrained_fname) 73 | train_output_dir = os.path.join(TRAIN_DIR, exp_name) 74 | test_output_dir = os.path.join(TEST_DIR, imdb_test, h5_fname) 75 | mkdir(train_output_dir, max_depth=3) 76 | mkdir(test_output_dir, max_depth=4) 77 | 78 | rand_seed = 1024 79 | use_tensorboard = True 80 | 81 | log_interval = 50 82 | disp_interval = 10 83 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/cfgs/config_voc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | # trained model 5 | h5_fname = 'yolo-voc.weights.h5' 6 | 7 | # VOC 8 | label_names = ('aeroplane', 'bicycle', 'bird', 'boat', 9 | 'bottle', 'bus', 'car', 'cat', 'chair', 10 | 'cow', 'diningtable', 'dog', 'horse', 11 | 'motorbike', 'person', 'pottedplant', 12 | 'sheep', 'sofa', 'train', 'tvmonitor') 13 | num_classes = len(label_names) 14 | 15 | anchors = np.asarray([(1.08, 1.19), (3.42, 4.41), 16 | (6.63, 11.38), (9.42, 5.11), (16.62, 10.52)], 17 | dtype=np.float) 18 | num_anchors = len(anchors) 19 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/cfgs/exps/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/cfgs/exps/__init__.py -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/cfgs/exps/darknet19_exp1.py: -------------------------------------------------------------------------------- 1 | exp_name = 'darknet19_voc07trainval_exp3' 2 | 3 | pretrained_fname = 'weights/darknet19.weights.npz' 4 | 5 | start_step = 0 6 | lr_decay_epochs = {60, 90} 7 | lr_decay = 1./10 8 | 9 | max_epoch = 160 10 | 11 | weight_decay = 0.0005 12 | momentum = 0.9 13 | init_learning_rate = 1e-3 14 | 15 | # for training yolo2 16 | object_scale = 5. 17 | noobject_scale = 1. 18 | class_scale = 1. 19 | coord_scale = 1. 20 | iou_thresh = 0.6 21 | 22 | # dataset 23 | imdb_train = 'voc_2012_trainval' 24 | imdb_test = 'voc_2012_test' 25 | batch_size = 1 26 | train_batch_size = 16 27 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/cfgs/exps/darknet19_exp2.py: -------------------------------------------------------------------------------- 1 | exp_name = 'darknet19_voc12trainval_exp1' 2 | 3 | pretrained_fname = 'darknet19.weights.npz' 4 | 5 | start_step = 0 6 | lr_decay_epochs = {60, 90} 7 | lr_decay = 1./10 8 | 9 | max_epoch = 160 10 | 11 | weight_decay = 0.0005 12 | momentum = 0.9 13 | init_learning_rate = 1e-3 14 | 15 | # for training yolo2 16 | object_scale = 5. 17 | noobject_scale = 1. 18 | class_scale = 1. 19 | coord_scale = 1. 
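# The *_scale factors above weight the object, no-object, class, and
# coordinate terms of the YOLO v2 loss; iou_thresh is the IoU threshold used
# when matching predicted boxes to ground truth in that loss.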
20 | iou_thresh = 0.6 21 | 22 | # dataset 23 | imdb_train = 'voc_2012_trainval' 24 | imdb_test = 'voc_2012_test' 25 | batch_size = 1 26 | train_batch_size = 16 27 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/datasets/__init__.py -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | from torch.multiprocessing import Pool 5 | 6 | from darknet import Darknet19 7 | import utils.yolo as yolo_utils 8 | import utils.network as net_utils 9 | from utils.timer import Timer 10 | import cfgs.config as cfg 11 | 12 | # This prevents deadlocks in the data loader, caused by 13 | # some incompatibility between pytorch and cv2 multiprocessing. 14 | # See https://github.com/pytorch/pytorch/issues/1355. 15 | cv2.setNumThreads(0) 16 | 17 | 18 | def preprocess(fname): 19 | # return fname 20 | image = cv2.imread(fname) 21 | im_data = np.expand_dims( 22 | yolo_utils.preprocess_test((image, None, cfg.multi_scale_inp_size), 0)[0], 0) 23 | return image, im_data 24 | 25 | 26 | # hyper-parameters 27 | # npz_fname = 'models/yolo-voc.weights.npz' 28 | # h5_fname = 'models/yolo-voc.weights.h5' 29 | trained_model = cfg.trained_model 30 | # trained_model = os.path.join( 31 | # cfg.train_output_dir, 'darknet19_voc07trainval_exp3_158.h5') 32 | thresh = 0.5 33 | im_path = 'demo' 34 | # --- 35 | 36 | net = Darknet19() 37 | net_utils.load_net(trained_model, net) 38 | # net.load_from_npz(npz_fname) 39 | # net_utils.save_net(h5_fname, net) 40 | net.cuda() 41 | net.eval() 42 | print('load model succ...') 43 | 44 | t_det = Timer() 45 | t_total = Timer() 46 | im_fnames = sorted((fname 47 | for fname in os.listdir(im_path) 48 | if os.path.splitext(fname)[-1] == '.jpg')) 49 | im_fnames = (os.path.join(im_path, fname) for fname in im_fnames) 50 | pool = Pool(processes=1) 51 | 52 | for i, (image, im_data) in enumerate(pool.imap( 53 | preprocess, im_fnames, chunksize=1)): 54 | t_total.tic() 55 | im_data = net_utils.np_to_variable( 56 | im_data, is_cuda=True, volatile=True).permute(0, 3, 1, 2) 57 | t_det.tic() 58 | bbox_pred, iou_pred, prob_pred = net(im_data) 59 | det_time = t_det.toc() 60 | # to numpy 61 | bbox_pred = bbox_pred.data.cpu().numpy() 62 | iou_pred = iou_pred.data.cpu().numpy() 63 | prob_pred = prob_pred.data.cpu().numpy() 64 | 65 | # print bbox_pred.shape, iou_pred.shape, prob_pred.shape 66 | 67 | bboxes, scores, cls_inds = yolo_utils.postprocess( 68 | bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh) 69 | 70 | im2show = yolo_utils.draw_detection(image, bboxes, scores, cls_inds, cfg) 71 | 72 | if im2show.shape[0] > 1100: 73 | im2show = cv2.resize(im2show, 74 | (int(1000. * 75 | float(im2show.shape[1]) / im2show.shape[0]), 76 | 1000)) 77 | cv2.imshow('test', im2show) 78 | 79 | total_time = t_total.toc() 80 | # wait_time = max(int(60 - total_time * 1000), 1) 81 | cv2.waitKey(0) 82 | 83 | if i % 1 == 0: 84 | format_str = 'frame: %d, ' \ 85 | '(detection: %.1f Hz, %.1f ms) ' \ 86 | '(total: %.1f Hz, %.1f ms)' 87 | print((format_str % ( 88 | i, 89 | 1. / det_time, det_time * 1000, 90 | 1. 
/ total_time, total_time * 1000))) 91 | 92 | t_total.clear() 93 | t_det.clear() 94 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/2007_000039.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/2007_000039.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/dog.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/eagle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/eagle.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/giraffe.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/giraffe.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/horses.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/horses.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/out/2007_000039.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/out/2007_000039.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/out/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/out/dog.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/out/eagle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/out/eagle.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/out/giraffe.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/out/giraffe.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/out/horses.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/out/horses.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/out/person.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/out/person.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/out/ragged-edge-london-office-6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/out/ragged-edge-london-office-6.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/out/scream.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/out/scream.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/person.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/person.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/ragged-edge-london-office-6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/ragged-edge-london-office-6.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/demo/scream.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/demo/scream.jpg -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/layers/__init__.py -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/reorg/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/layers/reorg/__init__.py -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/reorg/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/layers/reorg/_ext/__init__.py 
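The reorg ("passthrough") layer that layers/reorg implements in C/CUDA below is YOLOv2's space-to-depth rearrangement: with stride 2, a (N, C, H, W) feature map becomes (N, 4C, H/2, W/2), so the fine-grained 26x26 features can be concatenated with the deeper 13x13 map. A minimal pure-PyTorch sketch of the same shape transform follows; this is a reference illustration only, not part of the repo, and the darknet kernels below use their own channel ordering, so it matches in shape rather than element order.

import torch

def reorg_sketch(x, stride=2):
    # illustration only -- not from the repo; darknet's kernel orders channels differently
    # (N, C, H, W) -> (N, C*stride*stride, H//stride, W//stride)
    n, c, h, w = x.size()
    x = x.view(n, c, h // stride, stride, w // stride, stride)
    x = x.permute(0, 3, 5, 1, 2, 4).contiguous()
    return x.view(n, c * stride * stride, h // stride, w // stride)

out = reorg_sketch(torch.randn(1, 64, 26, 26))  # -> torch.Size([1, 256, 13, 13])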
-------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/reorg/_ext/reorg_layer/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._reorg_layer import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/reorg/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/reorg_cpu.c'] 7 | headers = ['src/reorg_cpu.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/reorg_cuda.c'] 14 | headers += ['src/reorg_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = os.path.dirname(os.path.realpath(__file__)) 19 | # print(this_file) 20 | extra_objects = ['src/reorg_cuda_kernel.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | 23 | ffi = create_extension( 24 | '_ext.reorg_layer', 25 | headers=headers, 26 | sources=sources, 27 | define_macros=defines, 28 | relative_to=__file__, 29 | with_cuda=with_cuda, 30 | extra_objects=extra_objects 31 | ) 32 | 33 | if __name__ == '__main__': 34 | ffi.build() 35 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/reorg/reorg_layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from ._ext import reorg_layer 4 | 5 | 6 | class ReorgFunction(Function): 7 | def __init__(self, stride=2): 8 | self.stride = stride 9 | 10 | def forward(self, x): 11 | stride = self.stride 12 | 13 | bsize, c, h, w = x.size() 14 | out_w, out_h, out_c = int(w / stride), int(h / stride), c * (stride * stride) # noqa 15 | out = torch.FloatTensor(bsize, out_c, out_h, out_w) 16 | 17 | if x.is_cuda: 18 | out = out.cuda() 19 | reorg_layer.reorg_cuda(x, out_w, out_h, out_c, bsize, 20 | stride, 0, out) 21 | else: 22 | reorg_layer.reorg_cpu(x, out_w, out_h, out_c, bsize, 23 | stride, 0, out) 24 | 25 | return out 26 | 27 | def backward(self, grad_top): 28 | stride = self.stride 29 | bsize, c, h, w = grad_top.size() 30 | 31 | out_w, out_h, out_c = w * stride, h * stride, c / (stride * stride) 32 | grad_bottom = torch.FloatTensor(bsize, int(out_c), out_h, out_w) 33 | 34 | # rev_stride = 1. 
/ stride # reverse 35 | if grad_top.is_cuda: 36 | grad_bottom = grad_bottom.cuda() 37 | reorg_layer.reorg_cuda(grad_top, w, h, c, bsize, 38 | stride, 1, grad_bottom) 39 | else: 40 | reorg_layer.reorg_cpu(grad_top, w, h, c, bsize, 41 | stride, 1, grad_bottom) 42 | 43 | return grad_bottom 44 | 45 | 46 | class ReorgLayer(torch.nn.Module): 47 | def __init__(self, stride): 48 | super(ReorgLayer, self).__init__() 49 | 50 | self.stride = stride 51 | 52 | def forward(self, x): 53 | x = ReorgFunction(self.stride)(x) 54 | return x 55 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/reorg/src/reorg_cpu.c: -------------------------------------------------------------------------------- 1 | #include <TH/TH.h> 2 | 3 | int reorg_cpu(THFloatTensor *x_tensor, int w, int h, int c, int batch, int stride, int forward, THFloatTensor *out_tensor) 4 | { 5 | // Grab the tensor 6 | float * x = THFloatTensor_data(x_tensor); 7 | float * out = THFloatTensor_data(out_tensor); 8 | 9 | // https://github.com/pjreddie/darknet/blob/master/src/blas.c 10 | int b,i,j,k; 11 | int out_c = c/(stride*stride); 12 | 13 | for(b = 0; b < batch; ++b){ 14 | for(k = 0; k < c; ++k){ 15 | for(j = 0; j < h; ++j){ 16 | for(i = 0; i < w; ++i){ 17 | int in_index = i + w*(j + h*(k + c*b)); 18 | int c2 = k % out_c; 19 | int offset = k / out_c; 20 | int w2 = i*stride + offset % stride; 21 | int h2 = j*stride + offset / stride; 22 | int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b)); 23 | if(forward) out[out_index] = x[in_index]; 24 | else out[in_index] = x[out_index]; 25 | } 26 | } 27 | } 28 | } 29 | 30 | return 1; 31 | } -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/reorg/src/reorg_cpu.h: -------------------------------------------------------------------------------- 1 | int reorg_cpu(THFloatTensor *x_tensor, int w, int h, int c, int batch, int stride, int forward, THFloatTensor *out_tensor); -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/reorg/src/reorg_cuda.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include "reorg_cuda_kernel.h" 3 | 4 | extern THCState *state; 5 | 6 | int reorg_cuda(THCudaTensor *x_tensor, int w, int h, int c, int batch, int stride, int forward, THCudaTensor *out_tensor) 7 | { 8 | float * x = THCudaTensor_data(state, x_tensor); 9 | float * out = THCudaTensor_data(state, out_tensor); 10 | 11 | cudaStream_t stream = THCState_getCurrentStream(state); 12 | reorg_ongpu(x, w, h, c, batch, stride, forward, out, stream); 13 | 14 | return 1; 15 | } -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/reorg/src/reorg_cuda.h: -------------------------------------------------------------------------------- 1 | int reorg_cuda(THCudaTensor *x_tensor, int w, int h, int c, int batch, int stride, int forward, THCudaTensor *out_tensor); -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/reorg/src/reorg_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | #include <stdio.h> 6 | #include <math.h> 7 | #include <float.h> 8 | #include "reorg_cuda_kernel.h" 9 | 10 | #define BLOCK 512 11 | 12 | dim3 cuda_gridsize(int n) 13 | { 14 | int k = (n-1) / BLOCK + 1; 15 | int x = k; 16 | int y = 1; 17 | if(x > 
65535){ 18 | x = ceil(sqrt(k)); 19 | y = (n-1)/(x*BLOCK) + 1; 20 | } 21 | dim3 d(x, y, 1); 22 | //printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK); 23 | return d; 24 | } 25 | 26 | __global__ void reorg_kernel(int N, float *x, int w, int h, int c, int batch, int stride, int forward, float *out) 27 | { 28 | int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 29 | if(i >= N) return; 30 | int in_index = i; 31 | int in_w = i%w; 32 | i = i/w; 33 | int in_h = i%h; 34 | i = i/h; 35 | int in_c = i%c; 36 | i = i/c; 37 | int b = i%batch; 38 | 39 | int out_c = c/(stride*stride); 40 | 41 | int c2 = in_c % out_c; 42 | int offset = in_c / out_c; 43 | int w2 = in_w*stride + offset % stride; 44 | int h2 = in_h*stride + offset / stride; 45 | //printf("%d\n", offset); 46 | int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b)); 47 | 48 | // printf("%d %d %d\n", w2, h2, c2); 49 | //printf("%d %d\n", in_index, out_index); 50 | //if(out_index >= N || out_index < 0) printf("bad bad bad \n"); 51 | 52 | if(forward) out[out_index] = x[in_index]; 53 | else out[in_index] = x[out_index]; 54 | //if(forward) out[1] = x[1]; 55 | //else out[0] = x[0]; 56 | } 57 | 58 | void reorg_ongpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out, cudaStream_t stream) 59 | { 60 | int size = w*h*c*batch; 61 | cudaError_t err; 62 | 63 | reorg_kernel<<<cuda_gridsize(size), BLOCK, 0, stream>>>(size, x, w, h, c, batch, stride, forward, out); 64 | 65 | err = cudaGetLastError(); 66 | if(cudaSuccess != err) 67 | { 68 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 69 | exit( -1 ); 70 | } 71 | } 72 | 73 | 74 | 75 | #ifdef __cplusplus 76 | } 77 | #endif 78 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/reorg/src/reorg_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _REORG_CUDA_KERNEL 2 | #define _REORG_CUDA_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void reorg_ongpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out, cudaStream_t stream); 9 | 10 | 11 | #ifdef __cplusplus 12 | } 13 | #endif 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/roi_pooling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/layers/roi_pooling/__init__.py -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/roi_pooling/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/layers/roi_pooling/_ext/__init__.py -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/roi_pooling/_ext/roi_pooling/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_pooling import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 |
locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/roi_pooling/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/roi_pooling.c'] 7 | headers = ['src/roi_pooling.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/roi_pooling_cuda.c'] 14 | headers += ['src/roi_pooling_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = os.path.dirname(os.path.realpath(__file__)) 19 | print(this_file) 20 | extra_objects = ['src/cuda/roi_pooling_kernel.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | 23 | ffi = create_extension( 24 | '_ext.roi_pooling', 25 | headers=headers, 26 | sources=sources, 27 | define_macros=defines, 28 | relative_to=__file__, 29 | with_cuda=with_cuda, 30 | extra_objects=extra_objects 31 | ) 32 | 33 | if __name__ == '__main__': 34 | ffi.build() 35 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/roi_pooling/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from ._ext import roi_pooling 4 | 5 | 6 | class RoIPoolFunction(Function): 7 | def __init__(self, pooled_height, pooled_width, spatial_scale): 8 | self.pooled_width = int(pooled_width) 9 | self.pooled_height = int(pooled_height) 10 | self.spatial_scale = float(spatial_scale) 11 | self.output = None 12 | self.argmax = None 13 | self.rois = None 14 | self.feature_size = None 15 | 16 | def forward(self, features, rois): 17 | batch_size, num_channels, data_height, data_width = features.size() 18 | num_rois = rois.size()[0] 19 | output = torch.zeros(num_rois, num_channels, 20 | self.pooled_height, self.pooled_width) 21 | argmax = torch.IntTensor(num_rois, num_channels, 22 | self.pooled_height, self.pooled_width).zero_() 23 | 24 | if not features.is_cuda: 25 | _features = features.permute(0, 2, 3, 1) 26 | roi_pooling.roi_pooling_forward(self.pooled_height, 27 | self.pooled_width, 28 | self.spatial_scale, 29 | _features, 30 | rois, 31 | output) 32 | # output = output.cuda() 33 | else: 34 | output = output.cuda() 35 | argmax = argmax.cuda() 36 | roi_pooling.roi_pooling_forward_cuda(self.pooled_height, 37 | self.pooled_width, 38 | self.spatial_scale, 39 | features, 40 | rois, 41 | output, 42 | argmax) 43 | self.output = output 44 | self.argmax = argmax 45 | self.rois = rois 46 | self.feature_size = features.size() 47 | 48 | return output 49 | 50 | def backward(self, grad_output): 51 | assert(self.feature_size is not None and grad_output.is_cuda) 52 | 53 | batch_size, num_channels, data_height, data_width = self.feature_size 54 | 55 | grad_input = torch.zeros(batch_size, num_channels, 56 | data_height, data_width).cuda() 57 | roi_pooling.roi_pooling_backward_cuda(self.pooled_height, 58 | self.pooled_width, 59 | self.spatial_scale, 60 | grad_output, 61 | self.rois, 62 | grad_input, 63 | self.argmax) 64 | 65 | # print grad_input 66 | 67 | return grad_input, None 68 | 69 | 70 | class RoIPool(torch.nn.Module): 71 | def __init__(self, pooled_height, pooled_width, spatial_scale): 72 | super(RoIPool, self).__init__() 73 
| 74 | self.pooled_width = int(pooled_width) 75 | self.pooled_height = int(pooled_height) 76 | self.spatial_scale = float(spatial_scale) 77 | 78 | def forward(self, features, rois): 79 | return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois) # noqa 80 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/roi_pooling/roi_pool_py.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import numpy as np 5 | 6 | 7 | class RoIPool(nn.Module): 8 | def __init__(self, pooled_height, pooled_width, spatial_scale): 9 | super(RoIPool, self).__init__() 10 | self.pooled_width = int(pooled_width) 11 | self.pooled_height = int(pooled_height) 12 | self.spatial_scale = float(spatial_scale) 13 | 14 | def forward(self, features, rois): 15 | batch_size, num_channels, data_height, data_width = features.size() 16 | num_rois = rois.size()[0] 17 | outputs = Variable(torch.zeros(num_rois, num_channels, 18 | self.pooled_height, 19 | self.pooled_width)).cuda() 20 | 21 | for roi_ind, roi in enumerate(rois): 22 | batch_ind = int(roi[0].data[0]) 23 | roi_start_w, roi_start_h, roi_end_w, roi_end_h = np.round( 24 | roi[1:].data.cpu().numpy() * self.spatial_scale).astype(int) 25 | roi_width = max(roi_end_w - roi_start_w + 1, 1) 26 | roi_height = max(roi_end_h - roi_start_h + 1, 1) 27 | bin_size_w = float(roi_width) / float(self.pooled_width) 28 | bin_size_h = float(roi_height) / float(self.pooled_height) 29 | 30 | for ph in range(self.pooled_height): 31 | hstart = int(np.floor(ph * bin_size_h)) 32 | hend = int(np.ceil((ph + 1) * bin_size_h)) 33 | hstart = min(data_height, max(0, hstart + roi_start_h)) 34 | hend = min(data_height, max(0, hend + roi_start_h)) 35 | for pw in range(self.pooled_width): 36 | wstart = int(np.floor(pw * bin_size_w)) 37 | wend = int(np.ceil((pw + 1) * bin_size_w)) 38 | wstart = min(data_width, max(0, wstart + roi_start_w)) 39 | wend = min(data_width, max(0, wend + roi_start_w)) 40 | 41 | is_empty = (hend <= hstart) or(wend <= wstart) 42 | if is_empty: 43 | outputs[roi_ind, :, ph, pw] = 0 44 | else: 45 | data = features[batch_ind] 46 | outputs[roi_ind, :, ph, pw] = torch.max( 47 | torch.max(data[:, hstart:hend, wstart:wend], 1)[0], 2)[0].view(-1) # noqa 48 | 49 | return outputs 50 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/roi_pooling/src/cuda/roi_pooling_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_POOLING_KERNEL 2 | #define _ROI_POOLING_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int ROIPoolForwardLaucher( 9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 10 | const int width, const int channels, const int pooled_height, 11 | const int pooled_width, const float* bottom_rois, 12 | float* top_data, int* argmax_data, cudaStream_t stream); 13 | 14 | 15 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 16 | const int height, const int width, const int channels, const int pooled_height, 17 | const int pooled_width, const float* bottom_rois, 18 | float* bottom_diff, const int* argmax_data, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | 26 | 
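Both RoI pooling front ends above (roi_pool.py and the pure-Python roi_pool_py.py) consume rois as an (R, 5) tensor laid out as [batch_index, x1, y1, x2, y2] in input-image coordinates, with spatial_scale mapping those coordinates onto the feature map (1/16 for a 16x-downsampled backbone). A usage sketch under those assumptions, in the old pre-0.4 Variable style this repo uses; the import path and all tensor sizes here are illustrative:

import torch
from torch.autograd import Variable
from layers.roi_pooling.roi_pool_py import RoIPool  # path as laid out in this repo

pool = RoIPool(pooled_height=7, pooled_width=7, spatial_scale=1.0 / 16)
features = Variable(torch.randn(1, 512, 38, 50))            # backbone feature map
rois = Variable(torch.FloatTensor([[0, 64, 64, 320, 320],   # [batch_idx, x1, y1, x2, y2]
                                   [0, 16, 32, 240, 400]]))
output = pool(features, rois)                               # -> (2, 512, 7, 7)

Note that roi_pool_py.py allocates its output with .cuda(), so even this "CPU" fallback expects a CUDA build; on a CPU-only machine that call is the first thing to patch.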
-------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/roi_pooling/src/roi_pooling.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output); -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/roi_pooling/src/roi_pooling_cuda.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include <math.h> 3 | #include "cuda/roi_pooling_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 16 | 17 | // Number of ROIs 18 | int num_rois = THCudaTensor_size(state, rois, 0); 19 | int size_rois = THCudaTensor_size(state, rois, 1); 20 | if (size_rois != 5) 21 | { 22 | return 0; 23 | } 24 | 25 | // batch size 26 | int batch_size = THCudaTensor_size(state, features, 0); 27 | if (batch_size != 1) 28 | { 29 | return 0; 30 | } 31 | // data height 32 | int data_height = THCudaTensor_size(state, features, 2); 33 | // data width 34 | int data_width = THCudaTensor_size(state, features, 3); 35 | // Number of channels 36 | int num_channels = THCudaTensor_size(state, features, 1); 37 | 38 | cudaStream_t stream = THCState_getCurrentStream(state); 39 | 40 | ROIPoolForwardLaucher( 41 | data_flat, spatial_scale, num_rois, data_height, 42 | data_width, num_channels, pooled_height, 43 | pooled_width, rois_flat, 44 | output_flat, argmax_flat, stream); 45 | 46 | return 1; 47 | } 48 | 49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 50 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax) 51 | { 52 | // Grab the input tensor 53 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 54 | float * rois_flat = THCudaTensor_data(state, rois); 55 | 56 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 57 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 58 | 59 | // Number of ROIs 60 | int num_rois = THCudaTensor_size(state, rois, 0); 61 | int size_rois = THCudaTensor_size(state, rois, 1); 62 | if (size_rois != 5) 63 | { 64 | return 0; 65 | } 66 | 67 | // batch size 68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 69 | if (batch_size != 1) 70 | { 71 | return 0; 72 | } 73 | // data height 74 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 75 | // data width 76 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 77 | // Number of channels 78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 79 | 80 | cudaStream_t stream = THCState_getCurrentStream(state); 81 | ROIPoolBackwardLaucher( 82 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 83 | data_width, num_channels, pooled_height, 84 | pooled_width, rois_flat, 85 | bottom_grad_flat, argmax_flat, stream); 86 | 87 | return 1; 88 | }
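Two details worth noting in the CUDA entry points above: they reject rois tensors whose second dimension is not 5, and feature batches larger than 1, so every RoI must carry a batch index even though only batch 0 is ever used here. A quick worked example of the RoI-to-feature-map binning arithmetic these kernels share with roi_pool_py.py (the numbers are illustrative only):

# spatial_scale maps image coordinates onto the feature map
spatial_scale = 1.0 / 16                 # backbone downsamples 16x
roi = [0, 64, 48, 303, 207]              # [batch_idx, x1, y1, x2, y2] on the image
x1, y1, x2, y2 = [int(round(v * spatial_scale)) for v in roi[1:]]  # -> 4, 3, 19, 13
roi_w = max(x2 - x1 + 1, 1)              # 16 feature-map columns
roi_h = max(y2 - y1 + 1, 1)              # 11 feature-map rows
bin_w = float(roi_w) / 7                 # ~2.29: each of the 7x7 output bins
bin_h = float(roi_h) / 7                 # ~1.57  max-pools a cell of this size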
-------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/layers/roi_pooling/src/roi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax); 3 | 4 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax); -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd utils 6 | python build.py build_ext --inplace 7 | cd ../ 8 | 9 | cd layers/reorg/src 10 | echo "Compiling reorg layer kernels by nvcc..." 11 | nvcc -c -o reorg_cuda_kernel.cu.o reorg_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_61 12 | cd ../ 13 | python build.py 14 | cd ../ 15 | 16 | cd roi_pooling/src/cuda 17 | echo "Compiling roi_pooling kernels by nvcc..." 18 | nvcc -c -o roi_pooling_kernel.cu.o roi_pooling_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_61 19 | cd ../../ 20 | python build.py 21 | cd ../ 22 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/requirements.txt: -------------------------------------------------------------------------------- 1 | Cython==0.27.3 2 | opencv-python==3.3.0.10 3 | h5py==2.7.1 4 | pycrayon==0.5 -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/utils/__init__.py -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/im_transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | 5 | def imcv2_recolor(im, a=.1): 6 | # t = [np.random.uniform()] 7 | # t += [np.random.uniform()] 8 | # t += [np.random.uniform()] 9 | # t = np.array(t) * 2. - 1. 10 | t = np.random.uniform(-1, 1, 3) 11 | 12 | # random amplify each channel 13 | im = im.astype(np.float) 14 | im *= (1 + t * a) 15 | mx = 255. * (1 + a) 16 | up = np.random.uniform(-1, 1) 17 | im = np.power(im / mx, 1. + up * .5) 18 | # return np.array(im * 255., np.uint8) 19 | return im 20 | 21 | 22 | def imcv2_affine_trans(im): 23 | # Scale and translate 24 | h, w, c = im.shape 25 | scale = np.random.uniform() / 10. + 1. 26 | max_offx = (scale - 1.) * w 27 | max_offy = (scale - 1.) 
* h 28 | offx = int(np.random.uniform() * max_offx) 29 | offy = int(np.random.uniform() * max_offy) 30 | 31 | im = cv2.resize(im, (0, 0), fx=scale, fy=scale) 32 | im = im[offy: (offy + h), offx: (offx + w)] 33 | flip = np.random.uniform() > 0.5 34 | if flip: 35 | im = cv2.flip(im, 1) 36 | 37 | return im, [scale, [offx, offy], flip] 38 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/nms/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dongdonghy/Detection-PyTorch-Notebook/b0c4745150cf019fef2fe661dfe16cc25dd81645/chapter6/yolov2-pytorch/utils/nms/__init__.py -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/nms/gpu_nms.hpp: 
-------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/nms/py_cpu_nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | 11 | def py_cpu_nms(dets, thresh): 12 | """Pure Python NMS baseline.""" 13 | x1 = dets[:, 0] 14 | y1 = dets[:, 1] 15 | x2 = dets[:, 2] 16 | y2 = dets[:, 3] 17 | scores = dets[:, 4] 18 | 19 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 20 | order = scores.argsort()[::-1] 21 | 22 | keep = [] 23 | while order.size > 0: 24 | i = order[0] 25 | keep.append(i) 26 | xx1 = np.maximum(x1[i], x1[order[1:]]) 27 | yy1 = np.maximum(y1[i], y1[order[1:]]) 28 | xx2 = np.minimum(x2[i], x2[order[1:]]) 29 | yy2 = np.minimum(y2[i], y2[order[1:]]) 30 | 31 | w = np.maximum(0.0, xx2 - xx1 + 1) 32 | h = np.maximum(0.0, yy2 - yy1 + 1) 33 | inter = w * h 34 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 35 | 36 | inds = np.where(ovr <= thresh)[0] 37 | order = order[inds + 1] 38 | 39 | return keep 40 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from .nms.cpu_nms import cpu_nms 9 | from .nms.gpu_nms import gpu_nms 10 | 11 | 12 | # def nms(dets, thresh, force_cpu=False): 13 | # """Dispatch to 
either CPU or GPU NMS implementations.""" 14 | # 15 | # if dets.shape[0] == 0: 16 | # return [] 17 | # if cfg.USE_GPU_NMS and not force_cpu: 18 | # return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 19 | # else: 20 | # return cpu_nms(dets, thresh) 21 | 22 | 23 | def nms(dets, thresh, force_cpu=False): 24 | """Dispatch to either CPU or GPU NMS implementations.""" 25 | 26 | if dets.shape[0] == 0: 27 | return [] 28 | if force_cpu: 29 | return cpu_nms(dets, thresh) 30 | return gpu_nms(dets, thresh) 31 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/pycocotools/UPSTREAM_REV: -------------------------------------------------------------------------------- 1 | https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574 2 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/pycocotools/license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of the FreeBSD Project. 27 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | from . import _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. 
For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, 38 | # and uncompressed RLE to encoded RLE mask. 39 | # 40 | # Usage: 41 | # Rs = encode( masks ) 42 | # masks = decode( Rs ) 43 | # R = merge( Rs, intersect=false ) 44 | # o = iou( dt, gt, iscrowd ) 45 | # a = area( Rs ) 46 | # bbs = toBbox( Rs ) 47 | # Rs = frPyObjects( [pyObjects], h, w ) 48 | # 49 | # In the API the following formats are used: 50 | # Rs - [dict] Run-length encoding of binary masks 51 | # R - dict Run-length encoding of binary mask 52 | # masks - [hxwxn] Binary mask(s) 53 | # (must have type np.ndarray(dtype=uint8) in column-major order) 54 | # iscrowd - [nx1] list of np.ndarray. 55 | # 1 indicates corresponding gt image has crowd region to ignore 56 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 57 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 58 | # dt,gt - May be either bounding boxes or encoded masks 59 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 60 | # 61 | # Finally, a note about the intersection over union (iou) computation. 62 | # The standard iou of a ground truth (gt) and detected (dt) object is 63 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 64 | # For "crowd" regions, we use a modified criteria. If a gt object is 65 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 66 | # Choosing gt' in the crowd gt that best matches the dt can be done using 67 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 68 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 69 | # For crowd gt regions we use this modified criteria above for the iou. 70 | # 71 | # To compile run "python setup.py build_ext --inplace" 72 | # Please do not contact us for help with compiling. 73 | # 74 | # Microsoft COCO Toolbox. 
version 2.0 75 | # Data, paper, and tutorials available at: http://mscoco.org/ 76 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 77 | # Licensed under the Simplified BSD License [see coco/license.txt] 78 | 79 | encode = _mask.encode 80 | decode = _mask.decode 81 | iou = _mask.iou 82 | merge = _mask.merge 83 | area = _mask.area 84 | toBbox = _mask.toBbox 85 | frPyObjects = _mask.frPyObjects 86 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/pycocotools/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | #include <stdbool.h> 9 | 10 | typedef unsigned int uint; 11 | typedef unsigned long siz; 12 | typedef unsigned char byte; 13 | typedef double* BB; 14 | typedef struct { siz h, w, m; uint *cnts; } RLE; 15 | 16 | // Initialize/destroy RLE. 17 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 18 | void rleFree( RLE *R ); 19 | 20 | // Initialize/destroy RLE array. 21 | void rlesInit( RLE **R, siz n ); 22 | void rlesFree( RLE **R, siz n ); 23 | 24 | // Encode binary masks using RLE. 25 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 26 | 27 | // Decode binary masks encoded via RLE. 28 | void rleDecode( const RLE *R, byte *mask, siz n ); 29 | 30 | // Compute union or intersection of encoded masks. 31 | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect ); 32 | 33 | // Compute area of encoded masks. 34 | void rleArea( const RLE *R, siz n, uint *a ); 35 | 36 | // Compute intersection over union between masks. 37 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 38 | 39 | // Compute intersection over union between bounding boxes. 40 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 41 | 42 | // Get bounding boxes surrounding encoded masks. 43 | void rleToBbox( const RLE *R, BB bb, siz n ); 44 | 45 | // Convert bounding boxes to encoded masks. 46 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 47 | 48 | // Convert polygon to encoded mask. 49 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 50 | 51 | // Get compressed string representation of encoded mask. 52 | char* rleToString( const RLE *R ); 53 | 54 | // Convert from compressed string representation of encoded mask. 55 | void rleFrString( RLE *R, char *s, siz h, siz w ); 56 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0. 
19 | 20 | def tic(self): 21 | # using time.time instead of time.clock because time.clock 22 | # does not normalize for multithreading 23 | self.start_time = time.time() 24 | 25 | def toc(self, average=True): 26 | self.diff = time.time() - self.start_time 27 | self.total_time += self.diff 28 | self.calls += 1 29 | self.average_time = self.total_time / self.calls 30 | if average: 31 | return self.average_time 32 | else: 33 | return self.diff 34 | 35 | def clear(self): 36 | self.total_time = 0. 37 | self.calls = 0 38 | self.start_time = 0. 39 | self.diff = 0. 40 | self.average_time = 0. 41 | -------------------------------------------------------------------------------- /chapter6/yolov2-pytorch/utils/yolo.pyx: -------------------------------------------------------------------------------- 1 | cimport cython 2 | import numpy as np 3 | cimport numpy as np 4 | 5 | DTYPE = np.float 6 | ctypedef np.float_t DTYPE_t 7 | 8 | cdef extern from "math.h": 9 | double abs(double m) 10 | double log(double x) 11 | 12 | 13 | def yolo_to_bbox( 14 | np.ndarray[DTYPE_t, ndim=4] bbox_pred, 15 | np.ndarray[DTYPE_t, ndim=2] anchors, int H, int W): 16 | return yolo_to_bbox_c(bbox_pred, anchors, H, W) 17 | 18 | cdef yolo_to_bbox_c( 19 | np.ndarray[DTYPE_t, ndim=4] bbox_pred, 20 | np.ndarray[DTYPE_t, ndim=2] anchors, int H, int W): 21 | """ 22 | Parameters 23 | ---------- 24 | bbox_pred: (bsize, HxW, num_anchors, 4) ndarray of float (sig(tx), sig(ty), exp(tw), exp(th)) 25 | anchors: (num_anchors, 2) (pw, ph) 26 | Returns 27 | ------- 28 | bbox_out: (HxWxnum_anchors, 4) ndarray of bbox (x1, y1, x2, y2) rescaled to (0, 1) 29 | """ 30 | cdef unsigned int bsize = bbox_pred.shape[0] 31 | cdef unsigned int num_anchors = anchors.shape[0] 32 | cdef np.ndarray[DTYPE_t, ndim=4] bbox_out = np.zeros((bsize, H*W, num_anchors, 4), dtype=DTYPE) 33 | 34 | cdef DTYPE_t cx, cy, bw, bh 35 | cdef unsigned int row, col, a, ind 36 | for b in range(bsize): 37 | for row in range(H): 38 | for col in range(W): 39 | ind = row * W + col 40 | for a in range(num_anchors): 41 | cx = (bbox_pred[b, ind, a, 0] + col) / W 42 | cy = (bbox_pred[b, ind, a, 1] + row) / H 43 | bw = bbox_pred[b, ind, a, 2] * anchors[a][0] / W * 0.5 44 | bh = bbox_pred[b, ind, a, 3] * anchors[a][1] / H * 0.5 45 | 46 | bbox_out[b, ind, a, 0] = cx - bw 47 | bbox_out[b, ind, a, 1] = cy - bh 48 | bbox_out[b, ind, a, 2] = cx + bw 49 | bbox_out[b, ind, a, 3] = cy + bh 50 | 51 | return bbox_out -------------------------------------------------------------------------------- /chapter7/mobilenet_v1.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | class MobileNet(nn.Module): 4 | def __init__(self): 5 | super(MobileNet, self).__init__() 6 | 7 | def conv_bn(dim_in, dim_out, stride): 8 | return nn.Sequential( 9 | nn.Conv2d(dim_in, dim_out, 3, stride, 1, bias=False), 10 | nn.BatchNorm2d(dim_out), 11 | nn.ReLU(inplace=True) 12 | ) 13 | 14 | def conv_dw(dim_in, dim_out, stride): 15 | return nn.Sequential( 16 | nn.Conv2d(dim_in, dim_in, 3, stride, 1, groups=dim_in, bias=False), 17 | nn.BatchNorm2d(dim_in), 18 | nn.ReLU(inplace=True), 19 | nn.Conv2d(dim_in, dim_out, 1, 1, 0, bias=False), 20 | nn.BatchNorm2d(dim_out), 21 | nn.ReLU(inplace=True), 22 | ) 23 | self.model = nn.Sequential( 24 | conv_bn( 3, 32, 2), 25 | conv_dw( 32, 64, 1), 26 | conv_dw( 64, 128, 2), 27 | conv_dw(128, 128, 1), 28 | conv_dw(128, 256, 2), 29 | conv_dw(256, 256, 1), 30 | conv_dw(256, 512, 2), 31 | conv_dw(512, 512, 1), 32 | conv_dw(512, 512, 
1), 33 | conv_dw(512, 512, 1), 34 | conv_dw(512, 512, 1), 35 | conv_dw(512, 512, 1), 36 | conv_dw(512, 1024, 2), 37 | conv_dw(1024, 1024, 1), 38 | nn.AvgPool2d(7), 39 | ) 40 | self.fc = nn.Linear(1024, 1000) 41 | 42 | def forward(self, x): 43 | x = self.model(x) 44 | x = x.view(-1, 1024) 45 | x = self.fc(x) 46 | return x 47 | 48 | -------------------------------------------------------------------------------- /chapter7/mobilenet_v2_block.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | class InvertedResidual(nn.Module): 4 | 5 | def __init__(self, inp, oup, stride, expand_ratio): 6 | super(InvertedResidual, self).__init__() 7 | self.stride = stride 8 | # the identity shortcut is only valid when the block keeps its shape 9 | self.use_res_connect = stride == 1 and inp == oup 10 | hidden_dim = round(inp * expand_ratio) 11 | self.conv = nn.Sequential( 12 | nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False), 13 | nn.BatchNorm2d(hidden_dim), 14 | nn.ReLU6(inplace=True), 15 | nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), 16 | nn.BatchNorm2d(hidden_dim), 17 | nn.ReLU6(inplace=True), 18 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 19 | nn.BatchNorm2d(oup), 20 | ) 21 | 22 | def forward(self, x): 23 | if self.use_res_connect: 24 | return x + self.conv(x) 25 | return self.conv(x) 26 | -------------------------------------------------------------------------------- /chapter7/shufflenet_v1.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | # conv3x3 and ShuffleUnit are assumed to be defined alongside this snippet 5 | 6 | class ShuffleNet(nn.Module): 7 | 8 | def __init__(self, groups=3, in_channels=3, num_classes=1000): 9 | super(ShuffleNet, self).__init__() 10 | self.groups = groups 11 | self.stage_repeats = [3, 7, 3] 12 | self.in_channels = in_channels 13 | self.num_classes = num_classes 14 | self.stage_out_channels = [-1, 24, 240, 480, 960] 15 | 16 | self.conv1 = conv3x3(self.in_channels, 17 | self.stage_out_channels[1], # stage 1 18 | stride=2) 19 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 20 | self.stage2 = self._make_stage(2) 21 | self.stage3 = self._make_stage(3) 22 | self.stage4 = self._make_stage(4) 23 | num_inputs = self.stage_out_channels[-1] 24 | self.fc = nn.Linear(num_inputs, self.num_classes) 25 | 26 | def _make_stage(self, stage): 27 | modules = OrderedDict() 28 | stage_name = "ShuffleUnit_Stage{}".format(stage) 29 | grouped_conv = stage > 2 30 | first_module = ShuffleUnit( 31 | self.stage_out_channels[stage-1], 32 | self.stage_out_channels[stage], 33 | groups=self.groups, 34 | grouped_conv=grouped_conv, 35 | combine='concat' 36 | ) 37 | modules[stage_name+"_0"] = first_module 38 | for i in range(self.stage_repeats[stage-2]): 39 | name = stage_name + "_{}".format(i+1) 40 | module = ShuffleUnit( 41 | self.stage_out_channels[stage], 42 | self.stage_out_channels[stage], 43 | groups=self.groups, 44 | grouped_conv=True, 45 | combine='add' 46 | ) 47 | modules[name] = module 48 | return nn.Sequential(modules) 49 | 50 | def forward(self, x): 51 | x = self.conv1(x) 52 | x = self.maxpool(x) 53 | x = self.stage2(x) 54 | x = self.stage3(x) 55 | x = self.stage4(x) 56 | x = F.avg_pool2d(x, x.data.size()[-2:]) 57 | x = x.view(x.size(0), -1) 58 | x = self.fc(x) 59 | return F.log_softmax(x, dim=1) 60 | 61 | -------------------------------------------------------------------------------- /chapter7/squeezenet_fire.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | class Fire(nn.Module): 5 | 6 | def __init__(self, inplanes, squeeze_planes, expand_planes): 7 | super(Fire, self).__init__() 8 | self.conv1 = nn.Conv2d(inplanes, 
squeeze_planes, kernel_size=1, stride=1) 9 | self.bn1 = nn.BatchNorm2d(squeeze_planes) 10 | self.relu1 = nn.ReLU(inplace=True) 11 | self.conv2 = nn.Conv2d(squeeze_planes, expand_planes, kernel_size=1, stride=1) 12 | self.bn2 = nn.BatchNorm2d(expand_planes) 13 | self.conv3 = nn.Conv2d(squeeze_planes, expand_planes, kernel_size=3, stride=1, padding=1) 14 | self.bn3 = nn.BatchNorm2d(expand_planes) 15 | self.relu2 = nn.ReLU(inplace=True) 16 | 17 | def forward(self, x): 18 | x = self.conv1(x) 19 | x = self.bn1(x) 20 | x = self.relu1(x) 21 | out1 = self.conv2(x) 22 | out1 = self.bn2(out1) 23 | out2 = self.conv3(x) 24 | out2 = self.bn3(out2) 25 | out = torch.cat([out1, out2], 1) 26 | out = self.relu2(out) 27 | return out 28 | 29 | -------------------------------------------------------------------------------- /chapter8/nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def nms(bboxes, scores, thresh=0.5): 4 | 5 | x1 = bboxes[:,0] 6 | y1 = bboxes[:,1] 7 | x2 = bboxes[:,2] 8 | y2 = bboxes[:,3] 9 | areas = (x2-x1+1)*(y2-y1+1) 10 | _, order = scores.sort(0, descending=True) 11 | keep = [] 12 | 13 | while order.numel() > 0: 14 | if order.numel() == 1: 15 | i = order.item() 16 | keep.append(i) 17 | break 18 | else: 19 | i = order[0].item() 20 | keep.append(i) 21 | xx1 = x1[order[1:]].clamp(min=x1[i]) 22 | yy1 = y1[order[1:]].clamp(min=y1[i]) 23 | xx2 = x2[order[1:]].clamp(max=x2[i]) 24 | yy2 = y2[order[1:]].clamp(max=y2[i]) 25 | inter = (xx2-xx1).clamp(min=0) * (yy2-yy1).clamp(min=0) 26 | iou = inter / (areas[i]+areas[order[1:]]-inter) 27 | idx = (iou <= thresh).nonzero().squeeze() 28 | if idx.numel() == 0: 29 | break 30 | order = order[idx+1] 31 | 32 | return torch.LongTensor(keep) 33 | 34 | -------------------------------------------------------------------------------- /chapter8/retinanet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | # FPN50 (a ResNet-50 feature pyramid backbone) is assumed to be defined elsewhere in this chapter 4 | 5 | class RetinaNet(nn.Module): 6 | 7 | num_anchors = 9 8 | 9 | def __init__(self, num_classes=20): 10 | super(RetinaNet, self).__init__() 11 | self.fpn = FPN50() 12 | self.num_classes = num_classes 13 | self.loc_head = self._make_head(self.num_anchors*4) 14 | self.cls_head = self._make_head(self.num_anchors*self.num_classes) 15 | 16 | def forward(self, x): 17 | fms = self.fpn(x) 18 | loc_preds = [] 19 | cls_preds = [] 20 | for fm in fms: 21 | loc_pred = self.loc_head(fm) 22 | cls_pred = self.cls_head(fm) 23 | loc_pred = loc_pred.permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, 4) 24 | cls_pred = cls_pred.permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, self.num_classes) 25 | loc_preds.append(loc_pred) 26 | cls_preds.append(cls_pred) 27 | return torch.cat(loc_preds, 1), torch.cat(cls_preds, 1) 28 | 29 | def _make_head(self, out_planes): 30 | layers = [] 31 | for _ in range(4): 32 | layers.append(nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)) 33 | layers.append(nn.ReLU(True)) 34 | layers.append(nn.Conv2d(256, out_planes, kernel_size=3, stride=1, padding=1)) 35 | return nn.Sequential(*layers) 36 | 37 | def freeze_bn(self): 38 | # freeze BatchNorm statistics, useful when fine-tuning with small batches 39 | for layer in self.modules(): 40 | if isinstance(layer, nn.BatchNorm2d): 41 | layer.eval() 42 | 43 | --------------------------------------------------------------------------------
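A small sanity check for the nms function in chapter8/nms.py, as a usage sketch; the import path is assumed, and boxes are [x1, y1, x2, y2] with the +1 area convention used above:

import torch
from nms import nms  # assuming chapter8/nms.py is on the path

bboxes = torch.FloatTensor([[10, 10, 50, 50],
                            [12, 12, 52, 52],     # IoU ~0.83 with box 0
                            [100, 100, 140, 140]])
scores = torch.FloatTensor([0.9, 0.8, 0.7])
keep = nms(bboxes, scores, thresh=0.5)
print(keep)  # -> [0, 2]: the overlapping, lower-scored box is suppressed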