├── .gitignore ├── LICENSE ├── README.md ├── cycleGAN_dataset_loader ├── datasets_K_C.py ├── datasets_cityscapes_BDDday.py └── datasets_foggyC_C.py ├── data ├── .gitignore ├── demo │ ├── 000456.jpg │ ├── 000542.jpg │ ├── 001150.jpg │ ├── 001763.jpg │ └── 004545.jpg ├── imgs │ ├── gt.png │ └── pred.png └── scripts │ └── fetch_faster_rcnn_models.sh ├── experiments ├── cfgs │ ├── FPNres50.yml │ ├── mobile.yml │ ├── res101-lg.yml │ ├── res101.yml │ ├── res50.yml │ ├── vgg16.yml │ ├── vgg16_C2BDD.yml │ ├── vgg16_C2F.yml │ └── vgg16_K2C.yml ├── logs │ └── .gitignore └── scripts │ ├── convert_vgg16.sh │ ├── test_adapt_faster_rcnn.sh │ ├── test_adapt_faster_rcnn_stage1.sh │ ├── test_adapt_faster_rcnn_stage2.sh │ ├── test_faster_rcnn.sh │ ├── test_faster_rcnn_notime.sh │ ├── train_adapt_faster_rcnn_stage1.sh │ ├── train_adapt_faster_rcnn_stage2.sh │ ├── train_faster_rcnn.sh │ └── train_faster_rcnn_notime.sh ├── figure ├── adapt_results_c2bdd.png ├── adapt_results_c2f.png ├── adapt_results_k2c.png └── det_results.png ├── lib ├── datasets │ ├── KITTI.py │ ├── VOCdevkit-matlab-wrapper │ │ ├── get_voc_opts.m │ │ ├── voc_eval.m │ │ └── xVOCap.m │ ├── __init__.py │ ├── bdd100k.py │ ├── cityscapes.py │ ├── cityscapes_eval.py │ ├── ds_utils.py │ ├── factory.py │ ├── imdb.py │ ├── pascal_voc.py │ ├── tools │ │ └── mcg_munge.py │ └── voc_eval.py ├── layer_utils │ ├── __init__.py │ ├── anchor_target_layer.py │ ├── generate_anchors.py │ ├── proposal_layer.py │ ├── proposal_target_layer.py │ ├── proposal_top_layer.py │ ├── roi_align │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── crop_and_resize │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── crop_and_resize.py │ │ ├── roi_align.py │ │ └── src │ │ │ ├── crop_and_resize.c │ │ │ ├── crop_and_resize.h │ │ │ ├── crop_and_resize_gpu.c │ │ │ ├── crop_and_resize_gpu.h │ │ │ └── cuda │ │ │ ├── crop_and_resize_kernel.cu │ │ │ ├── crop_and_resize_kernel.cu.o │ │ │ └── crop_and_resize_kernel.h │ ├── roi_pooling │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── roi_pooling │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── roi_pool.py │ │ ├── roi_pool_py.py │ │ └── src │ │ │ ├── cuda │ │ │ ├── roi_pooling_kernel.cu │ │ │ ├── roi_pooling_kernel.cu.o │ │ │ └── roi_pooling_kernel.h │ │ │ ├── roi_pooling.c │ │ │ ├── roi_pooling.h │ │ │ ├── roi_pooling_cuda.c │ │ │ └── roi_pooling_cuda.h │ └── snippets.py ├── make.sh ├── model │ ├── __init__.py │ ├── bbox_transform.py │ ├── config.py │ ├── nms_wrapper.py │ ├── test.py │ ├── test.py~ │ ├── train_val.py │ └── train_val_adapt.py ├── nets │ ├── .py │ ├── __init__.py │ ├── discriminator_img.py │ ├── discriminator_inst.py │ ├── mobilenet_v1.py │ ├── network.py │ ├── resnet_v1.py │ └── vgg16.py ├── nms │ ├── __init__.py │ ├── _ext │ │ ├── __init__.py │ │ └── nms │ │ │ └── __init__.py │ ├── build.py │ ├── pth_nms.py │ └── src │ │ ├── cuda │ │ ├── nms_kernel.cu │ │ ├── nms_kernel.cu.o │ │ └── nms_kernel.h │ │ ├── nms.c │ │ ├── nms.h │ │ ├── nms_cuda.c │ │ └── nms_cuda.h ├── roi_data_layer │ ├── __init__.py │ ├── layer.py │ ├── minibatch.py │ └── roidb.py └── utils │ ├── .gitignore │ ├── __init__.py │ ├── bbox.py │ ├── blob.py │ ├── timer.py │ └── visualization.py ├── requirements.txt ├── tools ├── _init_paths.py ├── convert_from_tensorflow.py ├── convert_from_tensorflow_mobile.py ├── convert_from_tensorflow_vgg.py ├── demo.ipynb ├── demo.py ├── demo_all_bboxes.py ├── reval.py ├── test_net.py ├── trainval_net.py └── trainval_net_adapt.py └── trained_weights ├── .gitignore ├── netD_CsynthBDDday_score.json 
├── netD_CsynthFoggyC_score.json └── netD_synthC_score.json /.gitignore: -------------------------------------------------------------------------------- 1 | *.so 2 | *.pyc 3 | output 4 | tensorboard 5 | lib/build 6 | lib/pycocotools 7 | lib/pycocotools/_mask.c 8 | lib/pycocotools/_mask.so 9 | .idea 10 | results/Main/* 11 | 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Xinlei Chen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Progressive Domain Adaptation for Object Detection 2 | Implementation of our paper **[Progressive Domain Adaptation for Object Detection](https://arxiv.org/pdf/1910.11319.pdf)**, based on [pytorch-faster-rcnn](https://github.com/ruotianluo/pytorch-faster-rcnn/blob/master/README.md) and [PyTorch-CycleGAN](https://github.com/aitorzip/PyTorch-CycleGAN). 3 | 4 | ## Paper 5 | **[Progressive Domain Adaptation for Object Detection](https://arxiv.org/pdf/1910.11319.pdf)** 6 | [Han-Kai Hsu](https://sites.google.com/site/kevinhkhsu/), [Chun-Han Yao](https://www.chhankyao.com/), [Yi-Hsuan Tsai](https://sites.google.com/site/yihsuantsai/home), [Wei-Chih Hung](https://hfslyc.github.io/), [Hung-Yu Tseng](https://sites.google.com/site/hytseng0509/), [Maneesh Singh](https://scholar.google.com/citations?user=hdQhiFgAAAAJ) and [Ming-Hsuan Yang](http://faculty.ucmerced.edu/mhyang/index.html) 7 | IEEE Winter Conference on Applications of Computer Vision (WACV), 2020. 8 | 9 | Please cite our paper if you find it useful for your research. 
10 | ``` 11 | @inproceedings{hsu2020progressivedet, 12 | author = {Han-Kai Hsu and Chun-Han Yao and Yi-Hsuan Tsai and Wei-Chih Hung and Hung-Yu Tseng and Maneesh Singh and Ming-Hsuan Yang}, 13 | booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)}, 14 | title = {Progressive Domain Adaptation for Object Detection}, 15 | year = {2020} 16 | } 17 | ``` 18 | 19 | ## Dependencies 20 | This code is tested with **PyTorch 0.4.1** and **CUDA 9.0**. 21 | ``` 22 | # PyTorch via pip: download and install the PyTorch 0.4.1 wheel for CUDA 9.0 23 | # from https://download.pytorch.org/whl/cu90/torch_stable.html 24 | # PyTorch via conda: 25 | conda install pytorch=0.4.1 cuda90 -c pytorch 26 | # Other dependencies: 27 | pip install -r requirements.txt 28 | sh ./lib/make.sh 29 | ``` 30 | 31 | ## Data Preparation 32 | #### KITTI 33 | - Download the data from [here](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=2d). 34 | - Extract the files under `data/KITTI/`. 35 | 36 | #### Cityscapes 37 | - Download the data from [here](https://www.cityscapes-dataset.com/). 38 | - Extract the files under `data/CityScapes/`. 39 | 40 | #### Foggy Cityscapes 41 | - Follow the instructions [here](https://www.cityscapes-dataset.com/) to request the dataset download. 42 | - Place the data under `data/CityScapes/leftImg8bit/` as `foggytrain` and `foggyval`. 43 | 44 | #### BDD100k 45 | - Download the data from [here](https://bdd-data.berkeley.edu/). 46 | - Extract the files under `data/bdd100k/`. 47 | 48 | ## Generate synthetic data with CycleGAN 49 | Generate the synthetic data with the [PyTorch-CycleGAN](https://github.com/aitorzip/PyTorch-CycleGAN) implementation. 50 | 51 | `git clone https://github.com/aitorzip/PyTorch-CycleGAN` 52 | 53 | #### Dataset loader code 54 | Import the dataset loader code in `./cycleGAN_dataset_loader/` to train/test the CycleGAN on the corresponding image translation task. 55 | 56 | #### Generate from pre-trained weights: 57 | Follow the testing instructions on [PyTorch-CycleGAN](https://github.com/aitorzip/PyTorch-CycleGAN) and download the weights below to generate the synthetic images. (Remember to set the corresponding output image size.) 58 | - [KITTI with Cityscapes style](http://vllab1.ucmerced.edu/~hhsu22/da_det/pretrained_cycleGAN_weight/K_C_model.tar.gz) (KITTI->Cityscapes): size=(376,1244) 59 | Place the generated data under `data/KITTI/training/synthCity_image_2/` with the same naming and folder structure as the original KITTI data. 60 | - [Cityscapes with FoggyCityscapes style](http://vllab1.ucmerced.edu/~hhsu22/da_det/pretrained_cycleGAN_weight/foggyC_C_model.tar.gz) (Cityscapes->FoggyCityscapes): size=(1024,2048) 61 | Place the generated data under `data/CityScapes/leftImg8bit/synthFoggytrain` with the same naming and folder structure as the original Cityscapes data. 62 | - [Cityscapes with BDD style](http://vllab1.ucmerced.edu/~hhsu22/da_det/pretrained_cycleGAN_weight/cityscapes_BDDday_model.tar.gz) (Cityscapes->BDD100k): size=(1024,1280) 63 | Place the generated data under `data/CityScapes/leftImg8bit/synthBDDdaytrain` and `data/CityScapes/leftImg8bit/synthBDDdayval` with the same naming and folder structure as the original Cityscapes data. 64 | 65 | #### Train your own CycleGAN: 66 | Please follow the training instructions on [PyTorch-CycleGAN](https://github.com/aitorzip/PyTorch-CycleGAN).
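For reference, here is a minimal sketch (not the authors' exact training code) of how these loaders plug into the PyTorch-CycleGAN scripts, assuming you copy e.g. `datasets_K_C.py` over that repo's `datasets.py` and run from this repo's root so the hardcoded `./data/...` paths resolve; the transform settings below are assumptions, not the exact values used to train the released weights:
```
# Minimal usage sketch for the K2C loader.
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from datasets import ImageDataset  # copied from cycleGAN_dataset_loader/datasets_K_C.py

transforms_ = [
    transforms.Resize((376, 1244)),  # KITTI output size listed above
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
]
# The root argument is unused by these loaders (image paths are hardcoded).
dataloader = DataLoader(ImageDataset('', transforms_=transforms_, unaligned=True),
                        batch_size=1, shuffle=True, num_workers=4)

for batch in dataloader:
    real_A, real_B = batch['A'], batch['B']  # KITTI image / Cityscapes image
    print(real_A.shape, batch['A_path'])
    break
```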
67 | 68 | ## Test the adaptation model 69 | Download the following adapted weights to `./trained_weights/adapt_weight/` 70 | - [KITTI->Cityscapes](http://vllab1.ucmerced.edu/~hhsu22/da_det/adapt_weight/vgg16_faster_rcnn_K2C_stage2.pth) 71 | - [Cityscapes->FoggyCityscapes](http://vllab1.ucmerced.edu/~hhsu22/da_det/adapt_weight/vgg16_faster_rcnn_C2F_stage2.pth) 72 | - [Cityscapes->BDD100k](http://vllab1.ucmerced.edu/~hhsu22/da_det/adapt_weight/vgg16_faster_rcnn_C2BDD_stage2.pth) 73 | ``` 74 | ./experiments/scripts/test_adapt_faster_rcnn.sh [GPU_ID] [Adapt_mode] vgg16 75 | # Specify the GPU_ID you want to use 76 | # Adapt_mode selection: 77 | # 'K2C': KITTI->Cityscapes 78 | # 'C2F': Cityscapes->Foggy Cityscapes 79 | # 'C2BDD': Cityscapes->BDD100k_day 80 | # Example: 81 | ./experiments/scripts/test_adapt_faster_rcnn.sh 0 K2C vgg16 82 | ``` 83 | 84 | ## Train your own model 85 | #### Stage one 86 | ``` 87 | ./experiments/scripts/train_adapt_faster_rcnn_stage1.sh [GPU_ID] [Adapt_mode] vgg16 88 | # Specify the GPU_ID you want to use 89 | # Adapt_mode selection: 90 | # 'K2C': KITTI->Cityscapes 91 | # 'C2F': Cityscapes->Foggy Cityscapes 92 | # 'C2BDD': Cityscapes->BDD100k_day 93 | # Example: 94 | ./experiments/scripts/train_adapt_faster_rcnn_stage1.sh 0 K2C vgg16 95 | ``` 96 | Stage one initializes from a source-trained detector; download the following pretrained detector weights to `./trained_weights/pretrained_detector/` before training: 97 | - [KITTI for K2C](http://vllab1.ucmerced.edu/~hhsu22/da_det/pretrained_detector/vgg16_faster_rcnn_KITTI_pretrained.pth) 98 | - [Cityscapes for C2F](http://vllab1.ucmerced.edu/~hhsu22/da_det/pretrained_detector/vgg16_faster_rcnn_city_pretrained_8class.pth) 99 | - [Cityscapes for C2BDD](http://vllab1.ucmerced.edu/~hhsu22/da_det/pretrained_detector/vgg16_faster_rcnn_city_pretrained_10class.pth) 100 | 101 | #### Stage two 102 | ``` 103 | ./experiments/scripts/train_adapt_faster_rcnn_stage2.sh 0 K2C vgg16 104 | ``` 105 | Stage two resumes from the stage-one checkpoint and requires the following discriminator score files: 106 | - netD_synthC_score.json 107 | - netD_CsynthFoggyC_score.json 108 | - netD_CsynthBDDday_score.json 109 | 110 | Extract the pretrained [CycleGAN discriminator scores](http://vllab1.ucmerced.edu/~hhsu22/da_det/D_score.tar.gz) to `./trained_weights/`
111 | or
112 | Save a dictionary of CycleGAN discriminator scores, with the image name as the key and the score as the value.
113 | Ex: {'jena_000074_000019_leftImg8bit.png': 0.64} 114 | 115 | ## Detection results 116 | ![](figure/det_results.png) 117 | 118 | ## Adaptation results 119 | ![](figure/adapt_results_k2c.png) 120 | ![](figure/adapt_results_c2f.png) 121 | ![](figure/adapt_results_c2bdd.png) 122 | 123 | ## Acknowledgement 124 | Thanks to the awesome implementations from [pytorch-faster-rcnn](https://github.com/ruotianluo/pytorch-faster-rcnn/blob/master/README.md) and [PyTorch-CycleGAN](https://github.com/aitorzip/PyTorch-CycleGAN). 125 | -------------------------------------------------------------------------------- /cycleGAN_dataset_loader/datasets_K_C.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import random 3 | import os 4 | 5 | from torch.utils.data import Dataset 6 | from PIL import Image 7 | import torchvision.transforms as transforms 8 | 9 | class ImageDataset(Dataset): 10 | def __init__(self, root, transforms_=None, unaligned=False, mode='train'): 11 | self.transform = transforms.Compose(transforms_) 12 | self.unaligned = unaligned 13 | 14 | #self.files_A = sorted(glob.glob(os.path.join(root, '%s/A' % mode) + '/*.*')) 15 | #self.files_B = sorted(glob.glob(os.path.join(root, '%s/B' % mode) + '/*.*')) 16 | kitti_mode = mode 17 | kitti_root = './data/KITTI/training/image_2/' 18 | 19 | with open('./data/KITTI/training/ImageSets/trainval.txt', 'r') as f: 20 | inds = f.readlines() 21 | 22 | self.files_A = sorted([kitti_root + i.strip() + '.png' for i in inds]) 23 | self.files_B = sorted(glob.glob('./data/CityScapes/leftImg8bit/val/*/*.*')) 24 | print(len(self.files_A), len(self.files_B)) 25 | def __getitem__(self, index): 26 | item_A = self.transform(Image.open(self.files_A[index % len(self.files_A)])) 27 | 28 | if self.unaligned: 29 | item_B = self.transform(Image.open(self.files_B[random.randint(0, len(self.files_B) - 1)])) 30 | else: 31 | item_B = self.transform(Image.open(self.files_B[index % len(self.files_B)])) 32 | 33 | return {'A': item_A, 'B': item_B, 'A_path': self.files_A[index % len(self.files_A)], 'B_path': self.files_B[index % len(self.files_B)]} 34 | 35 | def __len__(self): 36 | return max(len(self.files_A), len(self.files_B)) 37 | -------------------------------------------------------------------------------- /cycleGAN_dataset_loader/datasets_cityscapes_BDDday.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import random 3 | import os 4 | 5 | from torch.utils.data import Dataset 6 | from PIL import Image 7 | import torchvision.transforms as transforms 8 | 9 | class ImageDataset(Dataset): 10 | def __init__(self, root, transforms_=None, unaligned=False, mode='train'): 11 | self.transform = transforms.Compose(transforms_) 12 | self.unaligned = unaligned 13 | 14 | #self.files_A = sorted(glob.glob(os.path.join(root, '%s/A' % mode) + '/*.*')) 15 | #self.files_B = sorted(glob.glob(os.path.join(root, '%s/B' % mode) + '/*.*')) 16 | 17 | BDD_root = './data/bdd100k/images/100k/' 18 | #A: Cityscapes 19 | #B: BDD_day 20 | #with open('/home/hhsu22/bdd100k/labels/ImageSets/day%s.txt'%'train', 'r') as f: 21 | # inds = f.readlines() 22 | with open('./data/bdd100k/labels/ImageSets/day%s.txt'%'val', 'r') as f: 23 | inds = f.readlines() 24 | 25 | self.files_A = sorted(glob.glob('./data/CityScapes/leftImg8bit/%s/*/*.*'%'train') + glob.glob('./data/CityScapes/leftImg8bit/%s/*/*.*'%'val')) 26 | 27 | self.files_B = sorted([os.path.join(BDD_root, i.strip()) for i in inds]) 28 | 29 | 
print(mode) 30 | print(len(self.files_A), len(self.files_B)) 31 | 32 | def __getitem__(self, index): 33 | item_A = self.transform(Image.open(self.files_A[index % len(self.files_A)])) 34 | 35 | if self.unaligned: 36 | item_B = self.transform(Image.open(self.files_B[random.randint(0, len(self.files_B) - 1)])) 37 | else: 38 | item_B = self.transform(Image.open(self.files_B[index % len(self.files_B)])) 39 | 40 | return {'A': item_A, 'B': item_B, 'A_path': self.files_A[index % len(self.files_A)], 'B_path': self.files_B[index % len(self.files_B)]} 41 | 42 | def __len__(self): 43 | return max(len(self.files_A), len(self.files_B)) 44 | -------------------------------------------------------------------------------- /cycleGAN_dataset_loader/datasets_foggyC_C.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import random 3 | import os 4 | 5 | from torch.utils.data import Dataset 6 | from PIL import Image 7 | import torchvision.transforms as transforms 8 | 9 | class ImageDataset(Dataset): 10 | def __init__(self, root, transforms_=None, unaligned=False, mode='train'): 11 | self.transform = transforms.Compose(transforms_) 12 | self.unaligned = unaligned 13 | 14 | #self.files_A = sorted(glob.glob(os.path.join(root, '%s/A' % mode) + '/*.*')) 15 | #self.files_B = sorted(glob.glob(os.path.join(root, '%s/B' % mode) + '/*.*')) 16 | 17 | self.files_A = sorted(glob.glob('./data/CityScapes/leftImg8bit/%s/*/*.*'%mode)) 18 | self.files_B = sorted(glob.glob('./data/CityScapes/leftImg8bit/foggy%s/*/*.*'%mode)) 19 | print(mode) 20 | print(len(self.files_B)) 21 | def __getitem__(self, index): 22 | item_A = self.transform(Image.open(self.files_A[index % len(self.files_A)])) 23 | 24 | if self.unaligned: 25 | item_B = self.transform(Image.open(self.files_B[random.randint(0, len(self.files_B) - 1)])) 26 | else: 27 | item_B = self.transform(Image.open(self.files_B[index % len(self.files_B)])) 28 | 29 | return {'A': item_A, 'B': item_B, 'A_path': self.files_A[index % len(self.files_A)], 'B_path': self.files_B[index % len(self.files_B)]} 30 | 31 | def __len__(self): 32 | return max(len(self.files_A), len(self.files_B)) 33 | -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | selective_search* 2 | imagenet_* 3 | fast_rcnn_* 4 | VOCdevkit* 5 | coco* 6 | cache 7 | KITTI* 8 | CityScapes* 9 | bdd100k* 10 | -------------------------------------------------------------------------------- /data/demo/000456.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/data/demo/000456.jpg -------------------------------------------------------------------------------- /data/demo/000542.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/data/demo/000542.jpg -------------------------------------------------------------------------------- /data/demo/001150.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/data/demo/001150.jpg -------------------------------------------------------------------------------- /data/demo/001763.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/data/demo/001763.jpg -------------------------------------------------------------------------------- /data/demo/004545.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/data/demo/004545.jpg -------------------------------------------------------------------------------- /data/imgs/gt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/data/imgs/gt.png -------------------------------------------------------------------------------- /data/imgs/pred.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/data/imgs/pred.png -------------------------------------------------------------------------------- /data/scripts/fetch_faster_rcnn_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )" 4 | cd $DIR 5 | 6 | NET=res101 7 | FILE=voc_0712_80k-110k.tgz 8 | # replace it with gs11655.sp.cs.cmu.edu if ladoga.graphics.cs.cmu.edu does not work 9 | URL=http://ladoga.graphics.cs.cmu.edu/xinleic/tf-faster-rcnn/$NET/$FILE 10 | CHECKSUM=cb32e9df553153d311cc5095b2f8c340 11 | 12 | if [ -f $FILE ]; then 13 | echo "File already exists. Checking md5..." 14 | os=`uname -s` 15 | if [ "$os" = "Linux" ]; then 16 | checksum=`md5sum $FILE | awk '{ print $1 }'` 17 | elif [ "$os" = "Darwin" ]; then 18 | checksum=`cat $FILE | md5` 19 | fi 20 | if [ "$checksum" = "$CHECKSUM" ]; then 21 | echo "Checksum is correct. No need to download." 22 | exit 0 23 | else 24 | echo "Checksum is incorrect. Need to download again." 25 | fi 26 | fi 27 | 28 | echo "Downloading ResNet-101 Faster R-CNN models pre-trained on VOC 07+12 (340M)..." 29 | 30 | wget $URL -O $FILE 31 | 32 | echo "Unzipping..." 33 | 34 | tar zxvf $FILE 35 | 36 | echo "Done. Please run this command again to verify that checksum = $CHECKSUM."
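# Usage: sh data/scripts/fetch_faster_rcnn_models.sh
# The script cd's into data/, downloads and extracts the VOC 07+12 ResNet-101
# model, and on a second run verifies the archive against the MD5 checksum above.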
37 | -------------------------------------------------------------------------------- /experiments/cfgs/FPNres50.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: FPNres50 2 | TRAIN: 3 | HAS_RPN: True 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | DOUBLE_BIAS: False 13 | SNAPSHOT_PREFIX: FPNres50_faster_rcnn_imnet_new 14 | SNAPSHOT_ITERS: 1000 15 | TEST: 16 | HAS_RPN: True 17 | POOLING_MODE: crop 18 | FPN: True 19 | -------------------------------------------------------------------------------- /experiments/cfgs/mobile.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: mobile 2 | TRAIN: 3 | HAS_RPN: True 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | DOUBLE_BIAS: False 13 | SNAPSHOT_PREFIX: mobile_faster_rcnn 14 | TEST: 15 | HAS_RPN: True 16 | POOLING_MODE: crop 17 | -------------------------------------------------------------------------------- /experiments/cfgs/res101-lg.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res101-lg 2 | TRAIN: 3 | HAS_RPN: True 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | DOUBLE_BIAS: False 13 | SNAPSHOT_PREFIX: res101_faster_rcnn 14 | SCALES: [800] 15 | MAX_SIZE: 1333 16 | TEST: 17 | HAS_RPN: True 18 | SCALES: [800] 19 | MAX_SIZE: 1333 20 | RPN_POST_NMS_TOP_N: 1000 21 | POOLING_MODE: crop 22 | ANCHOR_SCALES: [2,4,8,16,32] 23 | -------------------------------------------------------------------------------- /experiments/cfgs/res101.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res101 2 | TRAIN: 3 | HAS_RPN: True 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | DOUBLE_BIAS: False 13 | SNAPSHOT_PREFIX: res101_faster_rcnn 14 | TEST: 15 | HAS_RPN: True 16 | POOLING_MODE: crop 17 | -------------------------------------------------------------------------------- /experiments/cfgs/res50.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res50 2 | TRAIN: 3 | HAS_RPN: True 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | DOUBLE_BIAS: False 13 | SNAPSHOT_PREFIX: res50_faster_rcnn_img_synthC2C_from_K2synthC_unfix 14 | TEST: 15 | HAS_RPN: True 16 | POOLING_MODE: crop 17 | -------------------------------------------------------------------------------- /experiments/cfgs/vgg16.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: vgg16 2 | TRAIN: 3 | HAS_RPN: True 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | TEST: 13 | HAS_RPN: True 14 
| POOLING_MODE: crop 15 | -------------------------------------------------------------------------------- /experiments/cfgs/vgg16_C2BDD.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: vgg16 2 | ADAPT_MODE: C2BDD 3 | TRAIN: 4 | HAS_RPN: True 5 | IMS_PER_BATCH: 1 6 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 7 | RPN_POSITIVE_OVERLAP: 0.7 8 | RPN_BATCHSIZE: 256 9 | PROPOSAL_METHOD: gt 10 | BG_THRESH_LO: 0.0 11 | DISPLAY: 20 12 | BATCH_SIZE: 256 13 | TEST: 14 | HAS_RPN: True 15 | POOLING_MODE: crop 16 | -------------------------------------------------------------------------------- /experiments/cfgs/vgg16_C2F.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: vgg16 2 | ADAPT_MODE: C2F 3 | TRAIN: 4 | HAS_RPN: True 5 | IMS_PER_BATCH: 1 6 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 7 | RPN_POSITIVE_OVERLAP: 0.7 8 | RPN_BATCHSIZE: 256 9 | PROPOSAL_METHOD: gt 10 | BG_THRESH_LO: 0.0 11 | DISPLAY: 20 12 | BATCH_SIZE: 256 13 | TEST: 14 | HAS_RPN: True 15 | POOLING_MODE: crop 16 | -------------------------------------------------------------------------------- /experiments/cfgs/vgg16_K2C.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: vgg16 2 | ADAPT_MODE: K2C 3 | TRAIN: 4 | HAS_RPN: True 5 | IMS_PER_BATCH: 1 6 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 7 | RPN_POSITIVE_OVERLAP: 0.7 8 | RPN_BATCHSIZE: 256 9 | PROPOSAL_METHOD: gt 10 | BG_THRESH_LO: 0.0 11 | DISPLAY: 20 12 | BATCH_SIZE: 256 13 | TEST: 14 | HAS_RPN: True 15 | POOLING_MODE: crop 16 | -------------------------------------------------------------------------------- /experiments/logs/.gitignore: -------------------------------------------------------------------------------- 1 | *.txt.* 2 | -------------------------------------------------------------------------------- /experiments/scripts/convert_vgg16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | GPU_ID=$1 9 | DATASET=$2 10 | NET=vgg16 11 | 12 | array=( $@ ) 13 | len=${#array[@]} 14 | EXTRA_ARGS=${array[@]:2:$len} 15 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 16 | 17 | case ${DATASET} in 18 | pascal_voc) 19 | TRAIN_IMDB="voc_2007_trainval" 20 | TEST_IMDB="voc_2007_test" 21 | ITERS=70000 22 | ANCHORS="[8,16,32]" 23 | RATIOS="[0.5,1,2]" 24 | ;; 25 | pascal_voc_0712) 26 | TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval" 27 | TEST_IMDB="voc_2007_test" 28 | ITERS=110000 29 | ANCHORS="[8,16,32]" 30 | RATIOS="[0.5,1,2]" 31 | ;; 32 | coco) 33 | TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival" 34 | TEST_IMDB="coco_2014_minival" 35 | ITERS=490000 36 | ANCHORS="[4,8,16,32]" 37 | RATIOS="[0.5,1,2]" 38 | ;; 39 | *) 40 | echo "No dataset given" 41 | exit 42 | ;; 43 | esac 44 | 45 | set +x 46 | NET_FINAL=${NET}_faster_rcnn_iter_${ITERS} 47 | set -x 48 | 49 | if [ ! -f ${NET_FINAL}.index ]; then 50 | if [[ ! 
-z ${EXTRA_ARGS_SLUG} ]]; then 51 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/convert_from_depre.py \ 52 | --snapshot ${NET_FINAL} \ 53 | --imdb ${TRAIN_IMDB} \ 54 | --iters ${ITERS} \ 55 | --cfg experiments/cfgs/${NET}.yml \ 56 | --tag ${EXTRA_ARGS_SLUG} \ 57 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} ${EXTRA_ARGS} 58 | else 59 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/convert_from_depre.py \ 60 | --snapshot ${NET_FINAL} \ 61 | --imdb ${TRAIN_IMDB} \ 62 | --iters ${ITERS} \ 63 | --cfg experiments/cfgs/${NET}.yml \ 64 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} ${EXTRA_ARGS} 65 | fi 66 | fi 67 | 68 | -------------------------------------------------------------------------------- /experiments/scripts/test_adapt_faster_rcnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | GPU_ID=$1 9 | ADAPT_MODE=$2 10 | NET=$3 11 | 12 | array=( $@ ) 13 | len=${#array[@]} 14 | EXTRA_ARGS=${array[@]:4:$len} 15 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 16 | 17 | case ${ADAPT_MODE} in 18 | K2C) 19 | TRAIN_IMDB_S="KITTI_synthCity" 20 | TRAIN_IMDB_T="cityscapes_train" 21 | TEST_IMDB="cityscapes_val" 22 | STEPSIZE="[50000]" 23 | ANCHORS="[4,8,16,32]" 24 | RATIOS="[0.5,1,2]" 25 | NET_FINAL='trained_weights/adapt_weight/vgg16_faster_rcnn_K2C_stage2.pth' 26 | ;; 27 | C2F) 28 | TRAIN_IMDB_S="cityscapes_synthFoggytrain" 29 | TRAIN_IMDB_T="cityscapes_foggytrain" 30 | TEST_IMDB="cityscapes_foggyval" 31 | STEPSIZE="[50000]" 32 | ANCHORS="[4,8,16,32]" 33 | RATIOS="[0.5,1,2]" 34 | NET_FINAL='trained_weights/adapt_weight/vgg16_faster_rcnn_C2F_stage2.pth' 35 | ;; 36 | C2BDD) 37 | TRAIN_IMDB_S="cityscapes_synthBDDdaytrain+cityscapes_synthBDDdayval" 38 | TRAIN_IMDB_T="bdd100k_train" 39 | TEST_IMDB="bdd100k_dayval" 40 | STEPSIZE="[50000]" 41 | ANCHORS="[4,8,16,32]" 42 | RATIOS="[0.5,1,2]" 43 | NET_FINAL='trained_weights/adapt_weight/vgg16_faster_rcnn_C2BDD_stage2.pth' 44 | ;; 45 | *) 46 | echo "No dataset given" 47 | exit 48 | ;; 49 | esac 50 | 51 | LOG="experiments/logs/test_${NET}_${TRAIN_IMDB_S}_adapt_${TEST_IMDB}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 52 | exec &> >(tee -a "$LOG") 53 | echo Logging output to "$LOG" 54 | 55 | set +x 56 | 57 | if [[ ! 
-z ${EXTRA_ARGS_SLUG} ]]; then 58 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/test_net.py \ 59 | --imdb ${TEST_IMDB} \ 60 | --model ${NET_FINAL} \ 61 | --cfg experiments/cfgs/${NET}_${ADAPT_MODE}.yml \ 62 | --tag ${EXTRA_ARGS_SLUG}_adapt \ 63 | --net ${NET} \ 64 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 65 | ${EXTRA_ARGS} 66 | else 67 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/test_net.py \ 68 | --imdb ${TEST_IMDB} \ 69 | --model ${NET_FINAL} \ 70 | --cfg experiments/cfgs/${NET}_${ADAPT_MODE}.yml \ 71 | --tag ${EXTRA_ARGS_SLUG}_adapt \ 72 | --net ${NET} \ 73 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 74 | ${EXTRA_ARGS} 75 | fi 76 | 77 | -------------------------------------------------------------------------------- /experiments/scripts/test_adapt_faster_rcnn_stage1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | GPU_ID=$1 9 | ADAPT_MODE=$2 10 | NET=$3 11 | TEST_ITER=$4 12 | 13 | array=( $@ ) 14 | len=${#array[@]} 15 | EXTRA_ARGS=${array[@]:4:$len} 16 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 17 | 18 | case ${ADAPT_MODE} in 19 | K2C) 20 | TRAIN_IMDB_S="KITTI_train+KITTI_val" 21 | TRAIN_IMDB_T="KITTI_synthCity" 22 | TEST_IMDB="cityscapes_val" 23 | STEPSIZE="[50000]" 24 | ITERS=${TEST_ITER} 25 | ANCHORS="[4,8,16,32]" 26 | RATIOS="[0.5,1,2]" 27 | ;; 28 | C2F) 29 | TRAIN_IMDB_S="cityscapes_train" 30 | TRAIN_IMDB_T="cityscapes_synthFoggytrain" 31 | TEST_IMDB="cityscapes_foggyval" 32 | STEPSIZE="[50000]" 33 | ITERS=${TEST_ITER} 34 | ANCHORS="[4,8,16,32]" 35 | RATIOS="[0.5,1,2]" 36 | ;; 37 | C2BDD) 38 | TRAIN_IMDB_S="cityscapes_train+cityscapes_val" 39 | TRAIN_IMDB_T="cityscapes_synthBDDdaytrain+cityscapes_synthBDDdayval" 40 | TEST_IMDB="bdd100k_dayval" 41 | STEPSIZE="[50000]" 42 | ITERS=${TEST_ITER} 43 | ANCHORS="[4,8,16,32]" 44 | RATIOS="[0.5,1,2]" 45 | ;; 46 | *) 47 | echo "No dataset given" 48 | exit 49 | ;; 50 | esac 51 | 52 | LOG="experiments/logs/test_${NET}_${TRAIN_IMDB_S}_adapt_${TEST_IMDB}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 53 | exec &> >(tee -a "$LOG") 54 | echo Logging output to "$LOG" 55 | 56 | set +x 57 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 58 | NET_FINAL=output/${NET}/${TRAIN_IMDB_S}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth 59 | else 60 | NET_FINAL=output/${NET}/${TRAIN_IMDB_S}/_adapt/${NET}_faster_rcnn_${ADAPT_MODE}_stage1_iter_${ITERS}.pth 61 | fi 62 | 63 | if [[ ! 
-z ${EXTRA_ARGS_SLUG} ]]; then 64 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/test_net.py \ 65 | --imdb ${TEST_IMDB} \ 66 | --model ${NET_FINAL} \ 67 | --cfg experiments/cfgs/${NET}_${ADAPT_MODE}.yml \ 68 | --tag ${EXTRA_ARGS_SLUG}_adapt \ 69 | --net ${NET} \ 70 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 71 | ${EXTRA_ARGS} 72 | else 73 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/test_net.py \ 74 | --imdb ${TEST_IMDB} \ 75 | --model ${NET_FINAL} \ 76 | --cfg experiments/cfgs/${NET}_${ADAPT_MODE}.yml \ 77 | --tag ${EXTRA_ARGS_SLUG}_adapt \ 78 | --net ${NET} \ 79 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 80 | ${EXTRA_ARGS} 81 | fi 82 | 83 | -------------------------------------------------------------------------------- /experiments/scripts/test_adapt_faster_rcnn_stage2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | GPU_ID=$1 9 | ADAPT_MODE=$2 10 | NET=$3 11 | TEST_ITER=$4 12 | 13 | array=( $@ ) 14 | len=${#array[@]} 15 | EXTRA_ARGS=${array[@]:4:$len} 16 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 17 | 18 | case ${ADAPT_MODE} in 19 | K2C) 20 | TRAIN_IMDB_S="KITTI_synthCity" 21 | TRAIN_IMDB_T="cityscapes_train" 22 | TEST_IMDB="cityscapes_val" 23 | STEPSIZE="[50000]" 24 | ITERS=${TEST_ITER} 25 | ANCHORS="[4,8,16,32]" 26 | RATIOS="[0.5,1,2]" 27 | ;; 28 | C2F) 29 | TRAIN_IMDB_S="cityscapes_synthFoggytrain" 30 | TRAIN_IMDB_T="cityscapes_foggytrain" 31 | TEST_IMDB="cityscapes_foggyval" 32 | STEPSIZE="[50000]" 33 | ITERS=${TEST_ITER} 34 | ANCHORS="[4,8,16,32]" 35 | RATIOS="[0.5,1,2]" 36 | ;; 37 | C2BDD) 38 | TRAIN_IMDB_S="cityscapes_synthBDDdaytrain+cityscapes_synthBDDdayval" 39 | TRAIN_IMDB_T="bdd100k_train" 40 | TEST_IMDB="bdd100k_dayval" 41 | STEPSIZE="[50000]" 42 | ITERS=${TEST_ITER} 43 | ANCHORS="[4,8,16,32]" 44 | RATIOS="[0.5,1,2]" 45 | ;; 46 | *) 47 | echo "No dataset given" 48 | exit 49 | ;; 50 | esac 51 | 52 | LOG="experiments/logs/test_${NET}_${TRAIN_IMDB_S}_adapt_${TEST_IMDB}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 53 | exec &> >(tee -a "$LOG") 54 | echo Logging output to "$LOG" 55 | 56 | set +x 57 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 58 | NET_FINAL=output/${NET}/${TRAIN_IMDB_S}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth 59 | else 60 | NET_FINAL=output/${NET}/${TRAIN_IMDB_S}/_adapt/${NET}_faster_rcnn_${ADAPT_MODE}_stage2_iter_${ITERS}.pth 61 | fi 62 | 63 | if [[ ! 
-z ${EXTRA_ARGS_SLUG} ]]; then 64 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/test_net.py \ 65 | --imdb ${TEST_IMDB} \ 66 | --model ${NET_FINAL} \ 67 | --cfg experiments/cfgs/${NET}_${ADAPT_MODE}.yml \ 68 | --tag ${EXTRA_ARGS_SLUG}_adapt \ 69 | --net ${NET} \ 70 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 71 | ${EXTRA_ARGS} 72 | else 73 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/test_net.py \ 74 | --imdb ${TEST_IMDB} \ 75 | --model ${NET_FINAL} \ 76 | --cfg experiments/cfgs/${NET}_${ADAPT_MODE}.yml \ 77 | --tag ${EXTRA_ARGS_SLUG}_adapt \ 78 | --net ${NET} \ 79 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 80 | ${EXTRA_ARGS} 81 | fi 82 | 83 | -------------------------------------------------------------------------------- /experiments/scripts/test_faster_rcnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | GPU_ID=$1 9 | DATASET=$2 10 | NET=$3 11 | 12 | array=( $@ ) 13 | len=${#array[@]} 14 | EXTRA_ARGS=${array[@]:3:$len} 15 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 16 | 17 | case ${DATASET} in 18 | pascal_voc) 19 | TRAIN_IMDB="voc_2007_trainval" 20 | TEST_IMDB="voc_2007_test" 21 | ITERS=70000 22 | ANCHORS="[8,16,32]" 23 | RATIOS="[0.5,1,2]" 24 | ;; 25 | pascal_voc_0712) 26 | TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval" 27 | TEST_IMDB="voc_2007_test" 28 | ITERS=110000 29 | ANCHORS="[8,16,32]" 30 | RATIOS="[0.5,1,2]" 31 | ;; 32 | coco) 33 | TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival" 34 | TEST_IMDB="coco_2014_minival" 35 | ITERS=490000 36 | ANCHORS="[4,8,16,32]" 37 | RATIOS="[0.5,1,2]" 38 | ;; 39 | KITTI) 40 | TRAIN_IMDB="KITTI_train" 41 | TEST_IMDB="KITTI_val" 42 | STEPSIZE="[350000]" 43 | ITERS=490000 44 | ANCHORS="[4,8,16,32]" 45 | RATIOS="[0.5,1,2]" 46 | ;; 47 | cityscapes) 48 | TRAIN_IMDB="cityscapes_train+cityscapes_val" 49 | TEST_IMDB="bdd100k_dayval" 50 | STEPSIZE="[350000]" 51 | ITERS=110000 52 | ANCHORS="[4,8,16,32]" 53 | RATIOS="[0.5,1,2]" 54 | ;; 55 | foggyCity) 56 | TRAIN_IMDB="cityscapes_foggytrain" 57 | TEST_IMDB="cityscapes_foggyval" 58 | STEPSIZE="[350000]" 59 | ITERS=80000 60 | ANCHORS="[4,8,16,32]" 61 | RATIOS="[0.5,1,2]" 62 | ;; 63 | bdd100k) 64 | TRAIN_IMDB="bdd100k_nighttrain" 65 | TEST_IMDB="bdd100k_nightval" 66 | STEPSIZE="[350000]" 67 | ITERS=200000 68 | ANCHORS="[4,8,16,32]" 69 | RATIOS="[0.5,1,2]" 70 | ;; 71 | *) 72 | echo "No dataset given" 73 | exit 74 | ;; 75 | esac 76 | 77 | LOG="experiments/logs/test_${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 78 | exec &> >(tee -a "$LOG") 79 | echo Logging output to "$LOG" 80 | 81 | set +x 82 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 83 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth 84 | else 85 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/default/${NET}_faster_rcnn_iter_${ITERS}.pth 86 | fi 87 | set -x 88 | 89 | if [[ ! 
-z ${EXTRA_ARGS_SLUG} ]]; then 90 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/test_net.py \ 91 | --imdb ${TEST_IMDB} \ 92 | --model ${NET_FINAL} \ 93 | --cfg experiments/cfgs/${NET}.yml \ 94 | --tag ${EXTRA_ARGS_SLUG} \ 95 | --net ${NET} \ 96 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 97 | ${EXTRA_ARGS} 98 | else 99 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/test_net.py \ 100 | --imdb ${TEST_IMDB} \ 101 | --model ${NET_FINAL} \ 102 | --cfg experiments/cfgs/${NET}.yml \ 103 | --net ${NET} \ 104 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 105 | ${EXTRA_ARGS} 106 | fi 107 | 108 | -------------------------------------------------------------------------------- /experiments/scripts/test_faster_rcnn_notime.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | GPU_ID=$1 9 | DATASET=$2 10 | NET=$3 11 | 12 | array=( $@ ) 13 | len=${#array[@]} 14 | EXTRA_ARGS=${array[@]:3:$len} 15 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 16 | 17 | case ${DATASET} in 18 | pascal_voc) 19 | TRAIN_IMDB="voc_2007_trainval" 20 | TEST_IMDB="voc_2007_test" 21 | ITERS=70000 22 | ANCHORS="[8,16,32]" 23 | RATIOS="[0.5,1,2]" 24 | ;; 25 | pascal_voc_0712) 26 | TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval" 27 | TEST_IMDB="voc_2007_test" 28 | ITERS=110000 29 | ANCHORS="[8,16,32]" 30 | RATIOS="[0.5,1,2]" 31 | ;; 32 | coco) 33 | TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival" 34 | TEST_IMDB="coco_2014_minival" 35 | ITERS=490000 36 | ANCHORS="[4,8,16,32]" 37 | RATIOS="[0.5,1,2]" 38 | ;; 39 | *) 40 | echo "No dataset given" 41 | exit 42 | ;; 43 | esac 44 | 45 | LOG="experiments/logs/test_${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 46 | exec &> >(tee -a "$LOG") 47 | echo Logging output to "$LOG" 48 | 49 | set +x 50 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 51 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth 52 | else 53 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/default/${NET}_faster_rcnn_iter_${ITERS}.pth 54 | fi 55 | set -x 56 | 57 | if [[ ! 
-z ${EXTRA_ARGS_SLUG} ]]; then 58 | CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/test_net.py \ 59 | --imdb ${TEST_IMDB} \ 60 | --model ${NET_FINAL} \ 61 | --cfg experiments/cfgs/${NET}.yml \ 62 | --tag ${EXTRA_ARGS_SLUG} \ 63 | --net ${NET} \ 64 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 65 | ${EXTRA_ARGS} 66 | else 67 | CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/test_net.py \ 68 | --imdb ${TEST_IMDB} \ 69 | --model ${NET_FINAL} \ 70 | --cfg experiments/cfgs/${NET}.yml \ 71 | --net ${NET} \ 72 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 73 | ${EXTRA_ARGS} 74 | fi 75 | 76 | -------------------------------------------------------------------------------- /experiments/scripts/train_adapt_faster_rcnn_stage1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | GPU_ID=$1 9 | ADAPT_MODE=$2 10 | NET=$3 11 | 12 | array=( $@ ) 13 | len=${#array[@]} 14 | EXTRA_ARGS=${array[@]:3:$len} 15 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 16 | 17 | case ${ADAPT_MODE} in 18 | K2C) 19 | PRETRAINED_WEIGHT="${NET}_faster_rcnn_KITTI_pretrained.pth" 20 | SNAPSHOT_PREFIX="${NET}_faster_rcnn_${ADAPT_MODE}_stage1" 21 | TRAIN_IMDB_S="KITTI_train+KITTI_val" 22 | TRAIN_IMDB_T="KITTI_synthCity" 23 | TEST_IMDB="cityscapes_val" 24 | STEPSIZE="[50000]" 25 | ITERS=70000 26 | ANCHORS="[4,8,16,32]" 27 | RATIOS="[0.5,1,2]" 28 | ;; 29 | C2F) 30 | PRETRAINED_WEIGHT="${NET}_faster_rcnn_city_pretrained_8class.pth" 31 | SNAPSHOT_PREFIX="${NET}_faster_rcnn_${ADAPT_MODE}_stage1" 32 | TRAIN_IMDB_S="cityscapes_train" 33 | TRAIN_IMDB_T="cityscapes_synthFoggytrain" 34 | TEST_IMDB="cityscapes_foggyval" 35 | STEPSIZE="[50000]" 36 | ITERS=70000 37 | ANCHORS="[4,8,16,32]" 38 | RATIOS="[0.5,1,2]" 39 | ;; 40 | C2BDD) 41 | PRETRAINED_WEIGHT="${NET}_faster_rcnn_city_pretrained_10class.pth" 42 | SNAPSHOT_PREFIX="${NET}_faster_rcnn_${ADAPT_MODE}_stage1" 43 | TRAIN_IMDB_S="cityscapes_train+cityscapes_val" 44 | TRAIN_IMDB_T="cityscapes_synthBDDdaytrain+cityscapes_synthBDDdayval" 45 | TEST_IMDB="bdd100k_dayval" 46 | STEPSIZE="[50000]" 47 | ITERS=70000 48 | ANCHORS="[4,8,16,32]" 49 | RATIOS="[0.5,1,2]" 50 | ;; 51 | *) 52 | echo "No dataset given" 53 | exit 54 | ;; 55 | esac 56 | 57 | LOG="experiments/logs/${NET}_${TRAIN_IMDB_S}2${TRAIN_IMDB_T}_${EXTRA_ARGS_SLUG}_${NET}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 58 | exec &> >(tee -a "$LOG") 59 | echo Logging output to "$LOG" 60 | 61 | set +x 62 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 63 | NET_FINAL=output/${NET}/${TRAIN_IMDB_S}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth 64 | else 65 | NET_FINAL=output/${NET}/${TRAIN_IMDB_S}/adapt/${NET}_faster_rcnn_iter_${ITERS}.pth 66 | fi 67 | set -x 68 | 69 | if [ ! -f ${NET_FINAL}.index ]; then 70 | if [[ ! 
-z ${EXTRA_ARGS_SLUG} ]]; then 71 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/trainval_net_adapt.py \ 72 | --weight trained_weights/pretrained_detector/${PRETRAINED_WEIGHT} \ 73 | --imdb ${TRAIN_IMDB_S} \ 74 | --imdbval ${TEST_IMDB} \ 75 | --imdb_T ${TRAIN_IMDB_T} \ 76 | --iters ${ITERS} \ 77 | --cfg experiments/cfgs/${NET}_${ADAPT_MODE}.yml \ 78 | --tag ${EXTRA_ARGS_SLUG}_adapt \ 79 | --net ${NET} \ 80 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 81 | TRAIN.STEPSIZE ${STEPSIZE} TRAIN.SNAPSHOT_PREFIX ${SNAPSHOT_PREFIX} ${EXTRA_ARGS} 82 | else 83 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/trainval_net_adapt.py \ 84 | --weight trained_weights/pretrained_detector/${PRETRAINED_WEIGHT} \ 85 | --imdb ${TRAIN_IMDB_S} \ 86 | --imdbval ${TEST_IMDB} \ 87 | --imdb_T ${TRAIN_IMDB_T} \ 88 | --iters ${ITERS} \ 89 | --cfg experiments/cfgs/${NET}_${ADAPT_MODE}.yml \ 90 | --tag ${EXTRA_ARGS_SLUG}_adapt \ 91 | --net ${NET} \ 92 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 93 | TRAIN.STEPSIZE ${STEPSIZE} TRAIN.SNAPSHOT_PREFIX ${SNAPSHOT_PREFIX} ${EXTRA_ARGS} 94 | fi 95 | fi 96 | 97 | ./experiments/scripts/test_adapt_faster_rcnn_stage1.sh $@ ${ITERS} 98 | -------------------------------------------------------------------------------- /experiments/scripts/train_adapt_faster_rcnn_stage2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | GPU_ID=$1 9 | ADAPT_MODE=$2 10 | NET=$3 11 | 12 | array=( $@ ) 13 | len=${#array[@]} 14 | EXTRA_ARGS=${array[@]:3:$len} 15 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 16 | 17 | case ${ADAPT_MODE} in 18 | K2C) 19 | SNAPSHOT_PREFIX="${NET}_faster_rcnn_${ADAPT_MODE}_stage2" 20 | PREV_S="KITTI_train+KITTI_val" 21 | TRAIN_IMDB_S="KITTI_synthCity" 22 | TRAIN_IMDB_T="cityscapes_train" 23 | TEST_IMDB="cityscapes_val" 24 | STEPSIZE="[]" 25 | ITERS=10000 26 | ANCHORS="[4,8,16,32]" 27 | RATIOS="[0.5,1,2]" 28 | ;; 29 | C2F) 30 | SNAPSHOT_PREFIX="${NET}_faster_rcnn_${ADAPT_MODE}_stage2" 31 | PREV_S="cityscapes_train" 32 | TRAIN_IMDB_S="cityscapes_synthFoggytrain" 33 | TRAIN_IMDB_T="cityscapes_foggytrain" 34 | TEST_IMDB="cityscapes_foggyval" 35 | STEPSIZE="[]" 36 | ITERS=60000 37 | ANCHORS="[4,8,16,32]" 38 | RATIOS="[0.5,1,2]" 39 | ;; 40 | C2BDD) 41 | SNAPSHOT_PREFIX="${NET}_faster_rcnn_${ADAPT_MODE}_stage2" 42 | PREV_S="cityscapes_train+cityscapes_val" 43 | TRAIN_IMDB_S="cityscapes_synthBDDdaytrain+cityscapes_synthBDDdayval" 44 | TRAIN_IMDB_T="bdd100k_daytrain" 45 | TEST_IMDB="bdd100k_dayval" 46 | STEPSIZE="[10000]" 47 | ITERS=30000 48 | ANCHORS="[4,8,16,32]" 49 | RATIOS="[0.5,1,2]" 50 | ;; 51 | *) 52 | echo "No dataset given" 53 | exit 54 | ;; 55 | esac 56 | 57 | LOG="experiments/logs/${NET}_${TRAIN_IMDB_S}2${TRAIN_IMDB_T}_${EXTRA_ARGS_SLUG}_${NET}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 58 | exec &> >(tee -a "$LOG") 59 | echo Logging output to "$LOG" 60 | 61 | set +x 62 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 63 | NET_FINAL=output/${NET}/${TRAIN_IMDB_S}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth 64 | else 65 | NET_FINAL=output/${NET}/${TRAIN_IMDB_S}/adapt/${NET}_faster_rcnn_iter_${ITERS}.pth 66 | fi 67 | set -x 68 | 69 | if [ ! -f ${NET_FINAL}.index ]; then 70 | if [[ !
-z ${EXTRA_ARGS_SLUG} ]]; then 71 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/trainval_net_adapt.py \ 72 | --weight output/${NET}/${PREV_S}/_adapt/${NET}_faster_rcnn_${ADAPT_MODE}_stage1_iter_70000.pth \ 73 | --imdb ${TRAIN_IMDB_S} \ 74 | --imdbval ${TEST_IMDB} \ 75 | --imdb_T ${TRAIN_IMDB_T} \ 76 | --iters ${ITERS} \ 77 | --cfg experiments/cfgs/${NET}_${ADAPT_MODE}.yml \ 78 | --tag ${EXTRA_ARGS_SLUG}_adapt \ 79 | --net ${NET} \ 80 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} ADAPT_MODE ${ADAPT_MODE} \ 81 | TRAIN.STEPSIZE ${STEPSIZE} TRAIN.SNAPSHOT_PREFIX ${SNAPSHOT_PREFIX} ${EXTRA_ARGS} 82 | else 83 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/trainval_net_adapt.py \ 84 | --weight output/${NET}/${PREV_S}/_adapt/${NET}_faster_rcnn_${ADAPT_MODE}_stage1_iter_70000.pth \ 85 | --imdb ${TRAIN_IMDB_S} \ 86 | --imdbval ${TEST_IMDB} \ 87 | --imdb_T ${TRAIN_IMDB_T} \ 88 | --iters ${ITERS} \ 89 | --cfg experiments/cfgs/${NET}_${ADAPT_MODE}.yml \ 90 | --tag ${EXTRA_ARGS_SLUG}_adapt \ 91 | --net ${NET} \ 92 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} ADAPT_MODE ${ADAPT_MODE} \ 93 | TRAIN.STEPSIZE ${STEPSIZE} TRAIN.SNAPSHOT_PREFIX ${SNAPSHOT_PREFIX} ${EXTRA_ARGS} 94 | fi 95 | fi 96 | 97 | ./experiments/scripts/test_adapt_faster_rcnn_stage2.sh $@ ${ITERS} 98 | -------------------------------------------------------------------------------- /experiments/scripts/train_faster_rcnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | GPU_ID=$1 9 | DATASET=$2 10 | NET=$3 11 | 12 | array=( $@ ) 13 | len=${#array[@]} 14 | EXTRA_ARGS=${array[@]:3:$len} 15 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 16 | 17 | case ${DATASET} in 18 | pascal_voc) 19 | TRAIN_IMDB="voc_2007_trainval" 20 | TEST_IMDB="voc_2007_test" 21 | STEPSIZE="[50000]" 22 | ITERS=70000 23 | ANCHORS="[8,16,32]" 24 | RATIOS="[0.5,1,2]" 25 | ;; 26 | pascal_voc_0712) 27 | TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval" 28 | TEST_IMDB="voc_2007_test" 29 | STEPSIZE="[80000]" 30 | ITERS=110000 31 | ANCHORS="[8,16,32]" 32 | RATIOS="[0.5,1,2]" 33 | ;; 34 | coco) 35 | TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival" 36 | TEST_IMDB="coco_2014_minival" 37 | STEPSIZE="[350000]" 38 | ITERS=490000 39 | ANCHORS="[4,8,16,32]" 40 | RATIOS="[0.5,1,2]" 41 | ;; 42 | KITTI) 43 | TRAIN_IMDB="KITTI_train" 44 | TEST_IMDB="KITTI_val" 45 | STEPSIZE="[50000]" 46 | ITERS=70000 47 | ANCHORS="[4,8,16,32]" 48 | RATIOS="[0.5,1,2]" 49 | ;; 50 | cityscapes) 51 | TRAIN_IMDB="cityscapes_train+cityscapes_val" 52 | TEST_IMDB="cityscapes_val" 53 | STEPSIZE="[80000]" 54 | ITERS=110000 55 | ANCHORS="[4,8,16,32]" 56 | RATIOS="[0.5,1,2]" 57 | ;; 58 | foggyCity) 59 | TRAIN_IMDB="cityscapes_foggytrain" 60 | TEST_IMDB="cityscapes_foggyval" 61 | STEPSIZE="[80000]" 62 | ITERS=110000 63 | ANCHORS="[4,8,16,32]" 64 | RATIOS="[0.5,1,2]" 65 | ;; 66 | bdd100k) 67 | TRAIN_IMDB='bdd100k_daytrain' 68 | TEST_IMDB='bdd100k_dayval' 69 | STEPSIZE="[80000]" 70 | ITERS=110000 71 | ANCHORS="[4,8,16,32]" 72 | RATIOS="[0.5,1,2]" 73 | ;; 74 | sim10k) 75 | TRAIN_IMDB='sim10k_trainval' 76 | TEST_IMDB='sim10k_trainval' 77 | STEPSIZE="[50000]" 78 | ITERS=70000 79 | ANCHORS="[4,8,16,32]" 80 | RATIOS="[0.5,1,2]" 81 | ;; 82 | *) 83 | echo "No dataset given" 84 | exit 85 | ;; 86 | esac 87 | 88 | LOG="experiments/logs/${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}_${NET}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 89 | exec &> >(tee -a "$LOG") 90 | echo Logging output to "$LOG" 91 
| 92 | set +x 93 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 94 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth 95 | else 96 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/default/${NET}_faster_rcnn_iter_${ITERS}.pth 97 | fi 98 | set -x 99 | 100 | if [ ! -f ${NET_FINAL}.index ]; then 101 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 102 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/trainval_net.py \ 103 | --weight data/imagenet_weights/${NET}.pth \ 104 | --imdb ${TRAIN_IMDB} \ 105 | --imdbval ${TEST_IMDB} \ 106 | --iters ${ITERS} \ 107 | --cfg experiments/cfgs/${NET}.yml \ 108 | --tag ${EXTRA_ARGS_SLUG} \ 109 | --net ${NET} \ 110 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 111 | TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS} 112 | else 113 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/trainval_net.py \ 114 | --weight data/imagenet_weights/${NET}.pth \ 115 | --imdb ${TRAIN_IMDB} \ 116 | --imdbval ${TEST_IMDB} \ 117 | --iters ${ITERS} \ 118 | --cfg experiments/cfgs/${NET}.yml \ 119 | --net ${NET} \ 120 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 121 | TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS} 122 | fi 123 | fi 124 | 125 | ./experiments/scripts/test_faster_rcnn.sh $@ 126 | -------------------------------------------------------------------------------- /experiments/scripts/train_faster_rcnn_notime.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | GPU_ID=$1 9 | DATASET=$2 10 | NET=$3 11 | 12 | array=( $@ ) 13 | len=${#array[@]} 14 | EXTRA_ARGS=${array[@]:3:$len} 15 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 16 | 17 | case ${DATASET} in 18 | pascal_voc) 19 | TRAIN_IMDB="voc_2007_trainval" 20 | TEST_IMDB="voc_2007_test" 21 | STEPSIZE="[50000]" 22 | ITERS=70000 23 | ANCHORS="[8,16,32]" 24 | RATIOS="[0.5,1,2]" 25 | ;; 26 | pascal_voc_0712) 27 | TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval" 28 | TEST_IMDB="voc_2007_test" 29 | STEPSIZE="[80000]" 30 | ITERS=110000 31 | ANCHORS="[8,16,32]" 32 | RATIOS="[0.5,1,2]" 33 | ;; 34 | coco) 35 | TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival" 36 | TEST_IMDB="coco_2014_minival" 37 | STEPSIZE="[900000]" 38 | ITERS=1190000 39 | ANCHORS="[4,8,16,32]" 40 | RATIOS="[0.5,1,2]" 41 | ;; 42 | *) 43 | echo "No dataset given" 44 | exit 45 | ;; 46 | esac 47 | 48 | LOG="experiments/logs/${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}_${NET}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 49 | exec &> >(tee -a "$LOG") 50 | echo Logging output to "$LOG" 51 | 52 | set +x 53 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 54 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth 55 | else 56 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/default/${NET}_faster_rcnn_iter_${ITERS}.pth 57 | fi 58 | set -x 59 | 60 | if [ ! -f ${NET_FINAL}.index ]; then 61 | if [[ ! 
-z ${EXTRA_ARGS_SLUG} ]]; then 62 | CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/trainval_net.py \ 63 | --weight data/imagenet_weights/${NET}.pth \ 64 | --imdb ${TRAIN_IMDB} \ 65 | --imdbval ${TEST_IMDB} \ 66 | --iters ${ITERS} \ 67 | --cfg experiments/cfgs/${NET}.yml \ 68 | --tag ${EXTRA_ARGS_SLUG} \ 69 | --net ${NET} \ 70 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 71 | TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS} 72 | else 73 | CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/trainval_net.py \ 74 | --weight data/imagenet_weights/${NET}.pth \ 75 | --imdb ${TRAIN_IMDB} \ 76 | --imdbval ${TEST_IMDB} \ 77 | --iters ${ITERS} \ 78 | --cfg experiments/cfgs/${NET}.yml \ 79 | --net ${NET} \ 80 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 81 | TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS} 82 | fi 83 | fi 84 | 85 | ./experiments/scripts/test_faster_rcnn_notime.sh $@ 86 | -------------------------------------------------------------------------------- /figure/adapt_results_c2bdd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/figure/adapt_results_c2bdd.png -------------------------------------------------------------------------------- /figure/adapt_results_c2f.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/figure/adapt_results_c2f.png -------------------------------------------------------------------------------- /figure/adapt_results_k2c.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/figure/adapt_results_k2c.png -------------------------------------------------------------------------------- /figure/det_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/figure/det_results.png -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m: -------------------------------------------------------------------------------- 1 | function VOCopts = get_voc_opts(path) 2 | 3 | tmp = pwd; 4 | cd(path); 5 | try 6 | addpath('VOCcode'); 7 | VOCinit; 8 | catch 9 | rmpath('VOCcode'); 10 | cd(tmp); 11 | error(sprintf('VOCcode directory not found under %s', path)); 12 | end 13 | rmpath('VOCcode'); 14 | cd(tmp); 15 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m: -------------------------------------------------------------------------------- 1 | function res = voc_eval(path, comp_id, test_set, output_dir) 2 | 3 | VOCopts = get_voc_opts(path); 4 | VOCopts.testset = test_set; 5 | 6 | for i = 1:length(VOCopts.classes) 7 | cls = VOCopts.classes{i}; 8 | res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir); 9 | end 10 | 11 | fprintf('\n~~~~~~~~~~~~~~~~~~~~\n'); 12 | fprintf('Results:\n'); 13 | aps = [res(:).ap]'; 14 | fprintf('%.1f\n', aps * 100); 15 | fprintf('%.1f\n', mean(aps) * 100); 16 | fprintf('~~~~~~~~~~~~~~~~~~~~\n'); 17 | 18 | function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir) 19 | 20 | test_set = VOCopts.testset; 21 | year = VOCopts.dataset(4:end); 22 | 23 | 
addpath(fullfile(VOCopts.datadir, 'VOCcode')); 24 | 25 | res_fn = sprintf(VOCopts.detrespath, comp_id, cls); 26 | 27 | recall = []; 28 | prec = []; 29 | ap = 0; 30 | ap_auc = 0; 31 | 32 | do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test'); 33 | if do_eval 34 | % Bug in VOCevaldet requires that tic has been called first 35 | tic; 36 | [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true); 37 | ap_auc = xVOCap(recall, prec); 38 | 39 | % force plot limits 40 | ylim([0 1]); 41 | xlim([0 1]); 42 | 43 | print(gcf, '-djpeg', '-r0', ... 44 | [output_dir '/' cls '_pr.jpg']); 45 | end 46 | fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc); 47 | 48 | res.recall = recall; 49 | res.prec = prec; 50 | res.ap = ap; 51 | res.ap_auc = ap_auc; 52 | 53 | save([output_dir '/' cls '_pr.mat'], ... 54 | 'res', 'recall', 'prec', 'ap', 'ap_auc'); 55 | 56 | rmpath(fullfile(VOCopts.datadir, 'VOCcode')); 57 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m: -------------------------------------------------------------------------------- 1 | function ap = xVOCap(rec,prec) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | mrec=[0 ; rec ; 1]; 5 | mpre=[0 ; prec ; 0]; 6 | for i=numel(mpre)-1:-1:1 7 | mpre(i)=max(mpre(i),mpre(i+1)); 8 | end 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1; 10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 11 | -------------------------------------------------------------------------------- /lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/datasets/cityscapes_eval.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Bharath Hariharan 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import xml.etree.ElementTree as ET 11 | import os 12 | import pickle 13 | import numpy as np 14 | import json 15 | 16 | def parse_rec(filename): 17 | """ Parse cityscapes rec """ 18 | objects = [] 19 | with open(filename, 'r') as f: 20 | info = json.load(f) 21 | objs = info["objects"] 22 | num_objs = len(objs) 23 | 24 | for obj in objs: 25 | obj_struct = {} 26 | clsName = obj['label'] 27 | #print clsName 28 | maxW = float(info['imgWidth']) - 1. 29 | maxH = float(info['imgHeight']) - 1. 30 | x1 = maxW 31 | y1 = maxH 32 | x2 = 0. 33 | y2 = 0. 
34 | for p in obj['polygon']: # (x, y) 35 | if p[0] < x1: 36 | x1 = max(0, p[0]) 37 | if p[0] > x2: 38 | x2 = min(maxW, p[0]) 39 | if p[1] < y1: 40 | y1 = max(0, p[1]) 41 | if p[1] > y2: 42 | y2 = min(maxH, p[1]) 43 | assert x1 >= 0 and x2 >=0 and y1 >= 0 and y2 >= 0 44 | assert x1 <= x2 and y1 <= y2 45 | obj_struct['name'] = clsName 46 | obj_struct['difficult'] = 0 47 | obj_struct['bbox'] = [x1, y1, x2, y2] 48 | objects.append(obj_struct) 49 | return objects 50 | 51 | def voc_ap(rec, prec, use_07_metric=False): 52 | """ ap = voc_ap(rec, prec, [use_07_metric]) 53 | Compute VOC AP given precision and recall. 54 | If use_07_metric is true, uses the 55 | VOC 07 11 point method (default:False). 56 | """ 57 | if use_07_metric: 58 | # 11 point metric 59 | ap = 0. 60 | for t in np.arange(0., 1.1, 0.1): 61 | if np.sum(rec >= t) == 0: 62 | p = 0 63 | else: 64 | p = np.max(prec[rec >= t]) 65 | ap = ap + p / 11. 66 | else: 67 | # correct AP calculation 68 | # first append sentinel values at the end 69 | mrec = np.concatenate(([0.], rec, [1.])) 70 | mpre = np.concatenate(([0.], prec, [0.])) 71 | 72 | # compute the precision envelope 73 | for i in range(mpre.size - 1, 0, -1): 74 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 75 | 76 | # to calculate area under PR curve, look for points 77 | # where X axis (recall) changes value 78 | i = np.where(mrec[1:] != mrec[:-1])[0] 79 | 80 | # and sum (\Delta recall) * prec 81 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 82 | return ap 83 | 84 | 85 | def cityscapes_eval(detpath, 86 | annopath, 87 | imagesetfile, 88 | classname, 89 | cachedir, 90 | ovthresh=0.5, 91 | use_07_metric=False, 92 | use_diff=False): 93 | """rec, prec, ap = voc_eval(detpath, 94 | annopath, 95 | imagesetfile, 96 | classname, 97 | [ovthresh], 98 | [use_07_metric]) 99 | 100 | Top level function that does the PASCAL VOC evaluation. 101 | 102 | detpath: Path to detections 103 | detpath.format(classname) should produce the detection results file. 104 | annopath: Path to annotations 105 | annopath.format(imagename) should be the xml annotations file. 106 | imagesetfile: Text file containing the list of images, one image per line. 
107 | classname: Category name (duh) 108 | cachedir: Directory for caching the annotations 109 | [ovthresh]: Overlap threshold (default = 0.5) 110 | [use_07_metric]: Whether to use VOC07's 11 point AP computation 111 | (default False) 112 | """ 113 | # assumes detections are in detpath.format(classname) 114 | # assumes annotations are in annopath.format(imagename) 115 | # assumes imagesetfile is a text file with each line an image name 116 | # cachedir caches the annotations in a pickle file 117 | 118 | # first load gt 119 | if not os.path.isdir(cachedir): 120 | os.mkdir(cachedir) 121 | if 'foggy' in imagesetfile[0]: 122 | cachefile = os.path.join(cachedir, '%s_annots.pkl' % 'cityscapes_foggy') 123 | else: 124 | cachefile = os.path.join(cachedir, '%s_annots.pkl' % 'cityscapes') 125 | # read list of images 126 | 127 | imagenames = imagesetfile 128 | 129 | if not os.path.isfile(cachefile): 130 | # load annotations 131 | recs = {} 132 | for i, imagename in enumerate(imagenames): 133 | recs[imagename] = parse_rec(annopath.format(imagename[:imagename.find('_')], imagename[:imagename.find('leftImg8bit')])) 134 | if i % 100 == 0: 135 | print('Reading annotation for {:d}/{:d}'.format( 136 | i + 1, len(imagenames))) 137 | # save 138 | print('Saving cached annotations to {:s}'.format(cachefile)) 139 | with open(cachefile, 'wb') as f: 140 | pickle.dump(recs, f) 141 | else: 142 | # load 143 | with open(cachefile, 'rb') as f: 144 | try: 145 | recs = pickle.load(f) 146 | except: 147 | recs = pickle.load(f, encoding='bytes') 148 | 149 | # extract gt objects for this class 150 | class_recs = {} 151 | npos = 0 152 | for imagename in imagenames: 153 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 154 | bbox = np.array([x['bbox'] for x in R]) 155 | if use_diff: 156 | difficult = np.array([False for x in R]).astype(np.bool) 157 | else: 158 | difficult = np.array([x['difficult'] for x in R]).astype(np.bool) 159 | det = [False] * len(R) 160 | npos = npos + sum(~difficult) 161 | class_recs[imagename] = {'bbox': bbox, 162 | 'difficult': difficult, 163 | 'det': det} 164 | 165 | # read dets 166 | detfile = detpath.format(classname) 167 | 168 | with open(detfile, 'r') as f: 169 | lines = f.readlines() 170 | 171 | splitlines = [x.strip().split(' ') for x in lines] 172 | image_ids = [x[0] for x in splitlines] 173 | confidence = np.array([float(x[1]) for x in splitlines]) 174 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 175 | 176 | nd = len(image_ids) 177 | tp = np.zeros(nd) 178 | fp = np.zeros(nd) 179 | 180 | if BB.shape[0] > 0: 181 | # sort by confidence 182 | sorted_ind = np.argsort(-confidence) 183 | sorted_scores = np.sort(-confidence) 184 | BB = BB[sorted_ind, :] 185 | image_ids = [image_ids[x] for x in sorted_ind] 186 | 187 | # go down dets and mark TPs and FPs 188 | for d in range(nd): 189 | R = class_recs[image_ids[d]] 190 | bb = BB[d, :].astype(float) 191 | ovmax = -np.inf 192 | BBGT = R['bbox'].astype(float) 193 | 194 | if BBGT.size > 0: 195 | # compute overlaps 196 | # intersection 197 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 198 | iymin = np.maximum(BBGT[:, 1], bb[1]) 199 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 200 | iymax = np.minimum(BBGT[:, 3], bb[3]) 201 | iw = np.maximum(ixmax - ixmin + 1., 0.) 202 | ih = np.maximum(iymax - iymin + 1., 0.) 203 | inters = iw * ih 204 | 205 | # union 206 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 207 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 208 | (BBGT[:, 3] - BBGT[:, 1] + 1.) 
- inters) 209 | 210 | overlaps = inters / uni 211 | ovmax = np.max(overlaps) 212 | jmax = np.argmax(overlaps) 213 | 214 | if ovmax > ovthresh: 215 | if not R['difficult'][jmax]: 216 | if not R['det'][jmax]: 217 | tp[d] = 1. 218 | R['det'][jmax] = 1 219 | else: 220 | fp[d] = 1. 221 | else: 222 | fp[d] = 1. 223 | 224 | # compute precision recall 225 | fp = np.cumsum(fp) 226 | tp = np.cumsum(tp) 227 | rec = tp / float(npos) 228 | # avoid divide by zero in case the first detection matches a difficult 229 | # ground truth 230 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 231 | ap = voc_ap(rec, prec, use_07_metric) 232 | print(classname, npos) 233 | return rec, prec, ap 234 | -------------------------------------------------------------------------------- /lib/datasets/ds_utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | 12 | 13 | def unique_boxes(boxes, scale=1.0): 14 | """Return indices of unique boxes.""" 15 | v = np.array([1, 1e3, 1e6, 1e9]) 16 | hashes = np.round(boxes * scale).dot(v) 17 | _, index = np.unique(hashes, return_index=True) 18 | return np.sort(index) 19 | 20 | 21 | def xywh_to_xyxy(boxes): 22 | """Convert [x y w h] box format to [x1 y1 x2 y2] format.""" 23 | return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1)) 24 | 25 | 26 | def xyxy_to_xywh(boxes): 27 | """Convert [x1 y1 x2 y2] box format to [x y w h] format.""" 28 | return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1)) 29 | 30 | 31 | def validate_boxes(boxes, width=0, height=0): 32 | """Check that a set of boxes are valid.""" 33 | x1 = boxes[:, 0] 34 | y1 = boxes[:, 1] 35 | x2 = boxes[:, 2] 36 | y2 = boxes[:, 3] 37 | assert (x1 >= 0).all() 38 | assert (y1 >= 0).all() 39 | assert (x2 >= x1).all() 40 | assert (y2 >= y1).all() 41 | assert (x2 < width).all() 42 | assert (y2 < height).all() 43 | 44 | 45 | def filter_small_boxes(boxes, min_size): 46 | w = boxes[:, 2] - boxes[:, 0] 47 | h = boxes[:, 3] - boxes[:, 1] 48 | keep = np.where((w >= min_size) & (h > min_size))[0] 49 | return keep 50 | -------------------------------------------------------------------------------- /lib/datasets/factory.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Factory method for easily getting imdbs by name.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | __sets = {} 14 | from datasets.pascal_voc import pascal_voc 15 | from datasets.KITTI import KITTI 16 | from datasets.cityscapes import cityscapes 17 | from datasets.bdd100k import bdd100k 18 | 19 | import numpy as np 20 | 21 | # Set up voc__ 22 | for year in ['2007', '2012']: 23 | for split in ['train', 'val', 'trainval', 'test']: 24 | name = 'voc_{}_{}'.format(year, split) 25 | __sets[name] = (lambda split=split, year=year: 
pascal_voc(split, year)) 26 | 27 | for year in ['2007', '2012']: 28 | for split in ['train', 'val', 'trainval', 'test']: 29 | name = 'voc_{}_{}_diff'.format(year, split) 30 | __sets[name] = (lambda split=split, year=year: pascal_voc(split, year, use_diff=True)) 31 | 32 | # Set up KITTI 33 | for split in ['train', 'val', 'synthCity', 'trainval']: 34 | name = 'KITTI_{}'.format(split) 35 | __sets[name] = (lambda split=split: KITTI(split)) 36 | 37 | # Set up cityscapes 38 | for split in ['train', 'val', 'foggytrain', 'foggyval', 'synthFoggytrain', 'synthBDDdaytrain', 'synthBDDdayval']: 39 | name = 'cityscapes_{}'.format(split) 40 | __sets[name] = (lambda split=split: cityscapes(split)) 41 | 42 | # Set up bdd100k 43 | for split in ['train', 'val', 'daytrain', 'dayval', 'nighttrain', 'nightval', 'citydaytrain', 'citydayval', 'cleardaytrain', 'cleardayval', 'rainydaytrain', 'rainydayval']: 44 | name = 'bdd100k_{}'.format(split) 45 | __sets[name] = (lambda split=split: bdd100k(split)) 46 | 47 | def get_imdb(name): 48 | """Get an imdb (image database) by name.""" 49 | if name not in __sets: 50 | raise KeyError('Unknown dataset: {}'.format(name)) 51 | return __sets[name]() 52 | 53 | def list_imdbs(): 54 | """List all registered imdbs.""" 55 | return list(__sets.keys()) 56 | -------------------------------------------------------------------------------- /lib/datasets/tools/mcg_munge.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | """Hacky tool to convert file system layout of MCG boxes downloaded from 5 | http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/ 6 | so that it's consistent with those computed by Jan Hosang (see: 7 | http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal- 8 | computing/research/object-recognition-and-scene-understanding/how- 9 | good-are-detection-proposals-really/) 10 | 11 | NB: Boxes from the MCG website are in (y1, x1, y2, x2) order. 12 | Boxes from Hosang et al. are in (x1, y1, x2, y2) order.
13 | """ 14 | 15 | def munge(src_dir): 16 | # stored as: ./MCG-COCO-val2014-boxes/COCO_val2014_000000193401.mat 17 | # want: ./MCG/mat/COCO_val2014_0/COCO_val2014_000000141/COCO_val2014_000000141334.mat 18 | 19 | files = os.listdir(src_dir) 20 | for fn in files: 21 | base, ext = os.path.splitext(fn) 22 | # first 14 chars / first 22 chars / all chars + .mat 23 | # COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat 24 | first = base[:14] 25 | second = base[:22] 26 | dst_dir = os.path.join('MCG', 'mat', first, second) 27 | if not os.path.exists(dst_dir): 28 | os.makedirs(dst_dir) 29 | src = os.path.join(src_dir, fn) 30 | dst = os.path.join(dst_dir, fn) 31 | print 'MV: {} -> {}'.format(src, dst) 32 | os.rename(src, dst) 33 | 34 | if __name__ == '__main__': 35 | # src_dir should look something like: 36 | # src_dir = 'MCG-COCO-val2014-boxes' 37 | src_dir = sys.argv[1] 38 | munge(src_dir) 39 | -------------------------------------------------------------------------------- /lib/layer_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/lib/layer_utils/__init__.py -------------------------------------------------------------------------------- /lib/layer_utils/anchor_target_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | from model.config import cfg 13 | import numpy as np 14 | import numpy.random as npr 15 | from utils.bbox import bbox_overlaps 16 | from model.bbox_transform import bbox_transform 17 | import torch 18 | 19 | def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors): 20 | """Same as the anchor target layer in original Fast/er RCNN """ 21 | npr.seed(cfg.RNG_SEED) 22 | A = num_anchors 23 | total_anchors = all_anchors.shape[0] 24 | K = total_anchors / num_anchors 25 | 26 | # allow boxes to sit over the edge by a small amount 27 | _allowed_border = 0 28 | 29 | # map of shape (..., H, W) 30 | height, width = rpn_cls_score.shape[1:3] 31 | 32 | # only keep anchors inside the image 33 | inds_inside = np.arange(total_anchors) 34 | 35 | # keep only inside anchors 36 | anchors = all_anchors[inds_inside, :] 37 | 38 | # label: 1 is positive, 0 is negative, -1 is dont care 39 | labels = np.empty((len(inds_inside),), dtype=np.float32) 40 | labels.fill(-1) 41 | 42 | # overlaps between the anchors and the gt boxes 43 | # overlaps (ex, gt) 44 | overlaps = bbox_overlaps( 45 | np.ascontiguousarray(anchors, dtype=np.float), 46 | np.ascontiguousarray(gt_boxes, dtype=np.float)) 47 | argmax_overlaps = overlaps.argmax(axis=1) 48 | max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] 49 | gt_argmax_overlaps = overlaps.argmax(axis=0) 50 | gt_max_overlaps = overlaps[gt_argmax_overlaps, 51 | np.arange(overlaps.shape[1])] 52 | gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] 53 | 54 | if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: 55 | # assign bg labels first so that positive labels can clobber them 56 | # first set 
the negatives 57 | labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 58 | 59 | # fg label: for each gt, anchor with highest overlap 60 | labels[gt_argmax_overlaps] = 1 61 | 62 | # fg label: above threshold IOU 63 | labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 64 | 65 | if cfg.TRAIN.RPN_CLOBBER_POSITIVES: 66 | # assign bg labels last so that negative labels can clobber positives 67 | labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 68 | 69 | # subsample positive labels if we have too many 70 | num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) 71 | fg_inds = np.where(labels == 1)[0] 72 | if len(fg_inds) > num_fg: 73 | disable_inds = npr.choice( 74 | fg_inds, size=(len(fg_inds) - num_fg), replace=False) 75 | labels[disable_inds] = -1 76 | 77 | # subsample negative labels if we have too many 78 | num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) 79 | bg_inds = np.where(labels == 0)[0] 80 | if len(bg_inds) > num_bg: 81 | disable_inds = npr.choice( 82 | bg_inds, size=(len(bg_inds) - num_bg), replace=False) 83 | labels[disable_inds] = -1 84 | 85 | bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) 86 | bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) 87 | 88 | bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) 89 | # only the positive ones have regression targets 90 | bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS) 91 | 92 | bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) 93 | if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0: 94 | # uniform weighting of examples (given non-uniform sampling) 95 | num_examples = np.sum(labels >= 0) 96 | positive_weights = np.ones((1, 4)) * 1.0 / num_examples 97 | negative_weights = np.ones((1, 4)) * 1.0 / num_examples 98 | else: 99 | assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & 100 | (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1)) 101 | positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / 102 | np.sum(labels == 1)) 103 | negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / 104 | np.sum(labels == 0)) 105 | bbox_outside_weights[labels == 1, :] = positive_weights 106 | bbox_outside_weights[labels == 0, :] = negative_weights 107 | 108 | # map up to original set of anchors 109 | labels = _unmap(labels, total_anchors, inds_inside, fill=-1) 110 | bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) 111 | bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) 112 | bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) 113 | 114 | # labels 115 | labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2) 116 | labels = labels.reshape((1, 1, A * height, width)) 117 | rpn_labels = labels 118 | 119 | # bbox_targets 120 | bbox_targets = bbox_targets \ 121 | .reshape((1, height, width, A * 4)) 122 | 123 | rpn_bbox_targets = bbox_targets 124 | # bbox_inside_weights 125 | bbox_inside_weights = bbox_inside_weights \ 126 | .reshape((1, height, width, A * 4)) 127 | 128 | rpn_bbox_inside_weights = bbox_inside_weights 129 | 130 | # bbox_outside_weights 131 | bbox_outside_weights = bbox_outside_weights \ 132 | .reshape((1, height, width, A * 4)) 133 | 134 | rpn_bbox_outside_weights = bbox_outside_weights 135 | return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights 136 | 137 | 138 | def _unmap(data, count, inds, fill=0): 139 | """ Unmap a subset of item (data) back to the original set of items (of 140 | size count) 
""" 141 | if len(data.shape) == 1: 142 | ret = np.empty((count,), dtype=np.float32) 143 | ret.fill(fill) 144 | ret[inds] = data 145 | else: 146 | ret = np.empty((count,) + data.shape[1:], dtype=np.float32) 147 | ret.fill(fill) 148 | ret[inds, :] = data 149 | return ret 150 | 151 | 152 | def _compute_targets(ex_rois, gt_rois): 153 | """Compute bounding-box regression targets for an image.""" 154 | 155 | assert ex_rois.shape[0] == gt_rois.shape[0] 156 | assert ex_rois.shape[1] == 4 157 | assert gt_rois.shape[1] == 5 158 | 159 | return bbox_transform(torch.from_numpy(ex_rois), torch.from_numpy(gt_rois[:, :4])).numpy() 160 | -------------------------------------------------------------------------------- /lib/layer_utils/generate_anchors.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | 14 | # Verify that we compute the same anchors as Shaoqing's matlab implementation: 15 | # 16 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat 17 | # >> anchors 18 | # 19 | # anchors = 20 | # 21 | # -83 -39 100 56 22 | # -175 -87 192 104 23 | # -359 -183 376 200 24 | # -55 -55 72 72 25 | # -119 -119 136 136 26 | # -247 -247 264 264 27 | # -35 -79 52 96 28 | # -79 -167 96 184 29 | # -167 -343 184 360 30 | 31 | # array([[ -83., -39., 100., 56.], 32 | # [-175., -87., 192., 104.], 33 | # [-359., -183., 376., 200.], 34 | # [ -55., -55., 72., 72.], 35 | # [-119., -119., 136., 136.], 36 | # [-247., -247., 264., 264.], 37 | # [ -35., -79., 52., 96.], 38 | # [ -79., -167., 96., 184.], 39 | # [-167., -343., 184., 360.]]) 40 | 41 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 42 | scales=2 ** np.arange(3, 6)): 43 | """ 44 | Generate anchor (reference) windows by enumerating aspect ratios X 45 | scales wrt a reference (0, 0, 15, 15) window. 46 | """ 47 | 48 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 49 | ratio_anchors = _ratio_enum(base_anchor, ratios) 50 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 51 | for i in range(ratio_anchors.shape[0])]) 52 | return anchors 53 | 54 | 55 | def _whctrs(anchor): 56 | """ 57 | Return width, height, x center, and y center for an anchor (window). 58 | """ 59 | 60 | w = anchor[2] - anchor[0] + 1 61 | h = anchor[3] - anchor[1] + 1 62 | x_ctr = anchor[0] + 0.5 * (w - 1) 63 | y_ctr = anchor[1] + 0.5 * (h - 1) 64 | return w, h, x_ctr, y_ctr 65 | 66 | 67 | def _mkanchors(ws, hs, x_ctr, y_ctr): 68 | """ 69 | Given a vector of widths (ws) and heights (hs) around a center 70 | (x_ctr, y_ctr), output a set of anchors (windows). 71 | """ 72 | 73 | ws = ws[:, np.newaxis] 74 | hs = hs[:, np.newaxis] 75 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 76 | y_ctr - 0.5 * (hs - 1), 77 | x_ctr + 0.5 * (ws - 1), 78 | y_ctr + 0.5 * (hs - 1))) 79 | return anchors 80 | 81 | 82 | def _ratio_enum(anchor, ratios): 83 | """ 84 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 
85 | """ 86 | 87 | w, h, x_ctr, y_ctr = _whctrs(anchor) 88 | size = w * h 89 | size_ratios = size / ratios 90 | ws = np.round(np.sqrt(size_ratios)) 91 | hs = np.round(ws * ratios) 92 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 93 | return anchors 94 | 95 | 96 | def _scale_enum(anchor, scales): 97 | """ 98 | Enumerate a set of anchors for each scale wrt an anchor. 99 | """ 100 | 101 | w, h, x_ctr, y_ctr = _whctrs(anchor) 102 | ws = w * scales 103 | hs = h * scales 104 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 105 | return anchors 106 | 107 | 108 | if __name__ == '__main__': 109 | import time 110 | 111 | t = time.time() 112 | a = generate_anchors() 113 | print(time.time() - t) 114 | print(a) 115 | from IPython import embed; 116 | 117 | embed() 118 | -------------------------------------------------------------------------------- /lib/layer_utils/proposal_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick and Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | from model.config import cfg 12 | from model.bbox_transform import bbox_transform_inv, clip_boxes 13 | from model.nms_wrapper import nms 14 | 15 | import torch 16 | from torch.autograd import Variable 17 | 18 | 19 | def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors): 20 | """A simplified version compared to fast/er RCNN 21 | For details please see the technical report 22 | """ 23 | if type(cfg_key) == bytes: 24 | cfg_key = cfg_key.decode('utf-8') 25 | pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N 26 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 27 | nms_thresh = cfg[cfg_key].RPN_NMS_THRESH 28 | 29 | # Get the scores and bounding boxes 30 | scores = rpn_cls_prob[:, :, :, num_anchors:] 31 | rpn_bbox_pred = rpn_bbox_pred.view((-1, 4)) 32 | scores = scores.contiguous().view(-1, 1) 33 | proposals = bbox_transform_inv(anchors, rpn_bbox_pred) 34 | proposals = clip_boxes(proposals, im_info[:2]) 35 | 36 | # Pick the top region proposals 37 | scores, order = scores.view(-1).sort(descending=True) 38 | if pre_nms_topN > 0: 39 | order = order[:pre_nms_topN] 40 | scores = scores[:pre_nms_topN].view(-1, 1) 41 | proposals = proposals[order.data, :] 42 | 43 | # Non-maximal suppression 44 | keep = nms(torch.cat((proposals, scores), 1).data, nms_thresh) #error 45 | 46 | # Pick the top region proposals after NMS 47 | if post_nms_topN > 0: 48 | keep = keep[:post_nms_topN] 49 | proposals = proposals[keep, :] 50 | scores = scores[keep,] 51 | 52 | # Only support single image as input 53 | batch_inds = Variable(proposals.data.new(proposals.size(0), 1).zero_()) 54 | blob = torch.cat((batch_inds, proposals), 1) 55 | 56 | return blob, scores 57 | 58 | def proposal_layer_fpn(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors): 59 | """A simplified version compared to fast/er RCNN 60 | For details please see the technical report 61 | """ 62 | if type(cfg_key) == bytes: 63 | cfg_key = cfg_key.decode('utf-8') 64 | pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N 65 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 66 | nms_thresh = cfg[cfg_key].RPN_NMS_THRESH 67 | 68 | proposals_total = [] 69 | 
scores_total = [] 70 | for idx in range(len(rpn_cls_prob)): 71 | # Get the scores and bounding boxes 72 | scores = rpn_cls_prob[idx][:, :, :, num_anchors:] 73 | rpn_bbox_pred[idx] = rpn_bbox_pred[idx].view((-1, 4)) 74 | scores = scores.contiguous().view(-1, 1) 75 | proposals = bbox_transform_inv(anchors[idx], rpn_bbox_pred[idx]) 76 | proposals = clip_boxes(proposals, im_info[:2]) 77 | 78 | # Pick the top region proposals 79 | scores, order = scores.view(-1).sort(descending=True) 80 | if pre_nms_topN > 0: 81 | order = order[:pre_nms_topN] 82 | scores = scores[:pre_nms_topN].view(-1, 1) 83 | proposals = proposals[order.data, :] 84 | 85 | proposals_total.append(proposals) 86 | scores_total.append(scores) 87 | 88 | proposals = torch.cat(proposals_total) 89 | scores = torch.cat(scores_total) 90 | 91 | # Non-maximal suppression 92 | keep = nms(torch.cat((proposals, scores), 1).data, nms_thresh) 93 | 94 | # Pick the top region proposals after NMS 95 | if post_nms_topN > 0: 96 | keep = keep[:post_nms_topN] 97 | proposals = proposals[keep, :] 98 | scores = scores[keep,] 99 | 100 | # Only support single image as input 101 | batch_inds = Variable(proposals.data.new(proposals.size(0), 1).zero_()) 102 | blob = torch.cat((batch_inds, proposals), 1) 103 | 104 | return blob, scores -------------------------------------------------------------------------------- /lib/layer_utils/proposal_target_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick, Sean Bell and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | import numpy.random as npr 13 | from model.config import cfg 14 | from model.bbox_transform import bbox_transform 15 | from utils.bbox import bbox_overlaps 16 | 17 | 18 | import torch 19 | from torch.autograd import Variable 20 | 21 | def proposal_target_layer(rpn_rois, rpn_scores, gt_boxes, _num_classes): 22 | """ 23 | Assign object detection proposals to ground-truth targets. Produces proposal 24 | classification labels and bounding-box regression targets.
25 | """ 26 | 27 | # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN 28 | # (i.e., rpn.proposal_layer.ProposalLayer), or any other source 29 | all_rois = rpn_rois 30 | all_scores = rpn_scores 31 | 32 | # Include ground-truth boxes in the set of candidate rois 33 | if cfg.TRAIN.USE_GT: 34 | zeros = rpn_rois.data.new(gt_boxes.shape[0], 1) 35 | all_rois = torch.cat( 36 | (all_rois, torch.cat((zeros, gt_boxes[:, :-1]), 1)) 37 | , 0) 38 | # not sure if it a wise appending, but anyway i am not using it 39 | all_scores = torch.cat((all_scores, zeros), 0) 40 | 41 | num_images = 1 42 | rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images 43 | fg_rois_per_image = int(round(cfg.TRAIN.FG_FRACTION * rois_per_image)) 44 | 45 | # Sample rois with classification labels and bounding box regression 46 | # targets 47 | labels, rois, roi_scores, bbox_targets, bbox_inside_weights = _sample_rois( 48 | all_rois, all_scores, gt_boxes, fg_rois_per_image, 49 | rois_per_image, _num_classes) 50 | 51 | rois = rois.view(-1, 5) 52 | roi_scores = roi_scores.view(-1) 53 | labels = labels.view(-1, 1) 54 | bbox_targets = bbox_targets.view(-1, _num_classes * 4) 55 | bbox_inside_weights = bbox_inside_weights.view(-1, _num_classes * 4) 56 | bbox_outside_weights = (bbox_inside_weights > 0).float() 57 | 58 | return rois, roi_scores, labels, Variable(bbox_targets), Variable(bbox_inside_weights), Variable(bbox_outside_weights) 59 | 60 | 61 | def _get_bbox_regression_labels(bbox_target_data, num_classes): 62 | """Bounding-box regression targets (bbox_target_data) are stored in a 63 | compact form N x (class, tx, ty, tw, th) 64 | 65 | This function expands those targets into the 4-of-4*K representation used 66 | by the network (i.e. only one class has non-zero targets). 67 | 68 | Returns: 69 | bbox_target (ndarray): N x 4K blob of regression targets 70 | bbox_inside_weights (ndarray): N x 4K blob of loss weights 71 | """ 72 | # Inputs are tensor 73 | 74 | clss = bbox_target_data[:, 0] 75 | bbox_targets = clss.new(clss.numel(), 4 * num_classes).zero_() 76 | bbox_inside_weights = clss.new(bbox_targets.shape).zero_() 77 | inds = (clss > 0).nonzero().view(-1) 78 | if inds.numel() > 0: 79 | clss = clss[inds].contiguous().view(-1,1) 80 | dim1_inds = inds.unsqueeze(1).expand(inds.size(0), 4) 81 | dim2_inds = torch.cat([4*clss, 4*clss+1, 4*clss+2, 4*clss+3], 1).long() 82 | bbox_targets[dim1_inds, dim2_inds] = bbox_target_data[inds][:, 1:] 83 | bbox_inside_weights[dim1_inds, dim2_inds] = bbox_targets.new(cfg.TRAIN.BBOX_INSIDE_WEIGHTS).view(-1, 4).expand_as(dim1_inds) 84 | 85 | return bbox_targets, bbox_inside_weights 86 | 87 | 88 | def _compute_targets(ex_rois, gt_rois, labels): 89 | """Compute bounding-box regression targets for an image.""" 90 | # Inputs are tensor 91 | 92 | assert ex_rois.shape[0] == gt_rois.shape[0] 93 | assert ex_rois.shape[1] == 4 94 | assert gt_rois.shape[1] == 4 95 | 96 | targets = bbox_transform(ex_rois, gt_rois) 97 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: 98 | # Optionally normalize targets by a precomputed mean and stdev 99 | targets = ((targets - targets.new(cfg.TRAIN.BBOX_NORMALIZE_MEANS)) 100 | / targets.new(cfg.TRAIN.BBOX_NORMALIZE_STDS)) 101 | return torch.cat( 102 | [labels.unsqueeze(1), targets], 1) 103 | 104 | 105 | def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes): 106 | """Generate a random sample of RoIs comprising foreground and background 107 | examples. 
108 | """ 109 | npr.seed(cfg.RNG_SEED) 110 | # overlaps: (rois x gt_boxes) 111 | overlaps = bbox_overlaps( 112 | all_rois[:, 1:5].data, 113 | gt_boxes[:, :4].data) 114 | max_overlaps, gt_assignment = overlaps.max(1) 115 | labels = gt_boxes[gt_assignment, [4]] 116 | 117 | # Select foreground RoIs as those with >= FG_THRESH overlap 118 | fg_inds = (max_overlaps >= cfg.TRAIN.FG_THRESH).nonzero().view(-1) 119 | # Guard against the case when an image has fewer than fg_rois_per_image 120 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) 121 | bg_inds = ((max_overlaps < cfg.TRAIN.BG_THRESH_HI) + (max_overlaps >= cfg.TRAIN.BG_THRESH_LO) == 2).nonzero().view(-1) 122 | 123 | # Small modification to the original version where we ensure a fixed number of regions are sampled 124 | if fg_inds.numel() > 0 and bg_inds.numel() > 0: 125 | fg_rois_per_image = min(fg_rois_per_image, fg_inds.numel()) 126 | fg_inds = fg_inds[torch.from_numpy(npr.choice(np.arange(0, fg_inds.numel()), size=int(fg_rois_per_image), replace=False)).long().cuda()] 127 | bg_rois_per_image = rois_per_image - fg_rois_per_image 128 | to_replace = bg_inds.numel() < bg_rois_per_image 129 | bg_inds = bg_inds[torch.from_numpy(npr.choice(np.arange(0, bg_inds.numel()), size=int(bg_rois_per_image), replace=to_replace)).long().cuda()] 130 | elif fg_inds.numel() > 0: 131 | to_replace = fg_inds.numel() < rois_per_image 132 | fg_inds = fg_inds[torch.from_numpy(npr.choice(np.arange(0, fg_inds.numel()), size=int(rois_per_image), replace=to_replace)).long().cuda()] 133 | fg_rois_per_image = rois_per_image 134 | elif bg_inds.numel() > 0: 135 | to_replace = bg_inds.numel() < rois_per_image 136 | bg_inds = bg_inds[torch.from_numpy(npr.choice(np.arange(0, bg_inds.numel()), size=int(rois_per_image), replace=to_replace)).long().cuda()] 137 | fg_rois_per_image = 0 138 | else: 139 | import pdb 140 | pdb.set_trace() 141 | 142 | # The indices that we're selecting (both fg and bg) 143 | keep_inds = torch.cat([fg_inds, bg_inds], 0) 144 | # Select sampled values from various arrays: 145 | labels = labels[keep_inds].contiguous() 146 | # Clamp labels for the background RoIs to 0 147 | labels[int(fg_rois_per_image):] = 0 148 | rois = all_rois[keep_inds].contiguous() 149 | roi_scores = all_scores[keep_inds].contiguous() 150 | 151 | bbox_target_data = _compute_targets( 152 | rois[:, 1:5].data, gt_boxes[gt_assignment[keep_inds]][:, :4].data, labels.data) 153 | 154 | bbox_targets, bbox_inside_weights = \ 155 | _get_bbox_regression_labels(bbox_target_data, num_classes) 156 | 157 | return labels, rois, roi_scores, bbox_targets, bbox_inside_weights 158 | -------------------------------------------------------------------------------- /lib/layer_utils/proposal_top_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | from model.config import cfg 12 | from model.bbox_transform import bbox_transform_inv, clip_boxes 13 | import numpy.random as npr 14 | 15 | import torch 16 | 17 | def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors): 18 | """A layer that just selects the top region proposals 19 
| without using non-maximal suppression, 20 | For details please see the technical report 21 | """ 22 | npr.seed(cfg.RNG_SEED) 23 | rpn_top_n = cfg.TEST.RPN_TOP_N 24 | 25 | scores = rpn_cls_prob[:, :, :, num_anchors:] 26 | 27 | rpn_bbox_pred = rpn_bbox_pred.view(-1, 4) 28 | scores = scores.contiguous().view(-1, 1) 29 | 30 | length = scores.size(0) 31 | if length < rpn_top_n: 32 | # Random selection, maybe unnecessary and loses good proposals 33 | # But such case rarely happens 34 | top_inds = torch.from_numpy(npr.choice(length, size=rpn_top_n, replace=True)).long().cuda() 35 | else: 36 | top_inds = scores.sort(0, descending=True)[1] 37 | top_inds = top_inds[:rpn_top_n] 38 | top_inds = top_inds.view(rpn_top_n) 39 | 40 | # Do the selection here 41 | anchors = anchors[top_inds, :].contiguous() 42 | rpn_bbox_pred = rpn_bbox_pred[top_inds, :].contiguous() 43 | scores = scores[top_inds].contiguous() 44 | 45 | # Convert anchors into proposals via bbox transformations 46 | proposals = bbox_transform_inv(anchors, rpn_bbox_pred) 47 | 48 | # Clip predicted boxes to image 49 | proposals = clip_boxes(proposals, im_info[:2]) 50 | 51 | # Output rois blob 52 | # Our RPN implementation only supports a single input image, so all 53 | # batch inds are 0 54 | batch_inds = proposals.data.new(proposals.size(0), 1).zero_() 55 | blob = torch.cat([batch_inds, proposals], 1) 56 | return blob, scores 57 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/lib/layer_utils/roi_align/__init__.py -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/lib/layer_utils/roi_align/_ext/__init__.py -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/_ext/crop_and_resize/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._crop_and_resize import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/crop_and_resize.c'] 7 | headers = ['src/crop_and_resize.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | extra_objects = [] 12 | if torch.cuda.is_available(): 13 | print('Including CUDA code.') 14 | sources += ['src/crop_and_resize_gpu.c'] 15 | headers += ['src/crop_and_resize_gpu.h'] 16 | defines += [('WITH_CUDA', None)] 17 | extra_objects += ['src/cuda/crop_and_resize_kernel.cu.o'] 18 | with_cuda = True 19 | 20 | extra_compile_args = ['-std=c99'] 21 | 22 | this_file = os.path.dirname(os.path.realpath(__file__)) 23 | 
print(this_file) 24 | sources = [os.path.join(this_file, fname) for fname in sources] 25 | headers = [os.path.join(this_file, fname) for fname in headers] 26 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 27 | 28 | ffi = create_extension( 29 | '_ext.crop_and_resize', 30 | headers=headers, 31 | sources=sources, 32 | define_macros=defines, 33 | relative_to=__file__, 34 | with_cuda=with_cuda, 35 | extra_objects=extra_objects, 36 | extra_compile_args=extra_compile_args 37 | ) 38 | 39 | if __name__ == '__main__': 40 | ffi.build() 41 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/crop_and_resize.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.autograd import Function 6 | 7 | from ._ext import crop_and_resize as _backend 8 | 9 | 10 | class CropAndResizeFunction(Function): 11 | 12 | def __init__(self, crop_height, crop_width, extrapolation_value=0): 13 | self.crop_height = crop_height 14 | self.crop_width = crop_width 15 | self.extrapolation_value = extrapolation_value 16 | 17 | def forward(self, image, boxes, box_ind): 18 | crops = torch.zeros_like(image) 19 | 20 | if image.is_cuda: 21 | _backend.crop_and_resize_gpu_forward( 22 | image, boxes, box_ind, 23 | self.extrapolation_value, self.crop_height, self.crop_width, crops) 24 | else: 25 | _backend.crop_and_resize_forward( 26 | image, boxes, box_ind, 27 | self.extrapolation_value, self.crop_height, self.crop_width, crops) 28 | 29 | # save for backward 30 | self.im_size = image.size() 31 | self.save_for_backward(boxes, box_ind) 32 | 33 | return crops 34 | 35 | def backward(self, grad_outputs): 36 | boxes, box_ind = self.saved_tensors 37 | 38 | grad_outputs = grad_outputs.contiguous() 39 | grad_image = torch.zeros_like(grad_outputs).resize_(*self.im_size) 40 | 41 | if grad_outputs.is_cuda: 42 | _backend.crop_and_resize_gpu_backward( 43 | grad_outputs, boxes, box_ind, grad_image 44 | ) 45 | else: 46 | _backend.crop_and_resize_backward( 47 | grad_outputs, boxes, box_ind, grad_image 48 | ) 49 | 50 | return grad_image, None, None 51 | 52 | 53 | class CropAndResize(nn.Module): 54 | """ 55 | Crop and resize ported from tensorflow 56 | See more details on https://www.tensorflow.org/api_docs/python/tf/image/crop_and_resize 57 | """ 58 | 59 | def __init__(self, crop_height, crop_width, extrapolation_value=0): 60 | super(CropAndResize, self).__init__() 61 | 62 | self.crop_height = crop_height 63 | self.crop_width = crop_width 64 | self.extrapolation_value = extrapolation_value 65 | 66 | def forward(self, image, boxes, box_ind): 67 | return CropAndResizeFunction(self.crop_height, self.crop_width, self.extrapolation_value)(image, boxes, box_ind) 68 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from .crop_and_resize import CropAndResizeFunction, CropAndResize 5 | 6 | 7 | class RoIAlign(nn.Module): 8 | 9 | def __init__(self, crop_height, crop_width, extrapolation_value=0, transform_fpcoor=True): 10 | super(RoIAlign, self).__init__() 11 | 12 | self.crop_height = crop_height 13 | self.crop_width = crop_width 14 | self.extrapolation_value = extrapolation_value 15 | self.transform_fpcoor = transform_fpcoor 16 | 17 | def 
forward(self, featuremap, boxes, box_ind): 18 | """ 19 | RoIAlign based on crop_and_resize. 20 | See more details on https://github.com/ppwwyyxx/tensorpack/blob/6d5ba6a970710eaaa14b89d24aace179eb8ee1af/examples/FasterRCNN/model.py#L301 21 | :param featuremap: NxCxHxW 22 | :param boxes: Mx4 float box with (x1, y1, x2, y2) **without normalization** 23 | :param box_ind: M 24 | :return: MxCxoHxoW 25 | """ 26 | x1, y1, x2, y2 = torch.split(boxes, 1, dim=1) 27 | image_height, image_width = featuremap.size()[2:4] 28 | 29 | if self.transform_fpcoor: 30 | spacing_w = (x2 - x1) / float(self.crop_width) 31 | spacing_h = (y2 - y1) / float(self.crop_height) 32 | 33 | nx0 = (x1 + spacing_w / 2 - 0.5) / float(image_width - 1) 34 | ny0 = (y1 + spacing_h / 2 - 0.5) / float(image_height - 1) 35 | nw = spacing_w * float(self.crop_width - 1) / float(image_width - 1) 36 | nh = spacing_h * float(self.crop_height - 1) / float(image_height - 1) 37 | 38 | boxes = torch.cat((ny0, nx0, ny0 + nh, nx0 + nw), 1) 39 | else: 40 | x1 = x1 / float(image_width - 1) 41 | x2 = x2 / float(image_width - 1) 42 | y1 = y1 / float(image_height - 1) 43 | y2 = y2 / float(image_height - 1) 44 | boxes = torch.cat((y1, x1, y2, x2), 1) 45 | 46 | boxes = boxes.detach().contiguous() 47 | box_ind = box_ind.detach() 48 | return CropAndResizeFunction(self.crop_height, self.crop_width, self.extrapolation_value)(featuremap, boxes, box_ind) 49 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/src/crop_and_resize.h: -------------------------------------------------------------------------------- 1 | void crop_and_resize_forward( 2 | THFloatTensor * image, 3 | THFloatTensor * boxes, // [y1, x1, y2, x2] 4 | THIntTensor * box_index, // range in [0, batch_size) 5 | const float extrapolation_value, 6 | const int crop_height, 7 | const int crop_width, 8 | THFloatTensor * crops 9 | ); 10 | 11 | void crop_and_resize_backward( 12 | THFloatTensor * grads, 13 | THFloatTensor * boxes, // [y1, x1, y2, x2] 14 | THIntTensor * box_index, // range in [0, batch_size) 15 | THFloatTensor * grads_image // resize to [bsize, c, hc, wc] 16 | ); -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/src/crop_and_resize_gpu.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include "cuda/crop_and_resize_kernel.h" 3 | 4 | extern THCState *state; 5 | 6 | 7 | void crop_and_resize_gpu_forward( 8 | THCudaTensor * image, 9 | THCudaTensor * boxes, // [y1, x1, y2, x2] 10 | THCudaIntTensor * box_index, // range in [0, batch_size) 11 | const float extrapolation_value, 12 | const int crop_height, 13 | const int crop_width, 14 | THCudaTensor * crops 15 | ) { 16 | const int batch_size = THCudaTensor_size(state, image, 0); 17 | const int depth = THCudaTensor_size(state, image, 1); 18 | const int image_height = THCudaTensor_size(state, image, 2); 19 | const int image_width = THCudaTensor_size(state, image, 3); 20 | 21 | const int num_boxes = THCudaTensor_size(state, boxes, 0); 22 | 23 | // init output space 24 | THCudaTensor_resize4d(state, crops, num_boxes, depth, crop_height, crop_width); 25 | THCudaTensor_zero(state, crops); 26 | 27 | cudaStream_t stream = THCState_getCurrentStream(state); 28 | CropAndResizeLaucher( 29 | THCudaTensor_data(state, image), 30 | THCudaTensor_data(state, boxes), 31 | THCudaIntTensor_data(state, box_index), 32 | num_boxes, batch_size, image_height, image_width, 33 | crop_height, crop_width,
depth, extrapolation_value, 34 | THCudaTensor_data(state, crops), 35 | stream 36 | ); 37 | } 38 | 39 | 40 | void crop_and_resize_gpu_backward( 41 | THCudaTensor * grads, 42 | THCudaTensor * boxes, // [y1, x1, y2, x2] 43 | THCudaIntTensor * box_index, // range in [0, batch_size) 44 | THCudaTensor * grads_image // resize to [bsize, c, hc, wc] 45 | ) { 46 | // shape 47 | const int batch_size = THCudaTensor_size(state, grads_image, 0); 48 | const int depth = THCudaTensor_size(state, grads_image, 1); 49 | const int image_height = THCudaTensor_size(state, grads_image, 2); 50 | const int image_width = THCudaTensor_size(state, grads_image, 3); 51 | 52 | const int num_boxes = THCudaTensor_size(state, grads, 0); 53 | const int crop_height = THCudaTensor_size(state, grads, 2); 54 | const int crop_width = THCudaTensor_size(state, grads, 3); 55 | 56 | // init output space 57 | THCudaTensor_zero(state, grads_image); 58 | 59 | cudaStream_t stream = THCState_getCurrentStream(state); 60 | CropAndResizeBackpropImageLaucher( 61 | THCudaTensor_data(state, grads), 62 | THCudaTensor_data(state, boxes), 63 | THCudaIntTensor_data(state, box_index), 64 | num_boxes, batch_size, image_height, image_width, 65 | crop_height, crop_width, depth, 66 | THCudaTensor_data(state, grads_image), 67 | stream 68 | ); 69 | } -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/src/crop_and_resize_gpu.h: -------------------------------------------------------------------------------- 1 | void crop_and_resize_gpu_forward( 2 | THCudaTensor * image, 3 | THCudaTensor * boxes, // [y1, x1, y2, x2] 4 | THCudaIntTensor * box_index, // range in [0, batch_size) 5 | const float extrapolation_value, 6 | const int crop_height, 7 | const int crop_width, 8 | THCudaTensor * crops 9 | ); 10 | 11 | void crop_and_resize_gpu_backward( 12 | THCudaTensor * grads, 13 | THCudaTensor * boxes, // [y1, x1, y2, x2] 14 | THCudaIntTensor * box_index, // range in [0, batch_size) 15 | THCudaTensor * grads_image // resize to [bsize, c, hc, wc] 16 | ); -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/src/cuda/crop_and_resize_kernel.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/lib/layer_utils/roi_align/src/cuda/crop_and_resize_kernel.cu.o -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/src/cuda/crop_and_resize_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _CropAndResize_Kernel 2 | #define _CropAndResize_Kernel 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void CropAndResizeLaucher( 9 | const float *image_ptr, const float *boxes_ptr, 10 | const int *box_ind_ptr, int num_boxes, int batch, int image_height, 11 | int image_width, int crop_height, int crop_width, int depth, 12 | float extrapolation_value, float *crops_ptr, cudaStream_t stream); 13 | 14 | void CropAndResizeBackpropImageLaucher( 15 | const float *grads_ptr, const float *boxes_ptr, 16 | const int *box_ind_ptr, int num_boxes, int batch, int image_height, 17 | int image_width, int crop_height, int crop_width, int depth, 18 | float *grads_image_ptr, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif -------------------------------------------------------------------------------- 
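The RoIAlign module above pools each box to a fixed crop_height x crop_width grid by bilinear sampling through crop_and_resize: featuremap is NxCxHxW, boxes holds un-normalized (x1, y1, x2, y2) coordinates in feature-map space, and box_ind maps each box to its image in the batch. Below is a minimal usage sketch, assuming the FFI extension has been built with build.py, a CUDA device is available, lib/ is on the import path (as tools/_init_paths.py arranges), and the pre-1.0 PyTorch API this extension targets; the shapes and box values are illustrative only.

import torch
from torch.autograd import Variable

from layer_utils.roi_align.roi_align import RoIAlign

# One 512-channel feature map, e.g. a stride-16 VGG16 conv5_3 output.
featuremap = Variable(torch.randn(1, 512, 38, 50)).cuda()

# Two boxes in feature-map coordinates: (x1, y1, x2, y2), not normalized.
boxes = Variable(torch.FloatTensor([[4., 4., 20., 16.],
                                    [10., 8., 30., 30.]])).cuda()

# box_ind[i] is the batch index of the image that boxes[i] comes from.
box_ind = Variable(torch.IntTensor([0, 0])).cuda()

# Pool every box to a fixed 7x7 grid; transform_fpcoor=True selects the
# tensorpack-style coordinate transform implemented in forward() above.
roi_align = RoIAlign(crop_height=7, crop_width=7, transform_fpcoor=True)
crops = roi_align(featuremap, boxes, box_ind)
print(crops.size())  # torch.Size([2, 512, 7, 7])

With transform_fpcoor=True the spacing terms place the sample points at bin centers rather than stretching the box corners onto the grid, which avoids the half-pixel misalignment of the naive normalization in the else branch.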
/lib/layer_utils/roi_pooling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/lib/layer_utils/roi_pooling/__init__.py -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/lib/layer_utils/roi_pooling/_ext/__init__.py -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/_ext/roi_pooling/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_pooling import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/roi_pooling.c'] 7 | headers = ['src/roi_pooling.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/roi_pooling_cuda.c'] 14 | headers += ['src/roi_pooling_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = os.path.dirname(os.path.realpath(__file__)) 19 | print(this_file) 20 | extra_objects = ['src/cuda/roi_pooling_kernel.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | 23 | ffi = create_extension( 24 | '_ext.roi_pooling', 25 | headers=headers, 26 | sources=sources, 27 | define_macros=defines, 28 | relative_to=__file__, 29 | with_cuda=with_cuda, 30 | extra_objects=extra_objects 31 | ) 32 | 33 | if __name__ == '__main__': 34 | ffi.build() 35 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from ._ext import roi_pooling 4 | 5 | 6 | class RoIPoolFunction(Function): 7 | def __init__(self, pooled_height, pooled_width, spatial_scale): 8 | self.pooled_width = int(pooled_width) 9 | self.pooled_height = int(pooled_height) 10 | self.spatial_scale = float(spatial_scale) 11 | self.output = None 12 | self.argmax = None 13 | self.rois = None 14 | self.feature_size = None 15 | 16 | def forward(self, features, rois): 17 | batch_size, num_channels, data_height, data_width = features.size() 18 | num_rois = rois.size()[0] 19 | output = torch.zeros(num_rois, num_channels, self.pooled_height, self.pooled_width) 20 | argmax = torch.IntTensor(num_rois, num_channels, self.pooled_height, self.pooled_width).zero_() 21 | 22 | if not features.is_cuda: 23 | _features = features.permute(0, 2, 3, 1) 24 | roi_pooling.roi_pooling_forward(self.pooled_height, self.pooled_width, self.spatial_scale, 25 | _features, rois, output) 26 | # 
output = output.cuda() 27 | else: 28 | output = output.cuda() 29 | argmax = argmax.cuda() 30 | roi_pooling.roi_pooling_forward_cuda(self.pooled_height, self.pooled_width, self.spatial_scale, 31 | features, rois, output, argmax) 32 | self.output = output 33 | self.argmax = argmax 34 | self.rois = rois 35 | self.feature_size = features.size() 36 | 37 | return output 38 | 39 | def backward(self, grad_output): 40 | assert(self.feature_size is not None and grad_output.is_cuda) 41 | 42 | batch_size, num_channels, data_height, data_width = self.feature_size 43 | 44 | grad_input = torch.zeros(batch_size, num_channels, data_height, data_width).cuda() 45 | roi_pooling.roi_pooling_backward_cuda(self.pooled_height, self.pooled_width, self.spatial_scale, 46 | grad_output, self.rois, grad_input, self.argmax) 47 | 48 | # print grad_input 49 | 50 | return grad_input, None 51 | 52 | 53 | class RoIPool(torch.nn.Module): 54 | def __init__(self, pooled_height, pooled_width, spatial_scale): 55 | super(RoIPool, self).__init__() 56 | 57 | self.pooled_width = int(pooled_width) 58 | self.pooled_height = int(pooled_height) 59 | self.spatial_scale = float(spatial_scale) 60 | 61 | def forward(self, features, rois): 62 | return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois) 63 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/roi_pool_py.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import numpy as np 5 | 6 | 7 | class RoIPool(nn.Module): 8 | def __init__(self, pooled_height, pooled_width, spatial_scale): 9 | super(RoIPool, self).__init__() 10 | self.pooled_width = int(pooled_width) 11 | self.pooled_height = int(pooled_height) 12 | self.spatial_scale = float(spatial_scale) 13 | 14 | def forward(self, features, rois): 15 | batch_size, num_channels, data_height, data_width = features.size() 16 | num_rois = rois.size()[0] 17 | outputs = Variable(torch.zeros(num_rois, num_channels, self.pooled_height, self.pooled_width)).cuda() 18 | 19 | for roi_ind, roi in enumerate(rois): 20 | batch_ind = int(roi[0].data[0]) 21 | roi_start_w, roi_start_h, roi_end_w, roi_end_h = np.round( 22 | roi[1:].data.cpu().numpy() * self.spatial_scale).astype(int) 23 | roi_width = max(roi_end_w - roi_start_w + 1, 1) 24 | roi_height = max(roi_end_h - roi_start_h + 1, 1) 25 | bin_size_w = float(roi_width) / float(self.pooled_width) 26 | bin_size_h = float(roi_height) / float(self.pooled_height) 27 | 28 | for ph in range(self.pooled_height): 29 | hstart = int(np.floor(ph * bin_size_h)) 30 | hend = int(np.ceil((ph + 1) * bin_size_h)) 31 | hstart = min(data_height, max(0, hstart + roi_start_h)) 32 | hend = min(data_height, max(0, hend + roi_start_h)) 33 | for pw in range(self.pooled_width): 34 | wstart = int(np.floor(pw * bin_size_w)) 35 | wend = int(np.ceil((pw + 1) * bin_size_w)) 36 | wstart = min(data_width, max(0, wstart + roi_start_w)) 37 | wend = min(data_width, max(0, wend + roi_start_w)) 38 | 39 | is_empty = (hend <= hstart) or(wend <= wstart) 40 | if is_empty: 41 | outputs[roi_ind, :, ph, pw] = 0 42 | else: 43 | data = features[batch_ind] 44 | outputs[roi_ind, :, ph, pw] = torch.max( 45 | torch.max(data[:, hstart:hend, wstart:wend], 1)[0], 2)[0].view(-1) 46 | 47 | return outputs 48 | 49 | -------------------------------------------------------------------------------- 
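Both RoIPool variants above consume rois rows of the form (batch_ind, x1, y1, x2, y2) with coordinates in input-image space; spatial_scale (e.g. 1/16 for a stride-16 backbone) projects them onto the feature map, and each RoI is max-pooled into a fixed pooled_height x pooled_width grid of bins. A minimal sketch against the pure-Python fallback follows, under the same assumptions as the RoIAlign example (pre-1.0 PyTorch, CUDA available, lib/ on the import path); shapes and boxes are illustrative, and the CUDA-backed wrapper in roi_pool.py is invoked the same way.

import torch
from torch.autograd import Variable

from layer_utils.roi_pooling.roi_pool_py import RoIPool

# Stride-16 feature map of a roughly 600x800 input image.
features = Variable(torch.randn(1, 512, 38, 50)).cuda()

# Two RoIs on batch image 0, given in original image coordinates.
rois = Variable(torch.FloatTensor([[0, 64, 64, 320, 256],
                                   [0, 160, 128, 480, 480]])).cuda()

# spatial_scale = 1/16 rescales image coordinates onto the 38x50 map;
# every RoI is then max-pooled into a 7x7 grid of bins.
roi_pool = RoIPool(pooled_height=7, pooled_width=7, spatial_scale=1.0 / 16)
pooled = roi_pool(features, rois)
print(pooled.size())  # torch.Size([2, 512, 7, 7])

Unlike RoIAlign, RoIPool quantizes each bin to integer feature-map cells and takes a hard max, so gradients flow only to the argmax locations; the argmax buffer kept by RoIPoolFunction serves exactly this purpose in the CUDA backward pass below.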
/lib/layer_utils/roi_pooling/src/cuda/roi_pooling_kernel.cu: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | #include <stdio.h> 6 | #include <math.h> 7 | #include <float.h> 8 | #include "roi_pooling_kernel.h" 9 | 10 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 11 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ 12 | i += blockDim.x * gridDim.x) 13 | 14 | 15 | __global__ void ROIPoolForward(const int nthreads, const float* bottom_data, 16 | const float spatial_scale, const int height, const int width, 17 | const int channels, const int pooled_height, const int pooled_width, 18 | const float* bottom_rois, float* top_data, int* argmax_data) 19 | { 20 | CUDA_1D_KERNEL_LOOP(index, nthreads) 21 | { 22 | // (n, c, ph, pw) is an element in the pooled output 23 | int n = index; 24 | int pw = n % pooled_width; 25 | n /= pooled_width; 26 | int ph = n % pooled_height; 27 | n /= pooled_height; 28 | int c = n % channels; 29 | n /= channels; 30 | 31 | bottom_rois += n * 5; 32 | int roi_batch_ind = bottom_rois[0]; 33 | int roi_start_w = round(bottom_rois[1] * spatial_scale); 34 | int roi_start_h = round(bottom_rois[2] * spatial_scale); 35 | int roi_end_w = round(bottom_rois[3] * spatial_scale); 36 | int roi_end_h = round(bottom_rois[4] * spatial_scale); 37 | 38 | // Force malformed ROIs to be 1x1 39 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1); 40 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1); 41 | float bin_size_h = (float)(roi_height) / (float)(pooled_height); 42 | float bin_size_w = (float)(roi_width) / (float)(pooled_width); 43 | 44 | int hstart = (int)(floor((float)(ph) * bin_size_h)); 45 | int wstart = (int)(floor((float)(pw) * bin_size_w)); 46 | int hend = (int)(ceil((float)(ph + 1) * bin_size_h)); 47 | int wend = (int)(ceil((float)(pw + 1) * bin_size_w)); 48 | 49 | // Add roi offsets and clip to input boundaries 50 | hstart = fminf(fmaxf(hstart + roi_start_h, 0), height); 51 | hend = fminf(fmaxf(hend + roi_start_h, 0), height); 52 | wstart = fminf(fmaxf(wstart + roi_start_w, 0), width); 53 | wend = fminf(fmaxf(wend + roi_start_w, 0), width); 54 | bool is_empty = (hend <= hstart) || (wend <= wstart); 55 | 56 | // Define an empty pooling region to be zero 57 | float maxval = is_empty ?
0 : -FLT_MAX; 58 | // If nothing is pooled, argmax = -1 causes nothing to be backprop'd 59 | int maxidx = -1; 60 | bottom_data += roi_batch_ind * channels * height * width; 61 | for (int h = hstart; h < hend; ++h) { 62 | for (int w = wstart; w < wend; ++w) { 63 | // int bottom_index = (h * width + w) * channels + c; 64 | int bottom_index = (c * height + h) * width + w; 65 | if (bottom_data[bottom_index] > maxval) { 66 | maxval = bottom_data[bottom_index]; 67 | maxidx = bottom_index; 68 | } 69 | } 70 | } 71 | top_data[index] = maxval; 72 | if (argmax_data != NULL) 73 | argmax_data[index] = maxidx; 74 | } 75 | } 76 | 77 | 78 | int ROIPoolForwardLaucher( 79 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 80 | const int width, const int channels, const int pooled_height, 81 | const int pooled_width, const float* bottom_rois, 82 | float* top_data, int* argmax_data, cudaStream_t stream) 83 | { 84 | const int kThreadsPerBlock = 1024; 85 | const int output_size = num_rois * pooled_height * pooled_width * channels; 86 | cudaError_t err; 87 | 88 | 89 | ROIPoolForward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>( 90 | output_size, bottom_data, spatial_scale, height, width, channels, pooled_height, 91 | pooled_width, bottom_rois, top_data, argmax_data); 92 | 93 | err = cudaGetLastError(); 94 | if(cudaSuccess != err) 95 | { 96 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 97 | exit( -1 ); 98 | } 99 | 100 | return 1; 101 | } 102 | 103 | 104 | __global__ void ROIPoolBackward(const int nthreads, const float* top_diff, 105 | const int* argmax_data, const int num_rois, const float spatial_scale, 106 | const int height, const int width, const int channels, 107 | const int pooled_height, const int pooled_width, float* bottom_diff, 108 | const float* bottom_rois) { 109 | CUDA_1D_KERNEL_LOOP(index, nthreads) 110 | { 111 | 112 | // (n, c, h, w) coords of an element in the bottom (input) feature map 113 | int n = index; 114 | int w = n % width; 115 | n /= width; 116 | int h = n % height; 117 | n /= height; 118 | int c = n % channels; 119 | n /= channels; 120 | 121 | float gradient = 0; 122 | // Accumulate gradient over all ROIs that pooled this element 123 | for (int roi_n = 0; roi_n < num_rois; ++roi_n) 124 | { 125 | const float* offset_bottom_rois = bottom_rois + roi_n * 5; 126 | int roi_batch_ind = offset_bottom_rois[0]; 127 | // Skip if ROI's batch index doesn't match n 128 | if (n != roi_batch_ind) { 129 | continue; 130 | } 131 | 132 | int roi_start_w = round(offset_bottom_rois[1] * spatial_scale); 133 | int roi_start_h = round(offset_bottom_rois[2] * spatial_scale); 134 | int roi_end_w = round(offset_bottom_rois[3] * spatial_scale); 135 | int roi_end_h = round(offset_bottom_rois[4] * spatial_scale); 136 | 137 | // Skip if ROI doesn't include (h, w) 138 | const bool in_roi = (w >= roi_start_w && w <= roi_end_w && 139 | h >= roi_start_h && h <= roi_end_h); 140 | if (!in_roi) { 141 | continue; 142 | } 143 | 144 | int offset = roi_n * pooled_height * pooled_width * channels; 145 | const float* offset_top_diff = top_diff + offset; 146 | const int* offset_argmax_data = argmax_data + offset; 147 | 148 | // Compute feasible set of pooled units that could have pooled 149 | // this bottom unit 150 | 151 | // Force malformed ROIs to be 1x1 152 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1); 153 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1); 154 | 155 | float bin_size_h = (float)(roi_height) /
(float)(pooled_height); 156 | float bin_size_w = (float)(roi_width) / (float)(pooled_width); 157 | 158 | int phstart = floor((float)(h - roi_start_h) / bin_size_h); 159 | int phend = ceil((float)(h - roi_start_h + 1) / bin_size_h); 160 | int pwstart = floor((float)(w - roi_start_w) / bin_size_w); 161 | int pwend = ceil((float)(w - roi_start_w + 1) / bin_size_w); 162 | 163 | phstart = fminf(fmaxf(phstart, 0), pooled_height); 164 | phend = fminf(fmaxf(phend, 0), pooled_height); 165 | pwstart = fminf(fmaxf(pwstart, 0), pooled_width); 166 | pwend = fminf(fmaxf(pwend, 0), pooled_width); 167 | 168 | for (int ph = phstart; ph < phend; ++ph) { 169 | for (int pw = pwstart; pw < pwend; ++pw) { 170 | if (offset_argmax_data[(c * pooled_height + ph) * pooled_width + pw] == index) 171 | { 172 | gradient += offset_top_diff[(c * pooled_height + ph) * pooled_width + pw]; 173 | } 174 | } 175 | } 176 | } 177 | bottom_diff[index] = gradient; 178 | } 179 | } 180 | 181 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 182 | const int height, const int width, const int channels, const int pooled_height, 183 | const int pooled_width, const float* bottom_rois, 184 | float* bottom_diff, const int* argmax_data, cudaStream_t stream) 185 | { 186 | const int kThreadsPerBlock = 1024; 187 | const int output_size = batch_size * height * width * channels; 188 | cudaError_t err; 189 | 190 | ROIPoolBackward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>( 191 | output_size, top_diff, argmax_data, num_rois, spatial_scale, height, width, channels, pooled_height, 192 | pooled_width, bottom_diff, bottom_rois); 193 | 194 | err = cudaGetLastError(); 195 | if(cudaSuccess != err) 196 | { 197 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 198 | exit( -1 ); 199 | } 200 | 201 | return 1; 202 | } 203 | 204 | 205 | #ifdef __cplusplus 206 | } 207 | #endif 208 | 209 | 210 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/cuda/roi_pooling_kernel.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/lib/layer_utils/roi_pooling/src/cuda/roi_pooling_kernel.cu.o -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/cuda/roi_pooling_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_POOLING_KERNEL 2 | #define _ROI_POOLING_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int ROIPoolForwardLaucher( 9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 10 | const int width, const int channels, const int pooled_height, 11 | const int pooled_width, const float* bottom_rois, 12 | float* top_data, int* argmax_data, cudaStream_t stream); 13 | 14 | 15 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 16 | const int height, const int width, const int channels, const int pooled_height, 17 | const int pooled_width, const float* bottom_rois, 18 | float* bottom_diff, const int* argmax_data, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- 
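Both kernels flatten their output tensor and walk it with the grid-stride `CUDA_1D_KERNEL_LOOP`, so each thread first decodes its flat `index` back into coordinates, with the last dimension varying fastest. A small illustrative sketch of the decoding arithmetic used in `ROIPoolForward` (plain Python, purely mirroring the integer divisions in the kernel):

```python
def decode_pooled_index(index, channels, pooled_height, pooled_width):
    """Invert index = ((n * channels + c) * pooled_height + ph) * pooled_width + pw."""
    pw = index % pooled_width
    index //= pooled_width
    ph = index % pooled_height
    index //= pooled_height
    c = index % channels
    n = index // channels
    return n, c, ph, pw

# pw advances fastest, then ph, then the channel, then the ROI index n.
assert decode_pooled_index(0, 512, 7, 7) == (0, 0, 0, 0)
assert decode_pooled_index(7 * 7, 512, 7, 7) == (0, 1, 0, 0)
assert decode_pooled_index(512 * 7 * 7, 512, 7, 7) == (1, 0, 0, 0)
```

The backward kernel runs the same loop over the flattened *input* gradient instead, which is why it decodes `(n, c, h, w)` and then searches the pooled cells whose bins could have covered that input element.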
/lib/layer_utils/roi_pooling/src/roi_pooling.c: -------------------------------------------------------------------------------- 1 | #include <TH/TH.h> 2 | #include <math.h> 3 | 4 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 5 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output) 6 | { 7 | // Grab the input tensor 8 | float * data_flat = THFloatTensor_data(features); 9 | float * rois_flat = THFloatTensor_data(rois); 10 | 11 | float * output_flat = THFloatTensor_data(output); 12 | 13 | // Number of ROIs 14 | int num_rois = THFloatTensor_size(rois, 0); 15 | int size_rois = THFloatTensor_size(rois, 1); 16 | // batch size 17 | int batch_size = THFloatTensor_size(features, 0); 18 | if(batch_size != 1) 19 | { 20 | return 0; 21 | } 22 | // data height (note: this CPU path assumes NHWC feature layout) 23 | int data_height = THFloatTensor_size(features, 1); 24 | // data width 25 | int data_width = THFloatTensor_size(features, 2); 26 | // Number of channels 27 | int num_channels = THFloatTensor_size(features, 3); 28 | 29 | // Set all elements of the output tensor to -1. 30 | THFloatStorage_fill(THFloatTensor_storage(output), -1); 31 | 32 | // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R 33 | int index_roi = 0; 34 | int index_output = 0; 35 | int n; 36 | for (n = 0; n < num_rois; ++n) 37 | { 38 | int roi_batch_ind = rois_flat[index_roi + 0]; 39 | int roi_start_w = round(rois_flat[index_roi + 1] * spatial_scale); 40 | int roi_start_h = round(rois_flat[index_roi + 2] * spatial_scale); 41 | int roi_end_w = round(rois_flat[index_roi + 3] * spatial_scale); 42 | int roi_end_h = round(rois_flat[index_roi + 4] * spatial_scale); 43 | // CHECK_GE(roi_batch_ind, 0); 44 | // CHECK_LT(roi_batch_ind, batch_size); 45 | 46 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1); 47 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1); 48 | float bin_size_h = (float)(roi_height) / (float)(pooled_height); 49 | float bin_size_w = (float)(roi_width) / (float)(pooled_width); 50 | 51 | int index_data = roi_batch_ind * data_height * data_width * num_channels; 52 | const int output_area = pooled_width * pooled_height; 53 | 54 | int c, ph, pw; 55 | for (ph = 0; ph < pooled_height; ++ph) 56 | { 57 | for (pw = 0; pw < pooled_width; ++pw) 58 | { 59 | int hstart = (floor((float)(ph) * bin_size_h)); 60 | int wstart = (floor((float)(pw) * bin_size_w)); 61 | int hend = (ceil((float)(ph + 1) * bin_size_h)); 62 | int wend = (ceil((float)(pw + 1) * bin_size_w)); 63 | 64 | hstart = fminf(fmaxf(hstart + roi_start_h, 0), data_height); 65 | hend = fminf(fmaxf(hend + roi_start_h, 0), data_height); 66 | wstart = fminf(fmaxf(wstart + roi_start_w, 0), data_width); 67 | wend = fminf(fmaxf(wend + roi_start_w, 0), data_width); 68 | 69 | const int pool_index = index_output + (ph * pooled_width + pw); 70 | int is_empty = (hend <= hstart) || (wend <= wstart); 71 | if (is_empty) 72 | { 73 | for (c = 0; c < num_channels * output_area; c += output_area) 74 | { 75 | output_flat[pool_index + c] = 0; 76 | } 77 | } 78 | else 79 | { 80 | int h, w, c; 81 | for (h = hstart; h < hend; ++h) 82 | { 83 | for (w = wstart; w < wend; ++w) 84 | { 85 | for (c = 0; c < num_channels; ++c) 86 | { 87 | const int index = (h * data_width + w) * num_channels + c; 88 | if (data_flat[index_data + index] > output_flat[pool_index + c * output_area]) 89 | { 90 | output_flat[pool_index + c * output_area] = data_flat[index_data + index]; 91 | } 92 | } 93 | } 94 | } 95 | } 96 | } 97 | } 98 | 99 | // Increment ROI index 100 | index_roi += size_rois; 101 | index_output +=
pooled_height * pooled_width * num_channels; 102 | } 103 | return 1; 104 | } -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/roi_pooling.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output); -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/roi_pooling_cuda.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include <math.h> 3 | #include "cuda/roi_pooling_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 16 | 17 | // Number of ROIs 18 | int num_rois = THCudaTensor_size(state, rois, 0); 19 | int size_rois = THCudaTensor_size(state, rois, 1); 20 | if (size_rois != 5) 21 | { 22 | return 0; 23 | } 24 | 25 | // batch size 26 | int batch_size = THCudaTensor_size(state, features, 0); 27 | if (batch_size != 1) 28 | { 29 | return 0; 30 | } 31 | // data height 32 | int data_height = THCudaTensor_size(state, features, 2); 33 | // data width 34 | int data_width = THCudaTensor_size(state, features, 3); 35 | // Number of channels 36 | int num_channels = THCudaTensor_size(state, features, 1); 37 | 38 | cudaStream_t stream = THCState_getCurrentStream(state); 39 | 40 | ROIPoolForwardLaucher( 41 | data_flat, spatial_scale, num_rois, data_height, 42 | data_width, num_channels, pooled_height, 43 | pooled_width, rois_flat, 44 | output_flat, argmax_flat, stream); 45 | 46 | return 1; 47 | } 48 | 49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 50 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax) 51 | { 52 | // Grab the input tensor 53 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 54 | float * rois_flat = THCudaTensor_data(state, rois); 55 | 56 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 57 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 58 | 59 | // Number of ROIs 60 | int num_rois = THCudaTensor_size(state, rois, 0); 61 | int size_rois = THCudaTensor_size(state, rois, 1); 62 | if (size_rois != 5) 63 | { 64 | return 0; 65 | } 66 | 67 | // batch size 68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 69 | if (batch_size != 1) 70 | { 71 | return 0; 72 | } 73 | // data height 74 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 75 | // data width 76 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 77 | // Number of channels 78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 79 | 80 | cudaStream_t stream = THCState_getCurrentStream(state); 81 | ROIPoolBackwardLaucher( 82 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 83 | data_width, num_channels, pooled_height, 84 | pooled_width, rois_flat, 85 | bottom_grad_flat, argmax_flat, stream); 86 | 87 | return 1; 88 | }
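These two entry points are what the cffi-built `_ext.roi_pooling` module exposes to Python; `RoIPoolFunction` in `roi_pool.py` above is a thin wrapper that allocates `output` and `argmax` and forwards to them. A minimal sketch of driving the forward pass directly — assuming the extension has been built via `lib/make.sh`, and respecting the guards above (single-image batch, 5-column ROIs); shapes are illustrative:

```python
import torch
from layer_utils.roi_pooling._ext import roi_pooling  # produced by build.py

pooled_h, pooled_w, spatial_scale = 7, 7, 1.0 / 16

features = torch.randn(1, 512, 38, 50).cuda()        # (N=1, C, H, W)
rois = torch.Tensor([[0, 16, 16, 320, 240]]).cuda()  # [batch_index, x1, y1, x2, y2]
output = torch.zeros(rois.size(0), 512, pooled_h, pooled_w).cuda()
argmax = torch.IntTensor(rois.size(0), 512, pooled_h, pooled_w).zero_().cuda()

roi_pooling.roi_pooling_forward_cuda(pooled_h, pooled_w, spatial_scale,
                                     features, rois, output, argmax)
# output now holds the max-pooled features; argmax records, per output cell,
# the flat index of the winning input element, which the backward pass reuses.
```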
-------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/roi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax); 3 | 4 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax); -------------------------------------------------------------------------------- /lib/layer_utils/snippets.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | from layer_utils.generate_anchors import generate_anchors 12 | 13 | def generate_anchors_pre(height, width, feat_stride, anchor_scales=(8,16,32), anchor_ratios=(0.5,1,2)): 14 | """ A wrapper function to generate anchors given different scales 15 | Also return the number of anchors in variable 'length' 16 | """ 17 | anchors = generate_anchors(ratios=np.array(anchor_ratios), scales=np.array(anchor_scales)) 18 | A = anchors.shape[0] 19 | shift_x = np.arange(0, width) * feat_stride 20 | shift_y = np.arange(0, height) * feat_stride 21 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 22 | shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() 23 | K = shifts.shape[0] 24 | # width changes faster, so here it is H, W, C 25 | anchors = anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2)) 26 | anchors = anchors.reshape((K * A, 4)).astype(np.float32, copy=False) 27 | length = np.int32(anchors.shape[0]) 28 | 29 | return anchors, length 30 | -------------------------------------------------------------------------------- /lib/make.sh: -------------------------------------------------------------------------------- 1 | CUDA_ARCH="-gencode arch=compute_30,code=sm_30 \ 2 | -gencode arch=compute_35,code=sm_35 \ 3 | -gencode arch=compute_50,code=sm_50 \ 4 | -gencode arch=compute_52,code=sm_52 \ 5 | -gencode arch=compute_60,code=sm_60 \ 6 | -gencode arch=compute_61,code=sm_61 \ 7 | -gencode arch=compute_70,code=sm_70 " 8 | 9 | # Build RoiPooling module 10 | cd layer_utils/roi_pooling/src/cuda 11 | echo "Compiling roi_pooling kernels by nvcc..." 12 | nvcc -c -o roi_pooling_kernel.cu.o roi_pooling_kernel.cu -x cu -Xcompiler -fPIC $CUDA_ARCH 13 | cd ../../ 14 | python build.py 15 | cd ../../ 16 | 17 | # Build RoIAlign 18 | cd layer_utils/roi_align/src/cuda 19 | echo 'Compiling crop_and_resize kernels by nvcc...' 20 | nvcc -c -o crop_and_resize_kernel.cu.o crop_and_resize_kernel.cu -x cu -Xcompiler -fPIC $CUDA_ARCH 21 | cd ../../ 22 | python build.py 23 | cd ../../ 24 | 25 | # Build NMS 26 | cd nms/src/cuda 27 | echo "Compiling nms kernels by nvcc..." 
28 | nvcc -c -o nms_kernel.cu.o nms_kernel.cu -x cu -Xcompiler -fPIC $CUDA_ARCH 29 | cd ../../ 30 | python build.py 31 | cd ../ 32 | -------------------------------------------------------------------------------- /lib/model/__init__.py: -------------------------------------------------------------------------------- 1 | from . import config 2 | -------------------------------------------------------------------------------- /lib/model/bbox_transform.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | import torch 13 | 14 | def bbox_transform(ex_rois, gt_rois): 15 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 16 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 17 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 18 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 19 | 20 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 21 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 22 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 23 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 24 | 25 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 26 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 27 | targets_dw = torch.log(gt_widths / ex_widths) 28 | targets_dh = torch.log(gt_heights / ex_heights) 29 | 30 | targets = torch.stack( 31 | (targets_dx, targets_dy, targets_dw, targets_dh), 1) 32 | return targets 33 | 34 | 35 | def bbox_transform_inv(boxes, deltas): 36 | # Input should be both tensor or both Variable and on the same device 37 | if len(boxes) == 0: 38 | return deltas.detach() * 0 39 | 40 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 41 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 42 | ctr_x = boxes[:, 0] + 0.5 * widths 43 | ctr_y = boxes[:, 1] + 0.5 * heights 44 | 45 | dx = deltas[:, 0::4] 46 | dy = deltas[:, 1::4] 47 | dw = deltas[:, 2::4] 48 | dh = deltas[:, 3::4] 49 | 50 | pred_ctr_x = dx * widths.unsqueeze(1) + ctr_x.unsqueeze(1) 51 | pred_ctr_y = dy * heights.unsqueeze(1) + ctr_y.unsqueeze(1) 52 | pred_w = torch.exp(dw) * widths.unsqueeze(1) 53 | pred_h = torch.exp(dh) * heights.unsqueeze(1) 54 | 55 | pred_boxes = torch.cat(\ 56 | [_.unsqueeze(2) for _ in [pred_ctr_x - 0.5 * pred_w,\ 57 | pred_ctr_y - 0.5 * pred_h,\ 58 | pred_ctr_x + 0.5 * pred_w,\ 59 | pred_ctr_y + 0.5 * pred_h]], 2).view(len(boxes), -1) 60 | 61 | return pred_boxes 62 | 63 | 64 | def clip_boxes(boxes, im_shape): 65 | """ 66 | Clip boxes to image boundaries. 
67 | boxes must be tensor or Variable, im_shape can be anything but Variable 68 | """ 69 | 70 | if not hasattr(boxes, 'data'): 71 | boxes_ = boxes.numpy() 72 | 73 | boxes = boxes.view(boxes.size(0), -1, 4) 74 | boxes = torch.stack(\ 75 | [boxes[:,:,0].clamp(0, im_shape[1] - 1), 76 | boxes[:,:,1].clamp(0, im_shape[0] - 1), 77 | boxes[:,:,2].clamp(0, im_shape[1] - 1), 78 | boxes[:,:,3].clamp(0, im_shape[0] - 1)], 2).view(boxes.size(0), -1) 79 | 80 | return boxes 81 | -------------------------------------------------------------------------------- /lib/model/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from nms.pth_nms import pth_nms 12 | 13 | 14 | def nms(dets, thresh): 15 | """Dispatch to either CPU or GPU NMS implementations. 16 | Accept dets as tensor""" 17 | return pth_nms(dets, thresh) 18 | -------------------------------------------------------------------------------- /lib/model/test.py~: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import cv2 11 | import numpy as np 12 | try: 13 | import cPickle as pickle 14 | except ImportError: 15 | import pickle 16 | import os 17 | import math 18 | 19 | from utils.timer import Timer 20 | from model.nms_wrapper import nms 21 | from utils.blob import im_list_to_blob 22 | 23 | from model.config import cfg, get_output_dir 24 | from model.bbox_transform import clip_boxes, bbox_transform_inv 25 | 26 | import torch 27 | 28 | def _get_image_blob(im): 29 | """Converts an image into a network input. 
30 | Arguments: 31 | im (ndarray): a color image in BGR order 32 | Returns: 33 | blob (ndarray): a data blob holding an image pyramid 34 | im_scale_factors (list): list of image scales (relative to im) used 35 | in the image pyramid 36 | """ 37 | im_orig = im.astype(np.float32, copy=True) 38 | im_orig -= cfg.PIXEL_MEANS 39 | 40 | im_shape = im_orig.shape 41 | im_size_min = np.min(im_shape[0:2]) 42 | im_size_max = np.max(im_shape[0:2]) 43 | 44 | processed_ims = [] 45 | im_scale_factors = [] 46 | 47 | for target_size in cfg.TEST.SCALES: 48 | im_scale = float(target_size) / float(im_size_min) 49 | # Prevent the biggest axis from being more than MAX_SIZE 50 | if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE: 51 | im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max) 52 | im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, 53 | interpolation=cv2.INTER_LINEAR) 54 | im_scale_factors.append(im_scale) 55 | processed_ims.append(im) 56 | 57 | # Create a blob to hold the input images 58 | blob = im_list_to_blob(processed_ims) 59 | 60 | return blob, np.array(im_scale_factors) 61 | 62 | def _get_blobs(im): 63 | """Convert an image and RoIs within that image into network inputs.""" 64 | blobs = {} 65 | blobs['data'], im_scale_factors = _get_image_blob(im) 66 | 67 | return blobs, im_scale_factors 68 | 69 | def _clip_boxes(boxes, im_shape): 70 | """Clip boxes to image boundaries.""" 71 | # x1 >= 0 72 | boxes[:, 0::4] = np.maximum(boxes[:, 0::4], 0) 73 | # y1 >= 0 74 | boxes[:, 1::4] = np.maximum(boxes[:, 1::4], 0) 75 | # x2 < im_shape[1] 76 | boxes[:, 2::4] = np.minimum(boxes[:, 2::4], im_shape[1] - 1) 77 | # y2 < im_shape[0] 78 | boxes[:, 3::4] = np.minimum(boxes[:, 3::4], im_shape[0] - 1) 79 | return boxes 80 | 81 | def _rescale_boxes(boxes, inds, scales): 82 | """Rescale boxes according to image rescaling.""" 83 | for i in range(boxes.shape[0]): 84 | boxes[i,:] = boxes[i,:] / scales[int(inds[i])] 85 | 86 | return boxes 87 | 88 | def im_detect(net, im): 89 | blobs, im_scales = _get_blobs(im) 90 | assert len(im_scales) == 1, "Only single-image batch implemented" 91 | 92 | im_blob = blobs['data'] 93 | blobs['im_info'] = np.array([im_blob.shape[1], im_blob.shape[2], im_scales[0]], dtype=np.float32) 94 | 95 | _, scores, bbox_pred, rois, fc7, net_conv = net.test_image(blobs['data'], blobs['im_info']) 96 | 97 | boxes = rois[:, 1:5] / im_scales[0] 98 | scores = np.reshape(scores, [scores.shape[0], -1]) 99 | bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1]) 100 | if cfg.TEST.BBOX_REG: 101 | # Apply bounding-box regression deltas 102 | box_deltas = bbox_pred 103 | pred_boxes = bbox_transform_inv(torch.from_numpy(boxes), torch.from_numpy(box_deltas)).numpy() 104 | pred_boxes = _clip_boxes(pred_boxes, im.shape) 105 | else: 106 | # Simply repeat the boxes, once for each class 107 | pred_boxes = np.tile(boxes, (1, scores.shape[1])) 108 | 109 | return scores, pred_boxes#, fc7, net_conv 110 | 111 | def apply_nms(all_boxes, thresh): 112 | """Apply non-maximum suppression to all predicted boxes output by the 113 | test_net method. 
114 | """ 115 | num_classes = len(all_boxes) 116 | num_images = len(all_boxes[0]) 117 | nms_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)] 118 | for cls_ind in range(num_classes): 119 | for im_ind in range(num_images): 120 | dets = all_boxes[cls_ind][im_ind] 121 | if len(dets) == 0: 122 | continue 123 | 124 | x1 = dets[:, 0] 125 | y1 = dets[:, 1] 126 | x2 = dets[:, 2] 127 | y2 = dets[:, 3] 128 | scores = dets[:, 4] 129 | inds = np.where((x2 > x1) & (y2 > y1))[0] 130 | dets = dets[inds,:] 131 | if len(dets) == 0: 132 | continue 133 | 134 | keep = nms(torch.from_numpy(dets), thresh).numpy() 135 | if len(keep) == 0: 136 | continue 137 | nms_boxes[cls_ind][im_ind] = dets[keep, :].copy() 138 | return nms_boxes 139 | def draw_car_bb(im, bboxes, scores=[], thr=0.3, type='det'): 140 | bboxes = bboxes.astype(int) 141 | imgcv = np.copy(im) 142 | h, w, _ = imgcv.shape 143 | color = (255,0,0) 144 | if type == 'gt': 145 | scores = np.ones(len(bboxes)) 146 | color = (0,0,255) 147 | 148 | for i, box in enumerate(bboxes): 149 | if scores[i] < thr: 150 | continue 151 | 152 | thick = int((h + w) / 1000) #original: int((h + w) / 300) 153 | cv2.rectangle(imgcv, 154 | (box[0], box[1]), (box[2], box[3]), 155 | color, thick) 156 | mess = '%s: %.3f' % ('Car', scores[i]) 157 | if type == 'gt': 158 | mess = '' 159 | cv2.putText(imgcv, mess, (box[0], box[1] - 12), 160 | 0, 1e-3 * h / 2., color, 2) 161 | 162 | return imgcv 163 | 164 | 165 | def test_net(net, imdb, weights_filename, max_per_image=100, thresh=0.): 166 | vis = True 167 | 168 | np.random.seed(cfg.RNG_SEED) 169 | """Test a Fast R-CNN network on an image database.""" 170 | num_images = len(imdb.image_index) 171 | # all detections are collected into: 172 | # all_boxes[cls][image] = N x 5 array of detections in 173 | # (x1, y1, x2, y2, score) 174 | all_boxes = [[[] for _ in range(num_images)] 175 | for _ in range(imdb.num_classes)] 176 | 177 | output_dir = get_output_dir(imdb, weights_filename) 178 | 179 | if vis and 'cityscapes' in imdb.name: 180 | gt_roidb = [imdb._load_cityscapes_annotation(index) 181 | for index in imdb.image_index] 182 | elif vis and 'KITTI' in imdb.name: 183 | gt_roidb = [imdb._load_KITTI_annotation(index) 184 | for index in imdb.image_index] 185 | else: 186 | gt_roidb = None 187 | 188 | # timers 189 | _t = {'im_detect' : Timer(), 'misc' : Timer()} 190 | 191 | for i in range(num_images): 192 | im = cv2.imread(imdb.image_path_at(i)) 193 | 194 | _t['im_detect'].tic() 195 | scores, boxes = im_detect(net, im) 196 | _t['im_detect'].toc() 197 | 198 | _t['misc'].tic() 199 | 200 | # skip j = 0, because it's the background class 201 | for j in range(1, imdb.num_classes): 202 | inds = np.where(scores[:, j] > thresh)[0] 203 | cls_scores = scores[inds, j] 204 | cls_boxes = boxes[inds, j*4:(j+1)*4] 205 | cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ 206 | .astype(np.float32, copy=False) 207 | keep = nms(torch.from_numpy(cls_dets), cfg.TEST.NMS).numpy() if cls_dets.size > 0 else [] 208 | cls_dets = cls_dets[keep, :] 209 | all_boxes[j][i] = cls_dets 210 | 211 | # Limit to max_per_image detections *over all classes* 212 | if max_per_image > 0: 213 | image_scores = np.hstack([all_boxes[j][i][:, -1] 214 | for j in range(1, imdb.num_classes)]) 215 | if len(image_scores) > max_per_image: 216 | image_thresh = np.sort(image_scores)[-max_per_image] 217 | for j in range(1, imdb.num_classes): 218 | keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] 219 | all_boxes[j][i] = all_boxes[j][i][keep, :] 220 | _t['misc'].toc()
221 | 222 | print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ 223 | .format(i + 1, num_images, _t['im_detect'].average_time(), 224 | _t['misc'].average_time())) 225 | 226 | if vis and gt_roidb: 227 | #draw ground truth boxes 228 | im2show = draw_car_bb(im, gt_roidb[i]['boxes'], type='gt') 229 | 230 | #draw detected boxes 231 | im2show = draw_car_bb(im2show, np.squeeze(all_boxes[1][i][:, :-1]), np.squeeze(all_boxes[1][i][:,-1])) #draw class 1: car 232 | cv2.imwrite('/home/disk1/DA/pytorch-faster-rcnn/vis/inDomain/'+imdb.image_index[i]+'.png', im2show) 233 | #cv2.imshow('test', im2show) 234 | #cv2.waitKey(0) 235 | 236 | 237 | 238 | det_file = os.path.join(output_dir, 'detections.pkl') 239 | with open(det_file, 'wb') as f: 240 | pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL) 241 | 242 | print('Evaluating detections') 243 | imdb.evaluate_detections(all_boxes, output_dir) 244 | 245 | -------------------------------------------------------------------------------- /lib/nets/.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | 14 | from model.config import cfg 15 | 16 | class FCDiscriminator_img(nn.Module): 17 | 18 | def __init__(self, num_classes, ndf = 64): 19 | super(FCDiscriminator_img, self).__init__() 20 | 21 | # self.conv1 = nn.Conv2d(num_classes, ndf, kernel_size=4, stride=2, padding=1) 22 | # self.conv2 = nn.Conv2d(ndf, ndf*2, kernel_size=4, stride=2, padding=1) 23 | # self.conv3 = nn.Conv2d(ndf*2, ndf*4, kernel_size=4, stride=2, padding=1) 24 | # self.conv4 = nn.Conv2d(ndf*4, ndf*8, kernel_size=4, stride=2, padding=1) 25 | # self.classifier = nn.Conv2d(ndf*8, 1, kernel_size=4, stride=2, padding=1) 26 | 27 | self.conv1 = nn.Conv2d(num_classes, ndf, kernel_size=3, padding=1) 28 | self.conv2 = nn.Conv2d(ndf, ndf, kernel_size=3, padding=1) 29 | self.conv3 = nn.Conv2d(ndf, ndf, kernel_size=3, padding=1) 30 | # self.classifier = nn.Conv2d(ndf, 1, kernel_size=3, padding=1) 31 | 32 | self.leaky_relu = nn.LeakyReLU(negative_slope=0.2, inplace=True) 33 | #self.up_sample = nn.Upsample(scale_factor=32, mode='bilinear') 34 | #self.sigmoid = nn.Sigmoid() 35 | 36 | 37 | def forward(self, x): 38 | x = self.conv1(x) 39 | x = self.leaky_relu(x) 40 | # x = self.conv2(x) 41 | # x = self.leaky_relu(x) 42 | # x = self.conv3(x) 43 | # x = self.leaky_relu(x) 44 | # x = self.conv4(x) 45 | # x = self.leaky_relu(x) 46 | x = self.classifier(x) 47 | #x = self.up_sample(x) 48 | #x = self.sigmoid(x) 49 | 50 | return x -------------------------------------------------------------------------------- /lib/nets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/lib/nets/__init__.py -------------------------------------------------------------------------------- /lib/nets/discriminator_img.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT 
License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | 14 | from model.config import cfg 15 | 16 | class FCDiscriminator_img(nn.Module): 17 | 18 | def __init__(self, num_classes, ndf = 64): 19 | super(FCDiscriminator_img, self).__init__() 20 | 21 | self.conv1 = nn.Conv2d(num_classes, ndf, kernel_size=3, padding=1) 22 | self.conv2 = nn.Conv2d(ndf, ndf, kernel_size=3, padding=1) 23 | self.conv3 = nn.Conv2d(ndf, ndf, kernel_size=3, padding=1) 24 | self.classifier = nn.Conv2d(ndf, 1, kernel_size=3, padding=1) 25 | 26 | self.leaky_relu = nn.LeakyReLU(negative_slope=0.2, inplace=True) 27 | 28 | 29 | def forward(self, x): 30 | x = self.conv1(x) 31 | x = self.leaky_relu(x) 32 | x = self.conv2(x) 33 | x = self.leaky_relu(x) 34 | x = self.conv3(x) 35 | x = self.leaky_relu(x) 36 | x = self.classifier(x) 37 | 38 | return x -------------------------------------------------------------------------------- /lib/nets/discriminator_inst.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import torch 11 | import torch.nn as nn 12 | 13 | class FCDiscriminator_inst(nn.Module): 14 | 15 | def __init__(self, in_channel, ndf = 4096): 16 | super(FCDiscriminator_inst, self).__init__() 17 | 18 | self.fc1 = nn.Linear(in_channel, ndf) 19 | self.fc2 = nn.Linear(ndf, ndf) 20 | self.fc3 = nn.Linear(ndf, ndf) 21 | self.classifier = nn.Linear(ndf, 1) 22 | 23 | self.leaky_relu = nn.LeakyReLU(negative_slope=0.2, inplace=True) 24 | self.dropout = nn.Dropout() 25 | 26 | 27 | def forward(self, x): 28 | x = x.view(x.size()[0], -1) 29 | x = self.fc1(x) 30 | x = self.leaky_relu(x) 31 | # x = self.dropout(x) 32 | x = self.fc2(x) 33 | x = self.leaky_relu(x) 34 | # x = self.dropout(x) 35 | x = self.fc3(x) 36 | x = self.leaky_relu(x) 37 | # x = self.dropout(x) 38 | x = self.classifier(x) 39 | 40 | return x -------------------------------------------------------------------------------- /lib/nets/vgg16.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | from nets.network import Network 11 | from model.config import cfg 12 | 13 | import torch 14 | import torch.nn as nn 15 | import torch.nn.functional as F 16 | from torch.autograd import Variable 17 | import math 18 | import torchvision.models as models 19 | 20 | class vgg16(Network): 21 | def __init__(self): 22 | Network.__init__(self) 23 | self._feat_stride = [16, ] 24 | self._feat_compress = [1. 
/ float(self._feat_stride[0]), ] 25 | self._net_conv_channels = 512 26 | self._fc7_channels = 4096 27 | 28 | def _init_head_tail(self): 29 | self.vgg = models.vgg16() 30 | # Remove fc8 31 | self.vgg.classifier = nn.Sequential(*list(self.vgg.classifier._modules.values())[:-1]) 32 | 33 | # Fix the layers before conv3: 34 | #for layer in range(10): 35 | # for p in self.vgg.features[layer].parameters(): p.requires_grad = False 36 | 37 | # self.vgg.features._modules['28'] = nn.Conv2d(512, 1024, [3, 3], padding=1) #for feature_separate 38 | 39 | # not using the last maxpool layer 40 | self._layers['head'] = nn.Sequential(*list(self.vgg.features._modules.values())[:-1]) 41 | 42 | ## 43 | # self.vgg2 = models.vgg16() 44 | # self._layers['head_2'] = nn.Sequential(*list(self.vgg2.features._modules.values())[:-1]) 45 | 46 | def _image_to_head(self): 47 | net_conv = self._layers['head'](self._image) 48 | self._act_summaries['conv'] = net_conv 49 | 50 | return net_conv 51 | 52 | # def _image_to_head_branch(self): 53 | # net_conv2 = self._layers['head_2'](self._image) 54 | 55 | # return net_conv2 56 | 57 | def _head_to_tail(self, pool5): 58 | pool5_flat = pool5.view(pool5.size(0), -1) 59 | fc7 = self.vgg.classifier(pool5_flat) 60 | 61 | return fc7 62 | 63 | def load_pretrained_cnn(self, state_dict): 64 | #load from previous network weight 65 | netDict = self.state_dict() 66 | stateDict = {k: v for k, v in state_dict.items() if k in netDict} 67 | 68 | #print('load pretrained:', stateDict.keys()) 69 | netDict.update(stateDict) 70 | nn.Module.load_state_dict(self, netDict) 71 | self.vgg.load_state_dict({k.replace('vgg.', ''):v for k,v in state_dict.items() if k.replace('vgg.', '') in self.vgg.state_dict()}) #loading pretrained vgg.pth 72 | 73 | -------------------------------------------------------------------------------- /lib/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/lib/nms/__init__.py -------------------------------------------------------------------------------- /lib/nms/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/lib/nms/_ext/__init__.py -------------------------------------------------------------------------------- /lib/nms/_ext/nms/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._nms import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/nms/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/nms.c'] 7 | headers = ['src/nms.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/nms_cuda.c'] 14 | headers += ['src/nms_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = 
os.path.dirname(os.path.realpath(__file__)) 19 | print(this_file) 20 | extra_objects = ['src/cuda/nms_kernel.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | 23 | ffi = create_extension( 24 | '_ext.nms', 25 | headers=headers, 26 | sources=sources, 27 | define_macros=defines, 28 | relative_to=__file__, 29 | with_cuda=with_cuda, 30 | extra_objects=extra_objects 31 | ) 32 | 33 | if __name__ == '__main__': 34 | ffi.build() 35 | -------------------------------------------------------------------------------- /lib/nms/pth_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from ._ext import nms 3 | import numpy as np 4 | 5 | def pth_nms(dets, thresh): 6 | """ 7 | dets has to be a tensor 8 | """ 9 | if not dets.is_cuda: 10 | x1 = dets[:, 0] 11 | y1 = dets[:, 1] 12 | x2 = dets[:, 2] 13 | y2 = dets[:, 3] 14 | scores = dets[:, 4] 15 | 16 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 17 | order = scores.sort(0, descending=True)[1] 18 | # order = torch.from_numpy(np.ascontiguousarray(scores.numpy().argsort()[::-1])).long() 19 | 20 | keep = torch.LongTensor(dets.size(0)) 21 | num_out = torch.LongTensor(1) 22 | nms.cpu_nms(keep, num_out, dets, order, areas, thresh) 23 | 24 | return keep[:num_out[0]] 25 | else: 26 | x1 = dets[:, 0] 27 | y1 = dets[:, 1] 28 | x2 = dets[:, 2] 29 | y2 = dets[:, 3] 30 | scores = dets[:, 4] 31 | 32 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 33 | order = scores.sort(0, descending=True)[1] 34 | # order = torch.from_numpy(np.ascontiguousarray(scores.cpu().numpy().argsort()[::-1])).long().cuda() 35 | 36 | dets = dets[order].contiguous() 37 | 38 | keep = torch.LongTensor(dets.size(0)) 39 | num_out = torch.LongTensor(1) 40 | # keep = torch.cuda.LongTensor(dets.size(0)) 41 | # num_out = torch.cuda.LongTensor(1) 42 | nms.gpu_nms(keep, num_out, dets, thresh) 43 | 44 | return order[keep[:num_out[0]].cuda()].contiguous() 45 | # return order[keep[:num_out[0]]].contiguous() -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #include <stdio.h> 12 | #include <math.h> 13 | #include <float.h> 14 | #include "nms_kernel.h" 15 | 16 | __device__ inline float devIoU(float const * const a, float const * const b) { 17 | float left = fmaxf(a[0], b[0]), right = fminf(a[2], b[2]); 18 | float top = fmaxf(a[1], b[1]), bottom = fminf(a[3], b[3]); 19 | float width = fmaxf(right - left + 1, 0.f), height = fmaxf(bottom - top + 1, 0.f); 20 | float interS = width * height; 21 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 22 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 23 | return interS / (Sa + Sb - interS); 24 | } 25 | 26 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 27 | const float *dev_boxes, unsigned long long *dev_mask) { 28 | const int row_start = blockIdx.y; 29 | const int col_start = blockIdx.x; 30 | 31 | // if (row_start > col_start) return; 32 | 33 | const int row_size = 34 | fminf(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 35 | const int col_size = 36 | fminf(n_boxes -
col_start * threadsPerBlock, threadsPerBlock); 37 | 38 | __shared__ float block_boxes[threadsPerBlock * 5]; 39 | if (threadIdx.x < col_size) { 40 | block_boxes[threadIdx.x * 5 + 0] = 41 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 42 | block_boxes[threadIdx.x * 5 + 1] = 43 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 44 | block_boxes[threadIdx.x * 5 + 2] = 45 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 46 | block_boxes[threadIdx.x * 5 + 3] = 47 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 48 | block_boxes[threadIdx.x * 5 + 4] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 50 | } 51 | __syncthreads(); 52 | 53 | if (threadIdx.x < row_size) { 54 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 55 | const float *cur_box = dev_boxes + cur_box_idx * 5; 56 | int i = 0; 57 | unsigned long long t = 0; 58 | int start = 0; 59 | if (row_start == col_start) { 60 | start = threadIdx.x + 1; 61 | } 62 | for (i = start; i < col_size; i++) { 63 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 64 | t |= 1ULL << i; 65 | } 66 | } 67 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 68 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 69 | } 70 | } 71 | 72 | 73 | void _nms(int boxes_num, float * boxes_dev, 74 | unsigned long long * mask_dev, float nms_overlap_thresh) { 75 | 76 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 77 | DIVUP(boxes_num, threadsPerBlock)); 78 | dim3 threads(threadsPerBlock); 79 | nms_kernel<<<blocks, threads>>>(boxes_num, 80 | nms_overlap_thresh, 81 | boxes_dev, 82 | mask_dev); 83 | } 84 | 85 | #ifdef __cplusplus 86 | } 87 | #endif 88 | -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/lib/nms/src/cuda/nms_kernel.cu.o -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _NMS_KERNEL 2 | #define _NMS_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 9 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 10 | 11 | void _nms(int boxes_num, float * boxes_dev, 12 | unsigned long long * mask_dev, float nms_overlap_thresh); 13 | 14 | #ifdef __cplusplus 15 | } 16 | #endif 17 | 18 | #endif 19 | 20 | -------------------------------------------------------------------------------- /lib/nms/src/nms.c: -------------------------------------------------------------------------------- 1 | #include <TH/TH.h> 2 | #include <math.h> 3 | 4 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh) { 5 | // boxes has to be sorted 6 | THArgCheck(THLongTensor_isContiguous(keep_out), 0, "keep_out must be contiguous"); 7 | THArgCheck(THFloatTensor_isContiguous(boxes), 2, "boxes must be contiguous"); 8 | THArgCheck(THLongTensor_isContiguous(order), 3, "order must be contiguous"); 9 | THArgCheck(THFloatTensor_isContiguous(areas), 4, "areas must be contiguous"); 10 | // Number of ROIs 11 | long boxes_num = THFloatTensor_size(boxes, 0); 12 | long boxes_dim = THFloatTensor_size(boxes, 1); 13 | 14 | long * keep_out_flat =
THLongTensor_data(keep_out); 15 | float * boxes_flat = THFloatTensor_data(boxes); 16 | long * order_flat = THLongTensor_data(order); 17 | float * areas_flat = THFloatTensor_data(areas); 18 | 19 | THByteTensor* suppressed = THByteTensor_newWithSize1d(boxes_num); 20 | THByteTensor_fill(suppressed, 0); 21 | unsigned char * suppressed_flat = THByteTensor_data(suppressed); 22 | 23 | // nominal indices 24 | int i, j; 25 | // sorted indices 26 | int _i, _j; 27 | // temp variables for box i's (the box currently under consideration) 28 | float ix1, iy1, ix2, iy2, iarea; 29 | // variables for computing overlap with box j (lower scoring box) 30 | float xx1, yy1, xx2, yy2; 31 | float w, h; 32 | float inter, ovr; 33 | 34 | long num_to_keep = 0; 35 | for (_i=0; _i < boxes_num; ++_i) { 36 | i = order_flat[_i]; 37 | if (suppressed_flat[i] == 1) { 38 | continue; 39 | } 40 | keep_out_flat[num_to_keep++] = i; 41 | ix1 = boxes_flat[i * boxes_dim]; 42 | iy1 = boxes_flat[i * boxes_dim + 1]; 43 | ix2 = boxes_flat[i * boxes_dim + 2]; 44 | iy2 = boxes_flat[i * boxes_dim + 3]; 45 | iarea = areas_flat[i]; 46 | for (_j = _i + 1; _j < boxes_num; ++_j) { 47 | j = order_flat[_j]; 48 | if (suppressed_flat[j] == 1) { 49 | continue; 50 | } 51 | xx1 = fmaxf(ix1, boxes_flat[j * boxes_dim]); 52 | yy1 = fmaxf(iy1, boxes_flat[j * boxes_dim + 1]); 53 | xx2 = fminf(ix2, boxes_flat[j * boxes_dim + 2]); 54 | yy2 = fminf(iy2, boxes_flat[j * boxes_dim + 3]); 55 | w = fmaxf(0.0, xx2 - xx1 + 1); 56 | h = fmaxf(0.0, yy2 - yy1 + 1); 57 | inter = w * h; 58 | ovr = inter / (iarea + areas_flat[j] - inter); 59 | if (ovr >= nms_overlap_thresh) { 60 | suppressed_flat[j] = 1; 61 | } 62 | } 63 | } 64 | 65 | long *num_out_flat = THLongTensor_data(num_out); 66 | *num_out_flat = num_to_keep; 67 | THByteTensor_free(suppressed); 68 | return 1; 69 | } -------------------------------------------------------------------------------- /lib/nms/src/nms.h: -------------------------------------------------------------------------------- 1 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh); -------------------------------------------------------------------------------- /lib/nms/src/nms_cuda.c: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | #include <THC/THC.h> 8 | #include <TH/TH.h> 9 | #include <stdio.h> 10 | #include <math.h> 11 | 12 | #include "cuda/nms_kernel.h" 13 | 14 | 15 | extern THCState *state; 16 | 17 | int gpu_nms(THLongTensor * keep, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh) { 18 | // boxes has to be sorted 19 | THArgCheck(THLongTensor_isContiguous(keep), 0, "keep must be contiguous"); 20 | THArgCheck(THCudaTensor_isContiguous(state, boxes), 2, "boxes must be contiguous"); 21 | // Number of ROIs 22 | int boxes_num = THCudaTensor_size(state, boxes, 0); 23 | int boxes_dim = THCudaTensor_size(state, boxes, 1); 24 | 25 | float* boxes_flat = THCudaTensor_data(state, boxes); 26 | 27 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 28 | THCudaLongTensor * mask = THCudaLongTensor_newWithSize2d(state, boxes_num, col_blocks); 29 | unsigned long long* mask_flat = THCudaLongTensor_data(state, mask); 30 | 31 |
_nms(boxes_num, boxes_flat, mask_flat, nms_overlap_thresh); 32 | 33 | THLongTensor * mask_cpu = THLongTensor_newWithSize2d(boxes_num, col_blocks); 34 | THLongTensor_copyCuda(state, mask_cpu, mask); 35 | THCudaLongTensor_free(state, mask); 36 | 37 | unsigned long long * mask_cpu_flat = THLongTensor_data(mask_cpu); 38 | 39 | THLongTensor * remv_cpu = THLongTensor_newWithSize1d(col_blocks); 40 | unsigned long long* remv_cpu_flat = THLongTensor_data(remv_cpu); 41 | THLongTensor_fill(remv_cpu, 0); 42 | 43 | long * keep_flat = THLongTensor_data(keep); 44 | long num_to_keep = 0; 45 | 46 | int i, j; 47 | for (i = 0; i < boxes_num; i++) { 48 | int nblock = i / threadsPerBlock; 49 | int inblock = i % threadsPerBlock; 50 | 51 | if (!(remv_cpu_flat[nblock] & (1ULL << inblock))) { 52 | keep_flat[num_to_keep++] = i; 53 | unsigned long long *p = &mask_cpu_flat[0] + i * col_blocks; 54 | for (j = nblock; j < col_blocks; j++) { 55 | remv_cpu_flat[j] |= p[j]; 56 | } 57 | } 58 | } 59 | 60 | long * num_out_flat = THLongTensor_data(num_out); 61 | * num_out_flat = num_to_keep; 62 | 63 | THLongTensor_free(mask_cpu); 64 | THLongTensor_free(remv_cpu); 65 | 66 | return 1; 67 | } 68 | -------------------------------------------------------------------------------- /lib/nms/src/nms_cuda.h: -------------------------------------------------------------------------------- 1 | int gpu_nms(THLongTensor * keep_out, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh); -------------------------------------------------------------------------------- /lib/roi_data_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/roi_data_layer/layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | 8 | """The data layer used during training to train a Fast R-CNN network. 9 | 10 | RoIDataLayer implements a Caffe Python layer. 
11 | """ 12 | from __future__ import absolute_import 13 | from __future__ import division 14 | from __future__ import print_function 15 | 16 | from model.config import cfg 17 | from roi_data_layer.minibatch import get_minibatch 18 | import numpy as np 19 | import time 20 | 21 | class RoIDataLayer(object): 22 | """Fast R-CNN data layer used for training.""" 23 | 24 | def __init__(self, roidb, num_classes, random=False): 25 | """Set the roidb to be used by this layer during training.""" 26 | self._roidb = roidb 27 | self._num_classes = num_classes 28 | # Also set a random flag 29 | self._random = random 30 | self._shuffle_roidb_inds() 31 | 32 | def _shuffle_roidb_inds(self): 33 | """Randomly permute the training roidb.""" 34 | # If the random flag is set, 35 | # then the database is shuffled according to system time 36 | # Useful for the validation set 37 | if self._random: 38 | st0 = np.random.get_state() 39 | millis = int(round(time.time() * 1000)) % 4294967295 40 | #np.random.seed(millis) 41 | 42 | if cfg.TRAIN.ASPECT_GROUPING: 43 | widths = np.array([r['width'] for r in self._roidb]) 44 | heights = np.array([r['height'] for r in self._roidb]) 45 | horz = (widths >= heights) 46 | vert = np.logical_not(horz) 47 | horz_inds = np.where(horz)[0] 48 | vert_inds = np.where(vert)[0] 49 | inds = np.hstack(( 50 | np.random.permutation(horz_inds), 51 | np.random.permutation(vert_inds))) 52 | inds = np.reshape(inds, (-1, 2)) 53 | row_perm = np.random.permutation(np.arange(inds.shape[0])) 54 | inds = np.reshape(inds[row_perm, :], (-1,)) 55 | self._perm = inds 56 | else: 57 | self._perm = np.random.permutation(np.arange(len(self._roidb))) 58 | ##no shuffle 59 | self._perm = np.arange(len(self._roidb)) 60 | # Restore the random state 61 | #if self._random: 62 | #np.random.set_state(st0) 63 | 64 | self._cur = 0 65 | 66 | def _get_next_minibatch_inds(self): 67 | """Return the roidb indices for the next minibatch.""" 68 | 69 | if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb): 70 | self._shuffle_roidb_inds() 71 | 72 | db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH] 73 | self._cur += cfg.TRAIN.IMS_PER_BATCH 74 | 75 | return db_inds 76 | 77 | def _get_next_minibatch(self): 78 | """Return the blobs to be used for the next minibatch. 79 | 80 | If cfg.TRAIN.USE_PREFETCH is True, then blobs will be computed in a 81 | separate process and made available through self._blob_queue. 
82 | """ 83 | db_inds = self._get_next_minibatch_inds() 84 | minibatch_db = [self._roidb[i] for i in db_inds] 85 | return get_minibatch(minibatch_db, self._num_classes) 86 | 87 | def forward(self): 88 | """Get blobs and copy them into this layer's top blob vector.""" 89 | blobs = self._get_next_minibatch() 90 | return blobs 91 | -------------------------------------------------------------------------------- /lib/roi_data_layer/minibatch.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | 8 | """Compute minibatch blobs for training a Fast R-CNN network.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import numpy as np 14 | import numpy.random as npr 15 | import cv2 16 | from model.config import cfg 17 | from utils.blob import prep_im_for_blob, im_list_to_blob 18 | 19 | def get_minibatch(roidb, num_classes): 20 | """Given a roidb, construct a minibatch sampled from it.""" 21 | num_images = len(roidb) 22 | 23 | # Sample random scales to use for each image in this batch 24 | random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES), 25 | size=num_images) 26 | 27 | assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ 28 | 'num_images ({}) must divide BATCH_SIZE ({})'. \ 29 | format(num_images, cfg.TRAIN.BATCH_SIZE) 30 | 31 | # Get the input image blob, formatted for caffe 32 | im_blob, im_scales, im_path, orig_imshape = _get_image_blob(roidb, random_scale_inds) 33 | 34 | blobs = {'data': im_blob} 35 | blobs['data_path'] = im_path 36 | 37 | assert len(im_scales) == 1, "Single batch only" 38 | assert len(roidb) == 1, "Single batch only" 39 | 40 | # gt boxes: (x1, y1, x2, y2, cls) 41 | if cfg.TRAIN.USE_ALL_GT: 42 | # Include all ground truth boxes 43 | gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0] 44 | else: 45 | # For the COCO ground truth boxes, exclude the ones that are ''iscrowd'' 46 | gt_inds = np.where((roidb[0]['gt_classes'] != 0) & np.all(roidb[0]['gt_overlaps'].toarray() > -1.0, axis=1))[0] 47 | gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32) 48 | gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0] 49 | gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds] 50 | blobs['gt_boxes'] = gt_boxes 51 | blobs['im_info'] = np.array( 52 | [im_blob.shape[1], im_blob.shape[2], im_scales[0], orig_imshape[0], orig_imshape[1], orig_imshape[2]], 53 | dtype=np.float32) 54 | 55 | return blobs 56 | 57 | def _get_image_blob(roidb, scale_inds): 58 | """Builds an input blob from the images in the roidb at the specified 59 | scales.
60 | """ 61 | num_images = len(roidb) 62 | processed_ims = [] 63 | im_scales = [] 64 | im_path = [] 65 | for i in range(num_images): 66 | im = cv2.imread(roidb[i]['image']) 67 | orig_imshape = im.shape 68 | im_path.append(roidb[i]['image']) 69 | if roidb[i]['flipped']: 70 | im = im[:, ::-1, :] 71 | target_size = cfg.TRAIN.SCALES[scale_inds[i]] 72 | im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, 73 | cfg.TRAIN.MAX_SIZE) 74 | im_scales.append(im_scale) 75 | processed_ims.append(im) 76 | 77 | # Create a blob to hold the input images 78 | blob = im_list_to_blob(processed_ims) 79 | 80 | return blob, im_scales, im_path, orig_imshape 81 | -------------------------------------------------------------------------------- /lib/roi_data_layer/roidb.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Transform a roidb into a trainable roidb by adding a bunch of metadata.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import numpy as np 14 | from model.config import cfg 15 | import PIL 16 | from tqdm import tqdm 17 | 18 | def prepare_roidb(imdb): 19 | """Enrich the imdb's roidb by adding some derived quantities that 20 | are useful for training. This function precomputes the maximum 21 | overlap, taken over ground-truth boxes, between each ROI and 22 | each ground-truth box. The class with maximum overlap is also 23 | recorded. 24 | """ 25 | roidb = imdb.roidb 26 | if not (imdb.name.startswith('coco')): 27 | if 'bdd' in imdb.name: 28 | sizes = [(1280,720) for i in range(imdb.num_images)] 29 | else: 30 | sizes = [PIL.Image.open(imdb.image_path_at(i)).size 31 | for i in range(imdb.num_images)] 32 | 33 | # for i in range(len(imdb.image_index)): 34 | for i in tqdm(range(len(imdb.image_index))): 35 | roidb[i]['image'] = imdb.image_path_at(i) 36 | if not (imdb.name.startswith('coco')): 37 | roidb[i]['width'] = sizes[i][0] 38 | roidb[i]['height'] = sizes[i][1] 39 | # need gt_overlaps as a dense array for argmax 40 | gt_overlaps = roidb[i]['gt_overlaps'].toarray() 41 | 42 | # max overlap with gt over classes (columns) 43 | max_overlaps = gt_overlaps.max(axis=1) 44 | # gt class that had the max overlap 45 | max_classes = gt_overlaps.argmax(axis=1) 46 | roidb[i]['max_classes'] = max_classes 47 | roidb[i]['max_overlaps'] = max_overlaps 48 | # sanity checks 49 | # max overlap of 0 => class should be zero (background) 50 | zero_inds = np.where(max_overlaps == 0)[0] 51 | assert all(max_classes[zero_inds] == 0) 52 | # max overlap > 0 => class should not be zero (must be a fg class) 53 | nonzero_inds = np.where(max_overlaps > 0)[0] 54 | assert all(max_classes[nonzero_inds] != 0) 55 | -------------------------------------------------------------------------------- /lib/utils/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.h 4 | *.hpp 5 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | 
# Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/utils/bbox.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | def bbox_overlaps(boxes, query_boxes): 5 |     """ 6 |     Parameters 7 |     ---------- 8 |     boxes: (N, 4) ndarray or tensor or variable 9 |     query_boxes: (K, 4) ndarray or tensor or variable 10 |     Returns 11 |     ------- 12 |     overlaps: (N, K) overlap between boxes and query_boxes 13 |     """ 14 |     if isinstance(boxes, np.ndarray): 15 |         boxes = torch.from_numpy(boxes) 16 |         query_boxes = torch.from_numpy(query_boxes) 17 |         out_fn = lambda x: x.numpy()  # if the inputs were ndarrays, convert the overlaps back to an ndarray on return 18 |     else: 19 |         out_fn = lambda x: x 20 | 21 |     box_areas = (boxes[:, 2] - boxes[:, 0] + 1) * \ 22 |             (boxes[:, 3] - boxes[:, 1] + 1) 23 |     query_areas = (query_boxes[:, 2] - query_boxes[:, 0] + 1) * \ 24 |             (query_boxes[:, 3] - query_boxes[:, 1] + 1) 25 | 26 |     iw = (torch.min(boxes[:, 2:3], query_boxes[:, 2:3].t()) - torch.max(boxes[:, 0:1], query_boxes[:, 0:1].t()) + 1).clamp(min=0) 27 |     ih = (torch.min(boxes[:, 3:4], query_boxes[:, 3:4].t()) - torch.max(boxes[:, 1:2], query_boxes[:, 1:2].t()) + 1).clamp(min=0) 28 |     ua = box_areas.view(-1, 1) + query_areas.view(1, -1) - iw * ih 29 |     overlaps = iw * ih / ua 30 |     return out_fn(overlaps) 31 | -------------------------------------------------------------------------------- /lib/utils/blob.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Blob helper functions.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import numpy as np 14 | import cv2 15 | 16 | 17 | def im_list_to_blob(ims): 18 |   """Convert a list of images into a network input. 19 | 20 |   Assumes images are already prepared (means subtracted, BGR order, ...).
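   As a worked example, two prepared images of shapes (600, 800, 3) and
   (480, 640, 3) would be packed into a single zero-padded float32 blob of
   shape (2, 600, 800, 3), each image copied into the top-left corner.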
21 | """ 22 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 23 | num_images = len(ims) 24 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 25 | dtype=np.float32) 26 | for i in range(num_images): 27 | im = ims[i] 28 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 29 | 30 | return blob 31 | 32 | 33 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 34 | """Mean subtract and scale an image for use in a blob.""" 35 | im = im.astype(np.float32, copy=False) 36 | im -= pixel_means 37 | 38 | im_shape = im.shape 39 | im_size_min = np.min(im_shape[0:2]) 40 | im_size_max = np.max(im_shape[0:2]) 41 | im_scale = float(target_size) / float(im_size_min) 42 | # Prevent the biggest axis from being more than MAX_SIZE 43 | if np.round(im_scale * im_size_max) > max_size: 44 | im_scale = float(max_size) / float(im_size_max) 45 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 46 | interpolation=cv2.INTER_LINEAR) 47 | 48 | # im = cv2.resize(im, (1200, 480), interpolation=cv2.INTER_LINEAR) 49 | 50 | return im, im_scale 51 | -------------------------------------------------------------------------------- /lib/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | import torch 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self._total_time = {} 15 | self._calls = {} 16 | self._start_time = {} 17 | self._diff = {} 18 | self._average_time = {} 19 | 20 | def tic(self, name='default'): 21 | # using time.time instead of time.clock because time time.clock 22 | # does not normalize for multithreading 23 | torch.cuda.synchronize() 24 | self._start_time[name] = time.time() 25 | 26 | def toc(self, name='default', average=True): 27 | torch.cuda.synchronize() 28 | self._diff[name] = time.time() - self._start_time[name] 29 | self._total_time[name] = self._total_time.get(name, 0.) 
+ self._diff[name] 30 | self._calls[name] = self._calls.get(name, 0 ) + 1 31 | self._average_time[name] = self._total_time[name] / self._calls[name] 32 | if average: 33 | return self._average_time[name] 34 | else: 35 | return self._diff[name] 36 | 37 | def average_time(self, name='default'): 38 | return self._average_time[name] 39 | 40 | def total_time(self, name='default'): 41 | return self._total_time[name] 42 | 43 | timer = Timer() 44 | -------------------------------------------------------------------------------- /lib/utils/visualization.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | from six.moves import range 12 | import PIL.Image as Image 13 | import PIL.ImageColor as ImageColor 14 | import PIL.ImageDraw as ImageDraw 15 | import PIL.ImageFont as ImageFont 16 | 17 | STANDARD_COLORS = [ 18 | 'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque', 19 | 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite', 20 | 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan', 21 | 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange', 22 | 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet', 23 | 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite', 24 | 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod', 25 | 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki', 26 | 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue', 27 | 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey', 28 | 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue', 29 | 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime', 30 | 'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid', 31 | 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen', 32 | 'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin', 33 | 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed', 34 | 'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed', 35 | 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple', 36 | 'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown', 37 | 'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue', 38 | 'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow', 39 | 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White', 40 | 'WhiteSmoke', 'Yellow', 'YellowGreen' 41 | ] 42 | 43 | NUM_COLORS = len(STANDARD_COLORS) 44 | 45 | try: 46 | FONT = ImageFont.truetype('arial.ttf', 24) 47 | except IOError: 48 | FONT = ImageFont.load_default() 49 | 50 | def _draw_single_box(image, xmin, ymin, xmax, ymax, display_str, font, color='black', thickness=4): 51 | draw = ImageDraw.Draw(image) 52 | (left, right, top, bottom) = (xmin, xmax, ymin, ymax) 53 | draw.line([(left, top), (left, bottom), (right, bottom), 54 | (right, top), (left, top)], width=thickness, fill=color) 55 | text_bottom = bottom 56 | # Reverse list and 
print from bottom to top. 57 | text_width, text_height = font.getsize(display_str) 58 | margin = np.ceil(0.05 * text_height) 59 | draw.rectangle( 60 | [(left, text_bottom - text_height - 2 * margin), (left + text_width, 61 | text_bottom)], 62 | fill=color) 63 | draw.text( 64 | (left + margin, text_bottom - text_height - margin), 65 | display_str, 66 | fill='black', 67 | font=font) 68 | 69 | return image 70 | 71 | def draw_bounding_boxes(image, gt_boxes, im_info): 72 | num_boxes = gt_boxes.shape[0] 73 | gt_boxes_new = gt_boxes.copy() 74 | gt_boxes_new[:,:4] = np.round(gt_boxes_new[:,:4].copy() / im_info[2]) 75 | disp_image = Image.fromarray(np.uint8(image[0])) 76 | 77 | for i in range(num_boxes): 78 | this_class = int(gt_boxes_new[i, 4]) 79 | disp_image = _draw_single_box(disp_image, 80 | gt_boxes_new[i, 0], 81 | gt_boxes_new[i, 1], 82 | gt_boxes_new[i, 2], 83 | gt_boxes_new[i, 3], 84 | 'N%02d-C%02d' % (i, this_class), 85 | FONT, 86 | color=STANDARD_COLORS[this_class % NUM_COLORS]) 87 | 88 | image[0, :] = np.array(disp_image) 89 | return image -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torchvision==0.2.1 2 | easydict==1.6 3 | opencv-python==3.4.1.15 4 | scipy==1.1.0 5 | pillow==5.1.0 6 | tensorboardX 7 | pyyaml==3.12 8 | tqdm==4.28.1 9 | -------------------------------------------------------------------------------- /tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, '..', 'lib') 12 | add_path(lib_path) 13 | 14 | coco_path = osp.join(this_dir, '..', 'data', 'coco', 'PythonAPI') 15 | add_path(coco_path) 16 | -------------------------------------------------------------------------------- /tools/convert_from_tensorflow.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python import pywrap_tensorflow 3 | from collections import OrderedDict 4 | import re 5 | import torch 6 | 7 | import argparse 8 | parser = argparse.ArgumentParser(description='Convert tf-faster-rcnn model to pytorch-faster-rcnn model') 9 | parser.add_argument('--tensorflow_model', 10 | help='the path of tensorflow_model', 11 | default=None, type=str) 12 | 13 | args = parser.parse_args() 14 | 15 | reader = pywrap_tensorflow.NewCheckpointReader(args.tensorflow_model) 16 | var_to_shape_map = reader.get_variable_to_shape_map() 17 | var_dict = {k:reader.get_tensor(k) for k in var_to_shape_map.keys()} 18 | 19 | del var_dict['Variable'] 20 | 21 | for k in list(var_dict.keys()): 22 | if 'Momentum' in k: 23 | del var_dict[k] 24 | 25 | for k in list(var_dict.keys()): 26 | if k.find('/') >= 0: 27 | var_dict['resnet' + k[k.find('/'):]] = var_dict[k] 28 | del var_dict[k] 29 | 30 | dummy_replace = OrderedDict([ 31 | ('moving_mean', 'running_mean'),\ 32 | ('moving_variance', 'running_var'),\ 33 | ('weights', 'weight'),\ 34 | ('biases', 'bias'),\ 35 | ('conv1/BatchNorm', 'bn1'),\ 36 | ('conv2/BatchNorm', 'bn2'),\ 37 | ('conv3/BatchNorm', 'bn3'),\ 38 | ('bottleneck_v1/', ''),\ 39 | ('block', 'layer'),\ 40 | ('resnet/rpn_conv/3x3', 'rpn_net'),\ 41 | ('resnet/rpn_cls_score', 'rpn_cls_score_net'),\ 42 | ('resnet/cls_score', 'cls_score_net'),\ 
43 | ('resnet/rpn_bbox_pred', 'rpn_bbox_pred_net'),\ 44 | ('resnet/bbox_pred', 'bbox_pred_net'),\ 45 | ('shortcut/weight', 'downsample.0.weight'),\ 46 | ('shortcut/BatchNorm', 'downsample.1'),\ 47 | ('gamma', 'weight'),\ 48 | ('beta', 'bias'),\ 49 | ('/', '.')]) 50 | 51 | for a, b in dummy_replace.items(): 52 | for k in list(var_dict.keys()): 53 | if a in k: 54 | var_dict[k.replace(a,b)] = var_dict[k] 55 | del var_dict[k] 56 | 57 | 58 | for k in list(var_dict.keys()): 59 | if 'unit_' in k: 60 | m = re.search('unit_(\d+)', k) 61 | var_dict[k.replace(m.group(0), str(int(m.group(1)) - 1))] = var_dict[k] 62 | del var_dict[k] 63 | 64 | for k in list(var_dict.keys()): 65 | if var_dict[k].ndim == 4: 66 | var_dict[k] = var_dict[k].transpose((3, 2, 0, 1)).copy(order='C') 67 | if var_dict[k].ndim == 2: 68 | var_dict[k] = var_dict[k].transpose((1, 0)).copy(order='C') 69 | # assert x[k].shape == var_dict[k].shape, k 70 | 71 | for k in list(var_dict.keys()): 72 | var_dict[k] = torch.from_numpy(var_dict[k]) 73 | 74 | 75 | torch.save(var_dict, args.tensorflow_model[:args.tensorflow_model.find('.ckpt')]+'.pth') 76 | -------------------------------------------------------------------------------- /tools/convert_from_tensorflow_mobile.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python import pywrap_tensorflow 3 | from collections import OrderedDict 4 | import re 5 | import torch 6 | 7 | import argparse 8 | parser = argparse.ArgumentParser(description='Convert tf-faster-rcnn model to pytorch-faster-rcnn model') 9 | parser.add_argument('--tensorflow_model', 10 | help='the path of tensorflow_model', 11 | default=None, type=str) 12 | 13 | args = parser.parse_args() 14 | 15 | reader = pywrap_tensorflow.NewCheckpointReader(args.tensorflow_model) 16 | var_to_shape_map = reader.get_variable_to_shape_map() 17 | var_dict = {k:reader.get_tensor(k) for k in var_to_shape_map.keys()} 18 | 19 | del var_dict['Variable'] 20 | 21 | for k in list(var_dict.keys()): 22 | if 'Momentum' in k: 23 | del var_dict[k] 24 | 25 | for k in list(var_dict.keys()): 26 | if k.find('/') >= 0: 27 | var_dict['mobilenet' + k[k.find('/'):]] = var_dict[k] 28 | del var_dict[k] 29 | 30 | dummy_replace = OrderedDict([ 31 | ('moving_mean', 'running_mean'),\ 32 | ('moving_variance', 'running_var'),\ 33 | ('weights', 'weight'),\ 34 | ('biases', 'bias'),\ 35 | ('/BatchNorm', '.1'),\ 36 | ('_pointwise/', '.pointwise.0.'),\ 37 | ('_depthwise/depthwise_', '.depthwise.0.'),\ 38 | ('_pointwise.1', '.pointwise.1'),\ 39 | ('_depthwise.1', '.depthwise.1'),\ 40 | ('Conv2d_0/', 'Conv2d_0.0.'),\ 41 | ('mobilenet/rpn_conv/3x3', 'rpn_net'),\ 42 | ('mobilenet/rpn_cls_score', 'rpn_cls_score_net'),\ 43 | ('mobilenet/cls_score', 'cls_score_net'),\ 44 | ('mobilenet/rpn_bbox_pred', 'rpn_bbox_pred_net'),\ 45 | ('mobilenet/bbox_pred', 'bbox_pred_net'),\ 46 | ('gamma', 'weight'),\ 47 | ('beta', 'bias'),\ 48 | ('/', '.')]) 49 | 50 | for a, b in dummy_replace.items(): 51 | for k in list(var_dict.keys()): 52 | if a in k: 53 | var_dict[k.replace(a,b)] = var_dict[k] 54 | del var_dict[k] 55 | 56 | # print set(var_dict.keys()) - set(x.keys()) 57 | # print set(x.keys()) - set(var_dict.keys()) 58 | 59 | for k in list(var_dict.keys()): 60 | if var_dict[k].ndim == 4: 61 | if 'depthwise' in k: 62 | var_dict[k] = var_dict[k].transpose((2, 3, 0, 1)).copy(order='C') 63 | else: 64 | var_dict[k] = var_dict[k].transpose((3, 2, 0, 1)).copy(order='C') 65 | if var_dict[k].ndim == 2: 66 | var_dict[k] = 
var_dict[k].transpose((1, 0)).copy(order='C') 67 | # assert x[k].shape == var_dict[k].shape, k 68 | 69 | for k in list(var_dict.keys()): 70 | var_dict[k] = torch.from_numpy(var_dict[k]) 71 | 72 | 73 | torch.save(var_dict, args.tensorflow_model[:args.tensorflow_model.find('.ckpt')]+'.pth') 74 | -------------------------------------------------------------------------------- /tools/convert_from_tensorflow_vgg.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python import pywrap_tensorflow 3 | from collections import OrderedDict 4 | import re 5 | import torch 6 | 7 | import argparse 8 | parser = argparse.ArgumentParser(description='Convert tf-faster-rcnn model to pytorch-faster-rcnn model') 9 | parser.add_argument('--tensorflow_model', 10 | help='the path of tensorflow_model', 11 | default=None, type=str) 12 | 13 | args = parser.parse_args() 14 | 15 | reader = pywrap_tensorflow.NewCheckpointReader(args.tensorflow_model) 16 | var_to_shape_map = reader.get_variable_to_shape_map() 17 | var_dict = {k:reader.get_tensor(k) for k in var_to_shape_map.keys()} 18 | 19 | del var_dict['Variable'] 20 | 21 | for k in list(var_dict.keys()): 22 | if 'Momentum' in k: 23 | del var_dict[k] 24 | 25 | for k in list(var_dict.keys()): 26 | if k.find('/') >= 0: 27 | var_dict['vgg' + k[k.find('/'):]] = var_dict[k] 28 | del var_dict[k] 29 | 30 | dummy_replace = OrderedDict([ 31 | ('weights', 'weight'),\ 32 | ('biases', 'bias'),\ 33 | ('vgg/rpn_conv/3x3', 'rpn_net'),\ 34 | ('vgg/rpn_cls_score', 'rpn_cls_score_net'),\ 35 | ('vgg/cls_score', 'cls_score_net'),\ 36 | ('vgg/rpn_bbox_pred', 'rpn_bbox_pred_net'),\ 37 | ('vgg/bbox_pred', 'bbox_pred_net'),\ 38 | ('/', '.')]) 39 | 40 | for a, b in dummy_replace.items(): 41 | for k in list(var_dict.keys()): 42 | if a in k: 43 | var_dict[k.replace(a,b)] = var_dict[k] 44 | del var_dict[k] 45 | 46 | layer_map = OrderedDict([ 47 | ('conv1.conv1_1', 'features.0'),\ 48 | ('conv1.conv1_2', 'features.2'),\ 49 | ('conv2.conv2_1', 'features.5'),\ 50 | ('conv2.conv2_2', 'features.7'),\ 51 | ('conv3.conv3_1', 'features.10'),\ 52 | ('conv3.conv3_2', 'features.12'),\ 53 | ('conv3.conv3_3', 'features.14'),\ 54 | ('conv4.conv4_1', 'features.17'),\ 55 | ('conv4.conv4_2', 'features.19'),\ 56 | ('conv4.conv4_3', 'features.21'),\ 57 | ('conv5.conv5_1', 'features.24'),\ 58 | ('conv5.conv5_2', 'features.26'),\ 59 | ('conv5.conv5_3', 'features.28'),\ 60 | ('fc6', 'classifier.0'),\ 61 | ('fc7', 'classifier.3')]) 62 | 63 | for a, b in layer_map.items(): 64 | for k in list(var_dict.keys()): 65 | if a in k: 66 | var_dict[k.replace(a,b)] = var_dict[k] 67 | del var_dict[k] 68 | 69 | for k in list(var_dict.keys()): 70 | if 'classifier.0' in k: 71 | if var_dict[k].ndim == 2: # weight 72 | var_dict[k] = var_dict[k].reshape(7,7,512,4096).transpose((3, 2, 0, 1)).reshape(4096, -1).copy(order='C') 73 | else: 74 | if var_dict[k].ndim == 4: 75 | var_dict[k] = var_dict[k].transpose((3, 2, 0, 1)).copy(order='C') 76 | if var_dict[k].ndim == 2: 77 | var_dict[k] = var_dict[k].transpose((1, 0)).copy(order='C') 78 | # assert x[k].shape == var_dict[k].shape, k 79 | 80 | for k in list(var_dict.keys()): 81 | var_dict[k] = torch.from_numpy(var_dict[k]) 82 | 83 | torch.save(var_dict, args.tensorflow_model[:args.tensorflow_model.find('.ckpt')]+'.pth') 84 | -------------------------------------------------------------------------------- /tools/demo.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Tensorflow Faster R-CNN 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Xinlei Chen, based on code from Ross Girshick 7 | # -------------------------------------------------------- 8 | 9 | """ 10 | Demo script showing detections in sample images. 11 | 12 | See README.md for installation instructions before running. 13 | """ 14 | from __future__ import absolute_import 15 | from __future__ import division 16 | from __future__ import print_function 17 | 18 | import _init_paths 19 | from model.config import cfg 20 | from model.test import im_detect 21 | from model.nms_wrapper import nms 22 | 23 | from utils.timer import Timer 24 | import matplotlib.pyplot as plt 25 | import numpy as np 26 | import os, cv2 27 | import argparse 28 | 29 | from nets.vgg16 import vgg16 30 | from nets.resnet_v1 import resnetv1 31 | 32 | import torch 33 | 34 | CLASSES = ('__background__', 35 | 'aeroplane', 'bicycle', 'bird', 'boat', 36 | 'bottle', 'bus', 'car', 'cat', 'chair', 37 | 'cow', 'diningtable', 'dog', 'horse', 38 | 'motorbike', 'person', 'pottedplant', 39 | 'sheep', 'sofa', 'train', 'tvmonitor') 40 | 41 | NETS = {'vgg16': ('vgg16_faster_rcnn_iter_%d.pth',),'res101': ('res101_faster_rcnn_iter_%d.pth',)} 42 | DATASETS= {'pascal_voc': ('voc_2007_trainval',),'pascal_voc_0712': ('voc_2007_trainval+voc_2012_trainval',)} 43 | 44 | def vis_detections(im, class_name, dets, thresh=0.5): 45 | """Draw detected bounding boxes.""" 46 | inds = np.where(dets[:, -1] >= thresh)[0] 47 | if len(inds) == 0: 48 | return 49 | 50 | im = im[:, :, (2, 1, 0)] 51 | fig, ax = plt.subplots(figsize=(12, 12)) 52 | ax.imshow(im, aspect='equal') 53 | for i in inds: 54 | bbox = dets[i, :4] 55 | score = dets[i, -1] 56 | 57 | ax.add_patch( 58 | plt.Rectangle((bbox[0], bbox[1]), 59 | bbox[2] - bbox[0], 60 | bbox[3] - bbox[1], fill=False, 61 | edgecolor='red', linewidth=3.5) 62 | ) 63 | ax.text(bbox[0], bbox[1] - 2, 64 | '{:s} {:.3f}'.format(class_name, score), 65 | bbox=dict(facecolor='blue', alpha=0.5), 66 | fontsize=14, color='white') 67 | 68 | ax.set_title(('{} detections with ' 69 | 'p({} | box) >= {:.1f}').format(class_name, class_name, 70 | thresh), 71 | fontsize=14) 72 | plt.axis('off') 73 | plt.tight_layout() 74 | plt.draw() 75 | 76 | def demo(net, image_name): 77 | """Detect object classes in an image using pre-computed object proposals.""" 78 | 79 | # Load the demo image 80 | im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name) 81 | im = cv2.imread(im_file) 82 | 83 | # Detect all object classes and regress object bounds 84 | timer = Timer() 85 | timer.tic() 86 | scores, boxes = im_detect(net, im) 87 | timer.toc() 88 | print('Detection took {:.3f}s for {:d} object proposals'.format(timer.total_time(), boxes.shape[0])) 89 | 90 | # Visualize detections for each class 91 | CONF_THRESH = 0.8 92 | NMS_THRESH = 0.3 93 | for cls_ind, cls in enumerate(CLASSES[1:]): 94 | cls_ind += 1 # because we skipped background 95 | cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] 96 | cls_scores = scores[:, cls_ind] 97 | dets = np.hstack((cls_boxes, 98 | cls_scores[:, np.newaxis])).astype(np.float32) 99 | keep = nms(torch.from_numpy(dets), NMS_THRESH) 100 | dets = dets[keep.numpy(), :] 101 | vis_detections(im, cls, dets, thresh=CONF_THRESH) 102 | 103 | def parse_args(): 104 | """Parse input arguments.""" 105 | parser = argparse.ArgumentParser(description='Tensorflow Faster R-CNN demo') 106 | parser.add_argument('--net', 
dest='demo_net', help='Network to use [vgg16 res101]', 107 | choices=NETS.keys(), default='res101') 108 | parser.add_argument('--dataset', dest='dataset', help='Trained dataset [pascal_voc pascal_voc_0712]', 109 | choices=DATASETS.keys(), default='pascal_voc_0712') 110 | args = parser.parse_args() 111 | 112 | return args 113 | 114 | if __name__ == '__main__': 115 | cfg.TEST.HAS_RPN = True # Use RPN for proposals 116 | args = parse_args() 117 | 118 | # model path 119 | demonet = args.demo_net 120 | dataset = args.dataset 121 | saved_model = os.path.join('output', demonet, DATASETS[dataset][0], 'default', 122 | NETS[demonet][0] %(70000 if dataset == 'pascal_voc' else 110000)) 123 | 124 | 125 | if not os.path.isfile(saved_model): 126 | raise IOError(('{:s} not found.\nDid you download the proper networks from ' 127 | 'our server and place them properly?').format(saved_model)) 128 | 129 | # load network 130 | if demonet == 'vgg16': 131 | net = vgg16() 132 | elif demonet == 'res101': 133 | net = resnetv1(num_layers=101) 134 | else: 135 | raise NotImplementedError 136 | net.create_architecture(21, 137 | tag='default', anchor_scales=[8, 16, 32]) 138 | 139 | net.load_state_dict(torch.load(saved_model)) 140 | 141 | net.eval() 142 | net.cuda() 143 | 144 | print('Loaded network {:s}'.format(saved_model)) 145 | 146 | im_names = ['000456.jpg', '000542.jpg', '001150.jpg', 147 | '001763.jpg', '004545.jpg'] 148 | for im_name in im_names: 149 | print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~') 150 | print('Demo for data/demo/{}'.format(im_name)) 151 | demo(net, im_name) 152 | 153 | plt.show() 154 | -------------------------------------------------------------------------------- /tools/demo_all_bboxes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Tensorflow Faster R-CNN 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Xinlei Chen, based on code from Ross Girshick 7 | # Edited by Matthew Seals 8 | # -------------------------------------------------------- 9 | 10 | """ 11 | Demo script showing detections in sample images. 12 | 13 | See README.md for installation instructions before running. 
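
Example invocation (a sketch; it assumes trained weights are already in
place under ./output, following the path convention used in the main block
below):

    python tools/demo_all_bboxes.py --net vgg16 --dataset pascal_voc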
14 | """ 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import _init_paths 20 | from model.config import cfg 21 | from model.test import im_detect 22 | from model.nms_wrapper import nms 23 | 24 | from utils.timer import Timer 25 | import matplotlib.pyplot as plt 26 | import numpy as np 27 | import os 28 | import cv2 29 | import argparse 30 | from matplotlib import cm 31 | 32 | from nets.vgg16 import vgg16 33 | from nets.resnet_v1 import resnetv1 34 | 35 | import torch 36 | 37 | CLASSES = ('__background__', 38 | 'aeroplane', 'bicycle', 'bird', 'boat', 39 | 'bottle', 'bus', 'car', 'cat', 'chair', 40 | 'cow', 'diningtable', 'dog', 'horse', 41 | 'motorbike', 'person', 'pottedplant', 42 | 'sheep', 'sofa', 'train', 'tvmonitor') 43 | 44 | NETS = {'vgg16': ('vgg16_faster_rcnn_iter_%d.pth',), 'res101': ('res101_faster_rcnn_iter_%d.pth',)} 45 | DATASETS = {'pascal_voc': ('voc_2007_trainval',), 'pascal_voc_0712': ('voc_2007_trainval+voc_2012_trainval',)} 46 | 47 | COLORS = [cm.tab10(i) for i in np.linspace(0., 1., 10)] 48 | 49 | 50 | def demo(net, image_name): 51 | """Detect object classes in an image using pre-computed object proposals.""" 52 | 53 | # Load the demo image 54 | im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name) 55 | im = cv2.imread(im_file) 56 | 57 | # Detect all object classes and regress object bounds 58 | timer = Timer() 59 | timer.tic() 60 | scores, boxes = im_detect(net, im) 61 | timer.toc() 62 | print('Detection took {:.3f}s for {:d} object proposals'.format(timer.total_time(), boxes.shape[0])) 63 | 64 | # Visualize detections for each class 65 | thresh = 0.8 # CONF_THRESH 66 | NMS_THRESH = 0.3 67 | 68 | im = im[:, :, (2, 1, 0)] 69 | fig, ax = plt.subplots(figsize=(12, 12)) 70 | ax.imshow(im, aspect='equal') 71 | cntr = -1 72 | 73 | for cls_ind, cls in enumerate(CLASSES[1:]): 74 | cls_ind += 1 # because we skipped background 75 | cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] 76 | cls_scores = scores[:, cls_ind] 77 | dets = np.hstack((cls_boxes, 78 | cls_scores[:, np.newaxis])).astype(np.float32) 79 | keep = nms(torch.from_numpy(dets), NMS_THRESH) 80 | dets = dets[keep.numpy(), :] 81 | inds = np.where(dets[:, -1] >= thresh)[0] 82 | if len(inds) == 0: 83 | continue 84 | else: 85 | cntr += 1 86 | 87 | for i in inds: 88 | bbox = dets[i, :4] 89 | score = dets[i, -1] 90 | 91 | ax.add_patch( 92 | plt.Rectangle((bbox[0], bbox[1]), 93 | bbox[2] - bbox[0], 94 | bbox[3] - bbox[1], fill=False, 95 | edgecolor=COLORS[cntr % len(COLORS)], linewidth=3.5) 96 | ) 97 | ax.text(bbox[0], bbox[1] - 2, 98 | '{:s} {:.3f}'.format(cls, score), 99 | bbox=dict(facecolor='blue', alpha=0.5), 100 | fontsize=14, color='white') 101 | 102 | ax.set_title('All detections with threshold >= {:.1f}'.format(thresh), fontsize=14) 103 | 104 | plt.axis('off') 105 | plt.tight_layout() 106 | plt.savefig('demo_' + image_name) 107 | print('Saved to `{}`'.format(os.path.join(os.getcwd(), 'demo_' + image_name))) 108 | 109 | 110 | def parse_args(): 111 | """Parse input arguments.""" 112 | parser = argparse.ArgumentParser(description='Tensorflow Faster R-CNN demo') 113 | parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16 res101]', 114 | choices=NETS.keys(), default='res101') 115 | parser.add_argument('--dataset', dest='dataset', help='Trained dataset [pascal_voc pascal_voc_0712]', 116 | choices=DATASETS.keys(), default='pascal_voc_0712') 117 | args = parser.parse_args() 118 | 119 | return args 120 | 121 | 122 | 
if __name__ == '__main__': 123 | cfg.TEST.HAS_RPN = True # Use RPN for proposals 124 | args = parse_args() 125 | 126 | # model path 127 | demonet = args.demo_net 128 | dataset = args.dataset 129 | saved_model = os.path.join('output', demonet, DATASETS[dataset][0], 'default', 130 | NETS[demonet][0] % (70000 if dataset == 'pascal_voc' else 110000)) 131 | 132 | if not os.path.isfile(saved_model): 133 | raise IOError(('{:s} not found.\nDid you download the proper networks from ' 134 | 'our server and place them properly?').format(saved_model)) 135 | 136 | # load network 137 | if demonet == 'vgg16': 138 | net = vgg16() 139 | elif demonet == 'res101': 140 | net = resnetv1(num_layers=101) 141 | else: 142 | raise NotImplementedError 143 | net.create_architecture(21, tag='default', anchor_scales=[8, 16, 32]) 144 | 145 | net.load_state_dict(torch.load(saved_model)) 146 | 147 | net.eval() 148 | net.cuda() 149 | 150 | print('Loaded network {:s}'.format(saved_model)) 151 | 152 | im_names = [i for i in os.listdir('data/demo/') # Pull in all jpgs 153 | if i.lower().endswith(".jpg")] 154 | 155 | for im_name in im_names: 156 | print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~') 157 | print('Demo for data/demo/{}'.format(im_name)) 158 | demo(net, im_name) 159 | 160 | plt.show() 161 | -------------------------------------------------------------------------------- /tools/reval.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | # Reval = re-eval. Re-evaluate saved detections. 
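#
# Example (hypothetical paths; the positional argument is the directory that
# holds detections.pkl from a previous test run):
#   python tools/reval.py output/vgg16/voc_2007_test/default --imdb voc_2007_test --nms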
11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import _init_paths 16 | from model.test import apply_nms 17 | from model.config import cfg 18 | from datasets.factory import get_imdb 19 | import pickle 20 | import os, sys, argparse 21 | import numpy as np 22 | 23 | 24 | def parse_args(): 25 | """ 26 | Parse input arguments 27 | """ 28 | parser = argparse.ArgumentParser(description='Re-evaluate results') 29 | parser.add_argument('output_dir', nargs=1, help='results directory', 30 | type=str) 31 | parser.add_argument('--imdb', dest='imdb_name', 32 | help='dataset to re-evaluate', 33 | default='voc_2007_test', type=str) 34 | parser.add_argument('--matlab', dest='matlab_eval', 35 | help='use matlab for evaluation', 36 | action='store_true') 37 | parser.add_argument('--comp', dest='comp_mode', help='competition mode', 38 | action='store_true') 39 | parser.add_argument('--nms', dest='apply_nms', help='apply nms', 40 | action='store_true') 41 | 42 | if len(sys.argv) == 1: 43 | parser.print_help() 44 | sys.exit(1) 45 | 46 | args = parser.parse_args() 47 | return args 48 | 49 | 50 | def from_dets(imdb_name, output_dir, args): 51 | imdb = get_imdb(imdb_name) 52 | imdb.competition_mode(args.comp_mode) 53 | imdb.config['matlab_eval'] = args.matlab_eval 54 | with open(os.path.join(output_dir, 'detections.pkl'), 'rb') as f: 55 | dets = pickle.load(f) 56 | 57 | if args.apply_nms: 58 | print('Applying NMS to all detections') 59 | nms_dets = apply_nms(dets, cfg.TEST.NMS) 60 | else: 61 | nms_dets = dets 62 | 63 | print('Evaluating detections') 64 | imdb.evaluate_detections(nms_dets, output_dir) 65 | 66 | 67 | if __name__ == '__main__': 68 | args = parse_args() 69 | 70 | output_dir = os.path.abspath(args.output_dir[0]) 71 | imdb_name = args.imdb_name 72 | from_dets(imdb_name, output_dir, args) 73 | os.system("mv ./pr/pr.png ./pr/%s.png" % args.output_dir[0][args.output_dir[0].rfind('/')+1:][18:]) 74 | -------------------------------------------------------------------------------- /tools/test_net.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Zheqi he, Xinlei Chen, based on code from Ross Girshick 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import _init_paths 11 | from model.test import test_net 12 | from model.config import cfg, cfg_from_file, cfg_from_list 13 | from datasets.factory import get_imdb 14 | import datasets.imdb 15 | import argparse 16 | import pprint 17 | import time, os, sys 18 | 19 | from nets.vgg16 import vgg16 20 | from nets.resnet_v1 import resnetv1 21 | from nets.mobilenet_v1 import mobilenetv1 22 | 23 | import torch 24 | 25 | def parse_args(): 26 | """ 27 | Parse input arguments 28 | """ 29 | parser = argparse.ArgumentParser(description='Test a Fast R-CNN network') 30 | parser.add_argument('--cfg', dest='cfg_file', 31 | help='optional config file', default=None, type=str) 32 | parser.add_argument('--model', dest='model', 33 | help='model to test', 34 | default=None, type=str) 35 | parser.add_argument('--imdb', dest='imdb_name', 36 | help='dataset to test', 37 | default='voc_2007_test', type=str) 38 | parser.add_argument('--comp', dest='comp_mode', 
help='competition mode', 39 |             action='store_true') 40 |   parser.add_argument('--num_dets', dest='max_per_image', 41 |             help='max number of detections per image', 42 |             default=100, type=int) 43 |   parser.add_argument('--tag', dest='tag', 44 |             help='tag of the model', 45 |             default='', type=str) 46 |   parser.add_argument('--net', dest='net', 47 |             help='vgg16, res50, res101, res152, mobile', 48 |             default='res50', type=str) 49 |   parser.add_argument('--set', dest='set_cfgs', 50 |             help='set config keys', default=None, 51 |             nargs=argparse.REMAINDER) 52 |   parser.add_argument('--weight', dest='weight', help='initialize with pretrained model weights', default='', type=str)  # referenced below when --model is not given 53 |   if len(sys.argv) == 1: 54 |     parser.print_help() 55 |     sys.exit(1) 56 | 57 |   args = parser.parse_args() 58 |   return args 59 | 60 | if __name__ == '__main__': 61 |   args = parse_args() 62 | 63 |   print('Called with args:') 64 |   print(args) 65 | 66 |   if args.cfg_file is not None: 67 |     cfg_from_file(args.cfg_file) 68 |   if args.set_cfgs is not None: 69 |     cfg_from_list(args.set_cfgs) 70 | 71 |   print('Using config:') 72 |   pprint.pprint(cfg) 73 | 74 |   # if has model, get the name from it 75 |   # if does not, then just use the initialization weights 76 |   if args.model: 77 |     filename = os.path.splitext(os.path.basename(args.model))[0] 78 |   else: 79 |     filename = os.path.splitext(os.path.basename(args.weight))[0] 80 | 81 |   tag = args.tag 82 |   tag = tag if tag else 'default' 83 |   filename = tag + '/' + filename 84 | 85 |   imdb = get_imdb(args.imdb_name) 86 |   imdb.competition_mode(args.comp_mode) 87 | 88 |   # load network 89 |   if args.net == 'vgg16': 90 |     net = vgg16() 91 |   elif args.net == 'res50': 92 |     net = resnetv1(num_layers=50) 93 |   elif args.net == 'res101': 94 |     net = resnetv1(num_layers=101) 95 |   elif args.net == 'res152': 96 |     net = resnetv1(num_layers=152) 97 |   elif args.net == 'mobile': 98 |     net = mobilenetv1() 99 |   elif args.net == 'FPNres50': 100 |     net = resnetv1(num_layers=50) 101 |   else: 102 |     raise NotImplementedError 103 | 104 |   # load model 105 |   net.create_architecture(imdb.num_classes, tag='default', 106 |               anchor_scales=cfg.ANCHOR_SCALES, 107 |               anchor_ratios=cfg.ANCHOR_RATIOS) 108 | 109 |   net.eval() 110 |   net.cuda() 111 | 112 |   if args.model: 113 |     print(('Loading model checkpoint from {:s}').format(args.model)) 114 |     net.load_state_dict(torch.load(args.model)) 115 |     print('Loaded.') 116 |   else: 117 |     print(('Loading initial weights from {:s}').format(args.weight)) 118 |     print('Loaded.') 119 | 120 |   test_net(net, imdb, filename, max_per_image=args.max_per_image) 121 |   # os.system("mv ./pr/pr.png ./pr/%s.png" % args.model[args.model.rfind('/')+1:][18:-4]) 122 | -------------------------------------------------------------------------------- /tools/trainval_net.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Zheqi He, Xinlei Chen, based on code from Ross Girshick 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import _init_paths 11 | from model.train_val import get_training_roidb, train_net 12 | from model.config import cfg, cfg_from_file, cfg_from_list, get_output_dir, get_output_tb_dir 13 | from datasets.factory import get_imdb 14 | import datasets.imdb 15 | import argparse 16 | import pprint 17 | import numpy as np 18 | import sys 19 | 20 | from nets.vgg16 import vgg16 21 | from nets.resnet_v1
import resnetv1 22 | from nets.mobilenet_v1 import mobilenetv1 23 | 24 | def parse_args(): 25 | """ 26 | Parse input arguments 27 | """ 28 | parser = argparse.ArgumentParser(description='Train a Fast R-CNN network') 29 | parser.add_argument('--cfg', dest='cfg_file', 30 | help='optional config file', 31 | default=None, type=str) 32 | parser.add_argument('--weight', dest='weight', 33 | help='initialize with pretrained model weights', 34 | type=str) 35 | parser.add_argument('--imdb', dest='imdb_name', 36 | help='dataset to train on', 37 | default='voc_2007_trainval', type=str) 38 | parser.add_argument('--imdbval', dest='imdbval_name', 39 | help='dataset to validate on', 40 | default='voc_2007_test', type=str) 41 | parser.add_argument('--iters', dest='max_iters', 42 | help='number of iterations to train', 43 | default=70000, type=int) 44 | parser.add_argument('--tag', dest='tag', 45 | help='tag of the model', 46 | default=None, type=str) 47 | parser.add_argument('--net', dest='net', 48 | help='vgg16, res50, res101, res152, mobile', 49 | default='res50', type=str) 50 | parser.add_argument('--set', dest='set_cfgs', 51 | help='set config keys', default=None, 52 | nargs=argparse.REMAINDER) 53 | 54 | if len(sys.argv) == 1: 55 | parser.print_help() 56 | sys.exit(1) 57 | 58 | args = parser.parse_args() 59 | return args 60 | 61 | 62 | def combined_roidb(imdb_names): 63 | """ 64 | Combine multiple roidbs 65 | """ 66 | 67 | def get_roidb(imdb_name): 68 | imdb = get_imdb(imdb_name) 69 | print('Loaded dataset `{:s}` for training'.format(imdb.name)) 70 | imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD) 71 | print('Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD)) 72 | roidb = get_training_roidb(imdb) 73 | return roidb 74 | 75 | roidbs = [get_roidb(s) for s in imdb_names.split('+')] 76 | roidb = roidbs[0] 77 | if len(roidbs) > 1: 78 | for r in roidbs[1:]: 79 | roidb.extend(r) 80 | tmp = get_imdb(imdb_names.split('+')[1]) 81 | imdb = datasets.imdb.imdb(imdb_names, tmp.classes) 82 | else: 83 | imdb = get_imdb(imdb_names) 84 | return imdb, roidb 85 | 86 | 87 | if __name__ == '__main__': 88 | args = parse_args() 89 | 90 | print('Called with args:') 91 | print(args) 92 | 93 | if args.cfg_file is not None: 94 | cfg_from_file(args.cfg_file) 95 | if args.set_cfgs is not None: 96 | cfg_from_list(args.set_cfgs) 97 | 98 | print('Using config:') 99 | pprint.pprint(cfg) 100 | 101 | np.random.seed(cfg.RNG_SEED) 102 | 103 | # train set 104 | imdb, roidb = combined_roidb(args.imdb_name) 105 | print('{:d} roidb entries'.format(len(roidb))) 106 | 107 | # output directory where the models are saved 108 | output_dir = get_output_dir(imdb, args.tag) 109 | print('Output will be saved to `{:s}`'.format(output_dir)) 110 | 111 | # tensorboard directory where the summaries are saved during training 112 | tb_dir = get_output_tb_dir(imdb, args.tag) 113 | print('TensorFlow summaries will be saved to `{:s}`'.format(tb_dir)) 114 | 115 | # also add the validation set, but with no flipping images 116 | orgflip = cfg.TRAIN.USE_FLIPPED 117 | cfg.TRAIN.USE_FLIPPED = False 118 | _, valroidb = combined_roidb(args.imdbval_name) 119 | print('{:d} validation roidb entries'.format(len(valroidb))) 120 | cfg.TRAIN.USE_FLIPPED = orgflip 121 | 122 | # load network 123 | if args.net == 'vgg16': 124 | net = vgg16() 125 | elif args.net == 'res50': 126 | net = resnetv1(num_layers=50) 127 | elif args.net == 'res101': 128 | net = resnetv1(num_layers=101) 129 | elif args.net == 'res152': 130 | net = resnetv1(num_layers=152) 131 | elif args.net 
== 'mobile': 132 | net = mobilenetv1() 133 | else: 134 | raise NotImplementedError 135 | 136 | train_net(net, imdb, roidb, valroidb, output_dir, tb_dir, 137 | pretrained_model=args.weight, 138 | max_iters=args.max_iters) 139 | -------------------------------------------------------------------------------- /tools/trainval_net_adapt.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Zheqi He, Xinlei Chen, based on code from Ross Girshick 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import _init_paths 11 | from model.train_val_adapt import get_training_roidb, train_net 12 | from model.config import cfg, cfg_from_file, cfg_from_list, get_output_dir, get_output_tb_dir 13 | from datasets.factory import get_imdb 14 | import datasets.imdb 15 | import argparse 16 | import pprint 17 | import numpy as np 18 | import sys 19 | 20 | from nets.vgg16 import vgg16 21 | from nets.resnet_v1 import resnetv1 22 | from nets.mobilenet_v1 import mobilenetv1 23 | 24 | import random 25 | #import os 26 | #os.environ['CUDA_LAUNCH_BLOCKING'] = '1' 27 | 28 | def parse_args(): 29 | """ 30 | Parse input arguments 31 | """ 32 | parser = argparse.ArgumentParser(description='Train a Fast R-CNN network') 33 | parser.add_argument('--cfg', dest='cfg_file', 34 | help='optional config file', 35 | default=None, type=str) 36 | parser.add_argument('--weight', dest='weight', 37 | help='initialize with pretrained model weights', 38 | type=str) 39 | parser.add_argument('--imdb', dest='imdb_name', 40 | help='dataset to train on', 41 | default='voc_2007_trainval', type=str) 42 | parser.add_argument('--imdb_T', dest='imdb_T_name', 43 | help='dataset(target) to train on', 44 | default='voc_2007_trainval', type=str) 45 | parser.add_argument('--imdbval', dest='imdbval_name', 46 | help='dataset to validate on', 47 | default='voc_2007_test', type=str) 48 | parser.add_argument('--iters', dest='max_iters', 49 | help='number of iterations to train', 50 | default=70000, type=int) 51 | parser.add_argument('--tag', dest='tag', 52 | help='tag of the model', 53 | default=None, type=str) 54 | parser.add_argument('--net', dest='net', 55 | help='vgg16, res50, res101, res152, mobile', 56 | default='res50', type=str) 57 | parser.add_argument('--set', dest='set_cfgs', 58 | help='set config keys', default=None, 59 | nargs=argparse.REMAINDER) 60 | 61 | if len(sys.argv) == 1: 62 | parser.print_help() 63 | sys.exit(1) 64 | 65 | args = parser.parse_args() 66 | return args 67 | 68 | 69 | def combined_roidb(imdb_names): 70 | """ 71 | Combine multiple roidbs 72 | """ 73 | 74 | def get_roidb(imdb_name): 75 | imdb = get_imdb(imdb_name) 76 | print('Loaded dataset `{:s}` for training'.format(imdb.name)) 77 | imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD) 78 | print('Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD)) 79 | roidb = get_training_roidb(imdb) 80 | return roidb 81 | 82 | roidbs = [get_roidb(s) for s in imdb_names.split('+')] 83 | roidb = roidbs[0] 84 | if len(roidbs) > 1: 85 | for r in roidbs[1:]: 86 | roidb.extend(r) 87 | tmp = get_imdb(imdb_names.split('+')[1]) 88 | imdb = datasets.imdb.imdb(imdb_names, tmp.classes) 89 | else: 90 | imdb = get_imdb(imdb_names) 91 | return imdb, roidb 92 | 93 
| 94 | if __name__ == '__main__': 95 | 96 | args = parse_args() 97 | 98 | print('Called with args:') 99 | print(args) 100 | 101 | if args.cfg_file is not None: 102 | cfg_from_file(args.cfg_file) 103 | if args.set_cfgs is not None: 104 | cfg_from_list(args.set_cfgs) 105 | 106 | print('Using config:') 107 | pprint.pprint(cfg) 108 | 109 | np.random.seed(cfg.RNG_SEED) 110 | random.seed(cfg.RNG_SEED) 111 | # train set 112 | imdb, roidb = combined_roidb(args.imdb_name) 113 | print('{:d} roidb entries'.format(len(roidb))) 114 | imdb_T, roidb_T = combined_roidb(args.imdb_T_name) 115 | print('{:d} roidbT entries'.format(len(roidb_T))) 116 | 117 | # output directory where the models are saved 118 | output_dir = get_output_dir(imdb, args.tag) 119 | print('Output will be saved to `{:s}`'.format(output_dir)) 120 | 121 | # tensorboard directory where the summaries are saved during training 122 | tb_dir = get_output_tb_dir(imdb, args.tag) 123 | print('TensorFlow summaries will be saved to `{:s}`'.format(tb_dir)) 124 | 125 | # also add the validation set, but with no flipping images 126 | orgflip = cfg.TRAIN.USE_FLIPPED 127 | cfg.TRAIN.USE_FLIPPED = False 128 | _, valroidb = combined_roidb(args.imdbval_name) 129 | print('{:d} validation roidb entries'.format(len(valroidb))) 130 | cfg.TRAIN.USE_FLIPPED = orgflip 131 | 132 | # load network 133 | if args.net == 'vgg16': 134 | net = vgg16() 135 | elif args.net == 'res50': 136 | net = resnetv1(num_layers=50) 137 | elif args.net == 'res101': 138 | net = resnetv1(num_layers=101) 139 | elif args.net == 'res152': 140 | net = resnetv1(num_layers=152) 141 | elif args.net == 'mobile': 142 | net = mobilenetv1() 143 | else: 144 | raise NotImplementedError 145 | 146 | train_net(net, imdb, roidb, imdb_T, roidb_T, valroidb, output_dir, tb_dir, 147 | pretrained_model=args.weight, 148 | max_iters=args.max_iters) 149 | -------------------------------------------------------------------------------- /trained_weights/.gitignore: -------------------------------------------------------------------------------- 1 | net_D* 2 | *.tar.gz 3 | *.pth 4 | --------------------------------------------------------------------------------