├── .gitignore ├── LICENSE ├── README.md ├── cycleGAN_dataset_loader ├── datasets_K_C.py ├── datasets_cityscapes_BDDday.py └── datasets_foggyC_C.py ├── data ├── .gitignore ├── demo │ ├── 000456.jpg │ ├── 000542.jpg │ ├── 001150.jpg │ ├── 001763.jpg │ └── 004545.jpg ├── imgs │ ├── gt.png │ └── pred.png └── scripts │ └── fetch_faster_rcnn_models.sh ├── experiments ├── cfgs │ ├── FPNres50.yml │ ├── mobile.yml │ ├── res101-lg.yml │ ├── res101.yml │ ├── res50.yml │ ├── vgg16.yml │ ├── vgg16_C2BDD.yml │ ├── vgg16_C2F.yml │ └── vgg16_K2C.yml ├── logs │ └── .gitignore └── scripts │ ├── convert_vgg16.sh │ ├── test_adapt_faster_rcnn.sh │ ├── test_adapt_faster_rcnn_stage1.sh │ ├── test_adapt_faster_rcnn_stage2.sh │ ├── test_faster_rcnn.sh │ ├── test_faster_rcnn_notime.sh │ ├── train_adapt_faster_rcnn_stage1.sh │ ├── train_adapt_faster_rcnn_stage2.sh │ ├── train_faster_rcnn.sh │ └── train_faster_rcnn_notime.sh ├── figure ├── adapt_results_c2bdd.png ├── adapt_results_c2f.png ├── adapt_results_k2c.png └── det_results.png ├── lib ├── datasets │ ├── KITTI.py │ ├── VOCdevkit-matlab-wrapper │ │ ├── get_voc_opts.m │ │ ├── voc_eval.m │ │ └── xVOCap.m │ ├── __init__.py │ ├── bdd100k.py │ ├── cityscapes.py │ ├── cityscapes_eval.py │ ├── ds_utils.py │ ├── factory.py │ ├── imdb.py │ ├── pascal_voc.py │ ├── tools │ │ └── mcg_munge.py │ └── voc_eval.py ├── layer_utils │ ├── __init__.py │ ├── anchor_target_layer.py │ ├── generate_anchors.py │ ├── proposal_layer.py │ ├── proposal_target_layer.py │ ├── proposal_top_layer.py │ ├── roi_align │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── crop_and_resize │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── crop_and_resize.py │ │ ├── roi_align.py │ │ └── src │ │ │ ├── crop_and_resize.c │ │ │ ├── crop_and_resize.h │ │ │ ├── crop_and_resize_gpu.c │ │ │ ├── crop_and_resize_gpu.h │ │ │ └── cuda │ │ │ ├── crop_and_resize_kernel.cu │ │ │ ├── crop_and_resize_kernel.cu.o │ │ │ └── crop_and_resize_kernel.h │ ├── roi_pooling │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── roi_pooling │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── roi_pool.py │ │ ├── roi_pool_py.py │ │ └── src │ │ │ ├── cuda │ │ │ ├── roi_pooling_kernel.cu │ │ │ ├── roi_pooling_kernel.cu.o │ │ │ └── roi_pooling_kernel.h │ │ │ ├── roi_pooling.c │ │ │ ├── roi_pooling.h │ │ │ ├── roi_pooling_cuda.c │ │ │ └── roi_pooling_cuda.h │ └── snippets.py ├── make.sh ├── model │ ├── __init__.py │ ├── bbox_transform.py │ ├── config.py │ ├── nms_wrapper.py │ ├── test.py │ ├── test.py~ │ ├── train_val.py │ └── train_val_adapt.py ├── nets │ ├── .py │ ├── __init__.py │ ├── discriminator_img.py │ ├── discriminator_inst.py │ ├── mobilenet_v1.py │ ├── network.py │ ├── resnet_v1.py │ └── vgg16.py ├── nms │ ├── __init__.py │ ├── _ext │ │ ├── __init__.py │ │ └── nms │ │ │ └── __init__.py │ ├── build.py │ ├── pth_nms.py │ └── src │ │ ├── cuda │ │ ├── nms_kernel.cu │ │ ├── nms_kernel.cu.o │ │ └── nms_kernel.h │ │ ├── nms.c │ │ ├── nms.h │ │ ├── nms_cuda.c │ │ └── nms_cuda.h ├── roi_data_layer │ ├── __init__.py │ ├── layer.py │ ├── minibatch.py │ └── roidb.py └── utils │ ├── .gitignore │ ├── __init__.py │ ├── bbox.py │ ├── blob.py │ ├── timer.py │ └── visualization.py ├── requirements.txt ├── tools ├── _init_paths.py ├── convert_from_tensorflow.py ├── convert_from_tensorflow_mobile.py ├── convert_from_tensorflow_vgg.py ├── demo.ipynb ├── demo.py ├── demo_all_bboxes.py ├── reval.py ├── test_net.py ├── trainval_net.py └── trainval_net_adapt.py └── trained_weights ├── .gitignore ├── netD_CsynthBDDday_score.json 
├── netD_CsynthFoggyC_score.json └── netD_synthC_score.json /.gitignore: -------------------------------------------------------------------------------- 1 | *.so 2 | *.pyc 3 | output 4 | tensorboard 5 | lib/build 6 | lib/pycocotools 7 | lib/pycocotools/_mask.c 8 | lib/pycocotools/_mask.so 9 | .idea 10 | results/Main/* 11 | 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Xinlei Chen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Progressive Domain Adaptation for Object Detection 2 | Implementation of our paper **[Progressive Domain Adaptation for Object Detection](https://arxiv.org/pdf/1910.11319.pdf)**, based on [pytorch-faster-rcnn](https://github.com/ruotianluo/pytorch-faster-rcnn/blob/master/README.md) and [PyTorch-CycleGAN](https://github.com/aitorzip/PyTorch-CycleGAN). 3 | 4 | ## Paper 5 | **[Progressive Domain Adaptation for Object Detection](https://arxiv.org/pdf/1910.11319.pdf)** 6 | [Han-Kai Hsu](https://sites.google.com/site/kevinhkhsu/), [Chun-Han Yao](https://www.chhankyao.com/), [Yi-Hsuan Tsai](https://sites.google.com/site/yihsuantsai/home), [Wei-Chih Hung](https://hfslyc.github.io/), [Hung-Yu Tseng](https://sites.google.com/site/hytseng0509/), [Maneesh Singh](https://scholar.google.com/citations?user=hdQhiFgAAAAJ) and [Ming-Hsuan Yang](http://faculty.ucmerced.edu/mhyang/index.html) 7 | IEEE Winter Conference on Applications of Computer Vision (WACV), 2020. 8 | 9 | Please cite our paper if you find it useful for your research. 
10 | ``` 11 | @inproceedings{hsu2020progressivedet, 12 | author = {Han-Kai Hsu and Chun-Han Yao and Yi-Hsuan Tsai and Wei-Chih Hung and Hung-Yu Tseng and Maneesh Singh and Ming-Hsuan Yang}, 13 | booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)}, 14 | title = {Progressive Domain Adaptation for Object Detection}, 15 | year = {2020} 16 | } 17 | ``` 18 | 19 | ## Dependencies 20 | This code is tested with **PyTorch 0.4.1** and **CUDA 9.0**. 21 | ``` 22 | # PyTorch via pip: download and install the PyTorch 0.4.1 wheel for CUDA 9.0 23 | # from https://download.pytorch.org/whl/cu90/torch_stable.html 24 | # PyTorch via conda: 25 | conda install pytorch=0.4.1 cuda90 -c pytorch 26 | # Other dependencies: 27 | pip install -r requirements.txt 28 | sh ./lib/make.sh 29 | ``` 30 | 31 | ## Data Preparation 32 | #### KITTI 33 | - Download the data from [here](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=2d). 34 | - Extract the files under `data/KITTI/`. 35 | 36 | #### Cityscapes 37 | - Download the data from [here](https://www.cityscapes-dataset.com/). 38 | - Extract the files under `data/CityScapes/`. 39 | 40 | #### Foggy Cityscapes 41 | - Follow the instructions [here](https://www.cityscapes-dataset.com/) to request the dataset download. 42 | - Place the data under `data/CityScapes/leftImg8bit/` as `foggytrain` and `foggyval`. 43 | 44 | #### BDD100k 45 | - Download the data from [here](https://bdd-data.berkeley.edu/). 46 | - Extract the files under `data/bdd100k/`. 47 | 48 | ## Generate synthetic data with CycleGAN 49 | Generate the synthetic data with the [PyTorch-CycleGAN](https://github.com/aitorzip/PyTorch-CycleGAN) implementation. 50 | 51 | `git clone https://github.com/aitorzip/PyTorch-CycleGAN` 52 | 53 | #### Dataset loader code 54 | Import the dataset loader code in `./cycleGAN_dataset_loader/` to train/test the CycleGAN on the corresponding image translation task. 55 | 56 | #### Generate from pre-trained weights: 57 | Follow the testing instructions on [PyTorch-CycleGAN](https://github.com/aitorzip/PyTorch-CycleGAN) and download the weights below to generate the synthetic images. (Remember to set the corresponding output image size.) 58 | - [KITTI with Cityscapes style](http://vllab1.ucmerced.edu/~hhsu22/da_det/pretrained_cycleGAN_weight/K_C_model.tar.gz) (KITTI->Cityscapes): size=(376,1244) 59 | Place the generated data under `data/KITTI/training/synthCity_image_2/` with the same naming and folder structure as the original KITTI data. 60 | - [Cityscapes with FoggyCityscapes style](http://vllab1.ucmerced.edu/~hhsu22/da_det/pretrained_cycleGAN_weight/foggyC_C_model.tar.gz) (Cityscapes->FoggyCityscapes): size=(1024,2048) 61 | Place the generated data under `data/CityScapes/leftImg8bit/synthFoggytrain` with the same naming and folder structure as the original Cityscapes data. 62 | - [Cityscapes with BDD style](http://vllab1.ucmerced.edu/~hhsu22/da_det/pretrained_cycleGAN_weight/cityscapes_BDDday_model.tar.gz) (Cityscapes->BDD100k): size=(1024,1280) 63 | Place the generated data under `data/CityScapes/leftImg8bit/synthBDDdaytrain` and `data/CityScapes/leftImg8bit/synthBDDdayval` with the same naming and folder structure as the original Cityscapes data. 64 | 65 | #### Train your own CycleGAN: 66 | Please follow the training instructions on [PyTorch-CycleGAN](https://github.com/aitorzip/PyTorch-CycleGAN).
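For reference, here is a minimal sketch (not the authors' exact training code) of how these loaders plug into the PyTorch-CycleGAN scripts, assuming you copy e.g. `datasets_K_C.py` over that repo's `datasets.py` and run from this repo's root so the hardcoded `./data/...` paths resolve; the transform settings below are assumptions, not the exact values used to train the released weights:
```
# Minimal usage sketch for the K2C loader.
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from datasets import ImageDataset  # copied from cycleGAN_dataset_loader/datasets_K_C.py

transforms_ = [
    transforms.Resize((376, 1244)),  # KITTI output size listed above
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
]
# The root argument is unused by these loaders (image paths are hardcoded).
dataloader = DataLoader(ImageDataset('', transforms_=transforms_, unaligned=True),
                        batch_size=1, shuffle=True, num_workers=4)

for batch in dataloader:
    real_A, real_B = batch['A'], batch['B']  # KITTI image / Cityscapes image
    print(real_A.shape, batch['A_path'])
    break
```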
67 | 68 | ## Test the adaptation model 69 | Download the following adapted weights to `./trained_weights/adapt_weight/` 70 | - [KITTI->Cityscapes](http://vllab1.ucmerced.edu/~hhsu22/da_det/adapt_weight/vgg16_faster_rcnn_K2C_stage2.pth) 71 | - [Cityscapes->FoggyCityscapes](http://vllab1.ucmerced.edu/~hhsu22/da_det/adapt_weight/vgg16_faster_rcnn_C2F_stage2.pth) 72 | - [Cityscapes->BDD100k](http://vllab1.ucmerced.edu/~hhsu22/da_det/adapt_weight/vgg16_faster_rcnn_C2BDD_stage2.pth) 73 | ``` 74 | ./experiments/scripts/test_adapt_faster_rcnn.sh [GPU_ID] [Adapt_mode] vgg16 75 | # Specify the GPU_ID you want to use 76 | # Adapt_mode selection: 77 | # 'K2C': KITTI->Cityscapes 78 | # 'C2F': Cityscapes->Foggy Cityscapes 79 | # 'C2BDD': Cityscapes->BDD100k_day 80 | # Example: 81 | ./experiments/scripts/test_adapt_faster_rcnn.sh 0 K2C vgg16 82 | ``` 83 | 84 | ## Train your own model 85 | #### Stage one 86 | ``` 87 | ./experiments/scripts/train_adapt_faster_rcnn_stage1.sh [GPU_ID] [Adapt_mode] vgg16 88 | # Specify the GPU_ID you want to use 89 | # Adapt_mode selection: 90 | # 'K2C': KITTI->Cityscapes 91 | # 'C2F': Cityscapes->Foggy Cityscapes 92 | # 'C2BDD': Cityscapes->BDD100k_day 93 | # Example: 94 | ./experiments/scripts/train_adapt_faster_rcnn_stage1.sh 0 K2C vgg16 95 | ``` 96 | Stage one initializes from a source-trained detector; download the following pretrained detector weights to `./trained_weights/pretrained_detector/` before training: 97 | - [KITTI for K2C](http://vllab1.ucmerced.edu/~hhsu22/da_det/pretrained_detector/vgg16_faster_rcnn_KITTI_pretrained.pth) 98 | - [Cityscapes for C2F](http://vllab1.ucmerced.edu/~hhsu22/da_det/pretrained_detector/vgg16_faster_rcnn_city_pretrained_8class.pth) 99 | - [Cityscapes for C2BDD](http://vllab1.ucmerced.edu/~hhsu22/da_det/pretrained_detector/vgg16_faster_rcnn_city_pretrained_10class.pth) 100 | 101 | #### Stage two 102 | ``` 103 | ./experiments/scripts/train_adapt_faster_rcnn_stage2.sh 0 K2C vgg16 104 | ``` 105 | Stage two resumes from the stage-one checkpoint and requires the following discriminator score files: 106 | - netD_synthC_score.json 107 | - netD_CsynthFoggyC_score.json 108 | - netD_CsynthBDDday_score.json 109 | 110 | Extract the pretrained [CycleGAN discriminator scores](http://vllab1.ucmerced.edu/~hhsu22/da_det/D_score.tar.gz) to `./trained_weights/`
111 | or
112 | Save a dictionary of CycleGAN discriminator scores, with the image name as the key and the score as the value.
113 | Ex: {'jena_000074_000019_leftImg8bit.png': 0.64} 114 | 115 | ## Detection results 116 | ![](figure/det_results.png) 117 | 118 | ## Adaptation results 119 | ![](figure/adapt_results_k2c.png) 120 | ![](figure/adapt_results_c2f.png) 121 | ![](figure/adapt_results_c2bdd.png) 122 | 123 | ## Acknowledgement 124 | Thanks to the awesome implementations from [pytorch-faster-rcnn](https://github.com/ruotianluo/pytorch-faster-rcnn/blob/master/README.md) and [PyTorch-CycleGAN](https://github.com/aitorzip/PyTorch-CycleGAN). 125 | -------------------------------------------------------------------------------- /cycleGAN_dataset_loader/datasets_K_C.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import random 3 | import os 4 | 5 | from torch.utils.data import Dataset 6 | from PIL import Image 7 | import torchvision.transforms as transforms 8 | 9 | class ImageDataset(Dataset): 10 | def __init__(self, root, transforms_=None, unaligned=False, mode='train'): 11 | self.transform = transforms.Compose(transforms_) 12 | self.unaligned = unaligned 13 | 14 | #self.files_A = sorted(glob.glob(os.path.join(root, '%s/A' % mode) + '/*.*')) 15 | #self.files_B = sorted(glob.glob(os.path.join(root, '%s/B' % mode) + '/*.*')) 16 | kitti_mode = mode 17 | kitti_root = './data/KITTI/training/image_2/' 18 | 19 | with open('./data/KITTI/training/ImageSets/trainval.txt', 'r') as f: 20 | inds = f.readlines() 21 | 22 | self.files_A = sorted([kitti_root + i.strip() + '.png' for i in inds]) 23 | self.files_B = sorted(glob.glob('./data/CityScapes/leftImg8bit/val/*/*.*')) 24 | print(len(self.files_A), len(self.files_B)) 25 | def __getitem__(self, index): 26 | item_A = self.transform(Image.open(self.files_A[index % len(self.files_A)])) 27 | 28 | if self.unaligned: 29 | item_B = self.transform(Image.open(self.files_B[random.randint(0, len(self.files_B) - 1)])) 30 | else: 31 | item_B = self.transform(Image.open(self.files_B[index % len(self.files_B)])) 32 | 33 | return {'A': item_A, 'B': item_B, 'A_path': self.files_A[index % len(self.files_A)], 'B_path': self.files_B[index % len(self.files_B)]} 34 | 35 | def __len__(self): 36 | return max(len(self.files_A), len(self.files_B)) 37 | -------------------------------------------------------------------------------- /cycleGAN_dataset_loader/datasets_cityscapes_BDDday.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import random 3 | import os 4 | 5 | from torch.utils.data import Dataset 6 | from PIL import Image 7 | import torchvision.transforms as transforms 8 | 9 | class ImageDataset(Dataset): 10 | def __init__(self, root, transforms_=None, unaligned=False, mode='train'): 11 | self.transform = transforms.Compose(transforms_) 12 | self.unaligned = unaligned 13 | 14 | #self.files_A = sorted(glob.glob(os.path.join(root, '%s/A' % mode) + '/*.*')) 15 | #self.files_B = sorted(glob.glob(os.path.join(root, '%s/B' % mode) + '/*.*')) 16 | 17 | BDD_root = './data/bdd100k/images/100k/' 18 | #A: Cityscapes 19 | #B: BDD_day 20 | #with open('/home/hhsu22/bdd100k/labels/ImageSets/day%s.txt'%'train', 'r') as f: 21 | # inds = f.readlines() 22 | with open('./data/bdd100k/labels/ImageSets/day%s.txt'%'val', 'r') as f: 23 | inds = f.readlines() 24 | 25 | self.files_A = sorted(glob.glob('./data/CityScapes/leftImg8bit/%s/*/*.*'%'train') + glob.glob('./data/CityScapes/leftImg8bit/%s/*/*.*'%'val')) 26 | 27 | self.files_B = sorted([os.path.join(BDD_root, i.strip()) for i in inds]) 28 | 29 | 
print(mode) 30 | print(len(self.files_A), len(self.files_B)) 31 | 32 | def __getitem__(self, index): 33 | item_A = self.transform(Image.open(self.files_A[index % len(self.files_A)])) 34 | 35 | if self.unaligned: 36 | item_B = self.transform(Image.open(self.files_B[random.randint(0, len(self.files_B) - 1)])) 37 | else: 38 | item_B = self.transform(Image.open(self.files_B[index % len(self.files_B)])) 39 | 40 | return {'A': item_A, 'B': item_B, 'A_path': self.files_A[index % len(self.files_A)], 'B_path': self.files_B[index % len(self.files_B)]} 41 | 42 | def __len__(self): 43 | return max(len(self.files_A), len(self.files_B)) 44 | -------------------------------------------------------------------------------- /cycleGAN_dataset_loader/datasets_foggyC_C.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import random 3 | import os 4 | 5 | from torch.utils.data import Dataset 6 | from PIL import Image 7 | import torchvision.transforms as transforms 8 | 9 | class ImageDataset(Dataset): 10 | def __init__(self, root, transforms_=None, unaligned=False, mode='train'): 11 | self.transform = transforms.Compose(transforms_) 12 | self.unaligned = unaligned 13 | 14 | #self.files_A = sorted(glob.glob(os.path.join(root, '%s/A' % mode) + '/*.*')) 15 | #self.files_B = sorted(glob.glob(os.path.join(root, '%s/B' % mode) + '/*.*')) 16 | 17 | self.files_A = sorted(glob.glob('./data/CityScapes/leftImg8bit/%s/*/*.*'%mode)) 18 | self.files_B = sorted(glob.glob('./data/CityScapes/leftImg8bit/foggy%s/*/*.*'%mode)) 19 | print(mode) 20 | print(len(self.files_B)) 21 | def __getitem__(self, index): 22 | item_A = self.transform(Image.open(self.files_A[index % len(self.files_A)])) 23 | 24 | if self.unaligned: 25 | item_B = self.transform(Image.open(self.files_B[random.randint(0, len(self.files_B) - 1)])) 26 | else: 27 | item_B = self.transform(Image.open(self.files_B[index % len(self.files_B)])) 28 | 29 | return {'A': item_A, 'B': item_B, 'A_path': self.files_A[index % len(self.files_A)], 'B_path': self.files_B[index % len(self.files_B)]} 30 | 31 | def __len__(self): 32 | return max(len(self.files_A), len(self.files_B)) 33 | -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | selective_search* 2 | imagenet_* 3 | fast_rcnn_* 4 | VOCdevkit* 5 | coco* 6 | cache 7 | KITTI* 8 | CityScapes* 9 | bdd100k* 10 | -------------------------------------------------------------------------------- /data/demo/000456.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/data/demo/000456.jpg -------------------------------------------------------------------------------- /data/demo/000542.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/data/demo/000542.jpg -------------------------------------------------------------------------------- /data/demo/001150.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/data/demo/001150.jpg -------------------------------------------------------------------------------- /data/demo/001763.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/data/demo/001763.jpg -------------------------------------------------------------------------------- /data/demo/004545.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/data/demo/004545.jpg -------------------------------------------------------------------------------- /data/imgs/gt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/data/imgs/gt.png -------------------------------------------------------------------------------- /data/imgs/pred.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/data/imgs/pred.png -------------------------------------------------------------------------------- /data/scripts/fetch_faster_rcnn_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )" 4 | cd $DIR 5 | 6 | NET=res101 7 | FILE=voc_0712_80k-110k.tgz 8 | # replace it with gs11655.sp.cs.cmu.edu if ladoga.graphics.cs.cmu.edu does not work 9 | URL=http://ladoga.graphics.cs.cmu.edu/xinleic/tf-faster-rcnn/$NET/$FILE 10 | CHECKSUM=cb32e9df553153d311cc5095b2f8c340 11 | 12 | if [ -f $FILE ]; then 13 | echo "File already exists. Checking md5..." 14 | os=`uname -s` 15 | if [ "$os" = "Linux" ]; then 16 | checksum=`md5sum $FILE | awk '{ print $1 }'` 17 | elif [ "$os" = "Darwin" ]; then 18 | checksum=`cat $FILE | md5` 19 | fi 20 | if [ "$checksum" = "$CHECKSUM" ]; then 21 | echo "Checksum is correct. No need to download." 22 | exit 0 23 | else 24 | echo "Checksum is incorrect. Need to download again." 25 | fi 26 | fi 27 | 28 | echo "Downloading ResNet-101 Faster R-CNN models pre-trained on VOC 07+12 (340M)..." 29 | 30 | wget $URL -O $FILE 31 | 32 | echo "Unzipping..." 33 | 34 | tar zxvf $FILE 35 | 36 | echo "Done. Please run this command again to verify that checksum = $CHECKSUM."
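# Usage: sh data/scripts/fetch_faster_rcnn_models.sh
# The script cd's into data/, downloads and extracts the VOC 07+12 ResNet-101
# model, and on a second run verifies the archive against the MD5 checksum above.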
37 | -------------------------------------------------------------------------------- /experiments/cfgs/FPNres50.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: FPNres50 2 | TRAIN: 3 | HAS_RPN: True 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | DOUBLE_BIAS: False 13 | SNAPSHOT_PREFIX: FPNres50_faster_rcnn_imnet_new 14 | SNAPSHOT_ITERS: 1000 15 | TEST: 16 | HAS_RPN: True 17 | POOLING_MODE: crop 18 | FPN: True 19 | -------------------------------------------------------------------------------- /experiments/cfgs/mobile.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: mobile 2 | TRAIN: 3 | HAS_RPN: True 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | DOUBLE_BIAS: False 13 | SNAPSHOT_PREFIX: mobile_faster_rcnn 14 | TEST: 15 | HAS_RPN: True 16 | POOLING_MODE: crop 17 | -------------------------------------------------------------------------------- /experiments/cfgs/res101-lg.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res101-lg 2 | TRAIN: 3 | HAS_RPN: True 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | DOUBLE_BIAS: False 13 | SNAPSHOT_PREFIX: res101_faster_rcnn 14 | SCALES: [800] 15 | MAX_SIZE: 1333 16 | TEST: 17 | HAS_RPN: True 18 | SCALES: [800] 19 | MAX_SIZE: 1333 20 | RPN_POST_NMS_TOP_N: 1000 21 | POOLING_MODE: crop 22 | ANCHOR_SCALES: [2,4,8,16,32] 23 | -------------------------------------------------------------------------------- /experiments/cfgs/res101.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res101 2 | TRAIN: 3 | HAS_RPN: True 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | DOUBLE_BIAS: False 13 | SNAPSHOT_PREFIX: res101_faster_rcnn 14 | TEST: 15 | HAS_RPN: True 16 | POOLING_MODE: crop 17 | -------------------------------------------------------------------------------- /experiments/cfgs/res50.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res50 2 | TRAIN: 3 | HAS_RPN: True 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | DOUBLE_BIAS: False 13 | SNAPSHOT_PREFIX: res50_faster_rcnn_img_synthC2C_from_K2synthC_unfix 14 | TEST: 15 | HAS_RPN: True 16 | POOLING_MODE: crop 17 | -------------------------------------------------------------------------------- /experiments/cfgs/vgg16.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: vgg16 2 | TRAIN: 3 | HAS_RPN: True 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | TEST: 13 | HAS_RPN: True 14 
| POOLING_MODE: crop 15 | -------------------------------------------------------------------------------- /experiments/cfgs/vgg16_C2BDD.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: vgg16 2 | ADAPT_MODE: C2BDD 3 | TRAIN: 4 | HAS_RPN: True 5 | IMS_PER_BATCH: 1 6 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 7 | RPN_POSITIVE_OVERLAP: 0.7 8 | RPN_BATCHSIZE: 256 9 | PROPOSAL_METHOD: gt 10 | BG_THRESH_LO: 0.0 11 | DISPLAY: 20 12 | BATCH_SIZE: 256 13 | TEST: 14 | HAS_RPN: True 15 | POOLING_MODE: crop 16 | -------------------------------------------------------------------------------- /experiments/cfgs/vgg16_C2F.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: vgg16 2 | ADAPT_MODE: C2F 3 | TRAIN: 4 | HAS_RPN: True 5 | IMS_PER_BATCH: 1 6 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 7 | RPN_POSITIVE_OVERLAP: 0.7 8 | RPN_BATCHSIZE: 256 9 | PROPOSAL_METHOD: gt 10 | BG_THRESH_LO: 0.0 11 | DISPLAY: 20 12 | BATCH_SIZE: 256 13 | TEST: 14 | HAS_RPN: True 15 | POOLING_MODE: crop 16 | -------------------------------------------------------------------------------- /experiments/cfgs/vgg16_K2C.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: vgg16 2 | ADAPT_MODE: K2C 3 | TRAIN: 4 | HAS_RPN: True 5 | IMS_PER_BATCH: 1 6 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 7 | RPN_POSITIVE_OVERLAP: 0.7 8 | RPN_BATCHSIZE: 256 9 | PROPOSAL_METHOD: gt 10 | BG_THRESH_LO: 0.0 11 | DISPLAY: 20 12 | BATCH_SIZE: 256 13 | TEST: 14 | HAS_RPN: True 15 | POOLING_MODE: crop 16 | -------------------------------------------------------------------------------- /experiments/logs/.gitignore: -------------------------------------------------------------------------------- 1 | *.txt.* 2 | -------------------------------------------------------------------------------- /experiments/scripts/convert_vgg16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | GPU_ID=$1 9 | DATASET=$2 10 | NET=vgg16 11 | 12 | array=( $@ ) 13 | len=${#array[@]} 14 | EXTRA_ARGS=${array[@]:2:$len} 15 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 16 | 17 | case ${DATASET} in 18 | pascal_voc) 19 | TRAIN_IMDB="voc_2007_trainval" 20 | TEST_IMDB="voc_2007_test" 21 | ITERS=70000 22 | ANCHORS="[8,16,32]" 23 | RATIOS="[0.5,1,2]" 24 | ;; 25 | pascal_voc_0712) 26 | TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval" 27 | TEST_IMDB="voc_2007_test" 28 | ITERS=110000 29 | ANCHORS="[8,16,32]" 30 | RATIOS="[0.5,1,2]" 31 | ;; 32 | coco) 33 | TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival" 34 | TEST_IMDB="coco_2014_minival" 35 | ITERS=490000 36 | ANCHORS="[4,8,16,32]" 37 | RATIOS="[0.5,1,2]" 38 | ;; 39 | *) 40 | echo "No dataset given" 41 | exit 42 | ;; 43 | esac 44 | 45 | set +x 46 | NET_FINAL=${NET}_faster_rcnn_iter_${ITERS} 47 | set -x 48 | 49 | if [ ! -f ${NET_FINAL}.index ]; then 50 | if [[ ! 
-z ${EXTRA_ARGS_SLUG} ]]; then 51 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/convert_from_depre.py \ 52 | --snapshot ${NET_FINAL} \ 53 | --imdb ${TRAIN_IMDB} \ 54 | --iters ${ITERS} \ 55 | --cfg experiments/cfgs/${NET}.yml \ 56 | --tag ${EXTRA_ARGS_SLUG} \ 57 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} ${EXTRA_ARGS} 58 | else 59 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/convert_from_depre.py \ 60 | --snapshot ${NET_FINAL} \ 61 | --imdb ${TRAIN_IMDB} \ 62 | --iters ${ITERS} \ 63 | --cfg experiments/cfgs/${NET}.yml \ 64 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} ${EXTRA_ARGS} 65 | fi 66 | fi 67 | 68 | -------------------------------------------------------------------------------- /experiments/scripts/test_adapt_faster_rcnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | GPU_ID=$1 9 | ADAPT_MODE=$2 10 | NET=$3 11 | 12 | array=( $@ ) 13 | len=${#array[@]} 14 | EXTRA_ARGS=${array[@]:4:$len} 15 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 16 | 17 | case ${ADAPT_MODE} in 18 | K2C) 19 | TRAIN_IMDB_S="KITTI_synthCity" 20 | TRAIN_IMDB_T="cityscapes_train" 21 | TEST_IMDB="cityscapes_val" 22 | STEPSIZE="[50000]" 23 | ANCHORS="[4,8,16,32]" 24 | RATIOS="[0.5,1,2]" 25 | NET_FINAL='trained_weights/adapt_weight/vgg16_faster_rcnn_K2C_stage2.pth' 26 | ;; 27 | C2F) 28 | TRAIN_IMDB_S="cityscapes_synthFoggytrain" 29 | TRAIN_IMDB_T="cityscapes_foggytrain" 30 | TEST_IMDB="cityscapes_foggyval" 31 | STEPSIZE="[50000]" 32 | ANCHORS="[4,8,16,32]" 33 | RATIOS="[0.5,1,2]" 34 | NET_FINAL='trained_weights/adapt_weight/vgg16_faster_rcnn_C2F_stage2.pth' 35 | ;; 36 | C2BDD) 37 | TRAIN_IMDB_S="cityscapes_synthBDDdaytrain+cityscapes_synthBDDdayval" 38 | TRAIN_IMDB_T="bdd100k_train" 39 | TEST_IMDB="bdd100k_dayval" 40 | STEPSIZE="[50000]" 41 | ANCHORS="[4,8,16,32]" 42 | RATIOS="[0.5,1,2]" 43 | NET_FINAL='trained_weights/adapt_weight/vgg16_faster_rcnn_C2BDD_stage2.pth' 44 | ;; 45 | *) 46 | echo "No dataset given" 47 | exit 48 | ;; 49 | esac 50 | 51 | LOG="experiments/logs/test_${NET}_${TRAIN_IMDB_S}_adapt_${TEST_IMDB}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 52 | exec &> >(tee -a "$LOG") 53 | echo Logging output to "$LOG" 54 | 55 | set +x 56 | 57 | if [[ ! 
-z ${EXTRA_ARGS_SLUG} ]]; then 58 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/test_net.py \ 59 | --imdb ${TEST_IMDB} \ 60 | --model ${NET_FINAL} \ 61 | --cfg experiments/cfgs/${NET}_${ADAPT_MODE}.yml \ 62 | --tag ${EXTRA_ARGS_SLUG}_adapt \ 63 | --net ${NET} \ 64 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 65 | ${EXTRA_ARGS} 66 | else 67 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/test_net.py \ 68 | --imdb ${TEST_IMDB} \ 69 | --model ${NET_FINAL} \ 70 | --cfg experiments/cfgs/${NET}_${ADAPT_MODE}.yml \ 71 | --tag ${EXTRA_ARGS_SLUG}_adapt \ 72 | --net ${NET} \ 73 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 74 | ${EXTRA_ARGS} 75 | fi 76 | 77 | -------------------------------------------------------------------------------- /experiments/scripts/test_adapt_faster_rcnn_stage1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | GPU_ID=$1 9 | ADAPT_MODE=$2 10 | NET=$3 11 | TEST_ITER=$4 12 | 13 | array=( $@ ) 14 | len=${#array[@]} 15 | EXTRA_ARGS=${array[@]:4:$len} 16 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 17 | 18 | case ${ADAPT_MODE} in 19 | K2C) 20 | TRAIN_IMDB_S="KITTI_train+KITTI_val" 21 | TRAIN_IMDB_T="KITTI_synthCity" 22 | TEST_IMDB="cityscapes_val" 23 | STEPSIZE="[50000]" 24 | ITERS=${TEST_ITER} 25 | ANCHORS="[4,8,16,32]" 26 | RATIOS="[0.5,1,2]" 27 | ;; 28 | C2F) 29 | TRAIN_IMDB_S="cityscapes_train" 30 | TRAIN_IMDB_T="cityscapes_synthFoggytrain" 31 | TEST_IMDB="cityscapes_foggyval" 32 | STEPSIZE="[50000]" 33 | ITERS=${TEST_ITER} 34 | ANCHORS="[4,8,16,32]" 35 | RATIOS="[0.5,1,2]" 36 | ;; 37 | C2BDD) 38 | TRAIN_IMDB_S="cityscapes_train+cityscapes_val" 39 | TRAIN_IMDB_T="cityscapes_synthBDDdaytrain+cityscapes_synthBDDdayval" 40 | TEST_IMDB="bdd100k_dayval" 41 | STEPSIZE="[50000]" 42 | ITERS=${TEST_ITER} 43 | ANCHORS="[4,8,16,32]" 44 | RATIOS="[0.5,1,2]" 45 | ;; 46 | *) 47 | echo "No dataset given" 48 | exit 49 | ;; 50 | esac 51 | 52 | LOG="experiments/logs/test_${NET}_${TRAIN_IMDB_S}_adapt_${TEST_IMDB}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 53 | exec &> >(tee -a "$LOG") 54 | echo Logging output to "$LOG" 55 | 56 | set +x 57 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 58 | NET_FINAL=output/${NET}/${TRAIN_IMDB_S}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth 59 | else 60 | NET_FINAL=output/${NET}/${TRAIN_IMDB_S}/_adapt/${NET}_faster_rcnn_${ADAPT_MODE}_stage1_iter_${ITERS}.pth 61 | fi 62 | 63 | if [[ ! 
-z ${EXTRA_ARGS_SLUG} ]]; then 64 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/test_net.py \ 65 | --imdb ${TEST_IMDB} \ 66 | --model ${NET_FINAL} \ 67 | --cfg experiments/cfgs/${NET}_${ADAPT_MODE}.yml \ 68 | --tag ${EXTRA_ARGS_SLUG}_adapt \ 69 | --net ${NET} \ 70 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 71 | ${EXTRA_ARGS} 72 | else 73 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/test_net.py \ 74 | --imdb ${TEST_IMDB} \ 75 | --model ${NET_FINAL} \ 76 | --cfg experiments/cfgs/${NET}_${ADAPT_MODE}.yml \ 77 | --tag ${EXTRA_ARGS_SLUG}_adapt \ 78 | --net ${NET} \ 79 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 80 | ${EXTRA_ARGS} 81 | fi 82 | 83 | -------------------------------------------------------------------------------- /experiments/scripts/test_adapt_faster_rcnn_stage2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | GPU_ID=$1 9 | ADAPT_MODE=$2 10 | NET=$3 11 | TEST_ITER=$4 12 | 13 | array=( $@ ) 14 | len=${#array[@]} 15 | EXTRA_ARGS=${array[@]:4:$len} 16 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 17 | 18 | case ${ADAPT_MODE} in 19 | K2C) 20 | TRAIN_IMDB_S="KITTI_synthCity" 21 | TRAIN_IMDB_T="cityscapes_train" 22 | TEST_IMDB="cityscapes_val" 23 | STEPSIZE="[50000]" 24 | ITERS=${TEST_ITER} 25 | ANCHORS="[4,8,16,32]" 26 | RATIOS="[0.5,1,2]" 27 | ;; 28 | C2F) 29 | TRAIN_IMDB_S="cityscapes_synthFoggytrain" 30 | TRAIN_IMDB_T="cityscapes_foggytrain" 31 | TEST_IMDB="cityscapes_foggyval" 32 | STEPSIZE="[50000]" 33 | ITERS=${TEST_ITER} 34 | ANCHORS="[4,8,16,32]" 35 | RATIOS="[0.5,1,2]" 36 | ;; 37 | C2BDD) 38 | TRAIN_IMDB_S="cityscapes_synthBDDdaytrain+cityscapes_synthBDDdayval" 39 | TRAIN_IMDB_T="bdd100k_train" 40 | TEST_IMDB="bdd100k_dayval" 41 | STEPSIZE="[50000]" 42 | ITERS=${TEST_ITER} 43 | ANCHORS="[4,8,16,32]" 44 | RATIOS="[0.5,1,2]" 45 | ;; 46 | *) 47 | echo "No dataset given" 48 | exit 49 | ;; 50 | esac 51 | 52 | LOG="experiments/logs/test_${NET}_${TRAIN_IMDB_S}_adapt_${TEST_IMDB}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 53 | exec &> >(tee -a "$LOG") 54 | echo Logging output to "$LOG" 55 | 56 | set +x 57 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 58 | NET_FINAL=output/${NET}/${TRAIN_IMDB_S}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth 59 | else 60 | NET_FINAL=output/${NET}/${TRAIN_IMDB_S}/_adapt/${NET}_faster_rcnn_${ADAPT_MODE}_stage2_iter_${ITERS}.pth 61 | fi 62 | 63 | if [[ ! 
-z ${EXTRA_ARGS_SLUG} ]]; then 64 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/test_net.py \ 65 | --imdb ${TEST_IMDB} \ 66 | --model ${NET_FINAL} \ 67 | --cfg experiments/cfgs/${NET}_${ADAPT_MODE}.yml \ 68 | --tag ${EXTRA_ARGS_SLUG}_adapt \ 69 | --net ${NET} \ 70 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 71 | ${EXTRA_ARGS} 72 | else 73 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/test_net.py \ 74 | --imdb ${TEST_IMDB} \ 75 | --model ${NET_FINAL} \ 76 | --cfg experiments/cfgs/${NET}_${ADAPT_MODE}.yml \ 77 | --tag ${EXTRA_ARGS_SLUG}_adapt \ 78 | --net ${NET} \ 79 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 80 | ${EXTRA_ARGS} 81 | fi 82 | 83 | -------------------------------------------------------------------------------- /experiments/scripts/test_faster_rcnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | GPU_ID=$1 9 | DATASET=$2 10 | NET=$3 11 | 12 | array=( $@ ) 13 | len=${#array[@]} 14 | EXTRA_ARGS=${array[@]:3:$len} 15 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 16 | 17 | case ${DATASET} in 18 | pascal_voc) 19 | TRAIN_IMDB="voc_2007_trainval" 20 | TEST_IMDB="voc_2007_test" 21 | ITERS=70000 22 | ANCHORS="[8,16,32]" 23 | RATIOS="[0.5,1,2]" 24 | ;; 25 | pascal_voc_0712) 26 | TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval" 27 | TEST_IMDB="voc_2007_test" 28 | ITERS=110000 29 | ANCHORS="[8,16,32]" 30 | RATIOS="[0.5,1,2]" 31 | ;; 32 | coco) 33 | TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival" 34 | TEST_IMDB="coco_2014_minival" 35 | ITERS=490000 36 | ANCHORS="[4,8,16,32]" 37 | RATIOS="[0.5,1,2]" 38 | ;; 39 | KITTI) 40 | TRAIN_IMDB="KITTI_train" 41 | TEST_IMDB="KITTI_val" 42 | STEPSIZE="[350000]" 43 | ITERS=490000 44 | ANCHORS="[4,8,16,32]" 45 | RATIOS="[0.5,1,2]" 46 | ;; 47 | cityscapes) 48 | TRAIN_IMDB="cityscapes_train+cityscapes_val" 49 | TEST_IMDB="bdd100k_dayval" 50 | STEPSIZE="[350000]" 51 | ITERS=110000 52 | ANCHORS="[4,8,16,32]" 53 | RATIOS="[0.5,1,2]" 54 | ;; 55 | foggyCity) 56 | TRAIN_IMDB="cityscapes_foggytrain" 57 | TEST_IMDB="cityscapes_foggyval" 58 | STEPSIZE="[350000]" 59 | ITERS=80000 60 | ANCHORS="[4,8,16,32]" 61 | RATIOS="[0.5,1,2]" 62 | ;; 63 | bdd100k) 64 | TRAIN_IMDB="bdd100k_nighttrain" 65 | TEST_IMDB="bdd100k_nightval" 66 | STEPSIZE="[350000]" 67 | ITERS=200000 68 | ANCHORS="[4,8,16,32]" 69 | RATIOS="[0.5,1,2]" 70 | ;; 71 | *) 72 | echo "No dataset given" 73 | exit 74 | ;; 75 | esac 76 | 77 | LOG="experiments/logs/test_${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 78 | exec &> >(tee -a "$LOG") 79 | echo Logging output to "$LOG" 80 | 81 | set +x 82 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 83 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth 84 | else 85 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/default/${NET}_faster_rcnn_iter_${ITERS}.pth 86 | fi 87 | set -x 88 | 89 | if [[ ! 
-z ${EXTRA_ARGS_SLUG} ]]; then 90 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/test_net.py \ 91 | --imdb ${TEST_IMDB} \ 92 | --model ${NET_FINAL} \ 93 | --cfg experiments/cfgs/${NET}.yml \ 94 | --tag ${EXTRA_ARGS_SLUG} \ 95 | --net ${NET} \ 96 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 97 | ${EXTRA_ARGS} 98 | else 99 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/test_net.py \ 100 | --imdb ${TEST_IMDB} \ 101 | --model ${NET_FINAL} \ 102 | --cfg experiments/cfgs/${NET}.yml \ 103 | --net ${NET} \ 104 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 105 | ${EXTRA_ARGS} 106 | fi 107 | 108 | -------------------------------------------------------------------------------- /experiments/scripts/test_faster_rcnn_notime.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | GPU_ID=$1 9 | DATASET=$2 10 | NET=$3 11 | 12 | array=( $@ ) 13 | len=${#array[@]} 14 | EXTRA_ARGS=${array[@]:3:$len} 15 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 16 | 17 | case ${DATASET} in 18 | pascal_voc) 19 | TRAIN_IMDB="voc_2007_trainval" 20 | TEST_IMDB="voc_2007_test" 21 | ITERS=70000 22 | ANCHORS="[8,16,32]" 23 | RATIOS="[0.5,1,2]" 24 | ;; 25 | pascal_voc_0712) 26 | TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval" 27 | TEST_IMDB="voc_2007_test" 28 | ITERS=110000 29 | ANCHORS="[8,16,32]" 30 | RATIOS="[0.5,1,2]" 31 | ;; 32 | coco) 33 | TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival" 34 | TEST_IMDB="coco_2014_minival" 35 | ITERS=490000 36 | ANCHORS="[4,8,16,32]" 37 | RATIOS="[0.5,1,2]" 38 | ;; 39 | *) 40 | echo "No dataset given" 41 | exit 42 | ;; 43 | esac 44 | 45 | LOG="experiments/logs/test_${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 46 | exec &> >(tee -a "$LOG") 47 | echo Logging output to "$LOG" 48 | 49 | set +x 50 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 51 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth 52 | else 53 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/default/${NET}_faster_rcnn_iter_${ITERS}.pth 54 | fi 55 | set -x 56 | 57 | if [[ ! 
-z ${EXTRA_ARGS_SLUG} ]]; then 58 | CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/test_net.py \ 59 | --imdb ${TEST_IMDB} \ 60 | --model ${NET_FINAL} \ 61 | --cfg experiments/cfgs/${NET}.yml \ 62 | --tag ${EXTRA_ARGS_SLUG} \ 63 | --net ${NET} \ 64 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 65 | ${EXTRA_ARGS} 66 | else 67 | CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/test_net.py \ 68 | --imdb ${TEST_IMDB} \ 69 | --model ${NET_FINAL} \ 70 | --cfg experiments/cfgs/${NET}.yml \ 71 | --net ${NET} \ 72 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 73 | ${EXTRA_ARGS} 74 | fi 75 | 76 | -------------------------------------------------------------------------------- /experiments/scripts/train_adapt_faster_rcnn_stage1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | GPU_ID=$1 9 | ADAPT_MODE=$2 10 | NET=$3 11 | 12 | array=( $@ ) 13 | len=${#array[@]} 14 | EXTRA_ARGS=${array[@]:3:$len} 15 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 16 | 17 | case ${ADAPT_MODE} in 18 | K2C) 19 | PRETRAINED_WEIGHT="${NET}_faster_rcnn_KITTI_pretrained.pth" 20 | SNAPSHOT_PREFIX="${NET}_faster_rcnn_${ADAPT_MODE}_stage1" 21 | TRAIN_IMDB_S="KITTI_train+KITTI_val" 22 | TRAIN_IMDB_T="KITTI_synthCity" 23 | TEST_IMDB="cityscapes_val" 24 | STEPSIZE="[50000]" 25 | ITERS=70000 26 | ANCHORS="[4,8,16,32]" 27 | RATIOS="[0.5,1,2]" 28 | ;; 29 | C2F) 30 | PRETRAINED_WEIGHT="${NET}_faster_rcnn_city_pretrained_8class.pth" 31 | SNAPSHOT_PREFIX="${NET}_faster_rcnn_${ADAPT_MODE}_stage1" 32 | TRAIN_IMDB_S="cityscapes_train" 33 | TRAIN_IMDB_T="cityscapes_synthFoggytrain" 34 | TEST_IMDB="cityscapes_foggyval" 35 | STEPSIZE="[50000]" 36 | ITERS=70000 37 | ANCHORS="[4,8,16,32]" 38 | RATIOS="[0.5,1,2]" 39 | ;; 40 | C2BDD) 41 | PRETRAINED_WEIGHT="${NET}_faster_rcnn_city_pretrained_10class.pth" 42 | SNAPSHOT_PREFIX="${NET}_faster_rcnn_${ADAPT_MODE}_stage1" 43 | TRAIN_IMDB_S="cityscapes_train+cityscapes_val" 44 | TRAIN_IMDB_T="cityscapes_synthBDDdaytrain+cityscapes_synthBDDdayval" 45 | TEST_IMDB="bdd100k_dayval" 46 | STEPSIZE="[50000]" 47 | ITERS=70000 48 | ANCHORS="[4,8,16,32]" 49 | RATIOS="[0.5,1,2]" 50 | ;; 51 | *) 52 | echo "No dataset given" 53 | exit 54 | ;; 55 | esac 56 | 57 | LOG="experiments/logs/${NET}_${TRAIN_IMDB_S}2${TRAIN_IMDB_T}_${EXTRA_ARGS_SLUG}_${NET}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 58 | exec &> >(tee -a "$LOG") 59 | echo Logging output to "$LOG" 60 | 61 | set +x 62 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 63 | NET_FINAL=output/${NET}/${TRAIN_IMDB_S}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth 64 | else 65 | NET_FINAL=output/${NET}/${TRAIN_IMDB_S}/adapt/${NET}_faster_rcnn_iter_${ITERS}.pth 66 | fi 67 | set -x 68 | 69 | if [ ! -f ${NET_FINAL}.index ]; then 70 | if [[ ! 
-z ${EXTRA_ARGS_SLUG} ]]; then 71 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/trainval_net_adapt.py \ 72 | --weight trained_weights/pretrained_detector/${PRETRAINED_WEIGHT} \ 73 | --imdb ${TRAIN_IMDB_S} \ 74 | --imdbval ${TEST_IMDB} \ 75 | --imdb_T ${TRAIN_IMDB_T} \ 76 | --iters ${ITERS} \ 77 | --cfg experiments/cfgs/${NET}_${ADAPT_MODE}.yml \ 78 | --tag ${EXTRA_ARGS_SLUG}_adapt \ 79 | --net ${NET} \ 80 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 81 | TRAIN.STEPSIZE ${STEPSIZE} TRAIN.SNAPSHOT_PREFIX ${SNAPSHOT_PREFIX} ${EXTRA_ARGS} 82 | else 83 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/trainval_net_adapt.py \ 84 | --weight trained_weights/pretrained_detector/${PRETRAINED_WEIGHT} \ 85 | --imdb ${TRAIN_IMDB_S} \ 86 | --imdbval ${TEST_IMDB} \ 87 | --imdb_T ${TRAIN_IMDB_T} \ 88 | --iters ${ITERS} \ 89 | --cfg experiments/cfgs/${NET}_${ADAPT_MODE}.yml \ 90 | --tag ${EXTRA_ARGS_SLUG}_adapt \ 91 | --net ${NET} \ 92 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 93 | TRAIN.STEPSIZE ${STEPSIZE} TRAIN.SNAPSHOT_PREFIX ${SNAPSHOT_PREFIX} ${EXTRA_ARGS} 94 | fi 95 | fi 96 | 97 | ./experiments/scripts/test_adapt_faster_rcnn_stage1.sh $@ ${ITERS} 98 | -------------------------------------------------------------------------------- /experiments/scripts/train_adapt_faster_rcnn_stage2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | GPU_ID=$1 9 | ADAPT_MODE=$2 10 | NET=$3 11 | 12 | array=( $@ ) 13 | len=${#array[@]} 14 | EXTRA_ARGS=${array[@]:3:$len} 15 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 16 | 17 | case ${ADAPT_MODE} in 18 | K2C) 19 | SNAPSHOT_PREFIX="${NET}_faster_rcnn_${ADAPT_MODE}_stage2" 20 | PREV_S="KITTI_train+KITTI_val" 21 | TRAIN_IMDB_S="KITTI_synthCity" 22 | TRAIN_IMDB_T="cityscapes_train" 23 | TEST_IMDB="cityscapes_val" 24 | STEPSIZE="[]" 25 | ITERS=10000 26 | ANCHORS="[4,8,16,32]" 27 | RATIOS="[0.5,1,2]" 28 | ;; 29 | C2F) 30 | SNAPSHOT_PREFIX="${NET}_faster_rcnn_${ADAPT_MODE}_stage2" 31 | PREV_S="cityscapes_train" 32 | TRAIN_IMDB_S="cityscapes_synthFoggytrain" 33 | TRAIN_IMDB_T="cityscapes_foggytrain" 34 | TEST_IMDB="cityscapes_foggyval" 35 | STEPSIZE="[]" 36 | ITERS=60000 37 | ANCHORS="[4,8,16,32]" 38 | RATIOS="[0.5,1,2]" 39 | ;; 40 | C2BDD) 41 | SNAPSHOT_PREFIX="${NET}_faster_rcnn_${ADAPT_MODE}_stage2" 42 | PREV_S="cityscapes_train+cityscapes_val" 43 | TRAIN_IMDB_S="cityscapes_synthBDDdaytrain+cityscapes_synthBDDdayval" 44 | TRAIN_IMDB_T="bdd100k_daytrain" 45 | TEST_IMDB="bdd100k_dayval" 46 | STEPSIZE="[10000]" 47 | ITERS=30000 48 | ANCHORS="[4,8,16,32]" 49 | RATIOS="[0.5,1,2]" 50 | ;; 51 | *) 52 | echo "No dataset given" 53 | exit 54 | ;; 55 | esac 56 | 57 | LOG="experiments/logs/${NET}_${TRAIN_IMDB_S}2${TRAIN_IMDB_T}_${EXTRA_ARGS_SLUG}_${NET}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 58 | exec &> >(tee -a "$LOG") 59 | echo Logging output to "$LOG" 60 | 61 | set +x 62 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 63 | NET_FINAL=output/${NET}/${TRAIN_IMDB_S}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth 64 | else 65 | NET_FINAL=output/${NET}/${TRAIN_IMDB_S}/adapt/${NET}_faster_rcnn_iter_${ITERS}.pth 66 | fi 67 | set -x 68 | 69 | if [ ! -f ${NET_FINAL}.index ]; then 70 | if [[ !
-z ${EXTRA_ARGS_SLUG} ]]; then 71 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/trainval_net_adapt.py \ 72 | --weight output/${NET}/${PREV_S}/_adapt/${NET}_faster_rcnn_${ADAPT_MODE}_stage1_iter_70000.pth \ 73 | --imdb ${TRAIN_IMDB_S} \ 74 | --imdbval ${TEST_IMDB} \ 75 | --imdb_T ${TRAIN_IMDB_T} \ 76 | --iters ${ITERS} \ 77 | --cfg experiments/cfgs/${NET}_${ADAPT_MODE}.yml \ 78 | --tag ${EXTRA_ARGS_SLUG}_adapt \ 79 | --net ${NET} \ 80 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} ADAPT_MODE ${ADAPT_MODE} \ 81 | TRAIN.STEPSIZE ${STEPSIZE} TRAIN.SNAPSHOT_PREFIX ${SNAPSHOT_PREFIX} ${EXTRA_ARGS} 82 | else 83 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/trainval_net_adapt.py \ 84 | --weight output/${NET}/${PREV_S}/_adapt/${NET}_faster_rcnn_${ADAPT_MODE}_stage1_iter_70000.pth \ 85 | --imdb ${TRAIN_IMDB_S} \ 86 | --imdbval ${TEST_IMDB} \ 87 | --imdb_T ${TRAIN_IMDB_T} \ 88 | --iters ${ITERS} \ 89 | --cfg experiments/cfgs/${NET}_${ADAPT_MODE}.yml \ 90 | --tag ${EXTRA_ARGS_SLUG}_adapt \ 91 | --net ${NET} \ 92 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} ADAPT_MODE ${ADAPT_MODE} \ 93 | TRAIN.STEPSIZE ${STEPSIZE} TRAIN.SNAPSHOT_PREFIX ${SNAPSHOT_PREFIX} ${EXTRA_ARGS} 94 | fi 95 | fi 96 | 97 | ./experiments/scripts/test_adapt_faster_rcnn_stage2.sh $@ ${ITERS} 98 | -------------------------------------------------------------------------------- /experiments/scripts/train_faster_rcnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | GPU_ID=$1 9 | DATASET=$2 10 | NET=$3 11 | 12 | array=( $@ ) 13 | len=${#array[@]} 14 | EXTRA_ARGS=${array[@]:3:$len} 15 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 16 | 17 | case ${DATASET} in 18 | pascal_voc) 19 | TRAIN_IMDB="voc_2007_trainval" 20 | TEST_IMDB="voc_2007_test" 21 | STEPSIZE="[50000]" 22 | ITERS=70000 23 | ANCHORS="[8,16,32]" 24 | RATIOS="[0.5,1,2]" 25 | ;; 26 | pascal_voc_0712) 27 | TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval" 28 | TEST_IMDB="voc_2007_test" 29 | STEPSIZE="[80000]" 30 | ITERS=110000 31 | ANCHORS="[8,16,32]" 32 | RATIOS="[0.5,1,2]" 33 | ;; 34 | coco) 35 | TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival" 36 | TEST_IMDB="coco_2014_minival" 37 | STEPSIZE="[350000]" 38 | ITERS=490000 39 | ANCHORS="[4,8,16,32]" 40 | RATIOS="[0.5,1,2]" 41 | ;; 42 | KITTI) 43 | TRAIN_IMDB="KITTI_train" 44 | TEST_IMDB="KITTI_val" 45 | STEPSIZE="[50000]" 46 | ITERS=70000 47 | ANCHORS="[4,8,16,32]" 48 | RATIOS="[0.5,1,2]" 49 | ;; 50 | cityscapes) 51 | TRAIN_IMDB="cityscapes_train+cityscapes_val" 52 | TEST_IMDB="cityscapes_val" 53 | STEPSIZE="[80000]" 54 | ITERS=110000 55 | ANCHORS="[4,8,16,32]" 56 | RATIOS="[0.5,1,2]" 57 | ;; 58 | foggyCity) 59 | TRAIN_IMDB="cityscapes_foggytrain" 60 | TEST_IMDB="cityscapes_foggyval" 61 | STEPSIZE="[80000]" 62 | ITERS=110000 63 | ANCHORS="[4,8,16,32]" 64 | RATIOS="[0.5,1,2]" 65 | ;; 66 | bdd100k) 67 | TRAIN_IMDB='bdd100k_daytrain' 68 | TEST_IMDB='bdd100k_dayval' 69 | STEPSIZE="[80000]" 70 | ITERS=110000 71 | ANCHORS="[4,8,16,32]" 72 | RATIOS="[0.5,1,2]" 73 | ;; 74 | sim10k) 75 | TRAIN_IMDB='sim10k_trainval' 76 | TEST_IMDB='sim10k_trainval' 77 | STEPSIZE="[50000]" 78 | ITERS=70000 79 | ANCHORS="[4,8,16,32]" 80 | RATIOS="[0.5,1,2]" 81 | ;; 82 | *) 83 | echo "No dataset given" 84 | exit 85 | ;; 86 | esac 87 | 88 | LOG="experiments/logs/${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}_${NET}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 89 | exec &> >(tee -a "$LOG") 90 | echo Logging output to "$LOG" 91 
| 92 | set +x 93 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 94 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth 95 | else 96 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/default/${NET}_faster_rcnn_iter_${ITERS}.pth 97 | fi 98 | set -x 99 | 100 | if [ ! -f ${NET_FINAL}.index ]; then 101 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 102 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/trainval_net.py \ 103 | --weight data/imagenet_weights/${NET}.pth \ 104 | --imdb ${TRAIN_IMDB} \ 105 | --imdbval ${TEST_IMDB} \ 106 | --iters ${ITERS} \ 107 | --cfg experiments/cfgs/${NET}.yml \ 108 | --tag ${EXTRA_ARGS_SLUG} \ 109 | --net ${NET} \ 110 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 111 | TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS} 112 | else 113 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/trainval_net.py \ 114 | --weight data/imagenet_weights/${NET}.pth \ 115 | --imdb ${TRAIN_IMDB} \ 116 | --imdbval ${TEST_IMDB} \ 117 | --iters ${ITERS} \ 118 | --cfg experiments/cfgs/${NET}.yml \ 119 | --net ${NET} \ 120 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 121 | TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS} 122 | fi 123 | fi 124 | 125 | ./experiments/scripts/test_faster_rcnn.sh $@ 126 | -------------------------------------------------------------------------------- /experiments/scripts/train_faster_rcnn_notime.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | GPU_ID=$1 9 | DATASET=$2 10 | NET=$3 11 | 12 | array=( $@ ) 13 | len=${#array[@]} 14 | EXTRA_ARGS=${array[@]:3:$len} 15 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 16 | 17 | case ${DATASET} in 18 | pascal_voc) 19 | TRAIN_IMDB="voc_2007_trainval" 20 | TEST_IMDB="voc_2007_test" 21 | STEPSIZE="[50000]" 22 | ITERS=70000 23 | ANCHORS="[8,16,32]" 24 | RATIOS="[0.5,1,2]" 25 | ;; 26 | pascal_voc_0712) 27 | TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval" 28 | TEST_IMDB="voc_2007_test" 29 | STEPSIZE="[80000]" 30 | ITERS=110000 31 | ANCHORS="[8,16,32]" 32 | RATIOS="[0.5,1,2]" 33 | ;; 34 | coco) 35 | TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival" 36 | TEST_IMDB="coco_2014_minival" 37 | STEPSIZE="[900000]" 38 | ITERS=1190000 39 | ANCHORS="[4,8,16,32]" 40 | RATIOS="[0.5,1,2]" 41 | ;; 42 | *) 43 | echo "No dataset given" 44 | exit 45 | ;; 46 | esac 47 | 48 | LOG="experiments/logs/${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}_${NET}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 49 | exec &> >(tee -a "$LOG") 50 | echo Logging output to "$LOG" 51 | 52 | set +x 53 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 54 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth 55 | else 56 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/default/${NET}_faster_rcnn_iter_${ITERS}.pth 57 | fi 58 | set -x 59 | 60 | if [ ! -f ${NET_FINAL}.index ]; then 61 | if [[ ! 
-z ${EXTRA_ARGS_SLUG} ]]; then 62 | CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/trainval_net.py \ 63 | --weight data/imagenet_weights/${NET}.pth \ 64 | --imdb ${TRAIN_IMDB} \ 65 | --imdbval ${TEST_IMDB} \ 66 | --iters ${ITERS} \ 67 | --cfg experiments/cfgs/${NET}.yml \ 68 | --tag ${EXTRA_ARGS_SLUG} \ 69 | --net ${NET} \ 70 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 71 | TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS} 72 | else 73 | CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/trainval_net.py \ 74 | --weight data/imagenet_weights/${NET}.pth \ 75 | --imdb ${TRAIN_IMDB} \ 76 | --imdbval ${TEST_IMDB} \ 77 | --iters ${ITERS} \ 78 | --cfg experiments/cfgs/${NET}.yml \ 79 | --net ${NET} \ 80 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 81 | TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS} 82 | fi 83 | fi 84 | 85 | ./experiments/scripts/test_faster_rcnn_notime.sh $@ 86 | -------------------------------------------------------------------------------- /figure/adapt_results_c2bdd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/figure/adapt_results_c2bdd.png -------------------------------------------------------------------------------- /figure/adapt_results_c2f.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/figure/adapt_results_c2f.png -------------------------------------------------------------------------------- /figure/adapt_results_k2c.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/figure/adapt_results_k2c.png -------------------------------------------------------------------------------- /figure/det_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/figure/det_results.png -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m: -------------------------------------------------------------------------------- 1 | function VOCopts = get_voc_opts(path) 2 | 3 | tmp = pwd; 4 | cd(path); 5 | try 6 | addpath('VOCcode'); 7 | VOCinit; 8 | catch 9 | rmpath('VOCcode'); 10 | cd(tmp); 11 | error(sprintf('VOCcode directory not found under %s', path)); 12 | end 13 | rmpath('VOCcode'); 14 | cd(tmp); 15 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m: -------------------------------------------------------------------------------- 1 | function res = voc_eval(path, comp_id, test_set, output_dir) 2 | 3 | VOCopts = get_voc_opts(path); 4 | VOCopts.testset = test_set; 5 | 6 | for i = 1:length(VOCopts.classes) 7 | cls = VOCopts.classes{i}; 8 | res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir); 9 | end 10 | 11 | fprintf('\n~~~~~~~~~~~~~~~~~~~~\n'); 12 | fprintf('Results:\n'); 13 | aps = [res(:).ap]'; 14 | fprintf('%.1f\n', aps * 100); 15 | fprintf('%.1f\n', mean(aps) * 100); 16 | fprintf('~~~~~~~~~~~~~~~~~~~~\n'); 17 | 18 | function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir) 19 | 20 | test_set = VOCopts.testset; 21 | year = VOCopts.dataset(4:end); 22 | 23 | 
addpath(fullfile(VOCopts.datadir, 'VOCcode')); 24 | 25 | res_fn = sprintf(VOCopts.detrespath, comp_id, cls); 26 | 27 | recall = []; 28 | prec = []; 29 | ap = 0; 30 | ap_auc = 0; 31 | 32 | do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test'); 33 | if do_eval 34 | % Bug in VOCevaldet requires that tic has been called first 35 | tic; 36 | [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true); 37 | ap_auc = xVOCap(recall, prec); 38 | 39 | % force plot limits 40 | ylim([0 1]); 41 | xlim([0 1]); 42 | 43 | print(gcf, '-djpeg', '-r0', ... 44 | [output_dir '/' cls '_pr.jpg']); 45 | end 46 | fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc); 47 | 48 | res.recall = recall; 49 | res.prec = prec; 50 | res.ap = ap; 51 | res.ap_auc = ap_auc; 52 | 53 | save([output_dir '/' cls '_pr.mat'], ... 54 | 'res', 'recall', 'prec', 'ap', 'ap_auc'); 55 | 56 | rmpath(fullfile(VOCopts.datadir, 'VOCcode')); 57 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m: -------------------------------------------------------------------------------- 1 | function ap = xVOCap(rec,prec) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | mrec=[0 ; rec ; 1]; 5 | mpre=[0 ; prec ; 0]; 6 | for i=numel(mpre)-1:-1:1 7 | mpre(i)=max(mpre(i),mpre(i+1)); 8 | end 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1; 10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 11 | -------------------------------------------------------------------------------- /lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/datasets/cityscapes_eval.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Bharath Hariharan 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import xml.etree.ElementTree as ET 11 | import os 12 | import pickle 13 | import numpy as np 14 | import json 15 | 16 | def parse_rec(filename): 17 | """ Parse cityscapes rec """ 18 | objects = [] 19 | with open(filename, 'r') as f: 20 | info = json.load(f) 21 | objs = info["objects"] 22 | num_objs = len(objs) 23 | 24 | for obj in objs: 25 | obj_struct = {} 26 | clsName = obj['label'] 27 | #print clsName 28 | maxW = float(info['imgWidth']) - 1. 29 | maxH = float(info['imgHeight']) - 1. 30 | x1 = maxW 31 | y1 = maxH 32 | x2 = 0. 33 | y2 = 0. 
34 | for p in obj['polygon']: # (x, y) 35 | if p[0] < x1: 36 | x1 = max(0, p[0]) 37 | if p[0] > x2: 38 | x2 = min(maxW, p[0]) 39 | if p[1] < y1: 40 | y1 = max(0, p[1]) 41 | if p[1] > y2: 42 | y2 = min(maxH, p[1]) 43 | assert x1 >= 0 and x2 >=0 and y1 >= 0 and y2 >= 0 44 | assert x1 <= x2 and y1 <= y2 45 | obj_struct['name'] = clsName 46 | obj_struct['difficult'] = 0 47 | obj_struct['bbox'] = [x1, y1, x2, y2] 48 | objects.append(obj_struct) 49 | return objects 50 | 51 | def voc_ap(rec, prec, use_07_metric=False): 52 | """ ap = voc_ap(rec, prec, [use_07_metric]) 53 | Compute VOC AP given precision and recall. 54 | If use_07_metric is true, uses the 55 | VOC 07 11 point method (default:False). 56 | """ 57 | if use_07_metric: 58 | # 11 point metric 59 | ap = 0. 60 | for t in np.arange(0., 1.1, 0.1): 61 | if np.sum(rec >= t) == 0: 62 | p = 0 63 | else: 64 | p = np.max(prec[rec >= t]) 65 | ap = ap + p / 11. 66 | else: 67 | # correct AP calculation 68 | # first append sentinel values at the end 69 | mrec = np.concatenate(([0.], rec, [1.])) 70 | mpre = np.concatenate(([0.], prec, [0.])) 71 | 72 | # compute the precision envelope 73 | for i in range(mpre.size - 1, 0, -1): 74 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 75 | 76 | # to calculate area under PR curve, look for points 77 | # where X axis (recall) changes value 78 | i = np.where(mrec[1:] != mrec[:-1])[0] 79 | 80 | # and sum (\Delta recall) * prec 81 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 82 | return ap 83 | 84 | 85 | def cityscapes_eval(detpath, 86 | annopath, 87 | imagesetfile, 88 | classname, 89 | cachedir, 90 | ovthresh=0.5, 91 | use_07_metric=False, 92 | use_diff=False): 93 | """rec, prec, ap = voc_eval(detpath, 94 | annopath, 95 | imagesetfile, 96 | classname, 97 | [ovthresh], 98 | [use_07_metric]) 99 | 100 | Top level function that does the PASCAL VOC evaluation. 101 | 102 | detpath: Path to detections 103 | detpath.format(classname) should produce the detection results file. 104 | annopath: Path to annotations 105 | annopath.format(imagename) should be the xml annotations file. 106 | imagesetfile: Text file containing the list of images, one image per line. 
107 | classname: Category name (duh) 108 | cachedir: Directory for caching the annotations 109 | [ovthresh]: Overlap threshold (default = 0.5) 110 | [use_07_metric]: Whether to use VOC07's 11 point AP computation 111 | (default False) 112 | """ 113 | # assumes detections are in detpath.format(classname) 114 | # assumes annotations are in annopath.format(imagename) 115 | # assumes imagesetfile is a text file with each line an image name 116 | # cachedir caches the annotations in a pickle file 117 | 118 | # first load gt 119 | if not os.path.isdir(cachedir): 120 | os.mkdir(cachedir) 121 | if 'foggy' in imagesetfile[0]: 122 | cachefile = os.path.join(cachedir, '%s_annots.pkl' % 'cityscapes_foggy') 123 | else: 124 | cachefile = os.path.join(cachedir, '%s_annots.pkl' % 'cityscapes') 125 | # read list of images 126 | 127 | imagenames = imagesetfile 128 | 129 | if not os.path.isfile(cachefile): 130 | # load annotations 131 | recs = {} 132 | for i, imagename in enumerate(imagenames): 133 | recs[imagename] = parse_rec(annopath.format(imagename[:imagename.find('_')], imagename[:imagename.find('leftImg8bit')])) 134 | if i % 100 == 0: 135 | print('Reading annotation for {:d}/{:d}'.format( 136 | i + 1, len(imagenames))) 137 | # save 138 | print('Saving cached annotations to {:s}'.format(cachefile)) 139 | with open(cachefile, 'wb') as f: 140 | pickle.dump(recs, f) 141 | else: 142 | # load 143 | with open(cachefile, 'rb') as f: 144 | try: 145 | recs = pickle.load(f) 146 | except: 147 | recs = pickle.load(f, encoding='bytes') 148 | 149 | # extract gt objects for this class 150 | class_recs = {} 151 | npos = 0 152 | for imagename in imagenames: 153 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 154 | bbox = np.array([x['bbox'] for x in R]) 155 | if use_diff: 156 | difficult = np.array([False for x in R]).astype(np.bool) 157 | else: 158 | difficult = np.array([x['difficult'] for x in R]).astype(np.bool) 159 | det = [False] * len(R) 160 | npos = npos + sum(~difficult) 161 | class_recs[imagename] = {'bbox': bbox, 162 | 'difficult': difficult, 163 | 'det': det} 164 | 165 | # read dets 166 | detfile = detpath.format(classname) 167 | 168 | with open(detfile, 'r') as f: 169 | lines = f.readlines() 170 | 171 | splitlines = [x.strip().split(' ') for x in lines] 172 | image_ids = [x[0] for x in splitlines] 173 | confidence = np.array([float(x[1]) for x in splitlines]) 174 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 175 | 176 | nd = len(image_ids) 177 | tp = np.zeros(nd) 178 | fp = np.zeros(nd) 179 | 180 | if BB.shape[0] > 0: 181 | # sort by confidence 182 | sorted_ind = np.argsort(-confidence) 183 | sorted_scores = np.sort(-confidence) 184 | BB = BB[sorted_ind, :] 185 | image_ids = [image_ids[x] for x in sorted_ind] 186 | 187 | # go down dets and mark TPs and FPs 188 | for d in range(nd): 189 | R = class_recs[image_ids[d]] 190 | bb = BB[d, :].astype(float) 191 | ovmax = -np.inf 192 | BBGT = R['bbox'].astype(float) 193 | 194 | if BBGT.size > 0: 195 | # compute overlaps 196 | # intersection 197 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 198 | iymin = np.maximum(BBGT[:, 1], bb[1]) 199 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 200 | iymax = np.minimum(BBGT[:, 3], bb[3]) 201 | iw = np.maximum(ixmax - ixmin + 1., 0.) 202 | ih = np.maximum(iymax - iymin + 1., 0.) 203 | inters = iw * ih 204 | 205 | # union 206 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 207 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 208 | (BBGT[:, 3] - BBGT[:, 1] + 1.) 
- inters) 209 | 210 | overlaps = inters / uni 211 | ovmax = np.max(overlaps) 212 | jmax = np.argmax(overlaps) 213 | 214 | if ovmax > ovthresh: 215 | if not R['difficult'][jmax]: 216 | if not R['det'][jmax]: 217 | tp[d] = 1. 218 | R['det'][jmax] = 1 219 | else: 220 | fp[d] = 1. 221 | else: 222 | fp[d] = 1. 223 | 224 | # compute precision recall 225 | fp = np.cumsum(fp) 226 | tp = np.cumsum(tp) 227 | rec = tp / float(npos) 228 | # avoid divide by zero in case the first detection matches a difficult 229 | # ground truth 230 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 231 | ap = voc_ap(rec, prec, use_07_metric) 232 | print(classname, npos) 233 | return rec, prec, ap 234 | -------------------------------------------------------------------------------- /lib/datasets/ds_utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | 12 | 13 | def unique_boxes(boxes, scale=1.0): 14 | """Return indices of unique boxes.""" 15 | v = np.array([1, 1e3, 1e6, 1e9]) 16 | hashes = np.round(boxes * scale).dot(v) 17 | _, index = np.unique(hashes, return_index=True) 18 | return np.sort(index) 19 | 20 | 21 | def xywh_to_xyxy(boxes): 22 | """Convert [x y w h] box format to [x1 y1 x2 y2] format.""" 23 | return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1)) 24 | 25 | 26 | def xyxy_to_xywh(boxes): 27 | """Convert [x1 y1 x2 y2] box format to [x y w h] format.""" 28 | return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1)) 29 | 30 | 31 | def validate_boxes(boxes, width=0, height=0): 32 | """Check that a set of boxes are valid.""" 33 | x1 = boxes[:, 0] 34 | y1 = boxes[:, 1] 35 | x2 = boxes[:, 2] 36 | y2 = boxes[:, 3] 37 | assert (x1 >= 0).all() 38 | assert (y1 >= 0).all() 39 | assert (x2 >= x1).all() 40 | assert (y2 >= y1).all() 41 | assert (x2 < width).all() 42 | assert (y2 < height).all() 43 | 44 | 45 | def filter_small_boxes(boxes, min_size): 46 | w = boxes[:, 2] - boxes[:, 0] 47 | h = boxes[:, 3] - boxes[:, 1] 48 | keep = np.where((w >= min_size) & (h > min_size))[0] 49 | return keep 50 | -------------------------------------------------------------------------------- /lib/datasets/factory.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Factory method for easily getting imdbs by name.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | __sets = {} 14 | from datasets.pascal_voc import pascal_voc 15 | from datasets.KITTI import KITTI 16 | from datasets.cityscapes import cityscapes 17 | from datasets.bdd100k import bdd100k 18 | 19 | import numpy as np 20 | 21 | # Set up voc__ 22 | for year in ['2007', '2012']: 23 | for split in ['train', 'val', 'trainval', 'test']: 24 | name = 'voc_{}_{}'.format(year, split) 25 | __sets[name] = (lambda split=split, year=year: 
pascal_voc(split, year)) 26 | 27 | for year in ['2007', '2012']: 28 | for split in ['train', 'val', 'trainval', 'test']: 29 | name = 'voc_{}_{}_diff'.format(year, split) 30 | __sets[name] = (lambda split=split, year=year: pascal_voc(split, year, use_diff=True)) 31 | 32 | # Set up KITTI 33 | for split in ['train', 'val', 'synthCity', 'trainval']: 34 | name = 'KITTI_{}'.format(split) 35 | __sets[name] = (lambda split=split: KITTI(split)) 36 | 37 | # Set up cityscapes 38 | for split in ['train', 'val', 'foggytrain', 'foggyval', 'synthFoggytrain', 'synthBDDdaytrain', 'synthBDDdayval']: 39 | name = 'cityscapes_{}'.format(split) 40 | __sets[name] = (lambda split=split: cityscapes(split)) 41 | 42 | # Set up bdd100k 43 | for split in ['train', 'val', 'daytrain', 'dayval', 'nighttrain', 'nightval', 'citydaytrain', 'citydayval', 'cleardaytrain', 'cleardayval', 'rainydaytrain', 'rainydayval']: 44 | name = 'bdd100k_{}'.format(split) 45 | __sets[name] = (lambda split=split: bdd100k(split)) 46 | 47 | def get_imdb(name): 48 | """Get an imdb (image database) by name.""" 49 | if name not in __sets: 50 | raise KeyError('Unknown dataset: {}'.format(name)) 51 | return __sets[name]() 52 | 53 | def list_imdbs(): 54 | """List all registered imdbs.""" 55 | return list(__sets.keys()) 56 | -------------------------------------------------------------------------------- /lib/datasets/tools/mcg_munge.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | """Hacky tool to convert file system layout of MCG boxes downloaded from 5 | http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/ 6 | so that it's consistent with those computed by Jan Hosang (see: 7 | http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal- 8 | computing/research/object-recognition-and-scene-understanding/how- 9 | good-are-detection-proposals-really/) 10 | 11 | NB: Boxes from the MCG website are in (y1, x1, y2, x2) order. 12 | Boxes from Hosang et al. are in (x1, y1, x2, y2) order.
13 | """ 14 | 15 | def munge(src_dir): 16 | # stored as: ./MCG-COCO-val2014-boxes/COCO_val2014_000000193401.mat 17 | # want: ./MCG/mat/COCO_val2014_0/COCO_val2014_000000141/COCO_val2014_000000141334.mat 18 | 19 | files = os.listdir(src_dir) 20 | for fn in files: 21 | base, ext = os.path.splitext(fn) 22 | # first 14 chars / first 22 chars / all chars + .mat 23 | # COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat 24 | first = base[:14] 25 | second = base[:22] 26 | dst_dir = os.path.join('MCG', 'mat', first, second) 27 | if not os.path.exists(dst_dir): 28 | os.makedirs(dst_dir) 29 | src = os.path.join(src_dir, fn) 30 | dst = os.path.join(dst_dir, fn) 31 | print 'MV: {} -> {}'.format(src, dst) 32 | os.rename(src, dst) 33 | 34 | if __name__ == '__main__': 35 | # src_dir should look something like: 36 | # src_dir = 'MCG-COCO-val2014-boxes' 37 | src_dir = sys.argv[1] 38 | munge(src_dir) 39 | -------------------------------------------------------------------------------- /lib/layer_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/lib/layer_utils/__init__.py -------------------------------------------------------------------------------- /lib/layer_utils/anchor_target_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | from model.config import cfg 13 | import numpy as np 14 | import numpy.random as npr 15 | from utils.bbox import bbox_overlaps 16 | from model.bbox_transform import bbox_transform 17 | import torch 18 | 19 | def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors): 20 | """Same as the anchor target layer in original Fast/er RCNN """ 21 | npr.seed(cfg.RNG_SEED) 22 | A = num_anchors 23 | total_anchors = all_anchors.shape[0] 24 | K = total_anchors / num_anchors 25 | 26 | # allow boxes to sit over the edge by a small amount 27 | _allowed_border = 0 28 | 29 | # map of shape (..., H, W) 30 | height, width = rpn_cls_score.shape[1:3] 31 | 32 | # only keep anchors inside the image 33 | inds_inside = np.arange(total_anchors) 34 | 35 | # keep only inside anchors 36 | anchors = all_anchors[inds_inside, :] 37 | 38 | # label: 1 is positive, 0 is negative, -1 is dont care 39 | labels = np.empty((len(inds_inside),), dtype=np.float32) 40 | labels.fill(-1) 41 | 42 | # overlaps between the anchors and the gt boxes 43 | # overlaps (ex, gt) 44 | overlaps = bbox_overlaps( 45 | np.ascontiguousarray(anchors, dtype=np.float), 46 | np.ascontiguousarray(gt_boxes, dtype=np.float)) 47 | argmax_overlaps = overlaps.argmax(axis=1) 48 | max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] 49 | gt_argmax_overlaps = overlaps.argmax(axis=0) 50 | gt_max_overlaps = overlaps[gt_argmax_overlaps, 51 | np.arange(overlaps.shape[1])] 52 | gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] 53 | 54 | if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: 55 | # assign bg labels first so that positive labels can clobber them 56 | # first set 
the negatives 57 | labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 58 | 59 | # fg label: for each gt, anchor with highest overlap 60 | labels[gt_argmax_overlaps] = 1 61 | 62 | # fg label: above threshold IOU 63 | labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 64 | 65 | if cfg.TRAIN.RPN_CLOBBER_POSITIVES: 66 | # assign bg labels last so that negative labels can clobber positives 67 | labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 68 | 69 | # subsample positive labels if we have too many 70 | num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) 71 | fg_inds = np.where(labels == 1)[0] 72 | if len(fg_inds) > num_fg: 73 | disable_inds = npr.choice( 74 | fg_inds, size=(len(fg_inds) - num_fg), replace=False) 75 | labels[disable_inds] = -1 76 | 77 | # subsample negative labels if we have too many 78 | num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) 79 | bg_inds = np.where(labels == 0)[0] 80 | if len(bg_inds) > num_bg: 81 | disable_inds = npr.choice( 82 | bg_inds, size=(len(bg_inds) - num_bg), replace=False) 83 | labels[disable_inds] = -1 84 | 85 | bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) 86 | bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) 87 | 88 | bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) 89 | # only the positive ones have regression targets 90 | bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS) 91 | 92 | bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) 93 | if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0: 94 | # uniform weighting of examples (given non-uniform sampling) 95 | num_examples = np.sum(labels >= 0) 96 | positive_weights = np.ones((1, 4)) * 1.0 / num_examples 97 | negative_weights = np.ones((1, 4)) * 1.0 / num_examples 98 | else: 99 | assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & 100 | (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1)) 101 | positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / 102 | np.sum(labels == 1)) 103 | negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / 104 | np.sum(labels == 0)) 105 | bbox_outside_weights[labels == 1, :] = positive_weights 106 | bbox_outside_weights[labels == 0, :] = negative_weights 107 | 108 | # map up to original set of anchors 109 | labels = _unmap(labels, total_anchors, inds_inside, fill=-1) 110 | bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) 111 | bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) 112 | bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) 113 | 114 | # labels 115 | labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2) 116 | labels = labels.reshape((1, 1, A * height, width)) 117 | rpn_labels = labels 118 | 119 | # bbox_targets 120 | bbox_targets = bbox_targets \ 121 | .reshape((1, height, width, A * 4)) 122 | 123 | rpn_bbox_targets = bbox_targets 124 | # bbox_inside_weights 125 | bbox_inside_weights = bbox_inside_weights \ 126 | .reshape((1, height, width, A * 4)) 127 | 128 | rpn_bbox_inside_weights = bbox_inside_weights 129 | 130 | # bbox_outside_weights 131 | bbox_outside_weights = bbox_outside_weights \ 132 | .reshape((1, height, width, A * 4)) 133 | 134 | rpn_bbox_outside_weights = bbox_outside_weights 135 | return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights 136 | 137 | 138 | def _unmap(data, count, inds, fill=0): 139 | """ Unmap a subset of item (data) back to the original set of items (of 140 | size count) 
""" 141 | if len(data.shape) == 1: 142 | ret = np.empty((count,), dtype=np.float32) 143 | ret.fill(fill) 144 | ret[inds] = data 145 | else: 146 | ret = np.empty((count,) + data.shape[1:], dtype=np.float32) 147 | ret.fill(fill) 148 | ret[inds, :] = data 149 | return ret 150 | 151 | 152 | def _compute_targets(ex_rois, gt_rois): 153 | """Compute bounding-box regression targets for an image.""" 154 | 155 | assert ex_rois.shape[0] == gt_rois.shape[0] 156 | assert ex_rois.shape[1] == 4 157 | assert gt_rois.shape[1] == 5 158 | 159 | return bbox_transform(torch.from_numpy(ex_rois), torch.from_numpy(gt_rois[:, :4])).numpy() 160 | -------------------------------------------------------------------------------- /lib/layer_utils/generate_anchors.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | 14 | # Verify that we compute the same anchors as Shaoqing's matlab implementation: 15 | # 16 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat 17 | # >> anchors 18 | # 19 | # anchors = 20 | # 21 | # -83 -39 100 56 22 | # -175 -87 192 104 23 | # -359 -183 376 200 24 | # -55 -55 72 72 25 | # -119 -119 136 136 26 | # -247 -247 264 264 27 | # -35 -79 52 96 28 | # -79 -167 96 184 29 | # -167 -343 184 360 30 | 31 | # array([[ -83., -39., 100., 56.], 32 | # [-175., -87., 192., 104.], 33 | # [-359., -183., 376., 200.], 34 | # [ -55., -55., 72., 72.], 35 | # [-119., -119., 136., 136.], 36 | # [-247., -247., 264., 264.], 37 | # [ -35., -79., 52., 96.], 38 | # [ -79., -167., 96., 184.], 39 | # [-167., -343., 184., 360.]]) 40 | 41 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 42 | scales=2 ** np.arange(3, 6)): 43 | """ 44 | Generate anchor (reference) windows by enumerating aspect ratios X 45 | scales wrt a reference (0, 0, 15, 15) window. 46 | """ 47 | 48 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 49 | ratio_anchors = _ratio_enum(base_anchor, ratios) 50 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 51 | for i in range(ratio_anchors.shape[0])]) 52 | return anchors 53 | 54 | 55 | def _whctrs(anchor): 56 | """ 57 | Return width, height, x center, and y center for an anchor (window). 58 | """ 59 | 60 | w = anchor[2] - anchor[0] + 1 61 | h = anchor[3] - anchor[1] + 1 62 | x_ctr = anchor[0] + 0.5 * (w - 1) 63 | y_ctr = anchor[1] + 0.5 * (h - 1) 64 | return w, h, x_ctr, y_ctr 65 | 66 | 67 | def _mkanchors(ws, hs, x_ctr, y_ctr): 68 | """ 69 | Given a vector of widths (ws) and heights (hs) around a center 70 | (x_ctr, y_ctr), output a set of anchors (windows). 71 | """ 72 | 73 | ws = ws[:, np.newaxis] 74 | hs = hs[:, np.newaxis] 75 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 76 | y_ctr - 0.5 * (hs - 1), 77 | x_ctr + 0.5 * (ws - 1), 78 | y_ctr + 0.5 * (hs - 1))) 79 | return anchors 80 | 81 | 82 | def _ratio_enum(anchor, ratios): 83 | """ 84 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 
85 | """ 86 | 87 | w, h, x_ctr, y_ctr = _whctrs(anchor) 88 | size = w * h 89 | size_ratios = size / ratios 90 | ws = np.round(np.sqrt(size_ratios)) 91 | hs = np.round(ws * ratios) 92 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 93 | return anchors 94 | 95 | 96 | def _scale_enum(anchor, scales): 97 | """ 98 | Enumerate a set of anchors for each scale wrt an anchor. 99 | """ 100 | 101 | w, h, x_ctr, y_ctr = _whctrs(anchor) 102 | ws = w * scales 103 | hs = h * scales 104 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 105 | return anchors 106 | 107 | 108 | if __name__ == '__main__': 109 | import time 110 | 111 | t = time.time() 112 | a = generate_anchors() 113 | print(time.time() - t) 114 | print(a) 115 | from IPython import embed; 116 | 117 | embed() 118 | -------------------------------------------------------------------------------- /lib/layer_utils/proposal_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick and Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | from model.config import cfg 12 | from model.bbox_transform import bbox_transform_inv, clip_boxes 13 | from model.nms_wrapper import nms 14 | 15 | import torch 16 | from torch.autograd import Variable 17 | 18 | 19 | def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors): 20 | """A simplified version compared to fast/er RCNN 21 | For details please see the technical report 22 | """ 23 | if type(cfg_key) == bytes: 24 | cfg_key = cfg_key.decode('utf-8') 25 | pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N 26 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 27 | nms_thresh = cfg[cfg_key].RPN_NMS_THRESH 28 | 29 | # Get the scores and bounding boxes 30 | scores = rpn_cls_prob[:, :, :, num_anchors:] 31 | rpn_bbox_pred = rpn_bbox_pred.view((-1, 4)) 32 | scores = scores.contiguous().view(-1, 1) 33 | proposals = bbox_transform_inv(anchors, rpn_bbox_pred) 34 | proposals = clip_boxes(proposals, im_info[:2]) 35 | 36 | # Pick the top region proposals 37 | scores, order = scores.view(-1).sort(descending=True) 38 | if pre_nms_topN > 0: 39 | order = order[:pre_nms_topN] 40 | scores = scores[:pre_nms_topN].view(-1, 1) 41 | proposals = proposals[order.data, :] 42 | 43 | # Non-maximal suppression 44 | keep = nms(torch.cat((proposals, scores), 1).data, nms_thresh) #error 45 | 46 | # Pick the top region proposals after NMS 47 | if post_nms_topN > 0: 48 | keep = keep[:post_nms_topN] 49 | proposals = proposals[keep, :] 50 | scores = scores[keep,] 51 | 52 | # Only support single image as input 53 | batch_inds = Variable(proposals.data.new(proposals.size(0), 1).zero_()) 54 | blob = torch.cat((batch_inds, proposals), 1) 55 | 56 | return blob, scores 57 | 58 | def proposal_layer_fpn(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors): 59 | """A simplified version compared to fast/er RCNN 60 | For details please see the technical report 61 | """ 62 | if type(cfg_key) == bytes: 63 | cfg_key = cfg_key.decode('utf-8') 64 | pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N 65 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 66 | nms_thresh = cfg[cfg_key].RPN_NMS_THRESH 67 | 68 | proposals_total = [] 69 | 
scores_total = [] 70 | for idx in range(len(rpn_cls_prob)): 71 | # Get the scores and bounding boxes 72 | scores = rpn_cls_prob[idx][:, :, :, num_anchors:] 73 | rpn_bbox_pred[idx] = rpn_bbox_pred[idx].view((-1, 4)) 74 | scores = scores.contiguous().view(-1, 1) 75 | proposals = bbox_transform_inv(anchors[idx], rpn_bbox_pred[idx]) 76 | proposals = clip_boxes(proposals, im_info[:2]) 77 | 78 | # Pick the top region proposals 79 | scores, order = scores.view(-1).sort(descending=True) 80 | if pre_nms_topN > 0: 81 | order = order[:pre_nms_topN] 82 | scores = scores[:pre_nms_topN].view(-1, 1) 83 | proposals = proposals[order.data, :] 84 | 85 | proposals_total.append(proposals) 86 | scores_total.append(scores) 87 | 88 | proposals = torch.cat(proposals_total) 89 | scores = torch.cat(scores_total) 90 | 91 | # Non-maximal suppression 92 | keep = nms(torch.cat((proposals, scores), 1).data, nms_thresh) 93 | 94 | # Pick the top region proposals after NMS 95 | if post_nms_topN > 0: 96 | keep = keep[:post_nms_topN] 97 | proposals = proposals[keep, :] 98 | scores = scores[keep,] 99 | 100 | # Only support single image as input 101 | batch_inds = Variable(proposals.data.new(proposals.size(0), 1).zero_()) 102 | blob = torch.cat((batch_inds, proposals), 1) 103 | 104 | return blob, scores -------------------------------------------------------------------------------- /lib/layer_utils/proposal_target_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick, Sean Bell and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | import numpy.random as npr 13 | from model.config import cfg 14 | from model.bbox_transform import bbox_transform 15 | from utils.bbox import bbox_overlaps 16 | 17 | 18 | import torch 19 | from torch.autograd import Variable 20 | 21 | def proposal_target_layer(rpn_rois, rpn_scores, gt_boxes, _num_classes): 22 | """ 23 | Assign object detection proposals to ground-truth targets. Produces proposal 24 | classification labels and bounding-box regression targets.
25 | """ 26 | 27 | # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN 28 | # (i.e., rpn.proposal_layer.ProposalLayer), or any other source 29 | all_rois = rpn_rois 30 | all_scores = rpn_scores 31 | 32 | # Include ground-truth boxes in the set of candidate rois 33 | if cfg.TRAIN.USE_GT: 34 | zeros = rpn_rois.data.new(gt_boxes.shape[0], 1) 35 | all_rois = torch.cat( 36 | (all_rois, torch.cat((zeros, gt_boxes[:, :-1]), 1)) 37 | , 0) 38 | # not sure if it a wise appending, but anyway i am not using it 39 | all_scores = torch.cat((all_scores, zeros), 0) 40 | 41 | num_images = 1 42 | rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images 43 | fg_rois_per_image = int(round(cfg.TRAIN.FG_FRACTION * rois_per_image)) 44 | 45 | # Sample rois with classification labels and bounding box regression 46 | # targets 47 | labels, rois, roi_scores, bbox_targets, bbox_inside_weights = _sample_rois( 48 | all_rois, all_scores, gt_boxes, fg_rois_per_image, 49 | rois_per_image, _num_classes) 50 | 51 | rois = rois.view(-1, 5) 52 | roi_scores = roi_scores.view(-1) 53 | labels = labels.view(-1, 1) 54 | bbox_targets = bbox_targets.view(-1, _num_classes * 4) 55 | bbox_inside_weights = bbox_inside_weights.view(-1, _num_classes * 4) 56 | bbox_outside_weights = (bbox_inside_weights > 0).float() 57 | 58 | return rois, roi_scores, labels, Variable(bbox_targets), Variable(bbox_inside_weights), Variable(bbox_outside_weights) 59 | 60 | 61 | def _get_bbox_regression_labels(bbox_target_data, num_classes): 62 | """Bounding-box regression targets (bbox_target_data) are stored in a 63 | compact form N x (class, tx, ty, tw, th) 64 | 65 | This function expands those targets into the 4-of-4*K representation used 66 | by the network (i.e. only one class has non-zero targets). 67 | 68 | Returns: 69 | bbox_target (ndarray): N x 4K blob of regression targets 70 | bbox_inside_weights (ndarray): N x 4K blob of loss weights 71 | """ 72 | # Inputs are tensor 73 | 74 | clss = bbox_target_data[:, 0] 75 | bbox_targets = clss.new(clss.numel(), 4 * num_classes).zero_() 76 | bbox_inside_weights = clss.new(bbox_targets.shape).zero_() 77 | inds = (clss > 0).nonzero().view(-1) 78 | if inds.numel() > 0: 79 | clss = clss[inds].contiguous().view(-1,1) 80 | dim1_inds = inds.unsqueeze(1).expand(inds.size(0), 4) 81 | dim2_inds = torch.cat([4*clss, 4*clss+1, 4*clss+2, 4*clss+3], 1).long() 82 | bbox_targets[dim1_inds, dim2_inds] = bbox_target_data[inds][:, 1:] 83 | bbox_inside_weights[dim1_inds, dim2_inds] = bbox_targets.new(cfg.TRAIN.BBOX_INSIDE_WEIGHTS).view(-1, 4).expand_as(dim1_inds) 84 | 85 | return bbox_targets, bbox_inside_weights 86 | 87 | 88 | def _compute_targets(ex_rois, gt_rois, labels): 89 | """Compute bounding-box regression targets for an image.""" 90 | # Inputs are tensor 91 | 92 | assert ex_rois.shape[0] == gt_rois.shape[0] 93 | assert ex_rois.shape[1] == 4 94 | assert gt_rois.shape[1] == 4 95 | 96 | targets = bbox_transform(ex_rois, gt_rois) 97 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: 98 | # Optionally normalize targets by a precomputed mean and stdev 99 | targets = ((targets - targets.new(cfg.TRAIN.BBOX_NORMALIZE_MEANS)) 100 | / targets.new(cfg.TRAIN.BBOX_NORMALIZE_STDS)) 101 | return torch.cat( 102 | [labels.unsqueeze(1), targets], 1) 103 | 104 | 105 | def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes): 106 | """Generate a random sample of RoIs comprising foreground and background 107 | examples. 
108 | """ 109 | npr.seed(cfg.RNG_SEED) 110 | # overlaps: (rois x gt_boxes) 111 | overlaps = bbox_overlaps( 112 | all_rois[:, 1:5].data, 113 | gt_boxes[:, :4].data) 114 | max_overlaps, gt_assignment = overlaps.max(1) 115 | labels = gt_boxes[gt_assignment, [4]] 116 | 117 | # Select foreground RoIs as those with >= FG_THRESH overlap 118 | fg_inds = (max_overlaps >= cfg.TRAIN.FG_THRESH).nonzero().view(-1) 119 | # Guard against the case when an image has fewer than fg_rois_per_image 120 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) 121 | bg_inds = ((max_overlaps < cfg.TRAIN.BG_THRESH_HI) + (max_overlaps >= cfg.TRAIN.BG_THRESH_LO) == 2).nonzero().view(-1) 122 | 123 | # Small modification to the original version where we ensure a fixed number of regions are sampled 124 | if fg_inds.numel() > 0 and bg_inds.numel() > 0: 125 | fg_rois_per_image = min(fg_rois_per_image, fg_inds.numel()) 126 | fg_inds = fg_inds[torch.from_numpy(npr.choice(np.arange(0, fg_inds.numel()), size=int(fg_rois_per_image), replace=False)).long().cuda()] 127 | bg_rois_per_image = rois_per_image - fg_rois_per_image 128 | to_replace = bg_inds.numel() < bg_rois_per_image 129 | bg_inds = bg_inds[torch.from_numpy(npr.choice(np.arange(0, bg_inds.numel()), size=int(bg_rois_per_image), replace=to_replace)).long().cuda()] 130 | elif fg_inds.numel() > 0: 131 | to_replace = fg_inds.numel() < rois_per_image 132 | fg_inds = fg_inds[torch.from_numpy(npr.choice(np.arange(0, fg_inds.numel()), size=int(rois_per_image), replace=to_replace)).long().cuda()] 133 | fg_rois_per_image = rois_per_image 134 | elif bg_inds.numel() > 0: 135 | to_replace = bg_inds.numel() < rois_per_image 136 | bg_inds = bg_inds[torch.from_numpy(npr.choice(np.arange(0, bg_inds.numel()), size=int(rois_per_image), replace=to_replace)).long().cuda()] 137 | fg_rois_per_image = 0 138 | else: 139 | import pdb 140 | pdb.set_trace() 141 | 142 | # The indices that we're selecting (both fg and bg) 143 | keep_inds = torch.cat([fg_inds, bg_inds], 0) 144 | # Select sampled values from various arrays: 145 | labels = labels[keep_inds].contiguous() 146 | # Clamp labels for the background RoIs to 0 147 | labels[int(fg_rois_per_image):] = 0 148 | rois = all_rois[keep_inds].contiguous() 149 | roi_scores = all_scores[keep_inds].contiguous() 150 | 151 | bbox_target_data = _compute_targets( 152 | rois[:, 1:5].data, gt_boxes[gt_assignment[keep_inds]][:, :4].data, labels.data) 153 | 154 | bbox_targets, bbox_inside_weights = \ 155 | _get_bbox_regression_labels(bbox_target_data, num_classes) 156 | 157 | return labels, rois, roi_scores, bbox_targets, bbox_inside_weights 158 | -------------------------------------------------------------------------------- /lib/layer_utils/proposal_top_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | from model.config import cfg 12 | from model.bbox_transform import bbox_transform_inv, clip_boxes 13 | import numpy.random as npr 14 | 15 | import torch 16 | 17 | def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors): 18 | """A layer that just selects the top region proposals 19 
| without using non-maximal suppression, 20 | For details please see the technical report 21 | """ 22 | npr.seed(cfg.RNG_SEED) 23 | rpn_top_n = cfg.TEST.RPN_TOP_N 24 | 25 | scores = rpn_cls_prob[:, :, :, num_anchors:] 26 | 27 | rpn_bbox_pred = rpn_bbox_pred.view(-1, 4) 28 | scores = scores.contiguous().view(-1, 1) 29 | 30 | length = scores.size(0) 31 | if length < rpn_top_n: 32 | # Random selection, maybe unnecessary and loses good proposals 33 | # But such case rarely happens 34 | top_inds = torch.from_numpy(npr.choice(length, size=rpn_top_n, replace=True)).long().cuda() 35 | else: 36 | top_inds = scores.sort(0, descending=True)[1] 37 | top_inds = top_inds[:rpn_top_n] 38 | top_inds = top_inds.view(rpn_top_n) 39 | 40 | # Do the selection here 41 | anchors = anchors[top_inds, :].contiguous() 42 | rpn_bbox_pred = rpn_bbox_pred[top_inds, :].contiguous() 43 | scores = scores[top_inds].contiguous() 44 | 45 | # Convert anchors into proposals via bbox transformations 46 | proposals = bbox_transform_inv(anchors, rpn_bbox_pred) 47 | 48 | # Clip predicted boxes to image 49 | proposals = clip_boxes(proposals, im_info[:2]) 50 | 51 | # Output rois blob 52 | # Our RPN implementation only supports a single input image, so all 53 | # batch inds are 0 54 | batch_inds = proposals.data.new(proposals.size(0), 1).zero_() 55 | blob = torch.cat([batch_inds, proposals], 1) 56 | return blob, scores 57 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/lib/layer_utils/roi_align/__init__.py -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/lib/layer_utils/roi_align/_ext/__init__.py -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/_ext/crop_and_resize/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._crop_and_resize import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/crop_and_resize.c'] 7 | headers = ['src/crop_and_resize.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | extra_objects = [] 12 | if torch.cuda.is_available(): 13 | print('Including CUDA code.') 14 | sources += ['src/crop_and_resize_gpu.c'] 15 | headers += ['src/crop_and_resize_gpu.h'] 16 | defines += [('WITH_CUDA', None)] 17 | extra_objects += ['src/cuda/crop_and_resize_kernel.cu.o'] 18 | with_cuda = True 19 | 20 | extra_compile_args = ['-std=c99'] 21 | 22 | this_file = os.path.dirname(os.path.realpath(__file__)) 23 | 
print(this_file) 24 | sources = [os.path.join(this_file, fname) for fname in sources] 25 | headers = [os.path.join(this_file, fname) for fname in headers] 26 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 27 | 28 | ffi = create_extension( 29 | '_ext.crop_and_resize', 30 | headers=headers, 31 | sources=sources, 32 | define_macros=defines, 33 | relative_to=__file__, 34 | with_cuda=with_cuda, 35 | extra_objects=extra_objects, 36 | extra_compile_args=extra_compile_args 37 | ) 38 | 39 | if __name__ == '__main__': 40 | ffi.build() 41 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/crop_and_resize.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.autograd import Function 6 | 7 | from ._ext import crop_and_resize as _backend 8 | 9 | 10 | class CropAndResizeFunction(Function): 11 | 12 | def __init__(self, crop_height, crop_width, extrapolation_value=0): 13 | self.crop_height = crop_height 14 | self.crop_width = crop_width 15 | self.extrapolation_value = extrapolation_value 16 | 17 | def forward(self, image, boxes, box_ind): 18 | crops = torch.zeros_like(image) 19 | 20 | if image.is_cuda: 21 | _backend.crop_and_resize_gpu_forward( 22 | image, boxes, box_ind, 23 | self.extrapolation_value, self.crop_height, self.crop_width, crops) 24 | else: 25 | _backend.crop_and_resize_forward( 26 | image, boxes, box_ind, 27 | self.extrapolation_value, self.crop_height, self.crop_width, crops) 28 | 29 | # save for backward 30 | self.im_size = image.size() 31 | self.save_for_backward(boxes, box_ind) 32 | 33 | return crops 34 | 35 | def backward(self, grad_outputs): 36 | boxes, box_ind = self.saved_tensors 37 | 38 | grad_outputs = grad_outputs.contiguous() 39 | grad_image = torch.zeros_like(grad_outputs).resize_(*self.im_size) 40 | 41 | if grad_outputs.is_cuda: 42 | _backend.crop_and_resize_gpu_backward( 43 | grad_outputs, boxes, box_ind, grad_image 44 | ) 45 | else: 46 | _backend.crop_and_resize_backward( 47 | grad_outputs, boxes, box_ind, grad_image 48 | ) 49 | 50 | return grad_image, None, None 51 | 52 | 53 | class CropAndResize(nn.Module): 54 | """ 55 | Crop and resize ported from tensorflow 56 | See more details on https://www.tensorflow.org/api_docs/python/tf/image/crop_and_resize 57 | """ 58 | 59 | def __init__(self, crop_height, crop_width, extrapolation_value=0): 60 | super(CropAndResize, self).__init__() 61 | 62 | self.crop_height = crop_height 63 | self.crop_width = crop_width 64 | self.extrapolation_value = extrapolation_value 65 | 66 | def forward(self, image, boxes, box_ind): 67 | return CropAndResizeFunction(self.crop_height, self.crop_width, self.extrapolation_value)(image, boxes, box_ind) 68 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from .crop_and_resize import CropAndResizeFunction, CropAndResize 5 | 6 | 7 | class RoIAlign(nn.Module): 8 | 9 | def __init__(self, crop_height, crop_width, extrapolation_value=0, transform_fpcoor=True): 10 | super(RoIAlign, self).__init__() 11 | 12 | self.crop_height = crop_height 13 | self.crop_width = crop_width 14 | self.extrapolation_value = extrapolation_value 15 | self.transform_fpcoor = transform_fpcoor 16 | 17 | def 
forward(self, featuremap, boxes, box_ind): 18 | """ 19 | RoIAlign based on crop_and_resize. 20 | See more details on https://github.com/ppwwyyxx/tensorpack/blob/6d5ba6a970710eaaa14b89d24aace179eb8ee1af/examples/FasterRCNN/model.py#L301 21 | :param featuremap: NxCxHxW 22 | :param boxes: Mx4 float box with (x1, y1, x2, y2) **without normalization** 23 | :param box_ind: M 24 | :return: MxCxoHxoW 25 | """ 26 | x1, y1, x2, y2 = torch.split(boxes, 1, dim=1) 27 | image_height, image_width = featuremap.size()[2:4] 28 | 29 | if self.transform_fpcoor: 30 | spacing_w = (x2 - x1) / float(self.crop_width) 31 | spacing_h = (y2 - y1) / float(self.crop_height) 32 | 33 | nx0 = (x1 + spacing_w / 2 - 0.5) / float(image_width - 1) 34 | ny0 = (y1 + spacing_h / 2 - 0.5) / float(image_height - 1) 35 | nw = spacing_w * float(self.crop_width - 1) / float(image_width - 1) 36 | nh = spacing_h * float(self.crop_height - 1) / float(image_height - 1) 37 | 38 | boxes = torch.cat((ny0, nx0, ny0 + nh, nx0 + nw), 1) 39 | else: 40 | x1 = x1 / float(image_width - 1) 41 | x2 = x2 / float(image_width - 1) 42 | y1 = y1 / float(image_height - 1) 43 | y2 = y2 / float(image_height - 1) 44 | boxes = torch.cat((y1, x1, y2, x2), 1) 45 | 46 | boxes = boxes.detach().contiguous() 47 | box_ind = box_ind.detach() 48 | return CropAndResizeFunction(self.crop_height, self.crop_width, self.extrapolation_value)(featuremap, boxes, box_ind) 49 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/src/crop_and_resize.h: -------------------------------------------------------------------------------- 1 | void crop_and_resize_forward( 2 | THFloatTensor * image, 3 | THFloatTensor * boxes, // [y1, x1, y2, x2] 4 | THIntTensor * box_index, // range in [0, batch_size) 5 | const float extrapolation_value, 6 | const int crop_height, 7 | const int crop_width, 8 | THFloatTensor * crops 9 | ); 10 | 11 | void crop_and_resize_backward( 12 | THFloatTensor * grads, 13 | THFloatTensor * boxes, // [y1, x1, y2, x2] 14 | THIntTensor * box_index, // range in [0, batch_size) 15 | THFloatTensor * grads_image // resize to [bsize, c, hc, wc] 16 | ); -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/src/crop_and_resize_gpu.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include "cuda/crop_and_resize_kernel.h" 3 | 4 | extern THCState *state; 5 | 6 | 7 | void crop_and_resize_gpu_forward( 8 | THCudaTensor * image, 9 | THCudaTensor * boxes, // [y1, x1, y2, x2] 10 | THCudaIntTensor * box_index, // range in [0, batch_size) 11 | const float extrapolation_value, 12 | const int crop_height, 13 | const int crop_width, 14 | THCudaTensor * crops 15 | ) { 16 | const int batch_size = THCudaTensor_size(state, image, 0); 17 | const int depth = THCudaTensor_size(state, image, 1); 18 | const int image_height = THCudaTensor_size(state, image, 2); 19 | const int image_width = THCudaTensor_size(state, image, 3); 20 | 21 | const int num_boxes = THCudaTensor_size(state, boxes, 0); 22 | 23 | // init output space 24 | THCudaTensor_resize4d(state, crops, num_boxes, depth, crop_height, crop_width); 25 | THCudaTensor_zero(state, crops); 26 | 27 | cudaStream_t stream = THCState_getCurrentStream(state); 28 | CropAndResizeLaucher( 29 | THCudaTensor_data(state, image), 30 | THCudaTensor_data(state, boxes), 31 | THCudaIntTensor_data(state, box_index), 32 | num_boxes, batch_size, image_height, image_width, 33 | crop_height, crop_width,
depth, extrapolation_value, 34 | THCudaTensor_data(state, crops), 35 | stream 36 | ); 37 | } 38 | 39 | 40 | void crop_and_resize_gpu_backward( 41 | THCudaTensor * grads, 42 | THCudaTensor * boxes, // [y1, x1, y2, x2] 43 | THCudaIntTensor * box_index, // range in [0, batch_size) 44 | THCudaTensor * grads_image // resize to [bsize, c, hc, wc] 45 | ) { 46 | // shape 47 | const int batch_size = THCudaTensor_size(state, grads_image, 0); 48 | const int depth = THCudaTensor_size(state, grads_image, 1); 49 | const int image_height = THCudaTensor_size(state, grads_image, 2); 50 | const int image_width = THCudaTensor_size(state, grads_image, 3); 51 | 52 | const int num_boxes = THCudaTensor_size(state, grads, 0); 53 | const int crop_height = THCudaTensor_size(state, grads, 2); 54 | const int crop_width = THCudaTensor_size(state, grads, 3); 55 | 56 | // init output space 57 | THCudaTensor_zero(state, grads_image); 58 | 59 | cudaStream_t stream = THCState_getCurrentStream(state); 60 | CropAndResizeBackpropImageLaucher( 61 | THCudaTensor_data(state, grads), 62 | THCudaTensor_data(state, boxes), 63 | THCudaIntTensor_data(state, box_index), 64 | num_boxes, batch_size, image_height, image_width, 65 | crop_height, crop_width, depth, 66 | THCudaTensor_data(state, grads_image), 67 | stream 68 | ); 69 | } -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/src/crop_and_resize_gpu.h: -------------------------------------------------------------------------------- 1 | void crop_and_resize_gpu_forward( 2 | THCudaTensor * image, 3 | THCudaTensor * boxes, // [y1, x1, y2, x2] 4 | THCudaIntTensor * box_index, // range in [0, batch_size) 5 | const float extrapolation_value, 6 | const int crop_height, 7 | const int crop_width, 8 | THCudaTensor * crops 9 | ); 10 | 11 | void crop_and_resize_gpu_backward( 12 | THCudaTensor * grads, 13 | THCudaTensor * boxes, // [y1, x1, y2, x2] 14 | THCudaIntTensor * box_index, // range in [0, batch_size) 15 | THCudaTensor * grads_image // resize to [bsize, c, hc, wc] 16 | ); -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/src/cuda/crop_and_resize_kernel.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/lib/layer_utils/roi_align/src/cuda/crop_and_resize_kernel.cu.o -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/src/cuda/crop_and_resize_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _CropAndResize_Kernel 2 | #define _CropAndResize_Kernel 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void CropAndResizeLaucher( 9 | const float *image_ptr, const float *boxes_ptr, 10 | const int *box_ind_ptr, int num_boxes, int batch, int image_height, 11 | int image_width, int crop_height, int crop_width, int depth, 12 | float extrapolation_value, float *crops_ptr, cudaStream_t stream); 13 | 14 | void CropAndResizeBackpropImageLaucher( 15 | const float *grads_ptr, const float *boxes_ptr, 16 | const int *box_ind_ptr, int num_boxes, int batch, int image_height, 17 | int image_width, int crop_height, int crop_width, int depth, 18 | float *grads_image_ptr, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif -------------------------------------------------------------------------------- 
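The RoIAlign module above pools each box to a fixed crop_height x crop_width grid by bilinear sampling through crop_and_resize: featuremap is NxCxHxW, boxes holds un-normalized (x1, y1, x2, y2) coordinates in feature-map space, and box_ind maps each box to its image in the batch. Below is a minimal usage sketch, assuming the FFI extension has been built with build.py, a CUDA device is available, lib/ is on the import path (as tools/_init_paths.py arranges), and the pre-1.0 PyTorch API this extension targets; the shapes and box values are illustrative only.

import torch
from torch.autograd import Variable

from layer_utils.roi_align.roi_align import RoIAlign

# One 512-channel feature map, e.g. a stride-16 VGG16 conv5_3 output.
featuremap = Variable(torch.randn(1, 512, 38, 50)).cuda()

# Two boxes in feature-map coordinates: (x1, y1, x2, y2), not normalized.
boxes = Variable(torch.FloatTensor([[4., 4., 20., 16.],
                                    [10., 8., 30., 30.]])).cuda()

# box_ind[i] is the batch index of the image that boxes[i] comes from.
box_ind = Variable(torch.IntTensor([0, 0])).cuda()

# Pool every box to a fixed 7x7 grid; transform_fpcoor=True selects the
# tensorpack-style coordinate transform implemented in forward() above.
roi_align = RoIAlign(crop_height=7, crop_width=7, transform_fpcoor=True)
crops = roi_align(featuremap, boxes, box_ind)
print(crops.size())  # torch.Size([2, 512, 7, 7])

With transform_fpcoor=True the spacing terms place the sample points at bin centers rather than stretching the box corners onto the grid, which avoids the half-pixel misalignment of the naive normalization in the else branch.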
/lib/layer_utils/roi_pooling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/lib/layer_utils/roi_pooling/__init__.py -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/lib/layer_utils/roi_pooling/_ext/__init__.py -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/_ext/roi_pooling/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_pooling import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/roi_pooling.c'] 7 | headers = ['src/roi_pooling.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/roi_pooling_cuda.c'] 14 | headers += ['src/roi_pooling_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = os.path.dirname(os.path.realpath(__file__)) 19 | print(this_file) 20 | extra_objects = ['src/cuda/roi_pooling_kernel.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | 23 | ffi = create_extension( 24 | '_ext.roi_pooling', 25 | headers=headers, 26 | sources=sources, 27 | define_macros=defines, 28 | relative_to=__file__, 29 | with_cuda=with_cuda, 30 | extra_objects=extra_objects 31 | ) 32 | 33 | if __name__ == '__main__': 34 | ffi.build() 35 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from ._ext import roi_pooling 4 | 5 | 6 | class RoIPoolFunction(Function): 7 | def __init__(self, pooled_height, pooled_width, spatial_scale): 8 | self.pooled_width = int(pooled_width) 9 | self.pooled_height = int(pooled_height) 10 | self.spatial_scale = float(spatial_scale) 11 | self.output = None 12 | self.argmax = None 13 | self.rois = None 14 | self.feature_size = None 15 | 16 | def forward(self, features, rois): 17 | batch_size, num_channels, data_height, data_width = features.size() 18 | num_rois = rois.size()[0] 19 | output = torch.zeros(num_rois, num_channels, self.pooled_height, self.pooled_width) 20 | argmax = torch.IntTensor(num_rois, num_channels, self.pooled_height, self.pooled_width).zero_() 21 | 22 | if not features.is_cuda: 23 | _features = features.permute(0, 2, 3, 1) 24 | roi_pooling.roi_pooling_forward(self.pooled_height, self.pooled_width, self.spatial_scale, 25 | _features, rois, output) 26 | # 
output = output.cuda() 27 | else: 28 | output = output.cuda() 29 | argmax = argmax.cuda() 30 | roi_pooling.roi_pooling_forward_cuda(self.pooled_height, self.pooled_width, self.spatial_scale, 31 | features, rois, output, argmax) 32 | self.output = output 33 | self.argmax = argmax 34 | self.rois = rois 35 | self.feature_size = features.size() 36 | 37 | return output 38 | 39 | def backward(self, grad_output): 40 | assert(self.feature_size is not None and grad_output.is_cuda) 41 | 42 | batch_size, num_channels, data_height, data_width = self.feature_size 43 | 44 | grad_input = torch.zeros(batch_size, num_channels, data_height, data_width).cuda() 45 | roi_pooling.roi_pooling_backward_cuda(self.pooled_height, self.pooled_width, self.spatial_scale, 46 | grad_output, self.rois, grad_input, self.argmax) 47 | 48 | # print grad_input 49 | 50 | return grad_input, None 51 | 52 | 53 | class RoIPool(torch.nn.Module): 54 | def __init__(self, pooled_height, pooled_width, spatial_scale): 55 | super(RoIPool, self).__init__() 56 | 57 | self.pooled_width = int(pooled_width) 58 | self.pooled_height = int(pooled_height) 59 | self.spatial_scale = float(spatial_scale) 60 | 61 | def forward(self, features, rois): 62 | return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois) 63 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/roi_pool_py.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import numpy as np 5 | 6 | 7 | class RoIPool(nn.Module): 8 | def __init__(self, pooled_height, pooled_width, spatial_scale): 9 | super(RoIPool, self).__init__() 10 | self.pooled_width = int(pooled_width) 11 | self.pooled_height = int(pooled_height) 12 | self.spatial_scale = float(spatial_scale) 13 | 14 | def forward(self, features, rois): 15 | batch_size, num_channels, data_height, data_width = features.size() 16 | num_rois = rois.size()[0] 17 | outputs = Variable(torch.zeros(num_rois, num_channels, self.pooled_height, self.pooled_width)).cuda() 18 | 19 | for roi_ind, roi in enumerate(rois): 20 | batch_ind = int(roi[0].data[0]) 21 | roi_start_w, roi_start_h, roi_end_w, roi_end_h = np.round( 22 | roi[1:].data.cpu().numpy() * self.spatial_scale).astype(int) 23 | roi_width = max(roi_end_w - roi_start_w + 1, 1) 24 | roi_height = max(roi_end_h - roi_start_h + 1, 1) 25 | bin_size_w = float(roi_width) / float(self.pooled_width) 26 | bin_size_h = float(roi_height) / float(self.pooled_height) 27 | 28 | for ph in range(self.pooled_height): 29 | hstart = int(np.floor(ph * bin_size_h)) 30 | hend = int(np.ceil((ph + 1) * bin_size_h)) 31 | hstart = min(data_height, max(0, hstart + roi_start_h)) 32 | hend = min(data_height, max(0, hend + roi_start_h)) 33 | for pw in range(self.pooled_width): 34 | wstart = int(np.floor(pw * bin_size_w)) 35 | wend = int(np.ceil((pw + 1) * bin_size_w)) 36 | wstart = min(data_width, max(0, wstart + roi_start_w)) 37 | wend = min(data_width, max(0, wend + roi_start_w)) 38 | 39 | is_empty = (hend <= hstart) or(wend <= wstart) 40 | if is_empty: 41 | outputs[roi_ind, :, ph, pw] = 0 42 | else: 43 | data = features[batch_ind] 44 | outputs[roi_ind, :, ph, pw] = torch.max( 45 | torch.max(data[:, hstart:hend, wstart:wend], 1)[0], 2)[0].view(-1) 46 | 47 | return outputs 48 | 49 | -------------------------------------------------------------------------------- 
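Both RoIPool variants above consume rois rows of the form (batch_ind, x1, y1, x2, y2) with coordinates in input-image space; spatial_scale (e.g. 1/16 for a stride-16 backbone) projects them onto the feature map, and each RoI is max-pooled into a fixed pooled_height x pooled_width grid of bins. A minimal sketch against the pure-Python fallback follows, under the same assumptions as the RoIAlign example (pre-1.0 PyTorch, CUDA available, lib/ on the import path); shapes and boxes are illustrative, and the CUDA-backed wrapper in roi_pool.py is invoked the same way.

import torch
from torch.autograd import Variable

from layer_utils.roi_pooling.roi_pool_py import RoIPool

# Stride-16 feature map of a roughly 600x800 input image.
features = Variable(torch.randn(1, 512, 38, 50)).cuda()

# Two RoIs on batch image 0, given in original image coordinates.
rois = Variable(torch.FloatTensor([[0, 64, 64, 320, 256],
                                   [0, 160, 128, 480, 480]])).cuda()

# spatial_scale = 1/16 rescales image coordinates onto the 38x50 map;
# every RoI is then max-pooled into a 7x7 grid of bins.
roi_pool = RoIPool(pooled_height=7, pooled_width=7, spatial_scale=1.0 / 16)
pooled = roi_pool(features, rois)
print(pooled.size())  # torch.Size([2, 512, 7, 7])

Unlike RoIAlign, RoIPool quantizes each bin to integer feature-map cells and takes a hard max, so gradients flow only to the argmax locations; the argmax buffer kept by RoIPoolFunction serves exactly this purpose in the CUDA backward pass below.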
/lib/layer_utils/roi_pooling/src/cuda/roi_pooling_kernel.cu: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | #include <stdio.h> 6 | #include <math.h> 7 | #include <float.h> 8 | #include "roi_pooling_kernel.h" 9 | 10 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 11 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ 12 | i += blockDim.x * gridDim.x) 13 | 14 | 15 | __global__ void ROIPoolForward(const int nthreads, const float* bottom_data, 16 | const float spatial_scale, const int height, const int width, 17 | const int channels, const int pooled_height, const int pooled_width, 18 | const float* bottom_rois, float* top_data, int* argmax_data) 19 | { 20 | CUDA_1D_KERNEL_LOOP(index, nthreads) 21 | { 22 | // (n, c, ph, pw) is an element in the pooled output 23 | int n = index; 24 | int pw = n % pooled_width; 25 | n /= pooled_width; 26 | int ph = n % pooled_height; 27 | n /= pooled_height; 28 | int c = n % channels; 29 | n /= channels; 30 | 31 | bottom_rois += n * 5; 32 | int roi_batch_ind = bottom_rois[0]; 33 | int roi_start_w = round(bottom_rois[1] * spatial_scale); 34 | int roi_start_h = round(bottom_rois[2] * spatial_scale); 35 | int roi_end_w = round(bottom_rois[3] * spatial_scale); 36 | int roi_end_h = round(bottom_rois[4] * spatial_scale); 37 | 38 | // Force malformed ROIs to be 1x1 39 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1); 40 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1); 41 | float bin_size_h = (float)(roi_height) / (float)(pooled_height); 42 | float bin_size_w = (float)(roi_width) / (float)(pooled_width); 43 | 44 | int hstart = (int)(floor((float)(ph) * bin_size_h)); 45 | int wstart = (int)(floor((float)(pw) * bin_size_w)); 46 | int hend = (int)(ceil((float)(ph + 1) * bin_size_h)); 47 | int wend = (int)(ceil((float)(pw + 1) * bin_size_w)); 48 | 49 | // Add roi offsets and clip to input boundaries 50 | hstart = fminf(fmaxf(hstart + roi_start_h, 0), height); 51 | hend = fminf(fmaxf(hend + roi_start_h, 0), height); 52 | wstart = fminf(fmaxf(wstart + roi_start_w, 0), width); 53 | wend = fminf(fmaxf(wend + roi_start_w, 0), width); 54 | bool is_empty = (hend <= hstart) || (wend <= wstart); 55 | 56 | // Define an empty pooling region to be zero 57 | float maxval = is_empty ?
0 : -FLT_MAX; 58 | // If nothing is pooled, argmax = -1 causes nothing to be backprop'd 59 | int maxidx = -1; 60 | bottom_data += roi_batch_ind * channels * height * width; 61 | for (int h = hstart; h < hend; ++h) { 62 | for (int w = wstart; w < wend; ++w) { 63 | // int bottom_index = (h * width + w) * channels + c; 64 | int bottom_index = (c * height + h) * width + w; 65 | if (bottom_data[bottom_index] > maxval) { 66 | maxval = bottom_data[bottom_index]; 67 | maxidx = bottom_index; 68 | } 69 | } 70 | } 71 | top_data[index] = maxval; 72 | if (argmax_data != NULL) 73 | argmax_data[index] = maxidx; 74 | } 75 | } 76 | 77 | 78 | int ROIPoolForwardLaucher( 79 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 80 | const int width, const int channels, const int pooled_height, 81 | const int pooled_width, const float* bottom_rois, 82 | float* top_data, int* argmax_data, cudaStream_t stream) 83 | { 84 | const int kThreadsPerBlock = 1024; 85 | const int output_size = num_rois * pooled_height * pooled_width * channels; 86 | cudaError_t err; 87 | 88 | 89 | ROIPoolForward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>( 90 | output_size, bottom_data, spatial_scale, height, width, channels, pooled_height, 91 | pooled_width, bottom_rois, top_data, argmax_data); 92 | 93 | err = cudaGetLastError(); 94 | if(cudaSuccess != err) 95 | { 96 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 97 | exit( -1 ); 98 | } 99 | 100 | return 1; 101 | } 102 | 103 | 104 | __global__ void ROIPoolBackward(const int nthreads, const float* top_diff, 105 | const int* argmax_data, const int num_rois, const float spatial_scale, 106 | const int height, const int width, const int channels, 107 | const int pooled_height, const int pooled_width, float* bottom_diff, 108 | const float* bottom_rois) { 109 | CUDA_1D_KERNEL_LOOP(index, nthreads) 110 | { 111 | 112 | // (n, c, h, w) coords of an element in the bottom (input) feature map 113 | int n = index; 114 | int w = n % width; 115 | n /= width; 116 | int h = n % height; 117 | n /= height; 118 | int c = n % channels; 119 | n /= channels; 120 | 121 | float gradient = 0; 122 | // Accumulate gradient over all ROIs that pooled this element 123 | for (int roi_n = 0; roi_n < num_rois; ++roi_n) 124 | { 125 | const float* offset_bottom_rois = bottom_rois + roi_n * 5; 126 | int roi_batch_ind = offset_bottom_rois[0]; 127 | // Skip if ROI's batch index doesn't match n 128 | if (n != roi_batch_ind) { 129 | continue; 130 | } 131 | 132 | int roi_start_w = round(offset_bottom_rois[1] * spatial_scale); 133 | int roi_start_h = round(offset_bottom_rois[2] * spatial_scale); 134 | int roi_end_w = round(offset_bottom_rois[3] * spatial_scale); 135 | int roi_end_h = round(offset_bottom_rois[4] * spatial_scale); 136 | 137 | // Skip if ROI doesn't include (h, w) 138 | const bool in_roi = (w >= roi_start_w && w <= roi_end_w && 139 | h >= roi_start_h && h <= roi_end_h); 140 | if (!in_roi) { 141 | continue; 142 | } 143 | 144 | int offset = roi_n * pooled_height * pooled_width * channels; 145 | const float* offset_top_diff = top_diff + offset; 146 | const int* offset_argmax_data = argmax_data + offset; 147 | 148 | // Compute feasible set of pooled units that could have pooled 149 | // this bottom unit 150 | 151 | // Force malformed ROIs to be 1x1 152 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1); 153 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1); 154 | 155 | float bin_size_h = (float)(roi_height) /
(float)(pooled_height); 156 | float bin_size_w = (float)(roi_width) / (float)(pooled_width); 157 | 158 | int phstart = floor((float)(h - roi_start_h) / bin_size_h); 159 | int phend = ceil((float)(h - roi_start_h + 1) / bin_size_h); 160 | int pwstart = floor((float)(w - roi_start_w) / bin_size_w); 161 | int pwend = ceil((float)(w - roi_start_w + 1) / bin_size_w); 162 | 163 | phstart = fminf(fmaxf(phstart, 0), pooled_height); 164 | phend = fminf(fmaxf(phend, 0), pooled_height); 165 | pwstart = fminf(fmaxf(pwstart, 0), pooled_width); 166 | pwend = fminf(fmaxf(pwend, 0), pooled_width); 167 | 168 | for (int ph = phstart; ph < phend; ++ph) { 169 | for (int pw = pwstart; pw < pwend; ++pw) { 170 | if (offset_argmax_data[(c * pooled_height + ph) * pooled_width + pw] == index) 171 | { 172 | gradient += offset_top_diff[(c * pooled_height + ph) * pooled_width + pw]; 173 | } 174 | } 175 | } 176 | } 177 | bottom_diff[index] = gradient; 178 | } 179 | } 180 | 181 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 182 | const int height, const int width, const int channels, const int pooled_height, 183 | const int pooled_width, const float* bottom_rois, 184 | float* bottom_diff, const int* argmax_data, cudaStream_t stream) 185 | { 186 | const int kThreadsPerBlock = 1024; 187 | const int output_size = batch_size * height * width * channels; 188 | cudaError_t err; 189 | 190 | ROIPoolBackward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>( 191 | output_size, top_diff, argmax_data, num_rois, spatial_scale, height, width, channels, pooled_height, 192 | pooled_width, bottom_diff, bottom_rois); 193 | 194 | err = cudaGetLastError(); 195 | if(cudaSuccess != err) 196 | { 197 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 198 | exit( -1 ); 199 | } 200 | 201 | return 1; 202 | } 203 | 204 | 205 | #ifdef __cplusplus 206 | } 207 | #endif 208 | 209 | 210 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/cuda/roi_pooling_kernel.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/lib/layer_utils/roi_pooling/src/cuda/roi_pooling_kernel.cu.o -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/cuda/roi_pooling_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_POOLING_KERNEL 2 | #define _ROI_POOLING_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int ROIPoolForwardLaucher( 9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 10 | const int width, const int channels, const int pooled_height, 11 | const int pooled_width, const float* bottom_rois, 12 | float* top_data, int* argmax_data, cudaStream_t stream); 13 | 14 | 15 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 16 | const int height, const int width, const int channels, const int pooled_height, 17 | const int pooled_width, const float* bottom_rois, 18 | float* bottom_diff, const int* argmax_data, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- 
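Both kernels flatten their output tensor and walk it with the grid-stride `CUDA_1D_KERNEL_LOOP`, so each thread first decodes its flat `index` back into coordinates, with the last dimension varying fastest. A small illustrative sketch of the decoding arithmetic used in `ROIPoolForward` (plain Python, purely mirroring the integer divisions in the kernel):

```python
def decode_pooled_index(index, channels, pooled_height, pooled_width):
    """Invert index = ((n * channels + c) * pooled_height + ph) * pooled_width + pw."""
    pw = index % pooled_width
    index //= pooled_width
    ph = index % pooled_height
    index //= pooled_height
    c = index % channels
    n = index // channels
    return n, c, ph, pw

# pw advances fastest, then ph, then the channel, then the ROI index n.
assert decode_pooled_index(0, 512, 7, 7) == (0, 0, 0, 0)
assert decode_pooled_index(7 * 7, 512, 7, 7) == (0, 1, 0, 0)
assert decode_pooled_index(512 * 7 * 7, 512, 7, 7) == (1, 0, 0, 0)
```

The backward kernel runs the same loop over the flattened *input* gradient instead, which is why it decodes `(n, c, h, w)` and then searches the pooled cells whose bins could have covered that input element.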
/lib/layer_utils/roi_pooling/src/roi_pooling.c: -------------------------------------------------------------------------------- 1 | #include <TH/TH.h> 2 | #include <math.h> 3 | 4 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 5 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output) 6 | { 7 | // Grab the input tensor 8 | float * data_flat = THFloatTensor_data(features); 9 | float * rois_flat = THFloatTensor_data(rois); 10 | 11 | float * output_flat = THFloatTensor_data(output); 12 | 13 | // Number of ROIs 14 | int num_rois = THFloatTensor_size(rois, 0); 15 | int size_rois = THFloatTensor_size(rois, 1); 16 | // batch size 17 | int batch_size = THFloatTensor_size(features, 0); 18 | if(batch_size != 1) 19 | { 20 | return 0; 21 | } 22 | // data height (note: this CPU path assumes NHWC feature layout) 23 | int data_height = THFloatTensor_size(features, 1); 24 | // data width 25 | int data_width = THFloatTensor_size(features, 2); 26 | // Number of channels 27 | int num_channels = THFloatTensor_size(features, 3); 28 | 29 | // Set all elements of the output tensor to -1. 30 | THFloatStorage_fill(THFloatTensor_storage(output), -1); 31 | 32 | // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R 33 | int index_roi = 0; 34 | int index_output = 0; 35 | int n; 36 | for (n = 0; n < num_rois; ++n) 37 | { 38 | int roi_batch_ind = rois_flat[index_roi + 0]; 39 | int roi_start_w = round(rois_flat[index_roi + 1] * spatial_scale); 40 | int roi_start_h = round(rois_flat[index_roi + 2] * spatial_scale); 41 | int roi_end_w = round(rois_flat[index_roi + 3] * spatial_scale); 42 | int roi_end_h = round(rois_flat[index_roi + 4] * spatial_scale); 43 | // CHECK_GE(roi_batch_ind, 0); 44 | // CHECK_LT(roi_batch_ind, batch_size); 45 | 46 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1); 47 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1); 48 | float bin_size_h = (float)(roi_height) / (float)(pooled_height); 49 | float bin_size_w = (float)(roi_width) / (float)(pooled_width); 50 | 51 | int index_data = roi_batch_ind * data_height * data_width * num_channels; 52 | const int output_area = pooled_width * pooled_height; 53 | 54 | int c, ph, pw; 55 | for (ph = 0; ph < pooled_height; ++ph) 56 | { 57 | for (pw = 0; pw < pooled_width; ++pw) 58 | { 59 | int hstart = (floor((float)(ph) * bin_size_h)); 60 | int wstart = (floor((float)(pw) * bin_size_w)); 61 | int hend = (ceil((float)(ph + 1) * bin_size_h)); 62 | int wend = (ceil((float)(pw + 1) * bin_size_w)); 63 | 64 | hstart = fminf(fmaxf(hstart + roi_start_h, 0), data_height); 65 | hend = fminf(fmaxf(hend + roi_start_h, 0), data_height); 66 | wstart = fminf(fmaxf(wstart + roi_start_w, 0), data_width); 67 | wend = fminf(fmaxf(wend + roi_start_w, 0), data_width); 68 | 69 | const int pool_index = index_output + (ph * pooled_width + pw); 70 | int is_empty = (hend <= hstart) || (wend <= wstart); 71 | if (is_empty) 72 | { 73 | for (c = 0; c < num_channels * output_area; c += output_area) 74 | { 75 | output_flat[pool_index + c] = 0; 76 | } 77 | } 78 | else 79 | { 80 | int h, w, c; 81 | for (h = hstart; h < hend; ++h) 82 | { 83 | for (w = wstart; w < wend; ++w) 84 | { 85 | for (c = 0; c < num_channels; ++c) 86 | { 87 | const int index = (h * data_width + w) * num_channels + c; 88 | if (data_flat[index_data + index] > output_flat[pool_index + c * output_area]) 89 | { 90 | output_flat[pool_index + c * output_area] = data_flat[index_data + index]; 91 | } 92 | } 93 | } 94 | } 95 | } 96 | } 97 | } 98 | 99 | // Increment ROI index 100 | index_roi += size_rois; 101 | index_output +=
pooled_height * pooled_width * num_channels; 102 | } 103 | return 1; 104 | } -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/roi_pooling.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output); -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/roi_pooling_cuda.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include <math.h> 3 | #include "cuda/roi_pooling_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 16 | 17 | // Number of ROIs 18 | int num_rois = THCudaTensor_size(state, rois, 0); 19 | int size_rois = THCudaTensor_size(state, rois, 1); 20 | if (size_rois != 5) 21 | { 22 | return 0; 23 | } 24 | 25 | // batch size 26 | int batch_size = THCudaTensor_size(state, features, 0); 27 | if (batch_size != 1) 28 | { 29 | return 0; 30 | } 31 | // data height 32 | int data_height = THCudaTensor_size(state, features, 2); 33 | // data width 34 | int data_width = THCudaTensor_size(state, features, 3); 35 | // Number of channels 36 | int num_channels = THCudaTensor_size(state, features, 1); 37 | 38 | cudaStream_t stream = THCState_getCurrentStream(state); 39 | 40 | ROIPoolForwardLaucher( 41 | data_flat, spatial_scale, num_rois, data_height, 42 | data_width, num_channels, pooled_height, 43 | pooled_width, rois_flat, 44 | output_flat, argmax_flat, stream); 45 | 46 | return 1; 47 | } 48 | 49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 50 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax) 51 | { 52 | // Grab the input tensor 53 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 54 | float * rois_flat = THCudaTensor_data(state, rois); 55 | 56 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 57 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 58 | 59 | // Number of ROIs 60 | int num_rois = THCudaTensor_size(state, rois, 0); 61 | int size_rois = THCudaTensor_size(state, rois, 1); 62 | if (size_rois != 5) 63 | { 64 | return 0; 65 | } 66 | 67 | // batch size 68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 69 | if (batch_size != 1) 70 | { 71 | return 0; 72 | } 73 | // data height 74 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 75 | // data width 76 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 77 | // Number of channels 78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 79 | 80 | cudaStream_t stream = THCState_getCurrentStream(state); 81 | ROIPoolBackwardLaucher( 82 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 83 | data_width, num_channels, pooled_height, 84 | pooled_width, rois_flat, 85 | bottom_grad_flat, argmax_flat, stream); 86 | 87 | return 1; 88 | }
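These two entry points are what the cffi-built `_ext.roi_pooling` module exposes to Python; `RoIPoolFunction` in `roi_pool.py` above is a thin wrapper that allocates `output` and `argmax` and forwards to them. A minimal sketch of driving the forward pass directly — assuming the extension has been built via `lib/make.sh`, and respecting the guards above (single-image batch, 5-column ROIs); shapes are illustrative:

```python
import torch
from layer_utils.roi_pooling._ext import roi_pooling  # produced by build.py

pooled_h, pooled_w, spatial_scale = 7, 7, 1.0 / 16

features = torch.randn(1, 512, 38, 50).cuda()        # (N=1, C, H, W)
rois = torch.Tensor([[0, 16, 16, 320, 240]]).cuda()  # [batch_index, x1, y1, x2, y2]
output = torch.zeros(rois.size(0), 512, pooled_h, pooled_w).cuda()
argmax = torch.IntTensor(rois.size(0), 512, pooled_h, pooled_w).zero_().cuda()

roi_pooling.roi_pooling_forward_cuda(pooled_h, pooled_w, spatial_scale,
                                     features, rois, output, argmax)
# output now holds the max-pooled features; argmax records, per output cell,
# the flat index of the winning input element, which the backward pass reuses.
```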
-------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/roi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax); 3 | 4 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax); -------------------------------------------------------------------------------- /lib/layer_utils/snippets.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | from layer_utils.generate_anchors import generate_anchors 12 | 13 | def generate_anchors_pre(height, width, feat_stride, anchor_scales=(8,16,32), anchor_ratios=(0.5,1,2)): 14 | """ A wrapper function to generate anchors given different scales 15 | Also return the number of anchors in variable 'length' 16 | """ 17 | anchors = generate_anchors(ratios=np.array(anchor_ratios), scales=np.array(anchor_scales)) 18 | A = anchors.shape[0] 19 | shift_x = np.arange(0, width) * feat_stride 20 | shift_y = np.arange(0, height) * feat_stride 21 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 22 | shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() 23 | K = shifts.shape[0] 24 | # width changes faster, so here it is H, W, C 25 | anchors = anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2)) 26 | anchors = anchors.reshape((K * A, 4)).astype(np.float32, copy=False) 27 | length = np.int32(anchors.shape[0]) 28 | 29 | return anchors, length 30 | -------------------------------------------------------------------------------- /lib/make.sh: -------------------------------------------------------------------------------- 1 | CUDA_ARCH="-gencode arch=compute_30,code=sm_30 \ 2 | -gencode arch=compute_35,code=sm_35 \ 3 | -gencode arch=compute_50,code=sm_50 \ 4 | -gencode arch=compute_52,code=sm_52 \ 5 | -gencode arch=compute_60,code=sm_60 \ 6 | -gencode arch=compute_61,code=sm_61 \ 7 | -gencode arch=compute_70,code=sm_70 " 8 | 9 | # Build RoiPooling module 10 | cd layer_utils/roi_pooling/src/cuda 11 | echo "Compiling roi_pooling kernels by nvcc..." 12 | nvcc -c -o roi_pooling_kernel.cu.o roi_pooling_kernel.cu -x cu -Xcompiler -fPIC $CUDA_ARCH 13 | cd ../../ 14 | python build.py 15 | cd ../../ 16 | 17 | # Build RoIAlign 18 | cd layer_utils/roi_align/src/cuda 19 | echo 'Compiling crop_and_resize kernels by nvcc...' 20 | nvcc -c -o crop_and_resize_kernel.cu.o crop_and_resize_kernel.cu -x cu -Xcompiler -fPIC $CUDA_ARCH 21 | cd ../../ 22 | python build.py 23 | cd ../../ 24 | 25 | # Build NMS 26 | cd nms/src/cuda 27 | echo "Compiling nms kernels by nvcc..." 
28 | nvcc -c -o nms_kernel.cu.o nms_kernel.cu -x cu -Xcompiler -fPIC $CUDA_ARCH 29 | cd ../../ 30 | python build.py 31 | cd ../ 32 | -------------------------------------------------------------------------------- /lib/model/__init__.py: -------------------------------------------------------------------------------- 1 | from . import config 2 | -------------------------------------------------------------------------------- /lib/model/bbox_transform.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | import torch 13 | 14 | def bbox_transform(ex_rois, gt_rois): 15 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 16 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 17 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 18 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 19 | 20 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 21 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 22 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 23 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 24 | 25 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 26 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 27 | targets_dw = torch.log(gt_widths / ex_widths) 28 | targets_dh = torch.log(gt_heights / ex_heights) 29 | 30 | targets = torch.stack( 31 | (targets_dx, targets_dy, targets_dw, targets_dh), 1) 32 | return targets 33 | 34 | 35 | def bbox_transform_inv(boxes, deltas): 36 | # Input should be both tensor or both Variable and on the same device 37 | if len(boxes) == 0: 38 | return deltas.detach() * 0 39 | 40 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 41 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 42 | ctr_x = boxes[:, 0] + 0.5 * widths 43 | ctr_y = boxes[:, 1] + 0.5 * heights 44 | 45 | dx = deltas[:, 0::4] 46 | dy = deltas[:, 1::4] 47 | dw = deltas[:, 2::4] 48 | dh = deltas[:, 3::4] 49 | 50 | pred_ctr_x = dx * widths.unsqueeze(1) + ctr_x.unsqueeze(1) 51 | pred_ctr_y = dy * heights.unsqueeze(1) + ctr_y.unsqueeze(1) 52 | pred_w = torch.exp(dw) * widths.unsqueeze(1) 53 | pred_h = torch.exp(dh) * heights.unsqueeze(1) 54 | 55 | pred_boxes = torch.cat(\ 56 | [_.unsqueeze(2) for _ in [pred_ctr_x - 0.5 * pred_w,\ 57 | pred_ctr_y - 0.5 * pred_h,\ 58 | pred_ctr_x + 0.5 * pred_w,\ 59 | pred_ctr_y + 0.5 * pred_h]], 2).view(len(boxes), -1) 60 | 61 | return pred_boxes 62 | 63 | 64 | def clip_boxes(boxes, im_shape): 65 | """ 66 | Clip boxes to image boundaries. 
67 | boxes must be tensor or Variable, im_shape can be anything but Variable 68 | """ 69 | 70 | if not hasattr(boxes, 'data'): 71 | boxes_ = boxes.numpy() 72 | 73 | boxes = boxes.view(boxes.size(0), -1, 4) 74 | boxes = torch.stack(\ 75 | [boxes[:,:,0].clamp(0, im_shape[1] - 1), 76 | boxes[:,:,1].clamp(0, im_shape[0] - 1), 77 | boxes[:,:,2].clamp(0, im_shape[1] - 1), 78 | boxes[:,:,3].clamp(0, im_shape[0] - 1)], 2).view(boxes.size(0), -1) 79 | 80 | return boxes 81 | -------------------------------------------------------------------------------- /lib/model/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from nms.pth_nms import pth_nms 12 | 13 | 14 | def nms(dets, thresh): 15 | """Dispatch to either CPU or GPU NMS implementations. 16 | Accept dets as tensor""" 17 | return pth_nms(dets, thresh) 18 | -------------------------------------------------------------------------------- /lib/model/test.py~: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import cv2 11 | import numpy as np 12 | try: 13 | import cPickle as pickle 14 | except ImportError: 15 | import pickle 16 | import os 17 | import math 18 | 19 | from utils.timer import Timer 20 | from model.nms_wrapper import nms 21 | from utils.blob import im_list_to_blob 22 | 23 | from model.config import cfg, get_output_dir 24 | from model.bbox_transform import clip_boxes, bbox_transform_inv 25 | 26 | import torch 27 | 28 | def _get_image_blob(im): 29 | """Converts an image into a network input. 
30 | Arguments: 31 | im (ndarray): a color image in BGR order 32 | Returns: 33 | blob (ndarray): a data blob holding an image pyramid 34 | im_scale_factors (list): list of image scales (relative to im) used 35 | in the image pyramid 36 | """ 37 | im_orig = im.astype(np.float32, copy=True) 38 | im_orig -= cfg.PIXEL_MEANS 39 | 40 | im_shape = im_orig.shape 41 | im_size_min = np.min(im_shape[0:2]) 42 | im_size_max = np.max(im_shape[0:2]) 43 | 44 | processed_ims = [] 45 | im_scale_factors = [] 46 | 47 | for target_size in cfg.TEST.SCALES: 48 | im_scale = float(target_size) / float(im_size_min) 49 | # Prevent the biggest axis from being more than MAX_SIZE 50 | if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE: 51 | im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max) 52 | im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, 53 | interpolation=cv2.INTER_LINEAR) 54 | im_scale_factors.append(im_scale) 55 | processed_ims.append(im) 56 | 57 | # Create a blob to hold the input images 58 | blob = im_list_to_blob(processed_ims) 59 | 60 | return blob, np.array(im_scale_factors) 61 | 62 | def _get_blobs(im): 63 | """Convert an image and RoIs within that image into network inputs.""" 64 | blobs = {} 65 | blobs['data'], im_scale_factors = _get_image_blob(im) 66 | 67 | return blobs, im_scale_factors 68 | 69 | def _clip_boxes(boxes, im_shape): 70 | """Clip boxes to image boundaries.""" 71 | # x1 >= 0 72 | boxes[:, 0::4] = np.maximum(boxes[:, 0::4], 0) 73 | # y1 >= 0 74 | boxes[:, 1::4] = np.maximum(boxes[:, 1::4], 0) 75 | # x2 < im_shape[1] 76 | boxes[:, 2::4] = np.minimum(boxes[:, 2::4], im_shape[1] - 1) 77 | # y2 < im_shape[0] 78 | boxes[:, 3::4] = np.minimum(boxes[:, 3::4], im_shape[0] - 1) 79 | return boxes 80 | 81 | def _rescale_boxes(boxes, inds, scales): 82 | """Rescale boxes according to image rescaling.""" 83 | for i in range(boxes.shape[0]): 84 | boxes[i,:] = boxes[i,:] / scales[int(inds[i])] 85 | 86 | return boxes 87 | 88 | def im_detect(net, im): 89 | blobs, im_scales = _get_blobs(im) 90 | assert len(im_scales) == 1, "Only single-image batch implemented" 91 | 92 | im_blob = blobs['data'] 93 | blobs['im_info'] = np.array([im_blob.shape[1], im_blob.shape[2], im_scales[0]], dtype=np.float32) 94 | 95 | _, scores, bbox_pred, rois, fc7, net_conv = net.test_image(blobs['data'], blobs['im_info']) 96 | 97 | boxes = rois[:, 1:5] / im_scales[0] 98 | scores = np.reshape(scores, [scores.shape[0], -1]) 99 | bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1]) 100 | if cfg.TEST.BBOX_REG: 101 | # Apply bounding-box regression deltas 102 | box_deltas = bbox_pred 103 | pred_boxes = bbox_transform_inv(torch.from_numpy(boxes), torch.from_numpy(box_deltas)).numpy() 104 | pred_boxes = _clip_boxes(pred_boxes, im.shape) 105 | else: 106 | # Simply repeat the boxes, once for each class 107 | pred_boxes = np.tile(boxes, (1, scores.shape[1])) 108 | 109 | return scores, pred_boxes#, fc7, net_conv 110 | 111 | def apply_nms(all_boxes, thresh): 112 | """Apply non-maximum suppression to all predicted boxes output by the 113 | test_net method. 
114 | """ 115 | num_classes = len(all_boxes) 116 | num_images = len(all_boxes[0]) 117 | nms_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)] 118 | for cls_ind in range(num_classes): 119 | for im_ind in range(num_images): 120 | dets = all_boxes[cls_ind][im_ind] 121 | if len(dets) == 0: 122 | continue 123 | 124 | x1 = dets[:, 0] 125 | y1 = dets[:, 1] 126 | x2 = dets[:, 2] 127 | y2 = dets[:, 3] 128 | scores = dets[:, 4] 129 | inds = np.where((x2 > x1) & (y2 > y1))[0] 130 | dets = dets[inds,:] 131 | if len(dets) == 0: 132 | continue 133 | 134 | keep = nms(torch.from_numpy(dets), thresh).numpy() 135 | if len(keep) == 0: 136 | continue 137 | nms_boxes[cls_ind][im_ind] = dets[keep, :].copy() 138 | return nms_boxes 139 | def draw_car_bb(im, bboxes, scores=[], thr=0.3, type='det'): 140 | bboxes = bboxes.astype(int) 141 | imgcv = np.copy(im) 142 | h, w, _ = imgcv.shape 143 | color = (255,0,0) 144 | if type == 'gt': 145 | scores = np.ones(len(bboxes)) 146 | color = (0,0,255) 147 | 148 | for i, box in enumerate(bboxes): 149 | if scores[i] < thr: 150 | continue 151 | 152 | thick = int((h + w) / 1000) #original: int((h + w) / 300) 153 | cv2.rectangle(imgcv, 154 | (box[0], box[1]), (box[2], box[3]), 155 | color, thick) 156 | mess = '%s: %.3f' % ('Car', scores[i]) 157 | if type == 'gt': 158 | mess = '' 159 | cv2.putText(imgcv, mess, (box[0], box[1] - 12), 160 | 0, 1e-3 * h / 2., color, 2) 161 | 162 | return imgcv 163 | 164 | 165 | def test_net(net, imdb, weights_filename, max_per_image=100, thresh=0.): 166 | vis = True 167 | 168 | np.random.seed(cfg.RNG_SEED) 169 | """Test a Fast R-CNN network on an image database.""" 170 | num_images = len(imdb.image_index) 171 | # all detections are collected into: 172 | # all_boxes[cls][image] = N x 5 array of detections in 173 | # (x1, y1, x2, y2, score) 174 | all_boxes = [[[] for _ in range(num_images)] 175 | for _ in range(imdb.num_classes)] 176 | 177 | output_dir = get_output_dir(imdb, weights_filename) 178 | 179 | if vis and 'cityscapes' in imdb.name: 180 | gt_roidb = [imdb._load_cityscapes_annotation(index) 181 | for index in imdb.image_index] 182 | elif vis and 'KITTI' in imdb.name: 183 | gt_roidb = [imdb._load_KITTI_annotation(index) 184 | for index in imdb.image_index] 185 | else: 186 | gt_roidb = None 187 | 188 | # timers 189 | _t = {'im_detect' : Timer(), 'misc' : Timer()} 190 | 191 | for i in range(num_images): 192 | im = cv2.imread(imdb.image_path_at(i)) 193 | 194 | _t['im_detect'].tic() 195 | scores, boxes = im_detect(net, im) 196 | _t['im_detect'].toc() 197 | 198 | _t['misc'].tic() 199 | 200 | # skip j = 0, because it's the background class 201 | for j in range(1, imdb.num_classes): 202 | inds = np.where(scores[:, j] > thresh)[0] 203 | cls_scores = scores[inds, j] 204 | cls_boxes = boxes[inds, j*4:(j+1)*4] 205 | cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ 206 | .astype(np.float32, copy=False) 207 | keep = nms(torch.from_numpy(cls_dets), cfg.TEST.NMS).numpy() if cls_dets.size > 0 else [] 208 | cls_dets = cls_dets[keep, :] 209 | all_boxes[j][i] = cls_dets 210 | 211 | # Limit to max_per_image detections *over all classes* 212 | if max_per_image > 0: 213 | image_scores = np.hstack([all_boxes[j][i][:, -1] 214 | for j in range(1, imdb.num_classes)]) 215 | if len(image_scores) > max_per_image: 216 | image_thresh = np.sort(image_scores)[-max_per_image] 217 | for j in range(1, imdb.num_classes): 218 | keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] 219 | all_boxes[j][i] = all_boxes[j][i][keep, :] 220 | _t['misc'].toc()
221 | 222 | print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ 223 | .format(i + 1, num_images, _t['im_detect'].average_time(), 224 | _t['misc'].average_time())) 225 | 226 | if vis and gt_roidb: 227 | #draw ground truth boxes 228 | im2show = draw_car_bb(im, gt_roidb[i]['boxes'], type='gt') 229 | 230 | #draw detected boxes 231 | im2show = draw_car_bb(im2show, np.squeeze(all_boxes[1][i][:, :-1]), np.squeeze(all_boxes[1][i][:,-1])) #draw class 1: car 232 | cv2.imwrite('/home/disk1/DA/pytorch-faster-rcnn/vis/inDomain/'+imdb.image_index[i]+'.png', im2show) 233 | #cv2.imshow('test', im2show) 234 | #cv2.waitKey(0) 235 | 236 | 237 | 238 | det_file = os.path.join(output_dir, 'detections.pkl') 239 | with open(det_file, 'wb') as f: 240 | pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL) 241 | 242 | print('Evaluating detections') 243 | imdb.evaluate_detections(all_boxes, output_dir) 244 | 245 | -------------------------------------------------------------------------------- /lib/nets/.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | 14 | from model.config import cfg 15 | 16 | class FCDiscriminator_img(nn.Module): 17 | 18 | def __init__(self, num_classes, ndf = 64): 19 | super(FCDiscriminator_img, self).__init__() 20 | 21 | # self.conv1 = nn.Conv2d(num_classes, ndf, kernel_size=4, stride=2, padding=1) 22 | # self.conv2 = nn.Conv2d(ndf, ndf*2, kernel_size=4, stride=2, padding=1) 23 | # self.conv3 = nn.Conv2d(ndf*2, ndf*4, kernel_size=4, stride=2, padding=1) 24 | # self.conv4 = nn.Conv2d(ndf*4, ndf*8, kernel_size=4, stride=2, padding=1) 25 | # self.classifier = nn.Conv2d(ndf*8, 1, kernel_size=4, stride=2, padding=1) 26 | 27 | self.conv1 = nn.Conv2d(num_classes, ndf, kernel_size=3, padding=1) 28 | self.conv2 = nn.Conv2d(ndf, ndf, kernel_size=3, padding=1) 29 | self.conv3 = nn.Conv2d(ndf, ndf, kernel_size=3, padding=1) 30 | # self.classifier = nn.Conv2d(ndf, 1, kernel_size=3, padding=1) 31 | 32 | self.leaky_relu = nn.LeakyReLU(negative_slope=0.2, inplace=True) 33 | #self.up_sample = nn.Upsample(scale_factor=32, mode='bilinear') 34 | #self.sigmoid = nn.Sigmoid() 35 | 36 | 37 | def forward(self, x): 38 | x = self.conv1(x) 39 | x = self.leaky_relu(x) 40 | # x = self.conv2(x) 41 | # x = self.leaky_relu(x) 42 | # x = self.conv3(x) 43 | # x = self.leaky_relu(x) 44 | # x = self.conv4(x) 45 | # x = self.leaky_relu(x) 46 | x = self.classifier(x) 47 | #x = self.up_sample(x) 48 | #x = self.sigmoid(x) 49 | 50 | return x -------------------------------------------------------------------------------- /lib/nets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/lib/nets/__init__.py -------------------------------------------------------------------------------- /lib/nets/discriminator_img.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT 
License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | 14 | from model.config import cfg 15 | 16 | class FCDiscriminator_img(nn.Module): 17 | 18 | def __init__(self, num_classes, ndf = 64): 19 | super(FCDiscriminator_img, self).__init__() 20 | 21 | self.conv1 = nn.Conv2d(num_classes, ndf, kernel_size=3, padding=1) 22 | self.conv2 = nn.Conv2d(ndf, ndf, kernel_size=3, padding=1) 23 | self.conv3 = nn.Conv2d(ndf, ndf, kernel_size=3, padding=1) 24 | self.classifier = nn.Conv2d(ndf, 1, kernel_size=3, padding=1) 25 | 26 | self.leaky_relu = nn.LeakyReLU(negative_slope=0.2, inplace=True) 27 | 28 | 29 | def forward(self, x): 30 | x = self.conv1(x) 31 | x = self.leaky_relu(x) 32 | x = self.conv2(x) 33 | x = self.leaky_relu(x) 34 | x = self.conv3(x) 35 | x = self.leaky_relu(x) 36 | x = self.classifier(x) 37 | 38 | return x -------------------------------------------------------------------------------- /lib/nets/discriminator_inst.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import torch 11 | import torch.nn as nn 12 | 13 | class FCDiscriminator_inst(nn.Module): 14 | 15 | def __init__(self, in_channel, ndf = 4096): 16 | super(FCDiscriminator_inst, self).__init__() 17 | 18 | self.fc1 = nn.Linear(in_channel, ndf) 19 | self.fc2 = nn.Linear(ndf, ndf) 20 | self.fc3 = nn.Linear(ndf, ndf) 21 | self.classifier = nn.Linear(ndf, 1) 22 | 23 | self.leaky_relu = nn.LeakyReLU(negative_slope=0.2, inplace=True) 24 | self.dropout = nn.Dropout() 25 | 26 | 27 | def forward(self, x): 28 | x = x.view(x.size()[0], -1) 29 | x = self.fc1(x) 30 | x = self.leaky_relu(x) 31 | # x = self.dropout(x) 32 | x = self.fc2(x) 33 | x = self.leaky_relu(x) 34 | # x = self.dropout(x) 35 | x = self.fc3(x) 36 | x = self.leaky_relu(x) 37 | # x = self.dropout(x) 38 | x = self.classifier(x) 39 | 40 | return x -------------------------------------------------------------------------------- /lib/nets/vgg16.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | from nets.network import Network 11 | from model.config import cfg 12 | 13 | import torch 14 | import torch.nn as nn 15 | import torch.nn.functional as F 16 | from torch.autograd import Variable 17 | import math 18 | import torchvision.models as models 19 | 20 | class vgg16(Network): 21 | def __init__(self): 22 | Network.__init__(self) 23 | self._feat_stride = [16, ] 24 | self._feat_compress = [1. 
/ float(self._feat_stride[0]), ] 25 | self._net_conv_channels = 512 26 | self._fc7_channels = 4096 27 | 28 | def _init_head_tail(self): 29 | self.vgg = models.vgg16() 30 | # Remove fc8 31 | self.vgg.classifier = nn.Sequential(*list(self.vgg.classifier._modules.values())[:-1]) 32 | 33 | # Fix the layers before conv3: 34 | #for layer in range(10): 35 | # for p in self.vgg.features[layer].parameters(): p.requires_grad = False 36 | 37 | # self.vgg.features._modules['28'] = nn.Conv2d(512, 1024, [3, 3], padding=1) #for feature_separate 38 | 39 | # not using the last maxpool layer 40 | self._layers['head'] = nn.Sequential(*list(self.vgg.features._modules.values())[:-1]) 41 | 42 | ## 43 | # self.vgg2 = models.vgg16() 44 | # self._layers['head_2'] = nn.Sequential(*list(self.vgg2.features._modules.values())[:-1]) 45 | 46 | def _image_to_head(self): 47 | net_conv = self._layers['head'](self._image) 48 | self._act_summaries['conv'] = net_conv 49 | 50 | return net_conv 51 | 52 | # def _image_to_head_branch(self): 53 | # net_conv2 = self._layers['head_2'](self._image) 54 | 55 | # return net_conv2 56 | 57 | def _head_to_tail(self, pool5): 58 | pool5_flat = pool5.view(pool5.size(0), -1) 59 | fc7 = self.vgg.classifier(pool5_flat) 60 | 61 | return fc7 62 | 63 | def load_pretrained_cnn(self, state_dict): 64 | #load from previous network weight 65 | netDict = self.state_dict() 66 | stateDict = {k: v for k, v in state_dict.items() if k in netDict} 67 | 68 | #print('load pretrained:', stateDict.keys()) 69 | netDict.update(stateDict) 70 | nn.Module.load_state_dict(self, netDict) 71 | self.vgg.load_state_dict({k.replace('vgg.', ''):v for k,v in state_dict.items() if k.replace('vgg.', '') in self.vgg.state_dict()}) #loading pretrained vgg.pth 72 | 73 | -------------------------------------------------------------------------------- /lib/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/lib/nms/__init__.py -------------------------------------------------------------------------------- /lib/nms/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/lib/nms/_ext/__init__.py -------------------------------------------------------------------------------- /lib/nms/_ext/nms/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._nms import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/nms/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/nms.c'] 7 | headers = ['src/nms.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/nms_cuda.c'] 14 | headers += ['src/nms_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = 
os.path.dirname(os.path.realpath(__file__)) 19 | print(this_file) 20 | extra_objects = ['src/cuda/nms_kernel.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | 23 | ffi = create_extension( 24 | '_ext.nms', 25 | headers=headers, 26 | sources=sources, 27 | define_macros=defines, 28 | relative_to=__file__, 29 | with_cuda=with_cuda, 30 | extra_objects=extra_objects 31 | ) 32 | 33 | if __name__ == '__main__': 34 | ffi.build() 35 | -------------------------------------------------------------------------------- /lib/nms/pth_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from ._ext import nms 3 | import numpy as np 4 | 5 | def pth_nms(dets, thresh): 6 | """ 7 | dets has to be a tensor 8 | """ 9 | if not dets.is_cuda: 10 | x1 = dets[:, 0] 11 | y1 = dets[:, 1] 12 | x2 = dets[:, 2] 13 | y2 = dets[:, 3] 14 | scores = dets[:, 4] 15 | 16 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 17 | order = scores.sort(0, descending=True)[1] 18 | # order = torch.from_numpy(np.ascontiguousarray(scores.numpy().argsort()[::-1])).long() 19 | 20 | keep = torch.LongTensor(dets.size(0)) 21 | num_out = torch.LongTensor(1) 22 | nms.cpu_nms(keep, num_out, dets, order, areas, thresh) 23 | 24 | return keep[:num_out[0]] 25 | else: 26 | x1 = dets[:, 0] 27 | y1 = dets[:, 1] 28 | x2 = dets[:, 2] 29 | y2 = dets[:, 3] 30 | scores = dets[:, 4] 31 | 32 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 33 | order = scores.sort(0, descending=True)[1] 34 | # order = torch.from_numpy(np.ascontiguousarray(scores.cpu().numpy().argsort()[::-1])).long().cuda() 35 | 36 | dets = dets[order].contiguous() 37 | 38 | keep = torch.LongTensor(dets.size(0)) 39 | num_out = torch.LongTensor(1) 40 | # keep = torch.cuda.LongTensor(dets.size(0)) 41 | # num_out = torch.cuda.LongTensor(1) 42 | nms.gpu_nms(keep, num_out, dets, thresh) 43 | 44 | return order[keep[:num_out[0]].cuda()].contiguous() 45 | # return order[keep[:num_out[0]]].contiguous() -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #include <stdio.h> 12 | #include <math.h> 13 | #include <float.h> 14 | #include "nms_kernel.h" 15 | 16 | __device__ inline float devIoU(float const * const a, float const * const b) { 17 | float left = fmaxf(a[0], b[0]), right = fminf(a[2], b[2]); 18 | float top = fmaxf(a[1], b[1]), bottom = fminf(a[3], b[3]); 19 | float width = fmaxf(right - left + 1, 0.f), height = fmaxf(bottom - top + 1, 0.f); 20 | float interS = width * height; 21 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 22 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 23 | return interS / (Sa + Sb - interS); 24 | } 25 | 26 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 27 | const float *dev_boxes, unsigned long long *dev_mask) { 28 | const int row_start = blockIdx.y; 29 | const int col_start = blockIdx.x; 30 | 31 | // if (row_start > col_start) return; 32 | 33 | const int row_size = 34 | fminf(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 35 | const int col_size = 36 | fminf(n_boxes -
col_start * threadsPerBlock, threadsPerBlock); 37 | 38 | __shared__ float block_boxes[threadsPerBlock * 5]; 39 | if (threadIdx.x < col_size) { 40 | block_boxes[threadIdx.x * 5 + 0] = 41 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 42 | block_boxes[threadIdx.x * 5 + 1] = 43 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 44 | block_boxes[threadIdx.x * 5 + 2] = 45 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 46 | block_boxes[threadIdx.x * 5 + 3] = 47 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 48 | block_boxes[threadIdx.x * 5 + 4] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 50 | } 51 | __syncthreads(); 52 | 53 | if (threadIdx.x < row_size) { 54 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 55 | const float *cur_box = dev_boxes + cur_box_idx * 5; 56 | int i = 0; 57 | unsigned long long t = 0; 58 | int start = 0; 59 | if (row_start == col_start) { 60 | start = threadIdx.x + 1; 61 | } 62 | for (i = start; i < col_size; i++) { 63 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 64 | t |= 1ULL << i; 65 | } 66 | } 67 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 68 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 69 | } 70 | } 71 | 72 | 73 | void _nms(int boxes_num, float * boxes_dev, 74 | unsigned long long * mask_dev, float nms_overlap_thresh) { 75 | 76 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 77 | DIVUP(boxes_num, threadsPerBlock)); 78 | dim3 threads(threadsPerBlock); 79 | nms_kernel<<<blocks, threads>>>(boxes_num, 80 | nms_overlap_thresh, 81 | boxes_dev, 82 | mask_dev); 83 | } 84 | 85 | #ifdef __cplusplus 86 | } 87 | #endif 88 | -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinhkhsu/DA_detection/6859cf3f195b3831c1899625122cc0487f60d05f/lib/nms/src/cuda/nms_kernel.cu.o -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _NMS_KERNEL 2 | #define _NMS_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 9 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 10 | 11 | void _nms(int boxes_num, float * boxes_dev, 12 | unsigned long long * mask_dev, float nms_overlap_thresh); 13 | 14 | #ifdef __cplusplus 15 | } 16 | #endif 17 | 18 | #endif 19 | 20 | -------------------------------------------------------------------------------- /lib/nms/src/nms.c: -------------------------------------------------------------------------------- 1 | #include <TH/TH.h> 2 | #include <math.h> 3 | 4 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh) { 5 | // boxes has to be sorted 6 | THArgCheck(THLongTensor_isContiguous(keep_out), 0, "keep_out must be contiguous"); 7 | THArgCheck(THFloatTensor_isContiguous(boxes), 2, "boxes must be contiguous"); 8 | THArgCheck(THLongTensor_isContiguous(order), 3, "order must be contiguous"); 9 | THArgCheck(THFloatTensor_isContiguous(areas), 4, "areas must be contiguous"); 10 | // Number of ROIs 11 | long boxes_num = THFloatTensor_size(boxes, 0); 12 | long boxes_dim = THFloatTensor_size(boxes, 1); 13 | 14 | long * keep_out_flat =
THLongTensor_data(keep_out); 15 | float * boxes_flat = THFloatTensor_data(boxes); 16 | long * order_flat = THLongTensor_data(order); 17 | float * areas_flat = THFloatTensor_data(areas); 18 | 19 | THByteTensor* suppressed = THByteTensor_newWithSize1d(boxes_num); 20 | THByteTensor_fill(suppressed, 0); 21 | unsigned char * suppressed_flat = THByteTensor_data(suppressed); 22 | 23 | // nominal indices 24 | int i, j; 25 | // sorted indices 26 | int _i, _j; 27 | // temp variables for box i's (the box currently under consideration) 28 | float ix1, iy1, ix2, iy2, iarea; 29 | // variables for computing overlap with box j (lower scoring box) 30 | float xx1, yy1, xx2, yy2; 31 | float w, h; 32 | float inter, ovr; 33 | 34 | long num_to_keep = 0; 35 | for (_i=0; _i < boxes_num; ++_i) { 36 | i = order_flat[_i]; 37 | if (suppressed_flat[i] == 1) { 38 | continue; 39 | } 40 | keep_out_flat[num_to_keep++] = i; 41 | ix1 = boxes_flat[i * boxes_dim]; 42 | iy1 = boxes_flat[i * boxes_dim + 1]; 43 | ix2 = boxes_flat[i * boxes_dim + 2]; 44 | iy2 = boxes_flat[i * boxes_dim + 3]; 45 | iarea = areas_flat[i]; 46 | for (_j = _i + 1; _j < boxes_num; ++_j) { 47 | j = order_flat[_j]; 48 | if (suppressed_flat[j] == 1) { 49 | continue; 50 | } 51 | xx1 = fmaxf(ix1, boxes_flat[j * boxes_dim]); 52 | yy1 = fmaxf(iy1, boxes_flat[j * boxes_dim + 1]); 53 | xx2 = fminf(ix2, boxes_flat[j * boxes_dim + 2]); 54 | yy2 = fminf(iy2, boxes_flat[j * boxes_dim + 3]); 55 | w = fmaxf(0.0, xx2 - xx1 + 1); 56 | h = fmaxf(0.0, yy2 - yy1 + 1); 57 | inter = w * h; 58 | ovr = inter / (iarea + areas_flat[j] - inter); 59 | if (ovr >= nms_overlap_thresh) { 60 | suppressed_flat[j] = 1; 61 | } 62 | } 63 | } 64 | 65 | long *num_out_flat = THLongTensor_data(num_out); 66 | *num_out_flat = num_to_keep; 67 | THByteTensor_free(suppressed); 68 | return 1; 69 | } -------------------------------------------------------------------------------- /lib/nms/src/nms.h: -------------------------------------------------------------------------------- 1 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh); -------------------------------------------------------------------------------- /lib/nms/src/nms_cuda.c: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | #include <THC/THC.h> 8 | #include <TH/TH.h> 9 | #include <stdio.h> 10 | #include <math.h> 11 | 12 | #include "cuda/nms_kernel.h" 13 | 14 | 15 | extern THCState *state; 16 | 17 | int gpu_nms(THLongTensor * keep, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh) { 18 | // boxes has to be sorted 19 | THArgCheck(THLongTensor_isContiguous(keep), 0, "keep must be contiguous"); 20 | THArgCheck(THCudaTensor_isContiguous(state, boxes), 2, "boxes must be contiguous"); 21 | // Number of ROIs 22 | int boxes_num = THCudaTensor_size(state, boxes, 0); 23 | int boxes_dim = THCudaTensor_size(state, boxes, 1); 24 | 25 | float* boxes_flat = THCudaTensor_data(state, boxes); 26 | 27 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 28 | THCudaLongTensor * mask = THCudaLongTensor_newWithSize2d(state, boxes_num, col_blocks); 29 | unsigned long long* mask_flat = THCudaLongTensor_data(state, mask); 30 | 31 |
_nms(boxes_num, boxes_flat, mask_flat, nms_overlap_thresh); 32 | 33 | THLongTensor * mask_cpu = THLongTensor_newWithSize2d(boxes_num, col_blocks); 34 | THLongTensor_copyCuda(state, mask_cpu, mask); 35 | THCudaLongTensor_free(state, mask); 36 | 37 | unsigned long long * mask_cpu_flat = THLongTensor_data(mask_cpu); 38 | 39 | THLongTensor * remv_cpu = THLongTensor_newWithSize1d(col_blocks); 40 | unsigned long long* remv_cpu_flat = THLongTensor_data(remv_cpu); 41 | THLongTensor_fill(remv_cpu, 0); 42 | 43 | long * keep_flat = THLongTensor_data(keep); 44 | long num_to_keep = 0; 45 | 46 | int i, j; 47 | for (i = 0; i < boxes_num; i++) { 48 | int nblock = i / threadsPerBlock; 49 | int inblock = i % threadsPerBlock; 50 | 51 | if (!(remv_cpu_flat[nblock] & (1ULL << inblock))) { 52 | keep_flat[num_to_keep++] = i; 53 | unsigned long long *p = &mask_cpu_flat[0] + i * col_blocks; 54 | for (j = nblock; j < col_blocks; j++) { 55 | remv_cpu_flat[j] |= p[j]; 56 | } 57 | } 58 | } 59 | 60 | long * num_out_flat = THLongTensor_data(num_out); 61 | * num_out_flat = num_to_keep; 62 | 63 | THLongTensor_free(mask_cpu); 64 | THLongTensor_free(remv_cpu); 65 | 66 | return 1; 67 | } 68 | -------------------------------------------------------------------------------- /lib/nms/src/nms_cuda.h: -------------------------------------------------------------------------------- 1 | int gpu_nms(THLongTensor * keep_out, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh); -------------------------------------------------------------------------------- /lib/roi_data_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/roi_data_layer/layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | 8 | """The data layer used during training to train a Fast R-CNN network. 9 | 10 | RoIDataLayer implements a Caffe Python layer. 
11 | """ 12 | from __future__ import absolute_import 13 | from __future__ import division 14 | from __future__ import print_function 15 | 16 | from model.config import cfg 17 | from roi_data_layer.minibatch import get_minibatch 18 | import numpy as np 19 | import time 20 | 21 | class RoIDataLayer(object): 22 | """Fast R-CNN data layer used for training.""" 23 | 24 | def __init__(self, roidb, num_classes, random=False): 25 | """Set the roidb to be used by this layer during training.""" 26 | self._roidb = roidb 27 | self._num_classes = num_classes 28 | # Also set a random flag 29 | self._random = random 30 | self._shuffle_roidb_inds() 31 | 32 | def _shuffle_roidb_inds(self): 33 | """Randomly permute the training roidb.""" 34 | # If the random flag is set, 35 | # then the database is shuffled according to system time 36 | # Useful for the validation set 37 | if self._random: 38 | st0 = np.random.get_state() 39 | millis = int(round(time.time() * 1000)) % 4294967295 40 | #np.random.seed(millis) 41 | 42 | if cfg.TRAIN.ASPECT_GROUPING: 43 | widths = np.array([r['width'] for r in self._roidb]) 44 | heights = np.array([r['height'] for r in self._roidb]) 45 | horz = (widths >= heights) 46 | vert = np.logical_not(horz) 47 | horz_inds = np.where(horz)[0] 48 | vert_inds = np.where(vert)[0] 49 | inds = np.hstack(( 50 | np.random.permutation(horz_inds), 51 | np.random.permutation(vert_inds))) 52 | inds = np.reshape(inds, (-1, 2)) 53 | row_perm = np.random.permutation(np.arange(inds.shape[0])) 54 | inds = np.reshape(inds[row_perm, :], (-1,)) 55 | self._perm = inds 56 | else: 57 | self._perm = np.random.permutation(np.arange(len(self._roidb))) 58 | ##no shuffle 59 | self._perm = np.arange(len(self._roidb)) 60 | # Restore the random state 61 | #if self._random: 62 | #np.random.set_state(st0) 63 | 64 | self._cur = 0 65 | 66 | def _get_next_minibatch_inds(self): 67 | """Return the roidb indices for the next minibatch.""" 68 | 69 | if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb): 70 | self._shuffle_roidb_inds() 71 | 72 | db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH] 73 | self._cur += cfg.TRAIN.IMS_PER_BATCH 74 | 75 | return db_inds 76 | 77 | def _get_next_minibatch(self): 78 | """Return the blobs to be used for the next minibatch. 79 | 80 | If cfg.TRAIN.USE_PREFETCH is True, then blobs will be computed in a 81 | separate process and made available through self._blob_queue. 
82 | """ 83 | db_inds = self._get_next_minibatch_inds() 84 | minibatch_db = [self._roidb[i] for i in db_inds] 85 | return get_minibatch(minibatch_db, self._num_classes) 86 | 87 | def forward(self): 88 | """Get blobs and copy them into this layer's top blob vector.""" 89 | blobs = self._get_next_minibatch() 90 | return blobs 91 | -------------------------------------------------------------------------------- /lib/roi_data_layer/minibatch.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | 8 | """Compute minibatch blobs for training a Fast R-CNN network.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import numpy as np 14 | import numpy.random as npr 15 | import cv2 16 | from model.config import cfg 17 | from utils.blob import prep_im_for_blob, im_list_to_blob 18 | 19 | def get_minibatch(roidb, num_classes): 20 | """Given a roidb, construct a minibatch sampled from it.""" 21 | num_images = len(roidb) 22 | 23 | # Sample random scales to use for each image in this batch 24 | random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES), 25 | size=num_images) 26 | 27 | assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ 28 | 'num_images ({}) must divide BATCH_SIZE ({})'. \ 29 | format(num_images, cfg.TRAIN.BATCH_SIZE) 30 | 31 | # Get the input image blob, formatted for caffe 32 | im_blob, im_scales, im_path, orig_imshape = _get_image_blob(roidb, random_scale_inds) 33 | 34 | blobs = {'data': im_blob} 35 | blobs['data_path'] = im_path 36 | 37 | assert len(im_scales) == 1, "Single batch only" 38 | assert len(roidb) == 1, "Single batch only" 39 | 40 | # gt boxes: (x1, y1, x2, y2, cls) 41 | if cfg.TRAIN.USE_ALL_GT: 42 | # Include all ground truth boxes 43 | gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0] 44 | else: 45 | # For the COCO ground truth boxes, exclude the ones that are ''iscrowd'' 46 | gt_inds = np.where((roidb[0]['gt_classes'] != 0) & np.all(roidb[0]['gt_overlaps'].toarray() > -1.0, axis=1))[0] 47 | gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32) 48 | gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0] 49 | gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds] 50 | blobs['gt_boxes'] = gt_boxes 51 | blobs['im_info'] = np.array( 52 | [im_blob.shape[1], im_blob.shape[2], im_scales[0], orig_imshape[0], orig_imshape[1], orig_imshape[2]], 53 | dtype=np.float32) 54 | 55 | return blobs 56 | 57 | def _get_image_blob(roidb, scale_inds): 58 | """Builds an input blob from the images in the roidb at the specified 59 | scales.
60 | """ 61 | num_images = len(roidb) 62 | processed_ims = [] 63 | im_scales = [] 64 | im_path = [] 65 | for i in range(num_images): 66 | im = cv2.imread(roidb[i]['image']) 67 | orig_imshape = im.shape 68 | im_path.append(roidb[i]['image']) 69 | if roidb[i]['flipped']: 70 | im = im[:, ::-1, :] 71 | target_size = cfg.TRAIN.SCALES[scale_inds[i]] 72 | im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, 73 | cfg.TRAIN.MAX_SIZE) 74 | im_scales.append(im_scale) 75 | processed_ims.append(im) 76 | 77 | # Create a blob to hold the input images 78 | blob = im_list_to_blob(processed_ims) 79 | 80 | return blob, im_scales, im_path, orig_imshape 81 | -------------------------------------------------------------------------------- /lib/roi_data_layer/roidb.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Transform a roidb into a trainable roidb by adding a bunch of metadata.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import numpy as np 14 | from model.config import cfg 15 | import PIL 16 | from tqdm import tqdm 17 | 18 | def prepare_roidb(imdb): 19 | """Enrich the imdb's roidb by adding some derived quantities that 20 | are useful for training. This function precomputes the maximum 21 | overlap, taken over ground-truth boxes, between each ROI and 22 | each ground-truth box. The class with maximum overlap is also 23 | recorded. 24 | """ 25 | roidb = imdb.roidb 26 | if not (imdb.name.startswith('coco')): 27 | if 'bdd' in imdb.name: 28 | sizes = [(1280,720) for i in range(imdb.num_images)] 29 | else: 30 | sizes = [PIL.Image.open(imdb.image_path_at(i)).size 31 | for i in range(imdb.num_images)] 32 | 33 | # for i in range(len(imdb.image_index)): 34 | for i in tqdm(range(len(imdb.image_index))): 35 | roidb[i]['image'] = imdb.image_path_at(i) 36 | if not (imdb.name.startswith('coco')): 37 | roidb[i]['width'] = sizes[i][0] 38 | roidb[i]['height'] = sizes[i][1] 39 | # need gt_overlaps as a dense array for argmax 40 | gt_overlaps = roidb[i]['gt_overlaps'].toarray() 41 | 42 | # max overlap with gt over classes (columns) 43 | max_overlaps = gt_overlaps.max(axis=1) 44 | # gt class that had the max overlap 45 | max_classes = gt_overlaps.argmax(axis=1) 46 | roidb[i]['max_classes'] = max_classes 47 | roidb[i]['max_overlaps'] = max_overlaps 48 | # sanity checks 49 | # max overlap of 0 => class should be zero (background) 50 | zero_inds = np.where(max_overlaps == 0)[0] 51 | assert all(max_classes[zero_inds] == 0) 52 | # max overlap > 0 => class should not be zero (must be a fg class) 53 | nonzero_inds = np.where(max_overlaps > 0)[0] 54 | assert all(max_classes[nonzero_inds] != 0) 55 | -------------------------------------------------------------------------------- /lib/utils/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.h 4 | *.hpp 5 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | 
# Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/utils/bbox.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | def bbox_overlaps(boxes, query_boxes): 5 |     """ 6 |     Parameters 7 |     ---------- 8 |     boxes: (N, 4) ndarray or tensor or variable 9 |     query_boxes: (K, 4) ndarray or tensor or variable 10 |     Returns 11 |     ------- 12 |     overlaps: (N, K) overlap between boxes and query_boxes 13 |     """ 14 |     if isinstance(boxes, np.ndarray): 15 |         boxes = torch.from_numpy(boxes) 16 |         query_boxes = torch.from_numpy(query_boxes) 17 |         out_fn = lambda x: x.numpy()  # if the inputs were ndarrays, convert the overlaps back to an ndarray on return 18 |     else: 19 |         out_fn = lambda x: x 20 | 21 |     box_areas = (boxes[:, 2] - boxes[:, 0] + 1) * \ 22 |             (boxes[:, 3] - boxes[:, 1] + 1) 23 |     query_areas = (query_boxes[:, 2] - query_boxes[:, 0] + 1) * \ 24 |             (query_boxes[:, 3] - query_boxes[:, 1] + 1) 25 | 26 |     iw = (torch.min(boxes[:, 2:3], query_boxes[:, 2:3].t()) - torch.max(boxes[:, 0:1], query_boxes[:, 0:1].t()) + 1).clamp(min=0) 27 |     ih = (torch.min(boxes[:, 3:4], query_boxes[:, 3:4].t()) - torch.max(boxes[:, 1:2], query_boxes[:, 1:2].t()) + 1).clamp(min=0) 28 |     ua = box_areas.view(-1, 1) + query_areas.view(1, -1) - iw * ih 29 |     overlaps = iw * ih / ua 30 |     return out_fn(overlaps) 31 | -------------------------------------------------------------------------------- /lib/utils/blob.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Blob helper functions.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import numpy as np 14 | import cv2 15 | 16 | 17 | def im_list_to_blob(ims): 18 |   """Convert a list of images into a network input. 19 | 20 |   Assumes images are already prepared (means subtracted, BGR order, ...).
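   As a worked example, two prepared images of shapes (600, 800, 3) and
   (480, 640, 3) would be packed into a single zero-padded float32 blob of
   shape (2, 600, 800, 3), each image copied into the top-left corner.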
21 | """ 22 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 23 | num_images = len(ims) 24 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 25 | dtype=np.float32) 26 | for i in range(num_images): 27 | im = ims[i] 28 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 29 | 30 | return blob 31 | 32 | 33 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 34 | """Mean subtract and scale an image for use in a blob.""" 35 | im = im.astype(np.float32, copy=False) 36 | im -= pixel_means 37 | 38 | im_shape = im.shape 39 | im_size_min = np.min(im_shape[0:2]) 40 | im_size_max = np.max(im_shape[0:2]) 41 | im_scale = float(target_size) / float(im_size_min) 42 | # Prevent the biggest axis from being more than MAX_SIZE 43 | if np.round(im_scale * im_size_max) > max_size: 44 | im_scale = float(max_size) / float(im_size_max) 45 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 46 | interpolation=cv2.INTER_LINEAR) 47 | 48 | # im = cv2.resize(im, (1200, 480), interpolation=cv2.INTER_LINEAR) 49 | 50 | return im, im_scale 51 | -------------------------------------------------------------------------------- /lib/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | import torch 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self._total_time = {} 15 | self._calls = {} 16 | self._start_time = {} 17 | self._diff = {} 18 | self._average_time = {} 19 | 20 | def tic(self, name='default'): 21 | # using time.time instead of time.clock because time time.clock 22 | # does not normalize for multithreading 23 | torch.cuda.synchronize() 24 | self._start_time[name] = time.time() 25 | 26 | def toc(self, name='default', average=True): 27 | torch.cuda.synchronize() 28 | self._diff[name] = time.time() - self._start_time[name] 29 | self._total_time[name] = self._total_time.get(name, 0.) 
+ self._diff[name] 30 | self._calls[name] = self._calls.get(name, 0 ) + 1 31 | self._average_time[name] = self._total_time[name] / self._calls[name] 32 | if average: 33 | return self._average_time[name] 34 | else: 35 | return self._diff[name] 36 | 37 | def average_time(self, name='default'): 38 | return self._average_time[name] 39 | 40 | def total_time(self, name='default'): 41 | return self._total_time[name] 42 | 43 | timer = Timer() 44 | -------------------------------------------------------------------------------- /lib/utils/visualization.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | from six.moves import range 12 | import PIL.Image as Image 13 | import PIL.ImageColor as ImageColor 14 | import PIL.ImageDraw as ImageDraw 15 | import PIL.ImageFont as ImageFont 16 | 17 | STANDARD_COLORS = [ 18 | 'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque', 19 | 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite', 20 | 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan', 21 | 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange', 22 | 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet', 23 | 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite', 24 | 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod', 25 | 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki', 26 | 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue', 27 | 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey', 28 | 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue', 29 | 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime', 30 | 'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid', 31 | 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen', 32 | 'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin', 33 | 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed', 34 | 'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed', 35 | 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple', 36 | 'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown', 37 | 'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue', 38 | 'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow', 39 | 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White', 40 | 'WhiteSmoke', 'Yellow', 'YellowGreen' 41 | ] 42 | 43 | NUM_COLORS = len(STANDARD_COLORS) 44 | 45 | try: 46 | FONT = ImageFont.truetype('arial.ttf', 24) 47 | except IOError: 48 | FONT = ImageFont.load_default() 49 | 50 | def _draw_single_box(image, xmin, ymin, xmax, ymax, display_str, font, color='black', thickness=4): 51 | draw = ImageDraw.Draw(image) 52 | (left, right, top, bottom) = (xmin, xmax, ymin, ymax) 53 | draw.line([(left, top), (left, bottom), (right, bottom), 54 | (right, top), (left, top)], width=thickness, fill=color) 55 | text_bottom = bottom 56 | # Reverse list and 
print from bottom to top. 57 | text_width, text_height = font.getsize(display_str) 58 | margin = np.ceil(0.05 * text_height) 59 | draw.rectangle( 60 | [(left, text_bottom - text_height - 2 * margin), (left + text_width, 61 | text_bottom)], 62 | fill=color) 63 | draw.text( 64 | (left + margin, text_bottom - text_height - margin), 65 | display_str, 66 | fill='black', 67 | font=font) 68 | 69 | return image 70 | 71 | def draw_bounding_boxes(image, gt_boxes, im_info): 72 | num_boxes = gt_boxes.shape[0] 73 | gt_boxes_new = gt_boxes.copy() 74 | gt_boxes_new[:,:4] = np.round(gt_boxes_new[:,:4].copy() / im_info[2]) 75 | disp_image = Image.fromarray(np.uint8(image[0])) 76 | 77 | for i in range(num_boxes): 78 | this_class = int(gt_boxes_new[i, 4]) 79 | disp_image = _draw_single_box(disp_image, 80 | gt_boxes_new[i, 0], 81 | gt_boxes_new[i, 1], 82 | gt_boxes_new[i, 2], 83 | gt_boxes_new[i, 3], 84 | 'N%02d-C%02d' % (i, this_class), 85 | FONT, 86 | color=STANDARD_COLORS[this_class % NUM_COLORS]) 87 | 88 | image[0, :] = np.array(disp_image) 89 | return image -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torchvision==0.2.1 2 | easydict==1.6 3 | opencv-python==3.4.1.15 4 | scipy==1.1.0 5 | pillow==5.1.0 6 | tensorboardX 7 | pyyaml==3.12 8 | tqdm==4.28.1 9 | -------------------------------------------------------------------------------- /tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, '..', 'lib') 12 | add_path(lib_path) 13 | 14 | coco_path = osp.join(this_dir, '..', 'data', 'coco', 'PythonAPI') 15 | add_path(coco_path) 16 | -------------------------------------------------------------------------------- /tools/convert_from_tensorflow.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python import pywrap_tensorflow 3 | from collections import OrderedDict 4 | import re 5 | import torch 6 | 7 | import argparse 8 | parser = argparse.ArgumentParser(description='Convert tf-faster-rcnn model to pytorch-faster-rcnn model') 9 | parser.add_argument('--tensorflow_model', 10 | help='the path of tensorflow_model', 11 | default=None, type=str) 12 | 13 | args = parser.parse_args() 14 | 15 | reader = pywrap_tensorflow.NewCheckpointReader(args.tensorflow_model) 16 | var_to_shape_map = reader.get_variable_to_shape_map() 17 | var_dict = {k:reader.get_tensor(k) for k in var_to_shape_map.keys()} 18 | 19 | del var_dict['Variable'] 20 | 21 | for k in list(var_dict.keys()): 22 | if 'Momentum' in k: 23 | del var_dict[k] 24 | 25 | for k in list(var_dict.keys()): 26 | if k.find('/') >= 0: 27 | var_dict['resnet' + k[k.find('/'):]] = var_dict[k] 28 | del var_dict[k] 29 | 30 | dummy_replace = OrderedDict([ 31 | ('moving_mean', 'running_mean'),\ 32 | ('moving_variance', 'running_var'),\ 33 | ('weights', 'weight'),\ 34 | ('biases', 'bias'),\ 35 | ('conv1/BatchNorm', 'bn1'),\ 36 | ('conv2/BatchNorm', 'bn2'),\ 37 | ('conv3/BatchNorm', 'bn3'),\ 38 | ('bottleneck_v1/', ''),\ 39 | ('block', 'layer'),\ 40 | ('resnet/rpn_conv/3x3', 'rpn_net'),\ 41 | ('resnet/rpn_cls_score', 'rpn_cls_score_net'),\ 42 | ('resnet/cls_score', 'cls_score_net'),\ 
43 | ('resnet/rpn_bbox_pred', 'rpn_bbox_pred_net'),\ 44 | ('resnet/bbox_pred', 'bbox_pred_net'),\ 45 | ('shortcut/weight', 'downsample.0.weight'),\ 46 | ('shortcut/BatchNorm', 'downsample.1'),\ 47 | ('gamma', 'weight'),\ 48 | ('beta', 'bias'),\ 49 | ('/', '.')]) 50 | 51 | for a, b in dummy_replace.items(): 52 | for k in list(var_dict.keys()): 53 | if a in k: 54 | var_dict[k.replace(a,b)] = var_dict[k] 55 | del var_dict[k] 56 | 57 | 58 | for k in list(var_dict.keys()): 59 | if 'unit_' in k: 60 | m = re.search('unit_(\d+)', k) 61 | var_dict[k.replace(m.group(0), str(int(m.group(1)) - 1))] = var_dict[k] 62 | del var_dict[k] 63 | 64 | for k in list(var_dict.keys()): 65 | if var_dict[k].ndim == 4: 66 | var_dict[k] = var_dict[k].transpose((3, 2, 0, 1)).copy(order='C') 67 | if var_dict[k].ndim == 2: 68 | var_dict[k] = var_dict[k].transpose((1, 0)).copy(order='C') 69 | # assert x[k].shape == var_dict[k].shape, k 70 | 71 | for k in list(var_dict.keys()): 72 | var_dict[k] = torch.from_numpy(var_dict[k]) 73 | 74 | 75 | torch.save(var_dict, args.tensorflow_model[:args.tensorflow_model.find('.ckpt')]+'.pth') 76 | -------------------------------------------------------------------------------- /tools/convert_from_tensorflow_mobile.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python import pywrap_tensorflow 3 | from collections import OrderedDict 4 | import re 5 | import torch 6 | 7 | import argparse 8 | parser = argparse.ArgumentParser(description='Convert tf-faster-rcnn model to pytorch-faster-rcnn model') 9 | parser.add_argument('--tensorflow_model', 10 | help='the path of tensorflow_model', 11 | default=None, type=str) 12 | 13 | args = parser.parse_args() 14 | 15 | reader = pywrap_tensorflow.NewCheckpointReader(args.tensorflow_model) 16 | var_to_shape_map = reader.get_variable_to_shape_map() 17 | var_dict = {k:reader.get_tensor(k) for k in var_to_shape_map.keys()} 18 | 19 | del var_dict['Variable'] 20 | 21 | for k in list(var_dict.keys()): 22 | if 'Momentum' in k: 23 | del var_dict[k] 24 | 25 | for k in list(var_dict.keys()): 26 | if k.find('/') >= 0: 27 | var_dict['mobilenet' + k[k.find('/'):]] = var_dict[k] 28 | del var_dict[k] 29 | 30 | dummy_replace = OrderedDict([ 31 | ('moving_mean', 'running_mean'),\ 32 | ('moving_variance', 'running_var'),\ 33 | ('weights', 'weight'),\ 34 | ('biases', 'bias'),\ 35 | ('/BatchNorm', '.1'),\ 36 | ('_pointwise/', '.pointwise.0.'),\ 37 | ('_depthwise/depthwise_', '.depthwise.0.'),\ 38 | ('_pointwise.1', '.pointwise.1'),\ 39 | ('_depthwise.1', '.depthwise.1'),\ 40 | ('Conv2d_0/', 'Conv2d_0.0.'),\ 41 | ('mobilenet/rpn_conv/3x3', 'rpn_net'),\ 42 | ('mobilenet/rpn_cls_score', 'rpn_cls_score_net'),\ 43 | ('mobilenet/cls_score', 'cls_score_net'),\ 44 | ('mobilenet/rpn_bbox_pred', 'rpn_bbox_pred_net'),\ 45 | ('mobilenet/bbox_pred', 'bbox_pred_net'),\ 46 | ('gamma', 'weight'),\ 47 | ('beta', 'bias'),\ 48 | ('/', '.')]) 49 | 50 | for a, b in dummy_replace.items(): 51 | for k in list(var_dict.keys()): 52 | if a in k: 53 | var_dict[k.replace(a,b)] = var_dict[k] 54 | del var_dict[k] 55 | 56 | # print set(var_dict.keys()) - set(x.keys()) 57 | # print set(x.keys()) - set(var_dict.keys()) 58 | 59 | for k in list(var_dict.keys()): 60 | if var_dict[k].ndim == 4: 61 | if 'depthwise' in k: 62 | var_dict[k] = var_dict[k].transpose((2, 3, 0, 1)).copy(order='C') 63 | else: 64 | var_dict[k] = var_dict[k].transpose((3, 2, 0, 1)).copy(order='C') 65 | if var_dict[k].ndim == 2: 66 | var_dict[k] = 
var_dict[k].transpose((1, 0)).copy(order='C') 67 | # assert x[k].shape == var_dict[k].shape, k 68 | 69 | for k in list(var_dict.keys()): 70 | var_dict[k] = torch.from_numpy(var_dict[k]) 71 | 72 | 73 | torch.save(var_dict, args.tensorflow_model[:args.tensorflow_model.find('.ckpt')]+'.pth') 74 | -------------------------------------------------------------------------------- /tools/convert_from_tensorflow_vgg.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python import pywrap_tensorflow 3 | from collections import OrderedDict 4 | import re 5 | import torch 6 | 7 | import argparse 8 | parser = argparse.ArgumentParser(description='Convert tf-faster-rcnn model to pytorch-faster-rcnn model') 9 | parser.add_argument('--tensorflow_model', 10 | help='the path of tensorflow_model', 11 | default=None, type=str) 12 | 13 | args = parser.parse_args() 14 | 15 | reader = pywrap_tensorflow.NewCheckpointReader(args.tensorflow_model) 16 | var_to_shape_map = reader.get_variable_to_shape_map() 17 | var_dict = {k:reader.get_tensor(k) for k in var_to_shape_map.keys()} 18 | 19 | del var_dict['Variable'] 20 | 21 | for k in list(var_dict.keys()): 22 | if 'Momentum' in k: 23 | del var_dict[k] 24 | 25 | for k in list(var_dict.keys()): 26 | if k.find('/') >= 0: 27 | var_dict['vgg' + k[k.find('/'):]] = var_dict[k] 28 | del var_dict[k] 29 | 30 | dummy_replace = OrderedDict([ 31 | ('weights', 'weight'),\ 32 | ('biases', 'bias'),\ 33 | ('vgg/rpn_conv/3x3', 'rpn_net'),\ 34 | ('vgg/rpn_cls_score', 'rpn_cls_score_net'),\ 35 | ('vgg/cls_score', 'cls_score_net'),\ 36 | ('vgg/rpn_bbox_pred', 'rpn_bbox_pred_net'),\ 37 | ('vgg/bbox_pred', 'bbox_pred_net'),\ 38 | ('/', '.')]) 39 | 40 | for a, b in dummy_replace.items(): 41 | for k in list(var_dict.keys()): 42 | if a in k: 43 | var_dict[k.replace(a,b)] = var_dict[k] 44 | del var_dict[k] 45 | 46 | layer_map = OrderedDict([ 47 | ('conv1.conv1_1', 'features.0'),\ 48 | ('conv1.conv1_2', 'features.2'),\ 49 | ('conv2.conv2_1', 'features.5'),\ 50 | ('conv2.conv2_2', 'features.7'),\ 51 | ('conv3.conv3_1', 'features.10'),\ 52 | ('conv3.conv3_2', 'features.12'),\ 53 | ('conv3.conv3_3', 'features.14'),\ 54 | ('conv4.conv4_1', 'features.17'),\ 55 | ('conv4.conv4_2', 'features.19'),\ 56 | ('conv4.conv4_3', 'features.21'),\ 57 | ('conv5.conv5_1', 'features.24'),\ 58 | ('conv5.conv5_2', 'features.26'),\ 59 | ('conv5.conv5_3', 'features.28'),\ 60 | ('fc6', 'classifier.0'),\ 61 | ('fc7', 'classifier.3')]) 62 | 63 | for a, b in layer_map.items(): 64 | for k in list(var_dict.keys()): 65 | if a in k: 66 | var_dict[k.replace(a,b)] = var_dict[k] 67 | del var_dict[k] 68 | 69 | for k in list(var_dict.keys()): 70 | if 'classifier.0' in k: 71 | if var_dict[k].ndim == 2: # weight 72 | var_dict[k] = var_dict[k].reshape(7,7,512,4096).transpose((3, 2, 0, 1)).reshape(4096, -1).copy(order='C') 73 | else: 74 | if var_dict[k].ndim == 4: 75 | var_dict[k] = var_dict[k].transpose((3, 2, 0, 1)).copy(order='C') 76 | if var_dict[k].ndim == 2: 77 | var_dict[k] = var_dict[k].transpose((1, 0)).copy(order='C') 78 | # assert x[k].shape == var_dict[k].shape, k 79 | 80 | for k in list(var_dict.keys()): 81 | var_dict[k] = torch.from_numpy(var_dict[k]) 82 | 83 | torch.save(var_dict, args.tensorflow_model[:args.tensorflow_model.find('.ckpt')]+'.pth') 84 | -------------------------------------------------------------------------------- /tools/demo.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Tensorflow Faster R-CNN 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Xinlei Chen, based on code from Ross Girshick 7 | # -------------------------------------------------------- 8 | 9 | """ 10 | Demo script showing detections in sample images. 11 | 12 | See README.md for installation instructions before running. 13 | """ 14 | from __future__ import absolute_import 15 | from __future__ import division 16 | from __future__ import print_function 17 | 18 | import _init_paths 19 | from model.config import cfg 20 | from model.test import im_detect 21 | from model.nms_wrapper import nms 22 | 23 | from utils.timer import Timer 24 | import matplotlib.pyplot as plt 25 | import numpy as np 26 | import os, cv2 27 | import argparse 28 | 29 | from nets.vgg16 import vgg16 30 | from nets.resnet_v1 import resnetv1 31 | 32 | import torch 33 | 34 | CLASSES = ('__background__', 35 | 'aeroplane', 'bicycle', 'bird', 'boat', 36 | 'bottle', 'bus', 'car', 'cat', 'chair', 37 | 'cow', 'diningtable', 'dog', 'horse', 38 | 'motorbike', 'person', 'pottedplant', 39 | 'sheep', 'sofa', 'train', 'tvmonitor') 40 | 41 | NETS = {'vgg16': ('vgg16_faster_rcnn_iter_%d.pth',),'res101': ('res101_faster_rcnn_iter_%d.pth',)} 42 | DATASETS= {'pascal_voc': ('voc_2007_trainval',),'pascal_voc_0712': ('voc_2007_trainval+voc_2012_trainval',)} 43 | 44 | def vis_detections(im, class_name, dets, thresh=0.5): 45 | """Draw detected bounding boxes.""" 46 | inds = np.where(dets[:, -1] >= thresh)[0] 47 | if len(inds) == 0: 48 | return 49 | 50 | im = im[:, :, (2, 1, 0)] 51 | fig, ax = plt.subplots(figsize=(12, 12)) 52 | ax.imshow(im, aspect='equal') 53 | for i in inds: 54 | bbox = dets[i, :4] 55 | score = dets[i, -1] 56 | 57 | ax.add_patch( 58 | plt.Rectangle((bbox[0], bbox[1]), 59 | bbox[2] - bbox[0], 60 | bbox[3] - bbox[1], fill=False, 61 | edgecolor='red', linewidth=3.5) 62 | ) 63 | ax.text(bbox[0], bbox[1] - 2, 64 | '{:s} {:.3f}'.format(class_name, score), 65 | bbox=dict(facecolor='blue', alpha=0.5), 66 | fontsize=14, color='white') 67 | 68 | ax.set_title(('{} detections with ' 69 | 'p({} | box) >= {:.1f}').format(class_name, class_name, 70 | thresh), 71 | fontsize=14) 72 | plt.axis('off') 73 | plt.tight_layout() 74 | plt.draw() 75 | 76 | def demo(net, image_name): 77 | """Detect object classes in an image using pre-computed object proposals.""" 78 | 79 | # Load the demo image 80 | im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name) 81 | im = cv2.imread(im_file) 82 | 83 | # Detect all object classes and regress object bounds 84 | timer = Timer() 85 | timer.tic() 86 | scores, boxes = im_detect(net, im) 87 | timer.toc() 88 | print('Detection took {:.3f}s for {:d} object proposals'.format(timer.total_time(), boxes.shape[0])) 89 | 90 | # Visualize detections for each class 91 | CONF_THRESH = 0.8 92 | NMS_THRESH = 0.3 93 | for cls_ind, cls in enumerate(CLASSES[1:]): 94 | cls_ind += 1 # because we skipped background 95 | cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] 96 | cls_scores = scores[:, cls_ind] 97 | dets = np.hstack((cls_boxes, 98 | cls_scores[:, np.newaxis])).astype(np.float32) 99 | keep = nms(torch.from_numpy(dets), NMS_THRESH) 100 | dets = dets[keep.numpy(), :] 101 | vis_detections(im, cls, dets, thresh=CONF_THRESH) 102 | 103 | def parse_args(): 104 | """Parse input arguments.""" 105 | parser = argparse.ArgumentParser(description='Tensorflow Faster R-CNN demo') 106 | parser.add_argument('--net', 
dest='demo_net', help='Network to use [vgg16 res101]', 107 | choices=NETS.keys(), default='res101') 108 | parser.add_argument('--dataset', dest='dataset', help='Trained dataset [pascal_voc pascal_voc_0712]', 109 | choices=DATASETS.keys(), default='pascal_voc_0712') 110 | args = parser.parse_args() 111 | 112 | return args 113 | 114 | if __name__ == '__main__': 115 | cfg.TEST.HAS_RPN = True # Use RPN for proposals 116 | args = parse_args() 117 | 118 | # model path 119 | demonet = args.demo_net 120 | dataset = args.dataset 121 | saved_model = os.path.join('output', demonet, DATASETS[dataset][0], 'default', 122 | NETS[demonet][0] %(70000 if dataset == 'pascal_voc' else 110000)) 123 | 124 | 125 | if not os.path.isfile(saved_model): 126 | raise IOError(('{:s} not found.\nDid you download the proper networks from ' 127 | 'our server and place them properly?').format(saved_model)) 128 | 129 | # load network 130 | if demonet == 'vgg16': 131 | net = vgg16() 132 | elif demonet == 'res101': 133 | net = resnetv1(num_layers=101) 134 | else: 135 | raise NotImplementedError 136 | net.create_architecture(21, 137 | tag='default', anchor_scales=[8, 16, 32]) 138 | 139 | net.load_state_dict(torch.load(saved_model)) 140 | 141 | net.eval() 142 | net.cuda() 143 | 144 | print('Loaded network {:s}'.format(saved_model)) 145 | 146 | im_names = ['000456.jpg', '000542.jpg', '001150.jpg', 147 | '001763.jpg', '004545.jpg'] 148 | for im_name in im_names: 149 | print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~') 150 | print('Demo for data/demo/{}'.format(im_name)) 151 | demo(net, im_name) 152 | 153 | plt.show() 154 | -------------------------------------------------------------------------------- /tools/demo_all_bboxes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Tensorflow Faster R-CNN 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Xinlei Chen, based on code from Ross Girshick 7 | # Edited by Matthew Seals 8 | # -------------------------------------------------------- 9 | 10 | """ 11 | Demo script showing detections in sample images. 12 | 13 | See README.md for installation instructions before running. 
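
Example invocation (a sketch; it assumes trained weights are already in
place under ./output, following the path convention used in the main block
below):

    python tools/demo_all_bboxes.py --net vgg16 --dataset pascal_voc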
14 | """ 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import _init_paths 20 | from model.config import cfg 21 | from model.test import im_detect 22 | from model.nms_wrapper import nms 23 | 24 | from utils.timer import Timer 25 | import matplotlib.pyplot as plt 26 | import numpy as np 27 | import os 28 | import cv2 29 | import argparse 30 | from matplotlib import cm 31 | 32 | from nets.vgg16 import vgg16 33 | from nets.resnet_v1 import resnetv1 34 | 35 | import torch 36 | 37 | CLASSES = ('__background__', 38 | 'aeroplane', 'bicycle', 'bird', 'boat', 39 | 'bottle', 'bus', 'car', 'cat', 'chair', 40 | 'cow', 'diningtable', 'dog', 'horse', 41 | 'motorbike', 'person', 'pottedplant', 42 | 'sheep', 'sofa', 'train', 'tvmonitor') 43 | 44 | NETS = {'vgg16': ('vgg16_faster_rcnn_iter_%d.pth',), 'res101': ('res101_faster_rcnn_iter_%d.pth',)} 45 | DATASETS = {'pascal_voc': ('voc_2007_trainval',), 'pascal_voc_0712': ('voc_2007_trainval+voc_2012_trainval',)} 46 | 47 | COLORS = [cm.tab10(i) for i in np.linspace(0., 1., 10)] 48 | 49 | 50 | def demo(net, image_name): 51 | """Detect object classes in an image using pre-computed object proposals.""" 52 | 53 | # Load the demo image 54 | im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name) 55 | im = cv2.imread(im_file) 56 | 57 | # Detect all object classes and regress object bounds 58 | timer = Timer() 59 | timer.tic() 60 | scores, boxes = im_detect(net, im) 61 | timer.toc() 62 | print('Detection took {:.3f}s for {:d} object proposals'.format(timer.total_time(), boxes.shape[0])) 63 | 64 | # Visualize detections for each class 65 | thresh = 0.8 # CONF_THRESH 66 | NMS_THRESH = 0.3 67 | 68 | im = im[:, :, (2, 1, 0)] 69 | fig, ax = plt.subplots(figsize=(12, 12)) 70 | ax.imshow(im, aspect='equal') 71 | cntr = -1 72 | 73 | for cls_ind, cls in enumerate(CLASSES[1:]): 74 | cls_ind += 1 # because we skipped background 75 | cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] 76 | cls_scores = scores[:, cls_ind] 77 | dets = np.hstack((cls_boxes, 78 | cls_scores[:, np.newaxis])).astype(np.float32) 79 | keep = nms(torch.from_numpy(dets), NMS_THRESH) 80 | dets = dets[keep.numpy(), :] 81 | inds = np.where(dets[:, -1] >= thresh)[0] 82 | if len(inds) == 0: 83 | continue 84 | else: 85 | cntr += 1 86 | 87 | for i in inds: 88 | bbox = dets[i, :4] 89 | score = dets[i, -1] 90 | 91 | ax.add_patch( 92 | plt.Rectangle((bbox[0], bbox[1]), 93 | bbox[2] - bbox[0], 94 | bbox[3] - bbox[1], fill=False, 95 | edgecolor=COLORS[cntr % len(COLORS)], linewidth=3.5) 96 | ) 97 | ax.text(bbox[0], bbox[1] - 2, 98 | '{:s} {:.3f}'.format(cls, score), 99 | bbox=dict(facecolor='blue', alpha=0.5), 100 | fontsize=14, color='white') 101 | 102 | ax.set_title('All detections with threshold >= {:.1f}'.format(thresh), fontsize=14) 103 | 104 | plt.axis('off') 105 | plt.tight_layout() 106 | plt.savefig('demo_' + image_name) 107 | print('Saved to `{}`'.format(os.path.join(os.getcwd(), 'demo_' + image_name))) 108 | 109 | 110 | def parse_args(): 111 | """Parse input arguments.""" 112 | parser = argparse.ArgumentParser(description='Tensorflow Faster R-CNN demo') 113 | parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16 res101]', 114 | choices=NETS.keys(), default='res101') 115 | parser.add_argument('--dataset', dest='dataset', help='Trained dataset [pascal_voc pascal_voc_0712]', 116 | choices=DATASETS.keys(), default='pascal_voc_0712') 117 | args = parser.parse_args() 118 | 119 | return args 120 | 121 | 122 | 
if __name__ == '__main__': 123 | cfg.TEST.HAS_RPN = True # Use RPN for proposals 124 | args = parse_args() 125 | 126 | # model path 127 | demonet = args.demo_net 128 | dataset = args.dataset 129 | saved_model = os.path.join('output', demonet, DATASETS[dataset][0], 'default', 130 | NETS[demonet][0] % (70000 if dataset == 'pascal_voc' else 110000)) 131 | 132 | if not os.path.isfile(saved_model): 133 | raise IOError(('{:s} not found.\nDid you download the proper networks from ' 134 | 'our server and place them properly?').format(saved_model)) 135 | 136 | # load network 137 | if demonet == 'vgg16': 138 | net = vgg16() 139 | elif demonet == 'res101': 140 | net = resnetv1(num_layers=101) 141 | else: 142 | raise NotImplementedError 143 | net.create_architecture(21, tag='default', anchor_scales=[8, 16, 32]) 144 | 145 | net.load_state_dict(torch.load(saved_model)) 146 | 147 | net.eval() 148 | net.cuda() 149 | 150 | print('Loaded network {:s}'.format(saved_model)) 151 | 152 | im_names = [i for i in os.listdir('data/demo/') # Pull in all jpgs 153 | if i.lower().endswith(".jpg")] 154 | 155 | for im_name in im_names: 156 | print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~') 157 | print('Demo for data/demo/{}'.format(im_name)) 158 | demo(net, im_name) 159 | 160 | plt.show() 161 | -------------------------------------------------------------------------------- /tools/reval.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | # Reval = re-eval. Re-evaluate saved detections. 
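#
# Example (hypothetical paths; the positional argument is the directory that
# holds detections.pkl from a previous test run):
#   python tools/reval.py output/vgg16/voc_2007_test/default --imdb voc_2007_test --nms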
11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import _init_paths 16 | from model.test import apply_nms 17 | from model.config import cfg 18 | from datasets.factory import get_imdb 19 | import pickle 20 | import os, sys, argparse 21 | import numpy as np 22 | 23 | 24 | def parse_args(): 25 | """ 26 | Parse input arguments 27 | """ 28 | parser = argparse.ArgumentParser(description='Re-evaluate results') 29 | parser.add_argument('output_dir', nargs=1, help='results directory', 30 | type=str) 31 | parser.add_argument('--imdb', dest='imdb_name', 32 | help='dataset to re-evaluate', 33 | default='voc_2007_test', type=str) 34 | parser.add_argument('--matlab', dest='matlab_eval', 35 | help='use matlab for evaluation', 36 | action='store_true') 37 | parser.add_argument('--comp', dest='comp_mode', help='competition mode', 38 | action='store_true') 39 | parser.add_argument('--nms', dest='apply_nms', help='apply nms', 40 | action='store_true') 41 | 42 | if len(sys.argv) == 1: 43 | parser.print_help() 44 | sys.exit(1) 45 | 46 | args = parser.parse_args() 47 | return args 48 | 49 | 50 | def from_dets(imdb_name, output_dir, args): 51 | imdb = get_imdb(imdb_name) 52 | imdb.competition_mode(args.comp_mode) 53 | imdb.config['matlab_eval'] = args.matlab_eval 54 | with open(os.path.join(output_dir, 'detections.pkl'), 'rb') as f: 55 | dets = pickle.load(f) 56 | 57 | if args.apply_nms: 58 | print('Applying NMS to all detections') 59 | nms_dets = apply_nms(dets, cfg.TEST.NMS) 60 | else: 61 | nms_dets = dets 62 | 63 | print('Evaluating detections') 64 | imdb.evaluate_detections(nms_dets, output_dir) 65 | 66 | 67 | if __name__ == '__main__': 68 | args = parse_args() 69 | 70 | output_dir = os.path.abspath(args.output_dir[0]) 71 | imdb_name = args.imdb_name 72 | from_dets(imdb_name, output_dir, args) 73 | os.system("mv ./pr/pr.png ./pr/%s.png" % args.output_dir[0][args.output_dir[0].rfind('/')+1:][18:]) 74 | -------------------------------------------------------------------------------- /tools/test_net.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Zheqi he, Xinlei Chen, based on code from Ross Girshick 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import _init_paths 11 | from model.test import test_net 12 | from model.config import cfg, cfg_from_file, cfg_from_list 13 | from datasets.factory import get_imdb 14 | import datasets.imdb 15 | import argparse 16 | import pprint 17 | import time, os, sys 18 | 19 | from nets.vgg16 import vgg16 20 | from nets.resnet_v1 import resnetv1 21 | from nets.mobilenet_v1 import mobilenetv1 22 | 23 | import torch 24 | 25 | def parse_args(): 26 | """ 27 | Parse input arguments 28 | """ 29 | parser = argparse.ArgumentParser(description='Test a Fast R-CNN network') 30 | parser.add_argument('--cfg', dest='cfg_file', 31 | help='optional config file', default=None, type=str) 32 | parser.add_argument('--model', dest='model', 33 | help='model to test', 34 | default=None, type=str) 35 | parser.add_argument('--imdb', dest='imdb_name', 36 | help='dataset to test', 37 | default='voc_2007_test', type=str) 38 | parser.add_argument('--comp', dest='comp_mode', 
help='competition mode', 39 |             action='store_true') 40 |   parser.add_argument('--num_dets', dest='max_per_image', 41 |             help='max number of detections per image', 42 |             default=100, type=int) 43 |   parser.add_argument('--tag', dest='tag', 44 |             help='tag of the model', 45 |             default='', type=str) 46 |   parser.add_argument('--net', dest='net', 47 |             help='vgg16, res50, res101, res152, mobile', 48 |             default='res50', type=str) 49 |   parser.add_argument('--set', dest='set_cfgs', 50 |             help='set config keys', default=None, 51 |             nargs=argparse.REMAINDER) 52 |   parser.add_argument('--weight', dest='weight', help='initialize with pretrained model weights', default='', type=str)  # referenced below when --model is not given 53 |   if len(sys.argv) == 1: 54 |     parser.print_help() 55 |     sys.exit(1) 56 | 57 |   args = parser.parse_args() 58 |   return args 59 | 60 | if __name__ == '__main__': 61 |   args = parse_args() 62 | 63 |   print('Called with args:') 64 |   print(args) 65 | 66 |   if args.cfg_file is not None: 67 |     cfg_from_file(args.cfg_file) 68 |   if args.set_cfgs is not None: 69 |     cfg_from_list(args.set_cfgs) 70 | 71 |   print('Using config:') 72 |   pprint.pprint(cfg) 73 | 74 |   # if has model, get the name from it 75 |   # if does not, then just use the initialization weights 76 |   if args.model: 77 |     filename = os.path.splitext(os.path.basename(args.model))[0] 78 |   else: 79 |     filename = os.path.splitext(os.path.basename(args.weight))[0] 80 | 81 |   tag = args.tag 82 |   tag = tag if tag else 'default' 83 |   filename = tag + '/' + filename 84 | 85 |   imdb = get_imdb(args.imdb_name) 86 |   imdb.competition_mode(args.comp_mode) 87 | 88 |   # load network 89 |   if args.net == 'vgg16': 90 |     net = vgg16() 91 |   elif args.net == 'res50': 92 |     net = resnetv1(num_layers=50) 93 |   elif args.net == 'res101': 94 |     net = resnetv1(num_layers=101) 95 |   elif args.net == 'res152': 96 |     net = resnetv1(num_layers=152) 97 |   elif args.net == 'mobile': 98 |     net = mobilenetv1() 99 |   elif args.net == 'FPNres50': 100 |     net = resnetv1(num_layers=50) 101 |   else: 102 |     raise NotImplementedError 103 | 104 |   # load model 105 |   net.create_architecture(imdb.num_classes, tag='default', 106 |               anchor_scales=cfg.ANCHOR_SCALES, 107 |               anchor_ratios=cfg.ANCHOR_RATIOS) 108 | 109 |   net.eval() 110 |   net.cuda() 111 | 112 |   if args.model: 113 |     print(('Loading model checkpoint from {:s}').format(args.model)) 114 |     net.load_state_dict(torch.load(args.model)) 115 |     print('Loaded.') 116 |   else: 117 |     print(('Loading initial weights from {:s}').format(args.weight)) 118 |     print('Loaded.') 119 | 120 |   test_net(net, imdb, filename, max_per_image=args.max_per_image) 121 |   # os.system("mv ./pr/pr.png ./pr/%s.png" % args.model[args.model.rfind('/')+1:][18:-4]) 122 | -------------------------------------------------------------------------------- /tools/trainval_net.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Zheqi He, Xinlei Chen, based on code from Ross Girshick 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import _init_paths 11 | from model.train_val import get_training_roidb, train_net 12 | from model.config import cfg, cfg_from_file, cfg_from_list, get_output_dir, get_output_tb_dir 13 | from datasets.factory import get_imdb 14 | import datasets.imdb 15 | import argparse 16 | import pprint 17 | import numpy as np 18 | import sys 19 | 20 | from nets.vgg16 import vgg16 21 | from nets.resnet_v1
import resnetv1 22 | from nets.mobilenet_v1 import mobilenetv1 23 | 24 | def parse_args(): 25 | """ 26 | Parse input arguments 27 | """ 28 | parser = argparse.ArgumentParser(description='Train a Fast R-CNN network') 29 | parser.add_argument('--cfg', dest='cfg_file', 30 | help='optional config file', 31 | default=None, type=str) 32 | parser.add_argument('--weight', dest='weight', 33 | help='initialize with pretrained model weights', 34 | type=str) 35 | parser.add_argument('--imdb', dest='imdb_name', 36 | help='dataset to train on', 37 | default='voc_2007_trainval', type=str) 38 | parser.add_argument('--imdbval', dest='imdbval_name', 39 | help='dataset to validate on', 40 | default='voc_2007_test', type=str) 41 | parser.add_argument('--iters', dest='max_iters', 42 | help='number of iterations to train', 43 | default=70000, type=int) 44 | parser.add_argument('--tag', dest='tag', 45 | help='tag of the model', 46 | default=None, type=str) 47 | parser.add_argument('--net', dest='net', 48 | help='vgg16, res50, res101, res152, mobile', 49 | default='res50', type=str) 50 | parser.add_argument('--set', dest='set_cfgs', 51 | help='set config keys', default=None, 52 | nargs=argparse.REMAINDER) 53 | 54 | if len(sys.argv) == 1: 55 | parser.print_help() 56 | sys.exit(1) 57 | 58 | args = parser.parse_args() 59 | return args 60 | 61 | 62 | def combined_roidb(imdb_names): 63 | """ 64 | Combine multiple roidbs 65 | """ 66 | 67 | def get_roidb(imdb_name): 68 | imdb = get_imdb(imdb_name) 69 | print('Loaded dataset `{:s}` for training'.format(imdb.name)) 70 | imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD) 71 | print('Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD)) 72 | roidb = get_training_roidb(imdb) 73 | return roidb 74 | 75 | roidbs = [get_roidb(s) for s in imdb_names.split('+')] 76 | roidb = roidbs[0] 77 | if len(roidbs) > 1: 78 | for r in roidbs[1:]: 79 | roidb.extend(r) 80 | tmp = get_imdb(imdb_names.split('+')[1]) 81 | imdb = datasets.imdb.imdb(imdb_names, tmp.classes) 82 | else: 83 | imdb = get_imdb(imdb_names) 84 | return imdb, roidb 85 | 86 | 87 | if __name__ == '__main__': 88 | args = parse_args() 89 | 90 | print('Called with args:') 91 | print(args) 92 | 93 | if args.cfg_file is not None: 94 | cfg_from_file(args.cfg_file) 95 | if args.set_cfgs is not None: 96 | cfg_from_list(args.set_cfgs) 97 | 98 | print('Using config:') 99 | pprint.pprint(cfg) 100 | 101 | np.random.seed(cfg.RNG_SEED) 102 | 103 | # train set 104 | imdb, roidb = combined_roidb(args.imdb_name) 105 | print('{:d} roidb entries'.format(len(roidb))) 106 | 107 | # output directory where the models are saved 108 | output_dir = get_output_dir(imdb, args.tag) 109 | print('Output will be saved to `{:s}`'.format(output_dir)) 110 | 111 | # tensorboard directory where the summaries are saved during training 112 | tb_dir = get_output_tb_dir(imdb, args.tag) 113 | print('TensorFlow summaries will be saved to `{:s}`'.format(tb_dir)) 114 | 115 | # also add the validation set, but with no flipping images 116 | orgflip = cfg.TRAIN.USE_FLIPPED 117 | cfg.TRAIN.USE_FLIPPED = False 118 | _, valroidb = combined_roidb(args.imdbval_name) 119 | print('{:d} validation roidb entries'.format(len(valroidb))) 120 | cfg.TRAIN.USE_FLIPPED = orgflip 121 | 122 | # load network 123 | if args.net == 'vgg16': 124 | net = vgg16() 125 | elif args.net == 'res50': 126 | net = resnetv1(num_layers=50) 127 | elif args.net == 'res101': 128 | net = resnetv1(num_layers=101) 129 | elif args.net == 'res152': 130 | net = resnetv1(num_layers=152) 131 | elif args.net 
== 'mobile': 132 | net = mobilenetv1() 133 | else: 134 | raise NotImplementedError 135 | 136 | train_net(net, imdb, roidb, valroidb, output_dir, tb_dir, 137 | pretrained_model=args.weight, 138 | max_iters=args.max_iters) 139 | -------------------------------------------------------------------------------- /tools/trainval_net_adapt.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Zheqi He, Xinlei Chen, based on code from Ross Girshick 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import _init_paths 11 | from model.train_val_adapt import get_training_roidb, train_net 12 | from model.config import cfg, cfg_from_file, cfg_from_list, get_output_dir, get_output_tb_dir 13 | from datasets.factory import get_imdb 14 | import datasets.imdb 15 | import argparse 16 | import pprint 17 | import numpy as np 18 | import sys 19 | 20 | from nets.vgg16 import vgg16 21 | from nets.resnet_v1 import resnetv1 22 | from nets.mobilenet_v1 import mobilenetv1 23 | 24 | import random 25 | #import os 26 | #os.environ['CUDA_LAUNCH_BLOCKING'] = '1' 27 | 28 | def parse_args(): 29 | """ 30 | Parse input arguments 31 | """ 32 | parser = argparse.ArgumentParser(description='Train a Fast R-CNN network') 33 | parser.add_argument('--cfg', dest='cfg_file', 34 | help='optional config file', 35 | default=None, type=str) 36 | parser.add_argument('--weight', dest='weight', 37 | help='initialize with pretrained model weights', 38 | type=str) 39 | parser.add_argument('--imdb', dest='imdb_name', 40 | help='dataset to train on', 41 | default='voc_2007_trainval', type=str) 42 | parser.add_argument('--imdb_T', dest='imdb_T_name', 43 | help='dataset(target) to train on', 44 | default='voc_2007_trainval', type=str) 45 | parser.add_argument('--imdbval', dest='imdbval_name', 46 | help='dataset to validate on', 47 | default='voc_2007_test', type=str) 48 | parser.add_argument('--iters', dest='max_iters', 49 | help='number of iterations to train', 50 | default=70000, type=int) 51 | parser.add_argument('--tag', dest='tag', 52 | help='tag of the model', 53 | default=None, type=str) 54 | parser.add_argument('--net', dest='net', 55 | help='vgg16, res50, res101, res152, mobile', 56 | default='res50', type=str) 57 | parser.add_argument('--set', dest='set_cfgs', 58 | help='set config keys', default=None, 59 | nargs=argparse.REMAINDER) 60 | 61 | if len(sys.argv) == 1: 62 | parser.print_help() 63 | sys.exit(1) 64 | 65 | args = parser.parse_args() 66 | return args 67 | 68 | 69 | def combined_roidb(imdb_names): 70 | """ 71 | Combine multiple roidbs 72 | """ 73 | 74 | def get_roidb(imdb_name): 75 | imdb = get_imdb(imdb_name) 76 | print('Loaded dataset `{:s}` for training'.format(imdb.name)) 77 | imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD) 78 | print('Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD)) 79 | roidb = get_training_roidb(imdb) 80 | return roidb 81 | 82 | roidbs = [get_roidb(s) for s in imdb_names.split('+')] 83 | roidb = roidbs[0] 84 | if len(roidbs) > 1: 85 | for r in roidbs[1:]: 86 | roidb.extend(r) 87 | tmp = get_imdb(imdb_names.split('+')[1]) 88 | imdb = datasets.imdb.imdb(imdb_names, tmp.classes) 89 | else: 90 | imdb = get_imdb(imdb_names) 91 | return imdb, roidb 92 | 93 
| 94 | if __name__ == '__main__': 95 | 96 | args = parse_args() 97 | 98 | print('Called with args:') 99 | print(args) 100 | 101 | if args.cfg_file is not None: 102 | cfg_from_file(args.cfg_file) 103 | if args.set_cfgs is not None: 104 | cfg_from_list(args.set_cfgs) 105 | 106 | print('Using config:') 107 | pprint.pprint(cfg) 108 | 109 | np.random.seed(cfg.RNG_SEED) 110 | random.seed(cfg.RNG_SEED) 111 | # train set 112 | imdb, roidb = combined_roidb(args.imdb_name) 113 | print('{:d} roidb entries'.format(len(roidb))) 114 | imdb_T, roidb_T = combined_roidb(args.imdb_T_name) 115 | print('{:d} roidbT entries'.format(len(roidb_T))) 116 | 117 | # output directory where the models are saved 118 | output_dir = get_output_dir(imdb, args.tag) 119 | print('Output will be saved to `{:s}`'.format(output_dir)) 120 | 121 | # tensorboard directory where the summaries are saved during training 122 | tb_dir = get_output_tb_dir(imdb, args.tag) 123 | print('TensorFlow summaries will be saved to `{:s}`'.format(tb_dir)) 124 | 125 | # also add the validation set, but with no flipping images 126 | orgflip = cfg.TRAIN.USE_FLIPPED 127 | cfg.TRAIN.USE_FLIPPED = False 128 | _, valroidb = combined_roidb(args.imdbval_name) 129 | print('{:d} validation roidb entries'.format(len(valroidb))) 130 | cfg.TRAIN.USE_FLIPPED = orgflip 131 | 132 | # load network 133 | if args.net == 'vgg16': 134 | net = vgg16() 135 | elif args.net == 'res50': 136 | net = resnetv1(num_layers=50) 137 | elif args.net == 'res101': 138 | net = resnetv1(num_layers=101) 139 | elif args.net == 'res152': 140 | net = resnetv1(num_layers=152) 141 | elif args.net == 'mobile': 142 | net = mobilenetv1() 143 | else: 144 | raise NotImplementedError 145 | 146 | train_net(net, imdb, roidb, imdb_T, roidb_T, valroidb, output_dir, tb_dir, 147 | pretrained_model=args.weight, 148 | max_iters=args.max_iters) 149 | -------------------------------------------------------------------------------- /trained_weights/.gitignore: -------------------------------------------------------------------------------- 1 | net_D* 2 | *.tar.gz 3 | *.pth 4 | --------------------------------------------------------------------------------