├── LICENSE ├── README.md ├── __init__.py ├── arguments.py ├── augment_data.py ├── coco_eval.py ├── data ├── __init__.py ├── coco.py ├── coco_labels.txt ├── config.py ├── data_augment.py ├── example.jpg ├── scripts │ ├── COCO2014.sh │ ├── VOC2007.sh │ └── VOC2012.sh ├── voc0712.py └── voc_eval.py ├── data_process.ipynb ├── data_reader.py ├── data_reader_pedestrian.py ├── dataset.py ├── dataset ├── caltech_pedestrian.py ├── inria_person.py ├── mall.py └── upen_person.py ├── demo ├── __init__.py ├── demo.ipynb ├── demo.py └── live.py ├── doc ├── RFB.png ├── SSD.jpg ├── detection_example.png ├── detection_example2.png ├── detection_examples.png ├── rfb.png └── ssd.png ├── eval.py ├── focal_loss.py ├── layers ├── __init__.py ├── functions │ ├── __init__.py │ ├── detection.py │ └── prior_box.py └── modules │ ├── __init__.py │ ├── l2norm.py │ ├── multibox_loss.py │ └── refine_multibox_loss.py ├── loss_loader.py ├── main.py ├── make.sh ├── model_loader.py ├── models ├── FRFBSSD_vgg.py ├── FSSD_mobile.py ├── FSSD_vgg.py ├── RFB_Net_E_vgg.py ├── RFB_Net_mobile.py ├── RFB_Net_vgg.py ├── RefineSSD_vgg.py ├── SSD_vgg.py ├── __init__.py ├── base_models.py ├── densenet.py ├── mobilenet.py ├── resnet.py └── vgg.py ├── multi_thread_score_pedestrian_detection.py ├── object_detector.py ├── pretrainedmodels ├── __init__.py ├── datasets │ ├── __init__.py │ ├── utils.py │ └── voc.py ├── models │ ├── __init__.py │ ├── bninception.py │ ├── cafferesnet.py │ ├── dpn.py │ ├── fbresnet.py │ ├── fbresnet │ │ ├── resnet152_dump.lua │ │ └── resnet152_load.py │ ├── inceptionresnetv2.py │ ├── inceptionv4.py │ ├── nasnet.py │ ├── nasnet_mobile.py │ ├── pnasnet.py │ ├── polynet.py │ ├── resnext.py │ ├── resnext_features │ │ ├── __init__.py │ │ ├── resnext101_32x4d_features.py │ │ └── resnext101_64x4d_features.py │ ├── senet.py │ ├── torchvision_models.py │ ├── utils.py │ ├── vggm.py │ ├── wideresnet.py │ └── xception.py ├── utils.py └── version.py ├── refinedet_train_test.py ├── score_pedestrian_detection.py ├── statics.py ├── train.py ├── train_test.py ├── train_test_fssd_mobile_pre.py ├── transforms.py ├── utils ├── __init__.py ├── box_utils.py ├── build.py ├── json_utils.py ├── nms │ ├── __init__.py │ ├── cpu_nms.c │ ├── cpu_nms.pyx │ ├── gpu_nms.pyx │ ├── nms_kernel.cu │ └── py_cpu_nms.py ├── pascal_utils.py ├── pycocotools │ ├── __init__.py │ ├── _mask.c │ ├── _mask.pyx │ ├── coco.py │ ├── cocoeval.py │ ├── mask.py │ ├── maskApi.c │ └── maskApi.h ├── timer.py ├── utils.py └── visualization │ └── pascal_detection_visualize.py └── wider_face_pedestrian_to_pascal.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Max deGroot, Ellis Brown 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RetinaNet applied to the WIDER pedestrian detection challenge at ECCV 2018, in PyTorch 2 | 3 | This code placed 21st among 168 teams. 4 | 5 | Improvement ideas (todos): 6 |
    7 | 1. Run for more epochs.
    8 | 2. Use more data augmentation methods.
    9 | 3. Optimize hyper-parameters such as the learning rate and its decay schedule.
   10 | 4. Use Adam, or SGD with momentum.
   11 | 5. Use soft-NMS (see the sketch after this list).
   12 | 6. Use multi-scale testing.
   13 | 7. Optimize the FPN feature extractor for small pedestrian objects.
   14 | 8. Use a GAN to generate additional training data in the context of roads and pedestrians.
   15 |
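A minimal NumPy sketch of the soft-NMS idea mentioned in item 5 above (Gaussian-decay variant). The function name, default `sigma`, and `score_thresh` are illustrative only and are not part of this repository's `utils/nms` package.

```python
import numpy as np

def soft_nms(dets, scores, sigma=0.5, score_thresh=0.001):
    """Gaussian soft-NMS: decay the scores of overlapping boxes instead of
    discarding them. dets: (N, 4) boxes as [x1, y1, x2, y2]; scores: (N,)."""
    dets = dets.astype(float)
    scores = scores.astype(float).copy()
    keep = []
    idxs = np.arange(len(scores))
    while len(idxs) > 0:
        # pick the remaining box with the highest (possibly decayed) score
        top = idxs[np.argmax(scores[idxs])]
        keep.append(top)
        idxs = idxs[idxs != top]
        if len(idxs) == 0:
            break
        # IoU of the selected box against all remaining boxes
        xx1 = np.maximum(dets[top, 0], dets[idxs, 0])
        yy1 = np.maximum(dets[top, 1], dets[idxs, 1])
        xx2 = np.minimum(dets[top, 2], dets[idxs, 2])
        yy2 = np.minimum(dets[top, 3], dets[idxs, 3])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        area_top = (dets[top, 2] - dets[top, 0] + 1) * (dets[top, 3] - dets[top, 1] + 1)
        area_rest = (dets[idxs, 2] - dets[idxs, 0] + 1) * (dets[idxs, 3] - dets[idxs, 1] + 1)
        iou = inter / (area_top + area_rest - inter)
        # Gaussian decay instead of hard suppression
        scores[idxs] *= np.exp(-(iou ** 2) / sigma)
        idxs = idxs[scores[idxs] > score_thresh]
    return keep
```

Unlike the hard NMS shipped under `utils/nms`, overlapping boxes are down-weighted rather than dropped, which tends to recover detections in crowded pedestrian scenes.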
16 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/__init__.py -------------------------------------------------------------------------------- /arguments.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | parser = argparse.ArgumentParser(description='PyTorch student network training') 3 | 4 | parser.add_argument('--lr',default=0.001, 5 | type=float, 6 | help='learning rate') 7 | parser.add_argument('--resume', 8 | action='store_true', 9 | help='resume from checkpoint') 10 | parser.add_argument('--optimizer', 11 | type=str, 12 | help='optimizer type', 13 | default='adam') 14 | parser.add_argument('--criterion', 15 | type=str, 16 | help='criterion', 17 | default='MSE') 18 | parser.add_argument('--root', 19 | default='../data/', 20 | type=str, 21 | help='data root path') 22 | parser.add_argument('--datalist', 23 | default='../data/datalist/', 24 | type=str, 25 | help='datalist path') 26 | parser.add_argument('--batch_size', 27 | type=int, 28 | help='mini-batch size', 29 | default=150) 30 | parser.add_argument('--name', 31 | default='VGG19_BN', 32 | type=str, 33 | help='session name') 34 | parser.add_argument('--log_dir_path', 35 | default='./student_net_learning/logs', 36 | type=str, 37 | help='log directory path') 38 | parser.add_argument('--epochs', 39 | default=200, 40 | type=int, 41 | help='number of epochs') 42 | parser.add_argument('--cuda', 43 | type=int, 44 | default=1, 45 | help='use CUDA') 46 | parser.add_argument('--model_name', 47 | type=str, 48 | help='model name', 49 | default='ResNet50') 50 | parser.add_argument('--down_epoch', 51 | type=int, 52 | help='epoch number for lr * 1e-1', 53 | default=30) -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | # from .voc import VOCDetection, AnnotationTransform, detection_collate, VOC_CLASSES 2 | from .voc0712 import VOCDetection, AnnotationTransform, detection_collate, VOC_CLASSES 3 | from .coco import COCODetection 4 | from .data_augment import * 5 | from .config import * 6 | -------------------------------------------------------------------------------- /data/coco_labels.txt: -------------------------------------------------------------------------------- 1 | 1,1,person 2 | 2,2,bicycle 3 | 3,3,car 4 | 4,4,motorcycle 5 | 5,5,airplane 6 | 6,6,bus 7 | 7,7,train 8 | 8,8,truck 9 | 9,9,boat 10 | 10,10,traffic light 11 | 11,11,fire hydrant 12 | 13,12,stop sign 13 | 14,13,parking meter 14 | 15,14,bench 15 | 16,15,bird 16 | 17,16,cat 17 | 18,17,dog 18 | 19,18,horse 19 | 20,19,sheep 20 | 21,20,cow 21 | 22,21,elephant 22 | 23,22,bear 23 | 24,23,zebra 24 | 25,24,giraffe 25 | 27,25,backpack 26 | 28,26,umbrella 27 | 31,27,handbag 28 | 32,28,tie 29 | 33,29,suitcase 30 | 34,30,frisbee 31 | 35,31,skis 32 | 36,32,snowboard 33 | 37,33,sports ball 34 | 38,34,kite 35 | 39,35,baseball bat 36 | 40,36,baseball glove 37 | 41,37,skateboard 38 | 42,38,surfboard 39 | 43,39,tennis racket 40 | 44,40,bottle 41 | 46,41,wine glass 42 | 47,42,cup 43 | 48,43,fork 44 | 49,44,knife 45 | 50,45,spoon 46 | 51,46,bowl 47 | 52,47,banana 48 | 53,48,apple 49 | 54,49,sandwich 50 | 55,50,orange 51 | 56,51,broccoli 52 | 57,52,carrot 53 | 
58,53,hot dog 54 | 59,54,pizza 55 | 60,55,donut 56 | 61,56,cake 57 | 62,57,chair 58 | 63,58,couch 59 | 64,59,potted plant 60 | 65,60,bed 61 | 67,61,dining table 62 | 70,62,toilet 63 | 72,63,tv 64 | 73,64,laptop 65 | 74,65,mouse 66 | 75,66,remote 67 | 76,67,keyboard 68 | 77,68,cell phone 69 | 78,69,microwave 70 | 79,70,oven 71 | 80,71,toaster 72 | 81,72,sink 73 | 82,73,refrigerator 74 | 84,74,book 75 | 85,75,clock 76 | 86,76,vase 77 | 87,77,scissors 78 | 88,78,teddy bear 79 | 89,79,hair drier 80 | 90,80,toothbrush 81 | -------------------------------------------------------------------------------- /data/config.py: -------------------------------------------------------------------------------- 1 | # config.py 2 | 3 | # gets home dir cross platform 4 | import cv2 5 | cv2.setNumThreads(0) # pytorch issue 1355: possible deadlock in dataloader 6 | # note: if you used our download scripts, this should be right 7 | VOCroot = '/media/milton/ssd1/dataset/pascal/VOCdevkit/' # path to VOCdevkit root dir 8 | COCOroot = '/home/user/Database/MSCOCO2017' 9 | 10 | # RFB CONFIGS 11 | VOC_300 = { 12 | 'feature_maps': [38, 19, 10, 5, 3, 1], 13 | 14 | 'min_dim': 300, 15 | 16 | 'steps': [8, 16, 32, 64, 100, 300], 17 | 18 | 'min_sizes': [30, 60, 111, 162, 213, 264], 19 | 20 | 'max_sizes': [60, 111, 162, 213, 264, 315], 21 | 22 | 'aspect_ratios': [[2, 3], [2, 3], [2, 3], [2, 3], [2], [2]], 23 | 24 | 'variance': [0.1, 0.2], 25 | 26 | 'clip': True, 27 | } 28 | 29 | VOC_512 = { 30 | 'feature_maps': [64, 32, 16, 8, 4, 2, 1], 31 | 32 | 'min_dim': 512, 33 | 34 | 'steps': [8, 16, 32, 64, 128, 256, 512], 35 | 36 | 'min_sizes': [35.84, 76.8, 153.6, 230.4, 307.2, 384.0, 460.8], 37 | 38 | 'max_sizes': [76.8, 153.6, 230.4, 307.2, 384.0, 460.8, 537.6], 39 | 40 | 'aspect_ratios': [[2, 3], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]], 41 | 42 | 'variance': [0.1, 0.2], 43 | 44 | 'clip': True, 45 | } 46 | 47 | COCO_300 = { 48 | 'feature_maps': [38, 19, 10, 5, 3, 1], 49 | 50 | 'min_dim': 300, 51 | 52 | 'steps': [8, 16, 32, 64, 100, 300], 53 | 54 | 'min_sizes': [21, 45, 99, 153, 207, 261], 55 | 56 | 'max_sizes': [45, 99, 153, 207, 261, 315], 57 | 58 | 'aspect_ratios': [[2, 3], [2, 3], [2, 3], [2, 3], [2], [2]], 59 | 60 | 'variance': [0.1, 0.2], 61 | 62 | 'clip': True, 63 | } 64 | 65 | COCO_512 = { 66 | 'feature_maps': [64, 32, 16, 8, 4, 2, 1], 67 | 68 | 'min_dim': 512, 69 | 70 | 'steps': [8, 16, 32, 64, 128, 256, 512], 71 | 72 | 'min_sizes': [20.48, 51.2, 133.12, 215.04, 296.96, 378.88, 460.8], 73 | 74 | 'max_sizes': [51.2, 133.12, 215.04, 296.96, 378.88, 460.8, 542.72], 75 | 76 | 'aspect_ratios': [[2, 3], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]], 77 | 78 | 'variance': [0.1, 0.2], 79 | 80 | 'clip': True, 81 | } 82 | 83 | COCO_mobile_300 = { 84 | 'feature_maps': [19, 10, 5, 3, 2, 1], 85 | 86 | 'min_dim': 300, 87 | 88 | 'steps': [16, 32, 64, 100, 150, 300], 89 | 90 | 'min_sizes': [45, 90, 135, 180, 225, 270], 91 | 92 | 'max_sizes': [90, 135, 180, 225, 270, 315], 93 | 94 | 'aspect_ratios': [[2, 3], [2, 3], [2, 3], [2, 3], [2], [2]], 95 | 96 | 'variance': [0.1, 0.2], 97 | 98 | 'clip': True, 99 | } 100 | 101 | VOC_320 = { 102 | 'feature_maps': [40, 20, 10, 5], 103 | 104 | 'min_dim': 320, 105 | 106 | 'steps': [8, 16, 32, 64], 107 | 108 | 'min_sizes': [32, 64, 128, 256], 109 | 110 | 'max_sizes': [], 111 | 112 | 'aspect_ratios': [[2], [2], [2], [2]], 113 | 114 | 'variance': [0.1, 0.2], 115 | 116 | 'clip': True, 117 | } 118 | -------------------------------------------------------------------------------- /data/example.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/data/example.jpg -------------------------------------------------------------------------------- /data/scripts/COCO2014.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | start=`date +%s` 4 | 5 | # handle optional download dir 6 | if [ -z "$1" ] 7 | then 8 | # navigate to ~/data 9 | echo "navigating to ~/data/ ..." 10 | mkdir -p ~/data 11 | cd ~/data/ 12 | mkdir -p ./coco 13 | cd ./coco 14 | mkdir -p ./images 15 | mkdir -p ./annotations 16 | else 17 | # check if specified dir is valid 18 | if [ ! -d $1 ]; then 19 | echo $1 " is not a valid directory" 20 | exit 0 21 | fi 22 | echo "navigating to " $1 " ..." 23 | cd $1 24 | fi 25 | 26 | if [ ! -d images ] 27 | then 28 | mkdir -p ./images 29 | fi 30 | 31 | # Download the image data. 32 | cd ./images 33 | echo "Downloading MSCOCO train images ..." 34 | curl -LO http://images.cocodataset.org/zips/train2014.zip 35 | echo "Downloading MSCOCO val images ..." 36 | curl -LO http://images.cocodataset.org/zips/val2014.zip 37 | 38 | cd ../ 39 | if [ ! -d annotations] 40 | then 41 | mkdir -p ./annotations 42 | fi 43 | 44 | # Download the annotation data. 45 | cd ./annotations 46 | echo "Downloading MSCOCO train/val annotations ..." 47 | curl -LO http://images.cocodataset.org/annotations/annotations_trainval2014.zip 48 | echo "Finished downloading. Now extracting ..." 49 | 50 | # Unzip data 51 | echo "Extracting train images ..." 52 | unzip ../images/train2014.zip -d ../images 53 | echo "Extracting val images ..." 54 | unzip ../images/val2014.zip -d ../images 55 | echo "Extracting annotations ..." 56 | unzip ./annotations_trainval2014.zip 57 | 58 | echo "Removing zip files ..." 59 | rm ../images/train2014.zip 60 | rm ../images/val2014.zip 61 | rm ./annotations_trainval2014.zip 62 | 63 | echo "Creating trainval35k dataset..." 64 | 65 | # Download annotations json 66 | echo "Downloading trainval35k annotations from S3" 67 | curl -LO https://s3.amazonaws.com/amdegroot-datasets/instances_trainval35k.json.zip 68 | 69 | # combine train and val 70 | echo "Combining train and val images" 71 | mkdir ../images/trainval35k 72 | cd ../images/train2014 73 | find -maxdepth 1 -name '*.jpg' -exec cp -t ../trainval35k {} + # dir too large for cp 74 | cd ../val2014 75 | find -maxdepth 1 -name '*.jpg' -exec cp -t ../trainval35k {} + 76 | 77 | 78 | end=`date +%s` 79 | runtime=$((end-start)) 80 | 81 | echo "Completed in " $runtime " seconds" 82 | -------------------------------------------------------------------------------- /data/scripts/VOC2007.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ellis Brown 3 | 4 | start=`date +%s` 5 | 6 | # handle optional download dir 7 | if [ -z "$1" ] 8 | then 9 | # navigate to ~/data 10 | echo "navigating to ~/data/ ..." 11 | mkdir -p ~/data 12 | cd ~/data/ 13 | else 14 | # check if is valid directory 15 | if [ ! -d $1 ]; then 16 | echo $1 "is not a valid directory" 17 | exit 0 18 | fi 19 | echo "navigating to" $1 "..." 20 | cd $1 21 | fi 22 | 23 | echo "Downloading VOC2007 trainval ..." 24 | # Download the data. 25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar 26 | echo "Downloading VOC2007 test data ..." 
27 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar 28 | echo "Done downloading." 29 | 30 | # Extract data 31 | echo "Extracting trainval ..." 32 | tar -xvf VOCtrainval_06-Nov-2007.tar 33 | echo "Extracting test ..." 34 | tar -xvf VOCtest_06-Nov-2007.tar 35 | echo "removing tars ..." 36 | rm VOCtrainval_06-Nov-2007.tar 37 | rm VOCtest_06-Nov-2007.tar 38 | 39 | end=`date +%s` 40 | runtime=$((end-start)) 41 | 42 | echo "Completed in" $runtime "seconds" -------------------------------------------------------------------------------- /data/scripts/VOC2012.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ellis Brown 3 | 4 | start=`date +%s` 5 | 6 | # handle optional download dir 7 | if [ -z "$1" ] 8 | then 9 | # navigate to ~/data 10 | echo "navigating to ~/data/ ..." 11 | mkdir -p ~/data 12 | cd ~/data/ 13 | else 14 | # check if is valid directory 15 | if [ ! -d $1 ]; then 16 | echo $1 "is not a valid directory" 17 | exit 0 18 | fi 19 | echo "navigating to" $1 "..." 20 | cd $1 21 | fi 22 | 23 | echo "Downloading VOC2012 trainval ..." 24 | # Download the data. 25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 26 | echo "Done downloading." 27 | 28 | 29 | # Extract data 30 | echo "Extracting trainval ..." 31 | tar -xvf VOCtrainval_11-May-2012.tar 32 | echo "removing tar ..." 33 | rm VOCtrainval_11-May-2012.tar 34 | 35 | end=`date +%s` 36 | runtime=$((end-start)) 37 | 38 | echo "Completed in" $runtime "seconds" -------------------------------------------------------------------------------- /data/voc_eval.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Bharath Hariharan 5 | # -------------------------------------------------------- 6 | 7 | import pickle 8 | import xml.etree.ElementTree as ET 9 | 10 | import numpy as np 11 | import os 12 | 13 | 14 | def parse_rec(filename): 15 | """ Parse a PASCAL VOC xml file """ 16 | tree = ET.parse(filename) 17 | objects = [] 18 | for obj in tree.findall('object'): 19 | obj_struct = {} 20 | obj_struct['name'] = obj.find('name').text 21 | obj_struct['pose'] = obj.find('pose').text 22 | obj_struct['truncated'] = int(obj.find('truncated').text) 23 | obj_struct['difficult'] = int(obj.find('difficult').text) 24 | bbox = obj.find('bndbox') 25 | obj_struct['bbox'] = [int(bbox.find('xmin').text), 26 | int(bbox.find('ymin').text), 27 | int(bbox.find('xmax').text), 28 | int(bbox.find('ymax').text)] 29 | objects.append(obj_struct) 30 | 31 | return objects 32 | 33 | 34 | def voc_ap(rec, prec, use_07_metric=False): 35 | """ ap = voc_ap(rec, prec, [use_07_metric]) 36 | Compute VOC AP given precision and recall. 37 | If use_07_metric is true, uses the 38 | VOC 07 11 point method (default:False). 39 | """ 40 | if use_07_metric: 41 | # 11 point metric 42 | ap = 0. 43 | for t in np.arange(0., 1.1, 0.1): 44 | if np.sum(rec >= t) == 0: 45 | p = 0 46 | else: 47 | p = np.max(prec[rec >= t]) 48 | ap = ap + p / 11. 
49 | else: 50 | # correct AP calculation 51 | # first append sentinel values at the end 52 | mrec = np.concatenate(([0.], rec, [1.])) 53 | mpre = np.concatenate(([0.], prec, [0.])) 54 | 55 | # compute the precision envelope 56 | for i in range(mpre.size - 1, 0, -1): 57 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 58 | 59 | # to calculate area under PR curve, look for points 60 | # where X axis (recall) changes value 61 | i = np.where(mrec[1:] != mrec[:-1])[0] 62 | 63 | # and sum (\Delta recall) * prec 64 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 65 | return ap 66 | 67 | 68 | def voc_eval(detpath, 69 | annopath, 70 | imagesetfile, 71 | classname, 72 | cachedir, 73 | ovthresh=0.5, 74 | use_07_metric=False): 75 | """rec, prec, ap = voc_eval(detpath, 76 | annopath, 77 | imagesetfile, 78 | classname, 79 | [ovthresh], 80 | [use_07_metric]) 81 | 82 | Top level function that does the PASCAL VOC evaluation. 83 | 84 | detpath: Path to detections 85 | detpath.format(classname) should produce the detection results file. 86 | annopath: Path to annotations 87 | annopath.format(imagename) should be the xml annotations file. 88 | imagesetfile: Text file containing the list of images, one image per line. 89 | classname: Category name (duh) 90 | cachedir: Directory for caching the annotations 91 | [ovthresh]: Overlap threshold (default = 0.5) 92 | [use_07_metric]: Whether to use VOC07's 11 point AP computation 93 | (default False) 94 | """ 95 | # assumes detections are in detpath.format(classname) 96 | # assumes annotations are in annopath.format(imagename) 97 | # assumes imagesetfile is a text file with each line an image name 98 | # cachedir caches the annotations in a pickle file 99 | 100 | # first load gt 101 | if not os.path.isdir(cachedir): 102 | os.mkdir(cachedir) 103 | cachefile = os.path.join(cachedir, 'annots.pkl') 104 | # read list of images 105 | with open(imagesetfile, 'r') as f: 106 | lines = f.readlines() 107 | imagenames = [x.strip() for x in lines] 108 | 109 | if not os.path.isfile(cachefile): 110 | # load annots 111 | recs = {} 112 | for i, imagename in enumerate(imagenames): 113 | recs[imagename] = parse_rec(annopath.format(imagename)) 114 | if i % 100 == 0: 115 | print('Reading annotation for {:d}/{:d}'.format( 116 | i + 1, len(imagenames))) 117 | # save 118 | print('Saving cached annotations to {:s}'.format(cachefile)) 119 | with open(cachefile, 'wb') as f: 120 | pickle.dump(recs, f) 121 | else: 122 | # load 123 | with open(cachefile, 'rb') as f: 124 | recs = pickle.load(f) 125 | 126 | # extract gt objects for this class 127 | class_recs = {} 128 | npos = 0 129 | for imagename in imagenames: 130 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 131 | bbox = np.array([x['bbox'] for x in R]) 132 | difficult = np.array([x['difficult'] for x in R]).astype(np.bool) 133 | det = [False] * len(R) 134 | npos = npos + sum(~difficult) 135 | class_recs[imagename] = {'bbox': bbox, 136 | 'difficult': difficult, 137 | 'det': det} 138 | 139 | # read dets 140 | detfile = detpath.format(classname) 141 | with open(detfile, 'r') as f: 142 | lines = f.readlines() 143 | 144 | splitlines = [x.strip().split(' ') for x in lines] 145 | image_ids = [x[0] for x in splitlines] 146 | confidence = np.array([float(x[1]) for x in splitlines]) 147 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 148 | 149 | # sort by confidence 150 | sorted_ind = np.argsort(-confidence) 151 | sorted_scores = np.sort(-confidence) 152 | BB = BB[sorted_ind, :] 153 | image_ids = [image_ids[x] 
for x in sorted_ind] 154 | 155 | # go down dets and mark TPs and FPs 156 | nd = len(image_ids) 157 | tp = np.zeros(nd) 158 | fp = np.zeros(nd) 159 | for d in range(nd): 160 | R = class_recs[image_ids[d]] 161 | bb = BB[d, :].astype(float) 162 | ovmax = -np.inf 163 | BBGT = R['bbox'].astype(float) 164 | 165 | if BBGT.size > 0: 166 | # compute overlaps 167 | # intersection 168 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 169 | iymin = np.maximum(BBGT[:, 1], bb[1]) 170 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 171 | iymax = np.minimum(BBGT[:, 3], bb[3]) 172 | iw = np.maximum(ixmax - ixmin + 1., 0.) 173 | ih = np.maximum(iymax - iymin + 1., 0.) 174 | inters = iw * ih 175 | 176 | # union 177 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 178 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 179 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 180 | 181 | overlaps = inters / uni 182 | ovmax = np.max(overlaps) 183 | jmax = np.argmax(overlaps) 184 | 185 | if ovmax > ovthresh: 186 | if not R['difficult'][jmax]: 187 | if not R['det'][jmax]: 188 | tp[d] = 1. 189 | R['det'][jmax] = 1 190 | else: 191 | fp[d] = 1. 192 | else: 193 | fp[d] = 1. 194 | 195 | # compute precision recall 196 | fp = np.cumsum(fp) 197 | tp = np.cumsum(tp) 198 | rec = tp / float(npos) 199 | # avoid divide by zero in case the first detection matches a difficult 200 | # ground truth 201 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 202 | ap = voc_ap(rec, prec, use_07_metric) 203 | 204 | return rec, prec, ap 205 | -------------------------------------------------------------------------------- /data_reader.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data.dataset import Dataset 2 | from torchvision import transforms 3 | from PIL import Image 4 | from torchvision.transforms import * 5 | 6 | data_set_name="ISIC 2018" 7 | 8 | from layers.functions import Detect 9 | 10 | 11 | def str2bool(v): 12 | return v.lower() in ("yes", "true", "t", "1") 13 | 14 | """ 15 | Train Val Test 16 | Images 11500 5000 3500 17 | Labels 46513 19696 18 | 19 | todo ignore parts set zero 20 | """ 21 | 22 | data_set_name="Wider Face Pedestrian dataset." 
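# --- added note (illustration only, not part of the original file) -----------
# read_train_gt()/read_val_gt() below assume one image per line in the
# ground-truth files, followed by repeating 5-tuples:
#   <image_name> <class> <left> <top> <width> <height> [<class> <left> ...]
# e.g. (hypothetical values): "img_00001.jpg 1 455 182 36 103 1 512 180 30 95"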
23 | 24 | def read_train_gt(): 25 | annotations=[] 26 | with open(train_bbx_gt_file,'r') as train_bbx_file: 27 | content=train_bbx_file.readlines(); 28 | for line in content: 29 | line_list=line.split(" ") 30 | file_name=line_list[0] 31 | row=[] 32 | for idx in range(1,len(line_list)-1,5): 33 | class_num=line_list[idx] 34 | left=line_list[idx+1] 35 | top=line_list[idx+2] 36 | w=line_list[idx+3] 37 | h=line_list[idx+4].strip() 38 | obj=[class_num, left, top, w, h] 39 | if len(obj)>0: 40 | row+=obj 41 | if len(row)>0: 42 | annotations.append([file_name,row[:]]) 43 | return annotations 44 | 45 | 46 | def read_val_gt(): 47 | annotations = [] 48 | with open(val_bbx_gt_file, 'r') as train_bbx_file: 49 | content = train_bbx_file.readlines(); 50 | for line in content: 51 | line_list = line.split(" ") 52 | file_name = line_list[0] 53 | row = [] 54 | for idx in range(1, len(line_list) - 1, 5): 55 | class_num = line_list[idx] 56 | left = line_list[idx + 1] 57 | top = line_list[idx + 2] 58 | w = line_list[idx + 3] 59 | h = line_list[idx + 4].strip() 60 | obj = [class_num, left, top, w, h] 61 | if len(obj) > 0: 62 | row += obj 63 | if len(row) > 0: 64 | annotations.append([file_name, row[:]]) 65 | return annotations 66 | 67 | 68 | # annotations=read_train_gt() 69 | # print(len(annotations)) 70 | # 71 | # count=0 72 | # for anno in annotations: 73 | # count+=len(annotations[anno]) 74 | # print(count) 75 | # # annos= read_train_gt() 76 | # # for anno in annos: 77 | # # print(annos[anno]) 78 | 79 | def test_read_data(): 80 | train_gt=read_train_gt() 81 | for row in train_gt: 82 | print(row) 83 | 84 | 85 | def get_validation_data(): 86 | return 87 | 88 | class DatasetReader(Dataset): 89 | """ 90 | """ 91 | def __init__(self, data,mode='train',): 92 | print("{} count:{}".format(mode,len(data))) 93 | self.mode=mode 94 | self.data=np.asarray(data) 95 | self.transform_train_image=transforms.Compose([ 96 | RandomCrop([224,224]), 97 | RandomHorizontalFlip(p=.2), 98 | # ColorJitter(.6), 99 | # RandomVerticalFlip(p=.2), 100 | # RandomGrayscale(p=.2), 101 | # transforms.RandomRotation(10), 102 | # transforms.RandomAffine(10), 103 | # ColorJitter(.6), 104 | transforms.ToTensor(), 105 | # transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) 106 | ]); 107 | 108 | self.transform_test_image = transforms.Compose([ 109 | transforms.Resize([224, 224]), 110 | transforms.ToTensor()]); 111 | 112 | 113 | def __getitem__(self, index): 114 | img_path=self.data[index,0] 115 | label=int(self.data[index,1]) 116 | 117 | if not os.path.exists(img_path): 118 | print("{} image not found".format(img_path)) 119 | exit(0); 120 | img = Image.open(img_path) 121 | if self.mode=="train": 122 | data = self.transform_train_image(img) 123 | return data, label 124 | 125 | elif self.mode=="valid": 126 | data = self.transform_test_image(img) 127 | return data, label 128 | 129 | def __len__(self): 130 | return len(self.data) 131 | from statics import * 132 | from data import * 133 | def get_data_loader(args): 134 | return get_voc_reader(args) 135 | 136 | def get_voc_reader(args): 137 | img_dim=args.size 138 | rgb_means = (104, 117, 123) 139 | rgb_std = (1, 1, 1) 140 | p = (0.6, 0.2)[args.version == 'RFB_mobile'] 141 | train_sets = [('2007', 'trainval'), ('2012', 'trainval')] 142 | cfg = (VOC_300, VOC_512)[args.size == '512'] 143 | 144 | testset = VOCDetection( 145 | VOCroot, [('2007', 'test')], None, AnnotationTransform()) 146 | 147 | train_dataset = VOCDetection(VOCroot, train_sets, preproc( 148 | img_dim, rgb_means, rgb_std, p), 
AnnotationTransform()) 149 | 150 | trainloader = torch.utils.data.DataLoader(train_dataset, args.batch_size, 151 | shuffle=True, num_workers=args.num_workers, 152 | collate_fn=detection_collate) 153 | num_classes=len(args.classes.split(",")) 154 | detector = Detect(num_classes, 0, cfg) 155 | 156 | return (trainloader, (testset,detector)) 157 | 158 | def test(): 159 | trainloader, valloader = get_data_loader(100) 160 | for idx, (inputs, targets) in enumerate(valloader): 161 | print(inputs.shape) 162 | 163 | """ 164 | all the ignore parts of image will be zero. 165 | """ 166 | from utils.file_utils import * 167 | 168 | def get_ignore_parts_for_train(): 169 | annotations=[] 170 | for line in read_text_file(train_bbx_ignore_file): 171 | line_list = line.split(" ") 172 | # print(len(line_list)) 173 | file_name = line_list[0] 174 | for idx in range(1, len(line_list) - 1, 4): 175 | left = line_list[idx + 1] 176 | top = line_list[idx + 2] 177 | w = line_list[idx + 3] 178 | h = line_list[idx + 4] 179 | annotations[file_name].append([ left, top, w, h]) 180 | return annotations 181 | 182 | 183 | 184 | if __name__ == '__main__': 185 | read_train_gt() 186 | 187 | -------------------------------------------------------------------------------- /data_reader_pedestrian.py: -------------------------------------------------------------------------------- 1 | import glob 2 | 3 | def get_test_loader_for_upload(batch_size): 4 | test_files=glob.glob("/media/milton/ssd1/research/competitions/data_wider_pedestrian/test_new/test_new/**.jpg") 5 | return test_files -------------------------------------------------------------------------------- /dataset.py: -------------------------------------------------------------------------------- 1 | '''Custom dataset for loading imgs and descriptors 2 | ''' 3 | import os.path 4 | 5 | import numpy as np 6 | import pandas as pd 7 | import torch 8 | import torch.utils.data as data 9 | from PIL import Image 10 | 11 | def default_loader(path): 12 | with open(path, 'rb') as f: 13 | with Image.open(f) as img: 14 | return img.convert('RGB') 15 | 16 | def np_loader(path): 17 | return np.load(path) 18 | 19 | 20 | 21 | def build_dataset_lists(list_path,split): 22 | im_list = os.path.join(list_path, 'im_'+split+'.txt') 23 | at_list = os.path.join(list_path, 'at_'+split+'.npy') 24 | print(os.path.abspath(im_list)) 25 | images = pd.read_csv(im_list, header=None, names=['impath']) 26 | targets = np.load(at_list) 27 | return images.impath.values,targets 28 | 29 | class ImageListDataset(data.Dataset): 30 | """ 31 | Builds a dataset based on a list of images. 
32 | root -- path to images 33 | list_path -- path to image lists 34 | split -- train|val| - name of the dataset part (default train) 35 | transform -- transform for images 36 | """ 37 | def __init__(self, root, list_path, split = 'train', 38 | transform=None, loader=default_loader): 39 | 40 | images, targets = build_dataset_lists(list_path,split) 41 | self.root = root 42 | self.images = root + images 43 | self.targets = targets 44 | self.transform = transform 45 | self.loader = loader 46 | 47 | def __getitem__(self, index): 48 | """ 49 | Args: 50 | index (int): Index 51 | Returns: 52 | tuple: (image, target) 53 | """ 54 | path = self.images[index] 55 | target = self.targets[index] 56 | img = self.loader(path) 57 | if self.transform is not None: 58 | img = self.transform(img) 59 | img = img.type(torch.FloatTensor) 60 | return img, target 61 | 62 | def __len__(self): 63 | return len(self.images) 64 | -------------------------------------------------------------------------------- /dataset/caltech_pedestrian.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | from PIL import Image 4 | from utils.pascal_utils import write_pascal_annotation_aug 5 | from utils.json_utils import read_json_file 6 | 7 | data_dir='/media/milton/ssd1/dataset/pedestrian/caltech_pedestrian/caltech-pedestrian-dataset-converter/data' 8 | images_dir=os.path.join(data_dir,'images') 9 | json_file=os.path.join(data_dir,'annotations.json') 10 | 11 | data=read_json_file(json_file) 12 | for set_key in data.keys(): 13 | set_data=data[set_key] 14 | for v_key in set_data.keys(): 15 | frames=set_data[v_key]['frames'] 16 | for frame_key in frames.keys(): 17 | for frame_anno in frames[frame_key]: 18 | 19 | filename="{}_{}_{}.png".format(set_key.lower(),v_key,frame_key) 20 | file_path=os.path.join(images_dir, filename) 21 | # if not os.path.exists(file_path): 22 | # print("{} not found".format(file_path)) 23 | try: 24 | img=Image.open(file_path) 25 | except Exception as e: 26 | continue 27 | pass 28 | 29 | 30 | -------------------------------------------------------------------------------- /dataset/inria_person.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | 4 | from utils.pascal_utils import write_pascal_annotation_aug 5 | from utils.file_utils import read_text_file 6 | train_anno_dir='/media/milton/ssd1/dataset/pedestrian/INRIAPerson/Train/annotations' 7 | test_anno_dir='/media/milton/ssd1/dataset/pedestrian/INRIAPerson/Test/annotations' 8 | 9 | 10 | def inria_person_to_pascal(train_anno_dir): 11 | anno_files = glob.glob(os.path.join(train_anno_dir, '**.txt')) 12 | for anno_file in anno_files: 13 | filename = '' 14 | obj_list = [] 15 | 16 | for line in read_text_file(anno_file): 17 | # xml_file=os.path.join(annodir, xml_file_name) 18 | # image_path=os.path.abspath(os.path.join(data_dir,"train", image_name)) 19 | # write_pascal_annotation(image_path,obj_list,xml_file) 20 | 21 | if 'Image filename' in line: 22 | filename = line.split(':')[1].strip()[1:-1] 23 | if 'Bounding box for object' in line: 24 | bounds = line.split(':')[1].split('-') 25 | xmin, ymin = bounds[0].strip()[1:-1].split(',') 26 | xmax, ymax = bounds[1].strip()[1:-1].split(',') 27 | xmin = int(xmin.strip()) 28 | ymin = int(ymin.strip()) 29 | xmax = int(xmax.strip()) 30 | ymax = int(ymax.strip()) 31 | obj_list.append([xmin, ymin, xmax, ymax, 1]) 32 | image_path = os.path.join('/media/milton/ssd1/dataset/pedestrian/INRIAPerson', 
filename) 33 | xml_file = os.path.join('/media/milton/ssd1/research/competitions/data_wider_pedestrian/annotations_train', 34 | os.path.basename(image_path).split('.')[0] + ".xml") 35 | write_pascal_annotation_aug(image_path, obj_list, xml_file) 36 | 37 | 38 | inria_person_to_pascal(train_anno_dir) 39 | inria_person_to_pascal(test_anno_dir) 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /dataset/mall.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import scipy.io as sio 4 | 5 | from utils.pascal_utils import write_pascal_annotation_aug 6 | from utils.file_utils import read_text_file 7 | data_dir='/media/milton/ssd1/dataset/pedestrian/mall/mall_dataset' 8 | gt_file=os.path.join(data_dir,'mall_gt.mat') 9 | 10 | def inria_person_to_pascal(gt_file): 11 | 12 | gt=sio.loadmat(gt_file) 13 | frames=gt['frame'] 14 | anno_files = glob.glob(os.path.join(gt_file, '**.txt')) 15 | for anno_file in anno_files: 16 | filename = '' 17 | obj_list = [] 18 | 19 | for line in read_text_file(anno_file): 20 | # xml_file=os.path.join(annodir, xml_file_name) 21 | # image_path=os.path.abspath(os.path.join(data_dir,"train", image_name)) 22 | # write_pascal_annotation(image_path,obj_list,xml_file) 23 | 24 | if 'Image filename' in line: 25 | filename = line.split(':')[1].strip()[1:-1] 26 | if 'Bounding box for object' in line: 27 | bounds = line.split(':')[1].split('-') 28 | xmin, ymin = bounds[0].strip()[1:-1].split(',') 29 | xmax, ymax = bounds[1].strip()[1:-1].split(',') 30 | xmin = int(xmin.strip()) 31 | ymin = int(ymin.strip()) 32 | xmax = int(xmax.strip()) 33 | ymax = int(ymax.strip()) 34 | obj_list.append([xmin, ymin, xmax, ymax, 1]) 35 | image_path = os.path.join('/media/milton/ssd1/dataset/pedestrian/upenn', filename) 36 | xml_file = os.path.join('/media/milton/ssd1/research/competitions/data_wider_pedestrian/annotations_train', 37 | os.path.basename(image_path).split('.')[0] + ".xml") 38 | write_pascal_annotation_aug(image_path, obj_list, xml_file) 39 | 40 | 41 | inria_person_to_pascal(gt_file) 42 | 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /dataset/upen_person.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | 4 | from utils.pascal_utils import write_pascal_annotation_aug 5 | from utils.file_utils import read_text_file 6 | train_anno_dir='/media/milton/ssd1/dataset/pedestrian/upenn/PennFudanPed/Annotation' 7 | 8 | def inria_person_to_pascal(train_anno_dir): 9 | anno_files = glob.glob(os.path.join(train_anno_dir, '**.txt')) 10 | for anno_file in anno_files: 11 | filename = '' 12 | obj_list = [] 13 | 14 | for line in read_text_file(anno_file): 15 | # xml_file=os.path.join(annodir, xml_file_name) 16 | # image_path=os.path.abspath(os.path.join(data_dir,"train", image_name)) 17 | # write_pascal_annotation(image_path,obj_list,xml_file) 18 | 19 | if 'Image filename' in line: 20 | filename = line.split(':')[1].strip()[1:-1] 21 | if 'Bounding box for object' in line: 22 | bounds = line.split(':')[1].split('-') 23 | xmin, ymin = bounds[0].strip()[1:-1].split(',') 24 | xmax, ymax = bounds[1].strip()[1:-1].split(',') 25 | xmin = int(xmin.strip()) 26 | ymin = int(ymin.strip()) 27 | xmax = int(xmax.strip()) 28 | ymax = int(ymax.strip()) 29 | obj_list.append([xmin, ymin, xmax, ymax, 1]) 30 | image_path = os.path.join('/media/milton/ssd1/dataset/pedestrian/upenn', 
filename) 31 | xml_file = os.path.join('/media/milton/ssd1/research/competitions/data_wider_pedestrian/annotations_train', 32 | os.path.basename(image_path).split('.')[0] + ".xml") 33 | write_pascal_annotation_aug(image_path, obj_list, xml_file) 34 | 35 | 36 | inria_person_to_pascal(train_anno_dir) 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /demo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/demo/__init__.py -------------------------------------------------------------------------------- /demo/demo.py: -------------------------------------------------------------------------------- 1 | from utils.file_utils import read_text_file 2 | import os 3 | import cv2 4 | 5 | val_dir='/media/milton/ssd1/research/competitions/data_wider_pedestrian/val' 6 | for line in read_text_file('scores.txt'): 7 | line_arr=line.split(' ') 8 | image_name=line_arr[0] 9 | image_path=os.path.join(val_dir,image_name) 10 | save_path=os.path.join('out',image_name) 11 | if os.path.exists(save_path): 12 | image_path=save_path 13 | print(image_path) 14 | img_face_detect = cv2.imread(image_path) 15 | print(line_arr) 16 | x1, y1, w, h = line_arr[2:] 17 | x1=float(x1) 18 | y1=float(y1) 19 | w=float(w) 20 | h=float(h.strip()) 21 | x2=int(x1)+int(w) 22 | y2=int(y1)+int(h) 23 | cv2.rectangle(img_face_detect, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 1) 24 | print(save_path) 25 | print(img_face_detect.shape) 26 | cv2.imwrite(save_path, img_face_detect) 27 | -------------------------------------------------------------------------------- /doc/RFB.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/doc/RFB.png -------------------------------------------------------------------------------- /doc/SSD.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/doc/SSD.jpg -------------------------------------------------------------------------------- /doc/detection_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/doc/detection_example.png -------------------------------------------------------------------------------- /doc/detection_example2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/doc/detection_example2.png -------------------------------------------------------------------------------- /doc/detection_examples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/doc/detection_examples.png -------------------------------------------------------------------------------- /doc/rfb.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/doc/rfb.png -------------------------------------------------------------------------------- /doc/ssd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/doc/ssd.png -------------------------------------------------------------------------------- /focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | 6 | 7 | def one_hot(index, classes): 8 | size = index.size() + (classes,) 9 | view = index.size() + (1,) 10 | 11 | mask = torch.Tensor(*size).fill_(0) 12 | index = index.view(*view) 13 | ones = 1. 14 | 15 | if isinstance(index, Variable): 16 | ones = Variable(torch.Tensor(index.size()).fill_(1)) 17 | mask = Variable(mask) 18 | 19 | return mask.scatter_(1, index, ones) 20 | 21 | 22 | class FocalLoss(nn.Module): 23 | 24 | def __init__(self, gamma=0, eps=1e-7): 25 | super(FocalLoss, self).__init__() 26 | self.gamma = gamma 27 | self.eps = eps 28 | 29 | def forward(self, input, target): 30 | input=input.cpu() 31 | target=target.cpu() 32 | y = one_hot(target, input.size(-1)) 33 | logit = F.softmax(input, dim=-1) 34 | logit = logit.clamp(self.eps, 1. - self.eps) 35 | 36 | loss = -1 * y * torch.log(logit) # cross entropy 37 | loss = loss * (1 - logit) ** self.gamma # focal loss 38 | 39 | return loss.sum() -------------------------------------------------------------------------------- /layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions import * 2 | from .modules import * 3 | -------------------------------------------------------------------------------- /layers/functions/__init__.py: -------------------------------------------------------------------------------- 1 | from .detection import Detect 2 | from .prior_box import PriorBox 3 | 4 | 5 | __all__ = ['Detect', 'PriorBox'] 6 | -------------------------------------------------------------------------------- /layers/functions/detection.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from utils.box_utils import decode, center_size 5 | 6 | 7 | class Detect(Function): 8 | """At test time, Detect is the final layer of SSD. Decode location preds, 9 | apply non-maximum suppression to location predictions based on conf 10 | scores and threshold to a top_k number of output predictions for both 11 | confidence score and locations. 12 | """ 13 | 14 | def __init__(self, num_classes, bkg_label, cfg, object_score=0): 15 | self.num_classes = num_classes 16 | self.background_label = bkg_label 17 | self.object_score = object_score 18 | # self.thresh = thresh 19 | 20 | # Parameters used in nms. 
21 | self.variance = cfg['variance'] 22 | 23 | def forward(self, predictions, prior, arm_data=None): 24 | """ 25 | Args: 26 | loc_data: (tensor) Loc preds from loc layers 27 | Shape: [batch,num_priors*4] 28 | conf_data: (tensor) Shape: Conf preds from conf layers 29 | Shape: [batch*num_priors,num_classes] 30 | prior_data: (tensor) Prior boxes and variances from priorbox layers 31 | Shape: [1,num_priors,4] 32 | """ 33 | 34 | loc, conf = predictions 35 | loc_data = loc.data 36 | conf_data = conf.data 37 | prior_data = prior.data 38 | num = loc_data.size(0) # batch size 39 | if arm_data: 40 | arm_loc, arm_conf = arm_data 41 | arm_loc_data = arm_loc.data 42 | arm_conf_data = arm_conf.data 43 | arm_object_conf = arm_conf_data[:, 1:] 44 | no_object_index = arm_object_conf <= self.object_score 45 | conf_data[no_object_index.expand_as(conf_data)] = 0 46 | 47 | self.num_priors = prior_data.size(0) 48 | self.boxes = torch.zeros(num, self.num_priors, 4) 49 | self.scores = torch.zeros(num, self.num_priors, self.num_classes) 50 | 51 | if num == 1: 52 | # size batch x num_classes x num_priors 53 | conf_preds = conf_data.unsqueeze(0) 54 | 55 | else: 56 | conf_preds = conf_data.view(num, self.num_priors, 57 | self.num_classes) 58 | self.boxes.expand(num, self.num_priors, 4) 59 | self.scores.expand(num, self.num_priors, self.num_classes) 60 | # Decode predictions into bboxes. 61 | for i in range(num): 62 | if arm_data: 63 | default = decode(arm_loc_data[i], prior_data, self.variance) 64 | default = center_size(default) 65 | else: 66 | default = prior_data 67 | decoded_boxes = decode(loc_data[i], default, self.variance) 68 | # For each class, perform nms 69 | conf_scores = conf_preds[i].clone() 70 | ''' 71 | c_mask = conf_scores.gt(self.thresh) 72 | decoded_boxes = decoded_boxes[c_mask] 73 | conf_scores = conf_scores[c_mask] 74 | ''' 75 | 76 | self.boxes[i] = decoded_boxes 77 | self.scores[i] = conf_scores 78 | 79 | return self.boxes, self.scores 80 | -------------------------------------------------------------------------------- /layers/functions/prior_box.py: -------------------------------------------------------------------------------- 1 | from itertools import product as product 2 | from math import sqrt as sqrt 3 | 4 | import torch 5 | 6 | if torch.cuda.is_available(): 7 | torch.set_default_tensor_type('torch.cuda.FloatTensor') 8 | 9 | 10 | class PriorBox(object): 11 | """Compute priorbox coordinates in center-offset form for each source 12 | feature map. 13 | Note: 14 | This 'layer' has changed between versions of the original SSD 15 | paper, so we include both versions, but note v2 is the most tested and most 16 | recent version of the paper. 
17 | 18 | """ 19 | 20 | def __init__(self, cfg): 21 | super(PriorBox, self).__init__() 22 | self.image_size = cfg['min_dim'] 23 | # number of priors for feature map location (either 4 or 6) 24 | self.num_priors = len(cfg['aspect_ratios']) 25 | self.variance = cfg['variance'] or [0.1] 26 | self.feature_maps = cfg['feature_maps'] 27 | self.min_sizes = cfg['min_sizes'] 28 | self.max_sizes = cfg['max_sizes'] 29 | self.steps = cfg['steps'] 30 | self.aspect_ratios = cfg['aspect_ratios'] 31 | self.clip = cfg['clip'] 32 | for v in self.variance: 33 | if v <= 0: 34 | raise ValueError('Variances must be greater than 0') 35 | 36 | def forward(self): 37 | mean = [] 38 | for k, f in enumerate(self.feature_maps): 39 | for i, j in product(range(f), repeat=2): 40 | f_k = self.image_size / self.steps[k] 41 | cx = (j + 0.5) / f_k 42 | cy = (i + 0.5) / f_k 43 | 44 | s_k = self.min_sizes[k] / self.image_size 45 | mean += [cx, cy, s_k, s_k] 46 | 47 | # aspect_ratio: 1 48 | # rel size: sqrt(s_k * s_(k+1)) 49 | if self.max_sizes: 50 | s_k_prime = sqrt(s_k * (self.max_sizes[k] / self.image_size)) 51 | mean += [cx, cy, s_k_prime, s_k_prime] 52 | 53 | # rest of aspect ratios 54 | for ar in self.aspect_ratios[k]: 55 | mean += [cx, cy, s_k * sqrt(ar), s_k / sqrt(ar)] 56 | mean += [cx, cy, s_k / sqrt(ar), s_k * sqrt(ar)] 57 | 58 | # back to torch land 59 | output = torch.Tensor(mean).view(-1, 4) 60 | if self.clip: 61 | output.clamp_(max=1, min=0) 62 | return output 63 | -------------------------------------------------------------------------------- /layers/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .multibox_loss import MultiBoxLoss 2 | from .refine_multibox_loss import RefineMultiBoxLoss 3 | from .l2norm import L2Norm 4 | 5 | __all__ = ['MultiBoxLoss','L2Norm'] 6 | -------------------------------------------------------------------------------- /layers/modules/l2norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Function 4 | from torch.autograd import Variable 5 | import torch.nn.init as init 6 | 7 | class L2Norm(nn.Module): 8 | def __init__(self,n_channels, scale): 9 | super(L2Norm,self).__init__() 10 | self.n_channels = n_channels 11 | self.gamma = scale or None 12 | self.eps = 1e-10 13 | self.weight = nn.Parameter(torch.Tensor(self.n_channels)) 14 | self.reset_parameters() 15 | 16 | def reset_parameters(self): 17 | init.constant(self.weight,self.gamma) 18 | 19 | def forward(self, x): 20 | norm = x.pow(2).sum(dim=1, keepdim=True).sqrt()+self.eps 21 | x /= norm 22 | out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x 23 | return out 24 | -------------------------------------------------------------------------------- /layers/modules/multibox_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | from utils.box_utils import match, log_sum_exp 6 | GPU = False 7 | if torch.cuda.is_available(): 8 | GPU = True 9 | torch.set_default_tensor_type('torch.cuda.FloatTensor') 10 | 11 | 12 | class MultiBoxLoss(nn.Module): 13 | """SSD Weighted Loss Function 14 | Compute Targets: 15 | 1) Produce Confidence Target Indices by matching ground truth boxes 16 | with (default) 'priorboxes' that have jaccard index > threshold parameter 17 | (default threshold: 0.5). 
18 | 2) Produce localization target by 'encoding' variance into offsets of ground 19 | truth boxes and their matched 'priorboxes'. 20 | 3) Hard negative mining to filter the excessive number of negative examples 21 | that comes with using a large number of default bounding boxes. 22 | (default negative:positive ratio 3:1) 23 | Objective Loss: 24 | L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 25 | Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss 26 | weighted by α which is set to 1 by cross val. 27 | Args: 28 | c: class confidences, 29 | l: predicted boxes, 30 | g: ground truth boxes 31 | N: number of matched default boxes 32 | See: https://arxiv.org/pdf/1512.02325.pdf for more details. 33 | """ 34 | 35 | 36 | def __init__(self, num_classes,overlap_thresh,prior_for_matching,bkg_label,neg_mining,neg_pos,neg_overlap,encode_target): 37 | super(MultiBoxLoss, self).__init__() 38 | self.num_classes = num_classes 39 | self.threshold = overlap_thresh 40 | self.background_label = bkg_label 41 | self.encode_target = encode_target 42 | self.use_prior_for_matching = prior_for_matching 43 | self.do_neg_mining = neg_mining 44 | self.negpos_ratio = neg_pos 45 | self.neg_overlap = neg_overlap 46 | self.variance = [0.1,0.2] 47 | 48 | def forward(self, predictions, priors, targets): 49 | """Multibox Loss 50 | Args: 51 | predictions (tuple): A tuple containing loc preds, conf preds, 52 | and prior boxes from SSD net. 53 | conf shape: torch.size(batch_size,num_priors,num_classes) 54 | loc shape: torch.size(batch_size,num_priors,4) 55 | priors shape: torch.size(num_priors,4) 56 | 57 | ground_truth (tensor): Ground truth boxes and labels for a batch, 58 | shape: [batch_size,num_objs,5] (last idx is the label). 59 | """ 60 | 61 | loc_data, conf_data = predictions 62 | priors = priors 63 | num = loc_data.size(0) 64 | num_priors = (priors.size(0)) 65 | num_classes = self.num_classes 66 | 67 | # match priors (default boxes) and ground truth boxes 68 | loc_t = torch.Tensor(num, num_priors, 4) 69 | conf_t = torch.LongTensor(num, num_priors) 70 | for idx in range(num): 71 | truths = targets[idx][:,:-1].data 72 | labels = targets[idx][:,-1].data 73 | defaults = priors.data 74 | match(self.threshold,truths,defaults,self.variance,labels,loc_t,conf_t,idx) 75 | if GPU: 76 | loc_t = loc_t.cuda() 77 | conf_t = conf_t.cuda() 78 | # wrap targets 79 | loc_t = Variable(loc_t, requires_grad=False) 80 | conf_t = Variable(conf_t,requires_grad=False) 81 | 82 | pos = conf_t > 0 83 | 84 | # Localization Loss (Smooth L1) 85 | # Shape: [batch,num_priors,4] 86 | pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) 87 | loc_p = loc_data[pos_idx].view(-1,4) 88 | loc_t = loc_t[pos_idx].view(-1,4) 89 | loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False) 90 | 91 | # Compute max conf across batch for hard negative mining 92 | batch_conf = conf_data.view(-1,self.num_classes) 93 | loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1,1)) 94 | 95 | # Hard Negative Mining 96 | loss_c[pos.view(-1,1)] = 0 # filter out pos boxes for now 97 | loss_c = loss_c.view(num, -1) 98 | _,loss_idx = loss_c.sort(1, descending=True) 99 | _,idx_rank = loss_idx.sort(1) 100 | num_pos = pos.long().sum(1,keepdim=True) 101 | num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1) 102 | neg = idx_rank < num_neg.expand_as(idx_rank) 103 | 104 | # Confidence Loss Including Positive and Negative Examples 105 | pos_idx = pos.unsqueeze(2).expand_as(conf_data) 106 | neg_idx = neg.unsqueeze(2).expand_as(conf_data) 107 | 
conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes) 108 | targets_weighted = conf_t[(pos+neg).gt(0)] 109 | loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False) 110 | 111 | # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 112 | 113 | N = num_pos.data.float().sum() 114 | loss_l/=N 115 | loss_c/=N 116 | return loss_l,loss_c 117 | -------------------------------------------------------------------------------- /layers/modules/refine_multibox_loss.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.autograd import Variable 6 | from utils.box_utils import match,refine_match, log_sum_exp,decode 7 | GPU = False 8 | if torch.cuda.is_available(): 9 | GPU = True 10 | torch.set_default_tensor_type('torch.cuda.FloatTensor') 11 | 12 | 13 | class RefineMultiBoxLoss(nn.Module): 14 | """SSD Weighted Loss Function 15 | Compute Targets: 16 | 1) Produce Confidence Target Indices by matching ground truth boxes 17 | with (default) 'priorboxes' that have jaccard index > threshold parameter 18 | (default threshold: 0.5). 19 | 2) Produce localization target by 'encoding' variance into offsets of ground 20 | truth boxes and their matched 'priorboxes'. 21 | 3) Hard negative mining to filter the excessive number of negative examples 22 | that comes with using a large number of default bounding boxes. 23 | (default negative:positive ratio 3:1) 24 | Objective Loss: 25 | L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 26 | Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss 27 | weighted by α which is set to 1 by cross val. 28 | Args: 29 | c: class confidences, 30 | l: predicted boxes, 31 | g: ground truth boxes 32 | N: number of matched default boxes 33 | See: https://arxiv.org/pdf/1512.02325.pdf for more details. 34 | """ 35 | 36 | 37 | def __init__(self, num_classes,overlap_thresh,prior_for_matching,bkg_label,neg_mining,neg_pos,neg_overlap,encode_target,object_score = 0): 38 | super(RefineMultiBoxLoss, self).__init__() 39 | self.num_classes = num_classes 40 | self.threshold = overlap_thresh 41 | self.background_label = bkg_label 42 | self.encode_target = encode_target 43 | self.use_prior_for_matching = prior_for_matching 44 | self.do_neg_mining = neg_mining 45 | self.negpos_ratio = neg_pos 46 | self.neg_overlap = neg_overlap 47 | self.object_score = object_score 48 | self.variance = [0.1,0.2] 49 | 50 | def forward(self, odm_data,priors, targets,arm_data = None,filter_object = False): 51 | """Multibox Loss 52 | Args: 53 | predictions (tuple): A tuple containing loc preds, conf preds, 54 | and prior boxes from SSD net. 55 | conf shape: torch.size(batch_size,num_priors,num_classes) 56 | loc shape: torch.size(batch_size,num_priors,4) 57 | priors shape: torch.size(num_priors,4) 58 | 59 | ground_truth (tensor): Ground truth boxes and labels for a batch, 60 | shape: [batch_size,num_objs,5] (last idx is the label). 
61 | arm_data (tuple): arm branch containg arm_loc and arm_conf 62 | filter_object: whether filter out the prediction according to the arm conf score 63 | """ 64 | 65 | loc_data,conf_data = odm_data 66 | if arm_data: 67 | arm_loc,arm_conf = arm_data 68 | priors = priors.data 69 | num = loc_data.size(0) 70 | num_priors = (priors.size(0)) 71 | 72 | # match priors (default boxes) and ground truth boxes 73 | loc_t = torch.Tensor(num, num_priors, 4) 74 | conf_t = torch.LongTensor(num, num_priors) 75 | for idx in range(num): 76 | truths = targets[idx][:,:-1].data 77 | labels = targets[idx][:,-1].data 78 | #for object detection 79 | if self.num_classes == 2: 80 | labels = labels > 0 81 | if arm_data: 82 | refine_match(self.threshold,truths,priors,self.variance,labels,loc_t,conf_t,idx,arm_loc[idx].data) 83 | else: 84 | match(self.threshold,truths,priors,self.variance,labels,loc_t,conf_t,idx) 85 | if GPU: 86 | loc_t = loc_t.cuda() 87 | conf_t = conf_t.cuda() 88 | # wrap targets 89 | loc_t = Variable(loc_t, requires_grad=False) 90 | conf_t = Variable(conf_t,requires_grad=False) 91 | if arm_data and filter_object: 92 | arm_conf_data = arm_conf.data[:,:,1] 93 | pos = conf_t > 0 94 | object_score_index = arm_conf_data <= self.object_score 95 | pos[object_score_index] = 0 96 | 97 | else: 98 | pos = conf_t > 0 99 | 100 | # Localization Loss (Smooth L1) 101 | # Shape: [batch,num_priors,4] 102 | pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) 103 | loc_p = loc_data[pos_idx].view(-1,4) 104 | loc_t = loc_t[pos_idx].view(-1,4) 105 | loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False) 106 | 107 | # Compute max conf across batch for hard negative mining 108 | batch_conf = conf_data.view(-1,self.num_classes) 109 | loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1,1)) 110 | 111 | # Hard Negative Mining 112 | loss_c[pos] = 0 # filter out pos boxes for now 113 | loss_c = loss_c.view(num, -1) 114 | _,loss_idx = loss_c.sort(1, descending=True) 115 | _,idx_rank = loss_idx.sort(1) 116 | num_pos = pos.long().sum(1,keepdim=True) 117 | num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1) 118 | neg = idx_rank < num_neg.expand_as(idx_rank) 119 | 120 | # Confidence Loss Including Positive and Negative Examples 121 | pos_idx = pos.unsqueeze(2).expand_as(conf_data) 122 | neg_idx = neg.unsqueeze(2).expand_as(conf_data) 123 | conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes) 124 | targets_weighted = conf_t[(pos+neg).gt(0)] 125 | loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False) 126 | 127 | # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 128 | N = num_pos.data.sum() 129 | loss_l/=N 130 | loss_c/=N 131 | return loss_l,loss_c 132 | -------------------------------------------------------------------------------- /loss_loader.py: -------------------------------------------------------------------------------- 1 | from focal_loss import FocalLoss 2 | from torch import nn 3 | gamma = 2 4 | 5 | def get_focal_loss(classifier): 6 | print("==> Using Focal Loss.....") 7 | classifier.writer.add_text('Info', "Using Focal Loss ") 8 | return FocalLoss(gamma) 9 | 10 | def get_cross_entropy(classifier): 11 | print("==> Using CrossEntropy.....") 12 | classifier.writer.add_text('Info', "Using Cross Entropy Loss ") 13 | return nn.CrossEntropyLoss() 14 | 15 | def get_vat_cross_entropy(classifier): 16 | print("==> Using Adversarial Training Cross Entropy.....") 17 | pass 
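# --- illustrative sketch (added; not part of the original loss_loader.py) ----
# A hedged usage example for get_focal_loss() above, assuming a stub classifier
# that only provides the `writer.add_text` hook these helpers call, plus toy
# logits/targets; gamma comes from the module-level constant above.
if __name__ == "__main__":
    import torch

    class _StubWriter:
        def add_text(self, tag, text):
            pass

    class _StubClassifier:
        writer = _StubWriter()

    criterion = get_focal_loss(_StubClassifier())  # returns FocalLoss(gamma=2)
    logits = torch.randn(4, 2)                     # [batch, num_classes]
    targets = torch.tensor([0, 1, 1, 0])           # class indices
    print(criterion(logits, targets).item())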
-------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import os 2 | gpu=0 3 | os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 4 | os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu) 5 | os.environ['CUDA_LAUNCH_BLOCKING'] = str(gpu) 6 | from object_detector import Detector 7 | from torch import optim 8 | from augment_data import augment_images 9 | from model_loader import * 10 | from loss_loader import * 11 | from data_reader import * 12 | 13 | import argparse 14 | import pickle 15 | import time 16 | 17 | import numpy as np 18 | import os 19 | import torch 20 | import torch.backends.cudnn as cudnn 21 | import torch.nn.init as init 22 | import torch.optim as optim 23 | import torch.utils.data as data 24 | from torch.autograd import Variable 25 | 26 | from data import VOCroot, COCOroot, VOC_300, VOC_512, COCO_300, COCO_512, COCO_mobile_300, AnnotationTransform, \ 27 | COCODetection, VOCDetection, detection_collate, BaseTransform, preproc 28 | from layers.functions import Detect, PriorBox 29 | from layers.modules import MultiBoxLoss 30 | from utils.nms_wrapper import nms 31 | from utils.timer import Timer 32 | 33 | 34 | def str2bool(v): 35 | return v.lower() in ("yes", "true", "t", "1") 36 | 37 | classes=VOC_CLASSES 38 | classes_delimited=','.join(classes) 39 | num_classes=len(classes) 40 | 41 | parser = argparse.ArgumentParser( 42 | description='Receptive Field Block Net Training') 43 | 44 | parser.add_argument('-gpu', default=gpu, 45 | type=int, help='gpu index for training.') 46 | parser.add_argument('-v', '--version', default='RFB_vgg', 47 | help='RFB_vgg ,RFB_E_vgg RFB_mobile SSD_vgg version.') 48 | parser.add_argument('-s', '--size', default='300',type=int, 49 | help='300 or 512 input size.') 50 | parser.add_argument('-d', '--dataset', default='VOC', 51 | help='VOC or COCO dataset') 52 | 53 | parser.add_argument('-classes', default=classes_delimited,type=str, 54 | help='class names delimited by ,') 55 | parser.add_argument('-num_classes', default=num_classes, type=int, 56 | help='total classes') 57 | 58 | parser.add_argument( 59 | '--basenet', default='weights/vgg16_reducedfc.pth', help='pretrained base model') 60 | parser.add_argument('--jaccard_threshold', default=0.5, 61 | type=float, help='Min Jaccard index for matching') 62 | parser.add_argument('-b', '--batch_size', default=8, 63 | type=int, help='Batch size for training') 64 | parser.add_argument('--num_workers', default=4, 65 | type=int, help='Number of workers used in dataloading') 66 | parser.add_argument('--cuda', default=True, 67 | type=bool, help='Use cuda to train model') 68 | parser.add_argument('--ngpu', default=2, type=int, help='gpus') 69 | parser.add_argument('--lr', '--learning-rate', 70 | default=4e-3, type=float, help='initial learning rate') 71 | parser.add_argument('--momentum', default=0.9, type=float, help='momentum') 72 | 73 | parser.add_argument('--resume_net', default=False, help='resume net for retraining') 74 | parser.add_argument('--resume_epoch', default=0, 75 | type=int, help='resume iter for retraining') 76 | parser.add_argument('-epochs', '--epochs', default=300, 77 | type=int, help='max epoch for retraining') 78 | parser.add_argument('--weight_decay', default=5e-4, 79 | type=float, help='Weight decay for SGD') 80 | parser.add_argument('-we', '--warm_epoch', default=1, 81 | type=int, help='max epoch for retraining') 82 | parser.add_argument('--gamma', default=0.1, 83 | type=float, 
help='Gamma update for SGD') 84 | 85 | parser.add_argument('--freeze_layers', default=0.80, 86 | type=float, help='PErcentage of weight to be freezed.') 87 | 88 | parser.add_argument('--log_iters', default=True, 89 | type=bool, help='Print the loss at each iteration') 90 | parser.add_argument('--save_folder', default='weights/', 91 | help='Location to save checkpoint models') 92 | parser.add_argument('--date', default='1213') 93 | parser.add_argument('--save_frequency', default=10) 94 | parser.add_argument('--retest', default=False, type=bool, 95 | help='test cache results') 96 | parser.add_argument('--test_frequency', default=10) 97 | parser.add_argument('--visdom', default=False, type=str2bool, help='Use visdom to for loss visualization') 98 | parser.add_argument('--send_images_to_visdom', type=str2bool, default=False, 99 | help='Sample a random image from each 10th batch, send it to visdom after augmentations step') 100 | args = parser.parse_args() 101 | 102 | """ 103 | sudo nvidia-smi -pl 180 104 | sudo nvidia-smi --gpu-reset -i 0 105 | use command line to run the training. 106 | 107 | todo download more images using image_utils and isic-arhive. Also, use more online resources for data. 108 | 109 | """ 110 | 111 | from layers.modules.multibox_loss import MultiBoxLoss 112 | 113 | from statics import * 114 | def get_loss_function(classifier): 115 | return MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False) 116 | 117 | def get_model(args): 118 | return get_ssd_model(args) 119 | 120 | def get_optimizer(model_trainer): 121 | epsilon=1e-8 122 | momentum = 0.9 123 | weight_decay=5e-4 124 | # model_trainer.writer.add_scalar("leanring rate", learning_rate) 125 | # model_trainer.writer.add_scalar("epsilon", epsilon) 126 | # optimizer=optim.SGD(filter(lambda p: p.requires_grad, model_trainer.model.parameters()), 127 | # lr=0.001,momentum=momentum,weight_decay=weight_decay) 128 | optimizer = optim.Adam(filter(lambda p: p.requires_grad, model_trainer.model.parameters()),lr=0.01) 129 | # optimizer = optim.SGD(filter(lambda p: p.requires_grad, model_trainer.model.parameters()), lr=0.001, momentum=0.9, 130 | # weight_decay=weight_decay) 131 | return optimizer 132 | 133 | def get_prior(): 134 | cfg = (VOC_300, VOC_512)[args.size == '512'] 135 | priorbox = PriorBox(cfg) 136 | priors = Variable(priorbox.forward(), volatile=True) 137 | return priors 138 | 139 | class ModelDetails(object): 140 | def __init__(self,args): 141 | self.args=args 142 | self.priors=get_prior() 143 | self.model,self.model_name_str = get_model(args) 144 | self.logs_dir = "logs/{}/{}".format(args.gpu,self.model_name_str) 145 | self.augment_images = augment_images 146 | self.dataset_loader=get_data_loader(args) 147 | self.get_loss_function = get_loss_function 148 | self.get_optimizer = get_optimizer 149 | self.dataset=data_set_name 150 | self.class_names=VOC_CLASSES 151 | 152 | 153 | def start_training(args): 154 | model_details=ModelDetails(args) 155 | detector=Detector(model_details) 156 | detector.load_data() 157 | detector.load_model() 158 | for epoch in range(detector.start_epoch, detector.start_epoch + args.epochs): 159 | try: 160 | detector.train(epoch) 161 | detector.test(epoch) 162 | except KeyboardInterrupt: 163 | detector.test(epoch) 164 | break; 165 | detector.load_data() 166 | 167 | start_training(args) -------------------------------------------------------------------------------- /make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cd ./utils/ 
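# Illustrative usage note (assumed values, not taken from the repository's docs):
# the training driver defined in main.py above can be launched with the flags it
# declares, for example:
#   python main.py -v RFB_vgg -s 300 -d VOC -b 8 --lr 4e-3 --basenet weights/vgg16_reducedfc.pth
# Note that CUDA_VISIBLE_DEVICES is set from the hard-coded gpu=0 at the top of
# main.py before argument parsing, so the -gpu flag appears to affect only the
# log directory and model name (e.g. logs/{gpu}/...), not device selection.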
3 | 4 | CUDA_PATH=/usr/local/cuda/ 5 | 6 | python build.py build_ext --inplace 7 | 8 | cd .. 9 | -------------------------------------------------------------------------------- /model_loader.py: -------------------------------------------------------------------------------- 1 | from statics import voc 2 | import argparse 3 | import pickle 4 | import time 5 | 6 | import numpy as np 7 | import os 8 | import torch 9 | import torch.backends.cudnn as cudnn 10 | import torch.nn.init as init 11 | import torch.optim as optim 12 | import torch.utils.data as data 13 | from torch.autograd import Variable 14 | 15 | from data import VOCroot, COCOroot, VOC_300, VOC_512, COCO_300, COCO_512, COCO_mobile_300, AnnotationTransform, \ 16 | COCODetection, VOCDetection, detection_collate, BaseTransform, preproc 17 | from layers.functions import Detect, PriorBox 18 | from layers.modules import MultiBoxLoss 19 | from utils.nms_wrapper import nms 20 | from utils.timer import Timer 21 | 22 | def get_ssd_model(args): 23 | save_folder = os.path.join(args.save_folder, args.version + '_' + str(args.size), args.date) 24 | if not os.path.exists(save_folder): 25 | os.makedirs(save_folder) 26 | test_save_dir = os.path.join(save_folder, 'ss_predict') 27 | if not os.path.exists(test_save_dir): 28 | os.makedirs(test_save_dir) 29 | gpu=args.gpu 30 | img_dim = args.size 31 | num_classes=args.num_classes 32 | print("==>Loading SSD model...") 33 | if args.version == 'RFB_vgg': 34 | from models.RFB_Net_vgg import build_net 35 | elif args.version == 'RFB_E_vgg': 36 | from models.RFB_Net_E_vgg import build_net 37 | elif args.version == 'RFB_mobile': 38 | from models.RFB_Net_mobile import build_net 39 | 40 | cfg = COCO_mobile_300 41 | elif args.version == 'SSD_vgg': 42 | from models.SSD_vgg import build_net 43 | elif args.version == 'FSSD_vgg': 44 | from models.FSSD_vgg import build_net 45 | elif args.version == 'FRFBSSD_vgg': 46 | from models.FRFBSSD_vgg import build_net 47 | else: 48 | print('Unkown version!') 49 | net = build_net(int(img_dim), num_classes) 50 | # model(model.cuda(), (3, height, width)) 51 | if not args.resume_net: 52 | base_weights = torch.load(args.basenet) 53 | print('Loading base network...') 54 | net.base.load_state_dict(base_weights) 55 | 56 | def xavier(param): 57 | init.xavier_uniform(param) 58 | 59 | def weights_init(m): 60 | for key in m.state_dict(): 61 | if key.split('.')[-1] == 'weight': 62 | if 'conv' in key: 63 | init.kaiming_normal(m.state_dict()[key], mode='fan_out') 64 | if 'bn' in key: 65 | m.state_dict()[key][...] = 1 66 | elif key.split('.')[-1] == 'bias': 67 | m.state_dict()[key][...] 
= 0 68 | 69 | print('Initializing weights...') 70 | # initialize newly added layers' weights with kaiming_normal method 71 | net.extras.apply(weights_init) 72 | net.loc.apply(weights_init) 73 | net.conf.apply(weights_init) 74 | if args.version == 'FSSD_vgg' or args.version == 'FRFBSSD_vgg': 75 | net.ft_module.apply(weights_init) 76 | net.pyramid_ext.apply(weights_init) 77 | if 'RFB' in args.version: 78 | net.Norm.apply(weights_init) 79 | if args.version == 'RFB_E_vgg': 80 | net.reduce.apply(weights_init) 81 | net.up_reduce.apply(weights_init) 82 | 83 | else: 84 | # load resume network 85 | resume_net_path = os.path.join(save_folder, args.version + '_' + args.dataset + '_epoches_' + \ 86 | str(args.resume_epoch) + '.pth') 87 | print('Loading resume network', resume_net_path) 88 | state_dict = torch.load(resume_net_path) 89 | # create new OrderedDict that does not contain `module.` 90 | from collections import OrderedDict 91 | 92 | new_state_dict = OrderedDict() 93 | for k, v in state_dict.items(): 94 | head = k[:7] 95 | if head == 'module.': 96 | name = k[7:] # remove `module.` 97 | else: 98 | name = k 99 | new_state_dict[name] = v 100 | net.load_state_dict(new_state_dict) 101 | return net,"ssd_{}_adam".format(gpu) 102 | -------------------------------------------------------------------------------- /models/FSSD_mobile.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import os 4 | import torch 5 | import torch.nn as nn 6 | 7 | sys.path.append('./') 8 | from .mobilenet import mobilenet_1 9 | 10 | 11 | class BasicConv(nn.Module): 12 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, 13 | bn=False, bias=True, up_size=0): 14 | super(BasicConv, self).__init__() 15 | self.out_channels = out_planes 16 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, 17 | dilation=dilation, groups=groups, bias=bias) 18 | self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) if bn else None 19 | self.relu = nn.ReLU(inplace=True) if relu else None 20 | self.up_size = up_size 21 | self.up_sample = nn.Upsample(size=(up_size, up_size), mode='bilinear') if up_size != 0 else None 22 | 23 | def forward(self, x): 24 | x = self.conv(x) 25 | if self.bn is not None: 26 | x = self.bn(x) 27 | if self.relu is not None: 28 | x = self.relu(x) 29 | if self.up_size > 0: 30 | x = self.up_sample(x) 31 | return x 32 | 33 | 34 | class FSSD(nn.Module): 35 | """Single Shot Multibox Architecture 36 | The network is composed of a base VGG network followed by the 37 | added multibox conv layers. Each multibox layer branches into 38 | 1) conv2d for class conf scores 39 | 2) conv2d for localization predictions 40 | 3) associated priorbox layer to produce default bounding 41 | boxes specific to the layer's feature map size. 42 | See: https://arxiv.org/pdf/1512.02325.pdf for more details. 
43 | 44 | Args: 45 | phase: (string) Can be "test" or "train" 46 | base: VGG16 layers for input, size of either 300 or 500 47 | extras: extra layers that feed to multibox loc and conf layers 48 | head: "multibox head" consists of loc and conf conv layers 49 | """ 50 | 51 | def __init__(self, size, head, ft_module, pyramid_ext, num_classes): 52 | super(FSSD, self).__init__() 53 | self.num_classes = num_classes 54 | # TODO: implement __call__ in PriorBox 55 | self.size = size 56 | 57 | # SSD network 58 | self.base = mobilenet_1() 59 | # Layer learns to scale the l2 normalized features from conv4_3 60 | self.ft_module = nn.ModuleList(ft_module) 61 | self.pyramid_ext = nn.ModuleList(pyramid_ext) 62 | 63 | self.loc = nn.ModuleList(head[0]) 64 | self.conf = nn.ModuleList(head[1]) 65 | self.fea_bn = nn.BatchNorm2d(256 * len(self.ft_module), affine=True) 66 | 67 | self.softmax = nn.Softmax() 68 | 69 | def forward(self, x, test=False): 70 | """Applies network layers and ops on input image(s) x. 71 | 72 | Args: 73 | x: input image or batch of images. Shape: [batch,3*batch,300,300]. 74 | 75 | Return: 76 | Depending on phase: 77 | test: 78 | Variable(tensor) of output class label predictions, 79 | confidence score, and corresponding location predictions for 80 | each object detected. Shape: [batch,topk,7] 81 | 82 | train: 83 | list of concat outputs from: 84 | 1: confidence layers, Shape: [batch*num_priors,num_classes] 85 | 2: localization layers, Shape: [batch,num_priors*4] 86 | 3: priorbox layers, Shape: [2,num_priors*4] 87 | """ 88 | source_features = list() 89 | transformed_features = list() 90 | loc = list() 91 | conf = list() 92 | 93 | base_out = self.base(x) 94 | source_features.append(base_out[0]) # mobilenet 4_1 95 | source_features.append(base_out[1]) # mobilent_5_5 96 | source_features.append(base_out[2]) # mobilenet 6_1 97 | 98 | assert len(self.ft_module) == len(source_features) 99 | for k, v in enumerate(self.ft_module): 100 | transformed_features.append(v(source_features[k])) 101 | concat_fea = torch.cat(transformed_features, 1) 102 | x = self.fea_bn(concat_fea) 103 | fea_bn = x 104 | pyramid_fea = list() 105 | for k, v in enumerate(self.pyramid_ext): 106 | x = v(x) 107 | pyramid_fea.append(x) 108 | # apply multibox head to source layers 109 | for (x, l, c) in zip(pyramid_fea, self.loc, self.conf): 110 | loc.append(l(x).permute(0, 2, 3, 1).contiguous()) 111 | conf.append(c(x).permute(0, 2, 3, 1).contiguous()) 112 | 113 | loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1) 114 | conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1) 115 | if test: 116 | output = ( 117 | loc.view(loc.size(0), -1, 4), # loc preds 118 | self.softmax(conf.view(-1, self.num_classes)), # conf preds 119 | ) 120 | features = () 121 | else: 122 | output = ( 123 | loc.view(loc.size(0), -1, 4), 124 | conf.view(conf.size(0), -1, self.num_classes), 125 | ) 126 | features = ( 127 | fea_bn 128 | ) 129 | return output 130 | 131 | def load_weights(self, base_file): 132 | other, ext = os.path.splitext(base_file) 133 | if ext == '.pkl' or '.pth': 134 | print('Loading weights into state dict...') 135 | state_dict = torch.load(base_file, map_location=lambda storage, loc: storage) 136 | from collections import OrderedDict 137 | new_state_dict = OrderedDict() 138 | for k, v in state_dict.items(): 139 | head = k[:7] 140 | if head == 'module.': 141 | name = k[7:] # remove `module.` 142 | else: 143 | name = k 144 | new_state_dict[name] = v 145 | self.base.load_state_dict(new_state_dict) 146 | print('Finished!') 147 | 148 
| else: 149 | print('Sorry only .pth and .pkl files supported.') 150 | 151 | 152 | def feature_transform_module(scale_factor): 153 | layers = [] 154 | # conv4_1 155 | layers += [BasicConv(int(256 * scale_factor), 256, kernel_size=1, padding=0)] 156 | # conv5_5 157 | layers += [BasicConv(int(512 * scale_factor), 256, kernel_size=1, padding=0, up_size=38)] 158 | # conv6_mpo1 159 | layers += [BasicConv(int(1024 * scale_factor), 256, kernel_size=1, padding=0, up_size=38)] 160 | return layers 161 | 162 | 163 | def pyramid_feature_extractor(): 164 | ''' 165 | layers = [BasicConv(256*3,512,kernel_size=3,stride=1,padding=1),BasicConv(512,512,kernel_size=3,stride=2,padding=1), \ 166 | BasicConv(512,256,kernel_size=3,stride=2,padding=1),BasicConv(256,256,kernel_size=3,stride=2,padding=1), \ 167 | BasicConv(256,256,kernel_size=3,stride=1,padding=0),BasicConv(256,256,kernel_size=3,stride=1,padding=0)] 168 | ''' 169 | from .mobilenet import DepthWiseBlock 170 | layers = [DepthWiseBlock(256 * 3, 512, stride=1), DepthWiseBlock(512, 512, stride=2), 171 | DepthWiseBlock(512, 256, stride=2), DepthWiseBlock(256, 256, stride=2), \ 172 | DepthWiseBlock(256, 128, stride=1, padding=0), DepthWiseBlock(128, 128, stride=1, padding=0)] 173 | 174 | return layers 175 | 176 | 177 | def multibox(fea_channels, cfg, num_classes): 178 | loc_layers = [] 179 | conf_layers = [] 180 | assert len(fea_channels) == len(cfg) 181 | for i, fea_channel in enumerate(fea_channels): 182 | loc_layers += [nn.Conv2d(fea_channel, cfg[i] * 4, kernel_size=3, padding=1)] 183 | conf_layers += [nn.Conv2d(fea_channel, cfg[i] * num_classes, kernel_size=3, padding=1)] 184 | return (loc_layers, conf_layers) 185 | 186 | 187 | mbox = { 188 | '300': [6, 6, 6, 6, 4, 4], # number of boxes per feature map location 189 | } 190 | fea_channels = [512, 512, 256, 256, 128, 128] 191 | 192 | 193 | def build_net(size=300, num_classes=21): 194 | if size != 300 and size != 512: 195 | print("Error: Sorry only SSD300 and SSD512 is supported currently!") 196 | return 197 | 198 | return FSSD(size, multibox(fea_channels, mbox[str(size)], num_classes), feature_transform_module(1), 199 | pyramid_feature_extractor(), \ 200 | num_classes=num_classes) 201 | 202 | 203 | net = build_net() 204 | print(net) 205 | -------------------------------------------------------------------------------- /models/SSD_vgg.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from layers import * 7 | from .base_models import vgg, vgg_base 8 | 9 | 10 | class SSD(nn.Module): 11 | """Single Shot Multibox Architecture 12 | The network is composed of a base VGG network followed by the 13 | added multibox conv layers. Each multibox layer branches into 14 | 1) conv2d for class conf scores 15 | 2) conv2d for localization predictions 16 | 3) associated priorbox layer to produce default bounding 17 | boxes specific to the layer's feature map size. 18 | See: https://arxiv.org/pdf/1512.02325.pdf for more details. 
19 | 20 | Args: 21 | phase: (string) Can be "test" or "train" 22 | base: VGG16 layers for input, size of either 300 or 500 23 | extras: extra layers that feed to multibox loc and conf layers 24 | head: "multibox head" consists of loc and conf conv layers 25 | """ 26 | 27 | def __init__(self, base, extras, head, num_classes,size): 28 | super(SSD, self).__init__() 29 | self.num_classes = num_classes 30 | # TODO: implement __call__ in PriorBox 31 | self.size = size 32 | 33 | # SSD network 34 | self.base = nn.ModuleList(base) 35 | # Layer learns to scale the l2 normalized features from conv4_3 36 | self.extras = nn.ModuleList(extras) 37 | self.L2Norm = L2Norm(512, 20) 38 | 39 | self.loc = nn.ModuleList(head[0]) 40 | self.conf = nn.ModuleList(head[1]) 41 | 42 | self.softmax = nn.Softmax() 43 | 44 | def forward(self, x, test=False): 45 | """Applies network layers and ops on input image(s) x. 46 | 47 | Args: 48 | x: input image or batch of images. Shape: [batch,3*batch,300,300]. 49 | 50 | Return: 51 | Depending on phase: 52 | test: 53 | Variable(tensor) of output class label predictions, 54 | confidence score, and corresponding location predictions for 55 | each object detected. Shape: [batch,topk,7] 56 | 57 | train: 58 | list of concat outputs from: 59 | 1: confidence layers, Shape: [batch*num_priors,num_classes] 60 | 2: localization layers, Shape: [batch,num_priors*4] 61 | 3: priorbox layers, Shape: [2,num_priors*4] 62 | """ 63 | sources = list() 64 | loc = list() 65 | conf = list() 66 | 67 | # apply vgg up to conv4_3 relu 68 | for k in range(23): 69 | x = self.base[k](x) 70 | 71 | s = self.L2Norm(x) 72 | sources.append(s) 73 | 74 | # apply vgg up to fc7 75 | for k in range(23, len(self.base)): 76 | x = self.base[k](x) 77 | sources.append(x) 78 | 79 | # apply extra layers and cache source layer outputs 80 | for k, v in enumerate(self.extras): 81 | x = F.relu(v(x), inplace=True) 82 | if k % 2 == 1: 83 | sources.append(x) 84 | 85 | # apply multibox head to source layers 86 | for (x, l, c) in zip(sources, self.loc, self.conf): 87 | loc.append(l(x).permute(0, 2, 3, 1).contiguous()) 88 | conf.append(c(x).permute(0, 2, 3, 1).contiguous()) 89 | 90 | loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1) 91 | conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1) 92 | if test: 93 | output = ( 94 | loc.view(loc.size(0), -1, 4), # loc preds 95 | self.softmax(conf.view(-1, self.num_classes)), # conf preds 96 | ) 97 | else: 98 | output = ( 99 | loc.view(loc.size(0), -1, 4), 100 | conf.view(conf.size(0), -1, self.num_classes), 101 | ) 102 | return output 103 | 104 | def load_weights(self, base_file): 105 | other, ext = os.path.splitext(base_file) 106 | if ext == '.pkl' or '.pth': 107 | print('Loading weights into state dict...') 108 | self.load_state_dict(torch.load(base_file, map_location=lambda storage, loc: storage)) 109 | print('Finished!') 110 | else: 111 | print('Sorry only .pth and .pkl files supported.') 112 | 113 | 114 | def add_extras(cfg, i, batch_norm=False, size=300): 115 | # Extra layers added to VGG for feature scaling 116 | layers = [] 117 | in_channels = i 118 | flag = False 119 | for k, v in enumerate(cfg): 120 | if in_channels != 'S': 121 | if v == 'S': 122 | layers += [nn.Conv2d(in_channels, cfg[k + 1], 123 | kernel_size=(1, 3)[flag], stride=2, padding=1)] 124 | else: 125 | layers += [nn.Conv2d(in_channels, v, kernel_size=(1, 3)[flag])] 126 | flag = not flag 127 | in_channels = v 128 | if size == 512: 129 | layers.append(nn.Conv2d(in_channels, 128, kernel_size=1, stride=1)) 130 | 
layers.append(nn.Conv2d(128, 256, kernel_size=4, stride=1, padding=1)) 131 | return layers 132 | 133 | 134 | def multibox(vgg, extra_layers, cfg, num_classes): 135 | loc_layers = [] 136 | conf_layers = [] 137 | vgg_source = [24, -2] 138 | for k, v in enumerate(vgg_source): 139 | loc_layers += [nn.Conv2d(vgg[v].out_channels, 140 | cfg[k] * 4, kernel_size=3, padding=1)] 141 | conf_layers += [nn.Conv2d(vgg[v].out_channels, 142 | cfg[k] * num_classes, kernel_size=3, padding=1)] 143 | for k, v in enumerate(extra_layers[1::2], 2): 144 | loc_layers += [nn.Conv2d(v.out_channels, cfg[k] 145 | * 4, kernel_size=3, padding=1)] 146 | conf_layers += [nn.Conv2d(v.out_channels, cfg[k] 147 | * num_classes, kernel_size=3, padding=1)] 148 | return vgg, extra_layers, (loc_layers, conf_layers) 149 | 150 | 151 | extras = { 152 | '300': [256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256], 153 | '512': [256, 'S', 512, 128, 'S', 256, 128, 'S', 256, 128, 'S', 256], 154 | } 155 | mbox = { 156 | '300': [6, 6, 6, 6, 4, 4], # number of boxes per feature map location 157 | '512': [6, 6, 6, 6, 6, 4, 4], 158 | } 159 | 160 | 161 | def build_net(size=300, num_classes=21): 162 | if size != 300 and size != 512: 163 | print("Error: Sorry only SSD300 and SSD512 is supported currently!") 164 | return 165 | 166 | return SSD(*multibox(vgg(vgg_base[str(size)], 3), 167 | add_extras(extras[str(size)], 1024, size=size), 168 | mbox[str(size)], num_classes), num_classes=num_classes,size=size) 169 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/models/__init__.py -------------------------------------------------------------------------------- /models/base_models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | def vgg(cfg, i, batch_norm=False): 6 | layers = [] 7 | in_channels = i 8 | for v in cfg: 9 | if v == 'M': 10 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 11 | elif v == 'C': 12 | layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)] 13 | else: 14 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) 15 | if batch_norm: 16 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] 17 | else: 18 | layers += [conv2d, nn.ReLU(inplace=True)] 19 | in_channels = v 20 | pool5 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1) 21 | conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6) 22 | conv7 = nn.Conv2d(1024, 1024, kernel_size=1) 23 | layers += [pool5, conv6, 24 | nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True)] 25 | return layers 26 | 27 | 28 | vgg_base = { 29 | '300': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M', 30 | 512, 512, 512], 31 | '512': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M', 32 | 512, 512, 512], 33 | } 34 | 35 | 36 | class BasicConv(nn.Module): 37 | 38 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, 39 | bn=True, bias=False): 40 | super(BasicConv, self).__init__() 41 | self.out_channels = out_planes 42 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, 43 | dilation=dilation, groups=groups, bias=bias) 44 | self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, 
momentum=0.01, affine=True) if bn else None 45 | self.relu = nn.ReLU(inplace=True) if relu else None 46 | 47 | def forward(self, x): 48 | x = self.conv(x) 49 | if self.bn is not None: 50 | x = self.bn(x) 51 | if self.relu is not None: 52 | x = self.relu(x) 53 | return x 54 | 55 | 56 | class BasicRFB_a(nn.Module): 57 | 58 | def __init__(self, in_planes, out_planes, stride=1, scale=0.1): 59 | super(BasicRFB_a, self).__init__() 60 | self.scale = scale 61 | self.out_channels = out_planes 62 | inter_planes = in_planes // 4 63 | 64 | self.branch0 = nn.Sequential( 65 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1), 66 | BasicConv(inter_planes, inter_planes, kernel_size=3, stride=1, padding=1, relu=False) 67 | ) 68 | self.branch1 = nn.Sequential( 69 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1), 70 | BasicConv(inter_planes, inter_planes, kernel_size=(3, 1), stride=1, padding=(1, 0)), 71 | BasicConv(inter_planes, inter_planes, kernel_size=3, stride=1, padding=3, dilation=3, relu=False) 72 | ) 73 | self.branch2 = nn.Sequential( 74 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1), 75 | BasicConv(inter_planes, inter_planes, kernel_size=(1, 3), stride=stride, padding=(0, 1)), 76 | BasicConv(inter_planes, inter_planes, kernel_size=3, stride=1, padding=3, dilation=3, relu=False) 77 | ) 78 | ''' 79 | self.branch3 = nn.Sequential( 80 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1), 81 | BasicConv(inter_planes, inter_planes, kernel_size=3, stride=1, padding=1), 82 | BasicConv(inter_planes, inter_planes, kernel_size=3, stride=1, padding=3, dilation=3, relu=False) 83 | ) 84 | ''' 85 | self.branch3 = nn.Sequential( 86 | BasicConv(in_planes, inter_planes // 2, kernel_size=1, stride=1), 87 | BasicConv(inter_planes // 2, (inter_planes // 4) * 3, kernel_size=(1, 3), stride=1, padding=(0, 1)), 88 | BasicConv((inter_planes // 4) * 3, inter_planes, kernel_size=(3, 1), stride=stride, padding=(1, 0)), 89 | BasicConv(inter_planes, inter_planes, kernel_size=3, stride=1, padding=5, dilation=5, relu=False) 90 | ) 91 | 92 | self.ConvLinear = BasicConv(4 * inter_planes, out_planes, kernel_size=1, stride=1, relu=False) 93 | self.shortcut = BasicConv(in_planes, out_planes, kernel_size=1, stride=stride, relu=False) 94 | self.relu = nn.ReLU(inplace=False) 95 | 96 | def forward(self, x): 97 | x0 = self.branch0(x) 98 | x1 = self.branch1(x) 99 | x2 = self.branch2(x) 100 | x3 = self.branch3(x) 101 | 102 | out = torch.cat((x0, x1, x2, x3), 1) 103 | out = self.ConvLinear(out) 104 | short = self.shortcut(x) 105 | out = out * self.scale + short 106 | out = self.relu(out) 107 | 108 | return out 109 | -------------------------------------------------------------------------------- /models/densenet.py: -------------------------------------------------------------------------------- 1 | '''DenseNet in PyTorch.''' 2 | import math 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | from torch.autograd import Variable 9 | 10 | 11 | class Bottleneck(nn.Module): 12 | def __init__(self, in_planes, growth_rate): 13 | super(Bottleneck, self).__init__() 14 | self.bn1 = nn.BatchNorm2d(in_planes) 15 | self.conv1 = nn.Conv2d(in_planes, 4*growth_rate, kernel_size=1, bias=False) 16 | self.bn2 = nn.BatchNorm2d(4*growth_rate) 17 | self.conv2 = nn.Conv2d(4*growth_rate, growth_rate, kernel_size=3, padding=1, bias=False) 18 | 19 | def forward(self, x): 20 | out = self.conv1(F.relu(self.bn1(x))) 21 | out = self.conv2(F.relu(self.bn2(out))) 22 | out = 
torch.cat([out,x], 1) 23 | return out 24 | 25 | 26 | class Transition(nn.Module): 27 | def __init__(self, in_planes, out_planes): 28 | super(Transition, self).__init__() 29 | self.bn = nn.BatchNorm2d(in_planes) 30 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False) 31 | 32 | def forward(self, x): 33 | out = self.conv(F.relu(self.bn(x))) 34 | out = F.avg_pool2d(out, 2) 35 | return out 36 | 37 | 38 | class DenseNet(nn.Module): 39 | def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=512): 40 | super(DenseNet, self).__init__() 41 | self.growth_rate = growth_rate 42 | 43 | num_planes = 2*growth_rate 44 | self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, stride=2, padding=1, bias=False) 45 | 46 | self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0]) 47 | num_planes += nblocks[0]*growth_rate 48 | out_planes = int(math.floor(num_planes*reduction)) 49 | self.trans1 = Transition(num_planes, out_planes) 50 | num_planes = out_planes 51 | 52 | self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1]) 53 | num_planes += nblocks[1]*growth_rate 54 | out_planes = int(math.floor(num_planes*reduction)) 55 | self.trans2 = Transition(num_planes, out_planes) 56 | num_planes = out_planes 57 | 58 | self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2]) 59 | num_planes += nblocks[2]*growth_rate 60 | out_planes = int(math.floor(num_planes*reduction)) 61 | self.trans3 = Transition(num_planes, out_planes) 62 | num_planes = out_planes 63 | 64 | self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3]) 65 | num_planes += nblocks[3]*growth_rate 66 | 67 | self.bn = nn.BatchNorm2d(num_planes) 68 | self.linear = nn.Linear(num_planes, num_classes) 69 | self.fc_bn = nn.BatchNorm1d(512) 70 | 71 | def _make_dense_layers(self, block, in_planes, nblock): 72 | layers = [] 73 | for i in range(nblock): 74 | layers.append(block(in_planes, self.growth_rate)) 75 | in_planes += self.growth_rate 76 | return nn.Sequential(*layers) 77 | 78 | def forward(self, x): 79 | out = self.conv1(x) 80 | out = self.trans1(self.dense1(out)) 81 | out = self.trans2(self.dense2(out)) 82 | out = self.trans3(self.dense3(out)) 83 | out = self.dense4(out) 84 | out = F.avg_pool2d(F.relu(self.bn(out)), 7) 85 | out = out.view(out.size(0), -1) 86 | out = self.linear(out) 87 | out = self.fc_bn(out) 88 | return out 89 | 90 | def DenseNet121(): 91 | return DenseNet(Bottleneck, [6,12,24,16], growth_rate=12) 92 | 93 | def DenseNet169(): 94 | return DenseNet(Bottleneck, [6,12,32,32], growth_rate=32) 95 | 96 | def DenseNet201(): 97 | return DenseNet(Bottleneck, [6,12,48,32], growth_rate=32) 98 | 99 | def DenseNet161(): 100 | return DenseNet(Bottleneck, [6,12,36,24], growth_rate=48) 101 | 102 | def test(): 103 | net = DenseNet121() 104 | x = torch.randn(2,3,112,112) 105 | y = net(Variable(x)) 106 | print(y.size()) 107 | 108 | #test() 109 | -------------------------------------------------------------------------------- /models/mobilenet.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | """ 4 | Creates a MobileNet Model as defined in: 5 | Andrew G. Howard Menglong Zhu Bo Chen, et.al. (2017). 6 | MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications. 
7 | (c) Yang Lu 8 | """ 9 | import math 10 | import torch.nn as nn 11 | 12 | __all__ = ['DepthWiseBlock', 'mobilenet', 'mobilenet_2', 'mobilenet_1', 'mobilenet_075', 'mobilenet_05', 13 | 'mobilenet_025'] 14 | 15 | 16 | class DepthWiseBlock(nn.Module): 17 | def __init__(self, inplanes, planes, stride=1, padding=1): 18 | super(DepthWiseBlock, self).__init__() 19 | inplanes, planes = int(inplanes), int(planes) 20 | self.conv_dw = nn.Conv2d(inplanes, inplanes, kernel_size=3, padding=padding, stride=stride, groups=inplanes, 21 | bias=False) 22 | self.bn_dw = nn.BatchNorm2d(inplanes) 23 | self.conv_sep = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, padding=0, bias=False) 24 | self.bn_sep = nn.BatchNorm2d(planes) 25 | self.relu = nn.ReLU(inplace=True) 26 | 27 | def forward(self, x): 28 | out = self.conv_dw(x) 29 | out = self.bn_dw(out) 30 | out = self.relu(out) 31 | 32 | out = self.conv_sep(out) 33 | out = self.bn_sep(out) 34 | out = self.relu(out) 35 | 36 | return out 37 | 38 | 39 | class MobileNet(nn.Module): 40 | def __init__(self, widen_factor=1.0, num_classes=1000): 41 | """ Constructor 42 | Args: 43 | widen_factor: config of widen_factor 44 | num_classes: number of classes 45 | """ 46 | super(MobileNet, self).__init__() 47 | 48 | block = DepthWiseBlock 49 | 50 | self.conv1 = nn.Conv2d(3, int(32 * widen_factor), kernel_size=3, stride=2, padding=1, bias=False) 51 | self.bn1 = nn.BatchNorm2d(int(32 * widen_factor)) 52 | self.relu = nn.ReLU(inplace=True) 53 | 54 | self.dw2_1 = block(32 * widen_factor, 64 * widen_factor) 55 | self.dw2_2 = block(64 * widen_factor, 128 * widen_factor, stride=2) 56 | 57 | self.dw3_1 = block(128 * widen_factor, 128 * widen_factor) 58 | self.dw3_2 = block(128 * widen_factor, 256 * widen_factor, stride=2) 59 | 60 | self.dw4_1 = block(256 * widen_factor, 256 * widen_factor) 61 | self.dw4_2 = block(256 * widen_factor, 512 * widen_factor, stride=2) 62 | 63 | self.dw5_1 = block(512 * widen_factor, 512 * widen_factor) 64 | self.dw5_2 = block(512 * widen_factor, 512 * widen_factor) 65 | self.dw5_3 = block(512 * widen_factor, 512 * widen_factor) 66 | self.dw5_4 = block(512 * widen_factor, 512 * widen_factor) 67 | self.dw5_5 = block(512 * widen_factor, 512 * widen_factor) 68 | self.dw5_6 = block(512 * widen_factor, 1024 * widen_factor, stride=2) 69 | 70 | self.dw6 = block(1024 * widen_factor, 1024 * widen_factor) 71 | 72 | self.avgpool = nn.AdaptiveAvgPool2d(1) 73 | self.fc = nn.Linear(int(1024 * widen_factor), num_classes) 74 | 75 | for m in self.modules(): 76 | if isinstance(m, nn.Conv2d): 77 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 78 | m.weight.data.normal_(0, math.sqrt(2. / n)) 79 | elif isinstance(m, nn.BatchNorm2d): 80 | m.weight.data.fill_(1) 81 | m.bias.data.zero_() 82 | 83 | def forward(self, x): 84 | x = self.conv1(x) 85 | x = self.bn1(x) 86 | x = self.relu(x) 87 | 88 | x = self.dw2_1(x) 89 | x = self.dw2_2(x) 90 | x = self.dw3_1(x) 91 | x = self.dw3_2(x) 92 | x0 = self.dw4_1(x) 93 | x = self.dw4_2(x0) 94 | x = self.dw5_1(x) 95 | x = self.dw5_2(x) 96 | x = self.dw5_3(x) 97 | x = self.dw5_4(x) 98 | x1 = self.dw5_5(x) 99 | x = self.dw5_6(x1) 100 | x2 = self.dw6(x) 101 | return x0, x1, x2 102 | 103 | 104 | def mobilenet(widen_factor=1.0, num_classes=1000): 105 | """ 106 | Construct MobileNet. 107 | """ 108 | model = MobileNet(widen_factor=widen_factor, num_classes=num_classes) 109 | return model 110 | 111 | 112 | def mobilenet_2(): 113 | """ 114 | Construct MobileNet. 
115 | """ 116 | model = MobileNet(widen_factor=2.0, num_classes=1000) 117 | return model 118 | 119 | 120 | def mobilenet_1(): 121 | """ 122 | Construct MobileNet. 123 | """ 124 | model = MobileNet(widen_factor=1.0, num_classes=1000) 125 | return model 126 | 127 | 128 | def mobilenet_075(): 129 | """ 130 | Construct MobileNet. 131 | """ 132 | model = MobileNet(widen_factor=0.75, num_classes=1000) 133 | return model 134 | 135 | 136 | def mobilenet_05(): 137 | """ 138 | Construct MobileNet. 139 | """ 140 | model = MobileNet(widen_factor=0.5, num_classes=1000) 141 | return model 142 | 143 | 144 | def mobilenet_025(): 145 | """ 146 | Construct MobileNet. 147 | """ 148 | model = MobileNet(widen_factor=0.25, num_classes=1000) 149 | return model 150 | 151 | 152 | if __name__ == '__main__': 153 | mobilenet = mobilenet_1() 154 | print(mobilenet) 155 | print(mobilenet.state_dict().keys()) 156 | -------------------------------------------------------------------------------- /models/resnet.py: -------------------------------------------------------------------------------- 1 | '''ResNet in PyTorch. 2 | Reference: 3 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 4 | Deep Residual Learning for Image Recognition. arXiv:1512.03385 5 | ''' 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | from torch.autograd import Variable 11 | 12 | 13 | class BasicBlock(nn.Module): 14 | expansion = 1 15 | 16 | def __init__(self, in_planes, planes, stride=1): 17 | super(BasicBlock, self).__init__() 18 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 19 | self.bn1 = nn.BatchNorm2d(planes) 20 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 21 | self.bn2 = nn.BatchNorm2d(planes) 22 | 23 | self.shortcut = nn.Sequential() 24 | if stride != 1 or in_planes != self.expansion*planes: 25 | self.shortcut = nn.Sequential( 26 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 27 | nn.BatchNorm2d(self.expansion*planes) 28 | ) 29 | 30 | def forward(self, x): 31 | out = F.relu(self.bn1(self.conv1(x))) 32 | out = self.bn2(self.conv2(out)) 33 | out += self.shortcut(x) 34 | out = F.relu(out) 35 | return out 36 | 37 | 38 | class Bottleneck(nn.Module): 39 | expansion = 4 40 | 41 | def __init__(self, in_planes, planes, stride=1): 42 | super(Bottleneck, self).__init__() 43 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 44 | self.bn1 = nn.BatchNorm2d(planes) 45 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 46 | self.bn2 = nn.BatchNorm2d(planes) 47 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) 48 | self.bn3 = nn.BatchNorm2d(self.expansion*planes) 49 | 50 | self.shortcut = nn.Sequential() 51 | if stride != 1 or in_planes != self.expansion*planes: 52 | self.shortcut = nn.Sequential( 53 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 54 | nn.BatchNorm2d(self.expansion*planes) 55 | ) 56 | 57 | def forward(self, x): 58 | out = F.relu(self.bn1(self.conv1(x))) 59 | out = F.relu(self.bn2(self.conv2(out))) 60 | out = self.bn3(self.conv3(out)) 61 | out += self.shortcut(x) 62 | out = F.relu(out) 63 | return out 64 | 65 | 66 | class ResNet(nn.Module): 67 | def __init__(self, block, num_blocks, num_classes=512): 68 | super(ResNet, self).__init__() 69 | self.in_planes = 64 70 | 71 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, 
padding=1, bias=False) 72 | self.bn1 = nn.BatchNorm2d(64) 73 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 74 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 75 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) 76 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) 77 | self.linear = nn.Linear(512*block.expansion, num_classes) 78 | self.fc_bn = nn.BatchNorm1d(512) 79 | 80 | def _make_layer(self, block, planes, num_blocks, stride): 81 | strides = [stride] + [1]*(num_blocks-1) 82 | layers = [] 83 | for stride in strides: 84 | layers.append(block(self.in_planes, planes, stride)) 85 | self.in_planes = planes * block.expansion 86 | return nn.Sequential(*layers) 87 | 88 | def forward(self, x): 89 | out = F.relu(self.bn1(self.conv1(x))) 90 | out = self.layer1(out) 91 | out = self.layer2(out) 92 | out = self.layer3(out) 93 | out = self.layer4(out) 94 | out = F.avg_pool2d(out, 7) 95 | out = out.view(out.size(0), -1) 96 | out = self.linear(out) 97 | out = self.fc_bn(out) 98 | return out 99 | 100 | 101 | def ResNet18(): 102 | return ResNet(BasicBlock, [2,2,2,2]) 103 | 104 | def ResNet34(): 105 | return ResNet(BasicBlock, [3,4,6,3]) 106 | 107 | def ResNet50(): 108 | return ResNet(Bottleneck, [3,4,6,3]) 109 | 110 | def ResNet101(): 111 | return ResNet(Bottleneck, [3,4,23,3]) 112 | 113 | def ResNet152(): 114 | return ResNet(Bottleneck, [3,8,36,3]) 115 | 116 | 117 | def test(): 118 | net = ResNet34() 119 | y = net(Variable(torch.randn(32,3,112,112))) 120 | print(y.size()) 121 | 122 | #test() 123 | -------------------------------------------------------------------------------- /models/vgg.py: -------------------------------------------------------------------------------- 1 | '''VGG11/13/16/19 in Pytorch.''' 2 | import torch 3 | import torch.nn as nn 4 | from torch.autograd import Variable 5 | 6 | 7 | cfg = { 8 | 'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 9 | 'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 10 | 'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 11 | 'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], 12 | } 13 | 14 | 15 | class VGG(nn.Module): 16 | def __init__(self, vgg_name): 17 | super(VGG, self).__init__() 18 | self.features = self._make_layers(cfg[vgg_name]) 19 | self.classifier = nn.Linear(512, 512) 20 | self.fc_bn = nn.BatchNorm1d(512) 21 | 22 | def forward(self, x): 23 | out = self.features(x) 24 | out = out.view(out.size(0), -1) 25 | out = self.classifier(out) 26 | out = self.fc_bn(out) 27 | return out 28 | 29 | def _make_layers(self, cfg): 30 | layers = [] 31 | in_channels = 3 32 | for x in cfg: 33 | if x == 'M': 34 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 35 | else: 36 | layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1), 37 | nn.BatchNorm2d(x), 38 | nn.ReLU(inplace=True)] 39 | in_channels = x 40 | layers += [nn.AvgPool2d(kernel_size=3, stride=3)] 41 | return nn.Sequential(*layers) 42 | 43 | def test(): 44 | net = VGG('VGG11') 45 | x = torch.randn(2,3,112,112) 46 | print(net(Variable(x)).size()) 47 | 48 | #test() 49 | -------------------------------------------------------------------------------- /multi_thread_score_pedestrian_detection.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import argparse 4 | import os.path as 
osp 5 | from utils.utils import progress_bar 6 | import time 7 | 8 | def check_size(submission_file): 9 | max_size = 60*1024*1024 10 | if osp.getsize(submission_file) > max_size: 11 | raise IOError #File size exceeds the specified maximum size, which is 60M for the server. 12 | 13 | def judge_overlap(pbox,ignore_box): 14 | overlap=[] 15 | delete=[] 16 | for p in pbox: 17 | pl=min(p[0],p[2]) 18 | pr=max(p[0],p[2]) 19 | pb=min(p[1],p[3]) 20 | pt=max(p[1],p[3]) 21 | s_p=(pr-pl)*(pt-pb) 22 | s_lap=-0.01 23 | for c in ignore_box: 24 | cl=min(c[0],c[2]) 25 | cr=max(c[0],c[2]) 26 | cb=min(c[1],c[3]) 27 | ct=max(c[1],c[3]) 28 | if not (crpr or ctpt): 29 | s_lap+=(min(cr,pr)-max(cl,pl))*(min(ct,pt)-max(cb,pb)) 30 | if s_lap>0: 31 | overlap.append([p,s_lap/s_p]) 32 | for o in overlap: 33 | if o[1]>0.5: 34 | delete.append(o[0]) 35 | remain_id = [p for p in pbox if p not in delete] 36 | return remain_id 37 | 38 | def parse_ignore_file(ignore_file): 39 | with open(ignore_file,'r') as f: 40 | lines = f.readlines() 41 | ig = [x.strip().split() for x in lines] 42 | ignore = {} 43 | for item in ig: 44 | key = item[0] 45 | ignore_num = (len(item)-1)/4 46 | bbox = [] 47 | for i in range(int(ignore_num)): 48 | b = [] 49 | b.append(int(item[1+4*i])) 50 | b.append(int(item[2+4*i])) 51 | b.append(int(item[1+4*i])+int(item[3+4*i])) 52 | b.append(int(item[2+4*i])+int(item[4+4*i])) 53 | bbox.append(b) 54 | ignore[key] = bbox 55 | return ignore 56 | 57 | def parse_submission(submission_file,ignore_file): 58 | ignore_zone = parse_ignore_file(ignore_file) 59 | ignore_keys = ignore_zone.keys() 60 | with open(submission_file, 'r') as f: 61 | lines = f.readlines() 62 | splitlines = [x.strip().split() for x in lines] 63 | image_ids = [x[0] for x in splitlines] 64 | confidence = np.array([float(x[1]) for x in splitlines]) 65 | BB = [] 66 | for x in splitlines: 67 | bb = [] 68 | bb.append(float(x[2])) 69 | bb.append(float(x[3])) 70 | bb.append(float(x[2])+float(x[4])) 71 | bb.append(float(x[3])+float(x[5])) 72 | BB.append(bb) 73 | 74 | sub_key = [] 75 | for x in image_ids: 76 | if x not in sub_key: 77 | sub_key.append(x) 78 | final_confidence = [] 79 | final_ids = [] 80 | final_BB = [] 81 | 82 | for key in sub_key: 83 | find = [i for i,v in enumerate(image_ids) if v == key] 84 | BB_sub = [BB[i] for i in find] 85 | confid_sub = [confidence[i] for i in find] 86 | if key in ignore_keys: 87 | ignore_bbox = ignore_zone[key] 88 | bbox_remain = judge_overlap(BB_sub,ignore_bbox) 89 | find_remain = [] 90 | for i,v in enumerate(BB_sub): 91 | if v in bbox_remain: 92 | find_remain.append(i) 93 | confid_remain = [confid_sub[i] for i in find_remain] 94 | BB_sub = bbox_remain 95 | confid_sub = confid_remain 96 | ids_sub = [key]*len(BB_sub) 97 | final_ids.extend(ids_sub) 98 | final_confidence.extend(confid_sub) 99 | final_BB.extend(BB_sub) 100 | 101 | final_BB = np.array(final_BB) 102 | final_confidence = np.array(final_confidence) 103 | sorted_ind = np.argsort(-final_confidence) 104 | final_BB = final_BB[sorted_ind, :] 105 | final_ids = [final_ids[x] for x in sorted_ind] 106 | return final_ids, final_BB 107 | 108 | def parse_gt_annotation(gt_file,ignore_file): 109 | ignore_zone = parse_ignore_file(ignore_file) 110 | ignore_keys = ignore_zone.keys() 111 | with open(gt_file, 'r') as f: 112 | lines = f.readlines() 113 | info = [x.strip().split() for x in lines] 114 | gt = {} 115 | for item in info: 116 | bbox = [] 117 | bbox_num = (len(item)-1)/5 118 | for i in range(int(bbox_num)): 119 | b = [] 120 | b.append(int(item[2+5*i])) 121 | 
b.append(int(item[3+5*i])) 122 | b.append(int(item[2+5*i])+int(item[4+5*i])) 123 | b.append(int(item[3+5*i])+int(item[5+5*i])) 124 | bbox.append(b) 125 | if item[0] in ignore_keys: 126 | ignore_bbox = ignore_zone[item[0]] 127 | bbox_remain = judge_overlap(bbox,ignore_bbox) 128 | else: 129 | bbox_remain = bbox 130 | gt[item[0]] = np.array(bbox_remain) 131 | return gt 132 | 133 | def compute_ap(rec, prec): 134 | mrec = np.concatenate(([0.], rec, [1.])) 135 | mpre = np.concatenate(([0.], prec, [0.])) 136 | for i in range(mpre.size - 1, 0, -1): 137 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 138 | i = np.where(mrec[1:] != mrec[:-1])[0] 139 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 140 | return ap 141 | 142 | 143 | def pedestrian_eval(aap,input, gt_file, ignore_file, ovthresh): 144 | gt = parse_gt_annotation(gt_file,ignore_file) 145 | image_ids, BB = parse_submission(input,ignore_file) 146 | npos = 0 147 | recs = {} 148 | for key in gt.keys(): 149 | det = [False]*len(gt[key]) 150 | recs[key] = {'bbox': gt[key], 'det': det} 151 | npos += len(gt[key]) 152 | nd = len(image_ids) 153 | tp = np.zeros(nd) 154 | fp = np.zeros(nd) 155 | for d in range(nd): 156 | if image_ids[d] not in recs.keys(): 157 | raise KeyError("Can not find image {} in the groundtruth file, did you submit the result file for the right dataset?".format(image_ids[d])) 158 | for d in range(nd): 159 | R = recs[image_ids[d]] 160 | bb = BB[d, :].astype(float) 161 | ovmax = -np.inf 162 | BBGT = R['bbox'].astype(float) 163 | if BBGT.size > 0: 164 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 165 | iymin = np.maximum(BBGT[:, 1], bb[1]) 166 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 167 | iymax = np.minimum(BBGT[:, 3], bb[3]) 168 | iw = np.maximum(ixmax - ixmin + 1., 0.) 169 | ih = np.maximum(iymax - iymin + 1., 0.) 170 | inters = iw * ih 171 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 172 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 173 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 174 | overlaps = inters / uni 175 | ovmax = np.max(overlaps) 176 | jmax = np.argmax(overlaps) 177 | 178 | if ovmax > ovthresh: 179 | if not R['det'][jmax]: 180 | tp[d] = 1. 181 | R['det'][jmax] = 1 182 | else: 183 | fp[d] = 1. 184 | else: 185 | fp[d] = 1. 
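    # Descriptive note: at this point tp/fp hold one 0/1 entry per detection, ordered by
    # descending confidence (parse_submission sorts by score). The cumulative sums below
    # turn them into running TP/FP counts, from which recall = tp / npos and
    # precision = tp / (tp + fp) follow; compute_ap then integrates the interpolated
    # (monotonically non-increasing) precision over recall, PASCAL-VOC style.
    # wider_ped_eval calls this once per IoU threshold in 0.50:0.05:0.95 and averages
    # the per-threshold APs into the reported mAP.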
186 | fp = np.cumsum(fp) 187 | tp = np.cumsum(tp) 188 | rec = tp / float(npos) 189 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 190 | ap = compute_ap(rec, prec) 191 | aap.append(ap) 192 | return ap 193 | 194 | import threading 195 | def wider_ped_eval(input, gt,ignore_file): 196 | aap = [] 197 | threads=[] 198 | for ove in np.arange(0.5, 1.0, 0.05): 199 | # pedestrian_eval(aap, input, gt,ignore_file, ovthresh=ove) 200 | t=threading.Thread(target=pedestrian_eval, args=(aap,input,gt,ignore_file),kwargs={'ovthresh':ove}) 201 | threads.append(t) 202 | t.start() 203 | time.sleep(5) 204 | 205 | print("Total threads:{}".format(len(threads))) 206 | for index,t in enumerate(threads): 207 | progress_bar(index, len(threads)," executing.") 208 | t.join() 209 | mAP = np.average(aap) 210 | return mAP 211 | 212 | 213 | def eval(): 214 | input_dir = './' 215 | output_dir = './' 216 | ref_dir = osp.join(input_dir, 'ref') 217 | submit_dir = osp.join(input_dir, 'res') 218 | submit_file = 'submit_files/scores_validation.txt' 219 | gt_file = osp.join(ref_dir, 'val_annotations.txt') 220 | ignore_file = osp.join(ref_dir, 'pedestrian_ignore_part_val.txt') 221 | check_size(submit_file) 222 | mAP = wider_ped_eval(submit_file, gt_file, ignore_file) 223 | out = {'Average AP': mAP} 224 | strings = ['{}: {}\n'.format(k, v) for k, v in out.items()] 225 | open(os.path.join(output_dir, 'scores_out.txt'), 'w').writelines(strings) 226 | return mAP 227 | 228 | 229 | if __name__ == '__main__': 230 | # parser = argparse.ArgumentParser() 231 | # parser.add_argument("input", type=str, default='./') 232 | # parser.add_argument("output", type=str, default='./') 233 | # args = parser.parse_args() 234 | eval() -------------------------------------------------------------------------------- /pretrainedmodels/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__ 2 | 3 | from . import models 4 | from . 
import datasets 5 | from .models.utils import pretrained_settings 6 | from .models.utils import model_names 7 | 8 | # to support pretrainedmodels.__dict__['nasnetalarge'] 9 | # but depreciated 10 | from .models.fbresnet import fbresnet152 11 | from .models.cafferesnet import cafferesnet101 12 | from .models.bninception import bninception 13 | from .models.resnext import resnext101_32x4d 14 | from .models.resnext import resnext101_64x4d 15 | from .models.inceptionv4 import inceptionv4 16 | from .models.inceptionresnetv2 import inceptionresnetv2 17 | from .models.nasnet import nasnetalarge 18 | from .models.nasnet_mobile import nasnetamobile 19 | from .models.torchvision_models import alexnet 20 | from .models.torchvision_models import densenet121 21 | from .models.torchvision_models import densenet169 22 | from .models.torchvision_models import densenet201 23 | from .models.torchvision_models import densenet161 24 | from .models.torchvision_models import resnet18 25 | from .models.torchvision_models import resnet34 26 | from .models.torchvision_models import resnet50 27 | from .models.torchvision_models import resnet101 28 | from .models.torchvision_models import resnet152 29 | from .models.torchvision_models import inceptionv3 30 | from .models.torchvision_models import squeezenet1_0 31 | from .models.torchvision_models import squeezenet1_1 32 | from .models.torchvision_models import vgg11 33 | from .models.torchvision_models import vgg11_bn 34 | from .models.torchvision_models import vgg13 35 | from .models.torchvision_models import vgg13_bn 36 | from .models.torchvision_models import vgg16 37 | from .models.torchvision_models import vgg16_bn 38 | from .models.torchvision_models import vgg19_bn 39 | from .models.torchvision_models import vgg19 40 | from .models.dpn import dpn68 41 | from .models.dpn import dpn68b 42 | from .models.dpn import dpn92 43 | from .models.dpn import dpn98 44 | from .models.dpn import dpn131 45 | from .models.dpn import dpn107 46 | from .models.xception import xception 47 | from .models.senet import senet154 48 | from .models.senet import se_resnet50 49 | from .models.senet import se_resnet101 50 | from .models.senet import se_resnet152 51 | from .models.senet import se_resnext50_32x4d 52 | from .models.senet import se_resnext101_32x4d 53 | from .models.pnasnet import pnasnet5large 54 | from .models.polynet import polynet 55 | -------------------------------------------------------------------------------- /pretrainedmodels/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .voc import Voc2007Classification -------------------------------------------------------------------------------- /pretrainedmodels/datasets/utils.py: -------------------------------------------------------------------------------- 1 | import math 2 | from urllib.request import urlretrieve 3 | 4 | import torch 5 | from PIL import Image 6 | from tqdm import tqdm 7 | 8 | def load_imagenet_classes(path_synsets='data/imagenet_synsets.txt', 9 | path_classes='data/imagenet_classes.txt'): 10 | with open(path_synsets, 'r') as f: 11 | synsets = f.readlines() 12 | 13 | synsets = [x.strip() for x in synsets] 14 | splits = [line.split(' ') for line in synsets] 15 | key_to_classname = {spl[0]:' '.join(spl[1:]) for spl in splits} 16 | 17 | with open(path_classes, 'r') as f: 18 | class_id_to_key = f.readlines() 19 | 20 | class_id_to_key = [x.strip() for x in class_id_to_key] 21 | 22 | cid_to_cname = [] 23 | for i in range(len(class_id_to_key)): 24 
| key = class_id_to_key[i] 25 | cname = key_to_classname[key] 26 | cid_to_cname.append(cname) 27 | 28 | return cid_to_cname 29 | 30 | 31 | class Warp(object): 32 | def __init__(self, size, interpolation=Image.BILINEAR): 33 | self.size = int(size) 34 | self.interpolation = interpolation 35 | 36 | def __call__(self, img): 37 | return img.resize((self.size, self.size), self.interpolation) 38 | 39 | def __str__(self): 40 | return self.__class__.__name__ + ' (size={size}, interpolation={interpolation})'.format(size=self.size, 41 | interpolation=self.interpolation) 42 | 43 | 44 | def download_url(url, destination=None, progress_bar=True): 45 | """Download a URL to a local file. 46 | 47 | Parameters 48 | ---------- 49 | url : str 50 | The URL to download. 51 | destination : str, None 52 | The destination of the file. If None is given the file is saved to a temporary directory. 53 | progress_bar : bool 54 | Whether to show a command-line progress bar while downloading. 55 | 56 | Returns 57 | ------- 58 | filename : str 59 | The location of the downloaded file. 60 | 61 | Notes 62 | ----- 63 | Progress bar use/example adapted from tqdm documentation: https://github.com/tqdm/tqdm 64 | """ 65 | 66 | def my_hook(t): 67 | last_b = [0] 68 | 69 | def inner(b=1, bsize=1, tsize=None): 70 | if tsize is not None: 71 | t.total = tsize 72 | if b > 0: 73 | t.update((b - last_b[0]) * bsize) 74 | last_b[0] = b 75 | 76 | return inner 77 | 78 | if progress_bar: 79 | with tqdm(unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1]) as t: 80 | filename, _ = urlretrieve(url, filename=destination, reporthook=my_hook(t)) 81 | else: 82 | filename, _ = urlretrieve(url, filename=destination) 83 | 84 | 85 | class AveragePrecisionMeter(object): 86 | """ 87 | The APMeter measures the average precision per class. 88 | The APMeter is designed to operate on `NxK` Tensors `output` and 89 | `target`, and optionally a `Nx1` Tensor weight where (1) the `output` 90 | contains model output scores for `N` examples and `K` classes that ought to 91 | be higher when the model is more convinced that the example should be 92 | positively labeled, and smaller when the model believes the example should 93 | be negatively labeled (for instance, the output of a sigmoid function); (2) 94 | the `target` contains only values 0 (for negative examples) and 1 95 | (for positive examples); and (3) the `weight` ( > 0) represents weight for 96 | each sample. 97 | """ 98 | 99 | def __init__(self, difficult_examples=False): 100 | super(AveragePrecisionMeter, self).__init__() 101 | self.reset() 102 | self.difficult_examples = difficult_examples 103 | 104 | def reset(self): 105 | """Resets the meter with empty member variables""" 106 | self.scores = torch.FloatTensor(torch.FloatStorage()) 107 | self.targets = torch.LongTensor(torch.LongStorage()) 108 | 109 | def add(self, output, target): 110 | """ 111 | Args: 112 | output (Tensor): NxK tensor that for each of the N examples 113 | indicates the probability of the example belonging to each of 114 | the K classes, according to the model. 
The probabilities should 115 | sum to one over all classes 116 | target (Tensor): binary NxK tensort that encodes which of the K 117 | classes are associated with the N-th input 118 | (eg: a row [0, 1, 0, 1] indicates that the example is 119 | associated with classes 2 and 4) 120 | weight (optional, Tensor): Nx1 tensor representing the weight for 121 | each example (each weight > 0) 122 | """ 123 | if not torch.is_tensor(output): 124 | output = torch.from_numpy(output) 125 | if not torch.is_tensor(target): 126 | target = torch.from_numpy(target) 127 | 128 | if output.dim() == 1: 129 | output = output.view(-1, 1) 130 | else: 131 | assert output.dim() == 2, \ 132 | 'wrong output size (should be 1D or 2D with one column \ 133 | per class)' 134 | if target.dim() == 1: 135 | target = target.view(-1, 1) 136 | else: 137 | assert target.dim() == 2, \ 138 | 'wrong target size (should be 1D or 2D with one column \ 139 | per class)' 140 | if self.scores.numel() > 0: 141 | assert target.size(1) == self.targets.size(1), \ 142 | 'dimensions for output should match previously added examples.' 143 | 144 | # make sure storage is of sufficient size 145 | if self.scores.storage().size() < self.scores.numel() + output.numel(): 146 | new_size = math.ceil(self.scores.storage().size() * 1.5) 147 | self.scores.storage().resize_(int(new_size + output.numel())) 148 | self.targets.storage().resize_(int(new_size + output.numel())) 149 | 150 | # store scores and targets 151 | offset = self.scores.size(0) if self.scores.dim() > 0 else 0 152 | self.scores.resize_(offset + output.size(0), output.size(1)) 153 | self.targets.resize_(offset + target.size(0), target.size(1)) 154 | self.scores.narrow(0, offset, output.size(0)).copy_(output) 155 | self.targets.narrow(0, offset, target.size(0)).copy_(target) 156 | 157 | def value(self): 158 | """Returns the model's average precision for each class 159 | Return: 160 | ap (FloatTensor): 1xK tensor, with avg precision for each class k 161 | """ 162 | 163 | if self.scores.numel() == 0: 164 | return 0 165 | ap = torch.zeros(self.scores.size(1)) 166 | rg = torch.arange(1, self.scores.size(0)).float() 167 | 168 | # compute average precision for each class 169 | for k in range(self.scores.size(1)): 170 | # sort scores 171 | scores = self.scores[:, k] 172 | targets = self.targets[:, k] 173 | 174 | # compute average precision 175 | ap[k] = AveragePrecisionMeter.average_precision(scores, targets, self.difficult_examples) 176 | return ap 177 | 178 | @staticmethod 179 | def average_precision(output, target, difficult_examples=True): 180 | 181 | # sort examples 182 | sorted, indices = torch.sort(output, dim=0, descending=True) 183 | 184 | # Computes prec@i 185 | pos_count = 0. 186 | total_count = 0. 187 | precision_at_i = 0. 
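# Walk the examples in descending score order: every time a positive (label == 1) is hit,
# add the precision at that rank (pos_count / total_count); the mean over all positives is
# the average precision. With difficult_examples=True, entries labeled 0 are skipped
# entirely rather than counted as negatives.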
188 | for i in indices: 189 | label = target[i] 190 | if difficult_examples and label == 0: 191 | continue 192 | if label == 1: 193 | pos_count += 1 194 | total_count += 1 195 | if label == 1: 196 | precision_at_i += pos_count / total_count 197 | precision_at_i /= pos_count 198 | return precision_at_i -------------------------------------------------------------------------------- /pretrainedmodels/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/pretrainedmodels/models/__init__.py -------------------------------------------------------------------------------- /pretrainedmodels/models/cafferesnet.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import torch.utils.model_zoo as model_zoo 6 | 7 | pretrained_settings = { 8 | 'cafferesnet101': { 9 | 'imagenet': { 10 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/cafferesnet101-9d633cc0.pth', 11 | 'input_space': 'BGR', 12 | 'input_size': [3, 224, 224], 13 | 'input_range': [0, 255], 14 | 'mean': [102.9801, 115.9465, 122.7717], 15 | 'std': [1, 1, 1], 16 | 'num_classes': 1000 17 | } 18 | } 19 | } 20 | 21 | 22 | def conv3x3(in_planes, out_planes, stride=1): 23 | "3x3 convolution with padding" 24 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 25 | padding=1, bias=False) 26 | 27 | 28 | class BasicBlock(nn.Module): 29 | expansion = 1 30 | 31 | def __init__(self, inplanes, planes, stride=1, downsample=None): 32 | super(BasicBlock, self).__init__() 33 | self.conv1 = conv3x3(inplanes, planes, stride) 34 | self.bn1 = nn.BatchNorm2d(planes) 35 | self.relu = nn.ReLU(inplace=True) 36 | self.conv2 = conv3x3(planes, planes) 37 | self.bn2 = nn.BatchNorm2d(planes) 38 | self.downsample = downsample 39 | self.stride = stride 40 | 41 | def forward(self, x): 42 | residual = x 43 | 44 | out = self.conv1(x) 45 | out = self.bn1(out) 46 | out = self.relu(out) 47 | 48 | out = self.conv2(out) 49 | out = self.bn2(out) 50 | 51 | if self.downsample is not None: 52 | residual = self.downsample(x) 53 | 54 | out += residual 55 | out = self.relu(out) 56 | 57 | return out 58 | 59 | 60 | class Bottleneck(nn.Module): 61 | expansion = 4 62 | 63 | def __init__(self, inplanes, planes, stride=1, downsample=None): 64 | super(Bottleneck, self).__init__() 65 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) # change 66 | self.bn1 = nn.BatchNorm2d(planes) 67 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, # change 68 | padding=1, bias=False) 69 | self.bn2 = nn.BatchNorm2d(planes) 70 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 71 | self.bn3 = nn.BatchNorm2d(planes * 4) 72 | self.relu = nn.ReLU(inplace=True) 73 | self.downsample = downsample 74 | self.stride = stride 75 | 76 | def forward(self, x): 77 | residual = x 78 | 79 | out = self.conv1(x) 80 | out = self.bn1(out) 81 | out = self.relu(out) 82 | 83 | out = self.conv2(out) 84 | out = self.bn2(out) 85 | out = self.relu(out) 86 | 87 | out = self.conv3(out) 88 | out = self.bn3(out) 89 | 90 | if self.downsample is not None: 91 | residual = self.downsample(x) 92 | 93 | out += residual 94 | out = self.relu(out) 95 | 96 | return out 97 | 98 | 99 | class ResNet(nn.Module): 100 | 101 | def __init__(self, block, layers, 
num_classes=1000): 102 | self.inplanes = 64 103 | super(ResNet, self).__init__() 104 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 105 | bias=False) 106 | self.bn1 = nn.BatchNorm2d(64) 107 | self.relu = nn.ReLU(inplace=True) 108 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True) # change 109 | self.layer1 = self._make_layer(block, 64, layers[0]) 110 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 111 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 112 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 113 | # it is slightly better whereas slower to set stride = 1 114 | # self.layer4 = self._make_layer(block, 512, layers[3], stride=1) 115 | self.avgpool = nn.AvgPool2d(7) 116 | self.last_linear = nn.Linear(512 * block.expansion, num_classes) 117 | 118 | for m in self.modules(): 119 | if isinstance(m, nn.Conv2d): 120 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 121 | m.weight.data.normal_(0, math.sqrt(2. / n)) 122 | elif isinstance(m, nn.BatchNorm2d): 123 | m.weight.data.fill_(1) 124 | m.bias.data.zero_() 125 | 126 | def _make_layer(self, block, planes, blocks, stride=1): 127 | downsample = None 128 | if stride != 1 or self.inplanes != planes * block.expansion: 129 | downsample = nn.Sequential( 130 | nn.Conv2d(self.inplanes, planes * block.expansion, 131 | kernel_size=1, stride=stride, bias=False), 132 | nn.BatchNorm2d(planes * block.expansion), 133 | ) 134 | 135 | layers = [] 136 | layers.append(block(self.inplanes, planes, stride, downsample)) 137 | self.inplanes = planes * block.expansion 138 | for i in range(1, blocks): 139 | layers.append(block(self.inplanes, planes)) 140 | 141 | return nn.Sequential(*layers) 142 | 143 | def forward(self, x): 144 | x = self.conv1(x) 145 | x = self.bn1(x) 146 | x = self.relu(x) 147 | x = self.maxpool(x) 148 | 149 | x = self.layer1(x) 150 | x = self.layer2(x) 151 | x = self.layer3(x) 152 | x = self.layer4(x) 153 | 154 | x = self.avgpool(x) 155 | x = x.view(x.size(0), -1) 156 | x = self.last_linear(x) 157 | 158 | return x 159 | 160 | 161 | def cafferesnet101(num_classes=1000, pretrained='imagenet'): 162 | """Constructs a ResNet-101 model. 
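    Note: the Caffe-ported weights declared in pretrained_settings above expect BGR input
    in the [0, 255] range with the listed per-channel means subtracted (std of 1), unlike
    the usual torchvision RGB / [0, 1] preprocessing.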
163 | Args: 164 | pretrained (bool): If True, returns a model pre-trained on ImageNet 165 | """ 166 | model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes) 167 | if pretrained is not None: 168 | settings = pretrained_settings['cafferesnet101'][pretrained] 169 | assert num_classes == settings['num_classes'], \ 170 | "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes) 171 | model.load_state_dict(model_zoo.load_url(settings['url'])) 172 | model.input_space = settings['input_space'] 173 | model.input_size = settings['input_size'] 174 | model.input_range = settings['input_range'] 175 | model.mean = settings['mean'] 176 | model.std = settings['std'] 177 | return model -------------------------------------------------------------------------------- /pretrainedmodels/models/fbresnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | import torch.utils.model_zoo as model_zoo 4 | 5 | 6 | __all__ = ['FBResNet', 7 | #'fbresnet18', 'fbresnet34', 'fbresnet50', 'fbresnet101', 8 | 'fbresnet152'] 9 | 10 | pretrained_settings = { 11 | 'fbresnet152': { 12 | 'imagenet': { 13 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/fbresnet152-2e20f6b4.pth', 14 | 'input_space': 'RGB', 15 | 'input_size': [3, 224, 224], 16 | 'input_range': [0, 1], 17 | 'mean': [0.485, 0.456, 0.406], 18 | 'std': [0.229, 0.224, 0.225], 19 | 'num_classes': 1000 20 | } 21 | } 22 | } 23 | 24 | 25 | def conv3x3(in_planes, out_planes, stride=1): 26 | "3x3 convolution with padding" 27 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 28 | padding=1, bias=True) 29 | 30 | 31 | class BasicBlock(nn.Module): 32 | expansion = 1 33 | 34 | def __init__(self, inplanes, planes, stride=1, downsample=None): 35 | super(BasicBlock, self).__init__() 36 | self.conv1 = conv3x3(inplanes, planes, stride) 37 | self.bn1 = nn.BatchNorm2d(planes) 38 | self.relu = nn.ReLU(inplace=True) 39 | self.conv2 = conv3x3(planes, planes) 40 | self.bn2 = nn.BatchNorm2d(planes) 41 | self.downsample = downsample 42 | self.stride = stride 43 | 44 | def forward(self, x): 45 | residual = x 46 | 47 | out = self.conv1(x) 48 | out = self.bn1(out) 49 | out = self.relu(out) 50 | 51 | out = self.conv2(out) 52 | out = self.bn2(out) 53 | 54 | if self.downsample is not None: 55 | residual = self.downsample(x) 56 | 57 | out += residual 58 | out = self.relu(out) 59 | 60 | return out 61 | 62 | 63 | class Bottleneck(nn.Module): 64 | expansion = 4 65 | 66 | def __init__(self, inplanes, planes, stride=1, downsample=None): 67 | super(Bottleneck, self).__init__() 68 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=True) 69 | self.bn1 = nn.BatchNorm2d(planes) 70 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 71 | padding=1, bias=True) 72 | self.bn2 = nn.BatchNorm2d(planes) 73 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=True) 74 | self.bn3 = nn.BatchNorm2d(planes * 4) 75 | self.relu = nn.ReLU(inplace=True) 76 | self.downsample = downsample 77 | self.stride = stride 78 | 79 | def forward(self, x): 80 | residual = x 81 | 82 | out = self.conv1(x) 83 | out = self.bn1(out) 84 | out = self.relu(out) 85 | 86 | out = self.conv2(out) 87 | out = self.bn2(out) 88 | out = self.relu(out) 89 | 90 | out = self.conv3(out) 91 | out = self.bn3(out) 92 | 93 | if self.downsample is not None: 94 | residual = self.downsample(x) 95 | 96 | out += residual 97 | out = self.relu(out) 98 | 99 | return out 100 | 101 | class 
FBResNet(nn.Module): 102 | 103 | def __init__(self, block, layers, num_classes=1000): 104 | self.inplanes = 64 105 | # Special attributs 106 | self.input_space = None 107 | self.input_size = (299, 299, 3) 108 | self.mean = None 109 | self.std = None 110 | super(FBResNet, self).__init__() 111 | # Modules 112 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 113 | bias=True) 114 | self.bn1 = nn.BatchNorm2d(64) 115 | self.relu = nn.ReLU(inplace=True) 116 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 117 | self.layer1 = self._make_layer(block, 64, layers[0]) 118 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 119 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 120 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 121 | self.avgpool = nn.AvgPool2d(7) 122 | self.last_linear = nn.Linear(512 * block.expansion, num_classes) 123 | 124 | for m in self.modules(): 125 | if isinstance(m, nn.Conv2d): 126 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 127 | m.weight.data.normal_(0, math.sqrt(2. / n)) 128 | elif isinstance(m, nn.BatchNorm2d): 129 | m.weight.data.fill_(1) 130 | m.bias.data.zero_() 131 | 132 | def _make_layer(self, block, planes, blocks, stride=1): 133 | downsample = None 134 | if stride != 1 or self.inplanes != planes * block.expansion: 135 | downsample = nn.Sequential( 136 | nn.Conv2d(self.inplanes, planes * block.expansion, 137 | kernel_size=1, stride=stride, bias=True), 138 | nn.BatchNorm2d(planes * block.expansion), 139 | ) 140 | 141 | layers = [] 142 | layers.append(block(self.inplanes, planes, stride, downsample)) 143 | self.inplanes = planes * block.expansion 144 | for i in range(1, blocks): 145 | layers.append(block(self.inplanes, planes)) 146 | 147 | return nn.Sequential(*layers) 148 | 149 | def features(self, input): 150 | x = self.conv1(input) 151 | self.conv1_input = x.clone() 152 | x = self.bn1(x) 153 | x = self.relu(x) 154 | x = self.maxpool(x) 155 | 156 | x = self.layer1(x) 157 | x = self.layer2(x) 158 | x = self.layer3(x) 159 | x = self.layer4(x) 160 | return x 161 | 162 | def logits(self, features): 163 | x = self.avgpool(features) 164 | x = x.view(x.size(0), -1) 165 | x = self.last_linear(x) 166 | return x 167 | 168 | def forward(self, input): 169 | x = self.features(input) 170 | x = self.logits(x) 171 | return x 172 | 173 | 174 | def fbresnet18(num_classes=1000): 175 | """Constructs a ResNet-18 model. 176 | 177 | Args: 178 | pretrained (bool): If True, returns a model pre-trained on ImageNet 179 | """ 180 | model = FBResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes) 181 | return model 182 | 183 | 184 | def fbresnet34(num_classes=1000): 185 | """Constructs a ResNet-34 model. 186 | 187 | Args: 188 | pretrained (bool): If True, returns a model pre-trained on ImageNet 189 | """ 190 | model = FBResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes) 191 | return model 192 | 193 | 194 | def fbresnet50(num_classes=1000): 195 | """Constructs a ResNet-50 model. 196 | 197 | Args: 198 | pretrained (bool): If True, returns a model pre-trained on ImageNet 199 | """ 200 | model = FBResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes) 201 | return model 202 | 203 | 204 | def fbresnet101(num_classes=1000): 205 | """Constructs a ResNet-101 model. 
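    (Note: unlike fbresnet152 below, this constructor takes no ``pretrained`` argument and
    always returns randomly initialized weights.)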
206 | 207 | Args: 208 | pretrained (bool): If True, returns a model pre-trained on ImageNet 209 | """ 210 | model = FBResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes) 211 | return model 212 | 213 | 214 | def fbresnet152(num_classes=1000, pretrained='imagenet'): 215 | """Constructs a ResNet-152 model. 216 | 217 | Args: 218 | pretrained (bool): If True, returns a model pre-trained on ImageNet 219 | """ 220 | model = FBResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes) 221 | if pretrained is not None: 222 | settings = pretrained_settings['fbresnet152'][pretrained] 223 | assert num_classes == settings['num_classes'], \ 224 | "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes) 225 | model.load_state_dict(model_zoo.load_url(settings['url'])) 226 | model.input_space = settings['input_space'] 227 | model.input_size = settings['input_size'] 228 | model.input_range = settings['input_range'] 229 | model.mean = settings['mean'] 230 | model.std = settings['std'] 231 | return model 232 | 233 | 234 | -------------------------------------------------------------------------------- /pretrainedmodels/models/fbresnet/resnet152_dump.lua: -------------------------------------------------------------------------------- 1 | require 'cutorch' 2 | require 'cunn' 3 | require 'cudnn' 4 | require 'image' 5 | vision=require 'torchnet-vision' 6 | 7 | net=vision.models.resnet.load{filename='data/resnet152/net.t7',length=152} 8 | print(net) 9 | 10 | require 'nn' 11 | nn.Module.parameters = function(self) 12 | if self.weight and self.bias and self.running_mean and self.running_var then 13 | return {self.weight, self.bias, self.running_mean, self.running_var}, {self.gradWeight, self.gradBias} 14 | 15 | elseif self.weight and self.bias then 16 | return {self.weight, self.bias}, {self.gradWeight, self.gradBias} 17 | elseif self.weight then 18 | return {self.weight}, {self.gradWeight} 19 | elseif self.bias then 20 | return {self.bias}, {self.gradBias} 21 | else 22 | return 23 | end 24 | end 25 | 26 | netparams, _ = net:parameters() 27 | print(#netparams) 28 | torch.save('data/resnet152/netparams.t7', netparams) 29 | 30 | net=net:cuda() 31 | net:evaluate() 32 | --p, gp = net:getParameters() 33 | input = torch.ones(1,3,224,224) 34 | input[{1,1,1,1}] = -1 35 | input[1] = image.load('data/lena_224.png') 36 | print(input:sum()) 37 | input = input:cuda() 38 | output=net:forward(input) 39 | 40 | for i=1, 11 do 41 | torch.save('data/resnet152/output'..i..'.t7', net:get(i).output:float()) 42 | end 43 | -------------------------------------------------------------------------------- /pretrainedmodels/models/resnext.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.nn as nn 4 | import torch.utils.model_zoo as model_zoo 5 | from .resnext_features import resnext101_32x4d_features 6 | from .resnext_features import resnext101_64x4d_features 7 | 8 | __all__ = ['ResNeXt101_32x4d', 'resnext101_32x4d', 9 | 'ResNeXt101_64x4d', 'resnext101_64x4d'] 10 | 11 | pretrained_settings = { 12 | 'resnext101_32x4d': { 13 | 'imagenet': { 14 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/resnext101_32x4d-29e315fa.pth', 15 | 'input_space': 'RGB', 16 | 'input_size': [3, 224, 224], 17 | 'input_range': [0, 1], 18 | 'mean': [0.485, 0.456, 0.406], 19 | 'std': [0.229, 0.224, 0.225], 20 | 'num_classes': 1000 21 | } 22 | }, 23 | 'resnext101_64x4d': { 24 | 'imagenet': { 25 | 'url': 
'http://data.lip6.fr/cadene/pretrainedmodels/resnext101_64x4d-e77a0586.pth', 26 | 'input_space': 'RGB', 27 | 'input_size': [3, 224, 224], 28 | 'input_range': [0, 1], 29 | 'mean': [0.485, 0.456, 0.406], 30 | 'std': [0.229, 0.224, 0.225], 31 | 'num_classes': 1000 32 | } 33 | } 34 | } 35 | 36 | class ResNeXt101_32x4d(nn.Module): 37 | 38 | def __init__(self, num_classes=1000): 39 | super(ResNeXt101_32x4d, self).__init__() 40 | self.num_classes = num_classes 41 | self.features = resnext101_32x4d_features 42 | self.avg_pool = nn.AvgPool2d((7, 7), (1, 1)) 43 | self.last_linear = nn.Linear(2048, num_classes) 44 | 45 | def logits(self, input): 46 | x = self.avg_pool(input) 47 | x = x.view(x.size(0), -1) 48 | x = self.last_linear(x) 49 | return x 50 | 51 | def forward(self, input): 52 | x = self.features(input) 53 | x = self.logits(x) 54 | return x 55 | 56 | 57 | class ResNeXt101_64x4d(nn.Module): 58 | 59 | def __init__(self, num_classes=1000): 60 | super(ResNeXt101_64x4d, self).__init__() 61 | self.num_classes = num_classes 62 | self.features = resnext101_64x4d_features 63 | self.avg_pool = nn.AvgPool2d((7, 7), (1, 1)) 64 | self.last_linear = nn.Linear(2048, num_classes) 65 | 66 | def logits(self, input): 67 | x = self.avg_pool(input) 68 | x = x.view(x.size(0), -1) 69 | x = self.last_linear(x) 70 | return x 71 | 72 | def forward(self, input): 73 | x = self.features(input) 74 | x = self.logits(x) 75 | return x 76 | 77 | 78 | def resnext101_32x4d(num_classes=1000, pretrained='imagenet'): 79 | model = ResNeXt101_32x4d(num_classes=num_classes) 80 | if pretrained is not None: 81 | settings = pretrained_settings['resnext101_32x4d'][pretrained] 82 | assert num_classes == settings['num_classes'], \ 83 | "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes) 84 | model.load_state_dict(model_zoo.load_url(settings['url'])) 85 | model.input_space = settings['input_space'] 86 | model.input_size = settings['input_size'] 87 | model.input_range = settings['input_range'] 88 | model.mean = settings['mean'] 89 | model.std = settings['std'] 90 | return model 91 | 92 | def resnext101_64x4d(num_classes=1000, pretrained='imagenet'): 93 | model = ResNeXt101_64x4d(num_classes=num_classes) 94 | if pretrained is not None: 95 | settings = pretrained_settings['resnext101_64x4d'][pretrained] 96 | assert num_classes == settings['num_classes'], \ 97 | "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes) 98 | model.load_state_dict(model_zoo.load_url(settings['url'])) 99 | model.input_space = settings['input_space'] 100 | model.input_size = settings['input_size'] 101 | model.input_range = settings['input_range'] 102 | model.mean = settings['mean'] 103 | model.std = settings['std'] 104 | return model 105 | -------------------------------------------------------------------------------- /pretrainedmodels/models/resnext_features/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnext101_32x4d_features import resnext101_32x4d_features 2 | from .resnext101_64x4d_features import resnext101_64x4d_features -------------------------------------------------------------------------------- /pretrainedmodels/models/utils.py: -------------------------------------------------------------------------------- 1 | from .fbresnet import pretrained_settings as fbresnet_settings 2 | from .bninception import pretrained_settings as bninception_settings 3 | from .resnext import pretrained_settings as resnext_settings 4 | from .inceptionv4 
import pretrained_settings as inceptionv4_settings 5 | from .inceptionresnetv2 import pretrained_settings as inceptionresnetv2_settings 6 | from .torchvision_models import pretrained_settings as torchvision_models_settings 7 | from .nasnet_mobile import pretrained_settings as nasnet_mobile_settings 8 | from .nasnet import pretrained_settings as nasnet_settings 9 | from .dpn import pretrained_settings as dpn_settings 10 | from .xception import pretrained_settings as xception_settings 11 | from .senet import pretrained_settings as senet_settings 12 | from .cafferesnet import pretrained_settings as cafferesnet_settings 13 | from .pnasnet import pretrained_settings as pnasnet_settings 14 | from .polynet import pretrained_settings as polynet_settings 15 | 16 | all_settings = [ 17 | fbresnet_settings, 18 | bninception_settings, 19 | resnext_settings, 20 | inceptionv4_settings, 21 | inceptionresnetv2_settings, 22 | torchvision_models_settings, 23 | nasnet_mobile_settings, 24 | nasnet_settings, 25 | dpn_settings, 26 | xception_settings, 27 | senet_settings, 28 | cafferesnet_settings, 29 | pnasnet_settings, 30 | polynet_settings 31 | ] 32 | 33 | model_names = [] 34 | pretrained_settings = {} 35 | for settings in all_settings: 36 | for model_name, model_settings in settings.items(): 37 | pretrained_settings[model_name] = model_settings 38 | model_names.append(model_name) 39 | -------------------------------------------------------------------------------- /pretrainedmodels/models/vggm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | #from torch.legacy import nn as nnl 5 | import torch.utils.model_zoo as model_zoo 6 | 7 | __all__ = ['vggm'] 8 | 9 | pretrained_settings = { 10 | 'vggm': { 11 | 'imagenet': { 12 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/vggm-786f2434.pth', 13 | 'input_space': 'BGR', 14 | 'input_size': [3, 221, 221], 15 | 'input_range': [0, 255], 16 | 'mean': [123.68, 116.779, 103.939], 17 | 'std': [1, 1, 1], 18 | 'num_classes': 1000 19 | } 20 | } 21 | } 22 | 23 | class SpatialCrossMapLRN(nn.Module): 24 | def __init__(self, local_size=1, alpha=1.0, beta=0.75, k=1, ACROSS_CHANNELS=True): 25 | super(SpatialCrossMapLRN, self).__init__() 26 | self.ACROSS_CHANNELS = ACROSS_CHANNELS 27 | if ACROSS_CHANNELS: 28 | self.average=nn.AvgPool3d(kernel_size=(local_size, 1, 1), 29 | stride=1, 30 | padding=(int((local_size-1.0)/2), 0, 0)) 31 | else: 32 | self.average=nn.AvgPool2d(kernel_size=local_size, 33 | stride=1, 34 | padding=int((local_size-1.0)/2)) 35 | self.alpha = alpha 36 | self.beta = beta 37 | self.k = k 38 | 39 | def forward(self, x): 40 | if self.ACROSS_CHANNELS: 41 | div = x.pow(2).unsqueeze(1) 42 | div = self.average(div).squeeze(1) 43 | div = div.mul(self.alpha).add(self.k).pow(self.beta) 44 | else: 45 | div = x.pow(2) 46 | div = self.average(div) 47 | div = div.mul(self.alpha).add(self.k).pow(self.beta) 48 | x = x.div(div) 49 | return x 50 | 51 | class LambdaBase(nn.Sequential): 52 | def __init__(self, fn, *args): 53 | super(LambdaBase, self).__init__(*args) 54 | self.lambda_func = fn 55 | 56 | def forward_prepare(self, input): 57 | output = [] 58 | for module in self._modules.values(): 59 | output.append(module(input)) 60 | return output if output else input 61 | 62 | class Lambda(LambdaBase): 63 | def forward(self, input): 64 | return self.lambda_func(self.forward_prepare(input)) 65 | 66 | class VGGM(nn.Module): 67 | 68 | def __init__(self, 
num_classes=1000): 69 | super(VGGM, self).__init__() 70 | self.num_classes = num_classes 71 | self.features = nn.Sequential( 72 | nn.Conv2d(3,96,(7, 7),(2, 2)), 73 | nn.ReLU(), 74 | SpatialCrossMapLRN(5, 0.0005, 0.75, 2), 75 | nn.MaxPool2d((3, 3),(2, 2),(0, 0),ceil_mode=True), 76 | nn.Conv2d(96,256,(5, 5),(2, 2),(1, 1)), 77 | nn.ReLU(), 78 | SpatialCrossMapLRN(5, 0.0005, 0.75, 2), 79 | nn.MaxPool2d((3, 3),(2, 2),(0, 0),ceil_mode=True), 80 | nn.Conv2d(256,512,(3, 3),(1, 1),(1, 1)), 81 | nn.ReLU(), 82 | nn.Conv2d(512,512,(3, 3),(1, 1),(1, 1)), 83 | nn.ReLU(), 84 | nn.Conv2d(512,512,(3, 3),(1, 1),(1, 1)), 85 | nn.ReLU(), 86 | nn.MaxPool2d((3, 3),(2, 2),(0, 0),ceil_mode=True) 87 | ) 88 | self.classif = nn.Sequential( 89 | nn.Linear(18432,4096), 90 | nn.ReLU(), 91 | nn.Dropout(0.5), 92 | nn.Linear(4096,4096), 93 | nn.ReLU(), 94 | nn.Dropout(0.5), 95 | nn.Linear(4096,num_classes) 96 | ) 97 | 98 | def forward(self, x): 99 | x = self.features(x) 100 | x = x.view(x.size(0), -1) 101 | x = self.classif(x) 102 | return x 103 | 104 | def vggm(num_classes=1000, pretrained='imagenet'): 105 | if pretrained: 106 | settings = pretrained_settings['vggm'][pretrained] 107 | assert num_classes == settings['num_classes'], \ 108 | "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes) 109 | 110 | model = VGGM(num_classes=1000) 111 | model.load_state_dict(model_zoo.load_url(settings['url'])) 112 | 113 | model.input_space = settings['input_space'] 114 | model.input_size = settings['input_size'] 115 | model.input_range = settings['input_range'] 116 | model.mean = settings['mean'] 117 | model.std = settings['std'] 118 | else: 119 | model = VGGM(num_classes=num_classes) 120 | return model -------------------------------------------------------------------------------- /pretrainedmodels/models/wideresnet.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import expanduser 3 | import hickle as hkl 4 | import torch 5 | import torch.nn.functional as F 6 | from torch.autograd import Variable 7 | 8 | __all__ = ['wideresnet50'] 9 | 10 | model_urls = { 11 | 'wideresnet152': 'https://s3.amazonaws.com/pytorch/h5models/wide-resnet-50-2-export.hkl' 12 | } 13 | 14 | def define_model(params): 15 | def conv2d(input, params, base, stride=1, pad=0): 16 | return F.conv2d(input, params[base + '.weight'], 17 | params[base + '.bias'], stride, pad) 18 | 19 | def group(input, params, base, stride, n): 20 | o = input 21 | for i in range(0,n): 22 | b_base = ('%s.block%d.conv') % (base, i) 23 | x = o 24 | o = conv2d(x, params, b_base + '0') 25 | o = F.relu(o) 26 | o = conv2d(o, params, b_base + '1', stride=i==0 and stride or 1, pad=1) 27 | o = F.relu(o) 28 | o = conv2d(o, params, b_base + '2') 29 | if i == 0: 30 | o += conv2d(x, params, b_base + '_dim', stride=stride) 31 | else: 32 | o += x 33 | o = F.relu(o) 34 | return o 35 | 36 | # determine network size by parameters 37 | blocks = [sum([re.match('group%d.block\d+.conv0.weight'%j, k) is not None 38 | for k in params.keys()]) for j in range(4)] 39 | 40 | def f(input, params, pooling_classif=True): 41 | o = F.conv2d(input, params['conv0.weight'], params['conv0.bias'], 2, 3) 42 | o = F.relu(o) 43 | o = F.max_pool2d(o, 3, 2, 1) 44 | o_g0 = group(o, params, 'group0', 1, blocks[0]) 45 | o_g1 = group(o_g0, params, 'group1', 2, blocks[1]) 46 | o_g2 = group(o_g1, params, 'group2', 2, blocks[2]) 47 | o_g3 = group(o_g2, params, 'group3', 2, blocks[3]) 48 | if pooling_classif: 49 | o = F.avg_pool2d(o_g3, 7, 1, 
0) 50 | o = o.view(o.size(0), -1) 51 | o = F.linear(o, params['fc.weight'], params['fc.bias']) 52 | return o 53 | 54 | return f 55 | 56 | 57 | class WideResNet(nn.Module): 58 | 59 | def __init__(self, pooling): 60 | super(WideResNet, self).__init__() 61 | self.pooling = pooling 62 | self.params = params 63 | 64 | def forward(self, x): 65 | x = f(x, self.params, self.pooling) 66 | return x 67 | 68 | 69 | def wideresnet50(pooling): 70 | dir_models = os.path.join(expanduser("~"), '.torch/wideresnet') 71 | path_hkl = os.path.join(dir_models, 'wideresnet50.hkl') 72 | if os.path.isfile(path_hkl): 73 | params = hkl.load(path_hkl) 74 | # convert numpy arrays to torch Variables 75 | for k,v in sorted(params.items()): 76 | print k, v.shape 77 | params[k] = Variable(torch.from_numpy(v), requires_grad=True) 78 | else: 79 | os.system('mkdir -p ' + dir_models) 80 | os.system('wget {} -O {}'.format(model_urls['wideresnet50'], path_hkl)) 81 | f = define_model(params) 82 | model = WideResNet(pooling) 83 | return model 84 | 85 | 86 | -------------------------------------------------------------------------------- /pretrainedmodels/models/xception.py: -------------------------------------------------------------------------------- 1 | """ 2 | Ported to pytorch thanks to [tstandley](https://github.com/tstandley/Xception-PyTorch) 3 | 4 | @author: tstandley 5 | Adapted by cadene 6 | 7 | Creates an Xception Model as defined in: 8 | 9 | Francois Chollet 10 | Xception: Deep Learning with Depthwise Separable Convolutions 11 | https://arxiv.org/pdf/1610.02357.pdf 12 | 13 | This weights ported from the Keras implementation. Achieves the following performance on the validation set: 14 | 15 | Loss:0.9173 Prec@1:78.892 Prec@5:94.292 16 | 17 | REMEMBER to set your image size to 3x299x299 for both test and validation 18 | 19 | normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], 20 | std=[0.5, 0.5, 0.5]) 21 | 22 | The resize parameter of the validation transform should be 333, and make sure to center crop at 299x299 23 | """ 24 | import math 25 | import torch 26 | import torch.nn as nn 27 | import torch.nn.functional as F 28 | import torch.utils.model_zoo as model_zoo 29 | from torch.nn import init 30 | 31 | __all__ = ['xception'] 32 | 33 | pretrained_settings = { 34 | 'xception': { 35 | 'imagenet': { 36 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/xception-b5690688.pth', 37 | 'input_space': 'RGB', 38 | 'input_size': [3, 299, 299], 39 | 'input_range': [0, 1], 40 | 'mean': [0.5, 0.5, 0.5], 41 | 'std': [0.5, 0.5, 0.5], 42 | 'num_classes': 1000, 43 | 'scale': 0.8975 # The resize parameter of the validation transform should be 333, and make sure to center crop at 299x299 44 | } 45 | } 46 | } 47 | 48 | 49 | class SeparableConv2d(nn.Module): 50 | def __init__(self,in_channels,out_channels,kernel_size=1,stride=1,padding=0,dilation=1,bias=False): 51 | super(SeparableConv2d,self).__init__() 52 | 53 | self.conv1 = nn.Conv2d(in_channels,in_channels,kernel_size,stride,padding,dilation,groups=in_channels,bias=bias) 54 | self.pointwise = nn.Conv2d(in_channels,out_channels,1,1,0,1,1,bias=bias) 55 | 56 | def forward(self,x): 57 | x = self.conv1(x) 58 | x = self.pointwise(x) 59 | return x 60 | 61 | 62 | class Block(nn.Module): 63 | def __init__(self,in_filters,out_filters,reps,strides=1,start_with_relu=True,grow_first=True): 64 | super(Block, self).__init__() 65 | 66 | if out_filters != in_filters or strides!=1: 67 | self.skip = nn.Conv2d(in_filters,out_filters,1,stride=strides, bias=False) 68 | self.skipbn = 
nn.BatchNorm2d(out_filters) 69 | else: 70 | self.skip=None 71 | 72 | self.relu = nn.ReLU(inplace=True) 73 | rep=[] 74 | 75 | filters=in_filters 76 | if grow_first: 77 | rep.append(self.relu) 78 | rep.append(SeparableConv2d(in_filters,out_filters,3,stride=1,padding=1,bias=False)) 79 | rep.append(nn.BatchNorm2d(out_filters)) 80 | filters = out_filters 81 | 82 | for i in range(reps-1): 83 | rep.append(self.relu) 84 | rep.append(SeparableConv2d(filters,filters,3,stride=1,padding=1,bias=False)) 85 | rep.append(nn.BatchNorm2d(filters)) 86 | 87 | if not grow_first: 88 | rep.append(self.relu) 89 | rep.append(SeparableConv2d(in_filters,out_filters,3,stride=1,padding=1,bias=False)) 90 | rep.append(nn.BatchNorm2d(out_filters)) 91 | 92 | if not start_with_relu: 93 | rep = rep[1:] 94 | else: 95 | rep[0] = nn.ReLU(inplace=False) 96 | 97 | if strides != 1: 98 | rep.append(nn.MaxPool2d(3,strides,1)) 99 | self.rep = nn.Sequential(*rep) 100 | 101 | def forward(self,inp): 102 | x = self.rep(inp) 103 | 104 | if self.skip is not None: 105 | skip = self.skip(inp) 106 | skip = self.skipbn(skip) 107 | else: 108 | skip = inp 109 | 110 | x+=skip 111 | return x 112 | 113 | 114 | class Xception(nn.Module): 115 | """ 116 | Xception optimized for the ImageNet dataset, as specified in 117 | https://arxiv.org/pdf/1610.02357.pdf 118 | """ 119 | def __init__(self, num_classes=1000): 120 | """ Constructor 121 | Args: 122 | num_classes: number of classes 123 | """ 124 | super(Xception, self).__init__() 125 | self.num_classes = num_classes 126 | 127 | self.conv1 = nn.Conv2d(3, 32, 3,2, 0, bias=False) 128 | self.bn1 = nn.BatchNorm2d(32) 129 | self.relu = nn.ReLU(inplace=True) 130 | 131 | self.conv2 = nn.Conv2d(32,64,3,bias=False) 132 | self.bn2 = nn.BatchNorm2d(64) 133 | #do relu here 134 | 135 | self.block1=Block(64,128,2,2,start_with_relu=False,grow_first=True) 136 | self.block2=Block(128,256,2,2,start_with_relu=True,grow_first=True) 137 | self.block3=Block(256,728,2,2,start_with_relu=True,grow_first=True) 138 | 139 | self.block4=Block(728,728,3,1,start_with_relu=True,grow_first=True) 140 | self.block5=Block(728,728,3,1,start_with_relu=True,grow_first=True) 141 | self.block6=Block(728,728,3,1,start_with_relu=True,grow_first=True) 142 | self.block7=Block(728,728,3,1,start_with_relu=True,grow_first=True) 143 | 144 | self.block8=Block(728,728,3,1,start_with_relu=True,grow_first=True) 145 | self.block9=Block(728,728,3,1,start_with_relu=True,grow_first=True) 146 | self.block10=Block(728,728,3,1,start_with_relu=True,grow_first=True) 147 | self.block11=Block(728,728,3,1,start_with_relu=True,grow_first=True) 148 | 149 | self.block12=Block(728,1024,2,2,start_with_relu=True,grow_first=False) 150 | 151 | self.conv3 = SeparableConv2d(1024,1536,3,1,1) 152 | self.bn3 = nn.BatchNorm2d(1536) 153 | 154 | #do relu here 155 | self.conv4 = SeparableConv2d(1536,2048,3,1,1) 156 | self.bn4 = nn.BatchNorm2d(2048) 157 | 158 | self.fc = nn.Linear(2048, num_classes) 159 | 160 | # #------- init weights -------- 161 | # for m in self.modules(): 162 | # if isinstance(m, nn.Conv2d): 163 | # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 164 | # m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 165 | # elif isinstance(m, nn.BatchNorm2d): 166 | # m.weight.data.fill_(1) 167 | # m.bias.data.zero_() 168 | # #----------------------------- 169 | 170 | def features(self, input): 171 | x = self.conv1(input) 172 | x = self.bn1(x) 173 | x = self.relu(x) 174 | 175 | x = self.conv2(x) 176 | x = self.bn2(x) 177 | x = self.relu(x) 178 | 179 | x = self.block1(x) 180 | x = self.block2(x) 181 | x = self.block3(x) 182 | x = self.block4(x) 183 | x = self.block5(x) 184 | x = self.block6(x) 185 | x = self.block7(x) 186 | x = self.block8(x) 187 | x = self.block9(x) 188 | x = self.block10(x) 189 | x = self.block11(x) 190 | x = self.block12(x) 191 | 192 | x = self.conv3(x) 193 | x = self.bn3(x) 194 | x = self.relu(x) 195 | 196 | x = self.conv4(x) 197 | x = self.bn4(x) 198 | return x 199 | 200 | def logits(self, features): 201 | x = self.relu(features) 202 | 203 | x = F.adaptive_avg_pool2d(x, (1, 1)) 204 | x = x.view(x.size(0), -1) 205 | x = self.last_linear(x) 206 | return x 207 | 208 | def forward(self, input): 209 | x = self.features(input) 210 | x = self.logits(x) 211 | return x 212 | 213 | 214 | def xception(num_classes=1000, pretrained='imagenet'): 215 | model = Xception(num_classes=num_classes) 216 | if pretrained: 217 | settings = pretrained_settings['xception'][pretrained] 218 | assert num_classes == settings['num_classes'], \ 219 | "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes) 220 | 221 | model = Xception(num_classes=num_classes) 222 | model.load_state_dict(model_zoo.load_url(settings['url'])) 223 | 224 | model.input_space = settings['input_space'] 225 | model.input_size = settings['input_size'] 226 | model.input_range = settings['input_range'] 227 | model.mean = settings['mean'] 228 | model.std = settings['std'] 229 | 230 | # TODO: ugly 231 | model.last_linear = model.fc 232 | del model.fc 233 | return model 234 | -------------------------------------------------------------------------------- /pretrainedmodels/utils.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torchvision.transforms as transforms 5 | from PIL import Image 6 | from munch import munchify 7 | 8 | class ToSpaceBGR(object): 9 | 10 | def __init__(self, is_bgr): 11 | self.is_bgr = is_bgr 12 | 13 | def __call__(self, tensor): 14 | if self.is_bgr: 15 | new_tensor = tensor.clone() 16 | new_tensor[0] = tensor[2] 17 | new_tensor[2] = tensor[0] 18 | tensor = new_tensor 19 | return tensor 20 | 21 | 22 | class ToRange255(object): 23 | 24 | def __init__(self, is_255): 25 | self.is_255 = is_255 26 | 27 | def __call__(self, tensor): 28 | if self.is_255: 29 | tensor.mul_(255) 30 | return tensor 31 | 32 | 33 | class TransformImage(object): 34 | 35 | def __init__(self, opts, scale=0.875, random_crop=False, 36 | random_hflip=False, random_vflip=False, 37 | preserve_aspect_ratio=True): 38 | if type(opts) == dict: 39 | opts = munchify(opts) 40 | self.input_size = opts.input_size 41 | self.input_space = opts.input_space 42 | self.input_range = opts.input_range 43 | self.mean = opts.mean 44 | self.std = opts.std 45 | 46 | # https://github.com/tensorflow/models/blob/master/research/inception/inception/image_processing.py#L294 47 | self.scale = scale 48 | self.random_crop = random_crop 49 | self.random_hflip = random_hflip 50 | self.random_vflip = random_vflip 51 | 52 | tfs = [] 53 | if preserve_aspect_ratio: 54 | tfs.append(transforms.Resize(int(math.floor(max(self.input_size)/self.scale)))) 55 | else: 
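# preserve_aspect_ratio=False: height and width are rescaled independently from
# input_size ([C, H, W]), so the image may be distorted before the crop back to
# max(input_size) below.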
56 | height = int(self.input_size[1] / self.scale) 57 | width = int(self.input_size[2] / self.scale) 58 | tfs.append(transforms.Resize((height, width))) 59 | 60 | if random_crop: 61 | tfs.append(transforms.RandomCrop(max(self.input_size))) 62 | else: 63 | tfs.append(transforms.CenterCrop(max(self.input_size))) 64 | 65 | if random_hflip: 66 | tfs.append(transforms.RandomHorizontalFlip()) 67 | 68 | if random_vflip: 69 | tfs.append(transforms.RandomVerticalFlip()) 70 | 71 | tfs.append(transforms.ToTensor()) 72 | tfs.append(ToSpaceBGR(self.input_space=='BGR')) 73 | tfs.append(ToRange255(max(self.input_range)==255)) 74 | tfs.append(transforms.Normalize(mean=self.mean, std=self.std)) 75 | 76 | self.tf = transforms.Compose(tfs) 77 | 78 | def __call__(self, img): 79 | tensor = self.tf(img) 80 | return tensor 81 | 82 | 83 | class LoadImage(object): 84 | 85 | def __init__(self, space='RGB'): 86 | self.space = space 87 | 88 | def __call__(self, path_img): 89 | with open(path_img, 'rb') as f: 90 | with Image.open(f) as img: 91 | img = img.convert(self.space) 92 | return img 93 | 94 | 95 | class LoadTransformImage(object): 96 | 97 | def __init__(self, model, scale=0.875): 98 | self.load = LoadImage() 99 | self.tf = TransformImage(model, scale=scale) 100 | 101 | def __call__(self, path_img): 102 | img = self.load(path_img) 103 | tensor = self.tf(img) 104 | return tensor 105 | 106 | 107 | class Identity(nn.Module): 108 | 109 | def __init__(self): 110 | super(Identity, self).__init__() 111 | 112 | def forward(self, x): 113 | return x -------------------------------------------------------------------------------- /pretrainedmodels/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.6.2' 2 | -------------------------------------------------------------------------------- /score_pedestrian_detection.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import argparse 4 | import os.path as osp 5 | 6 | def check_size(submission_file): 7 | max_size = 60*1024*1024 8 | if osp.getsize(submission_file) > max_size: 9 | raise IOError #File size exceeds the specified maximum size, which is 60M for the server. 
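# Expected text formats, inferred from the parsers below (not an official spec):
#   submission file : one detection per line, "image_id score x y w h" in pixels;
#   ignore file     : "image_id" followed by groups of 4 values (x y w h), one group per ignore region;
#   annotation file : "image_id" followed by groups of 5 values per ground-truth box
#                     (the first value of each group is skipped by parse_gt_annotation).
# All boxes are converted to (x1, y1, x2, y2) corners before the overlap tests.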
10 | 11 | def judge_overlap(pbox,ignore_box): 12 | overlap=[] 13 | delete=[] 14 | for p in pbox: 15 | pl=min(p[0],p[2]) 16 | pr=max(p[0],p[2]) 17 | pb=min(p[1],p[3]) 18 | pt=max(p[1],p[3]) 19 | s_p=(pr-pl)*(pt-pb) 20 | s_lap=-0.01 21 | for c in ignore_box: 22 | cl=min(c[0],c[2]) 23 | cr=max(c[0],c[2]) 24 | cb=min(c[1],c[3]) 25 | ct=max(c[1],c[3]) 26 | if not (cr<pl or cl>pr or ct<pb or cb>pt): 27 | s_lap+=(min(cr,pr)-max(cl,pl))*(min(ct,pt)-max(cb,pb)) 28 | if s_lap>0: 29 | overlap.append([p,s_lap/s_p]) 30 | for o in overlap: 31 | if o[1]>0.5: 32 | delete.append(o[0]) 33 | remain_id = [p for p in pbox if p not in delete] 34 | return remain_id 35 | 36 | def parse_ignore_file(ignore_file): 37 | with open(ignore_file,'r') as f: 38 | lines = f.readlines() 39 | ig = [x.strip().split() for x in lines] 40 | ignore = {} 41 | for item in ig: 42 | key = item[0] 43 | ignore_num = (len(item)-1)/4 44 | bbox = [] 45 | for i in range(int(ignore_num)): 46 | b = [] 47 | b.append(int(item[1+4*i])) 48 | b.append(int(item[2+4*i])) 49 | b.append(int(item[1+4*i])+int(item[3+4*i])) 50 | b.append(int(item[2+4*i])+int(item[4+4*i])) 51 | bbox.append(b) 52 | ignore[key] = bbox 53 | return ignore 54 | 55 | def parse_submission(submission_file,ignore_file): 56 | ignore_zone = parse_ignore_file(ignore_file) 57 | ignore_keys = ignore_zone.keys() 58 | with open(submission_file, 'r') as f: 59 | lines = f.readlines() 60 | splitlines = [x.strip().split() for x in lines] 61 | image_ids = [x[0] for x in splitlines] 62 | confidence = np.array([float(x[1]) for x in splitlines]) 63 | BB = [] 64 | for x in splitlines: 65 | bb = [] 66 | bb.append(float(x[2])) 67 | bb.append(float(x[3])) 68 | bb.append(float(x[2])+float(x[4])) 69 | bb.append(float(x[3])+float(x[5])) 70 | BB.append(bb) 71 | 72 | sub_key = [] 73 | for x in image_ids: 74 | if x not in sub_key: 75 | sub_key.append(x) 76 | final_confidence = [] 77 | final_ids = [] 78 | final_BB = [] 79 | 80 | for key in sub_key: 81 | find = [i for i,v in enumerate(image_ids) if v == key] 82 | BB_sub = [BB[i] for i in find] 83 | confid_sub = [confidence[i] for i in find] 84 | if key in ignore_keys: 85 | ignore_bbox = ignore_zone[key] 86 | bbox_remain = judge_overlap(BB_sub,ignore_bbox) 87 | find_remain = [] 88 | for i,v in enumerate(BB_sub): 89 | if v in bbox_remain: 90 | find_remain.append(i) 91 | confid_remain = [confid_sub[i] for i in find_remain] 92 | BB_sub = bbox_remain 93 | confid_sub = confid_remain 94 | ids_sub = [key]*len(BB_sub) 95 | final_ids.extend(ids_sub) 96 | final_confidence.extend(confid_sub) 97 | final_BB.extend(BB_sub) 98 | 99 | final_BB = np.array(final_BB) 100 | final_confidence = np.array(final_confidence) 101 | sorted_ind = np.argsort(-final_confidence) 102 | final_BB = final_BB[sorted_ind, :] 103 | final_ids = [final_ids[x] for x in sorted_ind] 104 | return final_ids, final_BB 105 | 106 | def parse_gt_annotation(gt_file,ignore_file): 107 | ignore_zone = parse_ignore_file(ignore_file) 108 | ignore_keys = ignore_zone.keys() 109 | with open(gt_file, 'r') as f: 110 | lines = f.readlines() 111 | info = [x.strip().split() for x in lines] 112 | gt = {} 113 | for item in info: 114 | bbox = [] 115 | bbox_num = (len(item)-1)/5 116 | for i in range(int(bbox_num)): 117 | b = [] 118 | b.append(int(item[2+5*i])) 119 | b.append(int(item[3+5*i])) 120 | b.append(int(item[2+5*i])+int(item[4+5*i])) 121 | b.append(int(item[3+5*i])+int(item[5+5*i])) 122 | bbox.append(b) 123 | if item[0] in ignore_keys: 124 | ignore_bbox = ignore_zone[item[0]] 125 | bbox_remain = judge_overlap(bbox,ignore_bbox) 126 |
else: 127 | bbox_remain = bbox 128 | gt[item[0]] = np.array(bbox_remain) 129 | return gt 130 | 131 | def compute_ap(rec, prec): 132 | mrec = np.concatenate(([0.], rec, [1.])) 133 | mpre = np.concatenate(([0.], prec, [0.])) 134 | for i in range(mpre.size - 1, 0, -1): 135 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 136 | i = np.where(mrec[1:] != mrec[:-1])[0] 137 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 138 | return ap 139 | 140 | 141 | def pedestrian_eval(input, gt_file, ignore_file, ovthresh): 142 | gt = parse_gt_annotation(gt_file,ignore_file) 143 | image_ids, BB = parse_submission(input,ignore_file) 144 | npos = 0 145 | recs = {} 146 | for key in gt.keys(): 147 | det = [False]*len(gt[key]) 148 | recs[key] = {'bbox': gt[key], 'det': det} 149 | npos += len(gt[key]) 150 | nd = len(image_ids) 151 | tp = np.zeros(nd) 152 | fp = np.zeros(nd) 153 | for d in range(nd): 154 | if image_ids[d] not in recs.keys(): 155 | raise KeyError("Can not find image {} in the groundtruth file, did you submit the result file for the right dataset?".format(image_ids[d])) 156 | for d in range(nd): 157 | R = recs[image_ids[d]] 158 | bb = BB[d, :].astype(float) 159 | ovmax = -np.inf 160 | BBGT = R['bbox'].astype(float) 161 | if BBGT.size > 0: 162 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 163 | iymin = np.maximum(BBGT[:, 1], bb[1]) 164 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 165 | iymax = np.minimum(BBGT[:, 3], bb[3]) 166 | iw = np.maximum(ixmax - ixmin + 1., 0.) 167 | ih = np.maximum(iymax - iymin + 1., 0.) 168 | inters = iw * ih 169 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 170 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 171 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 172 | overlaps = inters / uni 173 | ovmax = np.max(overlaps) 174 | jmax = np.argmax(overlaps) 175 | 176 | if ovmax > ovthresh: 177 | if not R['det'][jmax]: 178 | tp[d] = 1. 179 | R['det'][jmax] = 1 180 | else: 181 | fp[d] = 1. 182 | else: 183 | fp[d] = 1. 
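# Detections were already sorted by descending confidence in parse_submission, so the
# running sums below trace out the precision/recall curve: recall is normalized by the
# total number of ground-truth boxes (npos) and AP is the area under the interpolated
# curve computed by compute_ap.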
184 | fp = np.cumsum(fp) 185 | tp = np.cumsum(tp) 186 | rec = tp / float(npos+1e-8) 187 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 188 | ap = compute_ap(rec, prec) 189 | return ap 190 | 191 | 192 | def wider_ped_eval(input, gt,ignore_file): 193 | aap = [] 194 | for ove in np.arange(0.5, 1.0, 0.05): 195 | ap = pedestrian_eval(input, gt,ignore_file, ovthresh=ove) 196 | aap.append(ap) 197 | mAP = np.average(aap) 198 | return mAP 199 | 200 | 201 | def get_average_precision_validation(): 202 | input_dir = './' 203 | output_dir = './' 204 | ref_dir = osp.join(input_dir, 'ref') 205 | submit_dir = osp.join(input_dir, 'res') 206 | submit_file = 'submit_files/scores_validation.txt' 207 | gt_file = osp.join(ref_dir, 'val_annotations.txt') 208 | ignore_file = osp.join(ref_dir, 'pedestrian_ignore_part_val.txt') 209 | check_size(submit_file) 210 | mAP = wider_ped_eval(submit_file, gt_file, ignore_file) 211 | out = {'Average AP': mAP} 212 | print(out) 213 | return mAP 214 | 215 | 216 | 217 | if __name__ == '__main__': 218 | # parser = argparse.ArgumentParser() 219 | # parser.add_argument("input", type=str) 220 | # parser.add_argument("output", type=str) 221 | # args = parser.parse_args() 222 | get_average_precision_validation() 223 | # strings = ['{}: {}\n'.format(k, v) for k, v in out.items()] 224 | # open(os.path.join(output_dir, 'scores.txt'), 'w').writelines(strings) -------------------------------------------------------------------------------- /statics.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | data_dir="../data_wider_pedestrian" 4 | 5 | train_bbx_gt_file=os.path.join(data_dir,'train_annotations.txt') 6 | train_img_dir=os.path.join(data_dir,'train') 7 | 8 | val_bbx_gt_file=os.path.join(data_dir,'val_annotations.txt') 9 | val_img_dir=os.path.join(data_dir,'val') 10 | 11 | train_bbx_ignore_file=os.path.join(data_dir,'pedestrian_ignore_part_train.txt') 12 | 13 | val_bbx_ignore_file=os.path.join(data_dir,'pedestrian_ignore_part_val.txt') 14 | 15 | 16 | # config.py 17 | import os.path 18 | 19 | # gets home dir cross platform 20 | HOME = os.path.expanduser("~") 21 | 22 | # for making bounding boxes pretty 23 | COLORS = ((255, 0, 0, 128), (0, 255, 0, 128), (0, 0, 255, 128), 24 | (0, 255, 255, 128), (255, 0, 255, 128), (255, 255, 0, 128)) 25 | 26 | MEANS = (104, 117, 123) 27 | 28 | # SSD300 CONFIGS 29 | voc = { 30 | 'num_classes': 21, 31 | 'lr_steps': (80000, 100000, 120000), 32 | 'max_iter': 120000, 33 | 'feature_maps': [38, 19, 10, 5, 3, 1], 34 | 'min_dim': 300, 35 | 'steps': [8, 16, 32, 64, 100, 300], 36 | 'min_sizes': [30, 60, 111, 162, 213, 264], 37 | 'max_sizes': [60, 111, 162, 213, 264, 315], 38 | 'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]], 39 | 'variance': [0.1, 0.2], 40 | 'clip': True, 41 | 'name': 'VOC', 42 | } 43 | 44 | coco = { 45 | 'num_classes': 201, 46 | 'lr_steps': (280000, 360000, 400000), 47 | 'max_iter': 400000, 48 | 'feature_maps': [38, 19, 10, 5, 3, 1], 49 | 'min_dim': 300, 50 | 'steps': [8, 16, 32, 64, 100, 300], 51 | 'min_sizes': [21, 45, 99, 153, 207, 261], 52 | 'max_sizes': [45, 99, 153, 207, 261, 315], 53 | 'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]], 54 | 'variance': [0.1, 0.2], 55 | 'clip': True, 56 | 'name': 'COCO', 57 | } 58 | 59 | 60 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/utils/__init__.py -------------------------------------------------------------------------------- /utils/build.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | import numpy as np 11 | from distutils.core import setup 12 | from distutils.extension import Extension 13 | from Cython.Distutils import build_ext 14 | 15 | 16 | def find_in_path(name, path): 17 | "Find a file in a search path" 18 | # adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 19 | for dir in path.split(os.pathsep): 20 | binpath = pjoin(dir, name) 21 | if os.path.exists(binpath): 22 | return os.path.abspath(binpath) 23 | return None 24 | 25 | 26 | def locate_cuda(): 27 | """Locate the CUDA environment on the system 28 | 29 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 30 | and values giving the absolute path to each directory. 31 | 32 | Starts by looking for the CUDAHOME env variable. If not found, everything 33 | is based on finding 'nvcc' in the PATH. 34 | """ 35 | 36 | # first check if the CUDAHOME env variable is in use 37 | if 'CUDAHOME' in os.environ: 38 | home = os.environ['CUDAHOME'] 39 | nvcc = pjoin(home, 'bin', 'nvcc') 40 | else: 41 | # otherwise, search the PATH for NVCC 42 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 43 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 44 | if nvcc is None: 45 | raise EnvironmentError('The nvcc binary could not be ' 46 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 47 | home = os.path.dirname(os.path.dirname(nvcc)) 48 | 49 | cudaconfig = {'home': home, 'nvcc': nvcc, 50 | 'include': pjoin(home, 'include'), 51 | 'lib64': pjoin(home, 'lib64')} 52 | for k, v in cudaconfig.items(): 53 | if not os.path.exists(v): 54 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 55 | 56 | return cudaconfig 57 | 58 | 59 | CUDA = locate_cuda() 60 | 61 | # Obtain the numpy include directory. This logic works across numpy versions. 62 | try: 63 | numpy_include = np.get_include() 64 | except AttributeError: 65 | numpy_include = np.get_numpy_include() 66 | 67 | 68 | def customize_compiler_for_nvcc(self): 69 | """inject deep into distutils to customize how the dispatch 70 | to gcc/nvcc works. 71 | 72 | If you subclass UnixCCompiler, it's not trivial to get your subclass 73 | injected in, and still have the right customizations (i.e. 74 | distutils.sysconfig.customize_compiler) run on it. So instead of going 75 | the OO route, I have this. Note, it's kindof like a wierd functional 76 | subclassing going on.""" 77 | 78 | # tell the compiler it can processes .cu 79 | self.src_extensions.append('.cu') 80 | 81 | # save references to the default compiler_so and _comple methods 82 | default_compiler_so = self.compiler_so 83 | super = self._compile 84 | 85 | # now redefine the _compile method. This gets executed for each 86 | # object but distutils doesn't have the ability to change compilers 87 | # based on source extension: we add it. 
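# The replacement _compile defined next inspects the source extension: .cu files are
# routed to nvcc using the 'nvcc' entry of extra_compile_args, everything else keeps the
# default compiler with the 'gcc' entry, and compiler_so is restored afterwards.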
88 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 89 | print(extra_postargs) 90 | if os.path.splitext(src)[1] == '.cu': 91 | # use the cuda for .cu files 92 | self.set_executable('compiler_so', CUDA['nvcc']) 93 | # use only a subset of the extra_postargs, which are 1-1 translated 94 | # from the extra_compile_args in the Extension class 95 | postargs = extra_postargs['nvcc'] 96 | else: 97 | postargs = extra_postargs['gcc'] 98 | 99 | super(obj, src, ext, cc_args, postargs, pp_opts) 100 | # reset the default compiler_so, which we might have changed for cuda 101 | self.compiler_so = default_compiler_so 102 | 103 | # inject our redefined _compile method into the class 104 | self._compile = _compile 105 | 106 | 107 | # run the customize_compiler 108 | class custom_build_ext(build_ext): 109 | def build_extensions(self): 110 | customize_compiler_for_nvcc(self.compiler) 111 | build_ext.build_extensions(self) 112 | 113 | 114 | ext_modules = [ 115 | Extension( 116 | "nms.cpu_nms", 117 | ["nms/cpu_nms.pyx"], 118 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 119 | include_dirs=[numpy_include] 120 | ), 121 | Extension('nms.gpu_nms', 122 | ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'], 123 | library_dirs=[CUDA['lib64']], 124 | libraries=['cudart'], 125 | language='c++', 126 | runtime_library_dirs=[CUDA['lib64']], 127 | # this syntax is specific to this build system 128 | # we're only going to use certain compiler args with nvcc and not with gcc 129 | # the implementation of this trick is in customize_compiler() below 130 | extra_compile_args={'gcc': ["-Wno-unused-function"], 131 | 'nvcc': ['-arch=sm_52', 132 | '--ptxas-options=-v', 133 | '-c', 134 | '--compiler-options', 135 | "'-fPIC'"]}, 136 | include_dirs=[numpy_include, CUDA['include']] 137 | ), 138 | Extension( 139 | 'pycocotools._mask', 140 | sources=['pycocotools/maskApi.c', 'pycocotools/_mask.pyx'], 141 | include_dirs=[numpy_include, 'pycocotools'], 142 | extra_compile_args={ 143 | 'gcc': ['-Wno-cpp', '-Wno-unused-function', '-std=c99']}, 144 | ), 145 | ] 146 | 147 | setup( 148 | name='mot_utils', 149 | ext_modules=ext_modules, 150 | # inject our custom trigger 151 | cmdclass={'build_ext': custom_build_ext}, 152 | ) 153 | -------------------------------------------------------------------------------- /utils/json_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | def read_json_file(json_file): 4 | with open(json_file) as f: 5 | data = json.load(f) 6 | return data -------------------------------------------------------------------------------- /utils/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/utils/nms/__init__.py -------------------------------------------------------------------------------- /utils/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, 
np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | 70 | def cpu_soft_nms(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0): 71 | cdef unsigned int N = boxes.shape[0] 72 | cdef float iw, ih, box_area 73 | cdef float ua 74 | cdef int pos = 0 75 | cdef float maxscore = 0 76 | cdef int maxpos = 0 77 | cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov 78 | 79 | for i in range(N): 80 | maxscore = boxes[i, 4] 81 | maxpos = i 82 | 83 | tx1 = boxes[i,0] 84 | ty1 = boxes[i,1] 85 | tx2 = boxes[i,2] 86 | ty2 = boxes[i,3] 87 | ts = boxes[i,4] 88 | 89 | pos = i + 1 90 | # get max box 91 | while pos < N: 92 | if maxscore < boxes[pos, 4]: 93 | maxscore = boxes[pos, 4] 94 | maxpos = pos 95 | pos = pos + 1 96 | 97 | # add max box as a detection 98 | boxes[i,0] = boxes[maxpos,0] 99 | boxes[i,1] = boxes[maxpos,1] 100 | boxes[i,2] = boxes[maxpos,2] 101 | boxes[i,3] = boxes[maxpos,3] 102 | boxes[i,4] = boxes[maxpos,4] 103 | 104 | # swap ith box with position of max box 105 | boxes[maxpos,0] = tx1 106 | boxes[maxpos,1] = ty1 107 | boxes[maxpos,2] = tx2 108 | boxes[maxpos,3] = ty2 109 | boxes[maxpos,4] = ts 110 | 111 | tx1 = boxes[i,0] 112 | ty1 = boxes[i,1] 113 | tx2 = boxes[i,2] 114 | ty2 = boxes[i,3] 115 | ts = boxes[i,4] 116 | 117 | pos = i + 1 118 | # NMS iterations, note that N changes if detection boxes fall below threshold 119 | while pos < N: 120 | x1 = boxes[pos, 0] 121 | y1 = boxes[pos, 1] 122 | x2 = boxes[pos, 2] 123 | y2 = boxes[pos, 3] 124 | s = boxes[pos, 4] 125 | 126 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 127 | iw = (min(tx2, x2) - max(tx1, x1) + 1) 128 | if iw > 0: 129 | ih = (min(ty2, y2) - max(ty1, y1) + 1) 130 | if ih > 0: 131 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih) 132 | ov = iw * ih / ua #iou between max box and detection box 133 | 134 | if method == 1: 
# linear 135 | if ov > Nt: 136 | weight = 1 - ov 137 | else: 138 | weight = 1 139 | elif method == 2: # gaussian 140 | weight = np.exp(-(ov * ov)/sigma) 141 | else: # original NMS 142 | if ov > Nt: 143 | weight = 0 144 | else: 145 | weight = 1 146 | 147 | boxes[pos, 4] = weight*boxes[pos, 4] 148 | 149 | # if box score falls below threshold, discard the box by swapping with last box 150 | # update N 151 | if boxes[pos, 4] < threshold: 152 | boxes[pos,0] = boxes[N-1, 0] 153 | boxes[pos,1] = boxes[N-1, 1] 154 | boxes[pos,2] = boxes[N-1, 2] 155 | boxes[pos,3] = boxes[N-1, 3] 156 | boxes[pos,4] = boxes[N-1, 4] 157 | N = N - 1 158 | pos = pos - 1 159 | 160 | pos = pos + 1 161 | 162 | keep = [i for i in range(N)] 163 | return keep 164 | -------------------------------------------------------------------------------- /utils/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /utils/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include "gpu_nms.hpp" 9 | #include 10 | #include 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | 
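// Overview of the kernel below: boxes are handled in 64-box tiles, since
// threadsPerBlock = 8 * sizeof(unsigned long long) bits fit in one mask word.
// Each CUDA block takes one (row tile, column tile) pair, stages the column
// tile in shared memory, and every thread compares its own box from the row
// tile against all boxes in the column tile, setting bit i of a 64-bit word
// whenever IoU exceeds nms_overlap_thresh. The host side (_nms further down)
// then walks the boxes in descending score order and uses these bitmasks to
// suppress overlapping, lower-scoring boxes.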
__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(¤t_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 
88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 | -------------------------------------------------------------------------------- /utils/nms/py_cpu_nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def py_cpu_nms(dets, thresh): 11 | """Pure Python NMS baseline.""" 12 | x1 = dets[:, 0] 13 | y1 = dets[:, 1] 14 | x2 = dets[:, 2] 15 | y2 = dets[:, 3] 16 | scores = dets[:, 4] 17 | 18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 19 | order = scores.argsort()[::-1] 20 | 21 | keep = [] 22 | while order.size > 0: 23 | i = order[0] 24 | keep.append(i) 25 | xx1 = np.maximum(x1[i], x1[order[1:]]) 26 | yy1 = np.maximum(y1[i], y1[order[1:]]) 27 | xx2 = np.minimum(x2[i], x2[order[1:]]) 28 | yy2 = np.minimum(y2[i], y2[order[1:]]) 29 | 30 | w = np.maximum(0.0, xx2 - xx1 + 1) 31 | h = np.maximum(0.0, yy2 - yy1 + 1) 32 | inter = w * h 33 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 34 | 35 | inds = np.where(ovr <= thresh)[0] 36 | order = order[inds + 1] 37 | 38 | return keep 39 | -------------------------------------------------------------------------------- /utils/pascal_utils.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import xml.etree.ElementTree as ET 3 | from PIL import Image 4 | from xml.dom import minidom 5 | from statics import * 6 | from data_reader import * 7 | 8 | def write_pascal_annotation(file_name,obj_list,xml_file): 9 | annotation=ET.Element('annotation') 10 | 
filename=ET.SubElement(annotation,'filename') 11 | filename.text=file_name 12 | size = ET.SubElement(annotation, 'size') 13 | img=Image.open(file_name) 14 | width, height = img.size 15 | height_elem=ET.SubElement(size,'height') 16 | width_elem=ET.SubElement(size,'width') 17 | height_elem.text=str(height) 18 | width_elem.text=str(width) 19 | # print(obj_list) 20 | for i in range(0, len(obj_list), 5): 21 | class_index = obj_list[i] 22 | obj_cord = obj_list[i + 1:i + 5] 23 | obj_cord[2] = int(obj_cord[2]) + int(obj_cord[0]) 24 | obj_cord[3] = int(obj_cord[3]) + int(obj_cord[1]) 25 | object = ET.SubElement(annotation, 'object') 26 | get_object(object, obj_cord) 27 | 28 | # print(ET.dump(annotation)) 29 | anno_txt=minidom.parseString(ET.tostring(annotation)).toprettyxml() 30 | text_file = open(xml_file, "w") 31 | text_file.write(anno_txt) 32 | text_file.close() 33 | return 34 | 35 | 36 | def write_pascal_annotation_aug(file_name,obj_list,xml_file): 37 | annotation=ET.Element('annotation') 38 | filename=ET.SubElement(annotation,'filename') 39 | filename.text=file_name 40 | size = ET.SubElement(annotation, 'size') 41 | img=Image.open(file_name) 42 | width, height = img.size 43 | height_elem=ET.SubElement(size,'height') 44 | width_elem=ET.SubElement(size,'width') 45 | height_elem.text=str(height) 46 | width_elem.text=str(width) 47 | # print(obj_list) 48 | for i,obj in enumerate(obj_list): 49 | class_index = obj[4] 50 | obj_cord = obj[0:4] 51 | object = ET.SubElement(annotation, 'object') 52 | get_object(object, obj_cord) 53 | 54 | # print(ET.dump(annotation)) 55 | anno_txt=minidom.parseString(ET.tostring(annotation)).toprettyxml() 56 | text_file = open(xml_file, "w") 57 | text_file.write(anno_txt) 58 | text_file.close() 59 | return 60 | 61 | 62 | def get_object(object, obj_cord): 63 | name = ET.SubElement(object, 'name') 64 | name.text = 'pedestrian' 65 | bndbox = ET.SubElement(object, 'bndbox') 66 | difficult=ET.SubElement(object,'difficult') 67 | difficult.text=str(0) 68 | xmin = ET.SubElement(bndbox, 'xmin') 69 | ymin = ET.SubElement(bndbox, 'ymin') 70 | xmax = ET.SubElement(bndbox, 'xmax') 71 | ymax = ET.SubElement(bndbox, 'ymax') 72 | 73 | xmin.text=str(obj_cord[0]) 74 | ymin.text=str(obj_cord[1]) 75 | xmax.text=str(obj_cord[2]) 76 | ymax.text=str(obj_cord[3]) 77 | 78 | 79 | return 80 | 81 | 82 | def read_pascal_annotation(anno_file): 83 | """ 84 | 85 | :param anno_file: 86 | :return: 87 | 88 | """ 89 | tree = ET.parse(anno_file) 90 | root = tree.getroot() 91 | filename=root.find('filename').text 92 | height=int(root.find('size/height').text) 93 | width=int(root.find('size/width').text) 94 | objs=root.findall('object') 95 | objects=[] 96 | for obj in objs: 97 | class_label=obj.find('name').text 98 | xmin=int(float(obj.find('bndbox/xmin').text)) 99 | xmax=int(float(obj.find('bndbox/xmax').text)) 100 | ymin=int(float(obj.find('bndbox/ymin').text)) 101 | ymax=int(float(obj.find('bndbox/ymax').text)) 102 | objects.append([xmin,ymin,xmax,ymax,1]) 103 | res={ 104 | 'filename':filename, 105 | 'height':height, 106 | 'width':width, 107 | 'objects':objects 108 | } 109 | return res -------------------------------------------------------------------------------- /utils/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /utils/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 
'tsungyi' 2 | 3 | #import pycocotools._mask as _mask 4 | from . import _mask 5 | 6 | # Interface for manipulating masks stored in RLE format. 7 | # 8 | # RLE is a simple yet efficient format for storing binary masks. RLE 9 | # first divides a vector (or vectorized image) into a series of piecewise 10 | # constant regions and then for each piece simply stores the length of 11 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 12 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 13 | # (note that the odd counts are always the numbers of zeros). Instead of 14 | # storing the counts directly, additional compression is achieved with a 15 | # variable bitrate representation based on a common scheme called LEB128. 16 | # 17 | # Compression is greatest given large piecewise constant regions. 18 | # Specifically, the size of the RLE is proportional to the number of 19 | # *boundaries* in M (or for an image the number of boundaries in the y 20 | # direction). Assuming fairly simple shapes, the RLE representation is 21 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 22 | # is substantially lower, especially for large simple objects (large n). 23 | # 24 | # Many common operations on masks can be computed directly using the RLE 25 | # (without need for decoding). This includes computations such as area, 26 | # union, intersection, etc. All of these operations are linear in the 27 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 28 | # of the object. Computing these operations on the original mask is O(n). 29 | # Thus, using the RLE can result in substantial computational savings. 30 | # 31 | # The following API functions are defined: 32 | # encode - Encode binary masks using RLE. 33 | # decode - Decode binary masks encoded via RLE. 34 | # merge - Compute union or intersection of encoded masks. 35 | # iou - Compute intersection over union between masks. 36 | # area - Compute area of encoded masks. 37 | # toBbox - Get bounding boxes surrounding encoded masks. 38 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 39 | # 40 | # Usage: 41 | # Rs = encode( masks ) 42 | # masks = decode( Rs ) 43 | # R = merge( Rs, intersect=false ) 44 | # o = iou( dt, gt, iscrowd ) 45 | # a = area( Rs ) 46 | # bbs = toBbox( Rs ) 47 | # Rs = frPyObjects( [pyObjects], h, w ) 48 | # 49 | # In the API the following formats are used: 50 | # Rs - [dict] Run-length encoding of binary masks 51 | # R - dict Run-length encoding of binary mask 52 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 53 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 54 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 55 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 56 | # dt,gt - May be either bounding boxes or encoded masks 57 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 58 | # 59 | # Finally, a note about the intersection over union (iou) computation. 60 | # The standard iou of a ground truth (gt) and detected (dt) object is 61 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 62 | # For "crowd" regions, we use a modified criteria. If a gt object is 63 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 64 | # Choosing gt' in the crowd gt that best matches the dt can be done using 65 | # gt'=intersect(dt,gt). 
Since by definition union(gt',dt)=dt, computing 66 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 67 | # For crowd gt regions we use this modified criteria above for the iou. 68 | # 69 | # To compile run "python setup.py build_ext --inplace" 70 | # Please do not contact us for help with compiling. 71 | # 72 | # Microsoft COCO Toolbox. version 2.0 73 | # Data, paper, and tutorials available at: http://mscoco.org/ 74 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 75 | # Licensed under the Simplified BSD License [see coco/license.txt] 76 | 77 | iou = _mask.iou 78 | merge = _mask.merge 79 | frPyObjects = _mask.frPyObjects 80 | 81 | def encode(bimask): 82 | if len(bimask.shape) == 3: 83 | return _mask.encode(bimask) 84 | elif len(bimask.shape) == 2: 85 | h, w = bimask.shape 86 | return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0] 87 | 88 | def decode(rleObjs): 89 | if type(rleObjs) == list: 90 | return _mask.decode(rleObjs) 91 | else: 92 | return _mask.decode([rleObjs])[:,:,0] 93 | 94 | def area(rleObjs): 95 | if type(rleObjs) == list: 96 | return _mask.area(rleObjs) 97 | else: 98 | return _mask.area([rleObjs])[0] 99 | 100 | def toBbox(rleObjs): 101 | if type(rleObjs) == list: 102 | return _mask.toBbox(rleObjs) 103 | else: 104 | return _mask.toBbox([rleObjs])[0] 105 | -------------------------------------------------------------------------------- /utils/pycocotools/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | 9 | typedef unsigned int uint; 10 | typedef unsigned long siz; 11 | typedef unsigned char byte; 12 | typedef double* BB; 13 | typedef struct { siz h, w, m; uint *cnts; } RLE; 14 | 15 | /* Initialize/destroy RLE. */ 16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 17 | void rleFree( RLE *R ); 18 | 19 | /* Initialize/destroy RLE array. */ 20 | void rlesInit( RLE **R, siz n ); 21 | void rlesFree( RLE **R, siz n ); 22 | 23 | /* Encode binary masks using RLE. */ 24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 25 | 26 | /* Decode binary masks encoded via RLE. */ 27 | void rleDecode( const RLE *R, byte *mask, siz n ); 28 | 29 | /* Compute union or intersection of encoded masks. */ 30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ); 31 | 32 | /* Compute area of encoded masks. */ 33 | void rleArea( const RLE *R, siz n, uint *a ); 34 | 35 | /* Compute intersection over union between masks. */ 36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 37 | 38 | /* Compute non-maximum suppression between bounding masks */ 39 | void rleNms( RLE *dt, siz n, uint *keep, double thr ); 40 | 41 | /* Compute intersection over union between bounding boxes. */ 42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 43 | 44 | /* Compute non-maximum suppression between bounding boxes */ 45 | void bbNms( BB dt, siz n, uint *keep, double thr ); 46 | 47 | /* Get bounding boxes surrounding encoded masks. */ 48 | void rleToBbox( const RLE *R, BB bb, siz n ); 49 | 50 | /* Convert bounding boxes to encoded masks. 
*/ 51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 52 | 53 | /* Convert polygon to encoded mask. */ 54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 55 | 56 | /* Get compressed string representation of encoded mask. */ 57 | char* rleToString( const RLE *R ); 58 | 59 | /* Convert from compressed string representation of encoded mask. */ 60 | void rleFrString( RLE *R, char *s, siz h, siz w ); 61 | -------------------------------------------------------------------------------- /utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0. 19 | 20 | def tic(self): 21 | # using time.time instead of time.clock because time.clock 22 | # does not normalize for multithreading 23 | self.start_time = time.time() 24 | 25 | def toc(self, average=True): 26 | self.diff = time.time() - self.start_time 27 | self.total_time += self.diff 28 | self.calls += 1 29 | self.average_time = self.total_time / self.calls 30 | if average: 31 | return self.average_time 32 | else: 33 | return self.diff 34 | 35 | def clear(self): 36 | self.total_time = 0. 37 | self.calls = 0 38 | self.start_time = 0. 39 | self.diff = 0. 40 | self.average_time = 0. 41 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | from PIL import Image, ImageFont, ImageDraw, ImageEnhance 4 | import numpy as np 5 | import cv2 6 | 7 | """ 8 | face=[[x1,y1,x2,y2]] 9 | """ 10 | 11 | 12 | def draw_rectangle_w_h_box(img_path, faces, save_dir='./detected_face'): 13 | create_dir_if_not_exists(save_dir) 14 | img_face_detect = cv2.imread(img_path) 15 | for face in faces: 16 | x1, y1, x2, y2 = face 17 | cv2.rectangle(img_face_detect, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 1) 18 | cv2.imwrite(os.path.join(save_dir, os.path.basename(img_path)), img_face_detect) 19 | 20 | def draw_rectangle(img_path, faces, save_dir='./detected_face'): 21 | create_dir_if_not_exists(save_dir) 22 | img_face_detect = cv2.imread(img_path) 23 | for face in faces: 24 | x1, y1, x2, y2 = face 25 | cv2.rectangle(img_face_detect, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 1) 26 | cv2.imwrite(os.path.join(save_dir, os.path.basename(img_path)), img_face_detect) 27 | 28 | def drawbbox(file_name,bbox,save_dir): 29 | source_img = Image.open(file_name).convert("RGBA") 30 | 31 | draw = ImageDraw.Draw(source_img) 32 | # draw.rectangle(((0, 00), (100, 100)), fill="black") 33 | # draw.text((20, 70), "something123", font=ImageFont.truetype("font_path123")) 34 | 35 | create_dir_if_not_exists(save_dir) 36 | save_file=os.path.join(save_dir,os.path.basename(file_name)) 37 | source_img.convert("RGB").save(save_file, "JPEG") 38 | 39 | def get_total_params(model): 40 | model_parameters = filter(lambda p: p.requires_grad, model.parameters()) 41 | params = sum([np.prod(p.size()) for p in model_parameters]) 42 | return params 43 | 44 | def create_dir_if_not_exists(dir):
45 | if not os.path.exists(dir): 46 | os.makedirs(dir) 47 | 48 | def check_if_exists(dir): 49 | return os.path.exists(dir) 50 | 51 | def progress_bar(progress, count, message): 52 | sys.stdout.write('\r' + "{} of {}: {}".format(progress, count, message)) 53 | -------------------------------------------------------------------------------- /utils/visualization/pascal_detection_visualize.py: -------------------------------------------------------------------------------- 1 | 2 | from utils.utils import create_dir_if_not_exists 3 | import os 4 | import cv2 5 | from utils.pascal_utils import read_pascal_annotation 6 | 7 | def draw_bbox_pascal(anno_path,image_dir=None): 8 | annotation = read_pascal_annotation(anno_path) 9 | image_path = annotation['filename'] 10 | if image_dir is not None: 11 | image_path=os.path.join(image_dir,image_path) 12 | print(image_path) 13 | objects = annotation['objects'] 14 | # objects=[[100,100,200,200,1]] 15 | create_dir_if_not_exists('pascal_images') 16 | img_demo_detect = cv2.imread(image_path) 17 | save_path = os.path.join('pascal_images', os.path.basename(image_path)) 18 | for object in objects: 19 | x1, y1, x2, y2 = object[:4] 20 | cv2.rectangle(img_demo_detect, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 1) 21 | cv2.imwrite(save_path, img_demo_detect) 22 | 23 | 24 | def show_augment(): 25 | imageid = 'img08456_0' 26 | anno_path = '/media/milton/ssd1/research/competitions/data_wider_pedestrian/VOC_Wider_pedestrian/Annotations_aug/{}.xml'.format( 27 | imageid) 28 | draw_bbox_pascal(anno_path) 29 | 30 | def show_original_512(): 31 | imageid = 'img00175_6683' 32 | anno_path = '/media/milton/ssd1/research/competitions/data_wider_pedestrian/VOC_Wider_pedestrian/Annotations_512/{}.xml'.format( 33 | imageid) 34 | draw_bbox_pascal(anno_path) 35 | 36 | def show_inria_person(): 37 | # anno_path = '/media/milton/ssd1/research/competitions/data_wider_pedestrian/annotations_train/PennPed00001.xml' 38 | anno_path='/media/milton/ssd1/dataset/pedestrian/tud_brussels/annotations/img-000-2.xml' 39 | draw_bbox_pascal(anno_path) 40 | 41 | show_inria_person() 42 | # show_original_512() 43 | # show_augment() 44 | # anno_path='/media/milton/ssd1/dataset/pascal/VOCdevkit/VOC2007/Annotations/000247.xml' 45 | # draw_bbox_pascal(anno_path,'/media/milton/ssd1/dataset/pascal/VOCdevkit/VOC2007/JPEGImages') 46 | 47 | -------------------------------------------------------------------------------- /wider_face_pedestrian_to_pascal.py: -------------------------------------------------------------------------------- 1 | import xml.etree.ElementTree as ET 2 | from PIL import Image 3 | from xml.dom import minidom 4 | from statics import * 5 | from data_reader import * 6 | from utils.utils import create_dir_if_not_exists 7 | from utils.pascal_utils import * 8 | 9 | 10 | def convert_wider_pedestrian_to_pascal(): 11 | data=read_train_gt() 12 | trainvalids=[] 13 | for row in data: 14 | obj_list = row[1] 15 | image_name = row[0] 16 | annodir='/media/milton/ssd1/research/competitions/data_wider_pedestrian/VOC_Wider_pedestrian/Annotations' 17 | create_dir_if_not_exists(annodir) 18 | create_dir_if_not_exists('/media/milton/ssd1/research/competitions/data_wider_pedestrian/VOC_Wider_pedestrian/JPEGImages') 19 | xml_file_name=image_name.split('.')[0]+".xml" 20 | xml_file=os.path.join(annodir, xml_file_name) 21 | image_path=os.path.abspath(os.path.join(data_dir,"train", image_name)) 22 | write_pascal_annotation(image_path,obj_list,xml_file) 23 |
24 | voc_anno_train_dir="/media/milton/ssd1/research/competitions/data_wider_pedestrian/annotations_train" 25 | if not os.path.exists(voc_anno_train_dir): 26 | os.makedirs(voc_anno_train_dir) 27 | anno_path=os.path.join(voc_anno_train_dir,xml_file_name) 28 | write_pascal_annotation(image_path,obj_list,anno_path) 29 | 30 | trainvalids.append(image_name.split('.')[0]) 31 | # break 32 | with open('/media/milton/ssd1/research/competitions/data_wider_pedestrian/VOC_Wider_pedestrian/ImageSets/Main/trainval.txt', mode='wt', encoding='utf-8') as myfile: 33 | myfile.write('\n'.join(trainvalids)) 34 | testids=[] 35 | for row in read_val_gt(): 36 | obj_list = row[1] 37 | image_name = row[0] 38 | annodir='/media/milton/ssd1/research/competitions/data_wider_pedestrian/VOC_Wider_pedestrian/Annotations' 39 | xml_file_name=image_name.split('.')[0]+".xml" 40 | xml_file=os.path.join(annodir, xml_file_name) 41 | image_path=os.path.abspath(os.path.join(data_dir,"val", image_name)) 42 | write_pascal_annotation(image_path,obj_list,xml_file) 43 | testids.append(image_name.split('.')[0]) 44 | 45 | voc_anno_valid_dir = "/media/milton/ssd1/research/competitions/data_wider_pedestrian/annotations_valid" 46 | if not os.path.exists(voc_anno_valid_dir): 47 | os.makedirs(voc_anno_valid_dir) 48 | anno_path = os.path.join(voc_anno_valid_dir, xml_file_name) 49 | write_pascal_annotation(image_path, obj_list, anno_path) 50 | 51 | 52 | # break 53 | with open('/media/milton/ssd1/research/competitions/data_wider_pedestrian/VOC_Wider_pedestrian/ImageSets/Main/test.txt', mode='wt', encoding='utf-8') as myfile: 54 | myfile.write('\n'.join(testids)) 55 | 56 | 57 | 58 | if __name__ == '__main__': 59 | convert_wider_pedestrian_to_pascal() --------------------------------------------------------------------------------
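For reference, a minimal usage sketch of the pure-Python NMS baseline defined in utils/nms/py_cpu_nms.py above. The import path assumes the snippet is run from the repository root, and the detection boxes are made-up values chosen only to illustrate the [x1, y1, x2, y2, score] layout and the suppression behaviour; it is not part of the original code base.

import numpy as np
from utils.nms.py_cpu_nms import py_cpu_nms

# Each row is [x1, y1, x2, y2, score]; boxes 0 and 1 overlap heavily (IoU ~0.92),
# while box 2 is disjoint from both.
dets = np.array([
    [10.,  10., 110., 210., 0.95],
    [12.,  14., 108., 205., 0.80],
    [200., 50., 260., 170., 0.60],
], dtype=np.float32)

keep = py_cpu_nms(dets, thresh=0.5)
print(keep)  # [0, 2]: the lower-scoring near-duplicate of box 0 is suppressed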