├── LICENSE
├── README.md
├── __init__.py
├── arguments.py
├── augment_data.py
├── coco_eval.py
├── data
│   ├── __init__.py
│   ├── coco.py
│   ├── coco_labels.txt
│   ├── config.py
│   ├── data_augment.py
│   ├── example.jpg
│   ├── scripts
│   │   ├── COCO2014.sh
│   │   ├── VOC2007.sh
│   │   └── VOC2012.sh
│   ├── voc0712.py
│   └── voc_eval.py
├── data_process.ipynb
├── data_reader.py
├── data_reader_pedestrian.py
├── dataset.py
├── dataset
│   ├── caltech_pedestrian.py
│   ├── inria_person.py
│   ├── mall.py
│   └── upen_person.py
├── demo
│   ├── __init__.py
│   ├── demo.ipynb
│   ├── demo.py
│   └── live.py
├── doc
│   ├── RFB.png
│   ├── SSD.jpg
│   ├── detection_example.png
│   ├── detection_example2.png
│   ├── detection_examples.png
│   ├── rfb.png
│   └── ssd.png
├── eval.py
├── focal_loss.py
├── layers
│   ├── __init__.py
│   ├── functions
│   │   ├── __init__.py
│   │   ├── detection.py
│   │   └── prior_box.py
│   └── modules
│       ├── __init__.py
│       ├── l2norm.py
│       ├── multibox_loss.py
│       └── refine_multibox_loss.py
├── loss_loader.py
├── main.py
├── make.sh
├── model_loader.py
├── models
│   ├── FRFBSSD_vgg.py
│   ├── FSSD_mobile.py
│   ├── FSSD_vgg.py
│   ├── RFB_Net_E_vgg.py
│   ├── RFB_Net_mobile.py
│   ├── RFB_Net_vgg.py
│   ├── RefineSSD_vgg.py
│   ├── SSD_vgg.py
│   ├── __init__.py
│   ├── base_models.py
│   ├── densenet.py
│   ├── mobilenet.py
│   ├── resnet.py
│   └── vgg.py
├── multi_thread_score_pedestrian_detection.py
├── object_detector.py
├── pretrainedmodels
│   ├── __init__.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── utils.py
│   │   └── voc.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── bninception.py
│   │   ├── cafferesnet.py
│   │   ├── dpn.py
│   │   ├── fbresnet.py
│   │   ├── fbresnet
│   │   │   ├── resnet152_dump.lua
│   │   │   └── resnet152_load.py
│   │   ├── inceptionresnetv2.py
│   │   ├── inceptionv4.py
│   │   ├── nasnet.py
│   │   ├── nasnet_mobile.py
│   │   ├── pnasnet.py
│   │   ├── polynet.py
│   │   ├── resnext.py
│   │   ├── resnext_features
│   │   │   ├── __init__.py
│   │   │   ├── resnext101_32x4d_features.py
│   │   │   └── resnext101_64x4d_features.py
│   │   ├── senet.py
│   │   ├── torchvision_models.py
│   │   ├── utils.py
│   │   ├── vggm.py
│   │   ├── wideresnet.py
│   │   └── xception.py
│   ├── utils.py
│   └── version.py
├── refinedet_train_test.py
├── score_pedestrian_detection.py
├── statics.py
├── train.py
├── train_test.py
├── train_test_fssd_mobile_pre.py
├── transforms.py
├── utils
│   ├── __init__.py
│   ├── box_utils.py
│   ├── build.py
│   ├── json_utils.py
│   ├── nms
│   │   ├── __init__.py
│   │   ├── cpu_nms.c
│   │   ├── cpu_nms.pyx
│   │   ├── gpu_nms.pyx
│   │   ├── nms_kernel.cu
│   │   └── py_cpu_nms.py
│   ├── pascal_utils.py
│   ├── pycocotools
│   │   ├── __init__.py
│   │   ├── _mask.c
│   │   ├── _mask.pyx
│   │   ├── coco.py
│   │   ├── cocoeval.py
│   │   ├── mask.py
│   │   ├── maskApi.c
│   │   └── maskApi.h
│   ├── timer.py
│   ├── utils.py
│   └── visualization
│       └── pascal_detection_visualize.py
└── wider_face_pedestrian_to_pascal.py
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Max deGroot, Ellis Brown
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RetinaNet applied to the WIDER pedestrian detection challenge at ECCV 2018, in PyTorch
2 | 
3 | This code placed 21st out of 168 teams.
4 | 
5 | Improvement ideas (todos):
6 | 
7 | - Run for more epochs
8 | - Use more data augmentation methods
9 | - Optimize hyper-parameters such as the learning rate and the learning rate decay schedule
10 | - Use Adam or SGD with momentum
11 | - Use Soft-NMS (see the sketch after this list)
12 | - Use multi-scale testing
13 | - Optimize the FPN feature extractor for small pedestrian objects
14 | - Use a GAN to generate training data in the context of roads and pedestrians
15 | 
16 | 
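A minimal sketch of the Soft-NMS idea referenced above (linear-decay variant, plain NumPy). This helper is illustrative only and is not part of this repository; the existing NMS code lives under `utils/nms/`.

```python
import numpy as np

def soft_nms_linear(dets, iou_thresh=0.3, score_thresh=0.001):
    """Linear Soft-NMS: down-weight overlapping scores instead of discarding boxes.

    dets: (N, 5) float array of [x1, y1, x2, y2, score]; returns the kept detections.
    """
    dets = dets.copy()
    keep = []
    while dets.shape[0] > 0:
        best = np.argmax(dets[:, 4])
        dets[[0, best]] = dets[[best, 0]]   # move the highest-scoring box to the front
        keep.append(dets[0].copy())
        rest = dets[1:]

        # IoU between the kept box and every remaining box
        xx1 = np.maximum(dets[0, 0], rest[:, 0])
        yy1 = np.maximum(dets[0, 1], rest[:, 1])
        xx2 = np.minimum(dets[0, 2], rest[:, 2])
        yy2 = np.minimum(dets[0, 3], rest[:, 3])
        inter = np.maximum(0.0, xx2 - xx1 + 1.0) * np.maximum(0.0, yy2 - yy1 + 1.0)
        area_best = (dets[0, 2] - dets[0, 0] + 1.0) * (dets[0, 3] - dets[0, 1] + 1.0)
        area_rest = (rest[:, 2] - rest[:, 0] + 1.0) * (rest[:, 3] - rest[:, 1] + 1.0)
        iou = inter / (area_best + area_rest - inter)

        # linearly decay the scores of boxes that overlap the kept box
        rest[:, 4] *= np.where(iou > iou_thresh, 1.0 - iou, 1.0)
        dets = rest[rest[:, 4] > score_thresh]
    return np.vstack(keep) if keep else dets
```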
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/__init__.py
--------------------------------------------------------------------------------
/arguments.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | parser = argparse.ArgumentParser(description='PyTorch student network training')
3 |
4 | parser.add_argument('--lr',default=0.001,
5 | type=float,
6 | help='learning rate')
7 | parser.add_argument('--resume',
8 | action='store_true',
9 | help='resume from checkpoint')
10 | parser.add_argument('--optimizer',
11 | type=str,
12 | help='optimizer type',
13 | default='adam')
14 | parser.add_argument('--criterion',
15 | type=str,
16 | help='criterion',
17 | default='MSE')
18 | parser.add_argument('--root',
19 | default='../data/',
20 | type=str,
21 | help='data root path')
22 | parser.add_argument('--datalist',
23 | default='../data/datalist/',
24 | type=str,
25 | help='datalist path')
26 | parser.add_argument('--batch_size',
27 | type=int,
28 | help='mini-batch size',
29 | default=150)
30 | parser.add_argument('--name',
31 | default='VGG19_BN',
32 | type=str,
33 | help='session name')
34 | parser.add_argument('--log_dir_path',
35 | default='./student_net_learning/logs',
36 | type=str,
37 | help='log directory path')
38 | parser.add_argument('--epochs',
39 | default=200,
40 | type=int,
41 | help='number of epochs')
42 | parser.add_argument('--cuda',
43 | type=int,
44 | default=1,
45 | help='use CUDA')
46 | parser.add_argument('--model_name',
47 | type=str,
48 | help='model name',
49 | default='ResNet50')
50 | parser.add_argument('--down_epoch',
51 | type=int,
52 |                     help='epoch number at which the learning rate is multiplied by 0.1',
53 | default=30)
--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
1 | # from .voc import VOCDetection, AnnotationTransform, detection_collate, VOC_CLASSES
2 | from .voc0712 import VOCDetection, AnnotationTransform, detection_collate, VOC_CLASSES
3 | from .coco import COCODetection
4 | from .data_augment import *
5 | from .config import *
6 |
--------------------------------------------------------------------------------
/data/coco_labels.txt:
--------------------------------------------------------------------------------
1 | 1,1,person
2 | 2,2,bicycle
3 | 3,3,car
4 | 4,4,motorcycle
5 | 5,5,airplane
6 | 6,6,bus
7 | 7,7,train
8 | 8,8,truck
9 | 9,9,boat
10 | 10,10,traffic light
11 | 11,11,fire hydrant
12 | 13,12,stop sign
13 | 14,13,parking meter
14 | 15,14,bench
15 | 16,15,bird
16 | 17,16,cat
17 | 18,17,dog
18 | 19,18,horse
19 | 20,19,sheep
20 | 21,20,cow
21 | 22,21,elephant
22 | 23,22,bear
23 | 24,23,zebra
24 | 25,24,giraffe
25 | 27,25,backpack
26 | 28,26,umbrella
27 | 31,27,handbag
28 | 32,28,tie
29 | 33,29,suitcase
30 | 34,30,frisbee
31 | 35,31,skis
32 | 36,32,snowboard
33 | 37,33,sports ball
34 | 38,34,kite
35 | 39,35,baseball bat
36 | 40,36,baseball glove
37 | 41,37,skateboard
38 | 42,38,surfboard
39 | 43,39,tennis racket
40 | 44,40,bottle
41 | 46,41,wine glass
42 | 47,42,cup
43 | 48,43,fork
44 | 49,44,knife
45 | 50,45,spoon
46 | 51,46,bowl
47 | 52,47,banana
48 | 53,48,apple
49 | 54,49,sandwich
50 | 55,50,orange
51 | 56,51,broccoli
52 | 57,52,carrot
53 | 58,53,hot dog
54 | 59,54,pizza
55 | 60,55,donut
56 | 61,56,cake
57 | 62,57,chair
58 | 63,58,couch
59 | 64,59,potted plant
60 | 65,60,bed
61 | 67,61,dining table
62 | 70,62,toilet
63 | 72,63,tv
64 | 73,64,laptop
65 | 74,65,mouse
66 | 75,66,remote
67 | 76,67,keyboard
68 | 77,68,cell phone
69 | 78,69,microwave
70 | 79,70,oven
71 | 80,71,toaster
72 | 81,72,sink
73 | 82,73,refrigerator
74 | 84,74,book
75 | 85,75,clock
76 | 86,76,vase
77 | 87,77,scissors
78 | 88,78,teddy bear
79 | 89,79,hair drier
80 | 90,80,toothbrush
81 |
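Each row above is `original_coco_category_id,contiguous_label,class_name`. A small loader sketch for this mapping (a hypothetical helper, not present in the repo):

```python
def load_coco_label_map(path="data/coco_labels.txt"):
    """Map original COCO category ids to (contiguous label, class name)."""
    label_map = {}
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            coco_id, contiguous_id, name = line.split(",", 2)
            label_map[int(coco_id)] = (int(contiguous_id), name)
    return label_map
```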
--------------------------------------------------------------------------------
/data/config.py:
--------------------------------------------------------------------------------
1 | # config.py
2 |
3 | # gets home dir cross platform
4 | import cv2
5 | cv2.setNumThreads(0) # pytorch issue 1355: possible deadlock in dataloader
6 | # note: if you used our download scripts, this should be right
7 | VOCroot = '/media/milton/ssd1/dataset/pascal/VOCdevkit/' # path to VOCdevkit root dir
8 | COCOroot = '/home/user/Database/MSCOCO2017'
9 |
10 | # RFB CONFIGS
11 | VOC_300 = {
12 | 'feature_maps': [38, 19, 10, 5, 3, 1],
13 |
14 | 'min_dim': 300,
15 |
16 | 'steps': [8, 16, 32, 64, 100, 300],
17 |
18 | 'min_sizes': [30, 60, 111, 162, 213, 264],
19 |
20 | 'max_sizes': [60, 111, 162, 213, 264, 315],
21 |
22 | 'aspect_ratios': [[2, 3], [2, 3], [2, 3], [2, 3], [2], [2]],
23 |
24 | 'variance': [0.1, 0.2],
25 |
26 | 'clip': True,
27 | }
28 |
29 | VOC_512 = {
30 | 'feature_maps': [64, 32, 16, 8, 4, 2, 1],
31 |
32 | 'min_dim': 512,
33 |
34 | 'steps': [8, 16, 32, 64, 128, 256, 512],
35 |
36 | 'min_sizes': [35.84, 76.8, 153.6, 230.4, 307.2, 384.0, 460.8],
37 |
38 | 'max_sizes': [76.8, 153.6, 230.4, 307.2, 384.0, 460.8, 537.6],
39 |
40 | 'aspect_ratios': [[2, 3], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]],
41 |
42 | 'variance': [0.1, 0.2],
43 |
44 | 'clip': True,
45 | }
46 |
47 | COCO_300 = {
48 | 'feature_maps': [38, 19, 10, 5, 3, 1],
49 |
50 | 'min_dim': 300,
51 |
52 | 'steps': [8, 16, 32, 64, 100, 300],
53 |
54 | 'min_sizes': [21, 45, 99, 153, 207, 261],
55 |
56 | 'max_sizes': [45, 99, 153, 207, 261, 315],
57 |
58 | 'aspect_ratios': [[2, 3], [2, 3], [2, 3], [2, 3], [2], [2]],
59 |
60 | 'variance': [0.1, 0.2],
61 |
62 | 'clip': True,
63 | }
64 |
65 | COCO_512 = {
66 | 'feature_maps': [64, 32, 16, 8, 4, 2, 1],
67 |
68 | 'min_dim': 512,
69 |
70 | 'steps': [8, 16, 32, 64, 128, 256, 512],
71 |
72 | 'min_sizes': [20.48, 51.2, 133.12, 215.04, 296.96, 378.88, 460.8],
73 |
74 | 'max_sizes': [51.2, 133.12, 215.04, 296.96, 378.88, 460.8, 542.72],
75 |
76 | 'aspect_ratios': [[2, 3], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]],
77 |
78 | 'variance': [0.1, 0.2],
79 |
80 | 'clip': True,
81 | }
82 |
83 | COCO_mobile_300 = {
84 | 'feature_maps': [19, 10, 5, 3, 2, 1],
85 |
86 | 'min_dim': 300,
87 |
88 | 'steps': [16, 32, 64, 100, 150, 300],
89 |
90 | 'min_sizes': [45, 90, 135, 180, 225, 270],
91 |
92 | 'max_sizes': [90, 135, 180, 225, 270, 315],
93 |
94 | 'aspect_ratios': [[2, 3], [2, 3], [2, 3], [2, 3], [2], [2]],
95 |
96 | 'variance': [0.1, 0.2],
97 |
98 | 'clip': True,
99 | }
100 |
101 | VOC_320 = {
102 | 'feature_maps': [40, 20, 10, 5],
103 |
104 | 'min_dim': 320,
105 |
106 | 'steps': [8, 16, 32, 64],
107 |
108 | 'min_sizes': [32, 64, 128, 256],
109 |
110 | 'max_sizes': [],
111 |
112 | 'aspect_ratios': [[2], [2], [2], [2]],
113 |
114 | 'variance': [0.1, 0.2],
115 |
116 | 'clip': True,
117 | }
118 |
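As a quick sanity check on these settings, the number of default boxes implied by a config can be computed from `feature_maps`, `aspect_ratios`, and `max_sizes`, following the layout in layers/functions/prior_box.py (one min-size prior, one extra prior when `max_sizes` is set, and two priors per extra aspect ratio). The helper below is illustrative, not part of the repo:

```python
def count_priors(cfg):
    """Total default boxes produced by PriorBox for a given config dict."""
    total = 0
    for k, f in enumerate(cfg['feature_maps']):
        per_location = 1 + 2 * len(cfg['aspect_ratios'][k])
        if cfg['max_sizes']:
            per_location += 1
        total += f * f * per_location
    return total

# count_priors(VOC_300) -> 11620
```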
--------------------------------------------------------------------------------
/data/example.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/data/example.jpg
--------------------------------------------------------------------------------
/data/scripts/COCO2014.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | start=`date +%s`
4 |
5 | # handle optional download dir
6 | if [ -z "$1" ]
7 | then
8 | # navigate to ~/data
9 | echo "navigating to ~/data/ ..."
10 | mkdir -p ~/data
11 | cd ~/data/
12 | mkdir -p ./coco
13 | cd ./coco
14 | mkdir -p ./images
15 | mkdir -p ./annotations
16 | else
17 | # check if specified dir is valid
18 | if [ ! -d $1 ]; then
19 | echo $1 " is not a valid directory"
20 | exit 0
21 | fi
22 | echo "navigating to " $1 " ..."
23 | cd $1
24 | fi
25 |
26 | if [ ! -d images ]
27 | then
28 | mkdir -p ./images
29 | fi
30 |
31 | # Download the image data.
32 | cd ./images
33 | echo "Downloading MSCOCO train images ..."
34 | curl -LO http://images.cocodataset.org/zips/train2014.zip
35 | echo "Downloading MSCOCO val images ..."
36 | curl -LO http://images.cocodataset.org/zips/val2014.zip
37 |
38 | cd ../
39 | if [ ! -d annotations ]
40 | then
41 | mkdir -p ./annotations
42 | fi
43 |
44 | # Download the annotation data.
45 | cd ./annotations
46 | echo "Downloading MSCOCO train/val annotations ..."
47 | curl -LO http://images.cocodataset.org/annotations/annotations_trainval2014.zip
48 | echo "Finished downloading. Now extracting ..."
49 |
50 | # Unzip data
51 | echo "Extracting train images ..."
52 | unzip ../images/train2014.zip -d ../images
53 | echo "Extracting val images ..."
54 | unzip ../images/val2014.zip -d ../images
55 | echo "Extracting annotations ..."
56 | unzip ./annotations_trainval2014.zip
57 |
58 | echo "Removing zip files ..."
59 | rm ../images/train2014.zip
60 | rm ../images/val2014.zip
61 | rm ./annotations_trainval2014.zip
62 |
63 | echo "Creating trainval35k dataset..."
64 |
65 | # Download annotations json
66 | echo "Downloading trainval35k annotations from S3"
67 | curl -LO https://s3.amazonaws.com/amdegroot-datasets/instances_trainval35k.json.zip
68 |
69 | # combine train and val
70 | echo "Combining train and val images"
71 | mkdir ../images/trainval35k
72 | cd ../images/train2014
73 | find -maxdepth 1 -name '*.jpg' -exec cp -t ../trainval35k {} + # dir too large for cp
74 | cd ../val2014
75 | find -maxdepth 1 -name '*.jpg' -exec cp -t ../trainval35k {} +
76 |
77 |
78 | end=`date +%s`
79 | runtime=$((end-start))
80 |
81 | echo "Completed in " $runtime " seconds"
82 |
--------------------------------------------------------------------------------
/data/scripts/VOC2007.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Ellis Brown
3 |
4 | start=`date +%s`
5 |
6 | # handle optional download dir
7 | if [ -z "$1" ]
8 | then
9 | # navigate to ~/data
10 | echo "navigating to ~/data/ ..."
11 | mkdir -p ~/data
12 | cd ~/data/
13 | else
14 | # check if is valid directory
15 | if [ ! -d $1 ]; then
16 | echo $1 "is not a valid directory"
17 | exit 0
18 | fi
19 | echo "navigating to" $1 "..."
20 | cd $1
21 | fi
22 |
23 | echo "Downloading VOC2007 trainval ..."
24 | # Download the data.
25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
26 | echo "Downloading VOC2007 test data ..."
27 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
28 | echo "Done downloading."
29 |
30 | # Extract data
31 | echo "Extracting trainval ..."
32 | tar -xvf VOCtrainval_06-Nov-2007.tar
33 | echo "Extracting test ..."
34 | tar -xvf VOCtest_06-Nov-2007.tar
35 | echo "removing tars ..."
36 | rm VOCtrainval_06-Nov-2007.tar
37 | rm VOCtest_06-Nov-2007.tar
38 |
39 | end=`date +%s`
40 | runtime=$((end-start))
41 |
42 | echo "Completed in" $runtime "seconds"
--------------------------------------------------------------------------------
/data/scripts/VOC2012.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Ellis Brown
3 |
4 | start=`date +%s`
5 |
6 | # handle optional download dir
7 | if [ -z "$1" ]
8 | then
9 | # navigate to ~/data
10 | echo "navigating to ~/data/ ..."
11 | mkdir -p ~/data
12 | cd ~/data/
13 | else
14 | # check if is valid directory
15 | if [ ! -d $1 ]; then
16 | echo $1 "is not a valid directory"
17 | exit 0
18 | fi
19 | echo "navigating to" $1 "..."
20 | cd $1
21 | fi
22 |
23 | echo "Downloading VOC2012 trainval ..."
24 | # Download the data.
25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
26 | echo "Done downloading."
27 |
28 |
29 | # Extract data
30 | echo "Extracting trainval ..."
31 | tar -xvf VOCtrainval_11-May-2012.tar
32 | echo "removing tar ..."
33 | rm VOCtrainval_11-May-2012.tar
34 |
35 | end=`date +%s`
36 | runtime=$((end-start))
37 |
38 | echo "Completed in" $runtime "seconds"
--------------------------------------------------------------------------------
/data/voc_eval.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast/er R-CNN
3 | # Licensed under The MIT License [see LICENSE for details]
4 | # Written by Bharath Hariharan
5 | # --------------------------------------------------------
6 |
7 | import pickle
8 | import xml.etree.ElementTree as ET
9 |
10 | import numpy as np
11 | import os
12 |
13 |
14 | def parse_rec(filename):
15 | """ Parse a PASCAL VOC xml file """
16 | tree = ET.parse(filename)
17 | objects = []
18 | for obj in tree.findall('object'):
19 | obj_struct = {}
20 | obj_struct['name'] = obj.find('name').text
21 | obj_struct['pose'] = obj.find('pose').text
22 | obj_struct['truncated'] = int(obj.find('truncated').text)
23 | obj_struct['difficult'] = int(obj.find('difficult').text)
24 | bbox = obj.find('bndbox')
25 | obj_struct['bbox'] = [int(bbox.find('xmin').text),
26 | int(bbox.find('ymin').text),
27 | int(bbox.find('xmax').text),
28 | int(bbox.find('ymax').text)]
29 | objects.append(obj_struct)
30 |
31 | return objects
32 |
33 |
34 | def voc_ap(rec, prec, use_07_metric=False):
35 | """ ap = voc_ap(rec, prec, [use_07_metric])
36 | Compute VOC AP given precision and recall.
37 | If use_07_metric is true, uses the
38 | VOC 07 11 point method (default:False).
39 | """
40 | if use_07_metric:
41 | # 11 point metric
42 | ap = 0.
43 | for t in np.arange(0., 1.1, 0.1):
44 | if np.sum(rec >= t) == 0:
45 | p = 0
46 | else:
47 | p = np.max(prec[rec >= t])
48 | ap = ap + p / 11.
49 | else:
50 | # correct AP calculation
51 | # first append sentinel values at the end
52 | mrec = np.concatenate(([0.], rec, [1.]))
53 | mpre = np.concatenate(([0.], prec, [0.]))
54 |
55 | # compute the precision envelope
56 | for i in range(mpre.size - 1, 0, -1):
57 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
58 |
59 | # to calculate area under PR curve, look for points
60 | # where X axis (recall) changes value
61 | i = np.where(mrec[1:] != mrec[:-1])[0]
62 |
63 | # and sum (\Delta recall) * prec
64 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
65 | return ap
66 |
67 |
68 | def voc_eval(detpath,
69 | annopath,
70 | imagesetfile,
71 | classname,
72 | cachedir,
73 | ovthresh=0.5,
74 | use_07_metric=False):
75 | """rec, prec, ap = voc_eval(detpath,
76 | annopath,
77 | imagesetfile,
78 | classname,
79 | [ovthresh],
80 | [use_07_metric])
81 |
82 | Top level function that does the PASCAL VOC evaluation.
83 |
84 | detpath: Path to detections
85 | detpath.format(classname) should produce the detection results file.
86 | annopath: Path to annotations
87 | annopath.format(imagename) should be the xml annotations file.
88 | imagesetfile: Text file containing the list of images, one image per line.
89 | classname: Category name (duh)
90 | cachedir: Directory for caching the annotations
91 | [ovthresh]: Overlap threshold (default = 0.5)
92 | [use_07_metric]: Whether to use VOC07's 11 point AP computation
93 | (default False)
94 | """
95 | # assumes detections are in detpath.format(classname)
96 | # assumes annotations are in annopath.format(imagename)
97 | # assumes imagesetfile is a text file with each line an image name
98 | # cachedir caches the annotations in a pickle file
99 |
100 | # first load gt
101 | if not os.path.isdir(cachedir):
102 | os.mkdir(cachedir)
103 | cachefile = os.path.join(cachedir, 'annots.pkl')
104 | # read list of images
105 | with open(imagesetfile, 'r') as f:
106 | lines = f.readlines()
107 | imagenames = [x.strip() for x in lines]
108 |
109 | if not os.path.isfile(cachefile):
110 | # load annots
111 | recs = {}
112 | for i, imagename in enumerate(imagenames):
113 | recs[imagename] = parse_rec(annopath.format(imagename))
114 | if i % 100 == 0:
115 | print('Reading annotation for {:d}/{:d}'.format(
116 | i + 1, len(imagenames)))
117 | # save
118 | print('Saving cached annotations to {:s}'.format(cachefile))
119 | with open(cachefile, 'wb') as f:
120 | pickle.dump(recs, f)
121 | else:
122 | # load
123 | with open(cachefile, 'rb') as f:
124 | recs = pickle.load(f)
125 |
126 | # extract gt objects for this class
127 | class_recs = {}
128 | npos = 0
129 | for imagename in imagenames:
130 | R = [obj for obj in recs[imagename] if obj['name'] == classname]
131 | bbox = np.array([x['bbox'] for x in R])
132 |         difficult = np.array([x['difficult'] for x in R]).astype(bool)
133 | det = [False] * len(R)
134 | npos = npos + sum(~difficult)
135 | class_recs[imagename] = {'bbox': bbox,
136 | 'difficult': difficult,
137 | 'det': det}
138 |
139 | # read dets
140 | detfile = detpath.format(classname)
141 | with open(detfile, 'r') as f:
142 | lines = f.readlines()
143 |
144 | splitlines = [x.strip().split(' ') for x in lines]
145 | image_ids = [x[0] for x in splitlines]
146 | confidence = np.array([float(x[1]) for x in splitlines])
147 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
148 |
149 | # sort by confidence
150 | sorted_ind = np.argsort(-confidence)
151 | sorted_scores = np.sort(-confidence)
152 | BB = BB[sorted_ind, :]
153 | image_ids = [image_ids[x] for x in sorted_ind]
154 |
155 | # go down dets and mark TPs and FPs
156 | nd = len(image_ids)
157 | tp = np.zeros(nd)
158 | fp = np.zeros(nd)
159 | for d in range(nd):
160 | R = class_recs[image_ids[d]]
161 | bb = BB[d, :].astype(float)
162 | ovmax = -np.inf
163 | BBGT = R['bbox'].astype(float)
164 |
165 | if BBGT.size > 0:
166 | # compute overlaps
167 | # intersection
168 | ixmin = np.maximum(BBGT[:, 0], bb[0])
169 | iymin = np.maximum(BBGT[:, 1], bb[1])
170 | ixmax = np.minimum(BBGT[:, 2], bb[2])
171 | iymax = np.minimum(BBGT[:, 3], bb[3])
172 | iw = np.maximum(ixmax - ixmin + 1., 0.)
173 | ih = np.maximum(iymax - iymin + 1., 0.)
174 | inters = iw * ih
175 |
176 | # union
177 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
178 | (BBGT[:, 2] - BBGT[:, 0] + 1.) *
179 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
180 |
181 | overlaps = inters / uni
182 | ovmax = np.max(overlaps)
183 | jmax = np.argmax(overlaps)
184 |
185 | if ovmax > ovthresh:
186 | if not R['difficult'][jmax]:
187 | if not R['det'][jmax]:
188 | tp[d] = 1.
189 | R['det'][jmax] = 1
190 | else:
191 | fp[d] = 1.
192 | else:
193 | fp[d] = 1.
194 |
195 | # compute precision recall
196 | fp = np.cumsum(fp)
197 | tp = np.cumsum(tp)
198 | rec = tp / float(npos)
199 | # avoid divide by zero in case the first detection matches a difficult
200 | # ground truth
201 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
202 | ap = voc_ap(rec, prec, use_07_metric)
203 |
204 | return rec, prec, ap
205 |
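An illustrative call matching the docstring above; the paths are hypothetical placeholders for a standard VOCdevkit layout:

```python
detpath = 'results/det_test_{:s}.txt'                       # one detection file per class
annopath = 'VOCdevkit/VOC2007/Annotations/{:s}.xml'         # per-image XML annotations
imagesetfile = 'VOCdevkit/VOC2007/ImageSets/Main/test.txt'  # image ids, one per line

rec, prec, ap = voc_eval(detpath, annopath, imagesetfile,
                         classname='person',
                         cachedir='annotations_cache',
                         ovthresh=0.5,
                         use_07_metric=True)
print('person AP: {:.4f}'.format(ap))
```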
--------------------------------------------------------------------------------
/data_reader.py:
--------------------------------------------------------------------------------
1 | from torch.utils.data.dataset import Dataset
2 | from torchvision import transforms
3 | from PIL import Image
4 | from torchvision.transforms import *
5 |
6 | data_set_name="ISIC 2018"
7 |
8 | from layers.functions import Detect
9 |
10 |
11 | def str2bool(v):
12 | return v.lower() in ("yes", "true", "t", "1")
13 |
14 | """
15 |           Train    Val      Test
16 | Images    11500    5000     3500
17 | Labels    46513    19696
18 |
19 | todo ignore parts set zero
20 | """
21 |
22 | data_set_name="Wider Face Pedestrian dataset."
23 |
24 | def read_train_gt():
25 | annotations=[]
26 | with open(train_bbx_gt_file,'r') as train_bbx_file:
27 | content=train_bbx_file.readlines();
28 | for line in content:
29 | line_list=line.split(" ")
30 | file_name=line_list[0]
31 | row=[]
32 | for idx in range(1,len(line_list)-1,5):
33 | class_num=line_list[idx]
34 | left=line_list[idx+1]
35 | top=line_list[idx+2]
36 | w=line_list[idx+3]
37 | h=line_list[idx+4].strip()
38 | obj=[class_num, left, top, w, h]
39 | if len(obj)>0:
40 | row+=obj
41 | if len(row)>0:
42 | annotations.append([file_name,row[:]])
43 | return annotations
44 |
45 |
46 | def read_val_gt():
47 | annotations = []
48 | with open(val_bbx_gt_file, 'r') as train_bbx_file:
49 | content = train_bbx_file.readlines();
50 | for line in content:
51 | line_list = line.split(" ")
52 | file_name = line_list[0]
53 | row = []
54 | for idx in range(1, len(line_list) - 1, 5):
55 | class_num = line_list[idx]
56 | left = line_list[idx + 1]
57 | top = line_list[idx + 2]
58 | w = line_list[idx + 3]
59 | h = line_list[idx + 4].strip()
60 | obj = [class_num, left, top, w, h]
61 | if len(obj) > 0:
62 | row += obj
63 | if len(row) > 0:
64 | annotations.append([file_name, row[:]])
65 | return annotations
66 |
67 |
68 | # annotations=read_train_gt()
69 | # print(len(annotations))
70 | #
71 | # count=0
72 | # for anno in annotations:
73 | # count+=len(annotations[anno])
74 | # print(count)
75 | # # annos= read_train_gt()
76 | # # for anno in annos:
77 | # # print(annos[anno])
78 |
79 | def test_read_data():
80 | train_gt=read_train_gt()
81 | for row in train_gt:
82 | print(row)
83 |
84 |
85 | def get_validation_data():
86 | return
87 |
88 | class DatasetReader(Dataset):
89 | """
90 | """
91 | def __init__(self, data,mode='train',):
92 | print("{} count:{}".format(mode,len(data)))
93 | self.mode=mode
94 | self.data=np.asarray(data)
95 | self.transform_train_image=transforms.Compose([
96 | RandomCrop([224,224]),
97 | RandomHorizontalFlip(p=.2),
98 | # ColorJitter(.6),
99 | # RandomVerticalFlip(p=.2),
100 | # RandomGrayscale(p=.2),
101 | # transforms.RandomRotation(10),
102 | # transforms.RandomAffine(10),
103 | # ColorJitter(.6),
104 | transforms.ToTensor(),
105 | # transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
106 | ]);
107 |
108 | self.transform_test_image = transforms.Compose([
109 | transforms.Resize([224, 224]),
110 | transforms.ToTensor()]);
111 |
112 |
113 | def __getitem__(self, index):
114 | img_path=self.data[index,0]
115 | label=int(self.data[index,1])
116 |
117 | if not os.path.exists(img_path):
118 | print("{} image not found".format(img_path))
119 | exit(0);
120 | img = Image.open(img_path)
121 | if self.mode=="train":
122 | data = self.transform_train_image(img)
123 | return data, label
124 |
125 | elif self.mode=="valid":
126 | data = self.transform_test_image(img)
127 | return data, label
128 |
129 | def __len__(self):
130 | return len(self.data)
131 | from statics import *
132 | from data import *
133 | def get_data_loader(args):
134 | return get_voc_reader(args)
135 |
136 | def get_voc_reader(args):
137 | img_dim=args.size
138 | rgb_means = (104, 117, 123)
139 | rgb_std = (1, 1, 1)
140 | p = (0.6, 0.2)[args.version == 'RFB_mobile']
141 | train_sets = [('2007', 'trainval'), ('2012', 'trainval')]
142 | cfg = (VOC_300, VOC_512)[args.size == '512']
143 |
144 | testset = VOCDetection(
145 | VOCroot, [('2007', 'test')], None, AnnotationTransform())
146 |
147 | train_dataset = VOCDetection(VOCroot, train_sets, preproc(
148 | img_dim, rgb_means, rgb_std, p), AnnotationTransform())
149 |
150 | trainloader = torch.utils.data.DataLoader(train_dataset, args.batch_size,
151 | shuffle=True, num_workers=args.num_workers,
152 | collate_fn=detection_collate)
153 | num_classes=len(args.classes.split(","))
154 | detector = Detect(num_classes, 0, cfg)
155 |
156 | return (trainloader, (testset,detector))
157 |
158 | def test():
159 | trainloader, valloader = get_data_loader(100)
160 | for idx, (inputs, targets) in enumerate(valloader):
161 | print(inputs.shape)
162 |
163 | """
164 | all the ignore parts of image will be zero.
165 | """
166 | from utils.file_utils import *
167 |
168 | def get_ignore_parts_for_train():
169 |     annotations={}
170 |     for line in read_text_file(train_bbx_ignore_file):
171 |         line_list = line.split(" ")
172 |         file_name = line_list[0]
173 |         # each ignore region is four values: left, top, width, height
174 |         for idx in range(1, len(line_list) - 1, 4):
175 |             left = line_list[idx]
176 |             top = line_list[idx + 1]
177 |             w = line_list[idx + 2]
178 |             h = line_list[idx + 3].strip()
179 |             annotations.setdefault(file_name, []).append([left, top, w, h])
180 | return annotations
181 |
182 |
183 |
184 | if __name__ == '__main__':
185 | read_train_gt()
186 |
187 |
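For reference, the ground-truth files parsed by `read_train_gt` / `read_val_gt` hold one image per line: the file name followed by repeated `class left top width height` groups. A made-up example line and the entry it would produce:

```python
line = "img_00001.jpg 1 10 20 30 80 1 200 40 25 60"
# read_train_gt appends entries of the form [file_name, flat_list_of_box_fields]:
# ['img_00001.jpg', ['1', '10', '20', '30', '80', '1', '200', '40', '25', '60']]
```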
--------------------------------------------------------------------------------
/data_reader_pedestrian.py:
--------------------------------------------------------------------------------
1 | import glob
2 |
3 | def get_test_loader_for_upload(batch_size):
4 | test_files=glob.glob("/media/milton/ssd1/research/competitions/data_wider_pedestrian/test_new/test_new/**.jpg")
5 | return test_files
--------------------------------------------------------------------------------
/dataset.py:
--------------------------------------------------------------------------------
1 | '''Custom dataset for loading imgs and descriptors
2 | '''
3 | import os.path
4 |
5 | import numpy as np
6 | import pandas as pd
7 | import torch
8 | import torch.utils.data as data
9 | from PIL import Image
10 |
11 | def default_loader(path):
12 | with open(path, 'rb') as f:
13 | with Image.open(f) as img:
14 | return img.convert('RGB')
15 |
16 | def np_loader(path):
17 | return np.load(path)
18 |
19 |
20 |
21 | def build_dataset_lists(list_path,split):
22 | im_list = os.path.join(list_path, 'im_'+split+'.txt')
23 | at_list = os.path.join(list_path, 'at_'+split+'.npy')
24 | print(os.path.abspath(im_list))
25 | images = pd.read_csv(im_list, header=None, names=['impath'])
26 | targets = np.load(at_list)
27 | return images.impath.values,targets
28 |
29 | class ImageListDataset(data.Dataset):
30 | """
31 | Builds a dataset based on a list of images.
32 | root -- path to images
33 | list_path -- path to image lists
34 |     split -- train|val - name of the dataset split (default: train)
35 | transform -- transform for images
36 | """
37 | def __init__(self, root, list_path, split = 'train',
38 | transform=None, loader=default_loader):
39 |
40 | images, targets = build_dataset_lists(list_path,split)
41 | self.root = root
42 | self.images = root + images
43 | self.targets = targets
44 | self.transform = transform
45 | self.loader = loader
46 |
47 | def __getitem__(self, index):
48 | """
49 | Args:
50 | index (int): Index
51 | Returns:
52 | tuple: (image, target)
53 | """
54 | path = self.images[index]
55 | target = self.targets[index]
56 | img = self.loader(path)
57 | if self.transform is not None:
58 | img = self.transform(img)
59 | img = img.type(torch.FloatTensor)
60 | return img, target
61 |
62 | def __len__(self):
63 | return len(self.images)
64 |
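A minimal usage sketch for `ImageListDataset` (the paths and list files are hypothetical; `im_train.txt` is expected to hold one image path per line and `at_train.npy` the matching targets):

```python
from torchvision import transforms

dataset = ImageListDataset(root='/data/images/',
                           list_path='/data/lists/',
                           split='train',
                           transform=transforms.Compose([
                               transforms.Resize((224, 224)),
                               transforms.ToTensor(),
                           ]))
img, target = dataset[0]   # FloatTensor image and its target
```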
--------------------------------------------------------------------------------
/dataset/caltech_pedestrian.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import os
3 | from PIL import Image
4 | from utils.pascal_utils import write_pascal_annotation_aug
5 | from utils.json_utils import read_json_file
6 |
7 | data_dir='/media/milton/ssd1/dataset/pedestrian/caltech_pedestrian/caltech-pedestrian-dataset-converter/data'
8 | images_dir=os.path.join(data_dir,'images')
9 | json_file=os.path.join(data_dir,'annotations.json')
10 |
11 | data=read_json_file(json_file)
12 | for set_key in data.keys():
13 | set_data=data[set_key]
14 | for v_key in set_data.keys():
15 | frames=set_data[v_key]['frames']
16 | for frame_key in frames.keys():
17 | for frame_anno in frames[frame_key]:
18 |
19 | filename="{}_{}_{}.png".format(set_key.lower(),v_key,frame_key)
20 | file_path=os.path.join(images_dir, filename)
21 | # if not os.path.exists(file_path):
22 | # print("{} not found".format(file_path))
23 | try:
24 | img=Image.open(file_path)
25 | except Exception as e:
26 |                 # skip frames whose image file is missing or unreadable
27 |                 continue
28 |
29 |
30 |
--------------------------------------------------------------------------------
/dataset/inria_person.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import os
3 |
4 | from utils.pascal_utils import write_pascal_annotation_aug
5 | from utils.file_utils import read_text_file
6 | train_anno_dir='/media/milton/ssd1/dataset/pedestrian/INRIAPerson/Train/annotations'
7 | test_anno_dir='/media/milton/ssd1/dataset/pedestrian/INRIAPerson/Test/annotations'
8 |
9 |
10 | def inria_person_to_pascal(train_anno_dir):
11 | anno_files = glob.glob(os.path.join(train_anno_dir, '**.txt'))
12 | for anno_file in anno_files:
13 | filename = ''
14 | obj_list = []
15 |
16 | for line in read_text_file(anno_file):
17 | # xml_file=os.path.join(annodir, xml_file_name)
18 | # image_path=os.path.abspath(os.path.join(data_dir,"train", image_name))
19 | # write_pascal_annotation(image_path,obj_list,xml_file)
20 |
21 | if 'Image filename' in line:
22 | filename = line.split(':')[1].strip()[1:-1]
23 | if 'Bounding box for object' in line:
24 | bounds = line.split(':')[1].split('-')
25 | xmin, ymin = bounds[0].strip()[1:-1].split(',')
26 | xmax, ymax = bounds[1].strip()[1:-1].split(',')
27 | xmin = int(xmin.strip())
28 | ymin = int(ymin.strip())
29 | xmax = int(xmax.strip())
30 | ymax = int(ymax.strip())
31 | obj_list.append([xmin, ymin, xmax, ymax, 1])
32 | image_path = os.path.join('/media/milton/ssd1/dataset/pedestrian/INRIAPerson', filename)
33 | xml_file = os.path.join('/media/milton/ssd1/research/competitions/data_wider_pedestrian/annotations_train',
34 | os.path.basename(image_path).split('.')[0] + ".xml")
35 | write_pascal_annotation_aug(image_path, obj_list, xml_file)
36 |
37 |
38 | inria_person_to_pascal(train_anno_dir)
39 | inria_person_to_pascal(test_anno_dir)
40 |
41 |
42 |
43 |
44 |
--------------------------------------------------------------------------------
/dataset/mall.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import os
3 | import scipy.io as sio
4 |
5 | from utils.pascal_utils import write_pascal_annotation_aug
6 | from utils.file_utils import read_text_file
7 | data_dir='/media/milton/ssd1/dataset/pedestrian/mall/mall_dataset'
8 | gt_file=os.path.join(data_dir,'mall_gt.mat')
9 |
10 | def inria_person_to_pascal(gt_file):
11 |
12 | gt=sio.loadmat(gt_file)
13 | frames=gt['frame']
14 | anno_files = glob.glob(os.path.join(gt_file, '**.txt'))
15 | for anno_file in anno_files:
16 | filename = ''
17 | obj_list = []
18 |
19 | for line in read_text_file(anno_file):
20 | # xml_file=os.path.join(annodir, xml_file_name)
21 | # image_path=os.path.abspath(os.path.join(data_dir,"train", image_name))
22 | # write_pascal_annotation(image_path,obj_list,xml_file)
23 |
24 | if 'Image filename' in line:
25 | filename = line.split(':')[1].strip()[1:-1]
26 | if 'Bounding box for object' in line:
27 | bounds = line.split(':')[1].split('-')
28 | xmin, ymin = bounds[0].strip()[1:-1].split(',')
29 | xmax, ymax = bounds[1].strip()[1:-1].split(',')
30 | xmin = int(xmin.strip())
31 | ymin = int(ymin.strip())
32 | xmax = int(xmax.strip())
33 | ymax = int(ymax.strip())
34 | obj_list.append([xmin, ymin, xmax, ymax, 1])
35 | image_path = os.path.join('/media/milton/ssd1/dataset/pedestrian/upenn', filename)
36 | xml_file = os.path.join('/media/milton/ssd1/research/competitions/data_wider_pedestrian/annotations_train',
37 | os.path.basename(image_path).split('.')[0] + ".xml")
38 | write_pascal_annotation_aug(image_path, obj_list, xml_file)
39 |
40 |
41 | inria_person_to_pascal(gt_file)
42 |
43 |
44 |
45 |
46 |
--------------------------------------------------------------------------------
/dataset/upen_person.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import os
3 |
4 | from utils.pascal_utils import write_pascal_annotation_aug
5 | from utils.file_utils import read_text_file
6 | train_anno_dir='/media/milton/ssd1/dataset/pedestrian/upenn/PennFudanPed/Annotation'
7 |
8 | def inria_person_to_pascal(train_anno_dir):
9 | anno_files = glob.glob(os.path.join(train_anno_dir, '**.txt'))
10 | for anno_file in anno_files:
11 | filename = ''
12 | obj_list = []
13 |
14 | for line in read_text_file(anno_file):
15 | # xml_file=os.path.join(annodir, xml_file_name)
16 | # image_path=os.path.abspath(os.path.join(data_dir,"train", image_name))
17 | # write_pascal_annotation(image_path,obj_list,xml_file)
18 |
19 | if 'Image filename' in line:
20 | filename = line.split(':')[1].strip()[1:-1]
21 | if 'Bounding box for object' in line:
22 | bounds = line.split(':')[1].split('-')
23 | xmin, ymin = bounds[0].strip()[1:-1].split(',')
24 | xmax, ymax = bounds[1].strip()[1:-1].split(',')
25 | xmin = int(xmin.strip())
26 | ymin = int(ymin.strip())
27 | xmax = int(xmax.strip())
28 | ymax = int(ymax.strip())
29 | obj_list.append([xmin, ymin, xmax, ymax, 1])
30 | image_path = os.path.join('/media/milton/ssd1/dataset/pedestrian/upenn', filename)
31 | xml_file = os.path.join('/media/milton/ssd1/research/competitions/data_wider_pedestrian/annotations_train',
32 | os.path.basename(image_path).split('.')[0] + ".xml")
33 | write_pascal_annotation_aug(image_path, obj_list, xml_file)
34 |
35 |
36 | inria_person_to_pascal(train_anno_dir)
37 |
38 |
39 |
40 |
41 |
--------------------------------------------------------------------------------
/demo/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/demo/__init__.py
--------------------------------------------------------------------------------
/demo/demo.py:
--------------------------------------------------------------------------------
1 | from utils.file_utils import read_text_file
2 | import os
3 | import cv2
4 |
5 | val_dir='/media/milton/ssd1/research/competitions/data_wider_pedestrian/val'
6 | for line in read_text_file('scores.txt'):
7 | line_arr=line.split(' ')
8 | image_name=line_arr[0]
9 | image_path=os.path.join(val_dir,image_name)
10 | save_path=os.path.join('out',image_name)
11 | if os.path.exists(save_path):
12 | image_path=save_path
13 | print(image_path)
14 | img_face_detect = cv2.imread(image_path)
15 | print(line_arr)
16 | x1, y1, w, h = line_arr[2:]
17 | x1=float(x1)
18 | y1=float(y1)
19 | w=float(w)
20 | h=float(h.strip())
21 | x2=int(x1)+int(w)
22 | y2=int(y1)+int(h)
23 | cv2.rectangle(img_face_detect, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 1)
24 | print(save_path)
25 | print(img_face_detect.shape)
26 | cv2.imwrite(save_path, img_face_detect)
27 |
--------------------------------------------------------------------------------
/doc/RFB.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/doc/RFB.png
--------------------------------------------------------------------------------
/doc/SSD.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/doc/SSD.jpg
--------------------------------------------------------------------------------
/doc/detection_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/doc/detection_example.png
--------------------------------------------------------------------------------
/doc/detection_example2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/doc/detection_example2.png
--------------------------------------------------------------------------------
/doc/detection_examples.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/doc/detection_examples.png
--------------------------------------------------------------------------------
/doc/rfb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/doc/rfb.png
--------------------------------------------------------------------------------
/doc/ssd.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/doc/ssd.png
--------------------------------------------------------------------------------
/focal_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from torch.autograd import Variable
5 |
6 |
7 | def one_hot(index, classes):
8 | size = index.size() + (classes,)
9 | view = index.size() + (1,)
10 |
11 | mask = torch.Tensor(*size).fill_(0)
12 | index = index.view(*view)
13 | ones = 1.
14 |
15 | if isinstance(index, Variable):
16 | ones = Variable(torch.Tensor(index.size()).fill_(1))
17 | mask = Variable(mask)
18 |
19 | return mask.scatter_(1, index, ones)
20 |
21 |
22 | class FocalLoss(nn.Module):
23 |
24 | def __init__(self, gamma=0, eps=1e-7):
25 | super(FocalLoss, self).__init__()
26 | self.gamma = gamma
27 | self.eps = eps
28 |
29 | def forward(self, input, target):
30 | input=input.cpu()
31 | target=target.cpu()
32 | y = one_hot(target, input.size(-1))
33 | logit = F.softmax(input, dim=-1)
34 | logit = logit.clamp(self.eps, 1. - self.eps)
35 |
36 | loss = -1 * y * torch.log(logit) # cross entropy
37 | loss = loss * (1 - logit) ** self.gamma # focal loss
38 |
39 | return loss.sum()
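A minimal usage sketch for `FocalLoss` (gamma=2 matches loss_loader.py; inputs are raw logits and targets are integer class labels):

```python
import torch

criterion = FocalLoss(gamma=2)
logits = torch.randn(8, 21)             # batch of 8, 21 classes, raw scores
targets = torch.randint(0, 21, (8,))    # integer class labels
loss = criterion(logits, targets)
print(loss.item())
```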
--------------------------------------------------------------------------------
/layers/__init__.py:
--------------------------------------------------------------------------------
1 | from .functions import *
2 | from .modules import *
3 |
--------------------------------------------------------------------------------
/layers/functions/__init__.py:
--------------------------------------------------------------------------------
1 | from .detection import Detect
2 | from .prior_box import PriorBox
3 |
4 |
5 | __all__ = ['Detect', 'PriorBox']
6 |
--------------------------------------------------------------------------------
/layers/functions/detection.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Function
3 |
4 | from utils.box_utils import decode, center_size
5 |
6 |
7 | class Detect(Function):
8 | """At test time, Detect is the final layer of SSD. Decode location preds,
9 | apply non-maximum suppression to location predictions based on conf
10 | scores and threshold to a top_k number of output predictions for both
11 | confidence score and locations.
12 | """
13 |
14 | def __init__(self, num_classes, bkg_label, cfg, object_score=0):
15 | self.num_classes = num_classes
16 | self.background_label = bkg_label
17 | self.object_score = object_score
18 | # self.thresh = thresh
19 |
20 | # Parameters used in nms.
21 | self.variance = cfg['variance']
22 |
23 | def forward(self, predictions, prior, arm_data=None):
24 | """
25 | Args:
26 | loc_data: (tensor) Loc preds from loc layers
27 | Shape: [batch,num_priors*4]
28 | conf_data: (tensor) Shape: Conf preds from conf layers
29 | Shape: [batch*num_priors,num_classes]
30 | prior_data: (tensor) Prior boxes and variances from priorbox layers
31 | Shape: [1,num_priors,4]
32 | """
33 |
34 | loc, conf = predictions
35 | loc_data = loc.data
36 | conf_data = conf.data
37 | prior_data = prior.data
38 | num = loc_data.size(0) # batch size
39 | if arm_data:
40 | arm_loc, arm_conf = arm_data
41 | arm_loc_data = arm_loc.data
42 | arm_conf_data = arm_conf.data
43 | arm_object_conf = arm_conf_data[:, 1:]
44 | no_object_index = arm_object_conf <= self.object_score
45 | conf_data[no_object_index.expand_as(conf_data)] = 0
46 |
47 | self.num_priors = prior_data.size(0)
48 | self.boxes = torch.zeros(num, self.num_priors, 4)
49 | self.scores = torch.zeros(num, self.num_priors, self.num_classes)
50 |
51 | if num == 1:
52 |             # size batch x num_priors x num_classes
53 | conf_preds = conf_data.unsqueeze(0)
54 |
55 | else:
56 | conf_preds = conf_data.view(num, self.num_priors,
57 | self.num_classes)
58 | self.boxes.expand(num, self.num_priors, 4)
59 | self.scores.expand(num, self.num_priors, self.num_classes)
60 | # Decode predictions into bboxes.
61 | for i in range(num):
62 | if arm_data:
63 | default = decode(arm_loc_data[i], prior_data, self.variance)
64 | default = center_size(default)
65 | else:
66 | default = prior_data
67 | decoded_boxes = decode(loc_data[i], default, self.variance)
68 | # For each class, perform nms
69 | conf_scores = conf_preds[i].clone()
70 | '''
71 | c_mask = conf_scores.gt(self.thresh)
72 | decoded_boxes = decoded_boxes[c_mask]
73 | conf_scores = conf_scores[c_mask]
74 | '''
75 |
76 | self.boxes[i] = decoded_boxes
77 | self.scores[i] = conf_scores
78 |
79 | return self.boxes, self.scores
80 |
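A rough usage sketch for `Detect`, mirroring how it is constructed in data_reader.py (`Detect(num_classes, 0, cfg)`). The predictions here are random stand-ins for real network outputs, VOC_300 is the config dict from data/config.py, and the snippet assumes the `data` and `layers` packages import cleanly in your environment:

```python
import torch
from data.config import VOC_300
from layers.functions import Detect, PriorBox

num_classes = 2
priors = PriorBox(VOC_300).forward()                  # [num_priors, 4]
loc = torch.randn(2, priors.size(0), 4)               # fake localization predictions
conf = torch.randn(2 * priors.size(0), num_classes)   # fake confidence predictions

detector = Detect(num_classes, 0, VOC_300)
boxes, scores = detector.forward((loc, conf), priors)
print(boxes.shape, scores.shape)   # [2, num_priors, 4], [2, num_priors, num_classes]
```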
--------------------------------------------------------------------------------
/layers/functions/prior_box.py:
--------------------------------------------------------------------------------
1 | from itertools import product as product
2 | from math import sqrt as sqrt
3 |
4 | import torch
5 |
6 | if torch.cuda.is_available():
7 | torch.set_default_tensor_type('torch.cuda.FloatTensor')
8 |
9 |
10 | class PriorBox(object):
11 | """Compute priorbox coordinates in center-offset form for each source
12 | feature map.
13 | Note:
14 | This 'layer' has changed between versions of the original SSD
15 | paper, so we include both versions, but note v2 is the most tested and most
16 | recent version of the paper.
17 |
18 | """
19 |
20 | def __init__(self, cfg):
21 | super(PriorBox, self).__init__()
22 | self.image_size = cfg['min_dim']
23 | # number of priors for feature map location (either 4 or 6)
24 | self.num_priors = len(cfg['aspect_ratios'])
25 | self.variance = cfg['variance'] or [0.1]
26 | self.feature_maps = cfg['feature_maps']
27 | self.min_sizes = cfg['min_sizes']
28 | self.max_sizes = cfg['max_sizes']
29 | self.steps = cfg['steps']
30 | self.aspect_ratios = cfg['aspect_ratios']
31 | self.clip = cfg['clip']
32 | for v in self.variance:
33 | if v <= 0:
34 | raise ValueError('Variances must be greater than 0')
35 |
36 | def forward(self):
37 | mean = []
38 | for k, f in enumerate(self.feature_maps):
39 | for i, j in product(range(f), repeat=2):
40 | f_k = self.image_size / self.steps[k]
41 | cx = (j + 0.5) / f_k
42 | cy = (i + 0.5) / f_k
43 |
44 | s_k = self.min_sizes[k] / self.image_size
45 | mean += [cx, cy, s_k, s_k]
46 |
47 | # aspect_ratio: 1
48 | # rel size: sqrt(s_k * s_(k+1))
49 | if self.max_sizes:
50 | s_k_prime = sqrt(s_k * (self.max_sizes[k] / self.image_size))
51 | mean += [cx, cy, s_k_prime, s_k_prime]
52 |
53 | # rest of aspect ratios
54 | for ar in self.aspect_ratios[k]:
55 | mean += [cx, cy, s_k * sqrt(ar), s_k / sqrt(ar)]
56 | mean += [cx, cy, s_k / sqrt(ar), s_k * sqrt(ar)]
57 |
58 | # back to torch land
59 | output = torch.Tensor(mean).view(-1, 4)
60 | if self.clip:
61 | output.clamp_(max=1, min=0)
62 | return output
63 |
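A small usage sketch (VOC_300 is the config dict from data/config.py; with that config the layout above yields 11620 priors, assuming the `data` package imports cleanly):

```python
import torch
from data.config import VOC_300
from layers.functions import PriorBox

priorbox = PriorBox(VOC_300)
with torch.no_grad():
    priors = priorbox.forward()   # [num_priors, 4] as (cx, cy, w, h), normalized to [0, 1]
print(priors.shape)               # torch.Size([11620, 4]) for VOC_300
```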
--------------------------------------------------------------------------------
/layers/modules/__init__.py:
--------------------------------------------------------------------------------
1 | from .multibox_loss import MultiBoxLoss
2 | from .refine_multibox_loss import RefineMultiBoxLoss
3 | from .l2norm import L2Norm
4 |
5 | __all__ = ['MultiBoxLoss','L2Norm']
6 |
--------------------------------------------------------------------------------
/layers/modules/l2norm.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.autograd import Function
4 | from torch.autograd import Variable
5 | import torch.nn.init as init
6 |
7 | class L2Norm(nn.Module):
8 | def __init__(self,n_channels, scale):
9 | super(L2Norm,self).__init__()
10 | self.n_channels = n_channels
11 | self.gamma = scale or None
12 | self.eps = 1e-10
13 | self.weight = nn.Parameter(torch.Tensor(self.n_channels))
14 | self.reset_parameters()
15 |
16 | def reset_parameters(self):
17 |         init.constant_(self.weight, self.gamma)
18 |
19 | def forward(self, x):
20 | norm = x.pow(2).sum(dim=1, keepdim=True).sqrt()+self.eps
21 | x /= norm
22 | out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x
23 | return out
24 |
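A minimal sketch of `L2Norm` in use; n_channels=512 with scale=20 is the usual SSD choice for the conv4_3 feature map, stated here as an assumption rather than something read out of models/SSD_vgg.py:

```python
import torch

l2norm = L2Norm(n_channels=512, scale=20)
feat = torch.randn(1, 512, 38, 38)
out = l2norm(feat)   # per-location L2 normalization, then a learned per-channel rescale
print(out.shape)     # torch.Size([1, 512, 38, 38])
```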
--------------------------------------------------------------------------------
/layers/modules/multibox_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from torch.autograd import Variable
5 | from utils.box_utils import match, log_sum_exp
6 | GPU = False
7 | if torch.cuda.is_available():
8 | GPU = True
9 | torch.set_default_tensor_type('torch.cuda.FloatTensor')
10 |
11 |
12 | class MultiBoxLoss(nn.Module):
13 | """SSD Weighted Loss Function
14 | Compute Targets:
15 | 1) Produce Confidence Target Indices by matching ground truth boxes
16 | with (default) 'priorboxes' that have jaccard index > threshold parameter
17 | (default threshold: 0.5).
18 | 2) Produce localization target by 'encoding' variance into offsets of ground
19 | truth boxes and their matched 'priorboxes'.
20 | 3) Hard negative mining to filter the excessive number of negative examples
21 | that comes with using a large number of default bounding boxes.
22 | (default negative:positive ratio 3:1)
23 | Objective Loss:
24 | L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
25 | Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss
26 | weighted by α which is set to 1 by cross val.
27 | Args:
28 | c: class confidences,
29 | l: predicted boxes,
30 | g: ground truth boxes
31 | N: number of matched default boxes
32 | See: https://arxiv.org/pdf/1512.02325.pdf for more details.
33 | """
34 |
35 |
36 | def __init__(self, num_classes,overlap_thresh,prior_for_matching,bkg_label,neg_mining,neg_pos,neg_overlap,encode_target):
37 | super(MultiBoxLoss, self).__init__()
38 | self.num_classes = num_classes
39 | self.threshold = overlap_thresh
40 | self.background_label = bkg_label
41 | self.encode_target = encode_target
42 | self.use_prior_for_matching = prior_for_matching
43 | self.do_neg_mining = neg_mining
44 | self.negpos_ratio = neg_pos
45 | self.neg_overlap = neg_overlap
46 | self.variance = [0.1,0.2]
47 |
48 | def forward(self, predictions, priors, targets):
49 | """Multibox Loss
50 | Args:
51 | predictions (tuple): A tuple containing loc preds, conf preds,
52 | and prior boxes from SSD net.
53 | conf shape: torch.size(batch_size,num_priors,num_classes)
54 | loc shape: torch.size(batch_size,num_priors,4)
55 | priors shape: torch.size(num_priors,4)
56 |
57 | ground_truth (tensor): Ground truth boxes and labels for a batch,
58 | shape: [batch_size,num_objs,5] (last idx is the label).
59 | """
60 |
61 | loc_data, conf_data = predictions
62 | priors = priors
63 | num = loc_data.size(0)
64 | num_priors = (priors.size(0))
65 | num_classes = self.num_classes
66 |
67 | # match priors (default boxes) and ground truth boxes
68 | loc_t = torch.Tensor(num, num_priors, 4)
69 | conf_t = torch.LongTensor(num, num_priors)
70 | for idx in range(num):
71 | truths = targets[idx][:,:-1].data
72 | labels = targets[idx][:,-1].data
73 | defaults = priors.data
74 | match(self.threshold,truths,defaults,self.variance,labels,loc_t,conf_t,idx)
75 | if GPU:
76 | loc_t = loc_t.cuda()
77 | conf_t = conf_t.cuda()
78 | # wrap targets
79 | loc_t = Variable(loc_t, requires_grad=False)
80 | conf_t = Variable(conf_t,requires_grad=False)
81 |
82 | pos = conf_t > 0
83 |
84 | # Localization Loss (Smooth L1)
85 | # Shape: [batch,num_priors,4]
86 | pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
87 | loc_p = loc_data[pos_idx].view(-1,4)
88 | loc_t = loc_t[pos_idx].view(-1,4)
89 | loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False)
90 |
91 | # Compute max conf across batch for hard negative mining
92 | batch_conf = conf_data.view(-1,self.num_classes)
93 | loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1,1))
94 |
95 | # Hard Negative Mining
96 | loss_c[pos.view(-1,1)] = 0 # filter out pos boxes for now
97 | loss_c = loss_c.view(num, -1)
98 | _,loss_idx = loss_c.sort(1, descending=True)
99 | _,idx_rank = loss_idx.sort(1)
100 | num_pos = pos.long().sum(1,keepdim=True)
101 | num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
102 | neg = idx_rank < num_neg.expand_as(idx_rank)
103 |
104 | # Confidence Loss Including Positive and Negative Examples
105 | pos_idx = pos.unsqueeze(2).expand_as(conf_data)
106 | neg_idx = neg.unsqueeze(2).expand_as(conf_data)
107 | conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes)
108 | targets_weighted = conf_t[(pos+neg).gt(0)]
109 | loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False)
110 |
111 | # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
112 |
113 | N = num_pos.data.float().sum()
114 | loss_l/=N
115 | loss_c/=N
116 | return loss_l,loss_c
117 |
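For reference, constructing the loss with the defaults described in the docstring (0.5 jaccard threshold, 3:1 negative:positive mining). The argument values below are typical choices for two-class pedestrian detection, not copied from a config in this repo:

```python
criterion = MultiBoxLoss(num_classes=2,
                         overlap_thresh=0.5,
                         prior_for_matching=True,
                         bkg_label=0,
                         neg_mining=True,
                         neg_pos=3,
                         neg_overlap=0.5,
                         encode_target=False)

# Forward pass (shapes per the docstring):
#   predictions = (loc_preds [B, P, 4], conf_preds [B, P, C]) from the SSD net
#   priors      = [P, 4] default boxes from PriorBox
#   targets     = list of B tensors, each [num_objs, 5] with the label in the last column
# loss_l, loss_c = criterion(predictions, priors, targets)
# loss = loss_l + loss_c
```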
--------------------------------------------------------------------------------
/layers/modules/refine_multibox_loss.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | from torch.autograd import Variable
6 | from utils.box_utils import match,refine_match, log_sum_exp,decode
7 | GPU = False
8 | if torch.cuda.is_available():
9 | GPU = True
10 | torch.set_default_tensor_type('torch.cuda.FloatTensor')
11 |
12 |
13 | class RefineMultiBoxLoss(nn.Module):
14 | """SSD Weighted Loss Function
15 | Compute Targets:
16 | 1) Produce Confidence Target Indices by matching ground truth boxes
17 | with (default) 'priorboxes' that have jaccard index > threshold parameter
18 | (default threshold: 0.5).
19 | 2) Produce localization target by 'encoding' variance into offsets of ground
20 | truth boxes and their matched 'priorboxes'.
21 | 3) Hard negative mining to filter the excessive number of negative examples
22 | that comes with using a large number of default bounding boxes.
23 | (default negative:positive ratio 3:1)
24 | Objective Loss:
25 | L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
26 | Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss
27 | weighted by α which is set to 1 by cross val.
28 | Args:
29 | c: class confidences,
30 | l: predicted boxes,
31 | g: ground truth boxes
32 | N: number of matched default boxes
33 | See: https://arxiv.org/pdf/1512.02325.pdf for more details.
34 | """
35 |
36 |
37 | def __init__(self, num_classes,overlap_thresh,prior_for_matching,bkg_label,neg_mining,neg_pos,neg_overlap,encode_target,object_score = 0):
38 | super(RefineMultiBoxLoss, self).__init__()
39 | self.num_classes = num_classes
40 | self.threshold = overlap_thresh
41 | self.background_label = bkg_label
42 | self.encode_target = encode_target
43 | self.use_prior_for_matching = prior_for_matching
44 | self.do_neg_mining = neg_mining
45 | self.negpos_ratio = neg_pos
46 | self.neg_overlap = neg_overlap
47 | self.object_score = object_score
48 | self.variance = [0.1,0.2]
49 |
50 | def forward(self, odm_data,priors, targets,arm_data = None,filter_object = False):
51 | """Multibox Loss
52 | Args:
53 | predictions (tuple): A tuple containing loc preds, conf preds,
54 | and prior boxes from SSD net.
55 | conf shape: torch.size(batch_size,num_priors,num_classes)
56 | loc shape: torch.size(batch_size,num_priors,4)
57 | priors shape: torch.size(num_priors,4)
58 |
59 | ground_truth (tensor): Ground truth boxes and labels for a batch,
60 | shape: [batch_size,num_objs,5] (last idx is the label).
61 | arm_data (tuple): arm branch containg arm_loc and arm_conf
62 | filter_object: whether filter out the prediction according to the arm conf score
63 | """
64 |
65 | loc_data,conf_data = odm_data
66 | if arm_data:
67 | arm_loc,arm_conf = arm_data
68 | priors = priors.data
69 | num = loc_data.size(0)
70 | num_priors = (priors.size(0))
71 |
72 | # match priors (default boxes) and ground truth boxes
73 | loc_t = torch.Tensor(num, num_priors, 4)
74 | conf_t = torch.LongTensor(num, num_priors)
75 | for idx in range(num):
76 | truths = targets[idx][:,:-1].data
77 | labels = targets[idx][:,-1].data
78 | #for object detection
79 | if self.num_classes == 2:
80 | labels = labels > 0
81 | if arm_data:
82 | refine_match(self.threshold,truths,priors,self.variance,labels,loc_t,conf_t,idx,arm_loc[idx].data)
83 | else:
84 | match(self.threshold,truths,priors,self.variance,labels,loc_t,conf_t,idx)
85 | if GPU:
86 | loc_t = loc_t.cuda()
87 | conf_t = conf_t.cuda()
88 | # wrap targets
89 | loc_t = Variable(loc_t, requires_grad=False)
90 | conf_t = Variable(conf_t,requires_grad=False)
91 | if arm_data and filter_object:
92 | arm_conf_data = arm_conf.data[:,:,1]
93 | pos = conf_t > 0
94 | object_score_index = arm_conf_data <= self.object_score
95 | pos[object_score_index] = 0
96 |
97 | else:
98 | pos = conf_t > 0
99 |
100 | # Localization Loss (Smooth L1)
101 | # Shape: [batch,num_priors,4]
102 | pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
103 | loc_p = loc_data[pos_idx].view(-1,4)
104 | loc_t = loc_t[pos_idx].view(-1,4)
105 | loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False)
106 |
107 | # Compute max conf across batch for hard negative mining
108 | batch_conf = conf_data.view(-1,self.num_classes)
109 | loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1,1))
110 |
111 | # Hard Negative Mining
112 |         loss_c = loss_c.view(num, -1)
113 |         loss_c[pos] = 0  # filter out pos boxes for now
114 | _,loss_idx = loss_c.sort(1, descending=True)
115 | _,idx_rank = loss_idx.sort(1)
116 | num_pos = pos.long().sum(1,keepdim=True)
117 | num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
118 | neg = idx_rank < num_neg.expand_as(idx_rank)
119 |
120 | # Confidence Loss Including Positive and Negative Examples
121 | pos_idx = pos.unsqueeze(2).expand_as(conf_data)
122 | neg_idx = neg.unsqueeze(2).expand_as(conf_data)
123 | conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes)
124 | targets_weighted = conf_t[(pos+neg).gt(0)]
125 | loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False)
126 |
127 | # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
128 |         N = num_pos.data.float().sum()
129 | loss_l/=N
130 | loss_c/=N
131 | return loss_l,loss_c
132 |
--------------------------------------------------------------------------------
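
When `arm_data` is given and `filter_object=True`, `RefineMultiBoxLoss` drops ODM positives for anchors whose ARM objectness score (`arm_conf[:, :, 1]`) is at or below `object_score`, so the ODM never trains on anchors the ARM already rejects as background. A toy illustration of that masking step with made-up tensors:

```python
import torch

# Matched class per prior (0 = background) for 2 images x 5 priors,
# and the ARM's [background, object] scores for the same priors.
conf_t = torch.tensor([[0, 3, 0, 7, 0],
                       [2, 0, 0, 0, 5]])
arm_conf = torch.tensor([[[0.9, 0.1], [0.2, 0.8], [0.7, 0.3], [0.99, 0.01], [0.5, 0.5]],
                         [[0.1, 0.9], [0.6, 0.4], [0.8, 0.2], [0.3, 0.7], [0.4, 0.6]]])

object_score = 0.05
pos = conf_t > 0                                    # priors matched to a ground-truth box
object_score_index = arm_conf[:, :, 1] <= object_score
pos[object_score_index] = False                     # drop positives the ARM already rejects

print(pos)
# tensor([[False,  True, False, False, False],
#         [ True, False, False, False,  True]])
```
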
/loss_loader.py:
--------------------------------------------------------------------------------
1 | from focal_loss import FocalLoss
2 | from torch import nn
3 | gamma = 2
4 |
5 | def get_focal_loss(classifier):
6 | print("==> Using Focal Loss.....")
7 | classifier.writer.add_text('Info', "Using Focal Loss ")
8 | return FocalLoss(gamma)
9 |
10 | def get_cross_entropy(classifier):
11 | print("==> Using CrossEntropy.....")
12 | classifier.writer.add_text('Info', "Using Cross Entropy Loss ")
13 | return nn.CrossEntropyLoss()
14 |
15 | def get_vat_cross_entropy(classifier):
16 | print("==> Using Adversarial Training Cross Entropy.....")
17 |     pass  # TODO: adversarial-training cross entropy loss is not implemented yet
--------------------------------------------------------------------------------
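
`FocalLoss` is imported from `focal_loss.py`, which is not reproduced in this dump. For reference, the usual formulation is FL(p_t) = -(1 - p_t)^γ · log(p_t), with γ = 2 here; the sketch below is a generic multi-class version of that idea, not necessarily the repo's exact implementation:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class FocalLoss(nn.Module):
    """FL(p_t) = -(1 - p_t)^gamma * log(p_t), averaged over the batch."""
    def __init__(self, gamma=2):
        super(FocalLoss, self).__init__()
        self.gamma = gamma

    def forward(self, logits, targets):
        # log-probability of the target class for each sample
        log_pt = F.log_softmax(logits, dim=1).gather(1, targets.unsqueeze(1)).squeeze(1)
        pt = log_pt.exp()
        return (-(1 - pt) ** self.gamma * log_pt).mean()

criterion = FocalLoss(gamma=2)
loss = criterion(torch.randn(8, 21), torch.randint(0, 21, (8,)))
```
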
/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | gpu=0
3 | os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
4 | os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
5 | os.environ['CUDA_LAUNCH_BLOCKING'] = str(gpu)
6 | from object_detector import Detector
7 | from torch import optim
8 | from augment_data import augment_images
9 | from model_loader import *
10 | from loss_loader import *
11 | from data_reader import *
12 |
13 | import argparse
14 | import pickle
15 | import time
16 |
17 | import numpy as np
18 | import os
19 | import torch
20 | import torch.backends.cudnn as cudnn
21 | import torch.nn.init as init
22 | import torch.optim as optim
23 | import torch.utils.data as data
24 | from torch.autograd import Variable
25 |
26 | from data import VOCroot, COCOroot, VOC_300, VOC_512, COCO_300, COCO_512, COCO_mobile_300, AnnotationTransform, \
27 | COCODetection, VOCDetection, detection_collate, BaseTransform, preproc
28 | from layers.functions import Detect, PriorBox
29 | from layers.modules import MultiBoxLoss
30 | from utils.nms_wrapper import nms
31 | from utils.timer import Timer
32 |
33 |
34 | def str2bool(v):
35 | return v.lower() in ("yes", "true", "t", "1")
36 |
37 | classes=VOC_CLASSES
38 | classes_delimited=','.join(classes)
39 | num_classes=len(classes)
40 |
41 | parser = argparse.ArgumentParser(
42 | description='Receptive Field Block Net Training')
43 |
44 | parser.add_argument('-gpu', default=gpu,
45 | type=int, help='gpu index for training.')
46 | parser.add_argument('-v', '--version', default='RFB_vgg',
47 | help='RFB_vgg ,RFB_E_vgg RFB_mobile SSD_vgg version.')
48 | parser.add_argument('-s', '--size', default='300',type=int,
49 | help='300 or 512 input size.')
50 | parser.add_argument('-d', '--dataset', default='VOC',
51 | help='VOC or COCO dataset')
52 |
53 | parser.add_argument('-classes', default=classes_delimited,type=str,
54 | help='class names delimited by ,')
55 | parser.add_argument('-num_classes', default=num_classes, type=int,
56 | help='total classes')
57 |
58 | parser.add_argument(
59 | '--basenet', default='weights/vgg16_reducedfc.pth', help='pretrained base model')
60 | parser.add_argument('--jaccard_threshold', default=0.5,
61 | type=float, help='Min Jaccard index for matching')
62 | parser.add_argument('-b', '--batch_size', default=8,
63 | type=int, help='Batch size for training')
64 | parser.add_argument('--num_workers', default=4,
65 | type=int, help='Number of workers used in dataloading')
66 | parser.add_argument('--cuda', default=True,
67 |                     type=str2bool, help='Use cuda to train model')
68 | parser.add_argument('--ngpu', default=2, type=int, help='gpus')
69 | parser.add_argument('--lr', '--learning-rate',
70 | default=4e-3, type=float, help='initial learning rate')
71 | parser.add_argument('--momentum', default=0.9, type=float, help='momentum')
72 |
73 | parser.add_argument('--resume_net', default=False, help='resume net for retraining')
74 | parser.add_argument('--resume_epoch', default=0,
75 | type=int, help='resume iter for retraining')
76 | parser.add_argument('-epochs', '--epochs', default=300,
77 | type=int, help='max epoch for retraining')
78 | parser.add_argument('--weight_decay', default=5e-4,
79 | type=float, help='Weight decay for SGD')
80 | parser.add_argument('-we', '--warm_epoch', default=1,
81 |                     type=int, help='number of warm-up epochs')
82 | parser.add_argument('--gamma', default=0.1,
83 | type=float, help='Gamma update for SGD')
84 |
85 | parser.add_argument('--freeze_layers', default=0.80,
86 |                     type=float, help='Percentage of weights to be frozen.')
87 |
88 | parser.add_argument('--log_iters', default=True,
89 |                     type=str2bool, help='Print the loss at each iteration')
90 | parser.add_argument('--save_folder', default='weights/',
91 | help='Location to save checkpoint models')
92 | parser.add_argument('--date', default='1213')
93 | parser.add_argument('--save_frequency', default=10)
94 | parser.add_argument('--retest', default=False, type=str2bool,
95 | help='test cache results')
96 | parser.add_argument('--test_frequency', default=10)
97 | parser.add_argument('--visdom', default=False, type=str2bool, help='Use visdom to for loss visualization')
98 | parser.add_argument('--send_images_to_visdom', type=str2bool, default=False,
99 | help='Sample a random image from each 10th batch, send it to visdom after augmentations step')
100 | args = parser.parse_args()
101 |
102 | """
103 | sudo nvidia-smi -pl 180
104 | sudo nvidia-smi --gpu-reset -i 0
105 | use command line to run the training.
106 |
107 | TODO: download more images using image_utils and the isic-archive. Also, use more online resources for data.
108 |
109 | """
110 |
111 | from layers.modules.multibox_loss import MultiBoxLoss
112 |
113 | from statics import *
114 | def get_loss_function(classifier):
115 | return MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False)
116 |
117 | def get_model(args):
118 | return get_ssd_model(args)
119 |
120 | def get_optimizer(model_trainer):
121 | epsilon=1e-8
122 | momentum = 0.9
123 | weight_decay=5e-4
124 | # model_trainer.writer.add_scalar("leanring rate", learning_rate)
125 | # model_trainer.writer.add_scalar("epsilon", epsilon)
126 | # optimizer=optim.SGD(filter(lambda p: p.requires_grad, model_trainer.model.parameters()),
127 | # lr=0.001,momentum=momentum,weight_decay=weight_decay)
128 | optimizer = optim.Adam(filter(lambda p: p.requires_grad, model_trainer.model.parameters()),lr=0.01)
129 | # optimizer = optim.SGD(filter(lambda p: p.requires_grad, model_trainer.model.parameters()), lr=0.001, momentum=0.9,
130 | # weight_decay=weight_decay)
131 | return optimizer
132 |
133 | def get_prior():
134 |     cfg = (VOC_300, VOC_512)[args.size == 512]  # args.size is parsed as an int
135 | priorbox = PriorBox(cfg)
136 | priors = Variable(priorbox.forward(), volatile=True)
137 | return priors
138 |
139 | class ModelDetails(object):
140 | def __init__(self,args):
141 | self.args=args
142 | self.priors=get_prior()
143 | self.model,self.model_name_str = get_model(args)
144 | self.logs_dir = "logs/{}/{}".format(args.gpu,self.model_name_str)
145 | self.augment_images = augment_images
146 | self.dataset_loader=get_data_loader(args)
147 | self.get_loss_function = get_loss_function
148 | self.get_optimizer = get_optimizer
149 | self.dataset=data_set_name
150 | self.class_names=VOC_CLASSES
151 |
152 |
153 | def start_training(args):
154 | model_details=ModelDetails(args)
155 | detector=Detector(model_details)
156 | detector.load_data()
157 | detector.load_model()
158 | for epoch in range(detector.start_epoch, detector.start_epoch + args.epochs):
159 | try:
160 | detector.train(epoch)
161 | detector.test(epoch)
162 | except KeyboardInterrupt:
163 | detector.test(epoch)
164 |             break
165 | detector.load_data()
166 |
167 | start_training(args)
--------------------------------------------------------------------------------
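
One detail worth calling out in the argument parsing above: argparse's `type=bool` converts any non-empty string, including "False", to `True`, which is why the script defines `str2bool` and why the boolean flags should use it. A quick standalone illustration:

```python
import argparse

def str2bool(v):
    return v.lower() in ("yes", "true", "t", "1")

p = argparse.ArgumentParser()
p.add_argument('--plain', type=bool, default=True)       # pitfall: bool("False") is True
p.add_argument('--parsed', type=str2bool, default=True)  # parses the string correctly

args = p.parse_args(['--plain', 'False', '--parsed', 'False'])
print(args.plain, args.parsed)   # True False
```
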
/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | cd ./utils/
3 |
4 | CUDA_PATH=/usr/local/cuda/
5 |
6 | python build.py build_ext --inplace
7 |
8 | cd ..
9 |
--------------------------------------------------------------------------------
/model_loader.py:
--------------------------------------------------------------------------------
1 | from statics import voc
2 | import argparse
3 | import pickle
4 | import time
5 |
6 | import numpy as np
7 | import os
8 | import torch
9 | import torch.backends.cudnn as cudnn
10 | import torch.nn.init as init
11 | import torch.optim as optim
12 | import torch.utils.data as data
13 | from torch.autograd import Variable
14 |
15 | from data import VOCroot, COCOroot, VOC_300, VOC_512, COCO_300, COCO_512, COCO_mobile_300, AnnotationTransform, \
16 | COCODetection, VOCDetection, detection_collate, BaseTransform, preproc
17 | from layers.functions import Detect, PriorBox
18 | from layers.modules import MultiBoxLoss
19 | from utils.nms_wrapper import nms
20 | from utils.timer import Timer
21 |
22 | def get_ssd_model(args):
23 | save_folder = os.path.join(args.save_folder, args.version + '_' + str(args.size), args.date)
24 | if not os.path.exists(save_folder):
25 | os.makedirs(save_folder)
26 | test_save_dir = os.path.join(save_folder, 'ss_predict')
27 | if not os.path.exists(test_save_dir):
28 | os.makedirs(test_save_dir)
29 | gpu=args.gpu
30 | img_dim = args.size
31 | num_classes=args.num_classes
32 | print("==>Loading SSD model...")
33 | if args.version == 'RFB_vgg':
34 | from models.RFB_Net_vgg import build_net
35 | elif args.version == 'RFB_E_vgg':
36 | from models.RFB_Net_E_vgg import build_net
37 | elif args.version == 'RFB_mobile':
38 | from models.RFB_Net_mobile import build_net
39 |
40 | cfg = COCO_mobile_300
41 | elif args.version == 'SSD_vgg':
42 | from models.SSD_vgg import build_net
43 | elif args.version == 'FSSD_vgg':
44 | from models.FSSD_vgg import build_net
45 | elif args.version == 'FRFBSSD_vgg':
46 | from models.FRFBSSD_vgg import build_net
47 | else:
48 |         print('Unknown version!')
49 | net = build_net(int(img_dim), num_classes)
50 | # model(model.cuda(), (3, height, width))
51 | if not args.resume_net:
52 | base_weights = torch.load(args.basenet)
53 | print('Loading base network...')
54 | net.base.load_state_dict(base_weights)
55 |
56 | def xavier(param):
57 | init.xavier_uniform(param)
58 |
59 | def weights_init(m):
60 | for key in m.state_dict():
61 | if key.split('.')[-1] == 'weight':
62 | if 'conv' in key:
63 | init.kaiming_normal(m.state_dict()[key], mode='fan_out')
64 | if 'bn' in key:
65 | m.state_dict()[key][...] = 1
66 | elif key.split('.')[-1] == 'bias':
67 | m.state_dict()[key][...] = 0
68 |
69 | print('Initializing weights...')
70 | # initialize newly added layers' weights with kaiming_normal method
71 | net.extras.apply(weights_init)
72 | net.loc.apply(weights_init)
73 | net.conf.apply(weights_init)
74 | if args.version == 'FSSD_vgg' or args.version == 'FRFBSSD_vgg':
75 | net.ft_module.apply(weights_init)
76 | net.pyramid_ext.apply(weights_init)
77 | if 'RFB' in args.version:
78 | net.Norm.apply(weights_init)
79 | if args.version == 'RFB_E_vgg':
80 | net.reduce.apply(weights_init)
81 | net.up_reduce.apply(weights_init)
82 |
83 | else:
84 | # load resume network
85 | resume_net_path = os.path.join(save_folder, args.version + '_' + args.dataset + '_epoches_' + \
86 | str(args.resume_epoch) + '.pth')
87 | print('Loading resume network', resume_net_path)
88 | state_dict = torch.load(resume_net_path)
89 | # create new OrderedDict that does not contain `module.`
90 | from collections import OrderedDict
91 |
92 | new_state_dict = OrderedDict()
93 | for k, v in state_dict.items():
94 | head = k[:7]
95 | if head == 'module.':
96 | name = k[7:] # remove `module.`
97 | else:
98 | name = k
99 | new_state_dict[name] = v
100 | net.load_state_dict(new_state_dict)
101 | return net,"ssd_{}_adam".format(gpu)
102 |
--------------------------------------------------------------------------------
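
The resume branch above strips the `module.` prefix that `nn.DataParallel` prepends to every parameter name when a model is saved from a multi-GPU wrapper. The same idea as a small reusable helper, sketched here rather than taken from the repo:

```python
from collections import OrderedDict

def strip_data_parallel_prefix(state_dict):
    """Remove the 'module.' prefix nn.DataParallel prepends to parameter names."""
    cleaned = OrderedDict()
    for key, value in state_dict.items():
        cleaned[key[7:] if key.startswith('module.') else key] = value
    return cleaned

# Usage (hypothetical checkpoint path):
# state_dict = torch.load('weights/RFB_vgg_VOC_epoches_200.pth')
# net.load_state_dict(strip_data_parallel_prefix(state_dict))
```
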
/models/FSSD_mobile.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | import os
4 | import torch
5 | import torch.nn as nn
6 |
7 | sys.path.append('./')
8 | from .mobilenet import mobilenet_1
9 |
10 |
11 | class BasicConv(nn.Module):
12 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True,
13 | bn=False, bias=True, up_size=0):
14 | super(BasicConv, self).__init__()
15 | self.out_channels = out_planes
16 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding,
17 | dilation=dilation, groups=groups, bias=bias)
18 | self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) if bn else None
19 | self.relu = nn.ReLU(inplace=True) if relu else None
20 | self.up_size = up_size
21 | self.up_sample = nn.Upsample(size=(up_size, up_size), mode='bilinear') if up_size != 0 else None
22 |
23 | def forward(self, x):
24 | x = self.conv(x)
25 | if self.bn is not None:
26 | x = self.bn(x)
27 | if self.relu is not None:
28 | x = self.relu(x)
29 | if self.up_size > 0:
30 | x = self.up_sample(x)
31 | return x
32 |
33 |
34 | class FSSD(nn.Module):
35 | """Single Shot Multibox Architecture
36 |     The network is composed of a MobileNet base followed by the
37 | added multibox conv layers. Each multibox layer branches into
38 | 1) conv2d for class conf scores
39 | 2) conv2d for localization predictions
40 | 3) associated priorbox layer to produce default bounding
41 | boxes specific to the layer's feature map size.
42 | See: https://arxiv.org/pdf/1512.02325.pdf for more details.
43 |
44 | Args:
45 |         size: input image size (300)
46 |         head: "multibox head" of loc and conf conv layers
47 |         ft_module: layers that fuse the MobileNet source feature maps
48 |         pyramid_ext: pyramid feature layers fed to the multibox head
49 | """
50 |
51 | def __init__(self, size, head, ft_module, pyramid_ext, num_classes):
52 | super(FSSD, self).__init__()
53 | self.num_classes = num_classes
54 | # TODO: implement __call__ in PriorBox
55 | self.size = size
56 |
57 | # SSD network
58 | self.base = mobilenet_1()
59 | # Layer learns to scale the l2 normalized features from conv4_3
60 | self.ft_module = nn.ModuleList(ft_module)
61 | self.pyramid_ext = nn.ModuleList(pyramid_ext)
62 |
63 | self.loc = nn.ModuleList(head[0])
64 | self.conf = nn.ModuleList(head[1])
65 | self.fea_bn = nn.BatchNorm2d(256 * len(self.ft_module), affine=True)
66 |
67 | self.softmax = nn.Softmax()
68 |
69 | def forward(self, x, test=False):
70 | """Applies network layers and ops on input image(s) x.
71 |
72 | Args:
73 |             x: input image or batch of images. Shape: [batch,3,300,300].
74 |
75 | Return:
76 | Depending on phase:
77 | test:
78 | Variable(tensor) of output class label predictions,
79 | confidence score, and corresponding location predictions for
80 | each object detected. Shape: [batch,topk,7]
81 |
82 | train:
83 | list of concat outputs from:
84 | 1: confidence layers, Shape: [batch*num_priors,num_classes]
85 | 2: localization layers, Shape: [batch,num_priors*4]
86 | 3: priorbox layers, Shape: [2,num_priors*4]
87 | """
88 | source_features = list()
89 | transformed_features = list()
90 | loc = list()
91 | conf = list()
92 |
93 | base_out = self.base(x)
94 | source_features.append(base_out[0]) # mobilenet 4_1
95 |         source_features.append(base_out[1])  # mobilenet 5_5
96 | source_features.append(base_out[2]) # mobilenet 6_1
97 |
98 | assert len(self.ft_module) == len(source_features)
99 | for k, v in enumerate(self.ft_module):
100 | transformed_features.append(v(source_features[k]))
101 | concat_fea = torch.cat(transformed_features, 1)
102 | x = self.fea_bn(concat_fea)
103 | fea_bn = x
104 | pyramid_fea = list()
105 | for k, v in enumerate(self.pyramid_ext):
106 | x = v(x)
107 | pyramid_fea.append(x)
108 | # apply multibox head to source layers
109 | for (x, l, c) in zip(pyramid_fea, self.loc, self.conf):
110 | loc.append(l(x).permute(0, 2, 3, 1).contiguous())
111 | conf.append(c(x).permute(0, 2, 3, 1).contiguous())
112 |
113 | loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
114 | conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
115 | if test:
116 | output = (
117 | loc.view(loc.size(0), -1, 4), # loc preds
118 | self.softmax(conf.view(-1, self.num_classes)), # conf preds
119 | )
120 | features = ()
121 | else:
122 | output = (
123 | loc.view(loc.size(0), -1, 4),
124 | conf.view(conf.size(0), -1, self.num_classes),
125 | )
126 | features = (
127 | fea_bn
128 | )
129 | return output
130 |
131 | def load_weights(self, base_file):
132 | other, ext = os.path.splitext(base_file)
133 |         if ext == '.pkl' or ext == '.pth':
134 | print('Loading weights into state dict...')
135 | state_dict = torch.load(base_file, map_location=lambda storage, loc: storage)
136 | from collections import OrderedDict
137 | new_state_dict = OrderedDict()
138 | for k, v in state_dict.items():
139 | head = k[:7]
140 | if head == 'module.':
141 | name = k[7:] # remove `module.`
142 | else:
143 | name = k
144 | new_state_dict[name] = v
145 | self.base.load_state_dict(new_state_dict)
146 | print('Finished!')
147 |
148 | else:
149 | print('Sorry only .pth and .pkl files supported.')
150 |
151 |
152 | def feature_transform_module(scale_factor):
153 | layers = []
154 | # conv4_1
155 | layers += [BasicConv(int(256 * scale_factor), 256, kernel_size=1, padding=0)]
156 | # conv5_5
157 | layers += [BasicConv(int(512 * scale_factor), 256, kernel_size=1, padding=0, up_size=38)]
158 | # conv6_mpo1
159 | layers += [BasicConv(int(1024 * scale_factor), 256, kernel_size=1, padding=0, up_size=38)]
160 | return layers
161 |
162 |
163 | def pyramid_feature_extractor():
164 | '''
165 | layers = [BasicConv(256*3,512,kernel_size=3,stride=1,padding=1),BasicConv(512,512,kernel_size=3,stride=2,padding=1), \
166 | BasicConv(512,256,kernel_size=3,stride=2,padding=1),BasicConv(256,256,kernel_size=3,stride=2,padding=1), \
167 | BasicConv(256,256,kernel_size=3,stride=1,padding=0),BasicConv(256,256,kernel_size=3,stride=1,padding=0)]
168 | '''
169 | from .mobilenet import DepthWiseBlock
170 | layers = [DepthWiseBlock(256 * 3, 512, stride=1), DepthWiseBlock(512, 512, stride=2),
171 | DepthWiseBlock(512, 256, stride=2), DepthWiseBlock(256, 256, stride=2), \
172 | DepthWiseBlock(256, 128, stride=1, padding=0), DepthWiseBlock(128, 128, stride=1, padding=0)]
173 |
174 | return layers
175 |
176 |
177 | def multibox(fea_channels, cfg, num_classes):
178 | loc_layers = []
179 | conf_layers = []
180 | assert len(fea_channels) == len(cfg)
181 | for i, fea_channel in enumerate(fea_channels):
182 | loc_layers += [nn.Conv2d(fea_channel, cfg[i] * 4, kernel_size=3, padding=1)]
183 | conf_layers += [nn.Conv2d(fea_channel, cfg[i] * num_classes, kernel_size=3, padding=1)]
184 | return (loc_layers, conf_layers)
185 |
186 |
187 | mbox = {
188 | '300': [6, 6, 6, 6, 4, 4], # number of boxes per feature map location
189 | }
190 | fea_channels = [512, 512, 256, 256, 128, 128]
191 |
192 |
193 | def build_net(size=300, num_classes=21):
194 | if size != 300 and size != 512:
195 |         print("Error: Sorry only SSD300 and SSD512 are supported currently!")
196 | return
197 |
198 | return FSSD(size, multibox(fea_channels, mbox[str(size)], num_classes), feature_transform_module(1),
199 | pyramid_feature_extractor(), \
200 | num_classes=num_classes)
201 |
202 |
203 | if __name__ == '__main__':
204 |     net = build_net()
205 |     print(net)
--------------------------------------------------------------------------------
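
A quick sanity check of the FSSD head wiring: build the 300-input network and confirm that the two outputs reshape to `[batch, num_priors, 4]` and `[batch, num_priors, num_classes]`. This is a sketch that assumes it is run from the repo root so the relative import of `mobilenet` resolves as a package import:

```python
import torch
from torch.autograd import Variable
from models.FSSD_mobile import build_net

net = build_net(size=300, num_classes=21)
x = Variable(torch.randn(1, 3, 300, 300))
loc, conf = net(x)                 # test=False -> training-style outputs
print(loc.size(), conf.size())     # [1, P, 4] and [1, P, 21], where P is the total prior count
```
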
/models/SSD_vgg.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 |
6 | from layers import *
7 | from .base_models import vgg, vgg_base
8 |
9 |
10 | class SSD(nn.Module):
11 | """Single Shot Multibox Architecture
12 | The network is composed of a base VGG network followed by the
13 | added multibox conv layers. Each multibox layer branches into
14 | 1) conv2d for class conf scores
15 | 2) conv2d for localization predictions
16 | 3) associated priorbox layer to produce default bounding
17 | boxes specific to the layer's feature map size.
18 | See: https://arxiv.org/pdf/1512.02325.pdf for more details.
19 |
20 | Args:
21 | phase: (string) Can be "test" or "train"
22 |         base: VGG16 layers for input, size of either 300 or 512
23 | extras: extra layers that feed to multibox loc and conf layers
24 | head: "multibox head" consists of loc and conf conv layers
25 | """
26 |
27 | def __init__(self, base, extras, head, num_classes,size):
28 | super(SSD, self).__init__()
29 | self.num_classes = num_classes
30 | # TODO: implement __call__ in PriorBox
31 | self.size = size
32 |
33 | # SSD network
34 | self.base = nn.ModuleList(base)
35 | # Layer learns to scale the l2 normalized features from conv4_3
36 | self.extras = nn.ModuleList(extras)
37 | self.L2Norm = L2Norm(512, 20)
38 |
39 | self.loc = nn.ModuleList(head[0])
40 | self.conf = nn.ModuleList(head[1])
41 |
42 | self.softmax = nn.Softmax()
43 |
44 | def forward(self, x, test=False):
45 | """Applies network layers and ops on input image(s) x.
46 |
47 | Args:
48 |             x: input image or batch of images. Shape: [batch,3,300,300].
49 |
50 | Return:
51 | Depending on phase:
52 | test:
53 | Variable(tensor) of output class label predictions,
54 | confidence score, and corresponding location predictions for
55 | each object detected. Shape: [batch,topk,7]
56 |
57 | train:
58 | list of concat outputs from:
59 | 1: confidence layers, Shape: [batch*num_priors,num_classes]
60 | 2: localization layers, Shape: [batch,num_priors*4]
61 | 3: priorbox layers, Shape: [2,num_priors*4]
62 | """
63 | sources = list()
64 | loc = list()
65 | conf = list()
66 |
67 | # apply vgg up to conv4_3 relu
68 | for k in range(23):
69 | x = self.base[k](x)
70 |
71 | s = self.L2Norm(x)
72 | sources.append(s)
73 |
74 | # apply vgg up to fc7
75 | for k in range(23, len(self.base)):
76 | x = self.base[k](x)
77 | sources.append(x)
78 |
79 | # apply extra layers and cache source layer outputs
80 | for k, v in enumerate(self.extras):
81 | x = F.relu(v(x), inplace=True)
82 | if k % 2 == 1:
83 | sources.append(x)
84 |
85 | # apply multibox head to source layers
86 | for (x, l, c) in zip(sources, self.loc, self.conf):
87 | loc.append(l(x).permute(0, 2, 3, 1).contiguous())
88 | conf.append(c(x).permute(0, 2, 3, 1).contiguous())
89 |
90 | loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
91 | conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
92 | if test:
93 | output = (
94 | loc.view(loc.size(0), -1, 4), # loc preds
95 | self.softmax(conf.view(-1, self.num_classes)), # conf preds
96 | )
97 | else:
98 | output = (
99 | loc.view(loc.size(0), -1, 4),
100 | conf.view(conf.size(0), -1, self.num_classes),
101 | )
102 | return output
103 |
104 | def load_weights(self, base_file):
105 | other, ext = os.path.splitext(base_file)
106 |         if ext == '.pkl' or ext == '.pth':
107 | print('Loading weights into state dict...')
108 | self.load_state_dict(torch.load(base_file, map_location=lambda storage, loc: storage))
109 | print('Finished!')
110 | else:
111 | print('Sorry only .pth and .pkl files supported.')
112 |
113 |
114 | def add_extras(cfg, i, batch_norm=False, size=300):
115 | # Extra layers added to VGG for feature scaling
116 | layers = []
117 | in_channels = i
118 | flag = False
119 | for k, v in enumerate(cfg):
120 | if in_channels != 'S':
121 | if v == 'S':
122 | layers += [nn.Conv2d(in_channels, cfg[k + 1],
123 | kernel_size=(1, 3)[flag], stride=2, padding=1)]
124 | else:
125 | layers += [nn.Conv2d(in_channels, v, kernel_size=(1, 3)[flag])]
126 | flag = not flag
127 | in_channels = v
128 | if size == 512:
129 | layers.append(nn.Conv2d(in_channels, 128, kernel_size=1, stride=1))
130 | layers.append(nn.Conv2d(128, 256, kernel_size=4, stride=1, padding=1))
131 | return layers
132 |
133 |
134 | def multibox(vgg, extra_layers, cfg, num_classes):
135 | loc_layers = []
136 | conf_layers = []
137 | vgg_source = [24, -2]
138 | for k, v in enumerate(vgg_source):
139 | loc_layers += [nn.Conv2d(vgg[v].out_channels,
140 | cfg[k] * 4, kernel_size=3, padding=1)]
141 | conf_layers += [nn.Conv2d(vgg[v].out_channels,
142 | cfg[k] * num_classes, kernel_size=3, padding=1)]
143 | for k, v in enumerate(extra_layers[1::2], 2):
144 | loc_layers += [nn.Conv2d(v.out_channels, cfg[k]
145 | * 4, kernel_size=3, padding=1)]
146 | conf_layers += [nn.Conv2d(v.out_channels, cfg[k]
147 | * num_classes, kernel_size=3, padding=1)]
148 | return vgg, extra_layers, (loc_layers, conf_layers)
149 |
150 |
151 | extras = {
152 | '300': [256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256],
153 | '512': [256, 'S', 512, 128, 'S', 256, 128, 'S', 256, 128, 'S', 256],
154 | }
155 | mbox = {
156 | '300': [6, 6, 6, 6, 4, 4], # number of boxes per feature map location
157 | '512': [6, 6, 6, 6, 6, 4, 4],
158 | }
159 |
160 |
161 | def build_net(size=300, num_classes=21):
162 | if size != 300 and size != 512:
163 |         print("Error: Sorry only SSD300 and SSD512 are supported currently!")
164 | return
165 |
166 | return SSD(*multibox(vgg(vgg_base[str(size)], 3),
167 | add_extras(extras[str(size)], 1024, size=size),
168 | mbox[str(size)], num_classes), num_classes=num_classes,size=size)
169 |
--------------------------------------------------------------------------------
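
With the '300' settings above (`mbox['300'] = [6, 6, 6, 6, 4, 4]`), the total number of default boxes, and hence the middle dimension of `loc` and `conf`, follows from a short sum over the source feature maps. The feature-map sizes below are the standard SSD300 values (conv4_3, fc7 and the four extra layers) and are assumed to match this repo's `VOC_300` prior config:

```python
# Feature-map sizes for a 300x300 input: conv4_3, fc7, and the four extra layers.
feature_maps = [38, 19, 10, 5, 3, 1]
boxes_per_cell = [6, 6, 6, 6, 4, 4]          # mbox['300'] above

num_priors = sum(f * f * b for f, b in zip(feature_maps, boxes_per_cell))
print(num_priors)   # 11620 -> loc is [batch, 11620, 4], conf is [batch, 11620, num_classes]
```
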
/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/models/__init__.py
--------------------------------------------------------------------------------
/models/base_models.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | def vgg(cfg, i, batch_norm=False):
6 | layers = []
7 | in_channels = i
8 | for v in cfg:
9 | if v == 'M':
10 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
11 | elif v == 'C':
12 | layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)]
13 | else:
14 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
15 | if batch_norm:
16 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
17 | else:
18 | layers += [conv2d, nn.ReLU(inplace=True)]
19 | in_channels = v
20 | pool5 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
21 | conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6)
22 | conv7 = nn.Conv2d(1024, 1024, kernel_size=1)
23 | layers += [pool5, conv6,
24 | nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True)]
25 | return layers
26 |
27 |
28 | vgg_base = {
29 | '300': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M',
30 | 512, 512, 512],
31 | '512': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M',
32 | 512, 512, 512],
33 | }
34 |
35 |
36 | class BasicConv(nn.Module):
37 |
38 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True,
39 | bn=True, bias=False):
40 | super(BasicConv, self).__init__()
41 | self.out_channels = out_planes
42 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding,
43 | dilation=dilation, groups=groups, bias=bias)
44 | self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) if bn else None
45 | self.relu = nn.ReLU(inplace=True) if relu else None
46 |
47 | def forward(self, x):
48 | x = self.conv(x)
49 | if self.bn is not None:
50 | x = self.bn(x)
51 | if self.relu is not None:
52 | x = self.relu(x)
53 | return x
54 |
55 |
56 | class BasicRFB_a(nn.Module):
57 |
58 | def __init__(self, in_planes, out_planes, stride=1, scale=0.1):
59 | super(BasicRFB_a, self).__init__()
60 | self.scale = scale
61 | self.out_channels = out_planes
62 | inter_planes = in_planes // 4
63 |
64 | self.branch0 = nn.Sequential(
65 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1),
66 | BasicConv(inter_planes, inter_planes, kernel_size=3, stride=1, padding=1, relu=False)
67 | )
68 | self.branch1 = nn.Sequential(
69 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1),
70 | BasicConv(inter_planes, inter_planes, kernel_size=(3, 1), stride=1, padding=(1, 0)),
71 | BasicConv(inter_planes, inter_planes, kernel_size=3, stride=1, padding=3, dilation=3, relu=False)
72 | )
73 | self.branch2 = nn.Sequential(
74 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1),
75 | BasicConv(inter_planes, inter_planes, kernel_size=(1, 3), stride=stride, padding=(0, 1)),
76 | BasicConv(inter_planes, inter_planes, kernel_size=3, stride=1, padding=3, dilation=3, relu=False)
77 | )
78 | '''
79 | self.branch3 = nn.Sequential(
80 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1),
81 | BasicConv(inter_planes, inter_planes, kernel_size=3, stride=1, padding=1),
82 | BasicConv(inter_planes, inter_planes, kernel_size=3, stride=1, padding=3, dilation=3, relu=False)
83 | )
84 | '''
85 | self.branch3 = nn.Sequential(
86 | BasicConv(in_planes, inter_planes // 2, kernel_size=1, stride=1),
87 | BasicConv(inter_planes // 2, (inter_planes // 4) * 3, kernel_size=(1, 3), stride=1, padding=(0, 1)),
88 | BasicConv((inter_planes // 4) * 3, inter_planes, kernel_size=(3, 1), stride=stride, padding=(1, 0)),
89 | BasicConv(inter_planes, inter_planes, kernel_size=3, stride=1, padding=5, dilation=5, relu=False)
90 | )
91 |
92 | self.ConvLinear = BasicConv(4 * inter_planes, out_planes, kernel_size=1, stride=1, relu=False)
93 | self.shortcut = BasicConv(in_planes, out_planes, kernel_size=1, stride=stride, relu=False)
94 | self.relu = nn.ReLU(inplace=False)
95 |
96 | def forward(self, x):
97 | x0 = self.branch0(x)
98 | x1 = self.branch1(x)
99 | x2 = self.branch2(x)
100 | x3 = self.branch3(x)
101 |
102 | out = torch.cat((x0, x1, x2, x3), 1)
103 | out = self.ConvLinear(out)
104 | short = self.shortcut(x)
105 | out = out * self.scale + short
106 | out = self.relu(out)
107 |
108 | return out
109 |
--------------------------------------------------------------------------------
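
`BasicRFB_a` splits its input into four branches of `in_planes // 4` channels, applies progressively dilated 3x3 convolutions, concatenates back to `in_planes` channels, projects with a 1x1 `ConvLinear`, and adds a scaled 1x1 shortcut, so spatial size (and here channel count) is preserved. A quick shape check, assuming it is run from the repo root so the package import resolves:

```python
import torch
from torch.autograd import Variable
from models.base_models import BasicRFB_a

block = BasicRFB_a(in_planes=512, out_planes=512, stride=1, scale=1.0)
x = Variable(torch.randn(2, 512, 38, 38))
out = block(x)
print(out.size())   # torch.Size([2, 512, 38, 38]) -- paddings are chosen to preserve spatial size
```
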
/models/densenet.py:
--------------------------------------------------------------------------------
1 | '''DenseNet in PyTorch.'''
2 | import math
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 |
8 | from torch.autograd import Variable
9 |
10 |
11 | class Bottleneck(nn.Module):
12 | def __init__(self, in_planes, growth_rate):
13 | super(Bottleneck, self).__init__()
14 | self.bn1 = nn.BatchNorm2d(in_planes)
15 | self.conv1 = nn.Conv2d(in_planes, 4*growth_rate, kernel_size=1, bias=False)
16 | self.bn2 = nn.BatchNorm2d(4*growth_rate)
17 | self.conv2 = nn.Conv2d(4*growth_rate, growth_rate, kernel_size=3, padding=1, bias=False)
18 |
19 | def forward(self, x):
20 | out = self.conv1(F.relu(self.bn1(x)))
21 | out = self.conv2(F.relu(self.bn2(out)))
22 | out = torch.cat([out,x], 1)
23 | return out
24 |
25 |
26 | class Transition(nn.Module):
27 | def __init__(self, in_planes, out_planes):
28 | super(Transition, self).__init__()
29 | self.bn = nn.BatchNorm2d(in_planes)
30 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False)
31 |
32 | def forward(self, x):
33 | out = self.conv(F.relu(self.bn(x)))
34 | out = F.avg_pool2d(out, 2)
35 | return out
36 |
37 |
38 | class DenseNet(nn.Module):
39 | def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=512):
40 | super(DenseNet, self).__init__()
41 | self.growth_rate = growth_rate
42 |
43 | num_planes = 2*growth_rate
44 | self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, stride=2, padding=1, bias=False)
45 |
46 | self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0])
47 | num_planes += nblocks[0]*growth_rate
48 | out_planes = int(math.floor(num_planes*reduction))
49 | self.trans1 = Transition(num_planes, out_planes)
50 | num_planes = out_planes
51 |
52 | self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1])
53 | num_planes += nblocks[1]*growth_rate
54 | out_planes = int(math.floor(num_planes*reduction))
55 | self.trans2 = Transition(num_planes, out_planes)
56 | num_planes = out_planes
57 |
58 | self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2])
59 | num_planes += nblocks[2]*growth_rate
60 | out_planes = int(math.floor(num_planes*reduction))
61 | self.trans3 = Transition(num_planes, out_planes)
62 | num_planes = out_planes
63 |
64 | self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3])
65 | num_planes += nblocks[3]*growth_rate
66 |
67 | self.bn = nn.BatchNorm2d(num_planes)
68 | self.linear = nn.Linear(num_planes, num_classes)
69 | self.fc_bn = nn.BatchNorm1d(512)
70 |
71 | def _make_dense_layers(self, block, in_planes, nblock):
72 | layers = []
73 | for i in range(nblock):
74 | layers.append(block(in_planes, self.growth_rate))
75 | in_planes += self.growth_rate
76 | return nn.Sequential(*layers)
77 |
78 | def forward(self, x):
79 | out = self.conv1(x)
80 | out = self.trans1(self.dense1(out))
81 | out = self.trans2(self.dense2(out))
82 | out = self.trans3(self.dense3(out))
83 | out = self.dense4(out)
84 | out = F.avg_pool2d(F.relu(self.bn(out)), 7)
85 | out = out.view(out.size(0), -1)
86 | out = self.linear(out)
87 | out = self.fc_bn(out)
88 | return out
89 |
90 | def DenseNet121():
91 | return DenseNet(Bottleneck, [6,12,24,16], growth_rate=12)
92 |
93 | def DenseNet169():
94 | return DenseNet(Bottleneck, [6,12,32,32], growth_rate=32)
95 |
96 | def DenseNet201():
97 | return DenseNet(Bottleneck, [6,12,48,32], growth_rate=32)
98 |
99 | def DenseNet161():
100 | return DenseNet(Bottleneck, [6,12,36,24], growth_rate=48)
101 |
102 | def test():
103 | net = DenseNet121()
104 | x = torch.randn(2,3,112,112)
105 | y = net(Variable(x))
106 | print(y.size())
107 |
108 | #test()
109 |
--------------------------------------------------------------------------------
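
The channel bookkeeping in `DenseNet.__init__` follows a simple recurrence: every dense block adds `nblocks[i] * growth_rate` channels, and each of the first three transitions halves the count (`reduction=0.5`). For the `DenseNet121()` variant defined above (growth_rate=12, blocks [6, 12, 24, 16]) the progression works out as:

```python
growth_rate, reduction = 12, 0.5
nblocks = [6, 12, 24, 16]

planes = 2 * growth_rate                   # 24 channels after conv1
for i, n in enumerate(nblocks):
    planes += n * growth_rate              # dense block i adds n * growth_rate channels
    if i < 3:                              # transitions follow blocks 1-3 only
        planes = int(planes * reduction)
    print(planes)
# 48, 96, 192, 384 -> final BatchNorm2d(384) and Linear(384, num_classes)
```
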
/models/mobilenet.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 |
3 | """
4 | Creates a MobileNet Model as defined in:
5 | Andrew G. Howard Menglong Zhu Bo Chen, et.al. (2017).
6 | MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications.
7 | (c) Yang Lu
8 | """
9 | import math
10 | import torch.nn as nn
11 |
12 | __all__ = ['DepthWiseBlock', 'mobilenet', 'mobilenet_2', 'mobilenet_1', 'mobilenet_075', 'mobilenet_05',
13 | 'mobilenet_025']
14 |
15 |
16 | class DepthWiseBlock(nn.Module):
17 | def __init__(self, inplanes, planes, stride=1, padding=1):
18 | super(DepthWiseBlock, self).__init__()
19 | inplanes, planes = int(inplanes), int(planes)
20 | self.conv_dw = nn.Conv2d(inplanes, inplanes, kernel_size=3, padding=padding, stride=stride, groups=inplanes,
21 | bias=False)
22 | self.bn_dw = nn.BatchNorm2d(inplanes)
23 | self.conv_sep = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, padding=0, bias=False)
24 | self.bn_sep = nn.BatchNorm2d(planes)
25 | self.relu = nn.ReLU(inplace=True)
26 |
27 | def forward(self, x):
28 | out = self.conv_dw(x)
29 | out = self.bn_dw(out)
30 | out = self.relu(out)
31 |
32 | out = self.conv_sep(out)
33 | out = self.bn_sep(out)
34 | out = self.relu(out)
35 |
36 | return out
37 |
38 |
39 | class MobileNet(nn.Module):
40 | def __init__(self, widen_factor=1.0, num_classes=1000):
41 | """ Constructor
42 | Args:
43 | widen_factor: config of widen_factor
44 | num_classes: number of classes
45 | """
46 | super(MobileNet, self).__init__()
47 |
48 | block = DepthWiseBlock
49 |
50 | self.conv1 = nn.Conv2d(3, int(32 * widen_factor), kernel_size=3, stride=2, padding=1, bias=False)
51 | self.bn1 = nn.BatchNorm2d(int(32 * widen_factor))
52 | self.relu = nn.ReLU(inplace=True)
53 |
54 | self.dw2_1 = block(32 * widen_factor, 64 * widen_factor)
55 | self.dw2_2 = block(64 * widen_factor, 128 * widen_factor, stride=2)
56 |
57 | self.dw3_1 = block(128 * widen_factor, 128 * widen_factor)
58 | self.dw3_2 = block(128 * widen_factor, 256 * widen_factor, stride=2)
59 |
60 | self.dw4_1 = block(256 * widen_factor, 256 * widen_factor)
61 | self.dw4_2 = block(256 * widen_factor, 512 * widen_factor, stride=2)
62 |
63 | self.dw5_1 = block(512 * widen_factor, 512 * widen_factor)
64 | self.dw5_2 = block(512 * widen_factor, 512 * widen_factor)
65 | self.dw5_3 = block(512 * widen_factor, 512 * widen_factor)
66 | self.dw5_4 = block(512 * widen_factor, 512 * widen_factor)
67 | self.dw5_5 = block(512 * widen_factor, 512 * widen_factor)
68 | self.dw5_6 = block(512 * widen_factor, 1024 * widen_factor, stride=2)
69 |
70 | self.dw6 = block(1024 * widen_factor, 1024 * widen_factor)
71 |
72 | self.avgpool = nn.AdaptiveAvgPool2d(1)
73 | self.fc = nn.Linear(int(1024 * widen_factor), num_classes)
74 |
75 | for m in self.modules():
76 | if isinstance(m, nn.Conv2d):
77 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
78 | m.weight.data.normal_(0, math.sqrt(2. / n))
79 | elif isinstance(m, nn.BatchNorm2d):
80 | m.weight.data.fill_(1)
81 | m.bias.data.zero_()
82 |
83 | def forward(self, x):
84 | x = self.conv1(x)
85 | x = self.bn1(x)
86 | x = self.relu(x)
87 |
88 | x = self.dw2_1(x)
89 | x = self.dw2_2(x)
90 | x = self.dw3_1(x)
91 | x = self.dw3_2(x)
92 | x0 = self.dw4_1(x)
93 | x = self.dw4_2(x0)
94 | x = self.dw5_1(x)
95 | x = self.dw5_2(x)
96 | x = self.dw5_3(x)
97 | x = self.dw5_4(x)
98 | x1 = self.dw5_5(x)
99 | x = self.dw5_6(x1)
100 | x2 = self.dw6(x)
101 | return x0, x1, x2
102 |
103 |
104 | def mobilenet(widen_factor=1.0, num_classes=1000):
105 | """
106 | Construct MobileNet.
107 | """
108 | model = MobileNet(widen_factor=widen_factor, num_classes=num_classes)
109 | return model
110 |
111 |
112 | def mobilenet_2():
113 | """
114 | Construct MobileNet.
115 | """
116 | model = MobileNet(widen_factor=2.0, num_classes=1000)
117 | return model
118 |
119 |
120 | def mobilenet_1():
121 | """
122 | Construct MobileNet.
123 | """
124 | model = MobileNet(widen_factor=1.0, num_classes=1000)
125 | return model
126 |
127 |
128 | def mobilenet_075():
129 | """
130 | Construct MobileNet.
131 | """
132 | model = MobileNet(widen_factor=0.75, num_classes=1000)
133 | return model
134 |
135 |
136 | def mobilenet_05():
137 | """
138 | Construct MobileNet.
139 | """
140 | model = MobileNet(widen_factor=0.5, num_classes=1000)
141 | return model
142 |
143 |
144 | def mobilenet_025():
145 | """
146 | Construct MobileNet.
147 | """
148 | model = MobileNet(widen_factor=0.25, num_classes=1000)
149 | return model
150 |
151 |
152 | if __name__ == '__main__':
153 | mobilenet = mobilenet_1()
154 | print(mobilenet)
155 | print(mobilenet.state_dict().keys())
156 |
--------------------------------------------------------------------------------
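
`DepthWiseBlock` factors a dense 3x3 convolution into a per-channel 3x3 depthwise convolution followed by a 1x1 pointwise convolution; that factorization is where MobileNet's parameter savings come from. A back-of-the-envelope comparison for a single 256-to-512 block (weights only, BatchNorm ignored):

```python
in_ch, out_ch, k = 256, 512, 3

standard = in_ch * out_ch * k * k                        # full 3x3 conv: 1,179,648 weights
depthwise_separable = in_ch * k * k + in_ch * out_ch     # 2,304 + 131,072 = 133,376 weights

print(standard, depthwise_separable, round(standard / depthwise_separable, 1))  # ~8.8x fewer
```
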
/models/resnet.py:
--------------------------------------------------------------------------------
1 | '''ResNet in PyTorch.
2 | Reference:
3 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
4 | Deep Residual Learning for Image Recognition. arXiv:1512.03385
5 | '''
6 | import torch
7 | import torch.nn as nn
8 | import torch.nn.functional as F
9 |
10 | from torch.autograd import Variable
11 |
12 |
13 | class BasicBlock(nn.Module):
14 | expansion = 1
15 |
16 | def __init__(self, in_planes, planes, stride=1):
17 | super(BasicBlock, self).__init__()
18 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
19 | self.bn1 = nn.BatchNorm2d(planes)
20 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
21 | self.bn2 = nn.BatchNorm2d(planes)
22 |
23 | self.shortcut = nn.Sequential()
24 | if stride != 1 or in_planes != self.expansion*planes:
25 | self.shortcut = nn.Sequential(
26 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
27 | nn.BatchNorm2d(self.expansion*planes)
28 | )
29 |
30 | def forward(self, x):
31 | out = F.relu(self.bn1(self.conv1(x)))
32 | out = self.bn2(self.conv2(out))
33 | out += self.shortcut(x)
34 | out = F.relu(out)
35 | return out
36 |
37 |
38 | class Bottleneck(nn.Module):
39 | expansion = 4
40 |
41 | def __init__(self, in_planes, planes, stride=1):
42 | super(Bottleneck, self).__init__()
43 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
44 | self.bn1 = nn.BatchNorm2d(planes)
45 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
46 | self.bn2 = nn.BatchNorm2d(planes)
47 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
48 | self.bn3 = nn.BatchNorm2d(self.expansion*planes)
49 |
50 | self.shortcut = nn.Sequential()
51 | if stride != 1 or in_planes != self.expansion*planes:
52 | self.shortcut = nn.Sequential(
53 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
54 | nn.BatchNorm2d(self.expansion*planes)
55 | )
56 |
57 | def forward(self, x):
58 | out = F.relu(self.bn1(self.conv1(x)))
59 | out = F.relu(self.bn2(self.conv2(out)))
60 | out = self.bn3(self.conv3(out))
61 | out += self.shortcut(x)
62 | out = F.relu(out)
63 | return out
64 |
65 |
66 | class ResNet(nn.Module):
67 | def __init__(self, block, num_blocks, num_classes=512):
68 | super(ResNet, self).__init__()
69 | self.in_planes = 64
70 |
71 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=1, bias=False)
72 | self.bn1 = nn.BatchNorm2d(64)
73 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
74 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
75 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
76 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
77 | self.linear = nn.Linear(512*block.expansion, num_classes)
78 | self.fc_bn = nn.BatchNorm1d(512)
79 |
80 | def _make_layer(self, block, planes, num_blocks, stride):
81 | strides = [stride] + [1]*(num_blocks-1)
82 | layers = []
83 | for stride in strides:
84 | layers.append(block(self.in_planes, planes, stride))
85 | self.in_planes = planes * block.expansion
86 | return nn.Sequential(*layers)
87 |
88 | def forward(self, x):
89 | out = F.relu(self.bn1(self.conv1(x)))
90 | out = self.layer1(out)
91 | out = self.layer2(out)
92 | out = self.layer3(out)
93 | out = self.layer4(out)
94 | out = F.avg_pool2d(out, 7)
95 | out = out.view(out.size(0), -1)
96 | out = self.linear(out)
97 | out = self.fc_bn(out)
98 | return out
99 |
100 |
101 | def ResNet18():
102 | return ResNet(BasicBlock, [2,2,2,2])
103 |
104 | def ResNet34():
105 | return ResNet(BasicBlock, [3,4,6,3])
106 |
107 | def ResNet50():
108 | return ResNet(Bottleneck, [3,4,6,3])
109 |
110 | def ResNet101():
111 | return ResNet(Bottleneck, [3,4,23,3])
112 |
113 | def ResNet152():
114 | return ResNet(Bottleneck, [3,8,36,3])
115 |
116 |
117 | def test():
118 | net = ResNet34()
119 | y = net(Variable(torch.randn(32,3,112,112)))
120 | print(y.size())
121 |
122 | #test()
123 |
--------------------------------------------------------------------------------
/models/vgg.py:
--------------------------------------------------------------------------------
1 | '''VGG11/13/16/19 in Pytorch.'''
2 | import torch
3 | import torch.nn as nn
4 | from torch.autograd import Variable
5 |
6 |
7 | cfg = {
8 | 'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
9 | 'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
10 | 'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
11 | 'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
12 | }
13 |
14 |
15 | class VGG(nn.Module):
16 | def __init__(self, vgg_name):
17 | super(VGG, self).__init__()
18 | self.features = self._make_layers(cfg[vgg_name])
19 | self.classifier = nn.Linear(512, 512)
20 | self.fc_bn = nn.BatchNorm1d(512)
21 |
22 | def forward(self, x):
23 | out = self.features(x)
24 | out = out.view(out.size(0), -1)
25 | out = self.classifier(out)
26 | out = self.fc_bn(out)
27 | return out
28 |
29 | def _make_layers(self, cfg):
30 | layers = []
31 | in_channels = 3
32 | for x in cfg:
33 | if x == 'M':
34 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
35 | else:
36 | layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
37 | nn.BatchNorm2d(x),
38 | nn.ReLU(inplace=True)]
39 | in_channels = x
40 | layers += [nn.AvgPool2d(kernel_size=3, stride=3)]
41 | return nn.Sequential(*layers)
42 |
43 | def test():
44 | net = VGG('VGG11')
45 | x = torch.randn(2,3,112,112)
46 | print(net(Variable(x)).size())
47 |
48 | #test()
49 |
--------------------------------------------------------------------------------
/multi_thread_score_pedestrian_detection.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | import argparse
4 | import os.path as osp
5 | from utils.utils import progress_bar
6 | import time
7 |
8 | def check_size(submission_file):
9 | max_size = 60*1024*1024
10 | if osp.getsize(submission_file) > max_size:
11 |         raise IOError('File size exceeds the specified maximum size, which is 60M for the server.')
12 |
13 | def judge_overlap(pbox,ignore_box):
14 | overlap=[]
15 | delete=[]
16 | for p in pbox:
17 | pl=min(p[0],p[2])
18 | pr=max(p[0],p[2])
19 | pb=min(p[1],p[3])
20 | pt=max(p[1],p[3])
21 | s_p=(pr-pl)*(pt-pb)
22 | s_lap=-0.01
23 | for c in ignore_box:
24 | cl=min(c[0],c[2])
25 | cr=max(c[0],c[2])
26 | cb=min(c[1],c[3])
27 | ct=max(c[1],c[3])
28 |             if not (cr<pl or cl>pr or ct<pb or cb>pt):
29 | s_lap+=(min(cr,pr)-max(cl,pl))*(min(ct,pt)-max(cb,pb))
30 | if s_lap>0:
31 | overlap.append([p,s_lap/s_p])
32 | for o in overlap:
33 | if o[1]>0.5:
34 | delete.append(o[0])
35 | remain_id = [p for p in pbox if p not in delete]
36 | return remain_id
37 |
38 | def parse_ignore_file(ignore_file):
39 | with open(ignore_file,'r') as f:
40 | lines = f.readlines()
41 | ig = [x.strip().split() for x in lines]
42 | ignore = {}
43 | for item in ig:
44 | key = item[0]
45 | ignore_num = (len(item)-1)/4
46 | bbox = []
47 | for i in range(int(ignore_num)):
48 | b = []
49 | b.append(int(item[1+4*i]))
50 | b.append(int(item[2+4*i]))
51 | b.append(int(item[1+4*i])+int(item[3+4*i]))
52 | b.append(int(item[2+4*i])+int(item[4+4*i]))
53 | bbox.append(b)
54 | ignore[key] = bbox
55 | return ignore
56 |
57 | def parse_submission(submission_file,ignore_file):
58 | ignore_zone = parse_ignore_file(ignore_file)
59 | ignore_keys = ignore_zone.keys()
60 | with open(submission_file, 'r') as f:
61 | lines = f.readlines()
62 | splitlines = [x.strip().split() for x in lines]
63 | image_ids = [x[0] for x in splitlines]
64 | confidence = np.array([float(x[1]) for x in splitlines])
65 | BB = []
66 | for x in splitlines:
67 | bb = []
68 | bb.append(float(x[2]))
69 | bb.append(float(x[3]))
70 | bb.append(float(x[2])+float(x[4]))
71 | bb.append(float(x[3])+float(x[5]))
72 | BB.append(bb)
73 |
74 | sub_key = []
75 | for x in image_ids:
76 | if x not in sub_key:
77 | sub_key.append(x)
78 | final_confidence = []
79 | final_ids = []
80 | final_BB = []
81 |
82 | for key in sub_key:
83 | find = [i for i,v in enumerate(image_ids) if v == key]
84 | BB_sub = [BB[i] for i in find]
85 | confid_sub = [confidence[i] for i in find]
86 | if key in ignore_keys:
87 | ignore_bbox = ignore_zone[key]
88 | bbox_remain = judge_overlap(BB_sub,ignore_bbox)
89 | find_remain = []
90 | for i,v in enumerate(BB_sub):
91 | if v in bbox_remain:
92 | find_remain.append(i)
93 | confid_remain = [confid_sub[i] for i in find_remain]
94 | BB_sub = bbox_remain
95 | confid_sub = confid_remain
96 | ids_sub = [key]*len(BB_sub)
97 | final_ids.extend(ids_sub)
98 | final_confidence.extend(confid_sub)
99 | final_BB.extend(BB_sub)
100 |
101 | final_BB = np.array(final_BB)
102 | final_confidence = np.array(final_confidence)
103 | sorted_ind = np.argsort(-final_confidence)
104 | final_BB = final_BB[sorted_ind, :]
105 | final_ids = [final_ids[x] for x in sorted_ind]
106 | return final_ids, final_BB
107 |
108 | def parse_gt_annotation(gt_file,ignore_file):
109 | ignore_zone = parse_ignore_file(ignore_file)
110 | ignore_keys = ignore_zone.keys()
111 | with open(gt_file, 'r') as f:
112 | lines = f.readlines()
113 | info = [x.strip().split() for x in lines]
114 | gt = {}
115 | for item in info:
116 | bbox = []
117 | bbox_num = (len(item)-1)/5
118 | for i in range(int(bbox_num)):
119 | b = []
120 | b.append(int(item[2+5*i]))
121 | b.append(int(item[3+5*i]))
122 | b.append(int(item[2+5*i])+int(item[4+5*i]))
123 | b.append(int(item[3+5*i])+int(item[5+5*i]))
124 | bbox.append(b)
125 | if item[0] in ignore_keys:
126 | ignore_bbox = ignore_zone[item[0]]
127 | bbox_remain = judge_overlap(bbox,ignore_bbox)
128 | else:
129 | bbox_remain = bbox
130 | gt[item[0]] = np.array(bbox_remain)
131 | return gt
132 |
133 | def compute_ap(rec, prec):
134 | mrec = np.concatenate(([0.], rec, [1.]))
135 | mpre = np.concatenate(([0.], prec, [0.]))
136 | for i in range(mpre.size - 1, 0, -1):
137 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
138 | i = np.where(mrec[1:] != mrec[:-1])[0]
139 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
140 | return ap
141 |
142 |
143 | def pedestrian_eval(aap,input, gt_file, ignore_file, ovthresh):
144 | gt = parse_gt_annotation(gt_file,ignore_file)
145 | image_ids, BB = parse_submission(input,ignore_file)
146 | npos = 0
147 | recs = {}
148 | for key in gt.keys():
149 | det = [False]*len(gt[key])
150 | recs[key] = {'bbox': gt[key], 'det': det}
151 | npos += len(gt[key])
152 | nd = len(image_ids)
153 | tp = np.zeros(nd)
154 | fp = np.zeros(nd)
155 | for d in range(nd):
156 | if image_ids[d] not in recs.keys():
157 | raise KeyError("Can not find image {} in the groundtruth file, did you submit the result file for the right dataset?".format(image_ids[d]))
158 | for d in range(nd):
159 | R = recs[image_ids[d]]
160 | bb = BB[d, :].astype(float)
161 | ovmax = -np.inf
162 | BBGT = R['bbox'].astype(float)
163 | if BBGT.size > 0:
164 | ixmin = np.maximum(BBGT[:, 0], bb[0])
165 | iymin = np.maximum(BBGT[:, 1], bb[1])
166 | ixmax = np.minimum(BBGT[:, 2], bb[2])
167 | iymax = np.minimum(BBGT[:, 3], bb[3])
168 | iw = np.maximum(ixmax - ixmin + 1., 0.)
169 | ih = np.maximum(iymax - iymin + 1., 0.)
170 | inters = iw * ih
171 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
172 | (BBGT[:, 2] - BBGT[:, 0] + 1.) *
173 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
174 | overlaps = inters / uni
175 | ovmax = np.max(overlaps)
176 | jmax = np.argmax(overlaps)
177 |
178 | if ovmax > ovthresh:
179 | if not R['det'][jmax]:
180 | tp[d] = 1.
181 | R['det'][jmax] = 1
182 | else:
183 | fp[d] = 1.
184 | else:
185 | fp[d] = 1.
186 | fp = np.cumsum(fp)
187 | tp = np.cumsum(tp)
188 | rec = tp / float(npos)
189 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
190 | ap = compute_ap(rec, prec)
191 | aap.append(ap)
192 | return ap
193 |
194 | import threading
195 | def wider_ped_eval(input, gt,ignore_file):
196 | aap = []
197 | threads=[]
198 | for ove in np.arange(0.5, 1.0, 0.05):
199 | # pedestrian_eval(aap, input, gt,ignore_file, ovthresh=ove)
200 | t=threading.Thread(target=pedestrian_eval, args=(aap,input,gt,ignore_file),kwargs={'ovthresh':ove})
201 | threads.append(t)
202 | t.start()
203 | time.sleep(5)
204 |
205 | print("Total threads:{}".format(len(threads)))
206 | for index,t in enumerate(threads):
207 | progress_bar(index, len(threads)," executing.")
208 | t.join()
209 | mAP = np.average(aap)
210 | return mAP
211 |
212 |
213 | def eval():
214 | input_dir = './'
215 | output_dir = './'
216 | ref_dir = osp.join(input_dir, 'ref')
217 | submit_dir = osp.join(input_dir, 'res')
218 | submit_file = 'submit_files/scores_validation.txt'
219 | gt_file = osp.join(ref_dir, 'val_annotations.txt')
220 | ignore_file = osp.join(ref_dir, 'pedestrian_ignore_part_val.txt')
221 | check_size(submit_file)
222 | mAP = wider_ped_eval(submit_file, gt_file, ignore_file)
223 | out = {'Average AP': mAP}
224 | strings = ['{}: {}\n'.format(k, v) for k, v in out.items()]
225 | open(os.path.join(output_dir, 'scores_out.txt'), 'w').writelines(strings)
226 | return mAP
227 |
228 |
229 | if __name__ == '__main__':
230 | # parser = argparse.ArgumentParser()
231 | # parser.add_argument("input", type=str, default='./')
232 | # parser.add_argument("output", type=str, default='./')
233 | # args = parser.parse_args()
234 | eval()
--------------------------------------------------------------------------------
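
`compute_ap` above implements VOC-style all-point average precision: recall and precision are padded with sentinels, precision is made monotonically non-increasing from right to left, and precision is then integrated over the recall steps. A tiny worked example with made-up cumulative recall/precision values (the function body is copied from the file so the snippet is self-contained):

```python
import numpy as np

def compute_ap(rec, prec):
    mrec = np.concatenate(([0.], rec, [1.]))
    mpre = np.concatenate(([0.], prec, [0.]))
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
    i = np.where(mrec[1:] != mrec[:-1])[0]
    return np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])

# Three detections ranked by confidence (TP, FP, TP) against two ground-truth boxes.
rec = np.array([0.5, 0.5, 1.0])     # cumulative recall after each detection
prec = np.array([1.0, 0.5, 2/3])    # cumulative precision after each detection
print(compute_ap(rec, prec))        # 0.8333... = 0.5 * 1.0 + 0.5 * (2/3)
```
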
/pretrainedmodels/__init__.py:
--------------------------------------------------------------------------------
1 | from .version import __version__
2 |
3 | from . import models
4 | from . import datasets
5 | from .models.utils import pretrained_settings
6 | from .models.utils import model_names
7 |
8 | # to support pretrainedmodels.__dict__['nasnetalarge']
9 | # but deprecated
10 | from .models.fbresnet import fbresnet152
11 | from .models.cafferesnet import cafferesnet101
12 | from .models.bninception import bninception
13 | from .models.resnext import resnext101_32x4d
14 | from .models.resnext import resnext101_64x4d
15 | from .models.inceptionv4 import inceptionv4
16 | from .models.inceptionresnetv2 import inceptionresnetv2
17 | from .models.nasnet import nasnetalarge
18 | from .models.nasnet_mobile import nasnetamobile
19 | from .models.torchvision_models import alexnet
20 | from .models.torchvision_models import densenet121
21 | from .models.torchvision_models import densenet169
22 | from .models.torchvision_models import densenet201
23 | from .models.torchvision_models import densenet161
24 | from .models.torchvision_models import resnet18
25 | from .models.torchvision_models import resnet34
26 | from .models.torchvision_models import resnet50
27 | from .models.torchvision_models import resnet101
28 | from .models.torchvision_models import resnet152
29 | from .models.torchvision_models import inceptionv3
30 | from .models.torchvision_models import squeezenet1_0
31 | from .models.torchvision_models import squeezenet1_1
32 | from .models.torchvision_models import vgg11
33 | from .models.torchvision_models import vgg11_bn
34 | from .models.torchvision_models import vgg13
35 | from .models.torchvision_models import vgg13_bn
36 | from .models.torchvision_models import vgg16
37 | from .models.torchvision_models import vgg16_bn
38 | from .models.torchvision_models import vgg19_bn
39 | from .models.torchvision_models import vgg19
40 | from .models.dpn import dpn68
41 | from .models.dpn import dpn68b
42 | from .models.dpn import dpn92
43 | from .models.dpn import dpn98
44 | from .models.dpn import dpn131
45 | from .models.dpn import dpn107
46 | from .models.xception import xception
47 | from .models.senet import senet154
48 | from .models.senet import se_resnet50
49 | from .models.senet import se_resnet101
50 | from .models.senet import se_resnet152
51 | from .models.senet import se_resnext50_32x4d
52 | from .models.senet import se_resnext101_32x4d
53 | from .models.pnasnet import pnasnet5large
54 | from .models.polynet import polynet
55 |
--------------------------------------------------------------------------------
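A minimal usage sketch for the constructors re-exported above (a hedged example, not a file in the repository): every registered architecture can be looked up through pretrainedmodels.__dict__, which is what the 'nasnetalarge' comment refers to. It assumes the package is importable, PyTorch >= 0.4, and that the weight URL is reachable.

import torch
import pretrainedmodels

print(pretrainedmodels.model_names[:5])        # registered architecture names

model_name = 'resnet18'                        # any entry of model_names
model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet')
model.eval()

dummy = torch.randn(1, *model.input_size)      # input_size is attached by the constructor
with torch.no_grad():
    logits = model(dummy)
print(logits.shape)                            # torch.Size([1, 1000])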
/pretrainedmodels/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .voc import Voc2007Classification
--------------------------------------------------------------------------------
/pretrainedmodels/datasets/utils.py:
--------------------------------------------------------------------------------
1 | import math
2 | from urllib.request import urlretrieve
3 |
4 | import torch
5 | from PIL import Image
6 | from tqdm import tqdm
7 |
8 | def load_imagenet_classes(path_synsets='data/imagenet_synsets.txt',
9 | path_classes='data/imagenet_classes.txt'):
10 | with open(path_synsets, 'r') as f:
11 | synsets = f.readlines()
12 |
13 | synsets = [x.strip() for x in synsets]
14 | splits = [line.split(' ') for line in synsets]
15 | key_to_classname = {spl[0]:' '.join(spl[1:]) for spl in splits}
16 |
17 | with open(path_classes, 'r') as f:
18 | class_id_to_key = f.readlines()
19 |
20 | class_id_to_key = [x.strip() for x in class_id_to_key]
21 |
22 | cid_to_cname = []
23 | for i in range(len(class_id_to_key)):
24 | key = class_id_to_key[i]
25 | cname = key_to_classname[key]
26 | cid_to_cname.append(cname)
27 |
28 | return cid_to_cname
29 |
30 |
31 | class Warp(object):
32 | def __init__(self, size, interpolation=Image.BILINEAR):
33 | self.size = int(size)
34 | self.interpolation = interpolation
35 |
36 | def __call__(self, img):
37 | return img.resize((self.size, self.size), self.interpolation)
38 |
39 | def __str__(self):
40 | return self.__class__.__name__ + ' (size={size}, interpolation={interpolation})'.format(size=self.size,
41 | interpolation=self.interpolation)
42 |
43 |
44 | def download_url(url, destination=None, progress_bar=True):
45 | """Download a URL to a local file.
46 |
47 | Parameters
48 | ----------
49 | url : str
50 | The URL to download.
51 | destination : str, None
52 | The destination of the file. If None is given the file is saved to a temporary directory.
53 | progress_bar : bool
54 | Whether to show a command-line progress bar while downloading.
55 |
56 | Returns
57 | -------
58 | filename : str
59 | The location of the downloaded file.
60 |
61 | Notes
62 | -----
63 | Progress bar use/example adapted from tqdm documentation: https://github.com/tqdm/tqdm
64 | """
65 |
66 | def my_hook(t):
67 | last_b = [0]
68 |
69 | def inner(b=1, bsize=1, tsize=None):
70 | if tsize is not None:
71 | t.total = tsize
72 | if b > 0:
73 | t.update((b - last_b[0]) * bsize)
74 | last_b[0] = b
75 |
76 | return inner
77 |
78 | if progress_bar:
79 | with tqdm(unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1]) as t:
80 | filename, _ = urlretrieve(url, filename=destination, reporthook=my_hook(t))
81 | else:
82 | filename, _ = urlretrieve(url, filename=destination)
83 |     return filename
84 |
85 | class AveragePrecisionMeter(object):
86 | """
87 | The APMeter measures the average precision per class.
88 | The APMeter is designed to operate on `NxK` Tensors `output` and
89 | `target`, and optionally a `Nx1` Tensor weight where (1) the `output`
90 | contains model output scores for `N` examples and `K` classes that ought to
91 | be higher when the model is more convinced that the example should be
92 | positively labeled, and smaller when the model believes the example should
93 | be negatively labeled (for instance, the output of a sigmoid function); (2)
94 | the `target` contains only values 0 (for negative examples) and 1
95 | (for positive examples); and (3) the `weight` ( > 0) represents weight for
96 | each sample.
97 | """
98 |
99 | def __init__(self, difficult_examples=False):
100 | super(AveragePrecisionMeter, self).__init__()
101 | self.reset()
102 | self.difficult_examples = difficult_examples
103 |
104 | def reset(self):
105 | """Resets the meter with empty member variables"""
106 | self.scores = torch.FloatTensor(torch.FloatStorage())
107 | self.targets = torch.LongTensor(torch.LongStorage())
108 |
109 | def add(self, output, target):
110 | """
111 | Args:
112 | output (Tensor): NxK tensor that for each of the N examples
113 | indicates the probability of the example belonging to each of
114 | the K classes, according to the model. The probabilities should
115 | sum to one over all classes
116 |             target (Tensor): binary NxK tensor that encodes which of the K
117 | classes are associated with the N-th input
118 | (eg: a row [0, 1, 0, 1] indicates that the example is
119 | associated with classes 2 and 4)
120 | weight (optional, Tensor): Nx1 tensor representing the weight for
121 | each example (each weight > 0)
122 | """
123 | if not torch.is_tensor(output):
124 | output = torch.from_numpy(output)
125 | if not torch.is_tensor(target):
126 | target = torch.from_numpy(target)
127 |
128 | if output.dim() == 1:
129 | output = output.view(-1, 1)
130 | else:
131 | assert output.dim() == 2, \
132 | 'wrong output size (should be 1D or 2D with one column \
133 | per class)'
134 | if target.dim() == 1:
135 | target = target.view(-1, 1)
136 | else:
137 | assert target.dim() == 2, \
138 | 'wrong target size (should be 1D or 2D with one column \
139 | per class)'
140 | if self.scores.numel() > 0:
141 | assert target.size(1) == self.targets.size(1), \
142 | 'dimensions for output should match previously added examples.'
143 |
144 | # make sure storage is of sufficient size
145 | if self.scores.storage().size() < self.scores.numel() + output.numel():
146 | new_size = math.ceil(self.scores.storage().size() * 1.5)
147 | self.scores.storage().resize_(int(new_size + output.numel()))
148 | self.targets.storage().resize_(int(new_size + output.numel()))
149 |
150 | # store scores and targets
151 | offset = self.scores.size(0) if self.scores.dim() > 0 else 0
152 | self.scores.resize_(offset + output.size(0), output.size(1))
153 | self.targets.resize_(offset + target.size(0), target.size(1))
154 | self.scores.narrow(0, offset, output.size(0)).copy_(output)
155 | self.targets.narrow(0, offset, target.size(0)).copy_(target)
156 |
157 | def value(self):
158 | """Returns the model's average precision for each class
159 | Return:
160 | ap (FloatTensor): 1xK tensor, with avg precision for each class k
161 | """
162 |
163 | if self.scores.numel() == 0:
164 | return 0
165 | ap = torch.zeros(self.scores.size(1))
166 | rg = torch.arange(1, self.scores.size(0)).float()
167 |
168 | # compute average precision for each class
169 | for k in range(self.scores.size(1)):
170 | # sort scores
171 | scores = self.scores[:, k]
172 | targets = self.targets[:, k]
173 |
174 | # compute average precision
175 | ap[k] = AveragePrecisionMeter.average_precision(scores, targets, self.difficult_examples)
176 | return ap
177 |
178 | @staticmethod
179 | def average_precision(output, target, difficult_examples=True):
180 |
181 | # sort examples
182 | sorted, indices = torch.sort(output, dim=0, descending=True)
183 |
184 | # Computes prec@i
185 | pos_count = 0.
186 | total_count = 0.
187 | precision_at_i = 0.
188 | for i in indices:
189 | label = target[i]
190 | if difficult_examples and label == 0:
191 | continue
192 | if label == 1:
193 | pos_count += 1
194 | total_count += 1
195 | if label == 1:
196 | precision_at_i += pos_count / total_count
197 | precision_at_i /= pos_count
198 | return precision_at_i
--------------------------------------------------------------------------------
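A hedged usage sketch for AveragePrecisionMeter above (not a repository file): scores and 0/1 targets for four samples and three classes are accumulated with add(), and value() returns one average-precision value per class.

import torch
from pretrainedmodels.datasets.utils import AveragePrecisionMeter

meter = AveragePrecisionMeter(difficult_examples=False)
scores = torch.rand(4, 3)                         # higher score = more confident positive
targets = torch.LongTensor([[1, 0, 0],
                            [0, 1, 0],
                            [1, 1, 0],
                            [0, 0, 1]])
meter.add(scores, targets)
ap = meter.value()                                # FloatTensor of per-class AP
print(ap, ap.mean())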
/pretrainedmodels/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/pretrainedmodels/models/__init__.py
--------------------------------------------------------------------------------
/pretrainedmodels/models/cafferesnet.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | import torch.utils.model_zoo as model_zoo
6 |
7 | pretrained_settings = {
8 | 'cafferesnet101': {
9 | 'imagenet': {
10 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/cafferesnet101-9d633cc0.pth',
11 | 'input_space': 'BGR',
12 | 'input_size': [3, 224, 224],
13 | 'input_range': [0, 255],
14 | 'mean': [102.9801, 115.9465, 122.7717],
15 | 'std': [1, 1, 1],
16 | 'num_classes': 1000
17 | }
18 | }
19 | }
20 |
21 |
22 | def conv3x3(in_planes, out_planes, stride=1):
23 | "3x3 convolution with padding"
24 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
25 | padding=1, bias=False)
26 |
27 |
28 | class BasicBlock(nn.Module):
29 | expansion = 1
30 |
31 | def __init__(self, inplanes, planes, stride=1, downsample=None):
32 | super(BasicBlock, self).__init__()
33 | self.conv1 = conv3x3(inplanes, planes, stride)
34 | self.bn1 = nn.BatchNorm2d(planes)
35 | self.relu = nn.ReLU(inplace=True)
36 | self.conv2 = conv3x3(planes, planes)
37 | self.bn2 = nn.BatchNorm2d(planes)
38 | self.downsample = downsample
39 | self.stride = stride
40 |
41 | def forward(self, x):
42 | residual = x
43 |
44 | out = self.conv1(x)
45 | out = self.bn1(out)
46 | out = self.relu(out)
47 |
48 | out = self.conv2(out)
49 | out = self.bn2(out)
50 |
51 | if self.downsample is not None:
52 | residual = self.downsample(x)
53 |
54 | out += residual
55 | out = self.relu(out)
56 |
57 | return out
58 |
59 |
60 | class Bottleneck(nn.Module):
61 | expansion = 4
62 |
63 | def __init__(self, inplanes, planes, stride=1, downsample=None):
64 | super(Bottleneck, self).__init__()
65 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) # change
66 | self.bn1 = nn.BatchNorm2d(planes)
67 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, # change
68 | padding=1, bias=False)
69 | self.bn2 = nn.BatchNorm2d(planes)
70 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
71 | self.bn3 = nn.BatchNorm2d(planes * 4)
72 | self.relu = nn.ReLU(inplace=True)
73 | self.downsample = downsample
74 | self.stride = stride
75 |
76 | def forward(self, x):
77 | residual = x
78 |
79 | out = self.conv1(x)
80 | out = self.bn1(out)
81 | out = self.relu(out)
82 |
83 | out = self.conv2(out)
84 | out = self.bn2(out)
85 | out = self.relu(out)
86 |
87 | out = self.conv3(out)
88 | out = self.bn3(out)
89 |
90 | if self.downsample is not None:
91 | residual = self.downsample(x)
92 |
93 | out += residual
94 | out = self.relu(out)
95 |
96 | return out
97 |
98 |
99 | class ResNet(nn.Module):
100 |
101 | def __init__(self, block, layers, num_classes=1000):
102 | self.inplanes = 64
103 | super(ResNet, self).__init__()
104 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
105 | bias=False)
106 | self.bn1 = nn.BatchNorm2d(64)
107 | self.relu = nn.ReLU(inplace=True)
108 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True) # change
109 | self.layer1 = self._make_layer(block, 64, layers[0])
110 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
111 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
112 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
113 | # it is slightly better whereas slower to set stride = 1
114 | # self.layer4 = self._make_layer(block, 512, layers[3], stride=1)
115 | self.avgpool = nn.AvgPool2d(7)
116 | self.last_linear = nn.Linear(512 * block.expansion, num_classes)
117 |
118 | for m in self.modules():
119 | if isinstance(m, nn.Conv2d):
120 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
121 | m.weight.data.normal_(0, math.sqrt(2. / n))
122 | elif isinstance(m, nn.BatchNorm2d):
123 | m.weight.data.fill_(1)
124 | m.bias.data.zero_()
125 |
126 | def _make_layer(self, block, planes, blocks, stride=1):
127 | downsample = None
128 | if stride != 1 or self.inplanes != planes * block.expansion:
129 | downsample = nn.Sequential(
130 | nn.Conv2d(self.inplanes, planes * block.expansion,
131 | kernel_size=1, stride=stride, bias=False),
132 | nn.BatchNorm2d(planes * block.expansion),
133 | )
134 |
135 | layers = []
136 | layers.append(block(self.inplanes, planes, stride, downsample))
137 | self.inplanes = planes * block.expansion
138 | for i in range(1, blocks):
139 | layers.append(block(self.inplanes, planes))
140 |
141 | return nn.Sequential(*layers)
142 |
143 | def forward(self, x):
144 | x = self.conv1(x)
145 | x = self.bn1(x)
146 | x = self.relu(x)
147 | x = self.maxpool(x)
148 |
149 | x = self.layer1(x)
150 | x = self.layer2(x)
151 | x = self.layer3(x)
152 | x = self.layer4(x)
153 |
154 | x = self.avgpool(x)
155 | x = x.view(x.size(0), -1)
156 | x = self.last_linear(x)
157 |
158 | return x
159 |
160 |
161 | def cafferesnet101(num_classes=1000, pretrained='imagenet'):
162 | """Constructs a ResNet-101 model.
163 | Args:
164 |         pretrained (str or None): if 'imagenet', loads ImageNet-pretrained weights; None gives a randomly initialized model
165 | """
166 | model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes)
167 | if pretrained is not None:
168 | settings = pretrained_settings['cafferesnet101'][pretrained]
169 | assert num_classes == settings['num_classes'], \
170 | "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes)
171 | model.load_state_dict(model_zoo.load_url(settings['url']))
172 | model.input_space = settings['input_space']
173 | model.input_size = settings['input_size']
174 | model.input_range = settings['input_range']
175 | model.mean = settings['mean']
176 | model.std = settings['std']
177 | return model
--------------------------------------------------------------------------------
/pretrainedmodels/models/fbresnet.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import math
3 | import torch.utils.model_zoo as model_zoo
4 |
5 |
6 | __all__ = ['FBResNet',
7 | #'fbresnet18', 'fbresnet34', 'fbresnet50', 'fbresnet101',
8 | 'fbresnet152']
9 |
10 | pretrained_settings = {
11 | 'fbresnet152': {
12 | 'imagenet': {
13 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/fbresnet152-2e20f6b4.pth',
14 | 'input_space': 'RGB',
15 | 'input_size': [3, 224, 224],
16 | 'input_range': [0, 1],
17 | 'mean': [0.485, 0.456, 0.406],
18 | 'std': [0.229, 0.224, 0.225],
19 | 'num_classes': 1000
20 | }
21 | }
22 | }
23 |
24 |
25 | def conv3x3(in_planes, out_planes, stride=1):
26 | "3x3 convolution with padding"
27 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
28 | padding=1, bias=True)
29 |
30 |
31 | class BasicBlock(nn.Module):
32 | expansion = 1
33 |
34 | def __init__(self, inplanes, planes, stride=1, downsample=None):
35 | super(BasicBlock, self).__init__()
36 | self.conv1 = conv3x3(inplanes, planes, stride)
37 | self.bn1 = nn.BatchNorm2d(planes)
38 | self.relu = nn.ReLU(inplace=True)
39 | self.conv2 = conv3x3(planes, planes)
40 | self.bn2 = nn.BatchNorm2d(planes)
41 | self.downsample = downsample
42 | self.stride = stride
43 |
44 | def forward(self, x):
45 | residual = x
46 |
47 | out = self.conv1(x)
48 | out = self.bn1(out)
49 | out = self.relu(out)
50 |
51 | out = self.conv2(out)
52 | out = self.bn2(out)
53 |
54 | if self.downsample is not None:
55 | residual = self.downsample(x)
56 |
57 | out += residual
58 | out = self.relu(out)
59 |
60 | return out
61 |
62 |
63 | class Bottleneck(nn.Module):
64 | expansion = 4
65 |
66 | def __init__(self, inplanes, planes, stride=1, downsample=None):
67 | super(Bottleneck, self).__init__()
68 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=True)
69 | self.bn1 = nn.BatchNorm2d(planes)
70 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
71 | padding=1, bias=True)
72 | self.bn2 = nn.BatchNorm2d(planes)
73 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=True)
74 | self.bn3 = nn.BatchNorm2d(planes * 4)
75 | self.relu = nn.ReLU(inplace=True)
76 | self.downsample = downsample
77 | self.stride = stride
78 |
79 | def forward(self, x):
80 | residual = x
81 |
82 | out = self.conv1(x)
83 | out = self.bn1(out)
84 | out = self.relu(out)
85 |
86 | out = self.conv2(out)
87 | out = self.bn2(out)
88 | out = self.relu(out)
89 |
90 | out = self.conv3(out)
91 | out = self.bn3(out)
92 |
93 | if self.downsample is not None:
94 | residual = self.downsample(x)
95 |
96 | out += residual
97 | out = self.relu(out)
98 |
99 | return out
100 |
101 | class FBResNet(nn.Module):
102 |
103 | def __init__(self, block, layers, num_classes=1000):
104 | self.inplanes = 64
105 | # Special attributs
106 | self.input_space = None
107 | self.input_size = (299, 299, 3)
108 | self.mean = None
109 | self.std = None
110 | super(FBResNet, self).__init__()
111 | # Modules
112 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
113 | bias=True)
114 | self.bn1 = nn.BatchNorm2d(64)
115 | self.relu = nn.ReLU(inplace=True)
116 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
117 | self.layer1 = self._make_layer(block, 64, layers[0])
118 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
119 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
120 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
121 | self.avgpool = nn.AvgPool2d(7)
122 | self.last_linear = nn.Linear(512 * block.expansion, num_classes)
123 |
124 | for m in self.modules():
125 | if isinstance(m, nn.Conv2d):
126 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
127 | m.weight.data.normal_(0, math.sqrt(2. / n))
128 | elif isinstance(m, nn.BatchNorm2d):
129 | m.weight.data.fill_(1)
130 | m.bias.data.zero_()
131 |
132 | def _make_layer(self, block, planes, blocks, stride=1):
133 | downsample = None
134 | if stride != 1 or self.inplanes != planes * block.expansion:
135 | downsample = nn.Sequential(
136 | nn.Conv2d(self.inplanes, planes * block.expansion,
137 | kernel_size=1, stride=stride, bias=True),
138 | nn.BatchNorm2d(planes * block.expansion),
139 | )
140 |
141 | layers = []
142 | layers.append(block(self.inplanes, planes, stride, downsample))
143 | self.inplanes = planes * block.expansion
144 | for i in range(1, blocks):
145 | layers.append(block(self.inplanes, planes))
146 |
147 | return nn.Sequential(*layers)
148 |
149 | def features(self, input):
150 | x = self.conv1(input)
151 | self.conv1_input = x.clone()
152 | x = self.bn1(x)
153 | x = self.relu(x)
154 | x = self.maxpool(x)
155 |
156 | x = self.layer1(x)
157 | x = self.layer2(x)
158 | x = self.layer3(x)
159 | x = self.layer4(x)
160 | return x
161 |
162 | def logits(self, features):
163 | x = self.avgpool(features)
164 | x = x.view(x.size(0), -1)
165 | x = self.last_linear(x)
166 | return x
167 |
168 | def forward(self, input):
169 | x = self.features(input)
170 | x = self.logits(x)
171 | return x
172 |
173 |
174 | def fbresnet18(num_classes=1000):
175 | """Constructs a ResNet-18 model.
176 |
177 | Args:
178 |         num_classes (int): number of output classes (no pretrained weights are exposed for this depth)
179 | """
180 | model = FBResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes)
181 | return model
182 |
183 |
184 | def fbresnet34(num_classes=1000):
185 | """Constructs a ResNet-34 model.
186 |
187 | Args:
188 |         num_classes (int): number of output classes (no pretrained weights are exposed for this depth)
189 | """
190 | model = FBResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes)
191 | return model
192 |
193 |
194 | def fbresnet50(num_classes=1000):
195 | """Constructs a ResNet-50 model.
196 |
197 | Args:
198 |         num_classes (int): number of output classes (no pretrained weights are exposed for this depth)
199 | """
200 | model = FBResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes)
201 | return model
202 |
203 |
204 | def fbresnet101(num_classes=1000):
205 | """Constructs a ResNet-101 model.
206 |
207 | Args:
208 |         num_classes (int): number of output classes (no pretrained weights are exposed for this depth)
209 | """
210 | model = FBResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes)
211 | return model
212 |
213 |
214 | def fbresnet152(num_classes=1000, pretrained='imagenet'):
215 | """Constructs a ResNet-152 model.
216 |
217 | Args:
218 |         pretrained (str or None): if 'imagenet', loads ImageNet-pretrained weights; None gives a randomly initialized model
219 | """
220 | model = FBResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes)
221 | if pretrained is not None:
222 | settings = pretrained_settings['fbresnet152'][pretrained]
223 | assert num_classes == settings['num_classes'], \
224 | "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes)
225 | model.load_state_dict(model_zoo.load_url(settings['url']))
226 | model.input_space = settings['input_space']
227 | model.input_size = settings['input_size']
228 | model.input_range = settings['input_range']
229 | model.mean = settings['mean']
230 | model.std = settings['std']
231 | return model
232 |
233 |
234 |
--------------------------------------------------------------------------------
/pretrainedmodels/models/fbresnet/resnet152_dump.lua:
--------------------------------------------------------------------------------
1 | require 'cutorch'
2 | require 'cunn'
3 | require 'cudnn'
4 | require 'image'
5 | vision=require 'torchnet-vision'
6 |
7 | net=vision.models.resnet.load{filename='data/resnet152/net.t7',length=152}
8 | print(net)
9 |
10 | require 'nn'
11 | nn.Module.parameters = function(self)
12 | if self.weight and self.bias and self.running_mean and self.running_var then
13 | return {self.weight, self.bias, self.running_mean, self.running_var}, {self.gradWeight, self.gradBias}
14 |
15 | elseif self.weight and self.bias then
16 | return {self.weight, self.bias}, {self.gradWeight, self.gradBias}
17 | elseif self.weight then
18 | return {self.weight}, {self.gradWeight}
19 | elseif self.bias then
20 | return {self.bias}, {self.gradBias}
21 | else
22 | return
23 | end
24 | end
25 |
26 | netparams, _ = net:parameters()
27 | print(#netparams)
28 | torch.save('data/resnet152/netparams.t7', netparams)
29 |
30 | net=net:cuda()
31 | net:evaluate()
32 | --p, gp = net:getParameters()
33 | input = torch.ones(1,3,224,224)
34 | input[{1,1,1,1}] = -1
35 | input[1] = image.load('data/lena_224.png')
36 | print(input:sum())
37 | input = input:cuda()
38 | output=net:forward(input)
39 |
40 | for i=1, 11 do
41 | torch.save('data/resnet152/output'..i..'.t7', net:get(i).output:float())
42 | end
43 |
--------------------------------------------------------------------------------
/pretrainedmodels/models/resnext.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | import torch.nn as nn
4 | import torch.utils.model_zoo as model_zoo
5 | from .resnext_features import resnext101_32x4d_features
6 | from .resnext_features import resnext101_64x4d_features
7 |
8 | __all__ = ['ResNeXt101_32x4d', 'resnext101_32x4d',
9 | 'ResNeXt101_64x4d', 'resnext101_64x4d']
10 |
11 | pretrained_settings = {
12 | 'resnext101_32x4d': {
13 | 'imagenet': {
14 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/resnext101_32x4d-29e315fa.pth',
15 | 'input_space': 'RGB',
16 | 'input_size': [3, 224, 224],
17 | 'input_range': [0, 1],
18 | 'mean': [0.485, 0.456, 0.406],
19 | 'std': [0.229, 0.224, 0.225],
20 | 'num_classes': 1000
21 | }
22 | },
23 | 'resnext101_64x4d': {
24 | 'imagenet': {
25 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/resnext101_64x4d-e77a0586.pth',
26 | 'input_space': 'RGB',
27 | 'input_size': [3, 224, 224],
28 | 'input_range': [0, 1],
29 | 'mean': [0.485, 0.456, 0.406],
30 | 'std': [0.229, 0.224, 0.225],
31 | 'num_classes': 1000
32 | }
33 | }
34 | }
35 |
36 | class ResNeXt101_32x4d(nn.Module):
37 |
38 | def __init__(self, num_classes=1000):
39 | super(ResNeXt101_32x4d, self).__init__()
40 | self.num_classes = num_classes
41 | self.features = resnext101_32x4d_features
42 | self.avg_pool = nn.AvgPool2d((7, 7), (1, 1))
43 | self.last_linear = nn.Linear(2048, num_classes)
44 |
45 | def logits(self, input):
46 | x = self.avg_pool(input)
47 | x = x.view(x.size(0), -1)
48 | x = self.last_linear(x)
49 | return x
50 |
51 | def forward(self, input):
52 | x = self.features(input)
53 | x = self.logits(x)
54 | return x
55 |
56 |
57 | class ResNeXt101_64x4d(nn.Module):
58 |
59 | def __init__(self, num_classes=1000):
60 | super(ResNeXt101_64x4d, self).__init__()
61 | self.num_classes = num_classes
62 | self.features = resnext101_64x4d_features
63 | self.avg_pool = nn.AvgPool2d((7, 7), (1, 1))
64 | self.last_linear = nn.Linear(2048, num_classes)
65 |
66 | def logits(self, input):
67 | x = self.avg_pool(input)
68 | x = x.view(x.size(0), -1)
69 | x = self.last_linear(x)
70 | return x
71 |
72 | def forward(self, input):
73 | x = self.features(input)
74 | x = self.logits(x)
75 | return x
76 |
77 |
78 | def resnext101_32x4d(num_classes=1000, pretrained='imagenet'):
79 | model = ResNeXt101_32x4d(num_classes=num_classes)
80 | if pretrained is not None:
81 | settings = pretrained_settings['resnext101_32x4d'][pretrained]
82 | assert num_classes == settings['num_classes'], \
83 | "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes)
84 | model.load_state_dict(model_zoo.load_url(settings['url']))
85 | model.input_space = settings['input_space']
86 | model.input_size = settings['input_size']
87 | model.input_range = settings['input_range']
88 | model.mean = settings['mean']
89 | model.std = settings['std']
90 | return model
91 |
92 | def resnext101_64x4d(num_classes=1000, pretrained='imagenet'):
93 | model = ResNeXt101_64x4d(num_classes=num_classes)
94 | if pretrained is not None:
95 | settings = pretrained_settings['resnext101_64x4d'][pretrained]
96 | assert num_classes == settings['num_classes'], \
97 | "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes)
98 | model.load_state_dict(model_zoo.load_url(settings['url']))
99 | model.input_space = settings['input_space']
100 | model.input_size = settings['input_size']
101 | model.input_range = settings['input_range']
102 | model.mean = settings['mean']
103 | model.std = settings['std']
104 | return model
105 |
--------------------------------------------------------------------------------
/pretrainedmodels/models/resnext_features/__init__.py:
--------------------------------------------------------------------------------
1 | from .resnext101_32x4d_features import resnext101_32x4d_features
2 | from .resnext101_64x4d_features import resnext101_64x4d_features
--------------------------------------------------------------------------------
/pretrainedmodels/models/utils.py:
--------------------------------------------------------------------------------
1 | from .fbresnet import pretrained_settings as fbresnet_settings
2 | from .bninception import pretrained_settings as bninception_settings
3 | from .resnext import pretrained_settings as resnext_settings
4 | from .inceptionv4 import pretrained_settings as inceptionv4_settings
5 | from .inceptionresnetv2 import pretrained_settings as inceptionresnetv2_settings
6 | from .torchvision_models import pretrained_settings as torchvision_models_settings
7 | from .nasnet_mobile import pretrained_settings as nasnet_mobile_settings
8 | from .nasnet import pretrained_settings as nasnet_settings
9 | from .dpn import pretrained_settings as dpn_settings
10 | from .xception import pretrained_settings as xception_settings
11 | from .senet import pretrained_settings as senet_settings
12 | from .cafferesnet import pretrained_settings as cafferesnet_settings
13 | from .pnasnet import pretrained_settings as pnasnet_settings
14 | from .polynet import pretrained_settings as polynet_settings
15 |
16 | all_settings = [
17 | fbresnet_settings,
18 | bninception_settings,
19 | resnext_settings,
20 | inceptionv4_settings,
21 | inceptionresnetv2_settings,
22 | torchvision_models_settings,
23 | nasnet_mobile_settings,
24 | nasnet_settings,
25 | dpn_settings,
26 | xception_settings,
27 | senet_settings,
28 | cafferesnet_settings,
29 | pnasnet_settings,
30 | polynet_settings
31 | ]
32 |
33 | model_names = []
34 | pretrained_settings = {}
35 | for settings in all_settings:
36 | for model_name, model_settings in settings.items():
37 | pretrained_settings[model_name] = model_settings
38 | model_names.append(model_name)
39 |
--------------------------------------------------------------------------------
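A hedged sketch (not a repository file) of how the merged registry built above is typically queried, e.g. to pick the preprocessing constants for one architecture:

from pretrainedmodels.models.utils import pretrained_settings, model_names

print(len(model_names))                                   # number of registered model names
cfg = pretrained_settings['resnext101_32x4d']['imagenet']
print(cfg['input_size'], cfg['mean'], cfg['std'])         # [3, 224, 224], ImageNet mean/std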
/pretrainedmodels/models/vggm.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.autograd import Variable
4 | #from torch.legacy import nn as nnl
5 | import torch.utils.model_zoo as model_zoo
6 |
7 | __all__ = ['vggm']
8 |
9 | pretrained_settings = {
10 | 'vggm': {
11 | 'imagenet': {
12 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/vggm-786f2434.pth',
13 | 'input_space': 'BGR',
14 | 'input_size': [3, 221, 221],
15 | 'input_range': [0, 255],
16 | 'mean': [123.68, 116.779, 103.939],
17 | 'std': [1, 1, 1],
18 | 'num_classes': 1000
19 | }
20 | }
21 | }
22 |
23 | class SpatialCrossMapLRN(nn.Module):
24 | def __init__(self, local_size=1, alpha=1.0, beta=0.75, k=1, ACROSS_CHANNELS=True):
25 | super(SpatialCrossMapLRN, self).__init__()
26 | self.ACROSS_CHANNELS = ACROSS_CHANNELS
27 | if ACROSS_CHANNELS:
28 | self.average=nn.AvgPool3d(kernel_size=(local_size, 1, 1),
29 | stride=1,
30 | padding=(int((local_size-1.0)/2), 0, 0))
31 | else:
32 | self.average=nn.AvgPool2d(kernel_size=local_size,
33 | stride=1,
34 | padding=int((local_size-1.0)/2))
35 | self.alpha = alpha
36 | self.beta = beta
37 | self.k = k
38 |
39 | def forward(self, x):
40 | if self.ACROSS_CHANNELS:
41 | div = x.pow(2).unsqueeze(1)
42 | div = self.average(div).squeeze(1)
43 | div = div.mul(self.alpha).add(self.k).pow(self.beta)
44 | else:
45 | div = x.pow(2)
46 | div = self.average(div)
47 | div = div.mul(self.alpha).add(self.k).pow(self.beta)
48 | x = x.div(div)
49 | return x
50 |
51 | class LambdaBase(nn.Sequential):
52 | def __init__(self, fn, *args):
53 | super(LambdaBase, self).__init__(*args)
54 | self.lambda_func = fn
55 |
56 | def forward_prepare(self, input):
57 | output = []
58 | for module in self._modules.values():
59 | output.append(module(input))
60 | return output if output else input
61 |
62 | class Lambda(LambdaBase):
63 | def forward(self, input):
64 | return self.lambda_func(self.forward_prepare(input))
65 |
66 | class VGGM(nn.Module):
67 |
68 | def __init__(self, num_classes=1000):
69 | super(VGGM, self).__init__()
70 | self.num_classes = num_classes
71 | self.features = nn.Sequential(
72 | nn.Conv2d(3,96,(7, 7),(2, 2)),
73 | nn.ReLU(),
74 | SpatialCrossMapLRN(5, 0.0005, 0.75, 2),
75 | nn.MaxPool2d((3, 3),(2, 2),(0, 0),ceil_mode=True),
76 | nn.Conv2d(96,256,(5, 5),(2, 2),(1, 1)),
77 | nn.ReLU(),
78 | SpatialCrossMapLRN(5, 0.0005, 0.75, 2),
79 | nn.MaxPool2d((3, 3),(2, 2),(0, 0),ceil_mode=True),
80 | nn.Conv2d(256,512,(3, 3),(1, 1),(1, 1)),
81 | nn.ReLU(),
82 | nn.Conv2d(512,512,(3, 3),(1, 1),(1, 1)),
83 | nn.ReLU(),
84 | nn.Conv2d(512,512,(3, 3),(1, 1),(1, 1)),
85 | nn.ReLU(),
86 | nn.MaxPool2d((3, 3),(2, 2),(0, 0),ceil_mode=True)
87 | )
88 | self.classif = nn.Sequential(
89 | nn.Linear(18432,4096),
90 | nn.ReLU(),
91 | nn.Dropout(0.5),
92 | nn.Linear(4096,4096),
93 | nn.ReLU(),
94 | nn.Dropout(0.5),
95 | nn.Linear(4096,num_classes)
96 | )
97 |
98 | def forward(self, x):
99 | x = self.features(x)
100 | x = x.view(x.size(0), -1)
101 | x = self.classif(x)
102 | return x
103 |
104 | def vggm(num_classes=1000, pretrained='imagenet'):
105 | if pretrained:
106 | settings = pretrained_settings['vggm'][pretrained]
107 | assert num_classes == settings['num_classes'], \
108 | "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes)
109 |
110 |         model = VGGM(num_classes=num_classes)
111 | model.load_state_dict(model_zoo.load_url(settings['url']))
112 |
113 | model.input_space = settings['input_space']
114 | model.input_size = settings['input_size']
115 | model.input_range = settings['input_range']
116 | model.mean = settings['mean']
117 | model.std = settings['std']
118 | else:
119 | model = VGGM(num_classes=num_classes)
120 | return model
--------------------------------------------------------------------------------
/pretrainedmodels/models/wideresnet.py:
--------------------------------------------------------------------------------
1 | import os, re
2 | from os.path import expanduser
3 | import hickle as hkl
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | from torch.autograd import Variable
8 | __all__ = ['wideresnet50']
9 |
10 | model_urls = {
11 |     'wideresnet50': 'https://s3.amazonaws.com/pytorch/h5models/wide-resnet-50-2-export.hkl'
12 | }
13 |
14 | def define_model(params):
15 | def conv2d(input, params, base, stride=1, pad=0):
16 | return F.conv2d(input, params[base + '.weight'],
17 | params[base + '.bias'], stride, pad)
18 |
19 | def group(input, params, base, stride, n):
20 | o = input
21 | for i in range(0,n):
22 | b_base = ('%s.block%d.conv') % (base, i)
23 | x = o
24 | o = conv2d(x, params, b_base + '0')
25 | o = F.relu(o)
26 | o = conv2d(o, params, b_base + '1', stride=i==0 and stride or 1, pad=1)
27 | o = F.relu(o)
28 | o = conv2d(o, params, b_base + '2')
29 | if i == 0:
30 | o += conv2d(x, params, b_base + '_dim', stride=stride)
31 | else:
32 | o += x
33 | o = F.relu(o)
34 | return o
35 |
36 | # determine network size by parameters
37 | blocks = [sum([re.match('group%d.block\d+.conv0.weight'%j, k) is not None
38 | for k in params.keys()]) for j in range(4)]
39 |
40 | def f(input, params, pooling_classif=True):
41 | o = F.conv2d(input, params['conv0.weight'], params['conv0.bias'], 2, 3)
42 | o = F.relu(o)
43 | o = F.max_pool2d(o, 3, 2, 1)
44 | o_g0 = group(o, params, 'group0', 1, blocks[0])
45 | o_g1 = group(o_g0, params, 'group1', 2, blocks[1])
46 | o_g2 = group(o_g1, params, 'group2', 2, blocks[2])
47 | o_g3 = group(o_g2, params, 'group3', 2, blocks[3])
48 | if pooling_classif:
49 | o = F.avg_pool2d(o_g3, 7, 1, 0)
50 | o = o.view(o.size(0), -1)
51 | o = F.linear(o, params['fc.weight'], params['fc.bias'])
52 | return o
53 |
54 | return f
55 |
56 |
57 | class WideResNet(nn.Module):
58 |
59 |     def __init__(self, pooling, params, forward_fn):
60 |         super(WideResNet, self).__init__()
61 |         self.pooling = pooling
62 |         self.params = params
63 |         self.forward_fn = forward_fn
64 |
65 |     def forward(self, x):
66 |         x = self.forward_fn(x, self.params, self.pooling)
67 |         return x
68 |
69 | def wideresnet50(pooling):
70 |     dir_models = os.path.join(expanduser("~"), '.torch/wideresnet')
71 |     path_hkl = os.path.join(dir_models, 'wideresnet50.hkl')
72 |     if not os.path.isfile(path_hkl):
73 |         os.system('mkdir -p ' + dir_models)
74 |         os.system('wget {} -O {}'.format(model_urls['wideresnet50'], path_hkl))
75 |     params = hkl.load(path_hkl)
76 |     # convert numpy arrays to torch Variables
77 |     for k, v in sorted(params.items()):
78 |         print(k, v.shape)
79 |         params[k] = Variable(torch.from_numpy(v), requires_grad=True)
80 |     forward_fn = define_model(params)
81 |     model = WideResNet(pooling, params, forward_fn)
82 |     return model
83 |
84 |
85 |
86 |
--------------------------------------------------------------------------------
/pretrainedmodels/models/xception.py:
--------------------------------------------------------------------------------
1 | """
2 | Ported to pytorch thanks to [tstandley](https://github.com/tstandley/Xception-PyTorch)
3 |
4 | @author: tstandley
5 | Adapted by cadene
6 |
7 | Creates an Xception Model as defined in:
8 |
9 | Francois Chollet
10 | Xception: Deep Learning with Depthwise Separable Convolutions
11 | https://arxiv.org/pdf/1610.02357.pdf
12 |
13 | These weights were ported from the Keras implementation. They achieve the following performance on the ImageNet validation set:
14 |
15 | Loss:0.9173 Prec@1:78.892 Prec@5:94.292
16 |
17 | REMEMBER to set your image size to 3x299x299 for both test and validation
18 |
19 | normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5],
20 | std=[0.5, 0.5, 0.5])
21 |
22 | The resize parameter of the validation transform should be 333, and make sure to center crop at 299x299
23 | """
24 | import math
25 | import torch
26 | import torch.nn as nn
27 | import torch.nn.functional as F
28 | import torch.utils.model_zoo as model_zoo
29 | from torch.nn import init
30 |
31 | __all__ = ['xception']
32 |
33 | pretrained_settings = {
34 | 'xception': {
35 | 'imagenet': {
36 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/xception-b5690688.pth',
37 | 'input_space': 'RGB',
38 | 'input_size': [3, 299, 299],
39 | 'input_range': [0, 1],
40 | 'mean': [0.5, 0.5, 0.5],
41 | 'std': [0.5, 0.5, 0.5],
42 | 'num_classes': 1000,
43 | 'scale': 0.8975 # The resize parameter of the validation transform should be 333, and make sure to center crop at 299x299
44 | }
45 | }
46 | }
47 |
48 |
49 | class SeparableConv2d(nn.Module):
50 | def __init__(self,in_channels,out_channels,kernel_size=1,stride=1,padding=0,dilation=1,bias=False):
51 | super(SeparableConv2d,self).__init__()
52 |
53 | self.conv1 = nn.Conv2d(in_channels,in_channels,kernel_size,stride,padding,dilation,groups=in_channels,bias=bias)
54 | self.pointwise = nn.Conv2d(in_channels,out_channels,1,1,0,1,1,bias=bias)
55 |
56 | def forward(self,x):
57 | x = self.conv1(x)
58 | x = self.pointwise(x)
59 | return x
60 |
61 |
62 | class Block(nn.Module):
63 | def __init__(self,in_filters,out_filters,reps,strides=1,start_with_relu=True,grow_first=True):
64 | super(Block, self).__init__()
65 |
66 | if out_filters != in_filters or strides!=1:
67 | self.skip = nn.Conv2d(in_filters,out_filters,1,stride=strides, bias=False)
68 | self.skipbn = nn.BatchNorm2d(out_filters)
69 | else:
70 | self.skip=None
71 |
72 | self.relu = nn.ReLU(inplace=True)
73 | rep=[]
74 |
75 | filters=in_filters
76 | if grow_first:
77 | rep.append(self.relu)
78 | rep.append(SeparableConv2d(in_filters,out_filters,3,stride=1,padding=1,bias=False))
79 | rep.append(nn.BatchNorm2d(out_filters))
80 | filters = out_filters
81 |
82 | for i in range(reps-1):
83 | rep.append(self.relu)
84 | rep.append(SeparableConv2d(filters,filters,3,stride=1,padding=1,bias=False))
85 | rep.append(nn.BatchNorm2d(filters))
86 |
87 | if not grow_first:
88 | rep.append(self.relu)
89 | rep.append(SeparableConv2d(in_filters,out_filters,3,stride=1,padding=1,bias=False))
90 | rep.append(nn.BatchNorm2d(out_filters))
91 |
92 | if not start_with_relu:
93 | rep = rep[1:]
94 | else:
95 | rep[0] = nn.ReLU(inplace=False)
96 |
97 | if strides != 1:
98 | rep.append(nn.MaxPool2d(3,strides,1))
99 | self.rep = nn.Sequential(*rep)
100 |
101 | def forward(self,inp):
102 | x = self.rep(inp)
103 |
104 | if self.skip is not None:
105 | skip = self.skip(inp)
106 | skip = self.skipbn(skip)
107 | else:
108 | skip = inp
109 |
110 | x+=skip
111 | return x
112 |
113 |
114 | class Xception(nn.Module):
115 | """
116 | Xception optimized for the ImageNet dataset, as specified in
117 | https://arxiv.org/pdf/1610.02357.pdf
118 | """
119 | def __init__(self, num_classes=1000):
120 | """ Constructor
121 | Args:
122 | num_classes: number of classes
123 | """
124 | super(Xception, self).__init__()
125 | self.num_classes = num_classes
126 |
127 | self.conv1 = nn.Conv2d(3, 32, 3,2, 0, bias=False)
128 | self.bn1 = nn.BatchNorm2d(32)
129 | self.relu = nn.ReLU(inplace=True)
130 |
131 | self.conv2 = nn.Conv2d(32,64,3,bias=False)
132 | self.bn2 = nn.BatchNorm2d(64)
133 | #do relu here
134 |
135 | self.block1=Block(64,128,2,2,start_with_relu=False,grow_first=True)
136 | self.block2=Block(128,256,2,2,start_with_relu=True,grow_first=True)
137 | self.block3=Block(256,728,2,2,start_with_relu=True,grow_first=True)
138 |
139 | self.block4=Block(728,728,3,1,start_with_relu=True,grow_first=True)
140 | self.block5=Block(728,728,3,1,start_with_relu=True,grow_first=True)
141 | self.block6=Block(728,728,3,1,start_with_relu=True,grow_first=True)
142 | self.block7=Block(728,728,3,1,start_with_relu=True,grow_first=True)
143 |
144 | self.block8=Block(728,728,3,1,start_with_relu=True,grow_first=True)
145 | self.block9=Block(728,728,3,1,start_with_relu=True,grow_first=True)
146 | self.block10=Block(728,728,3,1,start_with_relu=True,grow_first=True)
147 | self.block11=Block(728,728,3,1,start_with_relu=True,grow_first=True)
148 |
149 | self.block12=Block(728,1024,2,2,start_with_relu=True,grow_first=False)
150 |
151 | self.conv3 = SeparableConv2d(1024,1536,3,1,1)
152 | self.bn3 = nn.BatchNorm2d(1536)
153 |
154 | #do relu here
155 | self.conv4 = SeparableConv2d(1536,2048,3,1,1)
156 | self.bn4 = nn.BatchNorm2d(2048)
157 |
158 | self.fc = nn.Linear(2048, num_classes)
159 |
160 | # #------- init weights --------
161 | # for m in self.modules():
162 | # if isinstance(m, nn.Conv2d):
163 | # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
164 | # m.weight.data.normal_(0, math.sqrt(2. / n))
165 | # elif isinstance(m, nn.BatchNorm2d):
166 | # m.weight.data.fill_(1)
167 | # m.bias.data.zero_()
168 | # #-----------------------------
169 |
170 | def features(self, input):
171 | x = self.conv1(input)
172 | x = self.bn1(x)
173 | x = self.relu(x)
174 |
175 | x = self.conv2(x)
176 | x = self.bn2(x)
177 | x = self.relu(x)
178 |
179 | x = self.block1(x)
180 | x = self.block2(x)
181 | x = self.block3(x)
182 | x = self.block4(x)
183 | x = self.block5(x)
184 | x = self.block6(x)
185 | x = self.block7(x)
186 | x = self.block8(x)
187 | x = self.block9(x)
188 | x = self.block10(x)
189 | x = self.block11(x)
190 | x = self.block12(x)
191 |
192 | x = self.conv3(x)
193 | x = self.bn3(x)
194 | x = self.relu(x)
195 |
196 | x = self.conv4(x)
197 | x = self.bn4(x)
198 | return x
199 |
200 | def logits(self, features):
201 | x = self.relu(features)
202 |
203 | x = F.adaptive_avg_pool2d(x, (1, 1))
204 | x = x.view(x.size(0), -1)
205 | x = self.last_linear(x)
206 | return x
207 |
208 | def forward(self, input):
209 | x = self.features(input)
210 | x = self.logits(x)
211 | return x
212 |
213 |
214 | def xception(num_classes=1000, pretrained='imagenet'):
215 | model = Xception(num_classes=num_classes)
216 | if pretrained:
217 | settings = pretrained_settings['xception'][pretrained]
218 | assert num_classes == settings['num_classes'], \
219 | "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes)
220 |
221 |
222 | model.load_state_dict(model_zoo.load_url(settings['url']))
223 |
224 | model.input_space = settings['input_space']
225 | model.input_size = settings['input_size']
226 | model.input_range = settings['input_range']
227 | model.mean = settings['mean']
228 | model.std = settings['std']
229 |
230 | # TODO: ugly
231 | model.last_linear = model.fc
232 | del model.fc
233 | return model
234 |
--------------------------------------------------------------------------------
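A hedged sketch (not a repository file) of the preprocessing the xception module docstring describes: resize the shorter side to int(299 / 0.8975) = 333, center-crop to 299x299, then normalize with mean and std of 0.5. TransformImage in pretrainedmodels/utils.py derives the same pipeline from the 'scale': 0.8975 entry.

import torchvision.transforms as transforms

xception_preprocess = transforms.Compose([
    transforms.Resize(333),                    # int(299 / 0.8975) == 333
    transforms.CenterCrop(299),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])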
/pretrainedmodels/utils.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch
3 | import torch.nn as nn
4 | import torchvision.transforms as transforms
5 | from PIL import Image
6 | from munch import munchify
7 |
8 | class ToSpaceBGR(object):
9 |
10 | def __init__(self, is_bgr):
11 | self.is_bgr = is_bgr
12 |
13 | def __call__(self, tensor):
14 | if self.is_bgr:
15 | new_tensor = tensor.clone()
16 | new_tensor[0] = tensor[2]
17 | new_tensor[2] = tensor[0]
18 | tensor = new_tensor
19 | return tensor
20 |
21 |
22 | class ToRange255(object):
23 |
24 | def __init__(self, is_255):
25 | self.is_255 = is_255
26 |
27 | def __call__(self, tensor):
28 | if self.is_255:
29 | tensor.mul_(255)
30 | return tensor
31 |
32 |
33 | class TransformImage(object):
34 |
35 | def __init__(self, opts, scale=0.875, random_crop=False,
36 | random_hflip=False, random_vflip=False,
37 | preserve_aspect_ratio=True):
38 | if type(opts) == dict:
39 | opts = munchify(opts)
40 | self.input_size = opts.input_size
41 | self.input_space = opts.input_space
42 | self.input_range = opts.input_range
43 | self.mean = opts.mean
44 | self.std = opts.std
45 |
46 | # https://github.com/tensorflow/models/blob/master/research/inception/inception/image_processing.py#L294
47 | self.scale = scale
48 | self.random_crop = random_crop
49 | self.random_hflip = random_hflip
50 | self.random_vflip = random_vflip
51 |
52 | tfs = []
53 | if preserve_aspect_ratio:
54 | tfs.append(transforms.Resize(int(math.floor(max(self.input_size)/self.scale))))
55 | else:
56 | height = int(self.input_size[1] / self.scale)
57 | width = int(self.input_size[2] / self.scale)
58 | tfs.append(transforms.Resize((height, width)))
59 |
60 | if random_crop:
61 | tfs.append(transforms.RandomCrop(max(self.input_size)))
62 | else:
63 | tfs.append(transforms.CenterCrop(max(self.input_size)))
64 |
65 | if random_hflip:
66 | tfs.append(transforms.RandomHorizontalFlip())
67 |
68 | if random_vflip:
69 | tfs.append(transforms.RandomVerticalFlip())
70 |
71 | tfs.append(transforms.ToTensor())
72 | tfs.append(ToSpaceBGR(self.input_space=='BGR'))
73 | tfs.append(ToRange255(max(self.input_range)==255))
74 | tfs.append(transforms.Normalize(mean=self.mean, std=self.std))
75 |
76 | self.tf = transforms.Compose(tfs)
77 |
78 | def __call__(self, img):
79 | tensor = self.tf(img)
80 | return tensor
81 |
82 |
83 | class LoadImage(object):
84 |
85 | def __init__(self, space='RGB'):
86 | self.space = space
87 |
88 | def __call__(self, path_img):
89 | with open(path_img, 'rb') as f:
90 | with Image.open(f) as img:
91 | img = img.convert(self.space)
92 | return img
93 |
94 |
95 | class LoadTransformImage(object):
96 |
97 | def __init__(self, model, scale=0.875):
98 | self.load = LoadImage()
99 | self.tf = TransformImage(model, scale=scale)
100 |
101 | def __call__(self, path_img):
102 | img = self.load(path_img)
103 | tensor = self.tf(img)
104 | return tensor
105 |
106 |
107 | class Identity(nn.Module):
108 |
109 | def __init__(self):
110 | super(Identity, self).__init__()
111 |
112 | def forward(self, x):
113 | return x
--------------------------------------------------------------------------------
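A hedged end-to-end sketch (not a repository file) combining LoadImage and TransformImage above with a pretrained model. 'example.jpg' is a placeholder path; the snippet assumes PyTorch >= 0.4 and the munch dependency used by TransformImage.

import torch
import pretrainedmodels
import pretrainedmodels.utils as putils

model = pretrainedmodels.resnet18(num_classes=1000, pretrained='imagenet')
model.eval()

load_img = putils.LoadImage()
tf_img = putils.TransformImage(model)      # reads input_size/space/range/mean/std from the model

img = load_img('example.jpg')              # placeholder image path
batch = tf_img(img).unsqueeze(0)           # add a batch dimension
with torch.no_grad():
    logits = model(batch)
print(logits.argmax(dim=1))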
/pretrainedmodels/version.py:
--------------------------------------------------------------------------------
1 | __version__ = '0.6.2'
2 |
--------------------------------------------------------------------------------
/score_pedestrian_detection.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | import argparse
4 | import os.path as osp
5 |
6 | def check_size(submission_file):
7 | max_size = 60*1024*1024
8 | if osp.getsize(submission_file) > max_size:
9 |         raise IOError("File size exceeds the specified maximum size, which is 60M for the server.")
10 |
11 | def judge_overlap(pbox,ignore_box):
12 | overlap=[]
13 | delete=[]
14 | for p in pbox:
15 | pl=min(p[0],p[2])
16 | pr=max(p[0],p[2])
17 | pb=min(p[1],p[3])
18 | pt=max(p[1],p[3])
19 | s_p=(pr-pl)*(pt-pb)
20 | s_lap=-0.01
21 | for c in ignore_box:
22 | cl=min(c[0],c[2])
23 | cr=max(c[0],c[2])
24 | cb=min(c[1],c[3])
25 | ct=max(c[1],c[3])
26 |             if not (cr < pl or cl > pr or ct < pb or cb > pt):
27 | s_lap+=(min(cr,pr)-max(cl,pl))*(min(ct,pt)-max(cb,pb))
28 | if s_lap>0:
29 | overlap.append([p,s_lap/s_p])
30 | for o in overlap:
31 | if o[1]>0.5:
32 | delete.append(o[0])
33 | remain_id = [p for p in pbox if p not in delete]
34 | return remain_id
35 |
36 | def parse_ignore_file(ignore_file):
37 | with open(ignore_file,'r') as f:
38 | lines = f.readlines()
39 | ig = [x.strip().split() for x in lines]
40 | ignore = {}
41 | for item in ig:
42 | key = item[0]
43 | ignore_num = (len(item)-1)/4
44 | bbox = []
45 | for i in range(int(ignore_num)):
46 | b = []
47 | b.append(int(item[1+4*i]))
48 | b.append(int(item[2+4*i]))
49 | b.append(int(item[1+4*i])+int(item[3+4*i]))
50 | b.append(int(item[2+4*i])+int(item[4+4*i]))
51 | bbox.append(b)
52 | ignore[key] = bbox
53 | return ignore
54 |
55 | def parse_submission(submission_file,ignore_file):
56 | ignore_zone = parse_ignore_file(ignore_file)
57 | ignore_keys = ignore_zone.keys()
58 | with open(submission_file, 'r') as f:
59 | lines = f.readlines()
60 | splitlines = [x.strip().split() for x in lines]
61 | image_ids = [x[0] for x in splitlines]
62 | confidence = np.array([float(x[1]) for x in splitlines])
63 | BB = []
64 | for x in splitlines:
65 | bb = []
66 | bb.append(float(x[2]))
67 | bb.append(float(x[3]))
68 | bb.append(float(x[2])+float(x[4]))
69 | bb.append(float(x[3])+float(x[5]))
70 | BB.append(bb)
71 |
72 | sub_key = []
73 | for x in image_ids:
74 | if x not in sub_key:
75 | sub_key.append(x)
76 | final_confidence = []
77 | final_ids = []
78 | final_BB = []
79 |
80 | for key in sub_key:
81 | find = [i for i,v in enumerate(image_ids) if v == key]
82 | BB_sub = [BB[i] for i in find]
83 | confid_sub = [confidence[i] for i in find]
84 | if key in ignore_keys:
85 | ignore_bbox = ignore_zone[key]
86 | bbox_remain = judge_overlap(BB_sub,ignore_bbox)
87 | find_remain = []
88 | for i,v in enumerate(BB_sub):
89 | if v in bbox_remain:
90 | find_remain.append(i)
91 | confid_remain = [confid_sub[i] for i in find_remain]
92 | BB_sub = bbox_remain
93 | confid_sub = confid_remain
94 | ids_sub = [key]*len(BB_sub)
95 | final_ids.extend(ids_sub)
96 | final_confidence.extend(confid_sub)
97 | final_BB.extend(BB_sub)
98 |
99 | final_BB = np.array(final_BB)
100 | final_confidence = np.array(final_confidence)
101 | sorted_ind = np.argsort(-final_confidence)
102 | final_BB = final_BB[sorted_ind, :]
103 | final_ids = [final_ids[x] for x in sorted_ind]
104 | return final_ids, final_BB
105 |
106 | def parse_gt_annotation(gt_file,ignore_file):
107 | ignore_zone = parse_ignore_file(ignore_file)
108 | ignore_keys = ignore_zone.keys()
109 | with open(gt_file, 'r') as f:
110 | lines = f.readlines()
111 | info = [x.strip().split() for x in lines]
112 | gt = {}
113 | for item in info:
114 | bbox = []
115 | bbox_num = (len(item)-1)/5
116 | for i in range(int(bbox_num)):
117 | b = []
118 | b.append(int(item[2+5*i]))
119 | b.append(int(item[3+5*i]))
120 | b.append(int(item[2+5*i])+int(item[4+5*i]))
121 | b.append(int(item[3+5*i])+int(item[5+5*i]))
122 | bbox.append(b)
123 | if item[0] in ignore_keys:
124 | ignore_bbox = ignore_zone[item[0]]
125 | bbox_remain = judge_overlap(bbox,ignore_bbox)
126 | else:
127 | bbox_remain = bbox
128 | gt[item[0]] = np.array(bbox_remain)
129 | return gt
130 |
131 | def compute_ap(rec, prec):
132 | mrec = np.concatenate(([0.], rec, [1.]))
133 | mpre = np.concatenate(([0.], prec, [0.]))
134 | for i in range(mpre.size - 1, 0, -1):
135 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
136 | i = np.where(mrec[1:] != mrec[:-1])[0]
137 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
138 | return ap
139 |
140 |
141 | def pedestrian_eval(input, gt_file, ignore_file, ovthresh):
142 | gt = parse_gt_annotation(gt_file,ignore_file)
143 | image_ids, BB = parse_submission(input,ignore_file)
144 | npos = 0
145 | recs = {}
146 | for key in gt.keys():
147 | det = [False]*len(gt[key])
148 | recs[key] = {'bbox': gt[key], 'det': det}
149 | npos += len(gt[key])
150 | nd = len(image_ids)
151 | tp = np.zeros(nd)
152 | fp = np.zeros(nd)
153 | for d in range(nd):
154 | if image_ids[d] not in recs.keys():
155 | raise KeyError("Can not find image {} in the groundtruth file, did you submit the result file for the right dataset?".format(image_ids[d]))
156 | for d in range(nd):
157 | R = recs[image_ids[d]]
158 | bb = BB[d, :].astype(float)
159 | ovmax = -np.inf
160 | BBGT = R['bbox'].astype(float)
161 | if BBGT.size > 0:
162 | ixmin = np.maximum(BBGT[:, 0], bb[0])
163 | iymin = np.maximum(BBGT[:, 1], bb[1])
164 | ixmax = np.minimum(BBGT[:, 2], bb[2])
165 | iymax = np.minimum(BBGT[:, 3], bb[3])
166 | iw = np.maximum(ixmax - ixmin + 1., 0.)
167 | ih = np.maximum(iymax - iymin + 1., 0.)
168 | inters = iw * ih
169 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
170 | (BBGT[:, 2] - BBGT[:, 0] + 1.) *
171 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
172 | overlaps = inters / uni
173 | ovmax = np.max(overlaps)
174 | jmax = np.argmax(overlaps)
175 |
176 | if ovmax > ovthresh:
177 | if not R['det'][jmax]:
178 | tp[d] = 1.
179 | R['det'][jmax] = 1
180 | else:
181 | fp[d] = 1.
182 | else:
183 | fp[d] = 1.
184 | fp = np.cumsum(fp)
185 | tp = np.cumsum(tp)
186 | rec = tp / float(npos+1e-8)
187 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
188 | ap = compute_ap(rec, prec)
189 | return ap
190 |
191 |
192 | def wider_ped_eval(input, gt,ignore_file):
193 | aap = []
194 | for ove in np.arange(0.5, 1.0, 0.05):
195 | ap = pedestrian_eval(input, gt,ignore_file, ovthresh=ove)
196 | aap.append(ap)
197 | mAP = np.average(aap)
198 | return mAP
199 |
200 |
201 | def get_average_precision_validation():
202 | input_dir = './'
203 | output_dir = './'
204 | ref_dir = osp.join(input_dir, 'ref')
205 | submit_dir = osp.join(input_dir, 'res')
206 | submit_file = 'submit_files/scores_validation.txt'
207 | gt_file = osp.join(ref_dir, 'val_annotations.txt')
208 | ignore_file = osp.join(ref_dir, 'pedestrian_ignore_part_val.txt')
209 | check_size(submit_file)
210 | mAP = wider_ped_eval(submit_file, gt_file, ignore_file)
211 | out = {'Average AP': mAP}
212 | print(out)
213 | return mAP
214 |
215 |
216 |
217 | if __name__ == '__main__':
218 | # parser = argparse.ArgumentParser()
219 | # parser.add_argument("input", type=str)
220 | # parser.add_argument("output", type=str)
221 | # args = parser.parse_args()
222 | get_average_precision_validation()
223 | # strings = ['{}: {}\n'.format(k, v) for k, v in out.items()]
224 | # open(os.path.join(output_dir, 'scores.txt'), 'w').writelines(strings)
--------------------------------------------------------------------------------
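A hedged worked example (not a repository file) of the interpolated AP computed by compute_ap above: with detections ordered TP, FP, TP against two ground-truth boxes, the value comes out to 5/6. wider_ped_eval then averages such APs over the IoU thresholds 0.50, 0.55, ..., 0.95.

import numpy as np
from score_pedestrian_detection import compute_ap   # run from the repository root

tp = np.cumsum([1., 0., 1.])                         # [1, 1, 2]
fp = np.cumsum([0., 1., 0.])                         # [0, 1, 1]
npos = 2
rec = tp / float(npos)                               # [0.5, 0.5, 1.0]
prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)   # [1.0, 0.5, 0.667]
print(compute_ap(rec, prec))                         # 0.8333...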
/statics.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | data_dir="../data_wider_pedestrian"
4 |
5 | train_bbx_gt_file=os.path.join(data_dir,'train_annotations.txt')
6 | train_img_dir=os.path.join(data_dir,'train')
7 |
8 | val_bbx_gt_file=os.path.join(data_dir,'val_annotations.txt')
9 | val_img_dir=os.path.join(data_dir,'val')
10 |
11 | train_bbx_ignore_file=os.path.join(data_dir,'pedestrian_ignore_part_train.txt')
12 |
13 | val_bbx_ignore_file=os.path.join(data_dir,'pedestrian_ignore_part_val.txt')
14 |
15 |
16 | # config.py
17 | import os.path
18 |
19 | # gets home dir cross platform
20 | HOME = os.path.expanduser("~")
21 |
22 | # for making bounding boxes pretty
23 | COLORS = ((255, 0, 0, 128), (0, 255, 0, 128), (0, 0, 255, 128),
24 | (0, 255, 255, 128), (255, 0, 255, 128), (255, 255, 0, 128))
25 |
26 | MEANS = (104, 117, 123)
27 |
28 | # SSD300 CONFIGS
29 | voc = {
30 | 'num_classes': 21,
31 | 'lr_steps': (80000, 100000, 120000),
32 | 'max_iter': 120000,
33 | 'feature_maps': [38, 19, 10, 5, 3, 1],
34 | 'min_dim': 300,
35 | 'steps': [8, 16, 32, 64, 100, 300],
36 | 'min_sizes': [30, 60, 111, 162, 213, 264],
37 | 'max_sizes': [60, 111, 162, 213, 264, 315],
38 | 'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
39 | 'variance': [0.1, 0.2],
40 | 'clip': True,
41 | 'name': 'VOC',
42 | }
43 |
44 | coco = {
45 | 'num_classes': 201,
46 | 'lr_steps': (280000, 360000, 400000),
47 | 'max_iter': 400000,
48 | 'feature_maps': [38, 19, 10, 5, 3, 1],
49 | 'min_dim': 300,
50 | 'steps': [8, 16, 32, 64, 100, 300],
51 | 'min_sizes': [21, 45, 99, 153, 207, 261],
52 | 'max_sizes': [45, 99, 153, 207, 261, 315],
53 | 'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
54 | 'variance': [0.1, 0.2],
55 | 'clip': True,
56 | 'name': 'COCO',
57 | }
58 |
59 |
60 |
--------------------------------------------------------------------------------
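A hedged sketch (not a repository file) of what the SSD300 'voc' configuration above implies for the prior-box grid: each feature-map cell gets 2 + 2*len(aspect_ratios) default boxes (one small square, one large square, and a pair per extra ratio), which sums to the familiar 8732 priors.

feature_maps = [38, 19, 10, 5, 3, 1]
aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]

total_priors = 0
for fmap, ratios in zip(feature_maps, aspect_ratios):
    boxes_per_cell = 2 + 2 * len(ratios)       # [4, 6, 6, 6, 4, 4] per layer
    total_priors += fmap * fmap * boxes_per_cell
print(total_priors)                            # 8732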
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/utils/__init__.py
--------------------------------------------------------------------------------
/utils/build.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import os
9 | from os.path import join as pjoin
10 | import numpy as np
11 | from distutils.core import setup
12 | from distutils.extension import Extension
13 | from Cython.Distutils import build_ext
14 |
15 |
16 | def find_in_path(name, path):
17 | "Find a file in a search path"
18 |     # adapted from http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
19 | for dir in path.split(os.pathsep):
20 | binpath = pjoin(dir, name)
21 | if os.path.exists(binpath):
22 | return os.path.abspath(binpath)
23 | return None
24 |
25 |
26 | def locate_cuda():
27 | """Locate the CUDA environment on the system
28 |
29 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
30 | and values giving the absolute path to each directory.
31 |
32 | Starts by looking for the CUDAHOME env variable. If not found, everything
33 | is based on finding 'nvcc' in the PATH.
34 | """
35 |
36 | # first check if the CUDAHOME env variable is in use
37 | if 'CUDAHOME' in os.environ:
38 | home = os.environ['CUDAHOME']
39 | nvcc = pjoin(home, 'bin', 'nvcc')
40 | else:
41 | # otherwise, search the PATH for NVCC
42 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin')
43 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path)
44 | if nvcc is None:
45 | raise EnvironmentError('The nvcc binary could not be '
46 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME')
47 | home = os.path.dirname(os.path.dirname(nvcc))
48 |
49 | cudaconfig = {'home': home, 'nvcc': nvcc,
50 | 'include': pjoin(home, 'include'),
51 | 'lib64': pjoin(home, 'lib64')}
52 | for k, v in cudaconfig.items():
53 | if not os.path.exists(v):
54 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v))
55 |
56 | return cudaconfig
57 |
58 |
59 | CUDA = locate_cuda()
60 |
61 | # Obtain the numpy include directory. This logic works across numpy versions.
62 | try:
63 | numpy_include = np.get_include()
64 | except AttributeError:
65 | numpy_include = np.get_numpy_include()
66 |
67 |
68 | def customize_compiler_for_nvcc(self):
69 | """inject deep into distutils to customize how the dispatch
70 | to gcc/nvcc works.
71 |
72 | If you subclass UnixCCompiler, it's not trivial to get your subclass
73 | injected in, and still have the right customizations (i.e.
74 | distutils.sysconfig.customize_compiler) run on it. So instead of going
75 |     the OO route, I have this. Note, it's kind of like a weird functional
76 | subclassing going on."""
77 |
78 |     # tell the compiler it can process .cu
79 | self.src_extensions.append('.cu')
80 |
81 |     # save references to the default compiler_so and _compile methods
82 | default_compiler_so = self.compiler_so
83 | super = self._compile
84 |
85 | # now redefine the _compile method. This gets executed for each
86 | # object but distutils doesn't have the ability to change compilers
87 | # based on source extension: we add it.
88 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
89 | print(extra_postargs)
90 | if os.path.splitext(src)[1] == '.cu':
91 | # use the cuda for .cu files
92 | self.set_executable('compiler_so', CUDA['nvcc'])
93 | # use only a subset of the extra_postargs, which are 1-1 translated
94 | # from the extra_compile_args in the Extension class
95 | postargs = extra_postargs['nvcc']
96 | else:
97 | postargs = extra_postargs['gcc']
98 |
99 | super(obj, src, ext, cc_args, postargs, pp_opts)
100 | # reset the default compiler_so, which we might have changed for cuda
101 | self.compiler_so = default_compiler_so
102 |
103 | # inject our redefined _compile method into the class
104 | self._compile = _compile
105 |
106 |
107 | # run the customize_compiler
108 | class custom_build_ext(build_ext):
109 | def build_extensions(self):
110 | customize_compiler_for_nvcc(self.compiler)
111 | build_ext.build_extensions(self)
112 |
113 |
114 | ext_modules = [
115 | Extension(
116 | "nms.cpu_nms",
117 | ["nms/cpu_nms.pyx"],
118 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
119 | include_dirs=[numpy_include]
120 | ),
121 | Extension('nms.gpu_nms',
122 | ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'],
123 | library_dirs=[CUDA['lib64']],
124 | libraries=['cudart'],
125 | language='c++',
126 | runtime_library_dirs=[CUDA['lib64']],
127 | # this syntax is specific to this build system
128 | # we're only going to use certain compiler args with nvcc and not with gcc
129 | # the implementation of this trick is in customize_compiler() below
130 | extra_compile_args={'gcc': ["-Wno-unused-function"],
131 | 'nvcc': ['-arch=sm_52',
132 | '--ptxas-options=-v',
133 | '-c',
134 | '--compiler-options',
135 | "'-fPIC'"]},
136 | include_dirs=[numpy_include, CUDA['include']]
137 | ),
138 | Extension(
139 | 'pycocotools._mask',
140 | sources=['pycocotools/maskApi.c', 'pycocotools/_mask.pyx'],
141 | include_dirs=[numpy_include, 'pycocotools'],
142 | extra_compile_args={
143 | 'gcc': ['-Wno-cpp', '-Wno-unused-function', '-std=c99']},
144 | ),
145 | ]
146 |
147 | setup(
148 | name='mot_utils',
149 | ext_modules=ext_modules,
150 | # inject our custom trigger
151 | cmdclass={'build_ext': custom_build_ext},
152 | )
153 |
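These extension modules (nms.cpu_nms, nms.gpu_nms and pycocotools._mask) have to be compiled in place before the NMS and mask wrappers below can be imported. Based on this distutils setup, an invocation along the lines of `python build.py build_ext --inplace` from inside utils/ should do it (the same build_ext step is mentioned in the pycocotools comments further down, and make.sh in the repository root presumably wraps it); the -arch=sm_52 flag passed to nvcc may need adjusting for your GPU.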
--------------------------------------------------------------------------------
/utils/json_utils.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | def read_json_file(json_file):
4 | with open(json_file) as f:
5 | data = json.load(f)
6 | return data
--------------------------------------------------------------------------------
/utils/nms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/utils/nms/__init__.py
--------------------------------------------------------------------------------
/utils/nms/cpu_nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
12 | return a if a >= b else b
13 |
14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
15 | return a if a <= b else b
16 |
17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
23 |
24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]
26 |
27 | cdef int ndets = dets.shape[0]
28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \
29 | np.zeros((ndets), dtype=np.int)
30 |
31 | # nominal indices
32 | cdef int _i, _j
33 | # sorted indices
34 | cdef int i, j
35 | # temp variables for box i's (the box currently under consideration)
36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea
37 | # variables for computing overlap with box j (lower scoring box)
38 | cdef np.float32_t xx1, yy1, xx2, yy2
39 | cdef np.float32_t w, h
40 | cdef np.float32_t inter, ovr
41 |
42 | keep = []
43 | for _i in range(ndets):
44 | i = order[_i]
45 | if suppressed[i] == 1:
46 | continue
47 | keep.append(i)
48 | ix1 = x1[i]
49 | iy1 = y1[i]
50 | ix2 = x2[i]
51 | iy2 = y2[i]
52 | iarea = areas[i]
53 | for _j in range(_i + 1, ndets):
54 | j = order[_j]
55 | if suppressed[j] == 1:
56 | continue
57 | xx1 = max(ix1, x1[j])
58 | yy1 = max(iy1, y1[j])
59 | xx2 = min(ix2, x2[j])
60 | yy2 = min(iy2, y2[j])
61 | w = max(0.0, xx2 - xx1 + 1)
62 | h = max(0.0, yy2 - yy1 + 1)
63 | inter = w * h
64 | ovr = inter / (iarea + areas[j] - inter)
65 | if ovr >= thresh:
66 | suppressed[j] = 1
67 |
68 | return keep
69 |
70 | def cpu_soft_nms(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0):
71 | cdef unsigned int N = boxes.shape[0]
72 | cdef float iw, ih, box_area
73 | cdef float ua
74 | cdef int pos = 0
75 | cdef float maxscore = 0
76 | cdef int maxpos = 0
77 | cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov
78 |
79 | for i in range(N):
80 | maxscore = boxes[i, 4]
81 | maxpos = i
82 |
83 | tx1 = boxes[i,0]
84 | ty1 = boxes[i,1]
85 | tx2 = boxes[i,2]
86 | ty2 = boxes[i,3]
87 | ts = boxes[i,4]
88 |
89 | pos = i + 1
90 | # get max box
91 | while pos < N:
92 | if maxscore < boxes[pos, 4]:
93 | maxscore = boxes[pos, 4]
94 | maxpos = pos
95 | pos = pos + 1
96 |
97 | # add max box as a detection
98 | boxes[i,0] = boxes[maxpos,0]
99 | boxes[i,1] = boxes[maxpos,1]
100 | boxes[i,2] = boxes[maxpos,2]
101 | boxes[i,3] = boxes[maxpos,3]
102 | boxes[i,4] = boxes[maxpos,4]
103 |
104 | # swap ith box with position of max box
105 | boxes[maxpos,0] = tx1
106 | boxes[maxpos,1] = ty1
107 | boxes[maxpos,2] = tx2
108 | boxes[maxpos,3] = ty2
109 | boxes[maxpos,4] = ts
110 |
111 | tx1 = boxes[i,0]
112 | ty1 = boxes[i,1]
113 | tx2 = boxes[i,2]
114 | ty2 = boxes[i,3]
115 | ts = boxes[i,4]
116 |
117 | pos = i + 1
118 | # NMS iterations, note that N changes if detection boxes fall below threshold
119 | while pos < N:
120 | x1 = boxes[pos, 0]
121 | y1 = boxes[pos, 1]
122 | x2 = boxes[pos, 2]
123 | y2 = boxes[pos, 3]
124 | s = boxes[pos, 4]
125 |
126 | area = (x2 - x1 + 1) * (y2 - y1 + 1)
127 | iw = (min(tx2, x2) - max(tx1, x1) + 1)
128 | if iw > 0:
129 | ih = (min(ty2, y2) - max(ty1, y1) + 1)
130 | if ih > 0:
131 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)
132 | ov = iw * ih / ua #iou between max box and detection box
133 |
134 | if method == 1: # linear
135 | if ov > Nt:
136 | weight = 1 - ov
137 | else:
138 | weight = 1
139 | elif method == 2: # gaussian
140 | weight = np.exp(-(ov * ov)/sigma)
141 | else: # original NMS
142 | if ov > Nt:
143 | weight = 0
144 | else:
145 | weight = 1
146 |
147 | boxes[pos, 4] = weight*boxes[pos, 4]
148 |
149 | # if box score falls below threshold, discard the box by swapping with last box
150 | # update N
151 | if boxes[pos, 4] < threshold:
152 | boxes[pos,0] = boxes[N-1, 0]
153 | boxes[pos,1] = boxes[N-1, 1]
154 | boxes[pos,2] = boxes[N-1, 2]
155 | boxes[pos,3] = boxes[N-1, 3]
156 | boxes[pos,4] = boxes[N-1, 4]
157 | N = N - 1
158 | pos = pos - 1
159 |
160 | pos = pos + 1
161 |
162 | keep = [i for i in range(N)]
163 | return keep
164 |
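A minimal usage sketch for the two functions above, assuming the extension has been compiled via build.py; the exact import path depends on where you run from:

import numpy as np
from nms.cpu_nms import cpu_nms, cpu_soft_nms   # or utils.nms.cpu_nms from the repo root

# each row is [x1, y1, x2, y2, score]; float32 matches the Cython signatures
dets = np.array([[10, 10, 60, 60, 0.9],
                 [12, 12, 62, 62, 0.8],      # heavily overlaps the first box
                 [100, 100, 150, 150, 0.7]], dtype=np.float32)

print(cpu_nms(dets, 0.5))                    # [0, 2]: the second box is suppressed
keep = cpu_soft_nms(dets.copy(), Nt=0.5, method=2)  # gaussian soft-NMS rescores boxes instead of discarding them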
--------------------------------------------------------------------------------
/utils/nms/gpu_nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Faster R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
11 | assert sizeof(int) == sizeof(np.int32_t)
12 |
13 | cdef extern from "gpu_nms.hpp":
14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)
15 |
16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh,
17 | np.int32_t device_id=0):
18 | cdef int boxes_num = dets.shape[0]
19 | cdef int boxes_dim = dets.shape[1]
20 | cdef int num_out
21 | cdef np.ndarray[np.int32_t, ndim=1] \
22 | keep = np.zeros(boxes_num, dtype=np.int32)
23 | cdef np.ndarray[np.float32_t, ndim=1] \
24 | scores = dets[:, 4]
25 | cdef np.ndarray[np.int_t, ndim=1] \
26 | order = scores.argsort()[::-1]
27 | cdef np.ndarray[np.float32_t, ndim=2] \
28 | sorted_dets = dets[order, :]
29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
30 | keep = keep[:num_out]
31 | return list(order[keep])
32 |
--------------------------------------------------------------------------------
/utils/nms/nms_kernel.cu:
--------------------------------------------------------------------------------
1 | // ------------------------------------------------------------------
2 | // Faster R-CNN
3 | // Copyright (c) 2015 Microsoft
4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
5 | // Written by Shaoqing Ren
6 | // ------------------------------------------------------------------
7 |
8 | #include "gpu_nms.hpp"
9 | #include <vector>
10 | #include <iostream>
11 |
12 | #define CUDA_CHECK(condition) \
13 | /* Code block avoids redefinition of cudaError_t error */ \
14 | do { \
15 | cudaError_t error = condition; \
16 | if (error != cudaSuccess) { \
17 | std::cout << cudaGetErrorString(error) << std::endl; \
18 | } \
19 | } while (0)
20 |
21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
22 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
23 |
24 | __device__ inline float devIoU(float const * const a, float const * const b) {
25 | float left = max(a[0], b[0]), right = min(a[2], b[2]);
26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
28 | float interS = width * height;
29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
31 | return interS / (Sa + Sb - interS);
32 | }
33 |
34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
35 | const float *dev_boxes, unsigned long long *dev_mask) {
36 | const int row_start = blockIdx.y;
37 | const int col_start = blockIdx.x;
38 |
39 | // if (row_start > col_start) return;
40 |
41 | const int row_size =
42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
43 | const int col_size =
44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
45 |
46 | __shared__ float block_boxes[threadsPerBlock * 5];
47 | if (threadIdx.x < col_size) {
48 | block_boxes[threadIdx.x * 5 + 0] =
49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
50 | block_boxes[threadIdx.x * 5 + 1] =
51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
52 | block_boxes[threadIdx.x * 5 + 2] =
53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
54 | block_boxes[threadIdx.x * 5 + 3] =
55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
56 | block_boxes[threadIdx.x * 5 + 4] =
57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
58 | }
59 | __syncthreads();
60 |
61 | if (threadIdx.x < row_size) {
62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
63 | const float *cur_box = dev_boxes + cur_box_idx * 5;
64 | int i = 0;
65 | unsigned long long t = 0;
66 | int start = 0;
67 | if (row_start == col_start) {
68 | start = threadIdx.x + 1;
69 | }
70 | for (i = start; i < col_size; i++) {
71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
72 | t |= 1ULL << i;
73 | }
74 | }
75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
76 | dev_mask[cur_box_idx * col_blocks + col_start] = t;
77 | }
78 | }
79 |
80 | void _set_device(int device_id) {
81 | int current_device;
82 |   CUDA_CHECK(cudaGetDevice(&current_device));
83 | if (current_device == device_id) {
84 | return;
85 | }
86 | // The call to cudaSetDevice must come before any calls to Get, which
87 | // may perform initialization using the GPU.
88 | CUDA_CHECK(cudaSetDevice(device_id));
89 | }
90 |
91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
92 | int boxes_dim, float nms_overlap_thresh, int device_id) {
93 | _set_device(device_id);
94 |
95 | float* boxes_dev = NULL;
96 | unsigned long long* mask_dev = NULL;
97 |
98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
99 |
100 | CUDA_CHECK(cudaMalloc(&boxes_dev,
101 | boxes_num * boxes_dim * sizeof(float)));
102 | CUDA_CHECK(cudaMemcpy(boxes_dev,
103 | boxes_host,
104 | boxes_num * boxes_dim * sizeof(float),
105 | cudaMemcpyHostToDevice));
106 |
107 | CUDA_CHECK(cudaMalloc(&mask_dev,
108 | boxes_num * col_blocks * sizeof(unsigned long long)));
109 |
110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
111 | DIVUP(boxes_num, threadsPerBlock));
112 | dim3 threads(threadsPerBlock);
113 |   nms_kernel<<<blocks, threads>>>(boxes_num,
114 | nms_overlap_thresh,
115 | boxes_dev,
116 | mask_dev);
117 |
118 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
119 | CUDA_CHECK(cudaMemcpy(&mask_host[0],
120 | mask_dev,
121 | sizeof(unsigned long long) * boxes_num * col_blocks,
122 | cudaMemcpyDeviceToHost));
123 |
124 |   std::vector<unsigned long long> remv(col_blocks);
125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
126 |
127 | int num_to_keep = 0;
128 | for (int i = 0; i < boxes_num; i++) {
129 | int nblock = i / threadsPerBlock;
130 | int inblock = i % threadsPerBlock;
131 |
132 | if (!(remv[nblock] & (1ULL << inblock))) {
133 | keep_out[num_to_keep++] = i;
134 | unsigned long long *p = &mask_host[0] + i * col_blocks;
135 | for (int j = nblock; j < col_blocks; j++) {
136 | remv[j] |= p[j];
137 | }
138 | }
139 | }
140 | *num_out = num_to_keep;
141 |
142 | CUDA_CHECK(cudaFree(boxes_dev));
143 | CUDA_CHECK(cudaFree(mask_dev));
144 | }
145 |
--------------------------------------------------------------------------------
/utils/nms/py_cpu_nms.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 | def py_cpu_nms(dets, thresh):
11 | """Pure Python NMS baseline."""
12 | x1 = dets[:, 0]
13 | y1 = dets[:, 1]
14 | x2 = dets[:, 2]
15 | y2 = dets[:, 3]
16 | scores = dets[:, 4]
17 |
18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1)
19 | order = scores.argsort()[::-1]
20 |
21 | keep = []
22 | while order.size > 0:
23 | i = order[0]
24 | keep.append(i)
25 | xx1 = np.maximum(x1[i], x1[order[1:]])
26 | yy1 = np.maximum(y1[i], y1[order[1:]])
27 | xx2 = np.minimum(x2[i], x2[order[1:]])
28 | yy2 = np.minimum(y2[i], y2[order[1:]])
29 |
30 | w = np.maximum(0.0, xx2 - xx1 + 1)
31 | h = np.maximum(0.0, yy2 - yy1 + 1)
32 | inter = w * h
33 | ovr = inter / (areas[i] + areas[order[1:]] - inter)
34 |
35 | inds = np.where(ovr <= thresh)[0]
36 | order = order[inds + 1]
37 |
38 | return keep
39 |
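Since this baseline needs no compiled extension, it is handy for quick checks. A small hypothetical example (the import path assumes the script is run from the repository root):

import numpy as np
from utils.nms.py_cpu_nms import py_cpu_nms

dets = np.array([[0, 0, 50, 50, 0.95],
                 [5, 5, 55, 55, 0.90],       # IoU ~ 0.69 with the first box
                 [200, 200, 260, 260, 0.60]], dtype=np.float32)

keep = py_cpu_nms(dets, thresh=0.5)          # -> [0, 2]
print(dets[keep])                            # surviving detections, highest score first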
--------------------------------------------------------------------------------
/utils/pascal_utils.py:
--------------------------------------------------------------------------------
1 | from PIL import Image
2 | import xml.etree.ElementTree as ET
3 | from PIL import Image
4 | from xml.dom import minidom
5 | from statics import *
6 | from data_reader import *
7 |
8 | def write_pascal_annotation(file_name,obj_list,xml_file):
9 | annotation=ET.Element('annotation')
10 | filename=ET.SubElement(annotation,'filename')
11 | filename.text=file_name
12 | size = ET.SubElement(annotation, 'size')
13 | img=Image.open(file_name)
14 | width, height = img.size
15 | height_elem=ET.SubElement(size,'height')
16 | width_elem=ET.SubElement(size,'width')
17 | height_elem.text=str(height)
18 | width_elem.text=str(width)
19 | # print(obj_list)
20 | for i in range(0, len(obj_list), 5):
21 | class_index = obj_list[i]
22 | obj_cord = obj_list[i + 1:i + 5]
23 | obj_cord[2] = int(obj_cord[2]) + int(obj_cord[0])
24 | obj_cord[3] = int(obj_cord[3]) + int(obj_cord[1])
25 | object = ET.SubElement(annotation, 'object')
26 | get_object(object, obj_cord)
27 |
28 | # print(ET.dump(annotation))
29 | anno_txt=minidom.parseString(ET.tostring(annotation)).toprettyxml()
30 | text_file = open(xml_file, "w")
31 | text_file.write(anno_txt)
32 | text_file.close()
33 | return
34 |
35 |
36 | def write_pascal_annotation_aug(file_name,obj_list,xml_file):
37 | annotation=ET.Element('annotation')
38 | filename=ET.SubElement(annotation,'filename')
39 | filename.text=file_name
40 | size = ET.SubElement(annotation, 'size')
41 | img=Image.open(file_name)
42 | width, height = img.size
43 | height_elem=ET.SubElement(size,'height')
44 | width_elem=ET.SubElement(size,'width')
45 | height_elem.text=str(height)
46 | width_elem.text=str(width)
47 | # print(obj_list)
48 | for i,obj in enumerate(obj_list):
49 | class_index = obj[4]
50 | obj_cord = obj[0:4]
51 | object = ET.SubElement(annotation, 'object')
52 | get_object(object, obj_cord)
53 |
54 | # print(ET.dump(annotation))
55 | anno_txt=minidom.parseString(ET.tostring(annotation)).toprettyxml()
56 | text_file = open(xml_file, "w")
57 | text_file.write(anno_txt)
58 | text_file.close()
59 | return
60 |
61 |
62 | def get_object(object, obj_cord):
63 | name = ET.SubElement(object, 'name')
64 | name.text = 'pedestrian'
65 | bndbox = ET.SubElement(object, 'bndbox')
66 | difficult=ET.SubElement(object,'difficult')
67 | difficult.text=str(0)
68 | xmin = ET.SubElement(bndbox, 'xmin')
69 | ymin = ET.SubElement(bndbox, 'ymin')
70 | xmax = ET.SubElement(bndbox, 'xmax')
71 | ymax = ET.SubElement(bndbox, 'ymax')
72 |
73 | xmin.text=str(obj_cord[0])
74 | ymin.text=str(obj_cord[1])
75 | xmax.text=str(obj_cord[2])
76 | ymax.text=str(obj_cord[3])
77 |
78 |
79 | return
80 |
81 |
82 | def read_pascal_annotation(anno_file):
83 | """
84 |
85 | :param anno_file:
86 | :return:
87 |
88 | """
89 | tree = ET.parse(anno_file)
90 | root = tree.getroot()
91 | filename=root.find('filename').text
92 | height=int(root.find('size/height').text)
93 | width=int(root.find('size/width').text)
94 | objs=root.findall('object')
95 | objects=[]
96 | for obj in objs:
97 | class_label=obj.find('name').text
98 | xmin=int(float(obj.find('bndbox/xmin').text))
99 | xmax=int(float(obj.find('bndbox/xmax').text))
100 | ymin=int(float(obj.find('bndbox/ymin').text))
101 | ymax=int(float(obj.find('bndbox/ymax').text))
102 | objects.append([xmin,ymin,xmax,ymax,1])
103 | res={
104 | 'filename':filename,
105 | 'height':height,
106 | 'width':width,
107 | 'objects':objects
108 | }
109 | return res
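A hypothetical round trip through the helpers above (file names are made up, and the statics/data_reader imports inside this module must resolve, i.e. run from the repository root):

from PIL import Image
from utils.pascal_utils import write_pascal_annotation_aug, read_pascal_annotation

Image.new('RGB', (640, 480)).save('demo.jpg')          # dummy image so Image.open() succeeds
boxes = [[100, 120, 180, 300, 1]]                       # [xmin, ymin, xmax, ymax, class]
write_pascal_annotation_aug('demo.jpg', boxes, 'demo.xml')

anno = read_pascal_annotation('demo.xml')
print(anno['width'], anno['height'], anno['objects'])   # 640 480 [[100, 120, 180, 300, 1]]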
--------------------------------------------------------------------------------
/utils/pycocotools/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 |
--------------------------------------------------------------------------------
/utils/pycocotools/mask.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tsungyi'
2 |
3 | #import pycocotools._mask as _mask
4 | from . import _mask
5 |
6 | # Interface for manipulating masks stored in RLE format.
7 | #
8 | # RLE is a simple yet efficient format for storing binary masks. RLE
9 | # first divides a vector (or vectorized image) into a series of piecewise
10 | # constant regions and then for each piece simply stores the length of
11 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would
12 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1]
13 | # (note that the odd counts are always the numbers of zeros). Instead of
14 | # storing the counts directly, additional compression is achieved with a
15 | # variable bitrate representation based on a common scheme called LEB128.
16 | #
17 | # Compression is greatest given large piecewise constant regions.
18 | # Specifically, the size of the RLE is proportional to the number of
19 | # *boundaries* in M (or for an image the number of boundaries in the y
20 | # direction). Assuming fairly simple shapes, the RLE representation is
21 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage
22 | # is substantially lower, especially for large simple objects (large n).
23 | #
24 | # Many common operations on masks can be computed directly using the RLE
25 | # (without need for decoding). This includes computations such as area,
26 | # union, intersection, etc. All of these operations are linear in the
27 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area
28 | # of the object. Computing these operations on the original mask is O(n).
29 | # Thus, using the RLE can result in substantial computational savings.
30 | #
31 | # The following API functions are defined:
32 | # encode - Encode binary masks using RLE.
33 | # decode - Decode binary masks encoded via RLE.
34 | # merge - Compute union or intersection of encoded masks.
35 | # iou - Compute intersection over union between masks.
36 | # area - Compute area of encoded masks.
37 | # toBbox - Get bounding boxes surrounding encoded masks.
38 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask.
39 | #
40 | # Usage:
41 | # Rs = encode( masks )
42 | # masks = decode( Rs )
43 | # R = merge( Rs, intersect=false )
44 | # o = iou( dt, gt, iscrowd )
45 | # a = area( Rs )
46 | # bbs = toBbox( Rs )
47 | # Rs = frPyObjects( [pyObjects], h, w )
48 | #
49 | # In the API the following formats are used:
50 | # Rs - [dict] Run-length encoding of binary masks
51 | # R - dict Run-length encoding of binary mask
52 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order)
53 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore
54 | # bbs - [nx4] Bounding box(es) stored as [x y w h]
55 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list)
56 | # dt,gt - May be either bounding boxes or encoded masks
57 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
58 | #
59 | # Finally, a note about the intersection over union (iou) computation.
60 | # The standard iou of a ground truth (gt) and detected (dt) object is
61 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
62 | # For "crowd" regions, we use a modified criteria. If a gt object is
63 | # marked as "iscrowd", we allow a dt to match any subregion of the gt.
64 | # Choosing gt' in the crowd gt that best matches the dt can be done using
65 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
66 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
67 | # For crowd gt regions we use this modified criteria above for the iou.
68 | #
69 | # To compile run "python setup.py build_ext --inplace"
70 | # Please do not contact us for help with compiling.
71 | #
72 | # Microsoft COCO Toolbox. version 2.0
73 | # Data, paper, and tutorials available at: http://mscoco.org/
74 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
75 | # Licensed under the Simplified BSD License [see coco/license.txt]
76 |
77 | iou = _mask.iou
78 | merge = _mask.merge
79 | frPyObjects = _mask.frPyObjects
80 |
81 | def encode(bimask):
82 | if len(bimask.shape) == 3:
83 | return _mask.encode(bimask)
84 | elif len(bimask.shape) == 2:
85 | h, w = bimask.shape
86 | return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0]
87 |
88 | def decode(rleObjs):
89 | if type(rleObjs) == list:
90 | return _mask.decode(rleObjs)
91 | else:
92 | return _mask.decode([rleObjs])[:,:,0]
93 |
94 | def area(rleObjs):
95 | if type(rleObjs) == list:
96 | return _mask.area(rleObjs)
97 | else:
98 | return _mask.area([rleObjs])[0]
99 |
100 | def toBbox(rleObjs):
101 | if type(rleObjs) == list:
102 | return _mask.toBbox(rleObjs)
103 | else:
104 | return _mask.toBbox([rleObjs])[0]
105 |
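A minimal sketch of the RLE workflow described above, assuming pycocotools._mask has been compiled (via build.py) and the package is imported from the repository root:

import numpy as np
from utils.pycocotools import mask as mask_util

m = np.zeros((240, 320), dtype=np.uint8, order='F')   # column-major, as the API expects
m[50:100, 60:160] = 1                                  # a 50 x 100 foreground rectangle

rle = mask_util.encode(m)
print(mask_util.area(rle))                             # 5000
print(mask_util.toBbox(rle))                           # [60. 50. 100. 50.] as [x, y, w, h]
assert (mask_util.decode(rle) == m).all()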
--------------------------------------------------------------------------------
/utils/pycocotools/maskApi.h:
--------------------------------------------------------------------------------
1 | /**************************************************************************
2 | * Microsoft COCO Toolbox. version 2.0
3 | * Data, paper, and tutorials available at: http://mscoco.org/
4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
5 | * Licensed under the Simplified BSD License [see coco/license.txt]
6 | **************************************************************************/
7 | #pragma once
8 |
9 | typedef unsigned int uint;
10 | typedef unsigned long siz;
11 | typedef unsigned char byte;
12 | typedef double* BB;
13 | typedef struct { siz h, w, m; uint *cnts; } RLE;
14 |
15 | /* Initialize/destroy RLE. */
16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts );
17 | void rleFree( RLE *R );
18 |
19 | /* Initialize/destroy RLE array. */
20 | void rlesInit( RLE **R, siz n );
21 | void rlesFree( RLE **R, siz n );
22 |
23 | /* Encode binary masks using RLE. */
24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n );
25 |
26 | /* Decode binary masks encoded via RLE. */
27 | void rleDecode( const RLE *R, byte *mask, siz n );
28 |
29 | /* Compute union or intersection of encoded masks. */
30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect );
31 |
32 | /* Compute area of encoded masks. */
33 | void rleArea( const RLE *R, siz n, uint *a );
34 |
35 | /* Compute intersection over union between masks. */
36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o );
37 |
38 | /* Compute non-maximum suppression between bounding masks */
39 | void rleNms( RLE *dt, siz n, uint *keep, double thr );
40 |
41 | /* Compute intersection over union between bounding boxes. */
42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o );
43 |
44 | /* Compute non-maximum suppression between bounding boxes */
45 | void bbNms( BB dt, siz n, uint *keep, double thr );
46 |
47 | /* Get bounding boxes surrounding encoded masks. */
48 | void rleToBbox( const RLE *R, BB bb, siz n );
49 |
50 | /* Convert bounding boxes to encoded masks. */
51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n );
52 |
53 | /* Convert polygon to encoded mask. */
54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w );
55 |
56 | /* Get compressed string representation of encoded mask. */
57 | char* rleToString( const RLE *R );
58 |
59 | /* Convert from compressed string representation of encoded mask. */
60 | void rleFrString( RLE *R, char *s, siz h, siz w );
61 |
--------------------------------------------------------------------------------
/utils/timer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import time
9 |
10 |
11 | class Timer(object):
12 | """A simple timer."""
13 | def __init__(self):
14 | self.total_time = 0.
15 | self.calls = 0
16 | self.start_time = 0.
17 | self.diff = 0.
18 | self.average_time = 0.
19 |
20 | def tic(self):
21 |         # using time.time instead of time.clock because time.clock
22 | # does not normalize for multithreading
23 | self.start_time = time.time()
24 |
25 | def toc(self, average=True):
26 | self.diff = time.time() - self.start_time
27 | self.total_time += self.diff
28 | self.calls += 1
29 | self.average_time = self.total_time / self.calls
30 | if average:
31 | return self.average_time
32 | else:
33 | return self.diff
34 |
35 | def clear(self):
36 | self.total_time = 0.
37 | self.calls = 0
38 | self.start_time = 0.
39 | self.diff = 0.
40 | self.average_time = 0.
41 |
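Typical usage, as a small sketch (the sleep only stands in for a timed operation such as a forward pass or an NMS call; the import path assumes the repository root):

import time
from utils.timer import Timer

t = Timer()
for _ in range(3):
    t.tic()
    time.sleep(0.01)
    t.toc()                      # accumulates total_time and updates average_time

print('%d calls, %.4fs average' % (t.calls, t.average_time))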
--------------------------------------------------------------------------------
/utils/utils.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 | from PIL import Image, ImageFont, ImageDraw, ImageEnhance
4 | import numpy as np
5 | import cv2
6 | 
7 | """
8 | face=[[x1,y1,x2,y2]]
9 | """
10 |
11 |
12 | def draw_rectangle_w_h_box(img_path, faces, save_dir='./detected_face'):
13 | create_dir_if_not_exists(save_dir)
14 | img_face_detect = cv2.imread(img_path)
15 | for face in faces:
16 | x1, y1, x2, y2 = face
17 | cv2.rectangle(img_face_detect, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 1)
18 | cv2.imwrite(os.path.join(save_dir, os.path.basename(img_path)), img_face_detect)
19 |
20 | def draw_rectangle(img_path, faces, save_dir='./detected_face'):
21 | create_dir_if_not_exists(save_dir)
22 | img_face_detect = cv2.imread(img_path)
23 | for face in faces:
24 | x1, y1, x2, y2 = face
25 | cv2.rectangle(img_face_detect, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 1)
26 | cv2.imwrite(os.path.join(save_dir, os.path.basename(img_path)), img_face_detect)
27 |
28 | def drawbbox(file_name,bbox,save_dir):
29 | source_img = Image.open(file_name).convert("RGBA")
30 |
31 | draw = ImageDraw.Draw(source_img)
32 |     # draw the box given as [x1, y1, x2, y2]
33 |     draw.rectangle(((bbox[0], bbox[1]), (bbox[2], bbox[3])), outline=(0, 255, 0, 255))
34 | 
35 |     create_dir_if_not_exists(save_dir)
36 |     save_file=os.path.join(save_dir,os.path.basename(file_name))
37 |     source_img.convert("RGB").save(save_file, "JPEG")  # JPEG cannot store the alpha channel
38 |
39 | def get_total_params(model):
40 | model_parameters = filter(lambda p: p.requires_grad, model.parameters())
41 | params = sum([np.prod(p.size()) for p in model_parameters])
42 | return params
43 |
44 | def create_dir_if_not_exists(dir):
45 | if not os.path.exists(dir):
46 | os.makedirs(dir)
47 |
48 | def check_if_exists(dir):
49 | return os.path.exists(dir)
50 |
51 | def progress_bar(progress, count ,message):
52 | sys.stdout.write('\r' + "{} of {}: {}".format(progress, count, message))
53 |
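For example, get_total_params can be pointed at any torch.nn.Module; a tiny hypothetical check (import path assumes the repository root):

import torch.nn as nn
from utils.utils import get_total_params

net = nn.Linear(10, 3)
print(get_total_params(net))   # 10*3 weights + 3 biases = 33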
--------------------------------------------------------------------------------
/utils/visualization/pascal_detection_visualize.py:
--------------------------------------------------------------------------------
1 | from utils.utils import create_dir_if_not_exists
2 | import os
3 | import cv2
4 | from utils.pascal_utils import read_pascal_annotation
5 | 
6 |
7 | def draw_bbox_pascal(anno_path,image_dir=None):
8 | annotation = read_pascal_annotation(anno_path)
9 | image_path = annotation['filename']
10 |     if image_dir is not None:
11 | image_path=os.path.join(image_dir,image_path)
12 | print(image_path)
13 | objects = annotation['objects']
14 | # objects=[[100,100,200,200,1]]
15 | create_dir_if_not_exists('pascal_images')
16 | img_demo_detect = cv2.imread(image_path)
17 | save_path = os.path.join('pascal_images', os.path.basename(image_path))
18 | for object in objects:
19 | x1, y1, x2, y2 = object[:4]
20 | cv2.rectangle(img_demo_detect, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 1)
21 | cv2.imwrite(save_path, img_demo_detect)
22 |
23 |
24 | def show_augment():
25 | imageid = 'img08456_0'
26 | anno_path = '/media/milton/ssd1/research/competitions/data_wider_pedestrian/VOC_Wider_pedestrian/Annotations_aug/{}.xml'.format(
27 | imageid)
28 | draw_bbox_pascal(anno_path)
29 |
30 | def show_original_512():
31 | imageid = 'img00175_6683'
32 | anno_path = '/media/milton/ssd1/research/competitions/data_wider_pedestrian/VOC_Wider_pedestrian/Annotations_512/{}.xml'.format(
33 | imageid)
34 | draw_bbox_pascal(anno_path)
35 |
36 | def show_inria_person():
37 | # anno_path = '/media/milton/ssd1/research/competitions/data_wider_pedestrian/annotations_train/PennPed00001.xml'
38 | anno_path='/media/milton/ssd1/dataset/pedestrian/tud_brussels/annotations/img-000-2.xml'
39 | draw_bbox_pascal(anno_path)
40 |
41 | show_inria_person()
42 | # show_original_512()
43 | # show_augment()
44 | # anno_path='/media/milton/ssd1/dataset/pascal/VOCdevkit/VOC2007/Annotations/000247.xml'
45 | # draw_bbox_pascal(anno_path,'/media/milton/ssd1/dataset/pascal/VOCdevkit/VOC2007/JPEGImages')
46 |
47 |
--------------------------------------------------------------------------------
/wider_face_pedestrian_to_pascal.py:
--------------------------------------------------------------------------------
1 | import xml.etree.ElementTree as ET
2 | from PIL import Image
3 | from xml.dom import minidom
4 | from statics import *
5 | from data_reader import *
6 | from utils.utils import create_dir_if_not_exists
7 | from utils.pascal_utils import *
8 |
9 |
10 | def convert_wider_pedestrian_to_pascal():
11 | data=read_train_gt()
12 | trainvalids=[]
13 | for row in data:
14 | obj_list = row[1]
15 | image_name = row[0]
16 | annodir='/media/milton/ssd1/research/competitions/data_wider_pedestrian/VOC_Wider_pedestrian/Annotations'
17 | create_dir_if_not_exists(annodir)
18 | create_dir_if_not_exists('/media/milton/ssd1/research/competitions/data_wider_pedestrian/VOC_Wider_pedestrian/JPEGImages')
19 | xml_file_name=image_name.split('.')[0]+".xml"
20 | xml_file=os.path.join(annodir, xml_file_name)
21 | image_path=os.path.abspath(os.path.join(data_dir,"train", image_name))
22 | write_pascal_annotation(image_path,obj_list,xml_file)
23 |
24 | voc_anno_train_dir="/media/milton/ssd1/research/competitions/data_wider_pedestrian/annotations_train"
25 | if not os.path.exists(voc_anno_train_dir):
26 | os.makedirs(voc_anno_train_dir)
27 | anno_path=os.path.join(voc_anno_train_dir,xml_file_name)
28 | write_pascal_annotation(image_path,obj_list,anno_path)
29 |
30 | trainvalids.append(image_name.split('.')[0])
31 | # break
32 | with open('/media/milton/ssd1/research/competitions/data_wider_pedestrian/VOC_Wider_pedestrian/ImageSets/Main/trainval.txt', mode='wt', encoding='utf-8') as myfile:
33 | myfile.write('\n'.join(trainvalids))
34 | testids=[]
35 | for row in read_val_gt():
36 | obj_list = row[1]
37 | image_name = row[0]
38 | annodir='/media/milton/ssd1/research/competitions/data_wider_pedestrian/VOC_Wider_pedestrian/Annotations'
39 | xml_file_name=image_name.split('.')[0]+".xml"
40 | xml_file=os.path.join(annodir, xml_file_name)
41 | image_path=os.path.abspath(os.path.join(data_dir,"val", image_name))
42 | write_pascal_annotation(image_path,obj_list,xml_file)
43 | testids.append(image_name.split('.')[0])
44 |
45 | voc_anno_train_dir = "/media/milton/ssd1/research/competitions/data_wider_pedestrian/annotations_valid"
46 | if not os.path.exists(voc_anno_train_dir):
47 | os.makedirs(voc_anno_train_dir)
48 | anno_path = os.path.join(voc_anno_train_dir, xml_file_name)
49 | write_pascal_annotation(image_path, obj_list, anno_path)
50 | 
51 |
52 | # break
53 | with open('/media/milton/ssd1/research/competitions/data_wider_pedestrian/VOC_Wider_pedestrian/ImageSets/Main/test.txt', mode='wt', encoding='utf-8') as myfile:
54 | myfile.write('\n'.join(testids))
55 |
56 |
57 |
58 | if __name__ == '__main__':
59 | convert_wider_pedestrian_to_pascal()
--------------------------------------------------------------------------------