├── LICENSE ├── README.md ├── __init__.py ├── arguments.py ├── augment_data.py ├── coco_eval.py ├── data ├── __init__.py ├── coco.py ├── coco_labels.txt ├── config.py ├── data_augment.py ├── example.jpg ├── scripts │ ├── COCO2014.sh │ ├── VOC2007.sh │ └── VOC2012.sh ├── voc0712.py └── voc_eval.py ├── data_process.ipynb ├── data_reader.py ├── data_reader_pedestrian.py ├── dataset.py ├── dataset ├── caltech_pedestrian.py ├── inria_person.py ├── mall.py └── upen_person.py ├── demo ├── __init__.py ├── demo.ipynb ├── demo.py └── live.py ├── doc ├── RFB.png ├── SSD.jpg ├── detection_example.png ├── detection_example2.png ├── detection_examples.png ├── rfb.png └── ssd.png ├── eval.py ├── focal_loss.py ├── layers ├── __init__.py ├── functions │ ├── __init__.py │ ├── detection.py │ └── prior_box.py └── modules │ ├── __init__.py │ ├── l2norm.py │ ├── multibox_loss.py │ └── refine_multibox_loss.py ├── loss_loader.py ├── main.py ├── make.sh ├── model_loader.py ├── models ├── FRFBSSD_vgg.py ├── FSSD_mobile.py ├── FSSD_vgg.py ├── RFB_Net_E_vgg.py ├── RFB_Net_mobile.py ├── RFB_Net_vgg.py ├── RefineSSD_vgg.py ├── SSD_vgg.py ├── __init__.py ├── base_models.py ├── densenet.py ├── mobilenet.py ├── resnet.py └── vgg.py ├── multi_thread_score_pedestrian_detection.py ├── object_detector.py ├── pretrainedmodels ├── __init__.py ├── datasets │ ├── __init__.py │ ├── utils.py │ └── voc.py ├── models │ ├── __init__.py │ ├── bninception.py │ ├── cafferesnet.py │ ├── dpn.py │ ├── fbresnet.py │ ├── fbresnet │ │ ├── resnet152_dump.lua │ │ └── resnet152_load.py │ ├── inceptionresnetv2.py │ ├── inceptionv4.py │ ├── nasnet.py │ ├── nasnet_mobile.py │ ├── pnasnet.py │ ├── polynet.py │ ├── resnext.py │ ├── resnext_features │ │ ├── __init__.py │ │ ├── resnext101_32x4d_features.py │ │ └── resnext101_64x4d_features.py │ ├── senet.py │ ├── torchvision_models.py │ ├── utils.py │ ├── vggm.py │ ├── wideresnet.py │ └── xception.py ├── utils.py └── version.py ├── refinedet_train_test.py ├── score_pedestrian_detection.py ├── statics.py ├── train.py ├── train_test.py ├── train_test_fssd_mobile_pre.py ├── transforms.py ├── utils ├── __init__.py ├── box_utils.py ├── build.py ├── json_utils.py ├── nms │ ├── __init__.py │ ├── cpu_nms.c │ ├── cpu_nms.pyx │ ├── gpu_nms.pyx │ ├── nms_kernel.cu │ └── py_cpu_nms.py ├── pascal_utils.py ├── pycocotools │ ├── __init__.py │ ├── _mask.c │ ├── _mask.pyx │ ├── coco.py │ ├── cocoeval.py │ ├── mask.py │ ├── maskApi.c │ └── maskApi.h ├── timer.py ├── utils.py └── visualization │ └── pascal_detection_visualize.py └── wider_face_pedestrian_to_pascal.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Max deGroot, Ellis Brown 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RetinaNet applied to the WIDER pedestrian detection challenge at ECCV 2018, in PyTorch 2 | 3 | This code placed 21st among 168 teams. 4 | 5 | Improvement ideas (todos): 6 |
    7 | 1. Run for more epochs.
    8 | 2. Use more data augmentation methods.
    9 | 3. Optimize hyper-parameters such as the learning rate and its decay schedule.
   10 | 4. Use Adam, or SGD with momentum.
   11 | 5. Use soft-NMS (see the sketch after this list).
   12 | 6. Use multi-scale testing.
   13 | 7. Optimize the FPN feature extractor for small pedestrian objects.
   14 | 8. Use a GAN to generate additional training data in the context of roads and pedestrians.
   15 |
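A minimal NumPy sketch of the soft-NMS idea mentioned in item 5 above (Gaussian-decay variant). The function name, default `sigma`, and `score_thresh` are illustrative only and are not part of this repository's `utils/nms` package.

```python
import numpy as np

def soft_nms(dets, scores, sigma=0.5, score_thresh=0.001):
    """Gaussian soft-NMS: decay the scores of overlapping boxes instead of
    discarding them. dets: (N, 4) boxes as [x1, y1, x2, y2]; scores: (N,)."""
    dets = dets.astype(float)
    scores = scores.astype(float).copy()
    keep = []
    idxs = np.arange(len(scores))
    while len(idxs) > 0:
        # pick the remaining box with the highest (possibly decayed) score
        top = idxs[np.argmax(scores[idxs])]
        keep.append(top)
        idxs = idxs[idxs != top]
        if len(idxs) == 0:
            break
        # IoU of the selected box against all remaining boxes
        xx1 = np.maximum(dets[top, 0], dets[idxs, 0])
        yy1 = np.maximum(dets[top, 1], dets[idxs, 1])
        xx2 = np.minimum(dets[top, 2], dets[idxs, 2])
        yy2 = np.minimum(dets[top, 3], dets[idxs, 3])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        area_top = (dets[top, 2] - dets[top, 0] + 1) * (dets[top, 3] - dets[top, 1] + 1)
        area_rest = (dets[idxs, 2] - dets[idxs, 0] + 1) * (dets[idxs, 3] - dets[idxs, 1] + 1)
        iou = inter / (area_top + area_rest - inter)
        # Gaussian decay instead of hard suppression
        scores[idxs] *= np.exp(-(iou ** 2) / sigma)
        idxs = idxs[scores[idxs] > score_thresh]
    return keep
```

Unlike the hard NMS shipped under `utils/nms`, overlapping boxes are down-weighted rather than dropped, which tends to recover detections in crowded pedestrian scenes.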
16 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/__init__.py -------------------------------------------------------------------------------- /arguments.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | parser = argparse.ArgumentParser(description='PyTorch student network training') 3 | 4 | parser.add_argument('--lr',default=0.001, 5 | type=float, 6 | help='learning rate') 7 | parser.add_argument('--resume', 8 | action='store_true', 9 | help='resume from checkpoint') 10 | parser.add_argument('--optimizer', 11 | type=str, 12 | help='optimizer type', 13 | default='adam') 14 | parser.add_argument('--criterion', 15 | type=str, 16 | help='criterion', 17 | default='MSE') 18 | parser.add_argument('--root', 19 | default='../data/', 20 | type=str, 21 | help='data root path') 22 | parser.add_argument('--datalist', 23 | default='../data/datalist/', 24 | type=str, 25 | help='datalist path') 26 | parser.add_argument('--batch_size', 27 | type=int, 28 | help='mini-batch size', 29 | default=150) 30 | parser.add_argument('--name', 31 | default='VGG19_BN', 32 | type=str, 33 | help='session name') 34 | parser.add_argument('--log_dir_path', 35 | default='./student_net_learning/logs', 36 | type=str, 37 | help='log directory path') 38 | parser.add_argument('--epochs', 39 | default=200, 40 | type=int, 41 | help='number of epochs') 42 | parser.add_argument('--cuda', 43 | type=int, 44 | default=1, 45 | help='use CUDA') 46 | parser.add_argument('--model_name', 47 | type=str, 48 | help='model name', 49 | default='ResNet50') 50 | parser.add_argument('--down_epoch', 51 | type=int, 52 | help='epoch number for lr * 1e-1', 53 | default=30) -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | # from .voc import VOCDetection, AnnotationTransform, detection_collate, VOC_CLASSES 2 | from .voc0712 import VOCDetection, AnnotationTransform, detection_collate, VOC_CLASSES 3 | from .coco import COCODetection 4 | from .data_augment import * 5 | from .config import * 6 | -------------------------------------------------------------------------------- /data/coco_labels.txt: -------------------------------------------------------------------------------- 1 | 1,1,person 2 | 2,2,bicycle 3 | 3,3,car 4 | 4,4,motorcycle 5 | 5,5,airplane 6 | 6,6,bus 7 | 7,7,train 8 | 8,8,truck 9 | 9,9,boat 10 | 10,10,traffic light 11 | 11,11,fire hydrant 12 | 13,12,stop sign 13 | 14,13,parking meter 14 | 15,14,bench 15 | 16,15,bird 16 | 17,16,cat 17 | 18,17,dog 18 | 19,18,horse 19 | 20,19,sheep 20 | 21,20,cow 21 | 22,21,elephant 22 | 23,22,bear 23 | 24,23,zebra 24 | 25,24,giraffe 25 | 27,25,backpack 26 | 28,26,umbrella 27 | 31,27,handbag 28 | 32,28,tie 29 | 33,29,suitcase 30 | 34,30,frisbee 31 | 35,31,skis 32 | 36,32,snowboard 33 | 37,33,sports ball 34 | 38,34,kite 35 | 39,35,baseball bat 36 | 40,36,baseball glove 37 | 41,37,skateboard 38 | 42,38,surfboard 39 | 43,39,tennis racket 40 | 44,40,bottle 41 | 46,41,wine glass 42 | 47,42,cup 43 | 48,43,fork 44 | 49,44,knife 45 | 50,45,spoon 46 | 51,46,bowl 47 | 52,47,banana 48 | 53,48,apple 49 | 54,49,sandwich 50 | 55,50,orange 51 | 56,51,broccoli 52 | 57,52,carrot 53 | 
58,53,hot dog 54 | 59,54,pizza 55 | 60,55,donut 56 | 61,56,cake 57 | 62,57,chair 58 | 63,58,couch 59 | 64,59,potted plant 60 | 65,60,bed 61 | 67,61,dining table 62 | 70,62,toilet 63 | 72,63,tv 64 | 73,64,laptop 65 | 74,65,mouse 66 | 75,66,remote 67 | 76,67,keyboard 68 | 77,68,cell phone 69 | 78,69,microwave 70 | 79,70,oven 71 | 80,71,toaster 72 | 81,72,sink 73 | 82,73,refrigerator 74 | 84,74,book 75 | 85,75,clock 76 | 86,76,vase 77 | 87,77,scissors 78 | 88,78,teddy bear 79 | 89,79,hair drier 80 | 90,80,toothbrush 81 | -------------------------------------------------------------------------------- /data/config.py: -------------------------------------------------------------------------------- 1 | # config.py 2 | 3 | # gets home dir cross platform 4 | import cv2 5 | cv2.setNumThreads(0) # pytorch issue 1355: possible deadlock in dataloader 6 | # note: if you used our download scripts, this should be right 7 | VOCroot = '/media/milton/ssd1/dataset/pascal/VOCdevkit/' # path to VOCdevkit root dir 8 | COCOroot = '/home/user/Database/MSCOCO2017' 9 | 10 | # RFB CONFIGS 11 | VOC_300 = { 12 | 'feature_maps': [38, 19, 10, 5, 3, 1], 13 | 14 | 'min_dim': 300, 15 | 16 | 'steps': [8, 16, 32, 64, 100, 300], 17 | 18 | 'min_sizes': [30, 60, 111, 162, 213, 264], 19 | 20 | 'max_sizes': [60, 111, 162, 213, 264, 315], 21 | 22 | 'aspect_ratios': [[2, 3], [2, 3], [2, 3], [2, 3], [2], [2]], 23 | 24 | 'variance': [0.1, 0.2], 25 | 26 | 'clip': True, 27 | } 28 | 29 | VOC_512 = { 30 | 'feature_maps': [64, 32, 16, 8, 4, 2, 1], 31 | 32 | 'min_dim': 512, 33 | 34 | 'steps': [8, 16, 32, 64, 128, 256, 512], 35 | 36 | 'min_sizes': [35.84, 76.8, 153.6, 230.4, 307.2, 384.0, 460.8], 37 | 38 | 'max_sizes': [76.8, 153.6, 230.4, 307.2, 384.0, 460.8, 537.6], 39 | 40 | 'aspect_ratios': [[2, 3], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]], 41 | 42 | 'variance': [0.1, 0.2], 43 | 44 | 'clip': True, 45 | } 46 | 47 | COCO_300 = { 48 | 'feature_maps': [38, 19, 10, 5, 3, 1], 49 | 50 | 'min_dim': 300, 51 | 52 | 'steps': [8, 16, 32, 64, 100, 300], 53 | 54 | 'min_sizes': [21, 45, 99, 153, 207, 261], 55 | 56 | 'max_sizes': [45, 99, 153, 207, 261, 315], 57 | 58 | 'aspect_ratios': [[2, 3], [2, 3], [2, 3], [2, 3], [2], [2]], 59 | 60 | 'variance': [0.1, 0.2], 61 | 62 | 'clip': True, 63 | } 64 | 65 | COCO_512 = { 66 | 'feature_maps': [64, 32, 16, 8, 4, 2, 1], 67 | 68 | 'min_dim': 512, 69 | 70 | 'steps': [8, 16, 32, 64, 128, 256, 512], 71 | 72 | 'min_sizes': [20.48, 51.2, 133.12, 215.04, 296.96, 378.88, 460.8], 73 | 74 | 'max_sizes': [51.2, 133.12, 215.04, 296.96, 378.88, 460.8, 542.72], 75 | 76 | 'aspect_ratios': [[2, 3], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]], 77 | 78 | 'variance': [0.1, 0.2], 79 | 80 | 'clip': True, 81 | } 82 | 83 | COCO_mobile_300 = { 84 | 'feature_maps': [19, 10, 5, 3, 2, 1], 85 | 86 | 'min_dim': 300, 87 | 88 | 'steps': [16, 32, 64, 100, 150, 300], 89 | 90 | 'min_sizes': [45, 90, 135, 180, 225, 270], 91 | 92 | 'max_sizes': [90, 135, 180, 225, 270, 315], 93 | 94 | 'aspect_ratios': [[2, 3], [2, 3], [2, 3], [2, 3], [2], [2]], 95 | 96 | 'variance': [0.1, 0.2], 97 | 98 | 'clip': True, 99 | } 100 | 101 | VOC_320 = { 102 | 'feature_maps': [40, 20, 10, 5], 103 | 104 | 'min_dim': 320, 105 | 106 | 'steps': [8, 16, 32, 64], 107 | 108 | 'min_sizes': [32, 64, 128, 256], 109 | 110 | 'max_sizes': [], 111 | 112 | 'aspect_ratios': [[2], [2], [2], [2]], 113 | 114 | 'variance': [0.1, 0.2], 115 | 116 | 'clip': True, 117 | } 118 | -------------------------------------------------------------------------------- /data/example.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/data/example.jpg -------------------------------------------------------------------------------- /data/scripts/COCO2014.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | start=`date +%s` 4 | 5 | # handle optional download dir 6 | if [ -z "$1" ] 7 | then 8 | # navigate to ~/data 9 | echo "navigating to ~/data/ ..." 10 | mkdir -p ~/data 11 | cd ~/data/ 12 | mkdir -p ./coco 13 | cd ./coco 14 | mkdir -p ./images 15 | mkdir -p ./annotations 16 | else 17 | # check if specified dir is valid 18 | if [ ! -d $1 ]; then 19 | echo $1 " is not a valid directory" 20 | exit 0 21 | fi 22 | echo "navigating to " $1 " ..." 23 | cd $1 24 | fi 25 | 26 | if [ ! -d images ] 27 | then 28 | mkdir -p ./images 29 | fi 30 | 31 | # Download the image data. 32 | cd ./images 33 | echo "Downloading MSCOCO train images ..." 34 | curl -LO http://images.cocodataset.org/zips/train2014.zip 35 | echo "Downloading MSCOCO val images ..." 36 | curl -LO http://images.cocodataset.org/zips/val2014.zip 37 | 38 | cd ../ 39 | if [ ! -d annotations] 40 | then 41 | mkdir -p ./annotations 42 | fi 43 | 44 | # Download the annotation data. 45 | cd ./annotations 46 | echo "Downloading MSCOCO train/val annotations ..." 47 | curl -LO http://images.cocodataset.org/annotations/annotations_trainval2014.zip 48 | echo "Finished downloading. Now extracting ..." 49 | 50 | # Unzip data 51 | echo "Extracting train images ..." 52 | unzip ../images/train2014.zip -d ../images 53 | echo "Extracting val images ..." 54 | unzip ../images/val2014.zip -d ../images 55 | echo "Extracting annotations ..." 56 | unzip ./annotations_trainval2014.zip 57 | 58 | echo "Removing zip files ..." 59 | rm ../images/train2014.zip 60 | rm ../images/val2014.zip 61 | rm ./annotations_trainval2014.zip 62 | 63 | echo "Creating trainval35k dataset..." 64 | 65 | # Download annotations json 66 | echo "Downloading trainval35k annotations from S3" 67 | curl -LO https://s3.amazonaws.com/amdegroot-datasets/instances_trainval35k.json.zip 68 | 69 | # combine train and val 70 | echo "Combining train and val images" 71 | mkdir ../images/trainval35k 72 | cd ../images/train2014 73 | find -maxdepth 1 -name '*.jpg' -exec cp -t ../trainval35k {} + # dir too large for cp 74 | cd ../val2014 75 | find -maxdepth 1 -name '*.jpg' -exec cp -t ../trainval35k {} + 76 | 77 | 78 | end=`date +%s` 79 | runtime=$((end-start)) 80 | 81 | echo "Completed in " $runtime " seconds" 82 | -------------------------------------------------------------------------------- /data/scripts/VOC2007.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ellis Brown 3 | 4 | start=`date +%s` 5 | 6 | # handle optional download dir 7 | if [ -z "$1" ] 8 | then 9 | # navigate to ~/data 10 | echo "navigating to ~/data/ ..." 11 | mkdir -p ~/data 12 | cd ~/data/ 13 | else 14 | # check if is valid directory 15 | if [ ! -d $1 ]; then 16 | echo $1 "is not a valid directory" 17 | exit 0 18 | fi 19 | echo "navigating to" $1 "..." 20 | cd $1 21 | fi 22 | 23 | echo "Downloading VOC2007 trainval ..." 24 | # Download the data. 25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar 26 | echo "Downloading VOC2007 test data ..." 
27 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar 28 | echo "Done downloading." 29 | 30 | # Extract data 31 | echo "Extracting trainval ..." 32 | tar -xvf VOCtrainval_06-Nov-2007.tar 33 | echo "Extracting test ..." 34 | tar -xvf VOCtest_06-Nov-2007.tar 35 | echo "removing tars ..." 36 | rm VOCtrainval_06-Nov-2007.tar 37 | rm VOCtest_06-Nov-2007.tar 38 | 39 | end=`date +%s` 40 | runtime=$((end-start)) 41 | 42 | echo "Completed in" $runtime "seconds" -------------------------------------------------------------------------------- /data/scripts/VOC2012.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ellis Brown 3 | 4 | start=`date +%s` 5 | 6 | # handle optional download dir 7 | if [ -z "$1" ] 8 | then 9 | # navigate to ~/data 10 | echo "navigating to ~/data/ ..." 11 | mkdir -p ~/data 12 | cd ~/data/ 13 | else 14 | # check if is valid directory 15 | if [ ! -d $1 ]; then 16 | echo $1 "is not a valid directory" 17 | exit 0 18 | fi 19 | echo "navigating to" $1 "..." 20 | cd $1 21 | fi 22 | 23 | echo "Downloading VOC2012 trainval ..." 24 | # Download the data. 25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 26 | echo "Done downloading." 27 | 28 | 29 | # Extract data 30 | echo "Extracting trainval ..." 31 | tar -xvf VOCtrainval_11-May-2012.tar 32 | echo "removing tar ..." 33 | rm VOCtrainval_11-May-2012.tar 34 | 35 | end=`date +%s` 36 | runtime=$((end-start)) 37 | 38 | echo "Completed in" $runtime "seconds" -------------------------------------------------------------------------------- /data/voc_eval.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Bharath Hariharan 5 | # -------------------------------------------------------- 6 | 7 | import pickle 8 | import xml.etree.ElementTree as ET 9 | 10 | import numpy as np 11 | import os 12 | 13 | 14 | def parse_rec(filename): 15 | """ Parse a PASCAL VOC xml file """ 16 | tree = ET.parse(filename) 17 | objects = [] 18 | for obj in tree.findall('object'): 19 | obj_struct = {} 20 | obj_struct['name'] = obj.find('name').text 21 | obj_struct['pose'] = obj.find('pose').text 22 | obj_struct['truncated'] = int(obj.find('truncated').text) 23 | obj_struct['difficult'] = int(obj.find('difficult').text) 24 | bbox = obj.find('bndbox') 25 | obj_struct['bbox'] = [int(bbox.find('xmin').text), 26 | int(bbox.find('ymin').text), 27 | int(bbox.find('xmax').text), 28 | int(bbox.find('ymax').text)] 29 | objects.append(obj_struct) 30 | 31 | return objects 32 | 33 | 34 | def voc_ap(rec, prec, use_07_metric=False): 35 | """ ap = voc_ap(rec, prec, [use_07_metric]) 36 | Compute VOC AP given precision and recall. 37 | If use_07_metric is true, uses the 38 | VOC 07 11 point method (default:False). 39 | """ 40 | if use_07_metric: 41 | # 11 point metric 42 | ap = 0. 43 | for t in np.arange(0., 1.1, 0.1): 44 | if np.sum(rec >= t) == 0: 45 | p = 0 46 | else: 47 | p = np.max(prec[rec >= t]) 48 | ap = ap + p / 11. 
49 | else: 50 | # correct AP calculation 51 | # first append sentinel values at the end 52 | mrec = np.concatenate(([0.], rec, [1.])) 53 | mpre = np.concatenate(([0.], prec, [0.])) 54 | 55 | # compute the precision envelope 56 | for i in range(mpre.size - 1, 0, -1): 57 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 58 | 59 | # to calculate area under PR curve, look for points 60 | # where X axis (recall) changes value 61 | i = np.where(mrec[1:] != mrec[:-1])[0] 62 | 63 | # and sum (\Delta recall) * prec 64 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 65 | return ap 66 | 67 | 68 | def voc_eval(detpath, 69 | annopath, 70 | imagesetfile, 71 | classname, 72 | cachedir, 73 | ovthresh=0.5, 74 | use_07_metric=False): 75 | """rec, prec, ap = voc_eval(detpath, 76 | annopath, 77 | imagesetfile, 78 | classname, 79 | [ovthresh], 80 | [use_07_metric]) 81 | 82 | Top level function that does the PASCAL VOC evaluation. 83 | 84 | detpath: Path to detections 85 | detpath.format(classname) should produce the detection results file. 86 | annopath: Path to annotations 87 | annopath.format(imagename) should be the xml annotations file. 88 | imagesetfile: Text file containing the list of images, one image per line. 89 | classname: Category name (duh) 90 | cachedir: Directory for caching the annotations 91 | [ovthresh]: Overlap threshold (default = 0.5) 92 | [use_07_metric]: Whether to use VOC07's 11 point AP computation 93 | (default False) 94 | """ 95 | # assumes detections are in detpath.format(classname) 96 | # assumes annotations are in annopath.format(imagename) 97 | # assumes imagesetfile is a text file with each line an image name 98 | # cachedir caches the annotations in a pickle file 99 | 100 | # first load gt 101 | if not os.path.isdir(cachedir): 102 | os.mkdir(cachedir) 103 | cachefile = os.path.join(cachedir, 'annots.pkl') 104 | # read list of images 105 | with open(imagesetfile, 'r') as f: 106 | lines = f.readlines() 107 | imagenames = [x.strip() for x in lines] 108 | 109 | if not os.path.isfile(cachefile): 110 | # load annots 111 | recs = {} 112 | for i, imagename in enumerate(imagenames): 113 | recs[imagename] = parse_rec(annopath.format(imagename)) 114 | if i % 100 == 0: 115 | print('Reading annotation for {:d}/{:d}'.format( 116 | i + 1, len(imagenames))) 117 | # save 118 | print('Saving cached annotations to {:s}'.format(cachefile)) 119 | with open(cachefile, 'wb') as f: 120 | pickle.dump(recs, f) 121 | else: 122 | # load 123 | with open(cachefile, 'rb') as f: 124 | recs = pickle.load(f) 125 | 126 | # extract gt objects for this class 127 | class_recs = {} 128 | npos = 0 129 | for imagename in imagenames: 130 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 131 | bbox = np.array([x['bbox'] for x in R]) 132 | difficult = np.array([x['difficult'] for x in R]).astype(np.bool) 133 | det = [False] * len(R) 134 | npos = npos + sum(~difficult) 135 | class_recs[imagename] = {'bbox': bbox, 136 | 'difficult': difficult, 137 | 'det': det} 138 | 139 | # read dets 140 | detfile = detpath.format(classname) 141 | with open(detfile, 'r') as f: 142 | lines = f.readlines() 143 | 144 | splitlines = [x.strip().split(' ') for x in lines] 145 | image_ids = [x[0] for x in splitlines] 146 | confidence = np.array([float(x[1]) for x in splitlines]) 147 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 148 | 149 | # sort by confidence 150 | sorted_ind = np.argsort(-confidence) 151 | sorted_scores = np.sort(-confidence) 152 | BB = BB[sorted_ind, :] 153 | image_ids = [image_ids[x] 
for x in sorted_ind] 154 | 155 | # go down dets and mark TPs and FPs 156 | nd = len(image_ids) 157 | tp = np.zeros(nd) 158 | fp = np.zeros(nd) 159 | for d in range(nd): 160 | R = class_recs[image_ids[d]] 161 | bb = BB[d, :].astype(float) 162 | ovmax = -np.inf 163 | BBGT = R['bbox'].astype(float) 164 | 165 | if BBGT.size > 0: 166 | # compute overlaps 167 | # intersection 168 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 169 | iymin = np.maximum(BBGT[:, 1], bb[1]) 170 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 171 | iymax = np.minimum(BBGT[:, 3], bb[3]) 172 | iw = np.maximum(ixmax - ixmin + 1., 0.) 173 | ih = np.maximum(iymax - iymin + 1., 0.) 174 | inters = iw * ih 175 | 176 | # union 177 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 178 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 179 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 180 | 181 | overlaps = inters / uni 182 | ovmax = np.max(overlaps) 183 | jmax = np.argmax(overlaps) 184 | 185 | if ovmax > ovthresh: 186 | if not R['difficult'][jmax]: 187 | if not R['det'][jmax]: 188 | tp[d] = 1. 189 | R['det'][jmax] = 1 190 | else: 191 | fp[d] = 1. 192 | else: 193 | fp[d] = 1. 194 | 195 | # compute precision recall 196 | fp = np.cumsum(fp) 197 | tp = np.cumsum(tp) 198 | rec = tp / float(npos) 199 | # avoid divide by zero in case the first detection matches a difficult 200 | # ground truth 201 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 202 | ap = voc_ap(rec, prec, use_07_metric) 203 | 204 | return rec, prec, ap 205 | -------------------------------------------------------------------------------- /data_reader.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data.dataset import Dataset 2 | from torchvision import transforms 3 | from PIL import Image 4 | from torchvision.transforms import * 5 | 6 | data_set_name="ISIC 2018" 7 | 8 | from layers.functions import Detect 9 | 10 | 11 | def str2bool(v): 12 | return v.lower() in ("yes", "true", "t", "1") 13 | 14 | """ 15 | Train Val Test 16 | Images 11500 5000 3500 17 | Labels 46513 19696 18 | 19 | todo ignore parts set zero 20 | """ 21 | 22 | data_set_name="Wider Face Pedestrian dataset." 
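# --- added note (illustration only, not part of the original file) -----------
# read_train_gt()/read_val_gt() below assume one image per line in the
# ground-truth files, followed by repeating 5-tuples:
#   <image_name> <class> <left> <top> <width> <height> [<class> <left> ...]
# e.g. (hypothetical values): "img_00001.jpg 1 455 182 36 103 1 512 180 30 95"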
23 | 24 | def read_train_gt(): 25 | annotations=[] 26 | with open(train_bbx_gt_file,'r') as train_bbx_file: 27 | content=train_bbx_file.readlines(); 28 | for line in content: 29 | line_list=line.split(" ") 30 | file_name=line_list[0] 31 | row=[] 32 | for idx in range(1,len(line_list)-1,5): 33 | class_num=line_list[idx] 34 | left=line_list[idx+1] 35 | top=line_list[idx+2] 36 | w=line_list[idx+3] 37 | h=line_list[idx+4].strip() 38 | obj=[class_num, left, top, w, h] 39 | if len(obj)>0: 40 | row+=obj 41 | if len(row)>0: 42 | annotations.append([file_name,row[:]]) 43 | return annotations 44 | 45 | 46 | def read_val_gt(): 47 | annotations = [] 48 | with open(val_bbx_gt_file, 'r') as train_bbx_file: 49 | content = train_bbx_file.readlines(); 50 | for line in content: 51 | line_list = line.split(" ") 52 | file_name = line_list[0] 53 | row = [] 54 | for idx in range(1, len(line_list) - 1, 5): 55 | class_num = line_list[idx] 56 | left = line_list[idx + 1] 57 | top = line_list[idx + 2] 58 | w = line_list[idx + 3] 59 | h = line_list[idx + 4].strip() 60 | obj = [class_num, left, top, w, h] 61 | if len(obj) > 0: 62 | row += obj 63 | if len(row) > 0: 64 | annotations.append([file_name, row[:]]) 65 | return annotations 66 | 67 | 68 | # annotations=read_train_gt() 69 | # print(len(annotations)) 70 | # 71 | # count=0 72 | # for anno in annotations: 73 | # count+=len(annotations[anno]) 74 | # print(count) 75 | # # annos= read_train_gt() 76 | # # for anno in annos: 77 | # # print(annos[anno]) 78 | 79 | def test_read_data(): 80 | train_gt=read_train_gt() 81 | for row in train_gt: 82 | print(row) 83 | 84 | 85 | def get_validation_data(): 86 | return 87 | 88 | class DatasetReader(Dataset): 89 | """ 90 | """ 91 | def __init__(self, data,mode='train',): 92 | print("{} count:{}".format(mode,len(data))) 93 | self.mode=mode 94 | self.data=np.asarray(data) 95 | self.transform_train_image=transforms.Compose([ 96 | RandomCrop([224,224]), 97 | RandomHorizontalFlip(p=.2), 98 | # ColorJitter(.6), 99 | # RandomVerticalFlip(p=.2), 100 | # RandomGrayscale(p=.2), 101 | # transforms.RandomRotation(10), 102 | # transforms.RandomAffine(10), 103 | # ColorJitter(.6), 104 | transforms.ToTensor(), 105 | # transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) 106 | ]); 107 | 108 | self.transform_test_image = transforms.Compose([ 109 | transforms.Resize([224, 224]), 110 | transforms.ToTensor()]); 111 | 112 | 113 | def __getitem__(self, index): 114 | img_path=self.data[index,0] 115 | label=int(self.data[index,1]) 116 | 117 | if not os.path.exists(img_path): 118 | print("{} image not found".format(img_path)) 119 | exit(0); 120 | img = Image.open(img_path) 121 | if self.mode=="train": 122 | data = self.transform_train_image(img) 123 | return data, label 124 | 125 | elif self.mode=="valid": 126 | data = self.transform_test_image(img) 127 | return data, label 128 | 129 | def __len__(self): 130 | return len(self.data) 131 | from statics import * 132 | from data import * 133 | def get_data_loader(args): 134 | return get_voc_reader(args) 135 | 136 | def get_voc_reader(args): 137 | img_dim=args.size 138 | rgb_means = (104, 117, 123) 139 | rgb_std = (1, 1, 1) 140 | p = (0.6, 0.2)[args.version == 'RFB_mobile'] 141 | train_sets = [('2007', 'trainval'), ('2012', 'trainval')] 142 | cfg = (VOC_300, VOC_512)[args.size == '512'] 143 | 144 | testset = VOCDetection( 145 | VOCroot, [('2007', 'test')], None, AnnotationTransform()) 146 | 147 | train_dataset = VOCDetection(VOCroot, train_sets, preproc( 148 | img_dim, rgb_means, rgb_std, p), 
AnnotationTransform()) 149 | 150 | trainloader = torch.utils.data.DataLoader(train_dataset, args.batch_size, 151 | shuffle=True, num_workers=args.num_workers, 152 | collate_fn=detection_collate) 153 | num_classes=len(args.classes.split(",")) 154 | detector = Detect(num_classes, 0, cfg) 155 | 156 | return (trainloader, (testset,detector)) 157 | 158 | def test(): 159 | trainloader, valloader = get_data_loader(100) 160 | for idx, (inputs, targets) in enumerate(valloader): 161 | print(inputs.shape) 162 | 163 | """ 164 | all the ignore parts of image will be zero. 165 | """ 166 | from utils.file_utils import * 167 | 168 | def get_ignore_parts_for_train(): 169 | annotations=[] 170 | for line in read_text_file(train_bbx_ignore_file): 171 | line_list = line.split(" ") 172 | # print(len(line_list)) 173 | file_name = line_list[0] 174 | for idx in range(1, len(line_list) - 1, 4): 175 | left = line_list[idx + 1] 176 | top = line_list[idx + 2] 177 | w = line_list[idx + 3] 178 | h = line_list[idx + 4] 179 | annotations[file_name].append([ left, top, w, h]) 180 | return annotations 181 | 182 | 183 | 184 | if __name__ == '__main__': 185 | read_train_gt() 186 | 187 | -------------------------------------------------------------------------------- /data_reader_pedestrian.py: -------------------------------------------------------------------------------- 1 | import glob 2 | 3 | def get_test_loader_for_upload(batch_size): 4 | test_files=glob.glob("/media/milton/ssd1/research/competitions/data_wider_pedestrian/test_new/test_new/**.jpg") 5 | return test_files -------------------------------------------------------------------------------- /dataset.py: -------------------------------------------------------------------------------- 1 | '''Custom dataset for loading imgs and descriptors 2 | ''' 3 | import os.path 4 | 5 | import numpy as np 6 | import pandas as pd 7 | import torch 8 | import torch.utils.data as data 9 | from PIL import Image 10 | 11 | def default_loader(path): 12 | with open(path, 'rb') as f: 13 | with Image.open(f) as img: 14 | return img.convert('RGB') 15 | 16 | def np_loader(path): 17 | return np.load(path) 18 | 19 | 20 | 21 | def build_dataset_lists(list_path,split): 22 | im_list = os.path.join(list_path, 'im_'+split+'.txt') 23 | at_list = os.path.join(list_path, 'at_'+split+'.npy') 24 | print(os.path.abspath(im_list)) 25 | images = pd.read_csv(im_list, header=None, names=['impath']) 26 | targets = np.load(at_list) 27 | return images.impath.values,targets 28 | 29 | class ImageListDataset(data.Dataset): 30 | """ 31 | Builds a dataset based on a list of images. 
32 | root -- path to images 33 | list_path -- path to image lists 34 | split -- train|val| - name of the dataset part (default train) 35 | transform -- transform for images 36 | """ 37 | def __init__(self, root, list_path, split = 'train', 38 | transform=None, loader=default_loader): 39 | 40 | images, targets = build_dataset_lists(list_path,split) 41 | self.root = root 42 | self.images = root + images 43 | self.targets = targets 44 | self.transform = transform 45 | self.loader = loader 46 | 47 | def __getitem__(self, index): 48 | """ 49 | Args: 50 | index (int): Index 51 | Returns: 52 | tuple: (image, target) 53 | """ 54 | path = self.images[index] 55 | target = self.targets[index] 56 | img = self.loader(path) 57 | if self.transform is not None: 58 | img = self.transform(img) 59 | img = img.type(torch.FloatTensor) 60 | return img, target 61 | 62 | def __len__(self): 63 | return len(self.images) 64 | -------------------------------------------------------------------------------- /dataset/caltech_pedestrian.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | from PIL import Image 4 | from utils.pascal_utils import write_pascal_annotation_aug 5 | from utils.json_utils import read_json_file 6 | 7 | data_dir='/media/milton/ssd1/dataset/pedestrian/caltech_pedestrian/caltech-pedestrian-dataset-converter/data' 8 | images_dir=os.path.join(data_dir,'images') 9 | json_file=os.path.join(data_dir,'annotations.json') 10 | 11 | data=read_json_file(json_file) 12 | for set_key in data.keys(): 13 | set_data=data[set_key] 14 | for v_key in set_data.keys(): 15 | frames=set_data[v_key]['frames'] 16 | for frame_key in frames.keys(): 17 | for frame_anno in frames[frame_key]: 18 | 19 | filename="{}_{}_{}.png".format(set_key.lower(),v_key,frame_key) 20 | file_path=os.path.join(images_dir, filename) 21 | # if not os.path.exists(file_path): 22 | # print("{} not found".format(file_path)) 23 | try: 24 | img=Image.open(file_path) 25 | except Exception as e: 26 | continue 27 | pass 28 | 29 | 30 | -------------------------------------------------------------------------------- /dataset/inria_person.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | 4 | from utils.pascal_utils import write_pascal_annotation_aug 5 | from utils.file_utils import read_text_file 6 | train_anno_dir='/media/milton/ssd1/dataset/pedestrian/INRIAPerson/Train/annotations' 7 | test_anno_dir='/media/milton/ssd1/dataset/pedestrian/INRIAPerson/Test/annotations' 8 | 9 | 10 | def inria_person_to_pascal(train_anno_dir): 11 | anno_files = glob.glob(os.path.join(train_anno_dir, '**.txt')) 12 | for anno_file in anno_files: 13 | filename = '' 14 | obj_list = [] 15 | 16 | for line in read_text_file(anno_file): 17 | # xml_file=os.path.join(annodir, xml_file_name) 18 | # image_path=os.path.abspath(os.path.join(data_dir,"train", image_name)) 19 | # write_pascal_annotation(image_path,obj_list,xml_file) 20 | 21 | if 'Image filename' in line: 22 | filename = line.split(':')[1].strip()[1:-1] 23 | if 'Bounding box for object' in line: 24 | bounds = line.split(':')[1].split('-') 25 | xmin, ymin = bounds[0].strip()[1:-1].split(',') 26 | xmax, ymax = bounds[1].strip()[1:-1].split(',') 27 | xmin = int(xmin.strip()) 28 | ymin = int(ymin.strip()) 29 | xmax = int(xmax.strip()) 30 | ymax = int(ymax.strip()) 31 | obj_list.append([xmin, ymin, xmax, ymax, 1]) 32 | image_path = os.path.join('/media/milton/ssd1/dataset/pedestrian/INRIAPerson', 
filename) 33 | xml_file = os.path.join('/media/milton/ssd1/research/competitions/data_wider_pedestrian/annotations_train', 34 | os.path.basename(image_path).split('.')[0] + ".xml") 35 | write_pascal_annotation_aug(image_path, obj_list, xml_file) 36 | 37 | 38 | inria_person_to_pascal(train_anno_dir) 39 | inria_person_to_pascal(test_anno_dir) 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /dataset/mall.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import scipy.io as sio 4 | 5 | from utils.pascal_utils import write_pascal_annotation_aug 6 | from utils.file_utils import read_text_file 7 | data_dir='/media/milton/ssd1/dataset/pedestrian/mall/mall_dataset' 8 | gt_file=os.path.join(data_dir,'mall_gt.mat') 9 | 10 | def inria_person_to_pascal(gt_file): 11 | 12 | gt=sio.loadmat(gt_file) 13 | frames=gt['frame'] 14 | anno_files = glob.glob(os.path.join(gt_file, '**.txt')) 15 | for anno_file in anno_files: 16 | filename = '' 17 | obj_list = [] 18 | 19 | for line in read_text_file(anno_file): 20 | # xml_file=os.path.join(annodir, xml_file_name) 21 | # image_path=os.path.abspath(os.path.join(data_dir,"train", image_name)) 22 | # write_pascal_annotation(image_path,obj_list,xml_file) 23 | 24 | if 'Image filename' in line: 25 | filename = line.split(':')[1].strip()[1:-1] 26 | if 'Bounding box for object' in line: 27 | bounds = line.split(':')[1].split('-') 28 | xmin, ymin = bounds[0].strip()[1:-1].split(',') 29 | xmax, ymax = bounds[1].strip()[1:-1].split(',') 30 | xmin = int(xmin.strip()) 31 | ymin = int(ymin.strip()) 32 | xmax = int(xmax.strip()) 33 | ymax = int(ymax.strip()) 34 | obj_list.append([xmin, ymin, xmax, ymax, 1]) 35 | image_path = os.path.join('/media/milton/ssd1/dataset/pedestrian/upenn', filename) 36 | xml_file = os.path.join('/media/milton/ssd1/research/competitions/data_wider_pedestrian/annotations_train', 37 | os.path.basename(image_path).split('.')[0] + ".xml") 38 | write_pascal_annotation_aug(image_path, obj_list, xml_file) 39 | 40 | 41 | inria_person_to_pascal(gt_file) 42 | 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /dataset/upen_person.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | 4 | from utils.pascal_utils import write_pascal_annotation_aug 5 | from utils.file_utils import read_text_file 6 | train_anno_dir='/media/milton/ssd1/dataset/pedestrian/upenn/PennFudanPed/Annotation' 7 | 8 | def inria_person_to_pascal(train_anno_dir): 9 | anno_files = glob.glob(os.path.join(train_anno_dir, '**.txt')) 10 | for anno_file in anno_files: 11 | filename = '' 12 | obj_list = [] 13 | 14 | for line in read_text_file(anno_file): 15 | # xml_file=os.path.join(annodir, xml_file_name) 16 | # image_path=os.path.abspath(os.path.join(data_dir,"train", image_name)) 17 | # write_pascal_annotation(image_path,obj_list,xml_file) 18 | 19 | if 'Image filename' in line: 20 | filename = line.split(':')[1].strip()[1:-1] 21 | if 'Bounding box for object' in line: 22 | bounds = line.split(':')[1].split('-') 23 | xmin, ymin = bounds[0].strip()[1:-1].split(',') 24 | xmax, ymax = bounds[1].strip()[1:-1].split(',') 25 | xmin = int(xmin.strip()) 26 | ymin = int(ymin.strip()) 27 | xmax = int(xmax.strip()) 28 | ymax = int(ymax.strip()) 29 | obj_list.append([xmin, ymin, xmax, ymax, 1]) 30 | image_path = os.path.join('/media/milton/ssd1/dataset/pedestrian/upenn', 
filename) 31 | xml_file = os.path.join('/media/milton/ssd1/research/competitions/data_wider_pedestrian/annotations_train', 32 | os.path.basename(image_path).split('.')[0] + ".xml") 33 | write_pascal_annotation_aug(image_path, obj_list, xml_file) 34 | 35 | 36 | inria_person_to_pascal(train_anno_dir) 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /demo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/demo/__init__.py -------------------------------------------------------------------------------- /demo/demo.py: -------------------------------------------------------------------------------- 1 | from utils.file_utils import read_text_file 2 | import os 3 | import cv2 4 | 5 | val_dir='/media/milton/ssd1/research/competitions/data_wider_pedestrian/val' 6 | for line in read_text_file('scores.txt'): 7 | line_arr=line.split(' ') 8 | image_name=line_arr[0] 9 | image_path=os.path.join(val_dir,image_name) 10 | save_path=os.path.join('out',image_name) 11 | if os.path.exists(save_path): 12 | image_path=save_path 13 | print(image_path) 14 | img_face_detect = cv2.imread(image_path) 15 | print(line_arr) 16 | x1, y1, w, h = line_arr[2:] 17 | x1=float(x1) 18 | y1=float(y1) 19 | w=float(w) 20 | h=float(h.strip()) 21 | x2=int(x1)+int(w) 22 | y2=int(y1)+int(h) 23 | cv2.rectangle(img_face_detect, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 1) 24 | print(save_path) 25 | print(img_face_detect.shape) 26 | cv2.imwrite(save_path, img_face_detect) 27 | -------------------------------------------------------------------------------- /doc/RFB.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/doc/RFB.png -------------------------------------------------------------------------------- /doc/SSD.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/doc/SSD.jpg -------------------------------------------------------------------------------- /doc/detection_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/doc/detection_example.png -------------------------------------------------------------------------------- /doc/detection_example2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/doc/detection_example2.png -------------------------------------------------------------------------------- /doc/detection_examples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/doc/detection_examples.png -------------------------------------------------------------------------------- /doc/rfb.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/doc/rfb.png -------------------------------------------------------------------------------- /doc/ssd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/doc/ssd.png -------------------------------------------------------------------------------- /focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | 6 | 7 | def one_hot(index, classes): 8 | size = index.size() + (classes,) 9 | view = index.size() + (1,) 10 | 11 | mask = torch.Tensor(*size).fill_(0) 12 | index = index.view(*view) 13 | ones = 1. 14 | 15 | if isinstance(index, Variable): 16 | ones = Variable(torch.Tensor(index.size()).fill_(1)) 17 | mask = Variable(mask) 18 | 19 | return mask.scatter_(1, index, ones) 20 | 21 | 22 | class FocalLoss(nn.Module): 23 | 24 | def __init__(self, gamma=0, eps=1e-7): 25 | super(FocalLoss, self).__init__() 26 | self.gamma = gamma 27 | self.eps = eps 28 | 29 | def forward(self, input, target): 30 | input=input.cpu() 31 | target=target.cpu() 32 | y = one_hot(target, input.size(-1)) 33 | logit = F.softmax(input, dim=-1) 34 | logit = logit.clamp(self.eps, 1. - self.eps) 35 | 36 | loss = -1 * y * torch.log(logit) # cross entropy 37 | loss = loss * (1 - logit) ** self.gamma # focal loss 38 | 39 | return loss.sum() -------------------------------------------------------------------------------- /layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions import * 2 | from .modules import * 3 | -------------------------------------------------------------------------------- /layers/functions/__init__.py: -------------------------------------------------------------------------------- 1 | from .detection import Detect 2 | from .prior_box import PriorBox 3 | 4 | 5 | __all__ = ['Detect', 'PriorBox'] 6 | -------------------------------------------------------------------------------- /layers/functions/detection.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from utils.box_utils import decode, center_size 5 | 6 | 7 | class Detect(Function): 8 | """At test time, Detect is the final layer of SSD. Decode location preds, 9 | apply non-maximum suppression to location predictions based on conf 10 | scores and threshold to a top_k number of output predictions for both 11 | confidence score and locations. 12 | """ 13 | 14 | def __init__(self, num_classes, bkg_label, cfg, object_score=0): 15 | self.num_classes = num_classes 16 | self.background_label = bkg_label 17 | self.object_score = object_score 18 | # self.thresh = thresh 19 | 20 | # Parameters used in nms. 
21 | self.variance = cfg['variance'] 22 | 23 | def forward(self, predictions, prior, arm_data=None): 24 | """ 25 | Args: 26 | loc_data: (tensor) Loc preds from loc layers 27 | Shape: [batch,num_priors*4] 28 | conf_data: (tensor) Shape: Conf preds from conf layers 29 | Shape: [batch*num_priors,num_classes] 30 | prior_data: (tensor) Prior boxes and variances from priorbox layers 31 | Shape: [1,num_priors,4] 32 | """ 33 | 34 | loc, conf = predictions 35 | loc_data = loc.data 36 | conf_data = conf.data 37 | prior_data = prior.data 38 | num = loc_data.size(0) # batch size 39 | if arm_data: 40 | arm_loc, arm_conf = arm_data 41 | arm_loc_data = arm_loc.data 42 | arm_conf_data = arm_conf.data 43 | arm_object_conf = arm_conf_data[:, 1:] 44 | no_object_index = arm_object_conf <= self.object_score 45 | conf_data[no_object_index.expand_as(conf_data)] = 0 46 | 47 | self.num_priors = prior_data.size(0) 48 | self.boxes = torch.zeros(num, self.num_priors, 4) 49 | self.scores = torch.zeros(num, self.num_priors, self.num_classes) 50 | 51 | if num == 1: 52 | # size batch x num_classes x num_priors 53 | conf_preds = conf_data.unsqueeze(0) 54 | 55 | else: 56 | conf_preds = conf_data.view(num, self.num_priors, 57 | self.num_classes) 58 | self.boxes.expand(num, self.num_priors, 4) 59 | self.scores.expand(num, self.num_priors, self.num_classes) 60 | # Decode predictions into bboxes. 61 | for i in range(num): 62 | if arm_data: 63 | default = decode(arm_loc_data[i], prior_data, self.variance) 64 | default = center_size(default) 65 | else: 66 | default = prior_data 67 | decoded_boxes = decode(loc_data[i], default, self.variance) 68 | # For each class, perform nms 69 | conf_scores = conf_preds[i].clone() 70 | ''' 71 | c_mask = conf_scores.gt(self.thresh) 72 | decoded_boxes = decoded_boxes[c_mask] 73 | conf_scores = conf_scores[c_mask] 74 | ''' 75 | 76 | self.boxes[i] = decoded_boxes 77 | self.scores[i] = conf_scores 78 | 79 | return self.boxes, self.scores 80 | -------------------------------------------------------------------------------- /layers/functions/prior_box.py: -------------------------------------------------------------------------------- 1 | from itertools import product as product 2 | from math import sqrt as sqrt 3 | 4 | import torch 5 | 6 | if torch.cuda.is_available(): 7 | torch.set_default_tensor_type('torch.cuda.FloatTensor') 8 | 9 | 10 | class PriorBox(object): 11 | """Compute priorbox coordinates in center-offset form for each source 12 | feature map. 13 | Note: 14 | This 'layer' has changed between versions of the original SSD 15 | paper, so we include both versions, but note v2 is the most tested and most 16 | recent version of the paper. 
17 | 18 | """ 19 | 20 | def __init__(self, cfg): 21 | super(PriorBox, self).__init__() 22 | self.image_size = cfg['min_dim'] 23 | # number of priors for feature map location (either 4 or 6) 24 | self.num_priors = len(cfg['aspect_ratios']) 25 | self.variance = cfg['variance'] or [0.1] 26 | self.feature_maps = cfg['feature_maps'] 27 | self.min_sizes = cfg['min_sizes'] 28 | self.max_sizes = cfg['max_sizes'] 29 | self.steps = cfg['steps'] 30 | self.aspect_ratios = cfg['aspect_ratios'] 31 | self.clip = cfg['clip'] 32 | for v in self.variance: 33 | if v <= 0: 34 | raise ValueError('Variances must be greater than 0') 35 | 36 | def forward(self): 37 | mean = [] 38 | for k, f in enumerate(self.feature_maps): 39 | for i, j in product(range(f), repeat=2): 40 | f_k = self.image_size / self.steps[k] 41 | cx = (j + 0.5) / f_k 42 | cy = (i + 0.5) / f_k 43 | 44 | s_k = self.min_sizes[k] / self.image_size 45 | mean += [cx, cy, s_k, s_k] 46 | 47 | # aspect_ratio: 1 48 | # rel size: sqrt(s_k * s_(k+1)) 49 | if self.max_sizes: 50 | s_k_prime = sqrt(s_k * (self.max_sizes[k] / self.image_size)) 51 | mean += [cx, cy, s_k_prime, s_k_prime] 52 | 53 | # rest of aspect ratios 54 | for ar in self.aspect_ratios[k]: 55 | mean += [cx, cy, s_k * sqrt(ar), s_k / sqrt(ar)] 56 | mean += [cx, cy, s_k / sqrt(ar), s_k * sqrt(ar)] 57 | 58 | # back to torch land 59 | output = torch.Tensor(mean).view(-1, 4) 60 | if self.clip: 61 | output.clamp_(max=1, min=0) 62 | return output 63 | -------------------------------------------------------------------------------- /layers/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .multibox_loss import MultiBoxLoss 2 | from .refine_multibox_loss import RefineMultiBoxLoss 3 | from .l2norm import L2Norm 4 | 5 | __all__ = ['MultiBoxLoss','L2Norm'] 6 | -------------------------------------------------------------------------------- /layers/modules/l2norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Function 4 | from torch.autograd import Variable 5 | import torch.nn.init as init 6 | 7 | class L2Norm(nn.Module): 8 | def __init__(self,n_channels, scale): 9 | super(L2Norm,self).__init__() 10 | self.n_channels = n_channels 11 | self.gamma = scale or None 12 | self.eps = 1e-10 13 | self.weight = nn.Parameter(torch.Tensor(self.n_channels)) 14 | self.reset_parameters() 15 | 16 | def reset_parameters(self): 17 | init.constant(self.weight,self.gamma) 18 | 19 | def forward(self, x): 20 | norm = x.pow(2).sum(dim=1, keepdim=True).sqrt()+self.eps 21 | x /= norm 22 | out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x 23 | return out 24 | -------------------------------------------------------------------------------- /layers/modules/multibox_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | from utils.box_utils import match, log_sum_exp 6 | GPU = False 7 | if torch.cuda.is_available(): 8 | GPU = True 9 | torch.set_default_tensor_type('torch.cuda.FloatTensor') 10 | 11 | 12 | class MultiBoxLoss(nn.Module): 13 | """SSD Weighted Loss Function 14 | Compute Targets: 15 | 1) Produce Confidence Target Indices by matching ground truth boxes 16 | with (default) 'priorboxes' that have jaccard index > threshold parameter 17 | (default threshold: 0.5). 
18 | 2) Produce localization target by 'encoding' variance into offsets of ground 19 | truth boxes and their matched 'priorboxes'. 20 | 3) Hard negative mining to filter the excessive number of negative examples 21 | that comes with using a large number of default bounding boxes. 22 | (default negative:positive ratio 3:1) 23 | Objective Loss: 24 | L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 25 | Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss 26 | weighted by α which is set to 1 by cross val. 27 | Args: 28 | c: class confidences, 29 | l: predicted boxes, 30 | g: ground truth boxes 31 | N: number of matched default boxes 32 | See: https://arxiv.org/pdf/1512.02325.pdf for more details. 33 | """ 34 | 35 | 36 | def __init__(self, num_classes,overlap_thresh,prior_for_matching,bkg_label,neg_mining,neg_pos,neg_overlap,encode_target): 37 | super(MultiBoxLoss, self).__init__() 38 | self.num_classes = num_classes 39 | self.threshold = overlap_thresh 40 | self.background_label = bkg_label 41 | self.encode_target = encode_target 42 | self.use_prior_for_matching = prior_for_matching 43 | self.do_neg_mining = neg_mining 44 | self.negpos_ratio = neg_pos 45 | self.neg_overlap = neg_overlap 46 | self.variance = [0.1,0.2] 47 | 48 | def forward(self, predictions, priors, targets): 49 | """Multibox Loss 50 | Args: 51 | predictions (tuple): A tuple containing loc preds, conf preds, 52 | and prior boxes from SSD net. 53 | conf shape: torch.size(batch_size,num_priors,num_classes) 54 | loc shape: torch.size(batch_size,num_priors,4) 55 | priors shape: torch.size(num_priors,4) 56 | 57 | ground_truth (tensor): Ground truth boxes and labels for a batch, 58 | shape: [batch_size,num_objs,5] (last idx is the label). 59 | """ 60 | 61 | loc_data, conf_data = predictions 62 | priors = priors 63 | num = loc_data.size(0) 64 | num_priors = (priors.size(0)) 65 | num_classes = self.num_classes 66 | 67 | # match priors (default boxes) and ground truth boxes 68 | loc_t = torch.Tensor(num, num_priors, 4) 69 | conf_t = torch.LongTensor(num, num_priors) 70 | for idx in range(num): 71 | truths = targets[idx][:,:-1].data 72 | labels = targets[idx][:,-1].data 73 | defaults = priors.data 74 | match(self.threshold,truths,defaults,self.variance,labels,loc_t,conf_t,idx) 75 | if GPU: 76 | loc_t = loc_t.cuda() 77 | conf_t = conf_t.cuda() 78 | # wrap targets 79 | loc_t = Variable(loc_t, requires_grad=False) 80 | conf_t = Variable(conf_t,requires_grad=False) 81 | 82 | pos = conf_t > 0 83 | 84 | # Localization Loss (Smooth L1) 85 | # Shape: [batch,num_priors,4] 86 | pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) 87 | loc_p = loc_data[pos_idx].view(-1,4) 88 | loc_t = loc_t[pos_idx].view(-1,4) 89 | loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False) 90 | 91 | # Compute max conf across batch for hard negative mining 92 | batch_conf = conf_data.view(-1,self.num_classes) 93 | loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1,1)) 94 | 95 | # Hard Negative Mining 96 | loss_c[pos.view(-1,1)] = 0 # filter out pos boxes for now 97 | loss_c = loss_c.view(num, -1) 98 | _,loss_idx = loss_c.sort(1, descending=True) 99 | _,idx_rank = loss_idx.sort(1) 100 | num_pos = pos.long().sum(1,keepdim=True) 101 | num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1) 102 | neg = idx_rank < num_neg.expand_as(idx_rank) 103 | 104 | # Confidence Loss Including Positive and Negative Examples 105 | pos_idx = pos.unsqueeze(2).expand_as(conf_data) 106 | neg_idx = neg.unsqueeze(2).expand_as(conf_data) 107 | 
conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes) 108 | targets_weighted = conf_t[(pos+neg).gt(0)] 109 | loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False) 110 | 111 | # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 112 | 113 | N = num_pos.data.float().sum() 114 | loss_l/=N 115 | loss_c/=N 116 | return loss_l,loss_c 117 | -------------------------------------------------------------------------------- /layers/modules/refine_multibox_loss.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.autograd import Variable 6 | from utils.box_utils import match,refine_match, log_sum_exp,decode 7 | GPU = False 8 | if torch.cuda.is_available(): 9 | GPU = True 10 | torch.set_default_tensor_type('torch.cuda.FloatTensor') 11 | 12 | 13 | class RefineMultiBoxLoss(nn.Module): 14 | """SSD Weighted Loss Function 15 | Compute Targets: 16 | 1) Produce Confidence Target Indices by matching ground truth boxes 17 | with (default) 'priorboxes' that have jaccard index > threshold parameter 18 | (default threshold: 0.5). 19 | 2) Produce localization target by 'encoding' variance into offsets of ground 20 | truth boxes and their matched 'priorboxes'. 21 | 3) Hard negative mining to filter the excessive number of negative examples 22 | that comes with using a large number of default bounding boxes. 23 | (default negative:positive ratio 3:1) 24 | Objective Loss: 25 | L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 26 | Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss 27 | weighted by α which is set to 1 by cross val. 28 | Args: 29 | c: class confidences, 30 | l: predicted boxes, 31 | g: ground truth boxes 32 | N: number of matched default boxes 33 | See: https://arxiv.org/pdf/1512.02325.pdf for more details. 34 | """ 35 | 36 | 37 | def __init__(self, num_classes,overlap_thresh,prior_for_matching,bkg_label,neg_mining,neg_pos,neg_overlap,encode_target,object_score = 0): 38 | super(RefineMultiBoxLoss, self).__init__() 39 | self.num_classes = num_classes 40 | self.threshold = overlap_thresh 41 | self.background_label = bkg_label 42 | self.encode_target = encode_target 43 | self.use_prior_for_matching = prior_for_matching 44 | self.do_neg_mining = neg_mining 45 | self.negpos_ratio = neg_pos 46 | self.neg_overlap = neg_overlap 47 | self.object_score = object_score 48 | self.variance = [0.1,0.2] 49 | 50 | def forward(self, odm_data,priors, targets,arm_data = None,filter_object = False): 51 | """Multibox Loss 52 | Args: 53 | predictions (tuple): A tuple containing loc preds, conf preds, 54 | and prior boxes from SSD net. 55 | conf shape: torch.size(batch_size,num_priors,num_classes) 56 | loc shape: torch.size(batch_size,num_priors,4) 57 | priors shape: torch.size(num_priors,4) 58 | 59 | ground_truth (tensor): Ground truth boxes and labels for a batch, 60 | shape: [batch_size,num_objs,5] (last idx is the label). 
61 | arm_data (tuple): arm branch containg arm_loc and arm_conf 62 | filter_object: whether filter out the prediction according to the arm conf score 63 | """ 64 | 65 | loc_data,conf_data = odm_data 66 | if arm_data: 67 | arm_loc,arm_conf = arm_data 68 | priors = priors.data 69 | num = loc_data.size(0) 70 | num_priors = (priors.size(0)) 71 | 72 | # match priors (default boxes) and ground truth boxes 73 | loc_t = torch.Tensor(num, num_priors, 4) 74 | conf_t = torch.LongTensor(num, num_priors) 75 | for idx in range(num): 76 | truths = targets[idx][:,:-1].data 77 | labels = targets[idx][:,-1].data 78 | #for object detection 79 | if self.num_classes == 2: 80 | labels = labels > 0 81 | if arm_data: 82 | refine_match(self.threshold,truths,priors,self.variance,labels,loc_t,conf_t,idx,arm_loc[idx].data) 83 | else: 84 | match(self.threshold,truths,priors,self.variance,labels,loc_t,conf_t,idx) 85 | if GPU: 86 | loc_t = loc_t.cuda() 87 | conf_t = conf_t.cuda() 88 | # wrap targets 89 | loc_t = Variable(loc_t, requires_grad=False) 90 | conf_t = Variable(conf_t,requires_grad=False) 91 | if arm_data and filter_object: 92 | arm_conf_data = arm_conf.data[:,:,1] 93 | pos = conf_t > 0 94 | object_score_index = arm_conf_data <= self.object_score 95 | pos[object_score_index] = 0 96 | 97 | else: 98 | pos = conf_t > 0 99 | 100 | # Localization Loss (Smooth L1) 101 | # Shape: [batch,num_priors,4] 102 | pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) 103 | loc_p = loc_data[pos_idx].view(-1,4) 104 | loc_t = loc_t[pos_idx].view(-1,4) 105 | loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False) 106 | 107 | # Compute max conf across batch for hard negative mining 108 | batch_conf = conf_data.view(-1,self.num_classes) 109 | loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1,1)) 110 | 111 | # Hard Negative Mining 112 | loss_c[pos] = 0 # filter out pos boxes for now 113 | loss_c = loss_c.view(num, -1) 114 | _,loss_idx = loss_c.sort(1, descending=True) 115 | _,idx_rank = loss_idx.sort(1) 116 | num_pos = pos.long().sum(1,keepdim=True) 117 | num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1) 118 | neg = idx_rank < num_neg.expand_as(idx_rank) 119 | 120 | # Confidence Loss Including Positive and Negative Examples 121 | pos_idx = pos.unsqueeze(2).expand_as(conf_data) 122 | neg_idx = neg.unsqueeze(2).expand_as(conf_data) 123 | conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes) 124 | targets_weighted = conf_t[(pos+neg).gt(0)] 125 | loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False) 126 | 127 | # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 128 | N = num_pos.data.sum() 129 | loss_l/=N 130 | loss_c/=N 131 | return loss_l,loss_c 132 | -------------------------------------------------------------------------------- /loss_loader.py: -------------------------------------------------------------------------------- 1 | from focal_loss import FocalLoss 2 | from torch import nn 3 | gamma = 2 4 | 5 | def get_focal_loss(classifier): 6 | print("==> Using Focal Loss.....") 7 | classifier.writer.add_text('Info', "Using Focal Loss ") 8 | return FocalLoss(gamma) 9 | 10 | def get_cross_entropy(classifier): 11 | print("==> Using CrossEntropy.....") 12 | classifier.writer.add_text('Info', "Using Cross Entropy Loss ") 13 | return nn.CrossEntropyLoss() 14 | 15 | def get_vat_cross_entropy(classifier): 16 | print("==> Using Adversarial Training Cross Entropy.....") 17 | pass 
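# --- illustrative sketch (added; not part of the original loss_loader.py) ----
# A hedged usage example for get_focal_loss() above, assuming a stub classifier
# that only provides the `writer.add_text` hook these helpers call, plus toy
# logits/targets; gamma comes from the module-level constant above.
if __name__ == "__main__":
    import torch

    class _StubWriter:
        def add_text(self, tag, text):
            pass

    class _StubClassifier:
        writer = _StubWriter()

    criterion = get_focal_loss(_StubClassifier())  # returns FocalLoss(gamma=2)
    logits = torch.randn(4, 2)                     # [batch, num_classes]
    targets = torch.tensor([0, 1, 1, 0])           # class indices
    print(criterion(logits, targets).item())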
-------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import os 2 | gpu=0 3 | os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 4 | os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu) 5 | os.environ['CUDA_LAUNCH_BLOCKING'] = str(gpu) 6 | from object_detector import Detector 7 | from torch import optim 8 | from augment_data import augment_images 9 | from model_loader import * 10 | from loss_loader import * 11 | from data_reader import * 12 | 13 | import argparse 14 | import pickle 15 | import time 16 | 17 | import numpy as np 18 | import os 19 | import torch 20 | import torch.backends.cudnn as cudnn 21 | import torch.nn.init as init 22 | import torch.optim as optim 23 | import torch.utils.data as data 24 | from torch.autograd import Variable 25 | 26 | from data import VOCroot, COCOroot, VOC_300, VOC_512, COCO_300, COCO_512, COCO_mobile_300, AnnotationTransform, \ 27 | COCODetection, VOCDetection, detection_collate, BaseTransform, preproc 28 | from layers.functions import Detect, PriorBox 29 | from layers.modules import MultiBoxLoss 30 | from utils.nms_wrapper import nms 31 | from utils.timer import Timer 32 | 33 | 34 | def str2bool(v): 35 | return v.lower() in ("yes", "true", "t", "1") 36 | 37 | classes=VOC_CLASSES 38 | classes_delimited=','.join(classes) 39 | num_classes=len(classes) 40 | 41 | parser = argparse.ArgumentParser( 42 | description='Receptive Field Block Net Training') 43 | 44 | parser.add_argument('-gpu', default=gpu, 45 | type=int, help='gpu index for training.') 46 | parser.add_argument('-v', '--version', default='RFB_vgg', 47 | help='RFB_vgg ,RFB_E_vgg RFB_mobile SSD_vgg version.') 48 | parser.add_argument('-s', '--size', default='300',type=int, 49 | help='300 or 512 input size.') 50 | parser.add_argument('-d', '--dataset', default='VOC', 51 | help='VOC or COCO dataset') 52 | 53 | parser.add_argument('-classes', default=classes_delimited,type=str, 54 | help='class names delimited by ,') 55 | parser.add_argument('-num_classes', default=num_classes, type=int, 56 | help='total classes') 57 | 58 | parser.add_argument( 59 | '--basenet', default='weights/vgg16_reducedfc.pth', help='pretrained base model') 60 | parser.add_argument('--jaccard_threshold', default=0.5, 61 | type=float, help='Min Jaccard index for matching') 62 | parser.add_argument('-b', '--batch_size', default=8, 63 | type=int, help='Batch size for training') 64 | parser.add_argument('--num_workers', default=4, 65 | type=int, help='Number of workers used in dataloading') 66 | parser.add_argument('--cuda', default=True, 67 | type=bool, help='Use cuda to train model') 68 | parser.add_argument('--ngpu', default=2, type=int, help='gpus') 69 | parser.add_argument('--lr', '--learning-rate', 70 | default=4e-3, type=float, help='initial learning rate') 71 | parser.add_argument('--momentum', default=0.9, type=float, help='momentum') 72 | 73 | parser.add_argument('--resume_net', default=False, help='resume net for retraining') 74 | parser.add_argument('--resume_epoch', default=0, 75 | type=int, help='resume iter for retraining') 76 | parser.add_argument('-epochs', '--epochs', default=300, 77 | type=int, help='max epoch for retraining') 78 | parser.add_argument('--weight_decay', default=5e-4, 79 | type=float, help='Weight decay for SGD') 80 | parser.add_argument('-we', '--warm_epoch', default=1, 81 | type=int, help='max epoch for retraining') 82 | parser.add_argument('--gamma', default=0.1, 83 | type=float, 
help='Gamma update for SGD') 84 | 85 | parser.add_argument('--freeze_layers', default=0.80, 86 | type=float, help='PErcentage of weight to be freezed.') 87 | 88 | parser.add_argument('--log_iters', default=True, 89 | type=bool, help='Print the loss at each iteration') 90 | parser.add_argument('--save_folder', default='weights/', 91 | help='Location to save checkpoint models') 92 | parser.add_argument('--date', default='1213') 93 | parser.add_argument('--save_frequency', default=10) 94 | parser.add_argument('--retest', default=False, type=bool, 95 | help='test cache results') 96 | parser.add_argument('--test_frequency', default=10) 97 | parser.add_argument('--visdom', default=False, type=str2bool, help='Use visdom to for loss visualization') 98 | parser.add_argument('--send_images_to_visdom', type=str2bool, default=False, 99 | help='Sample a random image from each 10th batch, send it to visdom after augmentations step') 100 | args = parser.parse_args() 101 | 102 | """ 103 | sudo nvidia-smi -pl 180 104 | sudo nvidia-smi --gpu-reset -i 0 105 | use command line to run the training. 106 | 107 | todo download more images using image_utils and isic-arhive. Also, use more online resources for data. 108 | 109 | """ 110 | 111 | from layers.modules.multibox_loss import MultiBoxLoss 112 | 113 | from statics import * 114 | def get_loss_function(classifier): 115 | return MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False) 116 | 117 | def get_model(args): 118 | return get_ssd_model(args) 119 | 120 | def get_optimizer(model_trainer): 121 | epsilon=1e-8 122 | momentum = 0.9 123 | weight_decay=5e-4 124 | # model_trainer.writer.add_scalar("leanring rate", learning_rate) 125 | # model_trainer.writer.add_scalar("epsilon", epsilon) 126 | # optimizer=optim.SGD(filter(lambda p: p.requires_grad, model_trainer.model.parameters()), 127 | # lr=0.001,momentum=momentum,weight_decay=weight_decay) 128 | optimizer = optim.Adam(filter(lambda p: p.requires_grad, model_trainer.model.parameters()),lr=0.01) 129 | # optimizer = optim.SGD(filter(lambda p: p.requires_grad, model_trainer.model.parameters()), lr=0.001, momentum=0.9, 130 | # weight_decay=weight_decay) 131 | return optimizer 132 | 133 | def get_prior(): 134 | cfg = (VOC_300, VOC_512)[args.size == '512'] 135 | priorbox = PriorBox(cfg) 136 | priors = Variable(priorbox.forward(), volatile=True) 137 | return priors 138 | 139 | class ModelDetails(object): 140 | def __init__(self,args): 141 | self.args=args 142 | self.priors=get_prior() 143 | self.model,self.model_name_str = get_model(args) 144 | self.logs_dir = "logs/{}/{}".format(args.gpu,self.model_name_str) 145 | self.augment_images = augment_images 146 | self.dataset_loader=get_data_loader(args) 147 | self.get_loss_function = get_loss_function 148 | self.get_optimizer = get_optimizer 149 | self.dataset=data_set_name 150 | self.class_names=VOC_CLASSES 151 | 152 | 153 | def start_training(args): 154 | model_details=ModelDetails(args) 155 | detector=Detector(model_details) 156 | detector.load_data() 157 | detector.load_model() 158 | for epoch in range(detector.start_epoch, detector.start_epoch + args.epochs): 159 | try: 160 | detector.train(epoch) 161 | detector.test(epoch) 162 | except KeyboardInterrupt: 163 | detector.test(epoch) 164 | break; 165 | detector.load_data() 166 | 167 | start_training(args) -------------------------------------------------------------------------------- /make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cd ./utils/ 
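# Illustrative usage note (assumed values, not taken from the repository's docs):
# the training driver defined in main.py above can be launched with the flags it
# declares, for example:
#   python main.py -v RFB_vgg -s 300 -d VOC -b 8 --lr 4e-3 --basenet weights/vgg16_reducedfc.pth
# Note that CUDA_VISIBLE_DEVICES is set from the hard-coded gpu=0 at the top of
# main.py before argument parsing, so the -gpu flag appears to affect only the
# log directory and model name (e.g. logs/{gpu}/...), not device selection.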
3 | 4 | CUDA_PATH=/usr/local/cuda/ 5 | 6 | python build.py build_ext --inplace 7 | 8 | cd .. 9 | -------------------------------------------------------------------------------- /model_loader.py: -------------------------------------------------------------------------------- 1 | from statics import voc 2 | import argparse 3 | import pickle 4 | import time 5 | 6 | import numpy as np 7 | import os 8 | import torch 9 | import torch.backends.cudnn as cudnn 10 | import torch.nn.init as init 11 | import torch.optim as optim 12 | import torch.utils.data as data 13 | from torch.autograd import Variable 14 | 15 | from data import VOCroot, COCOroot, VOC_300, VOC_512, COCO_300, COCO_512, COCO_mobile_300, AnnotationTransform, \ 16 | COCODetection, VOCDetection, detection_collate, BaseTransform, preproc 17 | from layers.functions import Detect, PriorBox 18 | from layers.modules import MultiBoxLoss 19 | from utils.nms_wrapper import nms 20 | from utils.timer import Timer 21 | 22 | def get_ssd_model(args): 23 | save_folder = os.path.join(args.save_folder, args.version + '_' + str(args.size), args.date) 24 | if not os.path.exists(save_folder): 25 | os.makedirs(save_folder) 26 | test_save_dir = os.path.join(save_folder, 'ss_predict') 27 | if not os.path.exists(test_save_dir): 28 | os.makedirs(test_save_dir) 29 | gpu=args.gpu 30 | img_dim = args.size 31 | num_classes=args.num_classes 32 | print("==>Loading SSD model...") 33 | if args.version == 'RFB_vgg': 34 | from models.RFB_Net_vgg import build_net 35 | elif args.version == 'RFB_E_vgg': 36 | from models.RFB_Net_E_vgg import build_net 37 | elif args.version == 'RFB_mobile': 38 | from models.RFB_Net_mobile import build_net 39 | 40 | cfg = COCO_mobile_300 41 | elif args.version == 'SSD_vgg': 42 | from models.SSD_vgg import build_net 43 | elif args.version == 'FSSD_vgg': 44 | from models.FSSD_vgg import build_net 45 | elif args.version == 'FRFBSSD_vgg': 46 | from models.FRFBSSD_vgg import build_net 47 | else: 48 | print('Unkown version!') 49 | net = build_net(int(img_dim), num_classes) 50 | # model(model.cuda(), (3, height, width)) 51 | if not args.resume_net: 52 | base_weights = torch.load(args.basenet) 53 | print('Loading base network...') 54 | net.base.load_state_dict(base_weights) 55 | 56 | def xavier(param): 57 | init.xavier_uniform(param) 58 | 59 | def weights_init(m): 60 | for key in m.state_dict(): 61 | if key.split('.')[-1] == 'weight': 62 | if 'conv' in key: 63 | init.kaiming_normal(m.state_dict()[key], mode='fan_out') 64 | if 'bn' in key: 65 | m.state_dict()[key][...] = 1 66 | elif key.split('.')[-1] == 'bias': 67 | m.state_dict()[key][...] 
= 0 68 | 69 | print('Initializing weights...') 70 | # initialize newly added layers' weights with kaiming_normal method 71 | net.extras.apply(weights_init) 72 | net.loc.apply(weights_init) 73 | net.conf.apply(weights_init) 74 | if args.version == 'FSSD_vgg' or args.version == 'FRFBSSD_vgg': 75 | net.ft_module.apply(weights_init) 76 | net.pyramid_ext.apply(weights_init) 77 | if 'RFB' in args.version: 78 | net.Norm.apply(weights_init) 79 | if args.version == 'RFB_E_vgg': 80 | net.reduce.apply(weights_init) 81 | net.up_reduce.apply(weights_init) 82 | 83 | else: 84 | # load resume network 85 | resume_net_path = os.path.join(save_folder, args.version + '_' + args.dataset + '_epoches_' + \ 86 | str(args.resume_epoch) + '.pth') 87 | print('Loading resume network', resume_net_path) 88 | state_dict = torch.load(resume_net_path) 89 | # create new OrderedDict that does not contain `module.` 90 | from collections import OrderedDict 91 | 92 | new_state_dict = OrderedDict() 93 | for k, v in state_dict.items(): 94 | head = k[:7] 95 | if head == 'module.': 96 | name = k[7:] # remove `module.` 97 | else: 98 | name = k 99 | new_state_dict[name] = v 100 | net.load_state_dict(new_state_dict) 101 | return net,"ssd_{}_adam".format(gpu) 102 | -------------------------------------------------------------------------------- /models/FSSD_mobile.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import os 4 | import torch 5 | import torch.nn as nn 6 | 7 | sys.path.append('./') 8 | from .mobilenet import mobilenet_1 9 | 10 | 11 | class BasicConv(nn.Module): 12 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, 13 | bn=False, bias=True, up_size=0): 14 | super(BasicConv, self).__init__() 15 | self.out_channels = out_planes 16 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, 17 | dilation=dilation, groups=groups, bias=bias) 18 | self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) if bn else None 19 | self.relu = nn.ReLU(inplace=True) if relu else None 20 | self.up_size = up_size 21 | self.up_sample = nn.Upsample(size=(up_size, up_size), mode='bilinear') if up_size != 0 else None 22 | 23 | def forward(self, x): 24 | x = self.conv(x) 25 | if self.bn is not None: 26 | x = self.bn(x) 27 | if self.relu is not None: 28 | x = self.relu(x) 29 | if self.up_size > 0: 30 | x = self.up_sample(x) 31 | return x 32 | 33 | 34 | class FSSD(nn.Module): 35 | """Single Shot Multibox Architecture 36 | The network is composed of a base VGG network followed by the 37 | added multibox conv layers. Each multibox layer branches into 38 | 1) conv2d for class conf scores 39 | 2) conv2d for localization predictions 40 | 3) associated priorbox layer to produce default bounding 41 | boxes specific to the layer's feature map size. 42 | See: https://arxiv.org/pdf/1512.02325.pdf for more details. 
43 | 44 | Args: 45 | phase: (string) Can be "test" or "train" 46 | base: VGG16 layers for input, size of either 300 or 500 47 | extras: extra layers that feed to multibox loc and conf layers 48 | head: "multibox head" consists of loc and conf conv layers 49 | """ 50 | 51 | def __init__(self, size, head, ft_module, pyramid_ext, num_classes): 52 | super(FSSD, self).__init__() 53 | self.num_classes = num_classes 54 | # TODO: implement __call__ in PriorBox 55 | self.size = size 56 | 57 | # SSD network 58 | self.base = mobilenet_1() 59 | # Layer learns to scale the l2 normalized features from conv4_3 60 | self.ft_module = nn.ModuleList(ft_module) 61 | self.pyramid_ext = nn.ModuleList(pyramid_ext) 62 | 63 | self.loc = nn.ModuleList(head[0]) 64 | self.conf = nn.ModuleList(head[1]) 65 | self.fea_bn = nn.BatchNorm2d(256 * len(self.ft_module), affine=True) 66 | 67 | self.softmax = nn.Softmax() 68 | 69 | def forward(self, x, test=False): 70 | """Applies network layers and ops on input image(s) x. 71 | 72 | Args: 73 | x: input image or batch of images. Shape: [batch,3*batch,300,300]. 74 | 75 | Return: 76 | Depending on phase: 77 | test: 78 | Variable(tensor) of output class label predictions, 79 | confidence score, and corresponding location predictions for 80 | each object detected. Shape: [batch,topk,7] 81 | 82 | train: 83 | list of concat outputs from: 84 | 1: confidence layers, Shape: [batch*num_priors,num_classes] 85 | 2: localization layers, Shape: [batch,num_priors*4] 86 | 3: priorbox layers, Shape: [2,num_priors*4] 87 | """ 88 | source_features = list() 89 | transformed_features = list() 90 | loc = list() 91 | conf = list() 92 | 93 | base_out = self.base(x) 94 | source_features.append(base_out[0]) # mobilenet 4_1 95 | source_features.append(base_out[1]) # mobilent_5_5 96 | source_features.append(base_out[2]) # mobilenet 6_1 97 | 98 | assert len(self.ft_module) == len(source_features) 99 | for k, v in enumerate(self.ft_module): 100 | transformed_features.append(v(source_features[k])) 101 | concat_fea = torch.cat(transformed_features, 1) 102 | x = self.fea_bn(concat_fea) 103 | fea_bn = x 104 | pyramid_fea = list() 105 | for k, v in enumerate(self.pyramid_ext): 106 | x = v(x) 107 | pyramid_fea.append(x) 108 | # apply multibox head to source layers 109 | for (x, l, c) in zip(pyramid_fea, self.loc, self.conf): 110 | loc.append(l(x).permute(0, 2, 3, 1).contiguous()) 111 | conf.append(c(x).permute(0, 2, 3, 1).contiguous()) 112 | 113 | loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1) 114 | conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1) 115 | if test: 116 | output = ( 117 | loc.view(loc.size(0), -1, 4), # loc preds 118 | self.softmax(conf.view(-1, self.num_classes)), # conf preds 119 | ) 120 | features = () 121 | else: 122 | output = ( 123 | loc.view(loc.size(0), -1, 4), 124 | conf.view(conf.size(0), -1, self.num_classes), 125 | ) 126 | features = ( 127 | fea_bn 128 | ) 129 | return output 130 | 131 | def load_weights(self, base_file): 132 | other, ext = os.path.splitext(base_file) 133 | if ext == '.pkl' or '.pth': 134 | print('Loading weights into state dict...') 135 | state_dict = torch.load(base_file, map_location=lambda storage, loc: storage) 136 | from collections import OrderedDict 137 | new_state_dict = OrderedDict() 138 | for k, v in state_dict.items(): 139 | head = k[:7] 140 | if head == 'module.': 141 | name = k[7:] # remove `module.` 142 | else: 143 | name = k 144 | new_state_dict[name] = v 145 | self.base.load_state_dict(new_state_dict) 146 | print('Finished!') 147 | 148 
| else: 149 | print('Sorry only .pth and .pkl files supported.') 150 | 151 | 152 | def feature_transform_module(scale_factor): 153 | layers = [] 154 | # conv4_1 155 | layers += [BasicConv(int(256 * scale_factor), 256, kernel_size=1, padding=0)] 156 | # conv5_5 157 | layers += [BasicConv(int(512 * scale_factor), 256, kernel_size=1, padding=0, up_size=38)] 158 | # conv6_mpo1 159 | layers += [BasicConv(int(1024 * scale_factor), 256, kernel_size=1, padding=0, up_size=38)] 160 | return layers 161 | 162 | 163 | def pyramid_feature_extractor(): 164 | ''' 165 | layers = [BasicConv(256*3,512,kernel_size=3,stride=1,padding=1),BasicConv(512,512,kernel_size=3,stride=2,padding=1), \ 166 | BasicConv(512,256,kernel_size=3,stride=2,padding=1),BasicConv(256,256,kernel_size=3,stride=2,padding=1), \ 167 | BasicConv(256,256,kernel_size=3,stride=1,padding=0),BasicConv(256,256,kernel_size=3,stride=1,padding=0)] 168 | ''' 169 | from .mobilenet import DepthWiseBlock 170 | layers = [DepthWiseBlock(256 * 3, 512, stride=1), DepthWiseBlock(512, 512, stride=2), 171 | DepthWiseBlock(512, 256, stride=2), DepthWiseBlock(256, 256, stride=2), \ 172 | DepthWiseBlock(256, 128, stride=1, padding=0), DepthWiseBlock(128, 128, stride=1, padding=0)] 173 | 174 | return layers 175 | 176 | 177 | def multibox(fea_channels, cfg, num_classes): 178 | loc_layers = [] 179 | conf_layers = [] 180 | assert len(fea_channels) == len(cfg) 181 | for i, fea_channel in enumerate(fea_channels): 182 | loc_layers += [nn.Conv2d(fea_channel, cfg[i] * 4, kernel_size=3, padding=1)] 183 | conf_layers += [nn.Conv2d(fea_channel, cfg[i] * num_classes, kernel_size=3, padding=1)] 184 | return (loc_layers, conf_layers) 185 | 186 | 187 | mbox = { 188 | '300': [6, 6, 6, 6, 4, 4], # number of boxes per feature map location 189 | } 190 | fea_channels = [512, 512, 256, 256, 128, 128] 191 | 192 | 193 | def build_net(size=300, num_classes=21): 194 | if size != 300 and size != 512: 195 | print("Error: Sorry only SSD300 and SSD512 is supported currently!") 196 | return 197 | 198 | return FSSD(size, multibox(fea_channels, mbox[str(size)], num_classes), feature_transform_module(1), 199 | pyramid_feature_extractor(), \ 200 | num_classes=num_classes) 201 | 202 | 203 | net = build_net() 204 | print(net) 205 | -------------------------------------------------------------------------------- /models/SSD_vgg.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from layers import * 7 | from .base_models import vgg, vgg_base 8 | 9 | 10 | class SSD(nn.Module): 11 | """Single Shot Multibox Architecture 12 | The network is composed of a base VGG network followed by the 13 | added multibox conv layers. Each multibox layer branches into 14 | 1) conv2d for class conf scores 15 | 2) conv2d for localization predictions 16 | 3) associated priorbox layer to produce default bounding 17 | boxes specific to the layer's feature map size. 18 | See: https://arxiv.org/pdf/1512.02325.pdf for more details. 
19 | 20 | Args: 21 | phase: (string) Can be "test" or "train" 22 | base: VGG16 layers for input, size of either 300 or 500 23 | extras: extra layers that feed to multibox loc and conf layers 24 | head: "multibox head" consists of loc and conf conv layers 25 | """ 26 | 27 | def __init__(self, base, extras, head, num_classes,size): 28 | super(SSD, self).__init__() 29 | self.num_classes = num_classes 30 | # TODO: implement __call__ in PriorBox 31 | self.size = size 32 | 33 | # SSD network 34 | self.base = nn.ModuleList(base) 35 | # Layer learns to scale the l2 normalized features from conv4_3 36 | self.extras = nn.ModuleList(extras) 37 | self.L2Norm = L2Norm(512, 20) 38 | 39 | self.loc = nn.ModuleList(head[0]) 40 | self.conf = nn.ModuleList(head[1]) 41 | 42 | self.softmax = nn.Softmax() 43 | 44 | def forward(self, x, test=False): 45 | """Applies network layers and ops on input image(s) x. 46 | 47 | Args: 48 | x: input image or batch of images. Shape: [batch,3*batch,300,300]. 49 | 50 | Return: 51 | Depending on phase: 52 | test: 53 | Variable(tensor) of output class label predictions, 54 | confidence score, and corresponding location predictions for 55 | each object detected. Shape: [batch,topk,7] 56 | 57 | train: 58 | list of concat outputs from: 59 | 1: confidence layers, Shape: [batch*num_priors,num_classes] 60 | 2: localization layers, Shape: [batch,num_priors*4] 61 | 3: priorbox layers, Shape: [2,num_priors*4] 62 | """ 63 | sources = list() 64 | loc = list() 65 | conf = list() 66 | 67 | # apply vgg up to conv4_3 relu 68 | for k in range(23): 69 | x = self.base[k](x) 70 | 71 | s = self.L2Norm(x) 72 | sources.append(s) 73 | 74 | # apply vgg up to fc7 75 | for k in range(23, len(self.base)): 76 | x = self.base[k](x) 77 | sources.append(x) 78 | 79 | # apply extra layers and cache source layer outputs 80 | for k, v in enumerate(self.extras): 81 | x = F.relu(v(x), inplace=True) 82 | if k % 2 == 1: 83 | sources.append(x) 84 | 85 | # apply multibox head to source layers 86 | for (x, l, c) in zip(sources, self.loc, self.conf): 87 | loc.append(l(x).permute(0, 2, 3, 1).contiguous()) 88 | conf.append(c(x).permute(0, 2, 3, 1).contiguous()) 89 | 90 | loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1) 91 | conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1) 92 | if test: 93 | output = ( 94 | loc.view(loc.size(0), -1, 4), # loc preds 95 | self.softmax(conf.view(-1, self.num_classes)), # conf preds 96 | ) 97 | else: 98 | output = ( 99 | loc.view(loc.size(0), -1, 4), 100 | conf.view(conf.size(0), -1, self.num_classes), 101 | ) 102 | return output 103 | 104 | def load_weights(self, base_file): 105 | other, ext = os.path.splitext(base_file) 106 | if ext == '.pkl' or '.pth': 107 | print('Loading weights into state dict...') 108 | self.load_state_dict(torch.load(base_file, map_location=lambda storage, loc: storage)) 109 | print('Finished!') 110 | else: 111 | print('Sorry only .pth and .pkl files supported.') 112 | 113 | 114 | def add_extras(cfg, i, batch_norm=False, size=300): 115 | # Extra layers added to VGG for feature scaling 116 | layers = [] 117 | in_channels = i 118 | flag = False 119 | for k, v in enumerate(cfg): 120 | if in_channels != 'S': 121 | if v == 'S': 122 | layers += [nn.Conv2d(in_channels, cfg[k + 1], 123 | kernel_size=(1, 3)[flag], stride=2, padding=1)] 124 | else: 125 | layers += [nn.Conv2d(in_channels, v, kernel_size=(1, 3)[flag])] 126 | flag = not flag 127 | in_channels = v 128 | if size == 512: 129 | layers.append(nn.Conv2d(in_channels, 128, kernel_size=1, stride=1)) 130 | 
layers.append(nn.Conv2d(128, 256, kernel_size=4, stride=1, padding=1)) 131 | return layers 132 | 133 | 134 | def multibox(vgg, extra_layers, cfg, num_classes): 135 | loc_layers = [] 136 | conf_layers = [] 137 | vgg_source = [24, -2] 138 | for k, v in enumerate(vgg_source): 139 | loc_layers += [nn.Conv2d(vgg[v].out_channels, 140 | cfg[k] * 4, kernel_size=3, padding=1)] 141 | conf_layers += [nn.Conv2d(vgg[v].out_channels, 142 | cfg[k] * num_classes, kernel_size=3, padding=1)] 143 | for k, v in enumerate(extra_layers[1::2], 2): 144 | loc_layers += [nn.Conv2d(v.out_channels, cfg[k] 145 | * 4, kernel_size=3, padding=1)] 146 | conf_layers += [nn.Conv2d(v.out_channels, cfg[k] 147 | * num_classes, kernel_size=3, padding=1)] 148 | return vgg, extra_layers, (loc_layers, conf_layers) 149 | 150 | 151 | extras = { 152 | '300': [256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256], 153 | '512': [256, 'S', 512, 128, 'S', 256, 128, 'S', 256, 128, 'S', 256], 154 | } 155 | mbox = { 156 | '300': [6, 6, 6, 6, 4, 4], # number of boxes per feature map location 157 | '512': [6, 6, 6, 6, 6, 4, 4], 158 | } 159 | 160 | 161 | def build_net(size=300, num_classes=21): 162 | if size != 300 and size != 512: 163 | print("Error: Sorry only SSD300 and SSD512 is supported currently!") 164 | return 165 | 166 | return SSD(*multibox(vgg(vgg_base[str(size)], 3), 167 | add_extras(extras[str(size)], 1024, size=size), 168 | mbox[str(size)], num_classes), num_classes=num_classes,size=size) 169 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/models/__init__.py -------------------------------------------------------------------------------- /models/base_models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | def vgg(cfg, i, batch_norm=False): 6 | layers = [] 7 | in_channels = i 8 | for v in cfg: 9 | if v == 'M': 10 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 11 | elif v == 'C': 12 | layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)] 13 | else: 14 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) 15 | if batch_norm: 16 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] 17 | else: 18 | layers += [conv2d, nn.ReLU(inplace=True)] 19 | in_channels = v 20 | pool5 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1) 21 | conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6) 22 | conv7 = nn.Conv2d(1024, 1024, kernel_size=1) 23 | layers += [pool5, conv6, 24 | nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True)] 25 | return layers 26 | 27 | 28 | vgg_base = { 29 | '300': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M', 30 | 512, 512, 512], 31 | '512': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M', 32 | 512, 512, 512], 33 | } 34 | 35 | 36 | class BasicConv(nn.Module): 37 | 38 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, 39 | bn=True, bias=False): 40 | super(BasicConv, self).__init__() 41 | self.out_channels = out_planes 42 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, 43 | dilation=dilation, groups=groups, bias=bias) 44 | self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, 
momentum=0.01, affine=True) if bn else None 45 | self.relu = nn.ReLU(inplace=True) if relu else None 46 | 47 | def forward(self, x): 48 | x = self.conv(x) 49 | if self.bn is not None: 50 | x = self.bn(x) 51 | if self.relu is not None: 52 | x = self.relu(x) 53 | return x 54 | 55 | 56 | class BasicRFB_a(nn.Module): 57 | 58 | def __init__(self, in_planes, out_planes, stride=1, scale=0.1): 59 | super(BasicRFB_a, self).__init__() 60 | self.scale = scale 61 | self.out_channels = out_planes 62 | inter_planes = in_planes // 4 63 | 64 | self.branch0 = nn.Sequential( 65 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1), 66 | BasicConv(inter_planes, inter_planes, kernel_size=3, stride=1, padding=1, relu=False) 67 | ) 68 | self.branch1 = nn.Sequential( 69 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1), 70 | BasicConv(inter_planes, inter_planes, kernel_size=(3, 1), stride=1, padding=(1, 0)), 71 | BasicConv(inter_planes, inter_planes, kernel_size=3, stride=1, padding=3, dilation=3, relu=False) 72 | ) 73 | self.branch2 = nn.Sequential( 74 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1), 75 | BasicConv(inter_planes, inter_planes, kernel_size=(1, 3), stride=stride, padding=(0, 1)), 76 | BasicConv(inter_planes, inter_planes, kernel_size=3, stride=1, padding=3, dilation=3, relu=False) 77 | ) 78 | ''' 79 | self.branch3 = nn.Sequential( 80 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1), 81 | BasicConv(inter_planes, inter_planes, kernel_size=3, stride=1, padding=1), 82 | BasicConv(inter_planes, inter_planes, kernel_size=3, stride=1, padding=3, dilation=3, relu=False) 83 | ) 84 | ''' 85 | self.branch3 = nn.Sequential( 86 | BasicConv(in_planes, inter_planes // 2, kernel_size=1, stride=1), 87 | BasicConv(inter_planes // 2, (inter_planes // 4) * 3, kernel_size=(1, 3), stride=1, padding=(0, 1)), 88 | BasicConv((inter_planes // 4) * 3, inter_planes, kernel_size=(3, 1), stride=stride, padding=(1, 0)), 89 | BasicConv(inter_planes, inter_planes, kernel_size=3, stride=1, padding=5, dilation=5, relu=False) 90 | ) 91 | 92 | self.ConvLinear = BasicConv(4 * inter_planes, out_planes, kernel_size=1, stride=1, relu=False) 93 | self.shortcut = BasicConv(in_planes, out_planes, kernel_size=1, stride=stride, relu=False) 94 | self.relu = nn.ReLU(inplace=False) 95 | 96 | def forward(self, x): 97 | x0 = self.branch0(x) 98 | x1 = self.branch1(x) 99 | x2 = self.branch2(x) 100 | x3 = self.branch3(x) 101 | 102 | out = torch.cat((x0, x1, x2, x3), 1) 103 | out = self.ConvLinear(out) 104 | short = self.shortcut(x) 105 | out = out * self.scale + short 106 | out = self.relu(out) 107 | 108 | return out 109 | -------------------------------------------------------------------------------- /models/densenet.py: -------------------------------------------------------------------------------- 1 | '''DenseNet in PyTorch.''' 2 | import math 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | from torch.autograd import Variable 9 | 10 | 11 | class Bottleneck(nn.Module): 12 | def __init__(self, in_planes, growth_rate): 13 | super(Bottleneck, self).__init__() 14 | self.bn1 = nn.BatchNorm2d(in_planes) 15 | self.conv1 = nn.Conv2d(in_planes, 4*growth_rate, kernel_size=1, bias=False) 16 | self.bn2 = nn.BatchNorm2d(4*growth_rate) 17 | self.conv2 = nn.Conv2d(4*growth_rate, growth_rate, kernel_size=3, padding=1, bias=False) 18 | 19 | def forward(self, x): 20 | out = self.conv1(F.relu(self.bn1(x))) 21 | out = self.conv2(F.relu(self.bn2(out))) 22 | out = 
torch.cat([out,x], 1) 23 | return out 24 | 25 | 26 | class Transition(nn.Module): 27 | def __init__(self, in_planes, out_planes): 28 | super(Transition, self).__init__() 29 | self.bn = nn.BatchNorm2d(in_planes) 30 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False) 31 | 32 | def forward(self, x): 33 | out = self.conv(F.relu(self.bn(x))) 34 | out = F.avg_pool2d(out, 2) 35 | return out 36 | 37 | 38 | class DenseNet(nn.Module): 39 | def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=512): 40 | super(DenseNet, self).__init__() 41 | self.growth_rate = growth_rate 42 | 43 | num_planes = 2*growth_rate 44 | self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, stride=2, padding=1, bias=False) 45 | 46 | self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0]) 47 | num_planes += nblocks[0]*growth_rate 48 | out_planes = int(math.floor(num_planes*reduction)) 49 | self.trans1 = Transition(num_planes, out_planes) 50 | num_planes = out_planes 51 | 52 | self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1]) 53 | num_planes += nblocks[1]*growth_rate 54 | out_planes = int(math.floor(num_planes*reduction)) 55 | self.trans2 = Transition(num_planes, out_planes) 56 | num_planes = out_planes 57 | 58 | self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2]) 59 | num_planes += nblocks[2]*growth_rate 60 | out_planes = int(math.floor(num_planes*reduction)) 61 | self.trans3 = Transition(num_planes, out_planes) 62 | num_planes = out_planes 63 | 64 | self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3]) 65 | num_planes += nblocks[3]*growth_rate 66 | 67 | self.bn = nn.BatchNorm2d(num_planes) 68 | self.linear = nn.Linear(num_planes, num_classes) 69 | self.fc_bn = nn.BatchNorm1d(512) 70 | 71 | def _make_dense_layers(self, block, in_planes, nblock): 72 | layers = [] 73 | for i in range(nblock): 74 | layers.append(block(in_planes, self.growth_rate)) 75 | in_planes += self.growth_rate 76 | return nn.Sequential(*layers) 77 | 78 | def forward(self, x): 79 | out = self.conv1(x) 80 | out = self.trans1(self.dense1(out)) 81 | out = self.trans2(self.dense2(out)) 82 | out = self.trans3(self.dense3(out)) 83 | out = self.dense4(out) 84 | out = F.avg_pool2d(F.relu(self.bn(out)), 7) 85 | out = out.view(out.size(0), -1) 86 | out = self.linear(out) 87 | out = self.fc_bn(out) 88 | return out 89 | 90 | def DenseNet121(): 91 | return DenseNet(Bottleneck, [6,12,24,16], growth_rate=12) 92 | 93 | def DenseNet169(): 94 | return DenseNet(Bottleneck, [6,12,32,32], growth_rate=32) 95 | 96 | def DenseNet201(): 97 | return DenseNet(Bottleneck, [6,12,48,32], growth_rate=32) 98 | 99 | def DenseNet161(): 100 | return DenseNet(Bottleneck, [6,12,36,24], growth_rate=48) 101 | 102 | def test(): 103 | net = DenseNet121() 104 | x = torch.randn(2,3,112,112) 105 | y = net(Variable(x)) 106 | print(y.size()) 107 | 108 | #test() 109 | -------------------------------------------------------------------------------- /models/mobilenet.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | """ 4 | Creates a MobileNet Model as defined in: 5 | Andrew G. Howard Menglong Zhu Bo Chen, et.al. (2017). 6 | MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications. 
7 | (c) Yang Lu 8 | """ 9 | import math 10 | import torch.nn as nn 11 | 12 | __all__ = ['DepthWiseBlock', 'mobilenet', 'mobilenet_2', 'mobilenet_1', 'mobilenet_075', 'mobilenet_05', 13 | 'mobilenet_025'] 14 | 15 | 16 | class DepthWiseBlock(nn.Module): 17 | def __init__(self, inplanes, planes, stride=1, padding=1): 18 | super(DepthWiseBlock, self).__init__() 19 | inplanes, planes = int(inplanes), int(planes) 20 | self.conv_dw = nn.Conv2d(inplanes, inplanes, kernel_size=3, padding=padding, stride=stride, groups=inplanes, 21 | bias=False) 22 | self.bn_dw = nn.BatchNorm2d(inplanes) 23 | self.conv_sep = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, padding=0, bias=False) 24 | self.bn_sep = nn.BatchNorm2d(planes) 25 | self.relu = nn.ReLU(inplace=True) 26 | 27 | def forward(self, x): 28 | out = self.conv_dw(x) 29 | out = self.bn_dw(out) 30 | out = self.relu(out) 31 | 32 | out = self.conv_sep(out) 33 | out = self.bn_sep(out) 34 | out = self.relu(out) 35 | 36 | return out 37 | 38 | 39 | class MobileNet(nn.Module): 40 | def __init__(self, widen_factor=1.0, num_classes=1000): 41 | """ Constructor 42 | Args: 43 | widen_factor: config of widen_factor 44 | num_classes: number of classes 45 | """ 46 | super(MobileNet, self).__init__() 47 | 48 | block = DepthWiseBlock 49 | 50 | self.conv1 = nn.Conv2d(3, int(32 * widen_factor), kernel_size=3, stride=2, padding=1, bias=False) 51 | self.bn1 = nn.BatchNorm2d(int(32 * widen_factor)) 52 | self.relu = nn.ReLU(inplace=True) 53 | 54 | self.dw2_1 = block(32 * widen_factor, 64 * widen_factor) 55 | self.dw2_2 = block(64 * widen_factor, 128 * widen_factor, stride=2) 56 | 57 | self.dw3_1 = block(128 * widen_factor, 128 * widen_factor) 58 | self.dw3_2 = block(128 * widen_factor, 256 * widen_factor, stride=2) 59 | 60 | self.dw4_1 = block(256 * widen_factor, 256 * widen_factor) 61 | self.dw4_2 = block(256 * widen_factor, 512 * widen_factor, stride=2) 62 | 63 | self.dw5_1 = block(512 * widen_factor, 512 * widen_factor) 64 | self.dw5_2 = block(512 * widen_factor, 512 * widen_factor) 65 | self.dw5_3 = block(512 * widen_factor, 512 * widen_factor) 66 | self.dw5_4 = block(512 * widen_factor, 512 * widen_factor) 67 | self.dw5_5 = block(512 * widen_factor, 512 * widen_factor) 68 | self.dw5_6 = block(512 * widen_factor, 1024 * widen_factor, stride=2) 69 | 70 | self.dw6 = block(1024 * widen_factor, 1024 * widen_factor) 71 | 72 | self.avgpool = nn.AdaptiveAvgPool2d(1) 73 | self.fc = nn.Linear(int(1024 * widen_factor), num_classes) 74 | 75 | for m in self.modules(): 76 | if isinstance(m, nn.Conv2d): 77 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 78 | m.weight.data.normal_(0, math.sqrt(2. / n)) 79 | elif isinstance(m, nn.BatchNorm2d): 80 | m.weight.data.fill_(1) 81 | m.bias.data.zero_() 82 | 83 | def forward(self, x): 84 | x = self.conv1(x) 85 | x = self.bn1(x) 86 | x = self.relu(x) 87 | 88 | x = self.dw2_1(x) 89 | x = self.dw2_2(x) 90 | x = self.dw3_1(x) 91 | x = self.dw3_2(x) 92 | x0 = self.dw4_1(x) 93 | x = self.dw4_2(x0) 94 | x = self.dw5_1(x) 95 | x = self.dw5_2(x) 96 | x = self.dw5_3(x) 97 | x = self.dw5_4(x) 98 | x1 = self.dw5_5(x) 99 | x = self.dw5_6(x1) 100 | x2 = self.dw6(x) 101 | return x0, x1, x2 102 | 103 | 104 | def mobilenet(widen_factor=1.0, num_classes=1000): 105 | """ 106 | Construct MobileNet. 107 | """ 108 | model = MobileNet(widen_factor=widen_factor, num_classes=num_classes) 109 | return model 110 | 111 | 112 | def mobilenet_2(): 113 | """ 114 | Construct MobileNet. 
115 | """ 116 | model = MobileNet(widen_factor=2.0, num_classes=1000) 117 | return model 118 | 119 | 120 | def mobilenet_1(): 121 | """ 122 | Construct MobileNet. 123 | """ 124 | model = MobileNet(widen_factor=1.0, num_classes=1000) 125 | return model 126 | 127 | 128 | def mobilenet_075(): 129 | """ 130 | Construct MobileNet. 131 | """ 132 | model = MobileNet(widen_factor=0.75, num_classes=1000) 133 | return model 134 | 135 | 136 | def mobilenet_05(): 137 | """ 138 | Construct MobileNet. 139 | """ 140 | model = MobileNet(widen_factor=0.5, num_classes=1000) 141 | return model 142 | 143 | 144 | def mobilenet_025(): 145 | """ 146 | Construct MobileNet. 147 | """ 148 | model = MobileNet(widen_factor=0.25, num_classes=1000) 149 | return model 150 | 151 | 152 | if __name__ == '__main__': 153 | mobilenet = mobilenet_1() 154 | print(mobilenet) 155 | print(mobilenet.state_dict().keys()) 156 | -------------------------------------------------------------------------------- /models/resnet.py: -------------------------------------------------------------------------------- 1 | '''ResNet in PyTorch. 2 | Reference: 3 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 4 | Deep Residual Learning for Image Recognition. arXiv:1512.03385 5 | ''' 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | from torch.autograd import Variable 11 | 12 | 13 | class BasicBlock(nn.Module): 14 | expansion = 1 15 | 16 | def __init__(self, in_planes, planes, stride=1): 17 | super(BasicBlock, self).__init__() 18 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 19 | self.bn1 = nn.BatchNorm2d(planes) 20 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 21 | self.bn2 = nn.BatchNorm2d(planes) 22 | 23 | self.shortcut = nn.Sequential() 24 | if stride != 1 or in_planes != self.expansion*planes: 25 | self.shortcut = nn.Sequential( 26 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 27 | nn.BatchNorm2d(self.expansion*planes) 28 | ) 29 | 30 | def forward(self, x): 31 | out = F.relu(self.bn1(self.conv1(x))) 32 | out = self.bn2(self.conv2(out)) 33 | out += self.shortcut(x) 34 | out = F.relu(out) 35 | return out 36 | 37 | 38 | class Bottleneck(nn.Module): 39 | expansion = 4 40 | 41 | def __init__(self, in_planes, planes, stride=1): 42 | super(Bottleneck, self).__init__() 43 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 44 | self.bn1 = nn.BatchNorm2d(planes) 45 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 46 | self.bn2 = nn.BatchNorm2d(planes) 47 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) 48 | self.bn3 = nn.BatchNorm2d(self.expansion*planes) 49 | 50 | self.shortcut = nn.Sequential() 51 | if stride != 1 or in_planes != self.expansion*planes: 52 | self.shortcut = nn.Sequential( 53 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 54 | nn.BatchNorm2d(self.expansion*planes) 55 | ) 56 | 57 | def forward(self, x): 58 | out = F.relu(self.bn1(self.conv1(x))) 59 | out = F.relu(self.bn2(self.conv2(out))) 60 | out = self.bn3(self.conv3(out)) 61 | out += self.shortcut(x) 62 | out = F.relu(out) 63 | return out 64 | 65 | 66 | class ResNet(nn.Module): 67 | def __init__(self, block, num_blocks, num_classes=512): 68 | super(ResNet, self).__init__() 69 | self.in_planes = 64 70 | 71 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, 
padding=1, bias=False) 72 | self.bn1 = nn.BatchNorm2d(64) 73 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 74 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 75 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) 76 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) 77 | self.linear = nn.Linear(512*block.expansion, num_classes) 78 | self.fc_bn = nn.BatchNorm1d(512) 79 | 80 | def _make_layer(self, block, planes, num_blocks, stride): 81 | strides = [stride] + [1]*(num_blocks-1) 82 | layers = [] 83 | for stride in strides: 84 | layers.append(block(self.in_planes, planes, stride)) 85 | self.in_planes = planes * block.expansion 86 | return nn.Sequential(*layers) 87 | 88 | def forward(self, x): 89 | out = F.relu(self.bn1(self.conv1(x))) 90 | out = self.layer1(out) 91 | out = self.layer2(out) 92 | out = self.layer3(out) 93 | out = self.layer4(out) 94 | out = F.avg_pool2d(out, 7) 95 | out = out.view(out.size(0), -1) 96 | out = self.linear(out) 97 | out = self.fc_bn(out) 98 | return out 99 | 100 | 101 | def ResNet18(): 102 | return ResNet(BasicBlock, [2,2,2,2]) 103 | 104 | def ResNet34(): 105 | return ResNet(BasicBlock, [3,4,6,3]) 106 | 107 | def ResNet50(): 108 | return ResNet(Bottleneck, [3,4,6,3]) 109 | 110 | def ResNet101(): 111 | return ResNet(Bottleneck, [3,4,23,3]) 112 | 113 | def ResNet152(): 114 | return ResNet(Bottleneck, [3,8,36,3]) 115 | 116 | 117 | def test(): 118 | net = ResNet34() 119 | y = net(Variable(torch.randn(32,3,112,112))) 120 | print(y.size()) 121 | 122 | #test() 123 | -------------------------------------------------------------------------------- /models/vgg.py: -------------------------------------------------------------------------------- 1 | '''VGG11/13/16/19 in Pytorch.''' 2 | import torch 3 | import torch.nn as nn 4 | from torch.autograd import Variable 5 | 6 | 7 | cfg = { 8 | 'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 9 | 'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 10 | 'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 11 | 'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], 12 | } 13 | 14 | 15 | class VGG(nn.Module): 16 | def __init__(self, vgg_name): 17 | super(VGG, self).__init__() 18 | self.features = self._make_layers(cfg[vgg_name]) 19 | self.classifier = nn.Linear(512, 512) 20 | self.fc_bn = nn.BatchNorm1d(512) 21 | 22 | def forward(self, x): 23 | out = self.features(x) 24 | out = out.view(out.size(0), -1) 25 | out = self.classifier(out) 26 | out = self.fc_bn(out) 27 | return out 28 | 29 | def _make_layers(self, cfg): 30 | layers = [] 31 | in_channels = 3 32 | for x in cfg: 33 | if x == 'M': 34 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 35 | else: 36 | layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1), 37 | nn.BatchNorm2d(x), 38 | nn.ReLU(inplace=True)] 39 | in_channels = x 40 | layers += [nn.AvgPool2d(kernel_size=3, stride=3)] 41 | return nn.Sequential(*layers) 42 | 43 | def test(): 44 | net = VGG('VGG11') 45 | x = torch.randn(2,3,112,112) 46 | print(net(Variable(x)).size()) 47 | 48 | #test() 49 | -------------------------------------------------------------------------------- /multi_thread_score_pedestrian_detection.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import argparse 4 | import os.path as 
osp 5 | from utils.utils import progress_bar 6 | import time 7 | 8 | def check_size(submission_file): 9 | max_size = 60*1024*1024 10 | if osp.getsize(submission_file) > max_size: 11 | raise IOError #File size exceeds the specified maximum size, which is 60M for the server. 12 | 13 | def judge_overlap(pbox,ignore_box): 14 | overlap=[] 15 | delete=[] 16 | for p in pbox: 17 | pl=min(p[0],p[2]) 18 | pr=max(p[0],p[2]) 19 | pb=min(p[1],p[3]) 20 | pt=max(p[1],p[3]) 21 | s_p=(pr-pl)*(pt-pb) 22 | s_lap=-0.01 23 | for c in ignore_box: 24 | cl=min(c[0],c[2]) 25 | cr=max(c[0],c[2]) 26 | cb=min(c[1],c[3]) 27 | ct=max(c[1],c[3]) 28 | if not (crpr or ctpt): 29 | s_lap+=(min(cr,pr)-max(cl,pl))*(min(ct,pt)-max(cb,pb)) 30 | if s_lap>0: 31 | overlap.append([p,s_lap/s_p]) 32 | for o in overlap: 33 | if o[1]>0.5: 34 | delete.append(o[0]) 35 | remain_id = [p for p in pbox if p not in delete] 36 | return remain_id 37 | 38 | def parse_ignore_file(ignore_file): 39 | with open(ignore_file,'r') as f: 40 | lines = f.readlines() 41 | ig = [x.strip().split() for x in lines] 42 | ignore = {} 43 | for item in ig: 44 | key = item[0] 45 | ignore_num = (len(item)-1)/4 46 | bbox = [] 47 | for i in range(int(ignore_num)): 48 | b = [] 49 | b.append(int(item[1+4*i])) 50 | b.append(int(item[2+4*i])) 51 | b.append(int(item[1+4*i])+int(item[3+4*i])) 52 | b.append(int(item[2+4*i])+int(item[4+4*i])) 53 | bbox.append(b) 54 | ignore[key] = bbox 55 | return ignore 56 | 57 | def parse_submission(submission_file,ignore_file): 58 | ignore_zone = parse_ignore_file(ignore_file) 59 | ignore_keys = ignore_zone.keys() 60 | with open(submission_file, 'r') as f: 61 | lines = f.readlines() 62 | splitlines = [x.strip().split() for x in lines] 63 | image_ids = [x[0] for x in splitlines] 64 | confidence = np.array([float(x[1]) for x in splitlines]) 65 | BB = [] 66 | for x in splitlines: 67 | bb = [] 68 | bb.append(float(x[2])) 69 | bb.append(float(x[3])) 70 | bb.append(float(x[2])+float(x[4])) 71 | bb.append(float(x[3])+float(x[5])) 72 | BB.append(bb) 73 | 74 | sub_key = [] 75 | for x in image_ids: 76 | if x not in sub_key: 77 | sub_key.append(x) 78 | final_confidence = [] 79 | final_ids = [] 80 | final_BB = [] 81 | 82 | for key in sub_key: 83 | find = [i for i,v in enumerate(image_ids) if v == key] 84 | BB_sub = [BB[i] for i in find] 85 | confid_sub = [confidence[i] for i in find] 86 | if key in ignore_keys: 87 | ignore_bbox = ignore_zone[key] 88 | bbox_remain = judge_overlap(BB_sub,ignore_bbox) 89 | find_remain = [] 90 | for i,v in enumerate(BB_sub): 91 | if v in bbox_remain: 92 | find_remain.append(i) 93 | confid_remain = [confid_sub[i] for i in find_remain] 94 | BB_sub = bbox_remain 95 | confid_sub = confid_remain 96 | ids_sub = [key]*len(BB_sub) 97 | final_ids.extend(ids_sub) 98 | final_confidence.extend(confid_sub) 99 | final_BB.extend(BB_sub) 100 | 101 | final_BB = np.array(final_BB) 102 | final_confidence = np.array(final_confidence) 103 | sorted_ind = np.argsort(-final_confidence) 104 | final_BB = final_BB[sorted_ind, :] 105 | final_ids = [final_ids[x] for x in sorted_ind] 106 | return final_ids, final_BB 107 | 108 | def parse_gt_annotation(gt_file,ignore_file): 109 | ignore_zone = parse_ignore_file(ignore_file) 110 | ignore_keys = ignore_zone.keys() 111 | with open(gt_file, 'r') as f: 112 | lines = f.readlines() 113 | info = [x.strip().split() for x in lines] 114 | gt = {} 115 | for item in info: 116 | bbox = [] 117 | bbox_num = (len(item)-1)/5 118 | for i in range(int(bbox_num)): 119 | b = [] 120 | b.append(int(item[2+5*i])) 121 | 
b.append(int(item[3+5*i])) 122 | b.append(int(item[2+5*i])+int(item[4+5*i])) 123 | b.append(int(item[3+5*i])+int(item[5+5*i])) 124 | bbox.append(b) 125 | if item[0] in ignore_keys: 126 | ignore_bbox = ignore_zone[item[0]] 127 | bbox_remain = judge_overlap(bbox,ignore_bbox) 128 | else: 129 | bbox_remain = bbox 130 | gt[item[0]] = np.array(bbox_remain) 131 | return gt 132 | 133 | def compute_ap(rec, prec): 134 | mrec = np.concatenate(([0.], rec, [1.])) 135 | mpre = np.concatenate(([0.], prec, [0.])) 136 | for i in range(mpre.size - 1, 0, -1): 137 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 138 | i = np.where(mrec[1:] != mrec[:-1])[0] 139 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 140 | return ap 141 | 142 | 143 | def pedestrian_eval(aap,input, gt_file, ignore_file, ovthresh): 144 | gt = parse_gt_annotation(gt_file,ignore_file) 145 | image_ids, BB = parse_submission(input,ignore_file) 146 | npos = 0 147 | recs = {} 148 | for key in gt.keys(): 149 | det = [False]*len(gt[key]) 150 | recs[key] = {'bbox': gt[key], 'det': det} 151 | npos += len(gt[key]) 152 | nd = len(image_ids) 153 | tp = np.zeros(nd) 154 | fp = np.zeros(nd) 155 | for d in range(nd): 156 | if image_ids[d] not in recs.keys(): 157 | raise KeyError("Can not find image {} in the groundtruth file, did you submit the result file for the right dataset?".format(image_ids[d])) 158 | for d in range(nd): 159 | R = recs[image_ids[d]] 160 | bb = BB[d, :].astype(float) 161 | ovmax = -np.inf 162 | BBGT = R['bbox'].astype(float) 163 | if BBGT.size > 0: 164 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 165 | iymin = np.maximum(BBGT[:, 1], bb[1]) 166 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 167 | iymax = np.minimum(BBGT[:, 3], bb[3]) 168 | iw = np.maximum(ixmax - ixmin + 1., 0.) 169 | ih = np.maximum(iymax - iymin + 1., 0.) 170 | inters = iw * ih 171 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 172 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 173 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 174 | overlaps = inters / uni 175 | ovmax = np.max(overlaps) 176 | jmax = np.argmax(overlaps) 177 | 178 | if ovmax > ovthresh: 179 | if not R['det'][jmax]: 180 | tp[d] = 1. 181 | R['det'][jmax] = 1 182 | else: 183 | fp[d] = 1. 184 | else: 185 | fp[d] = 1. 
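    # Descriptive note: at this point tp/fp hold one 0/1 entry per detection, ordered by
    # descending confidence (parse_submission sorts by score). The cumulative sums below
    # turn them into running TP/FP counts, from which recall = tp / npos and
    # precision = tp / (tp + fp) follow; compute_ap then integrates the interpolated
    # (monotonically non-increasing) precision over recall, PASCAL-VOC style.
    # wider_ped_eval calls this once per IoU threshold in 0.50:0.05:0.95 and averages
    # the per-threshold APs into the reported mAP.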
186 | fp = np.cumsum(fp) 187 | tp = np.cumsum(tp) 188 | rec = tp / float(npos) 189 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 190 | ap = compute_ap(rec, prec) 191 | aap.append(ap) 192 | return ap 193 | 194 | import threading 195 | def wider_ped_eval(input, gt,ignore_file): 196 | aap = [] 197 | threads=[] 198 | for ove in np.arange(0.5, 1.0, 0.05): 199 | # pedestrian_eval(aap, input, gt,ignore_file, ovthresh=ove) 200 | t=threading.Thread(target=pedestrian_eval, args=(aap,input,gt,ignore_file),kwargs={'ovthresh':ove}) 201 | threads.append(t) 202 | t.start() 203 | time.sleep(5) 204 | 205 | print("Total threads:{}".format(len(threads))) 206 | for index,t in enumerate(threads): 207 | progress_bar(index, len(threads)," executing.") 208 | t.join() 209 | mAP = np.average(aap) 210 | return mAP 211 | 212 | 213 | def eval(): 214 | input_dir = './' 215 | output_dir = './' 216 | ref_dir = osp.join(input_dir, 'ref') 217 | submit_dir = osp.join(input_dir, 'res') 218 | submit_file = 'submit_files/scores_validation.txt' 219 | gt_file = osp.join(ref_dir, 'val_annotations.txt') 220 | ignore_file = osp.join(ref_dir, 'pedestrian_ignore_part_val.txt') 221 | check_size(submit_file) 222 | mAP = wider_ped_eval(submit_file, gt_file, ignore_file) 223 | out = {'Average AP': mAP} 224 | strings = ['{}: {}\n'.format(k, v) for k, v in out.items()] 225 | open(os.path.join(output_dir, 'scores_out.txt'), 'w').writelines(strings) 226 | return mAP 227 | 228 | 229 | if __name__ == '__main__': 230 | # parser = argparse.ArgumentParser() 231 | # parser.add_argument("input", type=str, default='./') 232 | # parser.add_argument("output", type=str, default='./') 233 | # args = parser.parse_args() 234 | eval() -------------------------------------------------------------------------------- /pretrainedmodels/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__ 2 | 3 | from . import models 4 | from . 
import datasets 5 | from .models.utils import pretrained_settings 6 | from .models.utils import model_names 7 | 8 | # to support pretrainedmodels.__dict__['nasnetalarge'] 9 | # but depreciated 10 | from .models.fbresnet import fbresnet152 11 | from .models.cafferesnet import cafferesnet101 12 | from .models.bninception import bninception 13 | from .models.resnext import resnext101_32x4d 14 | from .models.resnext import resnext101_64x4d 15 | from .models.inceptionv4 import inceptionv4 16 | from .models.inceptionresnetv2 import inceptionresnetv2 17 | from .models.nasnet import nasnetalarge 18 | from .models.nasnet_mobile import nasnetamobile 19 | from .models.torchvision_models import alexnet 20 | from .models.torchvision_models import densenet121 21 | from .models.torchvision_models import densenet169 22 | from .models.torchvision_models import densenet201 23 | from .models.torchvision_models import densenet161 24 | from .models.torchvision_models import resnet18 25 | from .models.torchvision_models import resnet34 26 | from .models.torchvision_models import resnet50 27 | from .models.torchvision_models import resnet101 28 | from .models.torchvision_models import resnet152 29 | from .models.torchvision_models import inceptionv3 30 | from .models.torchvision_models import squeezenet1_0 31 | from .models.torchvision_models import squeezenet1_1 32 | from .models.torchvision_models import vgg11 33 | from .models.torchvision_models import vgg11_bn 34 | from .models.torchvision_models import vgg13 35 | from .models.torchvision_models import vgg13_bn 36 | from .models.torchvision_models import vgg16 37 | from .models.torchvision_models import vgg16_bn 38 | from .models.torchvision_models import vgg19_bn 39 | from .models.torchvision_models import vgg19 40 | from .models.dpn import dpn68 41 | from .models.dpn import dpn68b 42 | from .models.dpn import dpn92 43 | from .models.dpn import dpn98 44 | from .models.dpn import dpn131 45 | from .models.dpn import dpn107 46 | from .models.xception import xception 47 | from .models.senet import senet154 48 | from .models.senet import se_resnet50 49 | from .models.senet import se_resnet101 50 | from .models.senet import se_resnet152 51 | from .models.senet import se_resnext50_32x4d 52 | from .models.senet import se_resnext101_32x4d 53 | from .models.pnasnet import pnasnet5large 54 | from .models.polynet import polynet 55 | -------------------------------------------------------------------------------- /pretrainedmodels/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .voc import Voc2007Classification -------------------------------------------------------------------------------- /pretrainedmodels/datasets/utils.py: -------------------------------------------------------------------------------- 1 | import math 2 | from urllib.request import urlretrieve 3 | 4 | import torch 5 | from PIL import Image 6 | from tqdm import tqdm 7 | 8 | def load_imagenet_classes(path_synsets='data/imagenet_synsets.txt', 9 | path_classes='data/imagenet_classes.txt'): 10 | with open(path_synsets, 'r') as f: 11 | synsets = f.readlines() 12 | 13 | synsets = [x.strip() for x in synsets] 14 | splits = [line.split(' ') for line in synsets] 15 | key_to_classname = {spl[0]:' '.join(spl[1:]) for spl in splits} 16 | 17 | with open(path_classes, 'r') as f: 18 | class_id_to_key = f.readlines() 19 | 20 | class_id_to_key = [x.strip() for x in class_id_to_key] 21 | 22 | cid_to_cname = [] 23 | for i in range(len(class_id_to_key)): 24 
| key = class_id_to_key[i] 25 | cname = key_to_classname[key] 26 | cid_to_cname.append(cname) 27 | 28 | return cid_to_cname 29 | 30 | 31 | class Warp(object): 32 | def __init__(self, size, interpolation=Image.BILINEAR): 33 | self.size = int(size) 34 | self.interpolation = interpolation 35 | 36 | def __call__(self, img): 37 | return img.resize((self.size, self.size), self.interpolation) 38 | 39 | def __str__(self): 40 | return self.__class__.__name__ + ' (size={size}, interpolation={interpolation})'.format(size=self.size, 41 | interpolation=self.interpolation) 42 | 43 | 44 | def download_url(url, destination=None, progress_bar=True): 45 | """Download a URL to a local file. 46 | 47 | Parameters 48 | ---------- 49 | url : str 50 | The URL to download. 51 | destination : str, None 52 | The destination of the file. If None is given the file is saved to a temporary directory. 53 | progress_bar : bool 54 | Whether to show a command-line progress bar while downloading. 55 | 56 | Returns 57 | ------- 58 | filename : str 59 | The location of the downloaded file. 60 | 61 | Notes 62 | ----- 63 | Progress bar use/example adapted from tqdm documentation: https://github.com/tqdm/tqdm 64 | """ 65 | 66 | def my_hook(t): 67 | last_b = [0] 68 | 69 | def inner(b=1, bsize=1, tsize=None): 70 | if tsize is not None: 71 | t.total = tsize 72 | if b > 0: 73 | t.update((b - last_b[0]) * bsize) 74 | last_b[0] = b 75 | 76 | return inner 77 | 78 | if progress_bar: 79 | with tqdm(unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1]) as t: 80 | filename, _ = urlretrieve(url, filename=destination, reporthook=my_hook(t)) 81 | else: 82 | filename, _ = urlretrieve(url, filename=destination) 83 | 84 | 85 | class AveragePrecisionMeter(object): 86 | """ 87 | The APMeter measures the average precision per class. 88 | The APMeter is designed to operate on `NxK` Tensors `output` and 89 | `target`, and optionally a `Nx1` Tensor weight where (1) the `output` 90 | contains model output scores for `N` examples and `K` classes that ought to 91 | be higher when the model is more convinced that the example should be 92 | positively labeled, and smaller when the model believes the example should 93 | be negatively labeled (for instance, the output of a sigmoid function); (2) 94 | the `target` contains only values 0 (for negative examples) and 1 95 | (for positive examples); and (3) the `weight` ( > 0) represents weight for 96 | each sample. 97 | """ 98 | 99 | def __init__(self, difficult_examples=False): 100 | super(AveragePrecisionMeter, self).__init__() 101 | self.reset() 102 | self.difficult_examples = difficult_examples 103 | 104 | def reset(self): 105 | """Resets the meter with empty member variables""" 106 | self.scores = torch.FloatTensor(torch.FloatStorage()) 107 | self.targets = torch.LongTensor(torch.LongStorage()) 108 | 109 | def add(self, output, target): 110 | """ 111 | Args: 112 | output (Tensor): NxK tensor that for each of the N examples 113 | indicates the probability of the example belonging to each of 114 | the K classes, according to the model. 
The probabilities should 115 | sum to one over all classes 116 | target (Tensor): binary NxK tensort that encodes which of the K 117 | classes are associated with the N-th input 118 | (eg: a row [0, 1, 0, 1] indicates that the example is 119 | associated with classes 2 and 4) 120 | weight (optional, Tensor): Nx1 tensor representing the weight for 121 | each example (each weight > 0) 122 | """ 123 | if not torch.is_tensor(output): 124 | output = torch.from_numpy(output) 125 | if not torch.is_tensor(target): 126 | target = torch.from_numpy(target) 127 | 128 | if output.dim() == 1: 129 | output = output.view(-1, 1) 130 | else: 131 | assert output.dim() == 2, \ 132 | 'wrong output size (should be 1D or 2D with one column \ 133 | per class)' 134 | if target.dim() == 1: 135 | target = target.view(-1, 1) 136 | else: 137 | assert target.dim() == 2, \ 138 | 'wrong target size (should be 1D or 2D with one column \ 139 | per class)' 140 | if self.scores.numel() > 0: 141 | assert target.size(1) == self.targets.size(1), \ 142 | 'dimensions for output should match previously added examples.' 143 | 144 | # make sure storage is of sufficient size 145 | if self.scores.storage().size() < self.scores.numel() + output.numel(): 146 | new_size = math.ceil(self.scores.storage().size() * 1.5) 147 | self.scores.storage().resize_(int(new_size + output.numel())) 148 | self.targets.storage().resize_(int(new_size + output.numel())) 149 | 150 | # store scores and targets 151 | offset = self.scores.size(0) if self.scores.dim() > 0 else 0 152 | self.scores.resize_(offset + output.size(0), output.size(1)) 153 | self.targets.resize_(offset + target.size(0), target.size(1)) 154 | self.scores.narrow(0, offset, output.size(0)).copy_(output) 155 | self.targets.narrow(0, offset, target.size(0)).copy_(target) 156 | 157 | def value(self): 158 | """Returns the model's average precision for each class 159 | Return: 160 | ap (FloatTensor): 1xK tensor, with avg precision for each class k 161 | """ 162 | 163 | if self.scores.numel() == 0: 164 | return 0 165 | ap = torch.zeros(self.scores.size(1)) 166 | rg = torch.arange(1, self.scores.size(0)).float() 167 | 168 | # compute average precision for each class 169 | for k in range(self.scores.size(1)): 170 | # sort scores 171 | scores = self.scores[:, k] 172 | targets = self.targets[:, k] 173 | 174 | # compute average precision 175 | ap[k] = AveragePrecisionMeter.average_precision(scores, targets, self.difficult_examples) 176 | return ap 177 | 178 | @staticmethod 179 | def average_precision(output, target, difficult_examples=True): 180 | 181 | # sort examples 182 | sorted, indices = torch.sort(output, dim=0, descending=True) 183 | 184 | # Computes prec@i 185 | pos_count = 0. 186 | total_count = 0. 187 | precision_at_i = 0. 
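# Walk the examples in descending score order: every time a positive (label == 1) is hit,
# add the precision at that rank (pos_count / total_count); the mean over all positives is
# the average precision. With difficult_examples=True, entries labeled 0 are skipped
# entirely rather than counted as negatives.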
188 | for i in indices: 189 | label = target[i] 190 | if difficult_examples and label == 0: 191 | continue 192 | if label == 1: 193 | pos_count += 1 194 | total_count += 1 195 | if label == 1: 196 | precision_at_i += pos_count / total_count 197 | precision_at_i /= pos_count 198 | return precision_at_i -------------------------------------------------------------------------------- /pretrainedmodels/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/pretrainedmodels/models/__init__.py -------------------------------------------------------------------------------- /pretrainedmodels/models/cafferesnet.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import torch.utils.model_zoo as model_zoo 6 | 7 | pretrained_settings = { 8 | 'cafferesnet101': { 9 | 'imagenet': { 10 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/cafferesnet101-9d633cc0.pth', 11 | 'input_space': 'BGR', 12 | 'input_size': [3, 224, 224], 13 | 'input_range': [0, 255], 14 | 'mean': [102.9801, 115.9465, 122.7717], 15 | 'std': [1, 1, 1], 16 | 'num_classes': 1000 17 | } 18 | } 19 | } 20 | 21 | 22 | def conv3x3(in_planes, out_planes, stride=1): 23 | "3x3 convolution with padding" 24 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 25 | padding=1, bias=False) 26 | 27 | 28 | class BasicBlock(nn.Module): 29 | expansion = 1 30 | 31 | def __init__(self, inplanes, planes, stride=1, downsample=None): 32 | super(BasicBlock, self).__init__() 33 | self.conv1 = conv3x3(inplanes, planes, stride) 34 | self.bn1 = nn.BatchNorm2d(planes) 35 | self.relu = nn.ReLU(inplace=True) 36 | self.conv2 = conv3x3(planes, planes) 37 | self.bn2 = nn.BatchNorm2d(planes) 38 | self.downsample = downsample 39 | self.stride = stride 40 | 41 | def forward(self, x): 42 | residual = x 43 | 44 | out = self.conv1(x) 45 | out = self.bn1(out) 46 | out = self.relu(out) 47 | 48 | out = self.conv2(out) 49 | out = self.bn2(out) 50 | 51 | if self.downsample is not None: 52 | residual = self.downsample(x) 53 | 54 | out += residual 55 | out = self.relu(out) 56 | 57 | return out 58 | 59 | 60 | class Bottleneck(nn.Module): 61 | expansion = 4 62 | 63 | def __init__(self, inplanes, planes, stride=1, downsample=None): 64 | super(Bottleneck, self).__init__() 65 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) # change 66 | self.bn1 = nn.BatchNorm2d(planes) 67 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, # change 68 | padding=1, bias=False) 69 | self.bn2 = nn.BatchNorm2d(planes) 70 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 71 | self.bn3 = nn.BatchNorm2d(planes * 4) 72 | self.relu = nn.ReLU(inplace=True) 73 | self.downsample = downsample 74 | self.stride = stride 75 | 76 | def forward(self, x): 77 | residual = x 78 | 79 | out = self.conv1(x) 80 | out = self.bn1(out) 81 | out = self.relu(out) 82 | 83 | out = self.conv2(out) 84 | out = self.bn2(out) 85 | out = self.relu(out) 86 | 87 | out = self.conv3(out) 88 | out = self.bn3(out) 89 | 90 | if self.downsample is not None: 91 | residual = self.downsample(x) 92 | 93 | out += residual 94 | out = self.relu(out) 95 | 96 | return out 97 | 98 | 99 | class ResNet(nn.Module): 100 | 101 | def __init__(self, block, layers, 
num_classes=1000): 102 | self.inplanes = 64 103 | super(ResNet, self).__init__() 104 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 105 | bias=False) 106 | self.bn1 = nn.BatchNorm2d(64) 107 | self.relu = nn.ReLU(inplace=True) 108 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True) # change 109 | self.layer1 = self._make_layer(block, 64, layers[0]) 110 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 111 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 112 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 113 | # it is slightly better whereas slower to set stride = 1 114 | # self.layer4 = self._make_layer(block, 512, layers[3], stride=1) 115 | self.avgpool = nn.AvgPool2d(7) 116 | self.last_linear = nn.Linear(512 * block.expansion, num_classes) 117 | 118 | for m in self.modules(): 119 | if isinstance(m, nn.Conv2d): 120 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 121 | m.weight.data.normal_(0, math.sqrt(2. / n)) 122 | elif isinstance(m, nn.BatchNorm2d): 123 | m.weight.data.fill_(1) 124 | m.bias.data.zero_() 125 | 126 | def _make_layer(self, block, planes, blocks, stride=1): 127 | downsample = None 128 | if stride != 1 or self.inplanes != planes * block.expansion: 129 | downsample = nn.Sequential( 130 | nn.Conv2d(self.inplanes, planes * block.expansion, 131 | kernel_size=1, stride=stride, bias=False), 132 | nn.BatchNorm2d(planes * block.expansion), 133 | ) 134 | 135 | layers = [] 136 | layers.append(block(self.inplanes, planes, stride, downsample)) 137 | self.inplanes = planes * block.expansion 138 | for i in range(1, blocks): 139 | layers.append(block(self.inplanes, planes)) 140 | 141 | return nn.Sequential(*layers) 142 | 143 | def forward(self, x): 144 | x = self.conv1(x) 145 | x = self.bn1(x) 146 | x = self.relu(x) 147 | x = self.maxpool(x) 148 | 149 | x = self.layer1(x) 150 | x = self.layer2(x) 151 | x = self.layer3(x) 152 | x = self.layer4(x) 153 | 154 | x = self.avgpool(x) 155 | x = x.view(x.size(0), -1) 156 | x = self.last_linear(x) 157 | 158 | return x 159 | 160 | 161 | def cafferesnet101(num_classes=1000, pretrained='imagenet'): 162 | """Constructs a ResNet-101 model. 
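    Note: the Caffe-ported weights declared in pretrained_settings above expect BGR input
    in the [0, 255] range with the listed per-channel means subtracted (std of 1), unlike
    the usual torchvision RGB / [0, 1] preprocessing.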
163 | Args: 164 | pretrained (bool): If True, returns a model pre-trained on ImageNet 165 | """ 166 | model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes) 167 | if pretrained is not None: 168 | settings = pretrained_settings['cafferesnet101'][pretrained] 169 | assert num_classes == settings['num_classes'], \ 170 | "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes) 171 | model.load_state_dict(model_zoo.load_url(settings['url'])) 172 | model.input_space = settings['input_space'] 173 | model.input_size = settings['input_size'] 174 | model.input_range = settings['input_range'] 175 | model.mean = settings['mean'] 176 | model.std = settings['std'] 177 | return model -------------------------------------------------------------------------------- /pretrainedmodels/models/fbresnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | import torch.utils.model_zoo as model_zoo 4 | 5 | 6 | __all__ = ['FBResNet', 7 | #'fbresnet18', 'fbresnet34', 'fbresnet50', 'fbresnet101', 8 | 'fbresnet152'] 9 | 10 | pretrained_settings = { 11 | 'fbresnet152': { 12 | 'imagenet': { 13 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/fbresnet152-2e20f6b4.pth', 14 | 'input_space': 'RGB', 15 | 'input_size': [3, 224, 224], 16 | 'input_range': [0, 1], 17 | 'mean': [0.485, 0.456, 0.406], 18 | 'std': [0.229, 0.224, 0.225], 19 | 'num_classes': 1000 20 | } 21 | } 22 | } 23 | 24 | 25 | def conv3x3(in_planes, out_planes, stride=1): 26 | "3x3 convolution with padding" 27 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 28 | padding=1, bias=True) 29 | 30 | 31 | class BasicBlock(nn.Module): 32 | expansion = 1 33 | 34 | def __init__(self, inplanes, planes, stride=1, downsample=None): 35 | super(BasicBlock, self).__init__() 36 | self.conv1 = conv3x3(inplanes, planes, stride) 37 | self.bn1 = nn.BatchNorm2d(planes) 38 | self.relu = nn.ReLU(inplace=True) 39 | self.conv2 = conv3x3(planes, planes) 40 | self.bn2 = nn.BatchNorm2d(planes) 41 | self.downsample = downsample 42 | self.stride = stride 43 | 44 | def forward(self, x): 45 | residual = x 46 | 47 | out = self.conv1(x) 48 | out = self.bn1(out) 49 | out = self.relu(out) 50 | 51 | out = self.conv2(out) 52 | out = self.bn2(out) 53 | 54 | if self.downsample is not None: 55 | residual = self.downsample(x) 56 | 57 | out += residual 58 | out = self.relu(out) 59 | 60 | return out 61 | 62 | 63 | class Bottleneck(nn.Module): 64 | expansion = 4 65 | 66 | def __init__(self, inplanes, planes, stride=1, downsample=None): 67 | super(Bottleneck, self).__init__() 68 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=True) 69 | self.bn1 = nn.BatchNorm2d(planes) 70 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 71 | padding=1, bias=True) 72 | self.bn2 = nn.BatchNorm2d(planes) 73 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=True) 74 | self.bn3 = nn.BatchNorm2d(planes * 4) 75 | self.relu = nn.ReLU(inplace=True) 76 | self.downsample = downsample 77 | self.stride = stride 78 | 79 | def forward(self, x): 80 | residual = x 81 | 82 | out = self.conv1(x) 83 | out = self.bn1(out) 84 | out = self.relu(out) 85 | 86 | out = self.conv2(out) 87 | out = self.bn2(out) 88 | out = self.relu(out) 89 | 90 | out = self.conv3(out) 91 | out = self.bn3(out) 92 | 93 | if self.downsample is not None: 94 | residual = self.downsample(x) 95 | 96 | out += residual 97 | out = self.relu(out) 98 | 99 | return out 100 | 101 | class 
FBResNet(nn.Module): 102 | 103 | def __init__(self, block, layers, num_classes=1000): 104 | self.inplanes = 64 105 | # Special attributs 106 | self.input_space = None 107 | self.input_size = (299, 299, 3) 108 | self.mean = None 109 | self.std = None 110 | super(FBResNet, self).__init__() 111 | # Modules 112 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 113 | bias=True) 114 | self.bn1 = nn.BatchNorm2d(64) 115 | self.relu = nn.ReLU(inplace=True) 116 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 117 | self.layer1 = self._make_layer(block, 64, layers[0]) 118 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 119 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 120 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 121 | self.avgpool = nn.AvgPool2d(7) 122 | self.last_linear = nn.Linear(512 * block.expansion, num_classes) 123 | 124 | for m in self.modules(): 125 | if isinstance(m, nn.Conv2d): 126 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 127 | m.weight.data.normal_(0, math.sqrt(2. / n)) 128 | elif isinstance(m, nn.BatchNorm2d): 129 | m.weight.data.fill_(1) 130 | m.bias.data.zero_() 131 | 132 | def _make_layer(self, block, planes, blocks, stride=1): 133 | downsample = None 134 | if stride != 1 or self.inplanes != planes * block.expansion: 135 | downsample = nn.Sequential( 136 | nn.Conv2d(self.inplanes, planes * block.expansion, 137 | kernel_size=1, stride=stride, bias=True), 138 | nn.BatchNorm2d(planes * block.expansion), 139 | ) 140 | 141 | layers = [] 142 | layers.append(block(self.inplanes, planes, stride, downsample)) 143 | self.inplanes = planes * block.expansion 144 | for i in range(1, blocks): 145 | layers.append(block(self.inplanes, planes)) 146 | 147 | return nn.Sequential(*layers) 148 | 149 | def features(self, input): 150 | x = self.conv1(input) 151 | self.conv1_input = x.clone() 152 | x = self.bn1(x) 153 | x = self.relu(x) 154 | x = self.maxpool(x) 155 | 156 | x = self.layer1(x) 157 | x = self.layer2(x) 158 | x = self.layer3(x) 159 | x = self.layer4(x) 160 | return x 161 | 162 | def logits(self, features): 163 | x = self.avgpool(features) 164 | x = x.view(x.size(0), -1) 165 | x = self.last_linear(x) 166 | return x 167 | 168 | def forward(self, input): 169 | x = self.features(input) 170 | x = self.logits(x) 171 | return x 172 | 173 | 174 | def fbresnet18(num_classes=1000): 175 | """Constructs a ResNet-18 model. 176 | 177 | Args: 178 | pretrained (bool): If True, returns a model pre-trained on ImageNet 179 | """ 180 | model = FBResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes) 181 | return model 182 | 183 | 184 | def fbresnet34(num_classes=1000): 185 | """Constructs a ResNet-34 model. 186 | 187 | Args: 188 | pretrained (bool): If True, returns a model pre-trained on ImageNet 189 | """ 190 | model = FBResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes) 191 | return model 192 | 193 | 194 | def fbresnet50(num_classes=1000): 195 | """Constructs a ResNet-50 model. 196 | 197 | Args: 198 | pretrained (bool): If True, returns a model pre-trained on ImageNet 199 | """ 200 | model = FBResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes) 201 | return model 202 | 203 | 204 | def fbresnet101(num_classes=1000): 205 | """Constructs a ResNet-101 model. 
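    (Note: unlike fbresnet152 below, this constructor takes no ``pretrained`` argument and
    always returns randomly initialized weights.)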
206 | 207 | Args: 208 | pretrained (bool): If True, returns a model pre-trained on ImageNet 209 | """ 210 | model = FBResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes) 211 | return model 212 | 213 | 214 | def fbresnet152(num_classes=1000, pretrained='imagenet'): 215 | """Constructs a ResNet-152 model. 216 | 217 | Args: 218 | pretrained (bool): If True, returns a model pre-trained on ImageNet 219 | """ 220 | model = FBResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes) 221 | if pretrained is not None: 222 | settings = pretrained_settings['fbresnet152'][pretrained] 223 | assert num_classes == settings['num_classes'], \ 224 | "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes) 225 | model.load_state_dict(model_zoo.load_url(settings['url'])) 226 | model.input_space = settings['input_space'] 227 | model.input_size = settings['input_size'] 228 | model.input_range = settings['input_range'] 229 | model.mean = settings['mean'] 230 | model.std = settings['std'] 231 | return model 232 | 233 | 234 | -------------------------------------------------------------------------------- /pretrainedmodels/models/fbresnet/resnet152_dump.lua: -------------------------------------------------------------------------------- 1 | require 'cutorch' 2 | require 'cunn' 3 | require 'cudnn' 4 | require 'image' 5 | vision=require 'torchnet-vision' 6 | 7 | net=vision.models.resnet.load{filename='data/resnet152/net.t7',length=152} 8 | print(net) 9 | 10 | require 'nn' 11 | nn.Module.parameters = function(self) 12 | if self.weight and self.bias and self.running_mean and self.running_var then 13 | return {self.weight, self.bias, self.running_mean, self.running_var}, {self.gradWeight, self.gradBias} 14 | 15 | elseif self.weight and self.bias then 16 | return {self.weight, self.bias}, {self.gradWeight, self.gradBias} 17 | elseif self.weight then 18 | return {self.weight}, {self.gradWeight} 19 | elseif self.bias then 20 | return {self.bias}, {self.gradBias} 21 | else 22 | return 23 | end 24 | end 25 | 26 | netparams, _ = net:parameters() 27 | print(#netparams) 28 | torch.save('data/resnet152/netparams.t7', netparams) 29 | 30 | net=net:cuda() 31 | net:evaluate() 32 | --p, gp = net:getParameters() 33 | input = torch.ones(1,3,224,224) 34 | input[{1,1,1,1}] = -1 35 | input[1] = image.load('data/lena_224.png') 36 | print(input:sum()) 37 | input = input:cuda() 38 | output=net:forward(input) 39 | 40 | for i=1, 11 do 41 | torch.save('data/resnet152/output'..i..'.t7', net:get(i).output:float()) 42 | end 43 | -------------------------------------------------------------------------------- /pretrainedmodels/models/resnext.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.nn as nn 4 | import torch.utils.model_zoo as model_zoo 5 | from .resnext_features import resnext101_32x4d_features 6 | from .resnext_features import resnext101_64x4d_features 7 | 8 | __all__ = ['ResNeXt101_32x4d', 'resnext101_32x4d', 9 | 'ResNeXt101_64x4d', 'resnext101_64x4d'] 10 | 11 | pretrained_settings = { 12 | 'resnext101_32x4d': { 13 | 'imagenet': { 14 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/resnext101_32x4d-29e315fa.pth', 15 | 'input_space': 'RGB', 16 | 'input_size': [3, 224, 224], 17 | 'input_range': [0, 1], 18 | 'mean': [0.485, 0.456, 0.406], 19 | 'std': [0.229, 0.224, 0.225], 20 | 'num_classes': 1000 21 | } 22 | }, 23 | 'resnext101_64x4d': { 24 | 'imagenet': { 25 | 'url': 
'http://data.lip6.fr/cadene/pretrainedmodels/resnext101_64x4d-e77a0586.pth', 26 | 'input_space': 'RGB', 27 | 'input_size': [3, 224, 224], 28 | 'input_range': [0, 1], 29 | 'mean': [0.485, 0.456, 0.406], 30 | 'std': [0.229, 0.224, 0.225], 31 | 'num_classes': 1000 32 | } 33 | } 34 | } 35 | 36 | class ResNeXt101_32x4d(nn.Module): 37 | 38 | def __init__(self, num_classes=1000): 39 | super(ResNeXt101_32x4d, self).__init__() 40 | self.num_classes = num_classes 41 | self.features = resnext101_32x4d_features 42 | self.avg_pool = nn.AvgPool2d((7, 7), (1, 1)) 43 | self.last_linear = nn.Linear(2048, num_classes) 44 | 45 | def logits(self, input): 46 | x = self.avg_pool(input) 47 | x = x.view(x.size(0), -1) 48 | x = self.last_linear(x) 49 | return x 50 | 51 | def forward(self, input): 52 | x = self.features(input) 53 | x = self.logits(x) 54 | return x 55 | 56 | 57 | class ResNeXt101_64x4d(nn.Module): 58 | 59 | def __init__(self, num_classes=1000): 60 | super(ResNeXt101_64x4d, self).__init__() 61 | self.num_classes = num_classes 62 | self.features = resnext101_64x4d_features 63 | self.avg_pool = nn.AvgPool2d((7, 7), (1, 1)) 64 | self.last_linear = nn.Linear(2048, num_classes) 65 | 66 | def logits(self, input): 67 | x = self.avg_pool(input) 68 | x = x.view(x.size(0), -1) 69 | x = self.last_linear(x) 70 | return x 71 | 72 | def forward(self, input): 73 | x = self.features(input) 74 | x = self.logits(x) 75 | return x 76 | 77 | 78 | def resnext101_32x4d(num_classes=1000, pretrained='imagenet'): 79 | model = ResNeXt101_32x4d(num_classes=num_classes) 80 | if pretrained is not None: 81 | settings = pretrained_settings['resnext101_32x4d'][pretrained] 82 | assert num_classes == settings['num_classes'], \ 83 | "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes) 84 | model.load_state_dict(model_zoo.load_url(settings['url'])) 85 | model.input_space = settings['input_space'] 86 | model.input_size = settings['input_size'] 87 | model.input_range = settings['input_range'] 88 | model.mean = settings['mean'] 89 | model.std = settings['std'] 90 | return model 91 | 92 | def resnext101_64x4d(num_classes=1000, pretrained='imagenet'): 93 | model = ResNeXt101_64x4d(num_classes=num_classes) 94 | if pretrained is not None: 95 | settings = pretrained_settings['resnext101_64x4d'][pretrained] 96 | assert num_classes == settings['num_classes'], \ 97 | "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes) 98 | model.load_state_dict(model_zoo.load_url(settings['url'])) 99 | model.input_space = settings['input_space'] 100 | model.input_size = settings['input_size'] 101 | model.input_range = settings['input_range'] 102 | model.mean = settings['mean'] 103 | model.std = settings['std'] 104 | return model 105 | -------------------------------------------------------------------------------- /pretrainedmodels/models/resnext_features/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnext101_32x4d_features import resnext101_32x4d_features 2 | from .resnext101_64x4d_features import resnext101_64x4d_features -------------------------------------------------------------------------------- /pretrainedmodels/models/utils.py: -------------------------------------------------------------------------------- 1 | from .fbresnet import pretrained_settings as fbresnet_settings 2 | from .bninception import pretrained_settings as bninception_settings 3 | from .resnext import pretrained_settings as resnext_settings 4 | from .inceptionv4 
import pretrained_settings as inceptionv4_settings 5 | from .inceptionresnetv2 import pretrained_settings as inceptionresnetv2_settings 6 | from .torchvision_models import pretrained_settings as torchvision_models_settings 7 | from .nasnet_mobile import pretrained_settings as nasnet_mobile_settings 8 | from .nasnet import pretrained_settings as nasnet_settings 9 | from .dpn import pretrained_settings as dpn_settings 10 | from .xception import pretrained_settings as xception_settings 11 | from .senet import pretrained_settings as senet_settings 12 | from .cafferesnet import pretrained_settings as cafferesnet_settings 13 | from .pnasnet import pretrained_settings as pnasnet_settings 14 | from .polynet import pretrained_settings as polynet_settings 15 | 16 | all_settings = [ 17 | fbresnet_settings, 18 | bninception_settings, 19 | resnext_settings, 20 | inceptionv4_settings, 21 | inceptionresnetv2_settings, 22 | torchvision_models_settings, 23 | nasnet_mobile_settings, 24 | nasnet_settings, 25 | dpn_settings, 26 | xception_settings, 27 | senet_settings, 28 | cafferesnet_settings, 29 | pnasnet_settings, 30 | polynet_settings 31 | ] 32 | 33 | model_names = [] 34 | pretrained_settings = {} 35 | for settings in all_settings: 36 | for model_name, model_settings in settings.items(): 37 | pretrained_settings[model_name] = model_settings 38 | model_names.append(model_name) 39 | -------------------------------------------------------------------------------- /pretrainedmodels/models/vggm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | #from torch.legacy import nn as nnl 5 | import torch.utils.model_zoo as model_zoo 6 | 7 | __all__ = ['vggm'] 8 | 9 | pretrained_settings = { 10 | 'vggm': { 11 | 'imagenet': { 12 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/vggm-786f2434.pth', 13 | 'input_space': 'BGR', 14 | 'input_size': [3, 221, 221], 15 | 'input_range': [0, 255], 16 | 'mean': [123.68, 116.779, 103.939], 17 | 'std': [1, 1, 1], 18 | 'num_classes': 1000 19 | } 20 | } 21 | } 22 | 23 | class SpatialCrossMapLRN(nn.Module): 24 | def __init__(self, local_size=1, alpha=1.0, beta=0.75, k=1, ACROSS_CHANNELS=True): 25 | super(SpatialCrossMapLRN, self).__init__() 26 | self.ACROSS_CHANNELS = ACROSS_CHANNELS 27 | if ACROSS_CHANNELS: 28 | self.average=nn.AvgPool3d(kernel_size=(local_size, 1, 1), 29 | stride=1, 30 | padding=(int((local_size-1.0)/2), 0, 0)) 31 | else: 32 | self.average=nn.AvgPool2d(kernel_size=local_size, 33 | stride=1, 34 | padding=int((local_size-1.0)/2)) 35 | self.alpha = alpha 36 | self.beta = beta 37 | self.k = k 38 | 39 | def forward(self, x): 40 | if self.ACROSS_CHANNELS: 41 | div = x.pow(2).unsqueeze(1) 42 | div = self.average(div).squeeze(1) 43 | div = div.mul(self.alpha).add(self.k).pow(self.beta) 44 | else: 45 | div = x.pow(2) 46 | div = self.average(div) 47 | div = div.mul(self.alpha).add(self.k).pow(self.beta) 48 | x = x.div(div) 49 | return x 50 | 51 | class LambdaBase(nn.Sequential): 52 | def __init__(self, fn, *args): 53 | super(LambdaBase, self).__init__(*args) 54 | self.lambda_func = fn 55 | 56 | def forward_prepare(self, input): 57 | output = [] 58 | for module in self._modules.values(): 59 | output.append(module(input)) 60 | return output if output else input 61 | 62 | class Lambda(LambdaBase): 63 | def forward(self, input): 64 | return self.lambda_func(self.forward_prepare(input)) 65 | 66 | class VGGM(nn.Module): 67 | 68 | def __init__(self, 
num_classes=1000): 69 | super(VGGM, self).__init__() 70 | self.num_classes = num_classes 71 | self.features = nn.Sequential( 72 | nn.Conv2d(3,96,(7, 7),(2, 2)), 73 | nn.ReLU(), 74 | SpatialCrossMapLRN(5, 0.0005, 0.75, 2), 75 | nn.MaxPool2d((3, 3),(2, 2),(0, 0),ceil_mode=True), 76 | nn.Conv2d(96,256,(5, 5),(2, 2),(1, 1)), 77 | nn.ReLU(), 78 | SpatialCrossMapLRN(5, 0.0005, 0.75, 2), 79 | nn.MaxPool2d((3, 3),(2, 2),(0, 0),ceil_mode=True), 80 | nn.Conv2d(256,512,(3, 3),(1, 1),(1, 1)), 81 | nn.ReLU(), 82 | nn.Conv2d(512,512,(3, 3),(1, 1),(1, 1)), 83 | nn.ReLU(), 84 | nn.Conv2d(512,512,(3, 3),(1, 1),(1, 1)), 85 | nn.ReLU(), 86 | nn.MaxPool2d((3, 3),(2, 2),(0, 0),ceil_mode=True) 87 | ) 88 | self.classif = nn.Sequential( 89 | nn.Linear(18432,4096), 90 | nn.ReLU(), 91 | nn.Dropout(0.5), 92 | nn.Linear(4096,4096), 93 | nn.ReLU(), 94 | nn.Dropout(0.5), 95 | nn.Linear(4096,num_classes) 96 | ) 97 | 98 | def forward(self, x): 99 | x = self.features(x) 100 | x = x.view(x.size(0), -1) 101 | x = self.classif(x) 102 | return x 103 | 104 | def vggm(num_classes=1000, pretrained='imagenet'): 105 | if pretrained: 106 | settings = pretrained_settings['vggm'][pretrained] 107 | assert num_classes == settings['num_classes'], \ 108 | "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes) 109 | 110 | model = VGGM(num_classes=1000) 111 | model.load_state_dict(model_zoo.load_url(settings['url'])) 112 | 113 | model.input_space = settings['input_space'] 114 | model.input_size = settings['input_size'] 115 | model.input_range = settings['input_range'] 116 | model.mean = settings['mean'] 117 | model.std = settings['std'] 118 | else: 119 | model = VGGM(num_classes=num_classes) 120 | return model -------------------------------------------------------------------------------- /pretrainedmodels/models/wideresnet.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import expanduser 3 | import hickle as hkl 4 | import torch 5 | import torch.nn.functional as F 6 | from torch.autograd import Variable 7 | 8 | __all__ = ['wideresnet50'] 9 | 10 | model_urls = { 11 | 'wideresnet152': 'https://s3.amazonaws.com/pytorch/h5models/wide-resnet-50-2-export.hkl' 12 | } 13 | 14 | def define_model(params): 15 | def conv2d(input, params, base, stride=1, pad=0): 16 | return F.conv2d(input, params[base + '.weight'], 17 | params[base + '.bias'], stride, pad) 18 | 19 | def group(input, params, base, stride, n): 20 | o = input 21 | for i in range(0,n): 22 | b_base = ('%s.block%d.conv') % (base, i) 23 | x = o 24 | o = conv2d(x, params, b_base + '0') 25 | o = F.relu(o) 26 | o = conv2d(o, params, b_base + '1', stride=i==0 and stride or 1, pad=1) 27 | o = F.relu(o) 28 | o = conv2d(o, params, b_base + '2') 29 | if i == 0: 30 | o += conv2d(x, params, b_base + '_dim', stride=stride) 31 | else: 32 | o += x 33 | o = F.relu(o) 34 | return o 35 | 36 | # determine network size by parameters 37 | blocks = [sum([re.match('group%d.block\d+.conv0.weight'%j, k) is not None 38 | for k in params.keys()]) for j in range(4)] 39 | 40 | def f(input, params, pooling_classif=True): 41 | o = F.conv2d(input, params['conv0.weight'], params['conv0.bias'], 2, 3) 42 | o = F.relu(o) 43 | o = F.max_pool2d(o, 3, 2, 1) 44 | o_g0 = group(o, params, 'group0', 1, blocks[0]) 45 | o_g1 = group(o_g0, params, 'group1', 2, blocks[1]) 46 | o_g2 = group(o_g1, params, 'group2', 2, blocks[2]) 47 | o_g3 = group(o_g2, params, 'group3', 2, blocks[3]) 48 | if pooling_classif: 49 | o = F.avg_pool2d(o_g3, 7, 1, 
0) 50 | o = o.view(o.size(0), -1) 51 | o = F.linear(o, params['fc.weight'], params['fc.bias']) 52 | return o 53 | 54 | return f 55 | 56 | 57 | class WideResNet(nn.Module): 58 | 59 | def __init__(self, pooling): 60 | super(WideResNet, self).__init__() 61 | self.pooling = pooling 62 | self.params = params 63 | 64 | def forward(self, x): 65 | x = f(x, self.params, self.pooling) 66 | return x 67 | 68 | 69 | def wideresnet50(pooling): 70 | dir_models = os.path.join(expanduser("~"), '.torch/wideresnet') 71 | path_hkl = os.path.join(dir_models, 'wideresnet50.hkl') 72 | if os.path.isfile(path_hkl): 73 | params = hkl.load(path_hkl) 74 | # convert numpy arrays to torch Variables 75 | for k,v in sorted(params.items()): 76 | print k, v.shape 77 | params[k] = Variable(torch.from_numpy(v), requires_grad=True) 78 | else: 79 | os.system('mkdir -p ' + dir_models) 80 | os.system('wget {} -O {}'.format(model_urls['wideresnet50'], path_hkl)) 81 | f = define_model(params) 82 | model = WideResNet(pooling) 83 | return model 84 | 85 | 86 | -------------------------------------------------------------------------------- /pretrainedmodels/models/xception.py: -------------------------------------------------------------------------------- 1 | """ 2 | Ported to pytorch thanks to [tstandley](https://github.com/tstandley/Xception-PyTorch) 3 | 4 | @author: tstandley 5 | Adapted by cadene 6 | 7 | Creates an Xception Model as defined in: 8 | 9 | Francois Chollet 10 | Xception: Deep Learning with Depthwise Separable Convolutions 11 | https://arxiv.org/pdf/1610.02357.pdf 12 | 13 | This weights ported from the Keras implementation. Achieves the following performance on the validation set: 14 | 15 | Loss:0.9173 Prec@1:78.892 Prec@5:94.292 16 | 17 | REMEMBER to set your image size to 3x299x299 for both test and validation 18 | 19 | normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], 20 | std=[0.5, 0.5, 0.5]) 21 | 22 | The resize parameter of the validation transform should be 333, and make sure to center crop at 299x299 23 | """ 24 | import math 25 | import torch 26 | import torch.nn as nn 27 | import torch.nn.functional as F 28 | import torch.utils.model_zoo as model_zoo 29 | from torch.nn import init 30 | 31 | __all__ = ['xception'] 32 | 33 | pretrained_settings = { 34 | 'xception': { 35 | 'imagenet': { 36 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/xception-b5690688.pth', 37 | 'input_space': 'RGB', 38 | 'input_size': [3, 299, 299], 39 | 'input_range': [0, 1], 40 | 'mean': [0.5, 0.5, 0.5], 41 | 'std': [0.5, 0.5, 0.5], 42 | 'num_classes': 1000, 43 | 'scale': 0.8975 # The resize parameter of the validation transform should be 333, and make sure to center crop at 299x299 44 | } 45 | } 46 | } 47 | 48 | 49 | class SeparableConv2d(nn.Module): 50 | def __init__(self,in_channels,out_channels,kernel_size=1,stride=1,padding=0,dilation=1,bias=False): 51 | super(SeparableConv2d,self).__init__() 52 | 53 | self.conv1 = nn.Conv2d(in_channels,in_channels,kernel_size,stride,padding,dilation,groups=in_channels,bias=bias) 54 | self.pointwise = nn.Conv2d(in_channels,out_channels,1,1,0,1,1,bias=bias) 55 | 56 | def forward(self,x): 57 | x = self.conv1(x) 58 | x = self.pointwise(x) 59 | return x 60 | 61 | 62 | class Block(nn.Module): 63 | def __init__(self,in_filters,out_filters,reps,strides=1,start_with_relu=True,grow_first=True): 64 | super(Block, self).__init__() 65 | 66 | if out_filters != in_filters or strides!=1: 67 | self.skip = nn.Conv2d(in_filters,out_filters,1,stride=strides, bias=False) 68 | self.skipbn = 
nn.BatchNorm2d(out_filters) 69 | else: 70 | self.skip=None 71 | 72 | self.relu = nn.ReLU(inplace=True) 73 | rep=[] 74 | 75 | filters=in_filters 76 | if grow_first: 77 | rep.append(self.relu) 78 | rep.append(SeparableConv2d(in_filters,out_filters,3,stride=1,padding=1,bias=False)) 79 | rep.append(nn.BatchNorm2d(out_filters)) 80 | filters = out_filters 81 | 82 | for i in range(reps-1): 83 | rep.append(self.relu) 84 | rep.append(SeparableConv2d(filters,filters,3,stride=1,padding=1,bias=False)) 85 | rep.append(nn.BatchNorm2d(filters)) 86 | 87 | if not grow_first: 88 | rep.append(self.relu) 89 | rep.append(SeparableConv2d(in_filters,out_filters,3,stride=1,padding=1,bias=False)) 90 | rep.append(nn.BatchNorm2d(out_filters)) 91 | 92 | if not start_with_relu: 93 | rep = rep[1:] 94 | else: 95 | rep[0] = nn.ReLU(inplace=False) 96 | 97 | if strides != 1: 98 | rep.append(nn.MaxPool2d(3,strides,1)) 99 | self.rep = nn.Sequential(*rep) 100 | 101 | def forward(self,inp): 102 | x = self.rep(inp) 103 | 104 | if self.skip is not None: 105 | skip = self.skip(inp) 106 | skip = self.skipbn(skip) 107 | else: 108 | skip = inp 109 | 110 | x+=skip 111 | return x 112 | 113 | 114 | class Xception(nn.Module): 115 | """ 116 | Xception optimized for the ImageNet dataset, as specified in 117 | https://arxiv.org/pdf/1610.02357.pdf 118 | """ 119 | def __init__(self, num_classes=1000): 120 | """ Constructor 121 | Args: 122 | num_classes: number of classes 123 | """ 124 | super(Xception, self).__init__() 125 | self.num_classes = num_classes 126 | 127 | self.conv1 = nn.Conv2d(3, 32, 3,2, 0, bias=False) 128 | self.bn1 = nn.BatchNorm2d(32) 129 | self.relu = nn.ReLU(inplace=True) 130 | 131 | self.conv2 = nn.Conv2d(32,64,3,bias=False) 132 | self.bn2 = nn.BatchNorm2d(64) 133 | #do relu here 134 | 135 | self.block1=Block(64,128,2,2,start_with_relu=False,grow_first=True) 136 | self.block2=Block(128,256,2,2,start_with_relu=True,grow_first=True) 137 | self.block3=Block(256,728,2,2,start_with_relu=True,grow_first=True) 138 | 139 | self.block4=Block(728,728,3,1,start_with_relu=True,grow_first=True) 140 | self.block5=Block(728,728,3,1,start_with_relu=True,grow_first=True) 141 | self.block6=Block(728,728,3,1,start_with_relu=True,grow_first=True) 142 | self.block7=Block(728,728,3,1,start_with_relu=True,grow_first=True) 143 | 144 | self.block8=Block(728,728,3,1,start_with_relu=True,grow_first=True) 145 | self.block9=Block(728,728,3,1,start_with_relu=True,grow_first=True) 146 | self.block10=Block(728,728,3,1,start_with_relu=True,grow_first=True) 147 | self.block11=Block(728,728,3,1,start_with_relu=True,grow_first=True) 148 | 149 | self.block12=Block(728,1024,2,2,start_with_relu=True,grow_first=False) 150 | 151 | self.conv3 = SeparableConv2d(1024,1536,3,1,1) 152 | self.bn3 = nn.BatchNorm2d(1536) 153 | 154 | #do relu here 155 | self.conv4 = SeparableConv2d(1536,2048,3,1,1) 156 | self.bn4 = nn.BatchNorm2d(2048) 157 | 158 | self.fc = nn.Linear(2048, num_classes) 159 | 160 | # #------- init weights -------- 161 | # for m in self.modules(): 162 | # if isinstance(m, nn.Conv2d): 163 | # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 164 | # m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 165 | # elif isinstance(m, nn.BatchNorm2d): 166 | # m.weight.data.fill_(1) 167 | # m.bias.data.zero_() 168 | # #----------------------------- 169 | 170 | def features(self, input): 171 | x = self.conv1(input) 172 | x = self.bn1(x) 173 | x = self.relu(x) 174 | 175 | x = self.conv2(x) 176 | x = self.bn2(x) 177 | x = self.relu(x) 178 | 179 | x = self.block1(x) 180 | x = self.block2(x) 181 | x = self.block3(x) 182 | x = self.block4(x) 183 | x = self.block5(x) 184 | x = self.block6(x) 185 | x = self.block7(x) 186 | x = self.block8(x) 187 | x = self.block9(x) 188 | x = self.block10(x) 189 | x = self.block11(x) 190 | x = self.block12(x) 191 | 192 | x = self.conv3(x) 193 | x = self.bn3(x) 194 | x = self.relu(x) 195 | 196 | x = self.conv4(x) 197 | x = self.bn4(x) 198 | return x 199 | 200 | def logits(self, features): 201 | x = self.relu(features) 202 | 203 | x = F.adaptive_avg_pool2d(x, (1, 1)) 204 | x = x.view(x.size(0), -1) 205 | x = self.last_linear(x) 206 | return x 207 | 208 | def forward(self, input): 209 | x = self.features(input) 210 | x = self.logits(x) 211 | return x 212 | 213 | 214 | def xception(num_classes=1000, pretrained='imagenet'): 215 | model = Xception(num_classes=num_classes) 216 | if pretrained: 217 | settings = pretrained_settings['xception'][pretrained] 218 | assert num_classes == settings['num_classes'], \ 219 | "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes) 220 | 221 | model = Xception(num_classes=num_classes) 222 | model.load_state_dict(model_zoo.load_url(settings['url'])) 223 | 224 | model.input_space = settings['input_space'] 225 | model.input_size = settings['input_size'] 226 | model.input_range = settings['input_range'] 227 | model.mean = settings['mean'] 228 | model.std = settings['std'] 229 | 230 | # TODO: ugly 231 | model.last_linear = model.fc 232 | del model.fc 233 | return model 234 | -------------------------------------------------------------------------------- /pretrainedmodels/utils.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torchvision.transforms as transforms 5 | from PIL import Image 6 | from munch import munchify 7 | 8 | class ToSpaceBGR(object): 9 | 10 | def __init__(self, is_bgr): 11 | self.is_bgr = is_bgr 12 | 13 | def __call__(self, tensor): 14 | if self.is_bgr: 15 | new_tensor = tensor.clone() 16 | new_tensor[0] = tensor[2] 17 | new_tensor[2] = tensor[0] 18 | tensor = new_tensor 19 | return tensor 20 | 21 | 22 | class ToRange255(object): 23 | 24 | def __init__(self, is_255): 25 | self.is_255 = is_255 26 | 27 | def __call__(self, tensor): 28 | if self.is_255: 29 | tensor.mul_(255) 30 | return tensor 31 | 32 | 33 | class TransformImage(object): 34 | 35 | def __init__(self, opts, scale=0.875, random_crop=False, 36 | random_hflip=False, random_vflip=False, 37 | preserve_aspect_ratio=True): 38 | if type(opts) == dict: 39 | opts = munchify(opts) 40 | self.input_size = opts.input_size 41 | self.input_space = opts.input_space 42 | self.input_range = opts.input_range 43 | self.mean = opts.mean 44 | self.std = opts.std 45 | 46 | # https://github.com/tensorflow/models/blob/master/research/inception/inception/image_processing.py#L294 47 | self.scale = scale 48 | self.random_crop = random_crop 49 | self.random_hflip = random_hflip 50 | self.random_vflip = random_vflip 51 | 52 | tfs = [] 53 | if preserve_aspect_ratio: 54 | tfs.append(transforms.Resize(int(math.floor(max(self.input_size)/self.scale)))) 55 | else: 
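# preserve_aspect_ratio=False: height and width are rescaled independently from
# input_size ([C, H, W]), so the image may be distorted before the crop back to
# max(input_size) below.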
56 | height = int(self.input_size[1] / self.scale) 57 | width = int(self.input_size[2] / self.scale) 58 | tfs.append(transforms.Resize((height, width))) 59 | 60 | if random_crop: 61 | tfs.append(transforms.RandomCrop(max(self.input_size))) 62 | else: 63 | tfs.append(transforms.CenterCrop(max(self.input_size))) 64 | 65 | if random_hflip: 66 | tfs.append(transforms.RandomHorizontalFlip()) 67 | 68 | if random_vflip: 69 | tfs.append(transforms.RandomVerticalFlip()) 70 | 71 | tfs.append(transforms.ToTensor()) 72 | tfs.append(ToSpaceBGR(self.input_space=='BGR')) 73 | tfs.append(ToRange255(max(self.input_range)==255)) 74 | tfs.append(transforms.Normalize(mean=self.mean, std=self.std)) 75 | 76 | self.tf = transforms.Compose(tfs) 77 | 78 | def __call__(self, img): 79 | tensor = self.tf(img) 80 | return tensor 81 | 82 | 83 | class LoadImage(object): 84 | 85 | def __init__(self, space='RGB'): 86 | self.space = space 87 | 88 | def __call__(self, path_img): 89 | with open(path_img, 'rb') as f: 90 | with Image.open(f) as img: 91 | img = img.convert(self.space) 92 | return img 93 | 94 | 95 | class LoadTransformImage(object): 96 | 97 | def __init__(self, model, scale=0.875): 98 | self.load = LoadImage() 99 | self.tf = TransformImage(model, scale=scale) 100 | 101 | def __call__(self, path_img): 102 | img = self.load(path_img) 103 | tensor = self.tf(img) 104 | return tensor 105 | 106 | 107 | class Identity(nn.Module): 108 | 109 | def __init__(self): 110 | super(Identity, self).__init__() 111 | 112 | def forward(self, x): 113 | return x -------------------------------------------------------------------------------- /pretrainedmodels/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.6.2' 2 | -------------------------------------------------------------------------------- /score_pedestrian_detection.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import argparse 4 | import os.path as osp 5 | 6 | def check_size(submission_file): 7 | max_size = 60*1024*1024 8 | if osp.getsize(submission_file) > max_size: 9 | raise IOError #File size exceeds the specified maximum size, which is 60M for the server. 
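# Expected text formats, inferred from the parsers below (not an official spec):
#   submission file : one detection per line, "image_id score x y w h" in pixels;
#   ignore file     : "image_id" followed by groups of 4 values (x y w h), one group per ignore region;
#   annotation file : "image_id" followed by groups of 5 values per ground-truth box
#                     (the first value of each group is skipped by parse_gt_annotation).
# All boxes are converted to (x1, y1, x2, y2) corners before the overlap tests.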
10 | 11 | def judge_overlap(pbox,ignore_box): 12 | overlap=[] 13 | delete=[] 14 | for p in pbox: 15 | pl=min(p[0],p[2]) 16 | pr=max(p[0],p[2]) 17 | pb=min(p[1],p[3]) 18 | pt=max(p[1],p[3]) 19 | s_p=(pr-pl)*(pt-pb) 20 | s_lap=-0.01 21 | for c in ignore_box: 22 | cl=min(c[0],c[2]) 23 | cr=max(c[0],c[2]) 24 | cb=min(c[1],c[3]) 25 | ct=max(c[1],c[3]) 26 | if not (cr<pl or cl>pr or ct<pb or cb>pt): 27 | s_lap+=(min(cr,pr)-max(cl,pl))*(min(ct,pt)-max(cb,pb)) 28 | if s_lap>0: 29 | overlap.append([p,s_lap/s_p]) 30 | for o in overlap: 31 | if o[1]>0.5: 32 | delete.append(o[0]) 33 | remain_id = [p for p in pbox if p not in delete] 34 | return remain_id 35 | 36 | def parse_ignore_file(ignore_file): 37 | with open(ignore_file,'r') as f: 38 | lines = f.readlines() 39 | ig = [x.strip().split() for x in lines] 40 | ignore = {} 41 | for item in ig: 42 | key = item[0] 43 | ignore_num = (len(item)-1)/4 44 | bbox = [] 45 | for i in range(int(ignore_num)): 46 | b = [] 47 | b.append(int(item[1+4*i])) 48 | b.append(int(item[2+4*i])) 49 | b.append(int(item[1+4*i])+int(item[3+4*i])) 50 | b.append(int(item[2+4*i])+int(item[4+4*i])) 51 | bbox.append(b) 52 | ignore[key] = bbox 53 | return ignore 54 | 55 | def parse_submission(submission_file,ignore_file): 56 | ignore_zone = parse_ignore_file(ignore_file) 57 | ignore_keys = ignore_zone.keys() 58 | with open(submission_file, 'r') as f: 59 | lines = f.readlines() 60 | splitlines = [x.strip().split() for x in lines] 61 | image_ids = [x[0] for x in splitlines] 62 | confidence = np.array([float(x[1]) for x in splitlines]) 63 | BB = [] 64 | for x in splitlines: 65 | bb = [] 66 | bb.append(float(x[2])) 67 | bb.append(float(x[3])) 68 | bb.append(float(x[2])+float(x[4])) 69 | bb.append(float(x[3])+float(x[5])) 70 | BB.append(bb) 71 | 72 | sub_key = [] 73 | for x in image_ids: 74 | if x not in sub_key: 75 | sub_key.append(x) 76 | final_confidence = [] 77 | final_ids = [] 78 | final_BB = [] 79 | 80 | for key in sub_key: 81 | find = [i for i,v in enumerate(image_ids) if v == key] 82 | BB_sub = [BB[i] for i in find] 83 | confid_sub = [confidence[i] for i in find] 84 | if key in ignore_keys: 85 | ignore_bbox = ignore_zone[key] 86 | bbox_remain = judge_overlap(BB_sub,ignore_bbox) 87 | find_remain = [] 88 | for i,v in enumerate(BB_sub): 89 | if v in bbox_remain: 90 | find_remain.append(i) 91 | confid_remain = [confid_sub[i] for i in find_remain] 92 | BB_sub = bbox_remain 93 | confid_sub = confid_remain 94 | ids_sub = [key]*len(BB_sub) 95 | final_ids.extend(ids_sub) 96 | final_confidence.extend(confid_sub) 97 | final_BB.extend(BB_sub) 98 | 99 | final_BB = np.array(final_BB) 100 | final_confidence = np.array(final_confidence) 101 | sorted_ind = np.argsort(-final_confidence) 102 | final_BB = final_BB[sorted_ind, :] 103 | final_ids = [final_ids[x] for x in sorted_ind] 104 | return final_ids, final_BB 105 | 106 | def parse_gt_annotation(gt_file,ignore_file): 107 | ignore_zone = parse_ignore_file(ignore_file) 108 | ignore_keys = ignore_zone.keys() 109 | with open(gt_file, 'r') as f: 110 | lines = f.readlines() 111 | info = [x.strip().split() for x in lines] 112 | gt = {} 113 | for item in info: 114 | bbox = [] 115 | bbox_num = (len(item)-1)/5 116 | for i in range(int(bbox_num)): 117 | b = [] 118 | b.append(int(item[2+5*i])) 119 | b.append(int(item[3+5*i])) 120 | b.append(int(item[2+5*i])+int(item[4+5*i])) 121 | b.append(int(item[3+5*i])+int(item[5+5*i])) 122 | bbox.append(b) 123 | if item[0] in ignore_keys: 124 | ignore_bbox = ignore_zone[item[0]] 125 | bbox_remain = judge_overlap(bbox,ignore_bbox) 126 |
else: 127 | bbox_remain = bbox 128 | gt[item[0]] = np.array(bbox_remain) 129 | return gt 130 | 131 | def compute_ap(rec, prec): 132 | mrec = np.concatenate(([0.], rec, [1.])) 133 | mpre = np.concatenate(([0.], prec, [0.])) 134 | for i in range(mpre.size - 1, 0, -1): 135 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 136 | i = np.where(mrec[1:] != mrec[:-1])[0] 137 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 138 | return ap 139 | 140 | 141 | def pedestrian_eval(input, gt_file, ignore_file, ovthresh): 142 | gt = parse_gt_annotation(gt_file,ignore_file) 143 | image_ids, BB = parse_submission(input,ignore_file) 144 | npos = 0 145 | recs = {} 146 | for key in gt.keys(): 147 | det = [False]*len(gt[key]) 148 | recs[key] = {'bbox': gt[key], 'det': det} 149 | npos += len(gt[key]) 150 | nd = len(image_ids) 151 | tp = np.zeros(nd) 152 | fp = np.zeros(nd) 153 | for d in range(nd): 154 | if image_ids[d] not in recs.keys(): 155 | raise KeyError("Can not find image {} in the groundtruth file, did you submit the result file for the right dataset?".format(image_ids[d])) 156 | for d in range(nd): 157 | R = recs[image_ids[d]] 158 | bb = BB[d, :].astype(float) 159 | ovmax = -np.inf 160 | BBGT = R['bbox'].astype(float) 161 | if BBGT.size > 0: 162 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 163 | iymin = np.maximum(BBGT[:, 1], bb[1]) 164 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 165 | iymax = np.minimum(BBGT[:, 3], bb[3]) 166 | iw = np.maximum(ixmax - ixmin + 1., 0.) 167 | ih = np.maximum(iymax - iymin + 1., 0.) 168 | inters = iw * ih 169 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 170 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 171 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 172 | overlaps = inters / uni 173 | ovmax = np.max(overlaps) 174 | jmax = np.argmax(overlaps) 175 | 176 | if ovmax > ovthresh: 177 | if not R['det'][jmax]: 178 | tp[d] = 1. 179 | R['det'][jmax] = 1 180 | else: 181 | fp[d] = 1. 182 | else: 183 | fp[d] = 1. 
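# Detections were already sorted by descending confidence in parse_submission, so the
# running sums below trace out the precision/recall curve: recall is normalized by the
# total number of ground-truth boxes (npos) and AP is the area under the interpolated
# curve computed by compute_ap.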
184 | fp = np.cumsum(fp) 185 | tp = np.cumsum(tp) 186 | rec = tp / float(npos+1e-8) 187 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 188 | ap = compute_ap(rec, prec) 189 | return ap 190 | 191 | 192 | def wider_ped_eval(input, gt,ignore_file): 193 | aap = [] 194 | for ove in np.arange(0.5, 1.0, 0.05): 195 | ap = pedestrian_eval(input, gt,ignore_file, ovthresh=ove) 196 | aap.append(ap) 197 | mAP = np.average(aap) 198 | return mAP 199 | 200 | 201 | def get_average_precision_validation(): 202 | input_dir = './' 203 | output_dir = './' 204 | ref_dir = osp.join(input_dir, 'ref') 205 | submit_dir = osp.join(input_dir, 'res') 206 | submit_file = 'submit_files/scores_validation.txt' 207 | gt_file = osp.join(ref_dir, 'val_annotations.txt') 208 | ignore_file = osp.join(ref_dir, 'pedestrian_ignore_part_val.txt') 209 | check_size(submit_file) 210 | mAP = wider_ped_eval(submit_file, gt_file, ignore_file) 211 | out = {'Average AP': mAP} 212 | print(out) 213 | return mAP 214 | 215 | 216 | 217 | if __name__ == '__main__': 218 | # parser = argparse.ArgumentParser() 219 | # parser.add_argument("input", type=str) 220 | # parser.add_argument("output", type=str) 221 | # args = parser.parse_args() 222 | get_average_precision_validation() 223 | # strings = ['{}: {}\n'.format(k, v) for k, v in out.items()] 224 | # open(os.path.join(output_dir, 'scores.txt'), 'w').writelines(strings) -------------------------------------------------------------------------------- /statics.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | data_dir="../data_wider_pedestrian" 4 | 5 | train_bbx_gt_file=os.path.join(data_dir,'train_annotations.txt') 6 | train_img_dir=os.path.join(data_dir,'train') 7 | 8 | val_bbx_gt_file=os.path.join(data_dir,'val_annotations.txt') 9 | val_img_dir=os.path.join(data_dir,'val') 10 | 11 | train_bbx_ignore_file=os.path.join(data_dir,'pedestrian_ignore_part_train.txt') 12 | 13 | val_bbx_ignore_file=os.path.join(data_dir,'pedestrian_ignore_part_val.txt') 14 | 15 | 16 | # config.py 17 | import os.path 18 | 19 | # gets home dir cross platform 20 | HOME = os.path.expanduser("~") 21 | 22 | # for making bounding boxes pretty 23 | COLORS = ((255, 0, 0, 128), (0, 255, 0, 128), (0, 0, 255, 128), 24 | (0, 255, 255, 128), (255, 0, 255, 128), (255, 255, 0, 128)) 25 | 26 | MEANS = (104, 117, 123) 27 | 28 | # SSD300 CONFIGS 29 | voc = { 30 | 'num_classes': 21, 31 | 'lr_steps': (80000, 100000, 120000), 32 | 'max_iter': 120000, 33 | 'feature_maps': [38, 19, 10, 5, 3, 1], 34 | 'min_dim': 300, 35 | 'steps': [8, 16, 32, 64, 100, 300], 36 | 'min_sizes': [30, 60, 111, 162, 213, 264], 37 | 'max_sizes': [60, 111, 162, 213, 264, 315], 38 | 'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]], 39 | 'variance': [0.1, 0.2], 40 | 'clip': True, 41 | 'name': 'VOC', 42 | } 43 | 44 | coco = { 45 | 'num_classes': 201, 46 | 'lr_steps': (280000, 360000, 400000), 47 | 'max_iter': 400000, 48 | 'feature_maps': [38, 19, 10, 5, 3, 1], 49 | 'min_dim': 300, 50 | 'steps': [8, 16, 32, 64, 100, 300], 51 | 'min_sizes': [21, 45, 99, 153, 207, 261], 52 | 'max_sizes': [45, 99, 153, 207, 261, 315], 53 | 'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]], 54 | 'variance': [0.1, 0.2], 55 | 'clip': True, 56 | 'name': 'COCO', 57 | } 58 | 59 | 60 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/utils/__init__.py -------------------------------------------------------------------------------- /utils/build.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | import numpy as np 11 | from distutils.core import setup 12 | from distutils.extension import Extension 13 | from Cython.Distutils import build_ext 14 | 15 | 16 | def find_in_path(name, path): 17 | "Find a file in a search path" 18 | # adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 19 | for dir in path.split(os.pathsep): 20 | binpath = pjoin(dir, name) 21 | if os.path.exists(binpath): 22 | return os.path.abspath(binpath) 23 | return None 24 | 25 | 26 | def locate_cuda(): 27 | """Locate the CUDA environment on the system 28 | 29 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 30 | and values giving the absolute path to each directory. 31 | 32 | Starts by looking for the CUDAHOME env variable. If not found, everything 33 | is based on finding 'nvcc' in the PATH. 34 | """ 35 | 36 | # first check if the CUDAHOME env variable is in use 37 | if 'CUDAHOME' in os.environ: 38 | home = os.environ['CUDAHOME'] 39 | nvcc = pjoin(home, 'bin', 'nvcc') 40 | else: 41 | # otherwise, search the PATH for NVCC 42 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 43 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 44 | if nvcc is None: 45 | raise EnvironmentError('The nvcc binary could not be ' 46 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 47 | home = os.path.dirname(os.path.dirname(nvcc)) 48 | 49 | cudaconfig = {'home': home, 'nvcc': nvcc, 50 | 'include': pjoin(home, 'include'), 51 | 'lib64': pjoin(home, 'lib64')} 52 | for k, v in cudaconfig.items(): 53 | if not os.path.exists(v): 54 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 55 | 56 | return cudaconfig 57 | 58 | 59 | CUDA = locate_cuda() 60 | 61 | # Obtain the numpy include directory. This logic works across numpy versions. 62 | try: 63 | numpy_include = np.get_include() 64 | except AttributeError: 65 | numpy_include = np.get_numpy_include() 66 | 67 | 68 | def customize_compiler_for_nvcc(self): 69 | """inject deep into distutils to customize how the dispatch 70 | to gcc/nvcc works. 71 | 72 | If you subclass UnixCCompiler, it's not trivial to get your subclass 73 | injected in, and still have the right customizations (i.e. 74 | distutils.sysconfig.customize_compiler) run on it. So instead of going 75 | the OO route, I have this. Note, it's kindof like a wierd functional 76 | subclassing going on.""" 77 | 78 | # tell the compiler it can processes .cu 79 | self.src_extensions.append('.cu') 80 | 81 | # save references to the default compiler_so and _comple methods 82 | default_compiler_so = self.compiler_so 83 | super = self._compile 84 | 85 | # now redefine the _compile method. This gets executed for each 86 | # object but distutils doesn't have the ability to change compilers 87 | # based on source extension: we add it. 
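# The replacement _compile defined next inspects the source extension: .cu files are
# routed to nvcc using the 'nvcc' entry of extra_compile_args, everything else keeps the
# default compiler with the 'gcc' entry, and compiler_so is restored afterwards.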
88 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 89 | print(extra_postargs) 90 | if os.path.splitext(src)[1] == '.cu': 91 | # use the cuda for .cu files 92 | self.set_executable('compiler_so', CUDA['nvcc']) 93 | # use only a subset of the extra_postargs, which are 1-1 translated 94 | # from the extra_compile_args in the Extension class 95 | postargs = extra_postargs['nvcc'] 96 | else: 97 | postargs = extra_postargs['gcc'] 98 | 99 | super(obj, src, ext, cc_args, postargs, pp_opts) 100 | # reset the default compiler_so, which we might have changed for cuda 101 | self.compiler_so = default_compiler_so 102 | 103 | # inject our redefined _compile method into the class 104 | self._compile = _compile 105 | 106 | 107 | # run the customize_compiler 108 | class custom_build_ext(build_ext): 109 | def build_extensions(self): 110 | customize_compiler_for_nvcc(self.compiler) 111 | build_ext.build_extensions(self) 112 | 113 | 114 | ext_modules = [ 115 | Extension( 116 | "nms.cpu_nms", 117 | ["nms/cpu_nms.pyx"], 118 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 119 | include_dirs=[numpy_include] 120 | ), 121 | Extension('nms.gpu_nms', 122 | ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'], 123 | library_dirs=[CUDA['lib64']], 124 | libraries=['cudart'], 125 | language='c++', 126 | runtime_library_dirs=[CUDA['lib64']], 127 | # this syntax is specific to this build system 128 | # we're only going to use certain compiler args with nvcc and not with gcc 129 | # the implementation of this trick is in customize_compiler() below 130 | extra_compile_args={'gcc': ["-Wno-unused-function"], 131 | 'nvcc': ['-arch=sm_52', 132 | '--ptxas-options=-v', 133 | '-c', 134 | '--compiler-options', 135 | "'-fPIC'"]}, 136 | include_dirs=[numpy_include, CUDA['include']] 137 | ), 138 | Extension( 139 | 'pycocotools._mask', 140 | sources=['pycocotools/maskApi.c', 'pycocotools/_mask.pyx'], 141 | include_dirs=[numpy_include, 'pycocotools'], 142 | extra_compile_args={ 143 | 'gcc': ['-Wno-cpp', '-Wno-unused-function', '-std=c99']}, 144 | ), 145 | ] 146 | 147 | setup( 148 | name='mot_utils', 149 | ext_modules=ext_modules, 150 | # inject our custom trigger 151 | cmdclass={'build_ext': custom_build_ext}, 152 | ) 153 | -------------------------------------------------------------------------------- /utils/json_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | def read_json_file(json_file): 4 | with open(json_file) as f: 5 | data = json.load(f) 6 | return data -------------------------------------------------------------------------------- /utils/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miltonbd/ECCV_2018_pedestrian_detection_challenege/24448247530555e8f34f8caa35dd7a3a40cc17c0/utils/nms/__init__.py -------------------------------------------------------------------------------- /utils/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, 
np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | 70 | def cpu_soft_nms(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0): 71 | cdef unsigned int N = boxes.shape[0] 72 | cdef float iw, ih, box_area 73 | cdef float ua 74 | cdef int pos = 0 75 | cdef float maxscore = 0 76 | cdef int maxpos = 0 77 | cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov 78 | 79 | for i in range(N): 80 | maxscore = boxes[i, 4] 81 | maxpos = i 82 | 83 | tx1 = boxes[i,0] 84 | ty1 = boxes[i,1] 85 | tx2 = boxes[i,2] 86 | ty2 = boxes[i,3] 87 | ts = boxes[i,4] 88 | 89 | pos = i + 1 90 | # get max box 91 | while pos < N: 92 | if maxscore < boxes[pos, 4]: 93 | maxscore = boxes[pos, 4] 94 | maxpos = pos 95 | pos = pos + 1 96 | 97 | # add max box as a detection 98 | boxes[i,0] = boxes[maxpos,0] 99 | boxes[i,1] = boxes[maxpos,1] 100 | boxes[i,2] = boxes[maxpos,2] 101 | boxes[i,3] = boxes[maxpos,3] 102 | boxes[i,4] = boxes[maxpos,4] 103 | 104 | # swap ith box with position of max box 105 | boxes[maxpos,0] = tx1 106 | boxes[maxpos,1] = ty1 107 | boxes[maxpos,2] = tx2 108 | boxes[maxpos,3] = ty2 109 | boxes[maxpos,4] = ts 110 | 111 | tx1 = boxes[i,0] 112 | ty1 = boxes[i,1] 113 | tx2 = boxes[i,2] 114 | ty2 = boxes[i,3] 115 | ts = boxes[i,4] 116 | 117 | pos = i + 1 118 | # NMS iterations, note that N changes if detection boxes fall below threshold 119 | while pos < N: 120 | x1 = boxes[pos, 0] 121 | y1 = boxes[pos, 1] 122 | x2 = boxes[pos, 2] 123 | y2 = boxes[pos, 3] 124 | s = boxes[pos, 4] 125 | 126 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 127 | iw = (min(tx2, x2) - max(tx1, x1) + 1) 128 | if iw > 0: 129 | ih = (min(ty2, y2) - max(ty1, y1) + 1) 130 | if ih > 0: 131 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih) 132 | ov = iw * ih / ua #iou between max box and detection box 133 | 134 | if method == 1: 
# linear 135 | if ov > Nt: 136 | weight = 1 - ov 137 | else: 138 | weight = 1 139 | elif method == 2: # gaussian 140 | weight = np.exp(-(ov * ov)/sigma) 141 | else: # original NMS 142 | if ov > Nt: 143 | weight = 0 144 | else: 145 | weight = 1 146 | 147 | boxes[pos, 4] = weight*boxes[pos, 4] 148 | 149 | # if box score falls below threshold, discard the box by swapping with last box 150 | # update N 151 | if boxes[pos, 4] < threshold: 152 | boxes[pos,0] = boxes[N-1, 0] 153 | boxes[pos,1] = boxes[N-1, 1] 154 | boxes[pos,2] = boxes[N-1, 2] 155 | boxes[pos,3] = boxes[N-1, 3] 156 | boxes[pos,4] = boxes[N-1, 4] 157 | N = N - 1 158 | pos = pos - 1 159 | 160 | pos = pos + 1 161 | 162 | keep = [i for i in range(N)] 163 | return keep 164 | -------------------------------------------------------------------------------- /utils/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /utils/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include "gpu_nms.hpp" 9 | #include 10 | #include 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | 
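// Overview of the kernel below: boxes are handled in 64-box tiles, since
// threadsPerBlock = 8 * sizeof(unsigned long long) bits fit in one mask word.
// Each CUDA block takes one (row tile, column tile) pair, stages the column
// tile in shared memory, and every thread compares its own box from the row
// tile against all boxes in the column tile, setting bit i of a 64-bit word
// whenever IoU exceeds nms_overlap_thresh. The host side (_nms further down)
// then walks the boxes in descending score order and uses these bitmasks to
// suppress overlapping, lower-scoring boxes.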
__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(¤t_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 
88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 | -------------------------------------------------------------------------------- /utils/nms/py_cpu_nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def py_cpu_nms(dets, thresh): 11 | """Pure Python NMS baseline.""" 12 | x1 = dets[:, 0] 13 | y1 = dets[:, 1] 14 | x2 = dets[:, 2] 15 | y2 = dets[:, 3] 16 | scores = dets[:, 4] 17 | 18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 19 | order = scores.argsort()[::-1] 20 | 21 | keep = [] 22 | while order.size > 0: 23 | i = order[0] 24 | keep.append(i) 25 | xx1 = np.maximum(x1[i], x1[order[1:]]) 26 | yy1 = np.maximum(y1[i], y1[order[1:]]) 27 | xx2 = np.minimum(x2[i], x2[order[1:]]) 28 | yy2 = np.minimum(y2[i], y2[order[1:]]) 29 | 30 | w = np.maximum(0.0, xx2 - xx1 + 1) 31 | h = np.maximum(0.0, yy2 - yy1 + 1) 32 | inter = w * h 33 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 34 | 35 | inds = np.where(ovr <= thresh)[0] 36 | order = order[inds + 1] 37 | 38 | return keep 39 | -------------------------------------------------------------------------------- /utils/pascal_utils.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import xml.etree.ElementTree as ET 3 | from PIL import Image 4 | from xml.dom import minidom 5 | from statics import * 6 | from data_reader import * 7 | 8 | def write_pascal_annotation(file_name,obj_list,xml_file): 9 | annotation=ET.Element('annotation') 10 | 
filename=ET.SubElement(annotation,'filename') 11 | filename.text=file_name 12 | size = ET.SubElement(annotation, 'size') 13 | img=Image.open(file_name) 14 | width, height = img.size 15 | height_elem=ET.SubElement(size,'height') 16 | width_elem=ET.SubElement(size,'width') 17 | height_elem.text=str(height) 18 | width_elem.text=str(width) 19 | # print(obj_list) 20 | for i in range(0, len(obj_list), 5): 21 | class_index = obj_list[i] 22 | obj_cord = obj_list[i + 1:i + 5] 23 | obj_cord[2] = int(obj_cord[2]) + int(obj_cord[0]) 24 | obj_cord[3] = int(obj_cord[3]) + int(obj_cord[1]) 25 | object = ET.SubElement(annotation, 'object') 26 | get_object(object, obj_cord) 27 | 28 | # print(ET.dump(annotation)) 29 | anno_txt=minidom.parseString(ET.tostring(annotation)).toprettyxml() 30 | text_file = open(xml_file, "w") 31 | text_file.write(anno_txt) 32 | text_file.close() 33 | return 34 | 35 | 36 | def write_pascal_annotation_aug(file_name,obj_list,xml_file): 37 | annotation=ET.Element('annotation') 38 | filename=ET.SubElement(annotation,'filename') 39 | filename.text=file_name 40 | size = ET.SubElement(annotation, 'size') 41 | img=Image.open(file_name) 42 | width, height = img.size 43 | height_elem=ET.SubElement(size,'height') 44 | width_elem=ET.SubElement(size,'width') 45 | height_elem.text=str(height) 46 | width_elem.text=str(width) 47 | # print(obj_list) 48 | for i,obj in enumerate(obj_list): 49 | class_index = obj[4] 50 | obj_cord = obj[0:4] 51 | object = ET.SubElement(annotation, 'object') 52 | get_object(object, obj_cord) 53 | 54 | # print(ET.dump(annotation)) 55 | anno_txt=minidom.parseString(ET.tostring(annotation)).toprettyxml() 56 | text_file = open(xml_file, "w") 57 | text_file.write(anno_txt) 58 | text_file.close() 59 | return 60 | 61 | 62 | def get_object(object, obj_cord): 63 | name = ET.SubElement(object, 'name') 64 | name.text = 'pedestrian' 65 | bndbox = ET.SubElement(object, 'bndbox') 66 | difficult=ET.SubElement(object,'difficult') 67 | difficult.text=str(0) 68 | xmin = ET.SubElement(bndbox, 'xmin') 69 | ymin = ET.SubElement(bndbox, 'ymin') 70 | xmax = ET.SubElement(bndbox, 'xmax') 71 | ymax = ET.SubElement(bndbox, 'ymax') 72 | 73 | xmin.text=str(obj_cord[0]) 74 | ymin.text=str(obj_cord[1]) 75 | xmax.text=str(obj_cord[2]) 76 | ymax.text=str(obj_cord[3]) 77 | 78 | 79 | return 80 | 81 | 82 | def read_pascal_annotation(anno_file): 83 | """ 84 | 85 | :param anno_file: 86 | :return: 87 | 88 | """ 89 | tree = ET.parse(anno_file) 90 | root = tree.getroot() 91 | filename=root.find('filename').text 92 | height=int(root.find('size/height').text) 93 | width=int(root.find('size/width').text) 94 | objs=root.findall('object') 95 | objects=[] 96 | for obj in objs: 97 | class_label=obj.find('name').text 98 | xmin=int(float(obj.find('bndbox/xmin').text)) 99 | xmax=int(float(obj.find('bndbox/xmax').text)) 100 | ymin=int(float(obj.find('bndbox/ymin').text)) 101 | ymax=int(float(obj.find('bndbox/ymax').text)) 102 | objects.append([xmin,ymin,xmax,ymax,1]) 103 | res={ 104 | 'filename':filename, 105 | 'height':height, 106 | 'width':width, 107 | 'objects':objects 108 | } 109 | return res -------------------------------------------------------------------------------- /utils/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /utils/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 
'tsungyi' 2 | 3 | #import pycocotools._mask as _mask 4 | from . import _mask 5 | 6 | # Interface for manipulating masks stored in RLE format. 7 | # 8 | # RLE is a simple yet efficient format for storing binary masks. RLE 9 | # first divides a vector (or vectorized image) into a series of piecewise 10 | # constant regions and then for each piece simply stores the length of 11 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 12 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 13 | # (note that the odd counts are always the numbers of zeros). Instead of 14 | # storing the counts directly, additional compression is achieved with a 15 | # variable bitrate representation based on a common scheme called LEB128. 16 | # 17 | # Compression is greatest given large piecewise constant regions. 18 | # Specifically, the size of the RLE is proportional to the number of 19 | # *boundaries* in M (or for an image the number of boundaries in the y 20 | # direction). Assuming fairly simple shapes, the RLE representation is 21 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 22 | # is substantially lower, especially for large simple objects (large n). 23 | # 24 | # Many common operations on masks can be computed directly using the RLE 25 | # (without need for decoding). This includes computations such as area, 26 | # union, intersection, etc. All of these operations are linear in the 27 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 28 | # of the object. Computing these operations on the original mask is O(n). 29 | # Thus, using the RLE can result in substantial computational savings. 30 | # 31 | # The following API functions are defined: 32 | # encode - Encode binary masks using RLE. 33 | # decode - Decode binary masks encoded via RLE. 34 | # merge - Compute union or intersection of encoded masks. 35 | # iou - Compute intersection over union between masks. 36 | # area - Compute area of encoded masks. 37 | # toBbox - Get bounding boxes surrounding encoded masks. 38 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 39 | # 40 | # Usage: 41 | # Rs = encode( masks ) 42 | # masks = decode( Rs ) 43 | # R = merge( Rs, intersect=false ) 44 | # o = iou( dt, gt, iscrowd ) 45 | # a = area( Rs ) 46 | # bbs = toBbox( Rs ) 47 | # Rs = frPyObjects( [pyObjects], h, w ) 48 | # 49 | # In the API the following formats are used: 50 | # Rs - [dict] Run-length encoding of binary masks 51 | # R - dict Run-length encoding of binary mask 52 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 53 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 54 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 55 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 56 | # dt,gt - May be either bounding boxes or encoded masks 57 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 58 | # 59 | # Finally, a note about the intersection over union (iou) computation. 60 | # The standard iou of a ground truth (gt) and detected (dt) object is 61 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 62 | # For "crowd" regions, we use a modified criteria. If a gt object is 63 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 64 | # Choosing gt' in the crowd gt that best matches the dt can be done using 65 | # gt'=intersect(dt,gt). 
Since by definition union(gt',dt)=dt, computing 66 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 67 | # For crowd gt regions we use this modified criteria above for the iou. 68 | # 69 | # To compile run "python setup.py build_ext --inplace" 70 | # Please do not contact us for help with compiling. 71 | # 72 | # Microsoft COCO Toolbox. version 2.0 73 | # Data, paper, and tutorials available at: http://mscoco.org/ 74 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 75 | # Licensed under the Simplified BSD License [see coco/license.txt] 76 | 77 | iou = _mask.iou 78 | merge = _mask.merge 79 | frPyObjects = _mask.frPyObjects 80 | 81 | def encode(bimask): 82 | if len(bimask.shape) == 3: 83 | return _mask.encode(bimask) 84 | elif len(bimask.shape) == 2: 85 | h, w = bimask.shape 86 | return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0] 87 | 88 | def decode(rleObjs): 89 | if type(rleObjs) == list: 90 | return _mask.decode(rleObjs) 91 | else: 92 | return _mask.decode([rleObjs])[:,:,0] 93 | 94 | def area(rleObjs): 95 | if type(rleObjs) == list: 96 | return _mask.area(rleObjs) 97 | else: 98 | return _mask.area([rleObjs])[0] 99 | 100 | def toBbox(rleObjs): 101 | if type(rleObjs) == list: 102 | return _mask.toBbox(rleObjs) 103 | else: 104 | return _mask.toBbox([rleObjs])[0] 105 | -------------------------------------------------------------------------------- /utils/pycocotools/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | 9 | typedef unsigned int uint; 10 | typedef unsigned long siz; 11 | typedef unsigned char byte; 12 | typedef double* BB; 13 | typedef struct { siz h, w, m; uint *cnts; } RLE; 14 | 15 | /* Initialize/destroy RLE. */ 16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 17 | void rleFree( RLE *R ); 18 | 19 | /* Initialize/destroy RLE array. */ 20 | void rlesInit( RLE **R, siz n ); 21 | void rlesFree( RLE **R, siz n ); 22 | 23 | /* Encode binary masks using RLE. */ 24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 25 | 26 | /* Decode binary masks encoded via RLE. */ 27 | void rleDecode( const RLE *R, byte *mask, siz n ); 28 | 29 | /* Compute union or intersection of encoded masks. */ 30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ); 31 | 32 | /* Compute area of encoded masks. */ 33 | void rleArea( const RLE *R, siz n, uint *a ); 34 | 35 | /* Compute intersection over union between masks. */ 36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 37 | 38 | /* Compute non-maximum suppression between bounding masks */ 39 | void rleNms( RLE *dt, siz n, uint *keep, double thr ); 40 | 41 | /* Compute intersection over union between bounding boxes. */ 42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 43 | 44 | /* Compute non-maximum suppression between bounding boxes */ 45 | void bbNms( BB dt, siz n, uint *keep, double thr ); 46 | 47 | /* Get bounding boxes surrounding encoded masks. */ 48 | void rleToBbox( const RLE *R, BB bb, siz n ); 49 | 50 | /* Convert bounding boxes to encoded masks. 
*/ 51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 52 | 53 | /* Convert polygon to encoded mask. */ 54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 55 | 56 | /* Get compressed string representation of encoded mask. */ 57 | char* rleToString( const RLE *R ); 58 | 59 | /* Convert from compressed string representation of encoded mask. */ 60 | void rleFrString( RLE *R, char *s, siz h, siz w ); 61 | -------------------------------------------------------------------------------- /utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0. 19 | 20 | def tic(self): 21 | # using time.time instead of time.clock because time.clock 22 | # does not normalize for multithreading 23 | self.start_time = time.time() 24 | 25 | def toc(self, average=True): 26 | self.diff = time.time() - self.start_time 27 | self.total_time += self.diff 28 | self.calls += 1 29 | self.average_time = self.total_time / self.calls 30 | if average: 31 | return self.average_time 32 | else: 33 | return self.diff 34 | 35 | def clear(self): 36 | self.total_time = 0. 37 | self.calls = 0 38 | self.start_time = 0. 39 | self.diff = 0. 40 | self.average_time = 0. 41 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | from PIL import Image, ImageFont, ImageDraw, ImageEnhance 4 | import numpy as np 5 | import cv2 6 | 7 | """ 8 | face=[[x1,y1,x2,y2]] 9 | """ 10 | 11 | 12 | def draw_rectangle_w_h_box(img_path, faces, save_dir='./detected_face'): 13 | create_dir_if_not_exists(save_dir) 14 | img_face_detect = cv2.imread(img_path) 15 | for face in faces: 16 | x1, y1, x2, y2 = face 17 | cv2.rectangle(img_face_detect, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 1) 18 | cv2.imwrite(os.path.join(save_dir, os.path.basename(img_path)), img_face_detect) 19 | 20 | def draw_rectangle(img_path, faces, save_dir='./detected_face'): 21 | create_dir_if_not_exists(save_dir) 22 | img_face_detect = cv2.imread(img_path) 23 | for face in faces: 24 | x1, y1, x2, y2 = face 25 | cv2.rectangle(img_face_detect, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 1) 26 | cv2.imwrite(os.path.join(save_dir, os.path.basename(img_path)), img_face_detect) 27 | 28 | def drawbbox(file_name,bbox,save_dir): 29 | source_img = Image.open(file_name).convert("RGBA") 30 | 31 | draw = ImageDraw.Draw(source_img) 32 | # draw.rectangle(((0, 00), (100, 100)), fill="black") 33 | # draw.text((20, 70), "something123", font=ImageFont.truetype("font_path123")) 34 | 35 | create_dir_if_not_exists(save_dir) 36 | save_file=os.path.join(save_dir,os.path.basename(file_name)) 37 | source_img.convert("RGB").save(save_file, "JPEG") 38 | 39 | def get_total_params(model): 40 | model_parameters = filter(lambda p: p.requires_grad, model.parameters()) 41 | params = sum([np.prod(p.size()) for p in model_parameters]) 42 | return params 43 | 44 | def create_dir_if_not_exists(dir):
45 | if not os.path.exists(dir): 46 | os.makedirs(dir) 47 | 48 | def check_if_exists(dir): 49 | return os.path.exists(dir) 50 | 51 | def progress_bar(progress, count, message): 52 | sys.stdout.write('\r' + "{} of {}: {}".format(progress, count, message)) 53 | -------------------------------------------------------------------------------- /utils/visualization/pascal_detection_visualize.py: -------------------------------------------------------------------------------- 1 | 2 | from utils.utils import create_dir_if_not_exists 3 | import os 4 | import cv2 5 | from utils.pascal_utils import read_pascal_annotation 6 | 7 | def draw_bbox_pascal(anno_path,image_dir=None): 8 | annotation = read_pascal_annotation(anno_path) 9 | image_path = annotation['filename'] 10 | if image_dir is not None: 11 | image_path=os.path.join(image_dir,image_path) 12 | print(image_path) 13 | objects = annotation['objects'] 14 | # objects=[[100,100,200,200,1]] 15 | create_dir_if_not_exists('pascal_images') 16 | img_demo_detect = cv2.imread(image_path) 17 | save_path = os.path.join('pascal_images', os.path.basename(image_path)) 18 | for object in objects: 19 | x1, y1, x2, y2 = object[:4] 20 | cv2.rectangle(img_demo_detect, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 1) 21 | cv2.imwrite(save_path, img_demo_detect) 22 | 23 | 24 | def show_augment(): 25 | imageid = 'img08456_0' 26 | anno_path = '/media/milton/ssd1/research/competitions/data_wider_pedestrian/VOC_Wider_pedestrian/Annotations_aug/{}.xml'.format( 27 | imageid) 28 | draw_bbox_pascal(anno_path) 29 | 30 | def show_original_512(): 31 | imageid = 'img00175_6683' 32 | anno_path = '/media/milton/ssd1/research/competitions/data_wider_pedestrian/VOC_Wider_pedestrian/Annotations_512/{}.xml'.format( 33 | imageid) 34 | draw_bbox_pascal(anno_path) 35 | 36 | def show_inria_person(): 37 | # anno_path = '/media/milton/ssd1/research/competitions/data_wider_pedestrian/annotations_train/PennPed00001.xml' 38 | anno_path='/media/milton/ssd1/dataset/pedestrian/tud_brussels/annotations/img-000-2.xml' 39 | draw_bbox_pascal(anno_path) 40 | 41 | show_inria_person() 42 | # show_original_512() 43 | # show_augment() 44 | # anno_path='/media/milton/ssd1/dataset/pascal/VOCdevkit/VOC2007/Annotations/000247.xml' 45 | # draw_bbox_pascal(anno_path,'/media/milton/ssd1/dataset/pascal/VOCdevkit/VOC2007/JPEGImages') 46 | 47 | -------------------------------------------------------------------------------- /wider_face_pedestrian_to_pascal.py: -------------------------------------------------------------------------------- 1 | import xml.etree.ElementTree as ET 2 | from PIL import Image 3 | from xml.dom import minidom 4 | from statics import * 5 | from data_reader import * 6 | from utils.utils import create_dir_if_not_exists 7 | from utils.pascal_utils import * 8 | 9 | 10 | def convert_wider_pedestrian_to_pascal(): 11 | data=read_train_gt() 12 | trainvalids=[] 13 | for row in data: 14 | obj_list = row[1] 15 | image_name = row[0] 16 | annodir='/media/milton/ssd1/research/competitions/data_wider_pedestrian/VOC_Wider_pedestrian/Annotations' 17 | create_dir_if_not_exists(annodir) 18 | create_dir_if_not_exists('/media/milton/ssd1/research/competitions/data_wider_pedestrian/VOC_Wider_pedestrian/JPEGImages') 19 | xml_file_name=image_name.split('.')[0]+".xml" 20 | xml_file=os.path.join(annodir, xml_file_name) 21 | image_path=os.path.abspath(os.path.join(data_dir,"train", image_name)) 22 | write_pascal_annotation(image_path,obj_list,xml_file) 23 |
24 | voc_anno_train_dir="/media/milton/ssd1/research/competitions/data_wider_pedestrian/annotations_train" 25 | if not os.path.exists(voc_anno_train_dir): 26 | os.makedirs(voc_anno_train_dir) 27 | anno_path=os.path.join(voc_anno_train_dir,xml_file_name) 28 | write_pascal_annotation(image_path,obj_list,anno_path) 29 | 30 | trainvalids.append(image_name.split('.')[0]) 31 | # break 32 | with open('/media/milton/ssd1/research/competitions/data_wider_pedestrian/VOC_Wider_pedestrian/ImageSets/Main/trainval.txt', mode='wt', encoding='utf-8') as myfile: 33 | myfile.write('\n'.join(trainvalids)) 34 | testids=[] 35 | for row in read_val_gt(): 36 | obj_list = row[1] 37 | image_name = row[0] 38 | annodir='/media/milton/ssd1/research/competitions/data_wider_pedestrian/VOC_Wider_pedestrian/Annotations' 39 | xml_file_name=image_name.split('.')[0]+".xml" 40 | xml_file=os.path.join(annodir, xml_file_name) 41 | image_path=os.path.abspath(os.path.join(data_dir,"val", image_name)) 42 | write_pascal_annotation(image_path,obj_list,xml_file) 43 | testids.append(image_name.split('.')[0]) 44 | 45 | voc_anno_valid_dir = "/media/milton/ssd1/research/competitions/data_wider_pedestrian/annotations_valid" 46 | if not os.path.exists(voc_anno_valid_dir): 47 | os.makedirs(voc_anno_valid_dir) 48 | anno_path = os.path.join(voc_anno_valid_dir, xml_file_name) 49 | write_pascal_annotation(image_path, obj_list, anno_path) 50 | 51 | 52 | # break 53 | with open('/media/milton/ssd1/research/competitions/data_wider_pedestrian/VOC_Wider_pedestrian/ImageSets/Main/test.txt', mode='wt', encoding='utf-8') as myfile: 54 | myfile.write('\n'.join(testids)) 55 | 56 | 57 | 58 | if __name__ == '__main__': 59 | convert_wider_pedestrian_to_pascal() --------------------------------------------------------------------------------
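For reference, a minimal usage sketch of the pure-Python NMS baseline defined in utils/nms/py_cpu_nms.py above. The import path assumes the snippet is run from the repository root, and the detection boxes are made-up values chosen only to illustrate the [x1, y1, x2, y2, score] layout and the suppression behaviour; it is not part of the original code base.

import numpy as np
from utils.nms.py_cpu_nms import py_cpu_nms

# Each row is [x1, y1, x2, y2, score]; boxes 0 and 1 overlap heavily (IoU ~0.92),
# while box 2 is disjoint from both.
dets = np.array([
    [10.,  10., 110., 210., 0.95],
    [12.,  14., 108., 205., 0.80],
    [200., 50., 260., 170., 0.60],
], dtype=np.float32)

keep = py_cpu_nms(dets, thresh=0.5)
print(keep)  # [0, 2]: the lower-scoring near-duplicate of box 0 is suppressed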