├── active_learning_package
│   ├── __init__.py
│   ├── human_active_learning.py
│   ├── uncertainty_helpers.py
│   └── voc_eval_helpers.py
├── utils
│   ├── __init__.py
│   └── augmentations.py
├── layers
│   ├── __init__.py
│   ├── functions
│   │   ├── __init__.py
│   │   ├── prior_box.py
│   │   └── detection.py
│   ├── modules
│   │   ├── __init__.py
│   │   ├── l2norm.py
│   │   ├── kl_loss.py
│   │   └── multibox_loss.py
│   └── box_utils.py
├── doc
│   ├── SSD.jpg
│   ├── ssd.png
│   ├── detection_example.png
│   ├── detection_example2.png
│   └── detection_examples.png
├── Thesis_Jasper_Bakker_Active_Deep_Learning_for_Object_Detection_With_Sampling_Based_Probabilistic_Bounding_Boxes_compressed.pdf
├── run_scripts
│   └── scripts
│       ├── run_script.py
│       ├── boat_image_budget_pre-nms-avg_42_200.job
│       └── 6class_image_budget_pre-nms-avg_42_200.job
├── data
│   ├── horse_seed_set.json
│   ├── sheep_seed_set.json
│   ├── scripts
│   │   ├── VOC2012.sh
│   │   ├── VOC2007.sh
│   │   └── COCO2014.sh
│   ├── car_seed_set.json
│   ├── pottedplant_seed_set.json
│   ├── imageset_files
│   │   ├── sheep_trainval_detect.txt
│   │   ├── sheep_test_detect.txt
│   │   ├── boat_test_detect.txt
│   │   ├── boat_trainval_detect.txt
│   │   ├── bottle_test_detect.txt
│   │   ├── pottedplant_test_detect.txt
│   │   ├── bottle_trainval_detect.txt
│   │   ├── pottedplant_trainval_detect.txt
│   │   ├── horse_test_detect.txt
│   │   ├── horse_trainval_detect.txt
│   │   ├── car_trainval_detect.txt
│   │   └── car_test_detect.txt
│   ├── boat_seed_set.json
│   ├── bottle_seed_set.json
│   ├── __init__.py
│   ├── config.py
│   ├── 6_class_seed_set.json
│   └── empty_seed.json
├── LICENSE
├── .gitignore
├── create_initial_networks.py
├── README.md
├── requirements
├── create_spoc_features.py
└── ssd.py
/active_learning_package/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .augmentations import SSDAugmentation
--------------------------------------------------------------------------------
/layers/__init__.py:
--------------------------------------------------------------------------------
1 | from .functions import *
2 | from .modules import *
3 |
--------------------------------------------------------------------------------
/doc/SSD.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JappaB/Active_Learning_Object_Detection/HEAD/doc/SSD.jpg
--------------------------------------------------------------------------------
/doc/ssd.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JappaB/Active_Learning_Object_Detection/HEAD/doc/ssd.png
--------------------------------------------------------------------------------
/active_learning_package/human_active_learning.py:
--------------------------------------------------------------------------------
1 | def human_active_learning():
2 | raise NotImplementedError()
--------------------------------------------------------------------------------
/doc/detection_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JappaB/Active_Learning_Object_Detection/HEAD/doc/detection_example.png
--------------------------------------------------------------------------------
/doc/detection_example2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JappaB/Active_Learning_Object_Detection/HEAD/doc/detection_example2.png
--------------------------------------------------------------------------------
/doc/detection_examples.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JappaB/Active_Learning_Object_Detection/HEAD/doc/detection_examples.png
--------------------------------------------------------------------------------
/layers/functions/__init__.py:
--------------------------------------------------------------------------------
1 | from .detection import Detect
2 | from .prior_box import PriorBox
3 |
4 |
5 | __all__ = ['Detect', 'PriorBox']
6 |
--------------------------------------------------------------------------------
/layers/modules/__init__.py:
--------------------------------------------------------------------------------
1 | from .l2norm import L2Norm
2 | from .multibox_loss import MultiBoxLoss
3 | from .kl_loss import KLLoss
4 |
5 | __all__ = ['L2Norm', 'MultiBoxLoss','KLLoss']
6 |
--------------------------------------------------------------------------------
/Thesis_Jasper_Bakker_Active_Deep_Learning_for_Object_Detection_With_Sampling_Based_Probabilistic_Bounding_Boxes_compressed.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JappaB/Active_Learning_Object_Detection/HEAD/Thesis_Jasper_Bakker_Active_Deep_Learning_for_Object_Detection_With_Sampling_Based_Probabilistic_Bounding_Boxes_compressed.pdf
--------------------------------------------------------------------------------
/run_scripts/scripts/run_script.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | import datetime
4 |
5 |
6 |
7 | def main():
8 | run_scripts = [
9 | 'boat_image_budget_pre-nms-avg_42_200.job',
10 | # '6class_image_budget_pre-nms-avg_42_200.job',
11 | ]
12 |
13 |
14 |     curr_dir = os.getcwd()  # the .job files cd ../../, so run this from run_scripts/scripts/
15 |
16 |
17 | for script in run_scripts:
18 | print('Starting script: ', script)
19 | print('Time start: ', datetime.datetime.now())
20 | subprocess.call(['bash', script])
21 |
22 | print('Time stop: ', datetime.datetime.now())
23 | print('finished script :)!')
24 | print('________________________\n\n\n\n\n\n\n')
25 |
26 | if __name__ == '__main__':
27 | main()
28 |
--------------------------------------------------------------------------------
/data/horse_seed_set.json:
--------------------------------------------------------------------------------
1 | {
2 | "dataset_name": "VOC07",
3 | "seed_set": {
4 | "image_set_idx": [
5 | "002786",
6 | "009841",
7 | "009114",
8 | "004834",
9 | "001628",
10 | "006445",
11 | "009874",
12 | "006285",
13 | "009318"
14 | ],
15 | "class_dist": []
16 | },
17 | "val_set": {
18 | "image_set_idx": [
19 | "001236",
20 | "004625",
21 | "007216",
22 | "005145",
23 | "009138",
24 | "008596",
25 | "003492",
26 | "007448",
27 | "009407",
28 | "005236"
29 | ],
30 | "class_dist": []
31 | },
32 | "train_set": [
33 | "002786",
34 | "009841",
35 | "009114",
36 | "004834",
37 | "001628",
38 | "006445",
39 | "009874",
40 | "006285",
41 | "009318"
42 | ]
43 | }
--------------------------------------------------------------------------------
/layers/modules/l2norm.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.autograd import Function
4 | from torch.autograd import Variable
5 | import torch.nn.init as init
6 |
7 | class L2Norm(nn.Module):
8 | def __init__(self,n_channels, scale):
9 | super(L2Norm,self).__init__()
10 | self.n_channels = n_channels
11 |         self.gamma = scale  # initial value for the learnable channel-wise scale
12 | self.eps = 1e-10
13 | self.weight = nn.Parameter(torch.Tensor(self.n_channels))
14 | self.reset_parameters()
15 |
16 | def reset_parameters(self):
17 | init.constant_(self.weight,self.gamma)
18 |
19 | def forward(self, x):
20 | norm = x.pow(2).sum(dim=1, keepdim=True).sqrt()+self.eps
21 |         # scale each spatial location to unit L2 norm along the channel dimension
22 | x = torch.div(x,norm)
23 | out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x
24 | return out
25 |
--------------------------------------------------------------------------------
/data/sheep_seed_set.json:
--------------------------------------------------------------------------------
1 | {
2 | "dataset_name": "VOC07",
3 | "seed_set": {
4 | "image_set_idx": [
5 | "008592",
6 | "009842",
7 | "003681",
8 | "007230",
9 | "001714",
10 | "007165",
11 | "004423",
12 | "004490",
13 | "006679",
14 | "001750",
15 | "006833"
16 | ],
17 | "class_dist": []
18 | },
19 | "val_set": {
20 | "image_set_idx": [
21 | "003705",
22 | "002263",
23 | "005469",
24 | "009816",
25 | "000107",
26 | "002593",
27 | "003698",
28 | "006944",
29 | "000900",
30 | "004312"
31 | ],
32 | "class_dist": []
33 | },
34 | "train_set": [
35 | "008592",
36 | "009842",
37 | "003681",
38 | "007230",
39 | "001714",
40 | "007165",
41 | "004423",
42 | "004490",
43 | "006679",
44 | "001750",
45 | "006833"
46 | ]
47 | }
--------------------------------------------------------------------------------
/data/scripts/VOC2012.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Ellis Brown
3 |
4 | start=`date +%s`
5 |
6 | # handle optional download dir
7 | if [ -z "$1" ]
8 | then
9 | # navigate to ~/data
10 | echo "navigating to ~/data/ ..."
11 | mkdir -p ~/data
12 | cd ~/data/
13 | else
14 | # check if is valid directory
15 |     if [ ! -d "$1" ]; then
16 |         echo "$1" "is not a valid directory"
17 |         exit 1
18 |     fi
19 |     echo "navigating to" "$1" "..."
20 |     cd "$1"
21 | fi
22 |
23 | echo "Downloading VOC2012 trainval ..."
24 | # Download the data.
25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
26 | echo "Done downloading."
27 |
28 |
29 | # Extract data
30 | echo "Extracting trainval ..."
31 | tar -xvf VOCtrainval_11-May-2012.tar
32 | echo "removing tar ..."
33 | rm VOCtrainval_11-May-2012.tar
34 |
35 | end=`date +%s`
36 | runtime=$((end-start))
37 |
38 | echo "Completed in" $runtime "seconds"
--------------------------------------------------------------------------------
/data/car_seed_set.json:
--------------------------------------------------------------------------------
1 | {
2 | "dataset_name": "VOC07",
3 | "seed_set": {
4 | "image_set_idx": [
5 | "009863",
6 | "007208",
7 | "007821",
8 | "009900",
9 | "002533",
10 | "002643",
11 | "007374",
12 | "005020",
13 | "001980",
14 | "009106",
15 | "002691",
16 | "005756",
17 | "001360",
18 | "008706",
19 | "007475"
20 | ],
21 | "class_dist": []
22 | },
23 | "val_set": {
24 | "image_set_idx": [
25 | "003636",
26 | "002116",
27 | "006206",
28 | "004244",
29 | "000245",
30 | "003256",
31 | "009839",
32 | "007663",
33 | "006330",
34 | "000545"
35 | ],
36 | "class_dist": []
37 | },
38 | "train_set": [
39 | "009863",
40 | "007208",
41 | "007821",
42 | "009900",
43 | "002533",
44 | "002643",
45 | "007374",
46 | "005020",
47 | "001980",
48 | "009106",
49 | "002691",
50 | "005756",
51 | "001360",
52 | "008706",
53 | "007475"
54 | ]
55 | }
--------------------------------------------------------------------------------
/data/pottedplant_seed_set.json:
--------------------------------------------------------------------------------
1 | {
2 | "dataset_name": "VOC07",
3 | "seed_set": {
4 | "image_set_idx": [
5 | "005821",
6 | "007396",
7 | "001777",
8 | "006159",
9 | "009279",
10 | "004446",
11 | "006188",
12 | "007302",
13 | "004105",
14 | "003758",
15 | "009638",
16 | "009175",
17 | "002946",
18 | "008749",
19 | "008082",
20 | "002775"
21 | ],
22 | "class_dist": []
23 | },
24 | "val_set": {
25 | "image_set_idx": [
26 | "007558",
27 | "000592",
28 | "003301",
29 | "007999",
30 | "004631",
31 | "007390",
32 | "007890",
33 | "006351",
34 | "000710",
35 | "001451"
36 | ],
37 | "class_dist": []
38 | },
39 | "train_set": [
40 | "005821",
41 | "007396",
42 | "001777",
43 | "006159",
44 | "009279",
45 | "004446",
46 | "006188",
47 | "007302",
48 | "004105",
49 | "003758",
50 | "009638",
51 | "009175",
52 | "002946",
53 | "008749",
54 | "008082",
55 | "002775"
56 | ]
57 | }
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 JappaB
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/data/scripts/VOC2007.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Ellis Brown
3 |
4 | start=`date +%s`
5 |
6 | # handle optional download dir
7 | if [ -z "$1" ]
8 | then
9 | # navigate to ~/data
10 | echo "navigating to ~/data/ ..."
11 | mkdir -p ~/data
12 | cd ~/data/
13 | else
14 | # check if is valid directory
15 |     if [ ! -d "$1" ]; then
16 |         echo "$1" "is not a valid directory"
17 |         exit 1
18 |     fi
19 |     echo "navigating to" "$1" "..."
20 |     cd "$1"
21 | fi
22 |
23 | echo "Downloading VOC2007 trainval ..."
24 | # Download the data.
25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
26 | echo "Downloading VOC2007 test data ..."
27 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
28 | echo "Done downloading."
29 |
30 | # Extract data
31 | echo "Extracting trainval ..."
32 | tar -xvf VOCtrainval_06-Nov-2007.tar
33 | echo "Extracting test ..."
34 | tar -xvf VOCtest_06-Nov-2007.tar
35 | echo "removing tars ..."
36 | rm VOCtrainval_06-Nov-2007.tar
37 | rm VOCtest_06-Nov-2007.tar
38 |
39 | end=`date +%s`
40 | runtime=$((end-start))
41 |
42 | echo "Completed in" $runtime "seconds"
--------------------------------------------------------------------------------
/data/imageset_files/sheep_trainval_detect.txt:
--------------------------------------------------------------------------------
1 | 000107
2 | 000225
3 | 000233
4 | 000244
5 | 000416
6 | 000581
7 | 000588
8 | 000654
9 | 000676
10 | 000820
11 | 000900
12 | 001191
13 | 001310
14 | 001594
15 | 001598
16 | 001661
17 | 001714
18 | 001733
19 | 001734
20 | 001750
21 | 001872
22 | 002209
23 | 002263
24 | 002267
25 | 002330
26 | 002452
27 | 002593
28 | 002615
29 | 002618
30 | 002648
31 | 002683
32 | 002845
33 | 002873
34 | 002963
35 | 002986
36 | 003161
37 | 003170
38 | 003382
39 | 003593
40 | 003681
41 | 003698
42 | 003705
43 | 003714
44 | 003874
45 | 004239
46 | 004312
47 | 004423
48 | 004490
49 | 004527
50 | 004768
51 | 004842
52 | 004954
53 | 005001
54 | 005058
55 | 005223
56 | 005288
57 | 005319
58 | 005363
59 | 005395
60 | 005469
61 | 005947
62 | 005964
63 | 006128
64 | 006136
65 | 006247
66 | 006251
67 | 006534
68 | 006678
69 | 006679
70 | 006833
71 | 006899
72 | 006944
73 | 007048
74 | 007165
75 | 007217
76 | 007230
77 | 007621
78 | 007859
79 | 008116
80 | 008150
81 | 008223
82 | 008292
83 | 008342
84 | 008349
85 | 008592
86 | 008618
87 | 008871
88 | 008920
89 | 008942
90 | 009227
91 | 009271
92 | 009349
93 | 009417
94 | 009700
95 | 009816
96 | 009842
97 | 009945
98 |
--------------------------------------------------------------------------------
/data/imageset_files/sheep_test_detect.txt:
--------------------------------------------------------------------------------
1 | 000062
2 | 000175
3 | 000176
4 | 000376
5 | 000451
6 | 000458
7 | 000475
8 | 000574
9 | 000596
10 | 000627
11 | 000779
12 | 000789
13 | 000875
14 | 000884
15 | 000925
16 | 000992
17 | 001271
18 | 001344
19 | 001416
20 | 001477
21 | 001625
22 | 001645
23 | 001671
24 | 001852
25 | 002033
26 | 002121
27 | 002133
28 | 002137
29 | 002338
30 | 002802
31 | 002882
32 | 003025
33 | 003050
34 | 003069
35 | 003087
36 | 003152
37 | 003190
38 | 003241
39 | 003318
40 | 003366
41 | 003471
42 | 003595
43 | 003914
44 | 004072
45 | 004357
46 | 004582
47 | 004610
48 | 004617
49 | 004642
50 | 004646
51 | 004669
52 | 004774
53 | 004827
54 | 004854
55 | 004969
56 | 005137
57 | 005147
58 | 005164
59 | 005324
60 | 005578
61 | 005795
62 | 005886
63 | 005915
64 | 005916
65 | 006217
66 | 006491
67 | 006557
68 | 006815
69 | 006832
70 | 006837
71 | 007028
72 | 007076
73 | 007402
74 | 007440
75 | 007444
76 | 007462
77 | 007534
78 | 007596
79 | 007894
80 | 008187
81 | 008192
82 | 008215
83 | 008333
84 | 008334
85 | 008343
86 | 008344
87 | 008598
88 | 008685
89 | 008795
90 | 008912
91 | 008992
92 | 009031
93 | 009169
94 | 009320
95 | 009451
96 | 009569
97 | 009818
98 | 009861
99 |
--------------------------------------------------------------------------------
/data/boat_seed_set.json:
--------------------------------------------------------------------------------
1 | {
2 | "dataset_name": "VOC07",
3 | "seed_set": {
4 | "image_set_idx": [
5 | "005496",
6 | "007743",
7 | "006965",
8 | "000154",
9 | "002234",
10 | "003337",
11 | "005614",
12 | "004532",
13 | "000184",
14 | "002372",
15 | "000382",
16 | "008498",
17 | "005181",
18 | "007139",
19 | "007685",
20 | "003098",
21 | "003722",
22 | "002659",
23 | "007460",
24 | "007803",
25 | "002208"
26 | ],
27 | "class_dist": []
28 | },
29 | "val_set": {
30 | "image_set_idx": [
31 | "000931",
32 | "002465",
33 | "007361",
34 | "001732",
35 | "003860",
36 | "000826",
37 | "006123",
38 | "009603",
39 | "005705",
40 | "007365"
41 | ],
42 | "class_dist": []
43 | },
44 | "train_set": [
45 | "005496",
46 | "007743",
47 | "006965",
48 | "000154",
49 | "002234",
50 | "003337",
51 | "005614",
52 | "004532",
53 | "000184",
54 | "002372",
55 | "000382",
56 | "008498",
57 | "005181",
58 | "007139",
59 | "007685",
60 | "003098",
61 | "003722",
62 | "002659",
63 | "007460",
64 | "007803",
65 | "002208"
66 | ]
67 | }
--------------------------------------------------------------------------------
/data/bottle_seed_set.json:
--------------------------------------------------------------------------------
1 | {
2 | "dataset_name": "VOC07",
3 | "seed_set": {
4 | "image_set_idx": [
5 | "009679",
6 | "009388",
7 | "009100",
8 | "007798",
9 | "002881",
10 | "000250",
11 | "004152",
12 | "006576",
13 | "000344",
14 | "008204",
15 | "002253",
16 | "005467",
17 | "009290",
18 | "007457",
19 | "006626",
20 | "000367",
21 | "008931",
22 | "002350",
23 | "007141",
24 | "006363",
25 | "006727",
26 | "000498"
27 | ],
28 | "class_dist": []
29 | },
30 | "val_set": {
31 | "image_set_idx": [
32 | "006409",
33 | "000269",
34 | "006648",
35 | "002641",
36 | "004671",
37 | "007121",
38 | "007649",
39 | "009878",
40 | "000381",
41 | "004886"
42 | ],
43 | "class_dist": []
44 | },
45 | "train_set": [
46 | "009679",
47 | "009388",
48 | "009100",
49 | "007798",
50 | "002881",
51 | "000250",
52 | "004152",
53 | "006576",
54 | "000344",
55 | "008204",
56 | "002253",
57 | "005467",
58 | "009290",
59 | "007457",
60 | "006626",
61 | "000367",
62 | "008931",
63 | "002350",
64 | "007141",
65 | "006363",
66 | "006727",
67 | "000498"
68 | ]
69 | }
--------------------------------------------------------------------------------
/run_scripts/scripts/boat_image_budget_pre-nms-avg_42_200.job:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | cd ../../
4 |
5 | BASE_DIR=$PWD
6 | echo $BASE_DIR
7 |
8 | EXPERIMENT_DIR=test_boat/
9 | mkdir -p "$BASE_DIR"/active_learning_dir/"$EXPERIMENT_DIR"
10 |
11 |
12 | python active_learning_main.py \
13 | --budget_measured_in_objects False \
14 | --rescaled_foreground_probs True \
15 | --sample_select_nms_conf_thresh 0.01 \
16 | --skip_sample_selection_first_iter False \
17 | --batch_size 8 \
18 | --sampling_strategy none_covariance-obj \
19 | --merging_method pre_nms_avg \
20 | --lr 1e-4 \
21 | --ensemble_size 3 \
22 | --annotate_all_objects True \
23 | --experiment_dir "$EXPERIMENT_DIR" \
24 | --train_from_basenet_every_iter True \
25 | --paths_to_weights \
26 | weights/initial_net_0 \
27 | weights/initial_net_1 \
28 | weights/initial_net_2 \
29 | --trained_models \
30 | weights/initial_net_0 \
31 | weights/initial_net_1 \
32 | weights/initial_net_2 \
33 | --samples_per_iter 10 10 10 10 10 \
34 | --eval_every_iter False \
35 | --debug False \
36 | --fixed_number_of_epochs 200 \
37 | --seed 92 \
38 | --dataset VOC07_1_class \
39 | --imageset_train boat_trainval_detect \
40 | --imageset_test boat_test_detect \
41 | --seed_set_file data/boat_seed_set.json \
42 | --optimizer SGD \
43 | --relevant_class boat
44 |
45 |
46 |
47 | wait
48 |
49 | echo end of job
50 |
--------------------------------------------------------------------------------
/run_scripts/scripts/6class_image_budget_pre-nms-avg_42_200.job:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | cd ../../
4 |
5 | BASE_DIR=$PWD
6 | echo $BASE_DIR
7 |
8 | EXPERIMENT_DIR=test_6class/
9 | mkdir -p "$BASE_DIR"/active_learning_dir/"$EXPERIMENT_DIR"
10 |
11 |
12 | python active_learning_main.py \
13 | --budget_measured_in_objects False \
14 | --rescaled_foreground_probs True \
15 | --sample_select_nms_conf_thresh 0.01 \
16 | --skip_sample_selection_first_iter False \
17 | --batch_size 8 \
18 | --sampling_strategy none_covariance-obj \
19 | --merging_method pre_nms_avg \
20 | --lr 1e-4 \
21 | --ensemble_size 6 \
22 | --annotate_all_objects True \
23 | --experiment_dir "$EXPERIMENT_DIR" \
24 | --train_from_basenet_every_iter True \
25 | --paths_to_weights \
26 | weights/initial_net_0 \
27 | weights/initial_net_1 \
28 | weights/initial_net_2 \
29 | --trained_models \
30 | weights/initial_net_0 \
31 | weights/initial_net_1 \
32 | weights/initial_net_2 \
33 | --samples_per_iter 25 25 25 25 25 25 \
34 | --eval_every_iter True \
35 | --debug False \
36 | --short_gpu False \
37 | --fixed_number_of_epochs 200 \
38 | --seed 42 \
39 | --dataset VOC07_6_class \
40 | --imageset_train 6_class_trainval_detect \
41 | --imageset_test 6_class_test_detect \
42 | --seed_set_file data/6_class_seed_set.json \
43 | --optimizer SGD
44 |
45 |
46 |
47 | wait
48 |
49 | echo end of job
50 |
--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
1 | from .voc0712 import VOCDetection, VOCAnnotationTransform, VOC_CLASSES, VOC_ROOT, VOC_ROOT_LOCAL
2 |
3 | from .config import *
4 | import torch
5 | import cv2
6 | import numpy as np
7 |
8 | def detection_collate(batch):
9 | """Custom collate fn for dealing with batches of images that have a different
10 | number of associated object annotations (bounding boxes).
11 |
12 | Arguments:
13 | batch: (tuple) A tuple of tensor images and lists of annotations
14 |
15 | Return:
16 | A tuple containing:
17 | 1) (tensor) batch of images stacked on their 0 dim
18 | 2) (list of tensors) annotations for a given image are stacked on
19 | 0 dim
20 | """
21 | targets = []
22 | imgs = []
23 | for sample in batch:
24 | imgs.append(sample[0])
25 | targets.append(torch.FloatTensor(sample[1]))
26 | return torch.stack(imgs, 0), targets
27 |
28 |
29 | def base_transform(image, size, mean):
30 |     # cv2.resize uses linear interpolation by default and doesn't preserve the aspect ratio: https://docs.opencv.org/2.4/modules/imgproc/doc/geometric_transformations.html?highlight=resize#void%20resize(InputArray%20src,%20OutputArray%20dst,%20Size%20dsize,%20double%20fx,%20double%20fy,%20int%20interpolation)
31 | x = cv2.resize(image, (size, size)).astype(np.float32)
32 | x -= mean
33 | x = x.astype(np.float32)
34 | return x
35 |
36 |
37 | class BaseTransform:
38 | def __init__(self, size, mean):
39 | # if modeltype == 'SSD300':
40 | # size = 300
41 | self.size = size
42 | self.mean = np.array(mean, dtype=np.float32)
43 |
44 | def __call__(self, image, boxes=None, labels=None):
45 | return base_transform(image, self.size, self.mean), boxes, labels
46 |
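47 |
48 | # Illustrative usage sketch (not part of the original file; assumes the default
49 | # VOCDetection arguments from this package):
50 | # from torch.utils.data import DataLoader
51 | # dataset = VOCDetection(VOC_ROOT, transform=BaseTransform(300, MEANS))
52 | # loader = DataLoader(dataset, batch_size=8, shuffle=True, collate_fn=detection_collate)
53 | # images, targets = next(iter(loader))  # images stacked on dim 0; targets is a list of per-image box tensors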
--------------------------------------------------------------------------------
/data/imageset_files/boat_test_detect.txt:
--------------------------------------------------------------------------------
1 | 000069
2 | 000080
3 | 000105
4 | 000179
5 | 000240
6 | 000295
7 | 000350
8 | 000371
9 | 000375
10 | 000444
11 | 000449
12 | 000481
13 | 000529
14 | 000538
15 | 000558
16 | 000576
17 | 000631
18 | 000792
19 | 000811
20 | 000837
21 | 000907
22 | 000914
23 | 000995
24 | 001049
25 | 001059
26 | 001076
27 | 001141
28 | 001155
29 | 001188
30 | 001227
31 | 001342
32 | 001355
33 | 001394
34 | 001410
35 | 001474
36 | 001487
37 | 001527
38 | 001591
39 | 001634
40 | 001646
41 | 001698
42 | 001705
43 | 001770
44 | 001815
45 | 001822
46 | 001895
47 | 001974
48 | 002029
49 | 002085
50 | 002175
51 | 002225
52 | 002274
53 | 002390
54 | 002395
55 | 002449
56 | 002474
57 | 002580
58 | 002628
59 | 002719
60 | 002764
61 | 002811
62 | 002821
63 | 002948
64 | 002983
65 | 003012
66 | 003073
67 | 003111
68 | 003131
69 | 003409
70 | 003498
71 | 003543
72 | 003571
73 | 003652
74 | 003799
75 | 003836
76 | 003892
77 | 003900
78 | 003910
79 | 003929
80 | 003952
81 | 004123
82 | 004160
83 | 004184
84 | 004225
85 | 004299
86 | 004374
87 | 004382
88 | 004443
89 | 004573
90 | 004613
91 | 004665
92 | 004843
93 | 004899
94 | 004914
95 | 004937
96 | 005022
97 | 005074
98 | 005098
99 | 005112
100 | 005213
101 | 005243
102 | 005272
103 | 005329
104 | 005382
105 | 005427
106 | 005604
107 | 005616
108 | 005670
109 | 005720
110 | 005771
111 | 005849
112 | 005950
113 | 006014
114 | 006048
115 | 006110
116 | 006160
117 | 006164
118 | 006205
119 | 006302
120 | 006332
121 | 006408
122 | 006490
123 | 006528
124 | 006604
125 | 006659
126 | 006889
127 | 006977
128 | 006997
129 | 007066
130 | 007091
131 | 007173
132 | 007233
133 | 007357
134 | 007377
135 | 007400
136 | 007415
137 | 007428
138 | 007455
139 | 007464
140 | 007635
141 | 007660
142 | 007788
143 | 007850
144 | 007952
145 | 007990
146 | 008099
147 | 008145
148 | 008217
149 | 008219
150 | 008249
151 | 008278
152 | 008369
153 | 008373
154 | 008545
155 | 008571
156 | 008578
157 | 008590
158 | 008643
159 | 008704
160 | 008820
161 | 008868
162 | 008869
163 | 008998
164 | 009001
165 | 009026
166 | 009102
167 | 009120
168 | 009137
169 | 009240
170 | 009423
171 | 009538
172 | 009727
173 | 009728
174 | 009793
175 | 009811
176 | 009885
177 |
--------------------------------------------------------------------------------
/data/config.py:
--------------------------------------------------------------------------------
1 | # config.py
2 | import os.path
3 |
4 | # gets home dir cross platform
5 | HOME = os.path.expanduser("~")
6 |
7 | # for making bounding boxes pretty
8 | COLORS = ((255, 0, 0, 128), (0, 255, 0, 128), (0, 0, 255, 128),
9 | (0, 255, 255, 128), (255, 0, 255, 128), (255, 255, 0, 128))
10 | MEANS = (104, 117, 123) # channel means for Pascal VOC
11 |
12 | # SSD300 CONFIGS
13 | voc = {
14 | 'num_classes': 21,
15 | 'dataset_mean': (104, 117, 123),
16 | 'lr_steps': (80000, 100000, 120000),
17 | 'max_iter': 120000,
18 | 'feature_maps': [38, 19, 10, 5, 3, 1],
19 | 'min_dim': 300,
20 | 'steps': [8, 16, 32, 64, 100, 300],
21 | 'min_sizes': [30, 60, 111, 162, 213, 264],
22 | 'max_sizes': [60, 111, 162, 213, 264, 315],
23 | 'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
24 | 'variance': [0.1, 0.2], # Not discussed in paper, see blogpost: https://leimao.github.io/blog/Bounding-Box-Encoding-Decoding/
25 | 'clip': True,
26 | 'name': 'VOC',
27 | }
28 |
29 |
30 | voc_1_class = {
31 | 'num_classes': 2,
32 | 'dataset_mean': (104, 117, 123),
33 | 'lr_steps': (80000, 100000, 120000),
34 | 'max_iter': 120000,
35 | 'feature_maps': [38, 19, 10, 5, 3, 1],
36 | 'min_dim': 300,
37 | 'steps': [8, 16, 32, 64, 100, 300],
38 | 'min_sizes': [30, 60, 111, 162, 213, 264],
39 | 'max_sizes': [60, 111, 162, 213, 264, 315],
40 | 'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
41 | 'variance': [0.1, 0.2], # Not discussed in paper, see blogpost: https://leimao.github.io/blog/Bounding-Box-Encoding-Decoding/
42 | 'clip': True,
43 | 'name': 'VOC_1_class',
44 | }
45 |
46 | voc_6_class = {
47 | 'num_classes': 7,
48 | 'dataset_mean': (104, 117, 123),
49 | 'lr_steps': (80000, 100000, 120000),
50 | 'max_iter': 120000,
51 | 'feature_maps': [38, 19, 10, 5, 3, 1],
52 | 'min_dim': 300,
53 | 'steps': [8, 16, 32, 64, 100, 300],
54 | 'min_sizes': [30, 60, 111, 162, 213, 264],
55 | 'max_sizes': [60, 111, 162, 213, 264, 315],
56 | 'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
57 | 'variance': [0.1, 0.2], # Not discussed in paper, see blogpost: https://leimao.github.io/blog/Bounding-Box-Encoding-Decoding/
58 | 'clip': True,
59 | 'name': 'VOC_6_class',
60 | }
61 |
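62 |
63 | # Sanity check (illustrative): priors per location are 2 + 2*len(aspect_ratios[k]),
64 | # so these settings yield 38*38*4 + 19*19*6 + 10*10*6 + 5*5*6 + 3*3*4 + 1*1*4
65 | # = 8732 default boxes, matching the SSD300 paper.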
--------------------------------------------------------------------------------
/data/scripts/COCO2014.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | start=`date +%s`
4 |
5 | # handle optional download dir
6 | if [ -z "$1" ]
7 | then
8 | # navigate to ~/data
9 | echo "navigating to ~/data/ ..."
10 | mkdir -p ~/data
11 | cd ~/data/
12 | mkdir -p ./coco
13 | cd ./coco
14 | mkdir -p ./images
15 | mkdir -p ./annotations
16 | else
17 | # check if specified dir is valid
18 |     if [ ! -d "$1" ]; then
19 |         echo "$1" "is not a valid directory"
20 |         exit 1
21 |     fi
22 |     echo "navigating to" "$1" "..."
23 |     cd "$1"
24 | fi
25 |
26 | if [ ! -d images ]
27 | then
28 | mkdir -p ./images
29 | fi
30 |
31 | # Download the image data.
32 | cd ./images
33 | echo "Downloading MSCOCO train images ..."
34 | curl -LO http://images.cocodataset.org/zips/train2014.zip
35 | echo "Downloading MSCOCO val images ..."
36 | curl -LO http://images.cocodataset.org/zips/val2014.zip
37 |
38 | cd ../
39 | if [ ! -d annotations ]
40 | then
41 | mkdir -p ./annotations
42 | fi
43 |
44 | # Download the annotation data.
45 | cd ./annotations
46 | echo "Downloading MSCOCO train/val annotations ..."
47 | curl -LO http://images.cocodataset.org/annotations/annotations_trainval2014.zip
48 | echo "Finished downloading. Now extracting ..."
49 |
50 | # Unzip data
51 | echo "Extracting train images ..."
52 | unzip ../images/train2014.zip -d ../images
53 | echo "Extracting val images ..."
54 | unzip ../images/val2014.zip -d ../images
55 | echo "Extracting annotations ..."
56 | unzip ./annotations_trainval2014.zip
57 |
58 | echo "Removing zip files ..."
59 | rm ../images/train2014.zip
60 | rm ../images/val2014.zip
61 | rm ./annotations_trainval2014.zip
62 |
63 | echo "Creating trainval35k dataset..."
64 |
65 | # Download annotations json
66 | echo "Downloading trainval35k annotations from S3"
67 | curl -LO https://s3.amazonaws.com/amdegroot-datasets/instances_trainval35k.json.zip
68 |
69 | # combine train and val
70 | echo "Combining train and val images"
71 | mkdir ../images/trainval35k
72 | cd ../images/train2014
73 | find -maxdepth 1 -name '*.jpg' -exec cp -t ../trainval35k {} + # dir too large for cp
74 | cd ../val2014
75 | find -maxdepth 1 -name '*.jpg' -exec cp -t ../trainval35k {} +
76 |
77 |
78 | end=`date +%s`
79 | runtime=$((end-start))
80 |
81 | echo "Completed in " $runtime " seconds"
82 |
--------------------------------------------------------------------------------
/data/imageset_files/boat_trainval_detect.txt:
--------------------------------------------------------------------------------
1 | 000061
2 | 000073
3 | 000130
4 | 000154
5 | 000184
6 | 000232
7 | 000241
8 | 000296
9 | 000370
10 | 000382
11 | 000474
12 | 000513
13 | 000519
14 | 000563
15 | 000608
16 | 000713
17 | 000740
18 | 000742
19 | 000826
20 | 000862
21 | 000906
22 | 000931
23 | 001002
24 | 001121
25 | 001136
26 | 001176
27 | 001205
28 | 001247
29 | 001298
30 | 001325
31 | 001463
32 | 001481
33 | 001484
34 | 001497
35 | 001732
36 | 001847
37 | 001887
38 | 001976
39 | 002000
40 | 002039
41 | 002091
42 | 002208
43 | 002234
44 | 002288
45 | 002302
46 | 002372
47 | 002392
48 | 002403
49 | 002435
50 | 002465
51 | 002479
52 | 002500
53 | 002605
54 | 002653
55 | 002659
56 | 002738
57 | 002838
58 | 003008
59 | 003065
60 | 003098
61 | 003223
62 | 003284
63 | 003337
64 | 003404
65 | 003413
66 | 003639
67 | 003657
68 | 003669
69 | 003722
70 | 003797
71 | 003811
72 | 003860
73 | 003861
74 | 003863
75 | 003919
76 | 003974
77 | 004170
78 | 004194
79 | 004241
80 | 004246
81 | 004258
82 | 004325
83 | 004371
84 | 004387
85 | 004464
86 | 004498
87 | 004532
88 | 004592
89 | 004651
90 | 004839
91 | 004885
92 | 004931
93 | 004958
94 | 005073
95 | 005108
96 | 005128
97 | 005150
98 | 005171
99 | 005181
100 | 005217
101 | 005257
102 | 005320
103 | 005325
104 | 005337
105 | 005340
106 | 005358
107 | 005369
108 | 005370
109 | 005413
110 | 005420
111 | 005431
112 | 005496
113 | 005614
114 | 005699
115 | 005705
116 | 005713
117 | 005714
118 | 005742
119 | 005825
120 | 005860
121 | 005914
122 | 006046
123 | 006079
124 | 006098
125 | 006123
126 | 006215
127 | 006281
128 | 006304
129 | 006357
130 | 006436
131 | 006542
132 | 006549
133 | 006660
134 | 006673
135 | 006773
136 | 006867
137 | 006874
138 | 006953
139 | 006965
140 | 006983
141 | 007052
142 | 007125
143 | 007139
144 | 007289
145 | 007361
146 | 007365
147 | 007449
148 | 007460
149 | 007477
150 | 007521
151 | 007533
152 | 007685
153 | 007718
154 | 007743
155 | 007803
156 | 007833
157 | 007980
158 | 008091
159 | 008112
160 | 008163
161 | 008166
162 | 008251
163 | 008261
164 | 008306
165 | 008449
166 | 008454
167 | 008456
168 | 008498
169 | 008568
170 | 008584
171 | 008604
172 | 008645
173 | 008725
174 | 008730
175 | 008784
176 | 008884
177 | 008900
178 | 008999
179 | 009131
180 | 009177
181 | 009347
182 | 009527
183 | 009533
184 | 009603
185 | 009717
186 | 009836
187 | 009947
188 | 009955
189 |
--------------------------------------------------------------------------------
/layers/functions/prior_box.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from math import sqrt
3 | from itertools import product
4 | import torch
5 |
6 | import layers.box_utils as box_utils
7 |
8 |
9 | class PriorBox(object):
10 | """Compute priorbox coordinates in center-offset form for each source
11 | feature map.
12 | """
13 | def __init__(self, cfg, modeltype = 'SSD300'):
14 | super(PriorBox, self).__init__()
15 | self.image_size = cfg['min_dim']
16 |         # number of priors per feature map location (either 4 or 6)
17 | self.num_priors = len(cfg['aspect_ratios'])
18 | self.variance = cfg['variance'] or [0.1]
19 | self.feature_maps = cfg['feature_maps']
20 | self.min_sizes = cfg['min_sizes']
21 | self.max_sizes = cfg['max_sizes']
22 | self.steps = cfg['steps']
23 | self.aspect_ratios = cfg['aspect_ratios']
24 | self.clip = cfg['clip']
25 | self.version = cfg['name']
26 | self.modeltype = modeltype
27 | for v in self.variance:
28 | if v <= 0:
29 | raise ValueError('Variances must be greater than 0')
30 |
31 | def forward(self):
32 | mean = []
33 | for k, f in enumerate(self.feature_maps):
34 | for i, j in product(range(f), repeat=2):
35 | f_k = self.image_size / self.steps[k]
36 | # unit center x,y
37 | cx = (j + 0.5) / f_k
38 | cy = (i + 0.5) / f_k
39 |
40 | # aspect_ratio: 1
41 | # rel size: min_size
42 | s_k = self.min_sizes[k]/self.image_size
43 | mean += [cx, cy, s_k, s_k]
44 |
45 | # aspect_ratio: 1
46 | # rel size: sqrt(s_k * s_(k+1))
47 | s_k_prime = sqrt(s_k * (self.max_sizes[k]/self.image_size))
48 | mean += [cx, cy, s_k_prime, s_k_prime]
49 |
50 | # rest of aspect ratios
51 | for ar in self.aspect_ratios[k]:
52 | mean += [cx, cy, s_k*sqrt(ar), s_k/sqrt(ar)]
53 | mean += [cx, cy, s_k/sqrt(ar), s_k*sqrt(ar)]
54 |
55 | # back to torch land
56 | output = torch.Tensor(mean).view(-1, 4)
57 | if self.clip:
58 | output.clamp_(max=1, min=0)
59 |
60 | # # todo
61 | # if self.modeltype == 'SSD300KL':
62 | # # transform to x1y1x2y2 form
63 | # output = box_utils.point_form(output)
64 | # # print('todo')
65 | #
66 | # if self.clip:
67 | # output.clamp_(max=1, min=0)
68 |
69 | return output
70 |
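71 |
72 | # Illustrative usage sketch (not part of the original file):
73 | # from data import config
74 | # priors = PriorBox(config.voc).forward()
75 | # assert priors.shape == (8732, 4)  # (cx, cy, w, h) per default box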
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 |
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 |
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 |
48 | # Translations
49 | *.mo
50 | *.pot
51 |
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 |
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 |
60 | # Scrapy stuff:
61 | .scrapy
62 |
63 | # Sphinx documentation
64 | docs/_build/
65 |
66 | # PyBuilder
67 | target/
68 |
69 | # IPython Notebook
70 | .ipynb_checkpoints
71 |
72 | # pyenv
73 | .python-version
74 |
75 | # celery beat schedule file
76 | celerybeat-schedule
77 |
78 | # dotenv
79 | .env
80 |
81 | # virtualenv
82 | venv/
83 | ENV/
84 |
85 | # Spyder project settings
86 | .spyderproject
87 |
88 | # Rope project settings
89 | .ropeproject
90 |
91 | # atom remote-sync package
92 | .remote-sync.json
93 |
94 | # weights
95 | weights/
96 |
97 | #DS_Store
98 | .DS_Store
99 |
100 | # dev stuff
101 | eval/*/
102 | eval.ipynb
103 | dev.ipynb
104 | .vscode/
105 |
106 | # not ready
107 | videos/
108 | templates/
109 | data/ssd_dataloader.py
110 | data/datasets/
111 | doc/visualize.py
112 | read_results.py
113 | ssd300_120000/
114 | demos/live
115 | webdemo.py
116 | test_data_aug.py
117 |
118 | # attributes
119 | # pycharm
120 | .idea/
121 |
122 | # temp checkout soln
123 | data/datasets/
124 | data/ssd_dataloader.py
125 |
126 | # pylint
127 | .pylintrc
128 |
129 | ###### Above gitignores are simply copied from the SSD.pytorch repo by AMDegroot ######
130 | ###### Below are my own (Jasper Bakker) ######
131 |
132 |
133 | # Coco dataset
134 | data/images/coco
135 |
136 | # weights of experiments
137 | active_learning_dir/*/weights/
138 | active_learning_dir/*/optimizers/
139 |
140 | #some large files
141 | **/sample_selection/
142 | **/eval/*detections*
143 | active_learning_dir/*
144 |
145 | # server stuff
146 | .nfs*
147 |
148 | # pandas core differs locally
149 | data/stats*.pickle
150 |
151 | #some local folder that sometimes contains large files
152 | debug2/*
153 | active_learning_dir/
154 |
--------------------------------------------------------------------------------
/data/imageset_files/bottle_test_detect.txt:
--------------------------------------------------------------------------------
1 | 000136
2 | 000144
3 | 000151
4 | 000178
5 | 000202
6 | 000277
7 | 000280
8 | 000299
9 | 000327
10 | 000335
11 | 000346
12 | 000369
13 | 000414
14 | 000447
15 | 000517
16 | 000587
17 | 000606
18 | 000611
19 | 000737
20 | 000762
21 | 000825
22 | 000893
23 | 001021
24 | 001023
25 | 001035
26 | 001047
27 | 001086
28 | 001105
29 | 001167
30 | 001177
31 | 001244
32 | 001295
33 | 001349
34 | 001354
35 | 001407
36 | 001429
37 | 001431
38 | 001456
39 | 001489
40 | 001569
41 | 001585
42 | 001602
43 | 001631
44 | 001652
45 | 001720
46 | 001812
47 | 001829
48 | 001921
49 | 001925
50 | 001957
51 | 001975
52 | 001992
53 | 002026
54 | 002207
55 | 002231
56 | 002235
57 | 002389
58 | 002429
59 | 002536
60 | 002538
61 | 002560
62 | 002575
63 | 002614
64 | 002656
65 | 002705
66 | 002711
67 | 002712
68 | 002743
69 | 002773
70 | 002857
71 | 002904
72 | 002905
73 | 002945
74 | 002950
75 | 002951
76 | 002982
77 | 003010
78 | 003067
79 | 003071
80 | 003076
81 | 003249
82 | 003278
83 | 003309
84 | 003431
85 | 003446
86 | 003459
87 | 003488
88 | 003544
89 | 003590
90 | 003643
91 | 003649
92 | 003665
93 | 003697
94 | 003707
95 | 003725
96 | 003736
97 | 003776
98 | 003802
99 | 003842
100 | 003867
101 | 003906
102 | 003928
103 | 003942
104 | 003943
105 | 003944
106 | 004032
107 | 004042
108 | 004055
109 | 004056
110 | 004068
111 | 004072
112 | 004118
113 | 004127
114 | 004134
115 | 004162
116 | 004180
117 | 004236
118 | 004268
119 | 004311
120 | 004348
121 | 004355
122 | 004422
123 | 004469
124 | 004476
125 | 004546
126 | 004599
127 | 004640
128 | 004712
129 | 004717
130 | 004720
131 | 004721
132 | 004819
133 | 004880
134 | 004919
135 | 004922
136 | 005002
137 | 005096
138 | 005216
139 | 005226
140 | 005233
141 | 005276
142 | 005294
143 | 005428
144 | 005442
145 | 005491
146 | 005570
147 | 005622
148 | 005635
149 | 005721
150 | 005926
151 | 005936
152 | 005942
153 | 006003
154 | 006056
155 | 006057
156 | 006072
157 | 006086
158 | 006093
159 | 006152
160 | 006195
161 | 006248
162 | 006359
163 | 006360
164 | 006380
165 | 006390
166 | 006402
167 | 006432
168 | 006452
169 | 006500
170 | 006644
171 | 006662
172 | 006732
173 | 007225
174 | 007237
175 | 007404
176 | 007456
177 | 007494
178 | 007500
179 | 007504
180 | 007532
181 | 007569
182 | 007598
183 | 007652
184 | 007700
185 | 007717
186 | 007739
187 | 007744
188 | 007778
189 | 007783
190 | 007862
191 | 007937
192 | 008089
193 | 008094
194 | 008134
195 | 008182
196 | 008192
197 | 008287
198 | 008330
199 | 008379
200 | 008382
201 | 008400
202 | 008405
203 | 008414
204 | 008458
205 | 008516
206 | 008520
207 | 008555
208 | 008583
209 | 008591
210 | 008627
211 | 008686
212 | 008697
213 | 008740
214 | 008778
215 | 008894
216 | 008902
217 | 008922
218 | 008925
219 | 009012
220 | 009075
221 | 009164
222 | 009171
223 | 009257
224 | 009297
225 | 009311
226 | 009366
227 | 009431
228 | 009482
229 | 009547
230 | 009570
231 | 009626
232 | 009630
233 | 009633
234 | 009645
235 | 009782
236 | 009798
237 | 009840
238 | 009871
239 | 009891
240 | 009929
241 |
--------------------------------------------------------------------------------
/create_initial_networks.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import torch
4 |
5 | import active_learning_package.helpers as helpers
6 | from data import config
7 |
8 | def str2bool(v):
9 |     return v.lower() in ("yes", "true", "t", "1")  # argparse's type=bool treats any non-empty string (even 'False') as True
10 |
11 | # parser arguments from train.py
12 | parser = argparse.ArgumentParser(description='Active Learning With Single Shot MultiBox Detector Training With Pytorch')
13 |
14 | parser.add_argument('--modeltype', default='SSD300',choices=['SSD300', 'SSD300KL'],
15 | help='Which model to use: standard SSD or the SSD with uncertainty in the bounding box regression and KL loss ') #SSD300KL doesn't work well
16 | parser.add_argument('--dataset', default='VOC07_1_class', choices=['VOC07', 'VOC12','VOC07_1_class','VOC07_6_class'],
17 | type=str, help='VOC07_1_class is with one class of interest and the background class')
18 | parser.add_argument('--sample_select_nms_conf_thresh', default = 0.01, type = float,
19 |                     help = 'The confidence threshold used before non-maximum suppression. Only detections with a confidence above '
20 |                            'this threshold for a certain class will go through NMS')
21 | parser.add_argument('--paths_to_weights', default=None,type=str, nargs='+',
22 |                     help='These are the weights that are used for the initial evaluation of the unlabeled dataset') # if no trained model is given, this will return an error when loading the model.
23 | parser.add_argument('--basenet', default='weights/vgg16_reducedfc.pth',
24 | help='Pretrained base model')
25 |
26 | parser.add_argument('--ensemble_size', default=3,type=int)
27 | parser.add_argument('--num_classes', default=1,type=int,
28 |                     help='number of classes of interest (excluding the background class)')
29 |
30 |
31 | if __name__ == '__main__':
32 | args = parser.parse_args()
33 | if args.dataset in ['VOC12','VOC07']:
34 | args.cfg = config.voc # adapted from pytorch SSD code
35 |
36 | elif args.dataset == 'VOC07_1_class':
37 | args.cfg = config.voc_1_class
38 |
39 | elif args.dataset == 'VOC07_6_class':
40 | args.cfg = config.voc_6_class
41 |
42 | if torch.cuda.is_available():
43 | device = 'cuda'
44 | torch.set_default_tensor_type('torch.cuda.FloatTensor')
45 | else:
46 | device = 'cpu'
47 | torch.set_default_tensor_type('torch.FloatTensor')
48 |
49 | args.device = device
50 |
51 | args.num_classes = args.num_classes + 1
52 |     print('Creating', args.ensemble_size, 'SSDs for', args.num_classes, 'classes (including the background class)')
53 | print('...')
54 |
55 |
56 |
57 |
58 | for i in range(args.ensemble_size):
59 | # make net
60 | net = helpers.build_sample_selection_net(args,
61 | args.num_classes)
62 |
63 | args.experiment_dir = os.getcwd()+'/'
64 | path = 'weights/initial_net_'+str(i)
65 |
66 | # save net
67 | helpers.save_weights(weights=net,
68 | args=args,
69 | path=path)
70 | print()
71 |
72 |
73 | print('Initial nets created!')
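74 |
75 | # Example invocation (illustrative; flags as defined above):
76 | #   python create_initial_networks.py --dataset VOC07_1_class --num_classes 1 --ensemble_size 3
77 | # This writes weights/initial_net_0 ... initial_net_2, which the .job scripts in
78 | # run_scripts/scripts/ load via --paths_to_weights.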
--------------------------------------------------------------------------------
/data/imageset_files/pottedplant_test_detect.txt:
--------------------------------------------------------------------------------
1 | 000006
2 | 000070
3 | 000116
4 | 000124
5 | 000149
6 | 000196
7 | 000234
8 | 000389
9 | 000397
10 | 000429
11 | 000575
12 | 000606
13 | 000650
14 | 000692
15 | 000698
16 | 000737
17 | 000784
18 | 000788
19 | 000840
20 | 000890
21 | 000933
22 | 000953
23 | 000983
24 | 001023
25 | 001035
26 | 001070
27 | 001141
28 | 001180
29 | 001255
30 | 001276
31 | 001380
32 | 001391
33 | 001411
34 | 001437
35 | 001469
36 | 001478
37 | 001540
38 | 001631
39 | 001648
40 | 001652
41 | 001665
42 | 001666
43 | 001712
44 | 001715
45 | 001779
46 | 001786
47 | 001811
48 | 001812
49 | 001867
50 | 001869
51 | 001880
52 | 001905
53 | 002018
54 | 002026
55 | 002035
56 | 002074
57 | 002123
58 | 002161
59 | 002240
60 | 002297
61 | 002339
62 | 002381
63 | 002414
64 | 002422
65 | 002451
66 | 002482
67 | 002489
68 | 002510
69 | 002527
70 | 002557
71 | 002604
72 | 002708
73 | 002743
74 | 002753
75 | 002777
76 | 002887
77 | 002925
78 | 002945
79 | 003014
80 | 003020
81 | 003029
82 | 003060
83 | 003062
84 | 003113
85 | 003192
86 | 003221
87 | 003281
88 | 003297
89 | 003323
90 | 003442
91 | 003448
92 | 003488
93 | 003572
94 | 003591
95 | 003624
96 | 003626
97 | 003649
98 | 003665
99 | 003676
100 | 003756
101 | 003761
102 | 003778
103 | 003787
104 | 003882
105 | 003934
106 | 003943
107 | 004027
108 | 004038
109 | 004045
110 | 004081
111 | 004116
112 | 004124
113 | 004155
114 | 004159
115 | 004173
116 | 004211
117 | 004222
118 | 004276
119 | 004311
120 | 004422
121 | 004451
122 | 004486
123 | 004712
124 | 004778
125 | 004780
126 | 004858
127 | 004862
128 | 004893
129 | 004941
130 | 005088
131 | 005092
132 | 005174
133 | 005180
134 | 005184
135 | 005196
136 | 005266
137 | 005284
138 | 005494
139 | 005525
140 | 005556
141 | 005567
142 | 005650
143 | 005661
144 | 005673
145 | 005703
146 | 005754
147 | 005804
148 | 005857
149 | 005866
150 | 005922
151 | 005926
152 | 005937
153 | 005941
154 | 005943
155 | 005976
156 | 006022
157 | 006049
158 | 006083
159 | 006115
160 | 006231
161 | 006297
162 | 006303
163 | 006307
164 | 006311
165 | 006359
166 | 006426
167 | 006496
168 | 006504
169 | 006646
170 | 006691
171 | 006700
172 | 006712
173 | 006750
174 | 006776
175 | 006792
176 | 006795
177 | 006798
178 | 006807
179 | 006816
180 | 006843
181 | 006861
182 | 006902
183 | 006937
184 | 006964
185 | 007012
186 | 007057
187 | 007106
188 | 007164
189 | 007237
190 | 007248
191 | 007272
192 | 007319
193 | 007332
194 | 007335
195 | 007364
196 | 007406
197 | 007473
198 | 007499
199 | 007549
200 | 007643
201 | 007644
202 | 007722
203 | 007752
204 | 007839
205 | 007913
206 | 008010
207 | 008039
208 | 008071
209 | 008155
210 | 008156
211 | 008162
212 | 008167
213 | 008185
214 | 008196
215 | 008206
216 | 008207
217 | 008239
218 | 008245
219 | 008271
220 | 008380
221 | 008382
222 | 008389
223 | 008457
224 | 008458
225 | 008471
226 | 008547
227 | 008570
228 | 008599
229 | 008626
230 | 008661
231 | 008754
232 | 008821
233 | 008846
234 | 008850
235 | 008904
236 | 008937
237 | 008947
238 | 009172
239 | 009261
240 | 009313
241 | 009367
242 | 009416
243 | 009462
244 | 009514
245 | 009581
246 | 009602
247 | 009606
248 | 009630
249 | 009741
250 | 009750
251 | 009787
252 | 009854
253 | 009925
254 | 009928
255 |
--------------------------------------------------------------------------------
/data/imageset_files/bottle_trainval_detect.txt:
--------------------------------------------------------------------------------
1 | 000050
2 | 000170
3 | 000200
4 | 000222
5 | 000224
6 | 000229
7 | 000250
8 | 000269
9 | 000344
10 | 000367
11 | 000381
12 | 000498
13 | 000531
14 | 000559
15 | 000564
16 | 000684
17 | 000726
18 | 000753
19 | 000865
20 | 000929
21 | 000951
22 | 000999
23 | 001045
24 | 001185
25 | 001229
26 | 001272
27 | 001292
28 | 001346
29 | 001393
30 | 001408
31 | 001444
32 | 001451
33 | 001465
34 | 001490
35 | 001493
36 | 001522
37 | 001537
38 | 001563
39 | 001580
40 | 001590
41 | 001673
42 | 001682
43 | 001754
44 | 001756
45 | 001777
46 | 001843
47 | 001963
48 | 001970
49 | 002030
50 | 002098
51 | 002112
52 | 002139
53 | 002221
54 | 002226
55 | 002253
56 | 002287
57 | 002350
58 | 002404
59 | 002442
60 | 002444
61 | 002518
62 | 002545
63 | 002572
64 | 002613
65 | 002641
66 | 002658
67 | 002745
68 | 002881
69 | 002931
70 | 002939
71 | 002941
72 | 002953
73 | 002965
74 | 003056
75 | 003088
76 | 003108
77 | 003122
78 | 003149
79 | 003159
80 | 003181
81 | 003185
82 | 003207
83 | 003219
84 | 003247
85 | 003259
86 | 003262
87 | 003269
88 | 003376
89 | 003449
90 | 003451
91 | 003496
92 | 003556
93 | 003566
94 | 003577
95 | 003678
96 | 003729
97 | 003783
98 | 003859
99 | 003865
100 | 003932
101 | 004075
102 | 004110
103 | 004140
104 | 004152
105 | 004158
106 | 004193
107 | 004279
108 | 004280
109 | 004291
110 | 004296
111 | 004322
112 | 004333
113 | 004370
114 | 004466
115 | 004468
116 | 004479
117 | 004493
118 | 004495
119 | 004508
120 | 004588
121 | 004671
122 | 004701
123 | 004710
124 | 004718
125 | 004742
126 | 004753
127 | 004886
128 | 004916
129 | 004926
130 | 004943
131 | 005052
132 | 005122
133 | 005160
134 | 005195
135 | 005242
136 | 005245
137 | 005304
138 | 005345
139 | 005367
140 | 005385
141 | 005388
142 | 005430
143 | 005441
144 | 005467
145 | 005471
146 | 005541
147 | 005563
148 | 005605
149 | 005606
150 | 005662
151 | 005672
152 | 005814
153 | 005908
154 | 005919
155 | 006061
156 | 006141
157 | 006180
158 | 006189
159 | 006241
160 | 006299
161 | 006335
162 | 006363
163 | 006381
164 | 006398
165 | 006409
166 | 006448
167 | 006483
168 | 006486
169 | 006503
170 | 006519
171 | 006576
172 | 006610
173 | 006626
174 | 006648
175 | 006687
176 | 006727
177 | 006731
178 | 006828
179 | 006858
180 | 006892
181 | 006908
182 | 006912
183 | 006917
184 | 006919
185 | 006939
186 | 006948
187 | 007007
188 | 007021
189 | 007048
190 | 007056
191 | 007104
192 | 007117
193 | 007121
194 | 007141
195 | 007177
196 | 007191
197 | 007250
198 | 007323
199 | 007351
200 | 007370
201 | 007410
202 | 007457
203 | 007461
204 | 007493
205 | 007511
206 | 007558
207 | 007572
208 | 007631
209 | 007649
210 | 007740
211 | 007751
212 | 007762
213 | 007790
214 | 007798
215 | 007899
216 | 007987
217 | 008042
218 | 008061
219 | 008137
220 | 008139
221 | 008141
222 | 008164
223 | 008204
224 | 008213
225 | 008319
226 | 008467
227 | 008592
228 | 008783
229 | 008841
230 | 008931
231 | 008953
232 | 008962
233 | 008967
234 | 008982
235 | 008989
236 | 009016
237 | 009027
238 | 009100
239 | 009108
240 | 009174
241 | 009242
242 | 009290
243 | 009299
244 | 009375
245 | 009388
246 | 009420
247 | 009458
248 | 009481
249 | 009497
250 | 009585
251 | 009613
252 | 009647
253 | 009654
254 | 009664
255 | 009666
256 | 009679
257 | 009687
258 | 009709
259 | 009726
260 | 009878
261 | 009902
262 | 009917
263 |
--------------------------------------------------------------------------------
/data/imageset_files/pottedplant_trainval_detect.txt:
--------------------------------------------------------------------------------
1 | 000052
2 | 000073
3 | 000235
4 | 000307
5 | 000391
6 | 000484
7 | 000592
8 | 000601
9 | 000628
10 | 000661
11 | 000710
12 | 000711
13 | 000729
14 | 000780
15 | 000854
16 | 000865
17 | 000867
18 | 000868
19 | 000889
20 | 000917
21 | 000923
22 | 000962
23 | 001079
24 | 001082
25 | 001091
26 | 001112
27 | 001142
28 | 001149
29 | 001154
30 | 001164
31 | 001212
32 | 001408
33 | 001420
34 | 001430
35 | 001444
36 | 001451
37 | 001464
38 | 001479
39 | 001580
40 | 001593
41 | 001729
42 | 001754
43 | 001777
44 | 001898
45 | 002015
46 | 002049
47 | 002108
48 | 002116
49 | 002176
50 | 002179
51 | 002192
52 | 002284
53 | 002305
54 | 002320
55 | 002347
56 | 002361
57 | 002362
58 | 002378
59 | 002427
60 | 002483
61 | 002542
62 | 002559
63 | 002569
64 | 002594
65 | 002598
66 | 002658
67 | 002668
68 | 002745
69 | 002775
70 | 002798
71 | 002817
72 | 002820
73 | 002924
74 | 002946
75 | 002967
76 | 003002
77 | 003004
78 | 003024
79 | 003034
80 | 003044
81 | 003093
82 | 003146
83 | 003186
84 | 003189
85 | 003202
86 | 003211
87 | 003239
88 | 003284
89 | 003301
90 | 003367
91 | 003374
92 | 003419
93 | 003436
94 | 003450
95 | 003462
96 | 003466
97 | 003506
98 | 003509
99 | 003555
100 | 003622
101 | 003629
102 | 003694
103 | 003699
104 | 003758
105 | 003791
106 | 003796
107 | 003834
108 | 003877
109 | 003924
110 | 003932
111 | 003956
112 | 004017
113 | 004028
114 | 004037
115 | 004105
116 | 004121
117 | 004212
118 | 004274
119 | 004275
120 | 004292
121 | 004364
122 | 004392
123 | 004433
124 | 004446
125 | 004455
126 | 004484
127 | 004558
128 | 004631
129 | 004682
130 | 004699
131 | 004707
132 | 004714
133 | 004742
134 | 004825
135 | 004895
136 | 004898
137 | 004939
138 | 004948
139 | 004960
140 | 004991
141 | 005061
142 | 005129
143 | 005143
144 | 005145
145 | 005160
146 | 005183
147 | 005186
148 | 005190
149 | 005224
150 | 005346
151 | 005515
152 | 005517
153 | 005531
154 | 005605
155 | 005641
156 | 005676
157 | 005682
158 | 005687
159 | 005762
160 | 005794
161 | 005813
162 | 005817
163 | 005821
164 | 005874
165 | 005894
166 | 005919
167 | 005923
168 | 005985
169 | 005991
170 | 006029
171 | 006069
172 | 006088
173 | 006100
174 | 006159
175 | 006188
176 | 006189
177 | 006270
178 | 006299
179 | 006319
180 | 006351
181 | 006447
182 | 006575
183 | 006605
184 | 006627
185 | 006681
186 | 006726
187 | 006755
188 | 006765
189 | 006777
190 | 006806
191 | 006859
192 | 006956
193 | 007078
194 | 007130
195 | 007180
196 | 007302
197 | 007344
198 | 007356
199 | 007390
200 | 007396
201 | 007451
202 | 007511
203 | 007519
204 | 007558
205 | 007600
206 | 007619
207 | 007624
208 | 007664
209 | 007673
210 | 007704
211 | 007715
212 | 007781
213 | 007795
214 | 007814
215 | 007865
216 | 007890
217 | 007909
218 | 007925
219 | 007956
220 | 007999
221 | 008012
222 | 008029
223 | 008043
224 | 008064
225 | 008082
226 | 008106
227 | 008127
228 | 008140
229 | 008171
230 | 008199
231 | 008216
232 | 008318
233 | 008341
234 | 008424
235 | 008465
236 | 008468
237 | 008536
238 | 008549
239 | 008557
240 | 008587
241 | 008655
242 | 008688
243 | 008733
244 | 008749
245 | 008806
246 | 008835
247 | 008933
248 | 008970
249 | 008987
250 | 008997
251 | 009032
252 | 009068
253 | 009078
254 | 009087
255 | 009123
256 | 009175
257 | 009181
258 | 009194
259 | 009215
260 | 009252
261 | 009279
262 | 009342
263 | 009419
264 | 009443
265 | 009491
266 | 009519
267 | 009634
268 | 009638
269 | 009647
270 | 009686
271 | 009878
272 | 009884
273 | 009887
274 |
--------------------------------------------------------------------------------
/data/imageset_files/horse_test_detect.txt:
--------------------------------------------------------------------------------
1 | 000010
2 | 000022
3 | 000056
4 | 000166
5 | 000168
6 | 000237
7 | 000248
8 | 000267
9 | 000319
10 | 000330
11 | 000356
12 | 000378
13 | 000392
14 | 000393
15 | 000410
16 | 000412
17 | 000413
18 | 000434
19 | 000445
20 | 000455
21 | 000475
22 | 000604
23 | 000616
24 | 000623
25 | 000666
26 | 000704
27 | 000737
28 | 000783
29 | 000836
30 | 000866
31 | 000978
32 | 000986
33 | 001013
34 | 001063
35 | 001114
36 | 001183
37 | 001245
38 | 001300
39 | 001317
40 | 001320
41 | 001398
42 | 001412
43 | 001417
44 | 001452
45 | 001574
46 | 001584
47 | 001624
48 | 001635
49 | 001692
50 | 001703
51 | 001769
52 | 001794
53 | 001819
54 | 001823
55 | 001865
56 | 001900
57 | 001939
58 | 001955
59 | 002017
60 | 002031
61 | 002046
62 | 002050
63 | 002071
64 | 002072
65 | 002204
66 | 002269
67 | 002283
68 | 002331
69 | 002360
70 | 002398
71 | 002409
72 | 002421
73 | 002511
74 | 002553
75 | 002573
76 | 002596
77 | 002655
78 | 002692
79 | 002780
80 | 002790
81 | 002819
82 | 002828
83 | 002837
84 | 002909
85 | 002921
86 | 003022
87 | 003041
88 | 003084
89 | 003173
90 | 003182
91 | 003187
92 | 003193
93 | 003222
94 | 003232
95 | 003235
96 | 003237
97 | 003304
98 | 003473
99 | 003531
100 | 003557
101 | 003562
102 | 003579
103 | 003583
104 | 003584
105 | 003653
106 | 003680
107 | 003720
108 | 003734
109 | 003764
110 | 003832
111 | 003852
112 | 003922
113 | 003951
114 | 003955
115 | 003978
116 | 004006
117 | 004084
118 | 004103
119 | 004126
120 | 004177
121 | 004234
122 | 004266
123 | 004294
124 | 004301
125 | 004309
126 | 004381
127 | 004407
128 | 004492
129 | 004522
130 | 004538
131 | 004545
132 | 004561
133 | 004564
134 | 004589
135 | 004629
136 | 004749
137 | 004756
138 | 004803
139 | 004804
140 | 004807
141 | 004865
142 | 004874
143 | 004932
144 | 004957
145 | 004970
146 | 005053
147 | 005132
148 | 005139
149 | 005182
150 | 005198
151 | 005299
152 | 005334
153 | 005339
154 | 005342
155 | 005409
156 | 005456
157 | 005505
158 | 005513
159 | 005529
160 | 005564
161 | 005572
162 | 005610
163 | 005677
164 | 005722
165 | 005733
166 | 005822
167 | 005882
168 | 005890
169 | 005944
170 | 005967
171 | 005973
172 | 006002
173 | 006106
174 | 006122
175 | 006143
176 | 006207
177 | 006237
178 | 006361
179 | 006364
180 | 006365
181 | 006368
182 | 006401
183 | 006478
184 | 006511
185 | 006521
186 | 006522
187 | 006540
188 | 006555
189 | 006586
190 | 006623
191 | 006649
192 | 006672
193 | 006728
194 | 006756
195 | 006779
196 | 006793
197 | 006811
198 | 006863
199 | 006873
200 | 006875
201 | 006897
202 | 006936
203 | 006982
204 | 007055
205 | 007134
206 | 007229
207 | 007252
208 | 007253
209 | 007265
210 | 007293
211 | 007303
212 | 007310
213 | 007392
214 | 007412
215 | 007509
216 | 007587
217 | 007609
218 | 007616
219 | 007620
220 | 007623
221 | 007636
222 | 007750
223 | 007766
224 | 007774
225 | 007789
226 | 007805
227 | 007874
228 | 007942
229 | 007972
230 | 007975
231 | 008010
232 | 008027
233 | 008059
234 | 008109
235 | 008124
236 | 008147
237 | 008183
238 | 008192
239 | 008205
240 | 008228
241 | 008256
242 | 008259
243 | 008298
244 | 008303
245 | 008314
246 | 008354
247 | 008399
248 | 008476
249 | 008574
250 | 008593
251 | 008605
252 | 008786
253 | 008802
254 | 008804
255 | 008812
256 | 008882
257 | 008899
258 | 009046
259 | 009067
260 | 009074
261 | 009101
262 | 009130
263 | 009231
264 | 009369
265 | 009380
266 | 009505
267 | 009525
268 | 009535
269 | 009569
270 | 009661
271 | 009665
272 | 009722
273 | 009768
274 | 009803
275 | 009903
276 | 009931
277 | 009933
278 | 009937
279 | 009957
280 |
--------------------------------------------------------------------------------
/data/imageset_files/horse_trainval_detect.txt:
--------------------------------------------------------------------------------
1 | 000009
2 | 000017
3 | 000133
4 | 000150
5 | 000173
6 | 000194
7 | 000214
8 | 000275
9 | 000328
10 | 000332
11 | 000347
12 | 000372
13 | 000407
14 | 000435
15 | 000470
16 | 000483
17 | 000523
18 | 000524
19 | 000577
20 | 000667
21 | 000695
22 | 000702
23 | 000760
24 | 000764
25 | 000799
26 | 000879
27 | 000904
28 | 000991
29 | 001064
30 | 001071
31 | 001084
32 | 001236
33 | 001241
34 | 001254
35 | 001287
36 | 001337
37 | 001405
38 | 001420
39 | 001515
40 | 001521
41 | 001523
42 | 001556
43 | 001586
44 | 001628
45 | 001632
46 | 001690
47 | 001699
48 | 001711
49 | 001724
50 | 001727
51 | 001730
52 | 001766
53 | 001807
54 | 001894
55 | 001927
56 | 001945
57 | 001960
58 | 002042
59 | 002043
60 | 002183
61 | 002187
62 | 002190
63 | 002213
64 | 002238
65 | 002249
66 | 002261
67 | 002273
68 | 002329
69 | 002405
70 | 002448
71 | 002471
72 | 002520
73 | 002554
74 | 002555
75 | 002633
76 | 002657
77 | 002678
78 | 002684
79 | 002713
80 | 002735
81 | 002774
82 | 002786
83 | 002800
84 | 002804
85 | 002842
86 | 002858
87 | 003015
88 | 003017
89 | 003027
90 | 003121
91 | 003189
92 | 003210
93 | 003290
94 | 003294
95 | 003303
96 | 003307
97 | 003362
98 | 003403
99 | 003424
100 | 003429
101 | 003436
102 | 003492
103 | 003611
104 | 003618
105 | 003625
106 | 003645
107 | 003691
108 | 003696
109 | 003732
110 | 003735
111 | 003754
112 | 003772
113 | 003779
114 | 003889
115 | 003939
116 | 003948
117 | 003960
118 | 003970
119 | 004051
120 | 004077
121 | 004117
122 | 004146
123 | 004168
124 | 004195
125 | 004307
126 | 004323
127 | 004339
128 | 004372
129 | 004389
130 | 004450
131 | 004487
132 | 004535
133 | 004537
134 | 004553
135 | 004625
136 | 004634
137 | 004656
138 | 004662
139 | 004686
140 | 004722
141 | 004788
142 | 004789
143 | 004815
144 | 004834
145 | 004872
146 | 004902
147 | 004935
148 | 004950
149 | 004968
150 | 004995
151 | 005081
152 | 005094
153 | 005114
154 | 005145
155 | 005161
156 | 005236
157 | 005248
158 | 005278
159 | 005306
160 | 005331
161 | 005351
162 | 005419
163 | 005453
164 | 005461
165 | 005511
166 | 005550
167 | 005568
168 | 005611
169 | 005636
170 | 005700
171 | 005728
172 | 005732
173 | 005741
174 | 005877
175 | 005948
176 | 006095
177 | 006134
178 | 006151
179 | 006249
180 | 006276
181 | 006285
182 | 006295
183 | 006382
184 | 006392
185 | 006428
186 | 006429
187 | 006445
188 | 006455
189 | 006459
190 | 006506
191 | 006572
192 | 006611
193 | 006617
194 | 006645
195 | 006722
196 | 006769
197 | 006789
198 | 006797
199 | 006836
200 | 006847
201 | 006850
202 | 006858
203 | 006862
204 | 006866
205 | 006911
206 | 006933
207 | 006944
208 | 007018
209 | 007035
210 | 007109
211 | 007138
212 | 007163
213 | 007185
214 | 007216
215 | 007234
216 | 007243
217 | 007260
218 | 007308
219 | 007325
220 | 007439
221 | 007448
222 | 007526
223 | 007547
224 | 007570
225 | 007576
226 | 007594
227 | 007603
228 | 007611
229 | 007637
230 | 007697
231 | 007705
232 | 007727
233 | 007748
234 | 007809
235 | 007919
236 | 008019
237 | 008040
238 | 008069
239 | 008142
240 | 008191
241 | 008209
242 | 008248
243 | 008279
244 | 008307
245 | 008311
246 | 008320
247 | 008427
248 | 008437
249 | 008509
250 | 008524
251 | 008526
252 | 008564
253 | 008596
254 | 008610
255 | 008612
256 | 008653
257 | 008699
258 | 008731
259 | 008744
260 | 008805
261 | 008862
262 | 008867
263 | 008876
264 | 008886
265 | 008914
266 | 008919
267 | 009020
268 | 009024
269 | 009114
270 | 009138
271 | 009148
272 | 009202
273 | 009208
274 | 009236
275 | 009251
276 | 009255
277 | 009318
278 | 009331
279 | 009337
280 | 009407
281 | 009438
282 | 009465
283 | 009468
284 | 009512
285 | 009698
286 | 009719
287 | 009732
288 | 009767
289 | 009807
290 | 009808
291 | 009841
292 | 009851
293 | 009874
294 | 009954
295 |
--------------------------------------------------------------------------------
/layers/modules/kl_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from torch.autograd import Variable
5 |
6 |
7 | class KLLoss(nn.Module):
8 | """
9 | KL loss for bounding box regression, from the CVPR 2019 paper
10 | "Bounding Box Regression with Uncertainty for Accurate Object Detection"
11 | by Yihui He, Chenchen Zhu, Jianren Wang, Marios Savvides and Xiangyu Zhang.
12 |
13 | It is a replacement for the smooth L1 loss often used in bounding box regression.
14 |
15 | The regression loss for a coordinate depends on whether |xg − xe| ≤ 1:
16 |
17 | if |xg − xe| ≤ 1: Lreg1 ∝ e^{−α} * 1/2 (xg − xe)^2 + 1/2 α
18 |
19 | and if |xg − xe| > 1:
20 |
21 | Lreg2 = e^{−α} * (|xg − xe| − 1/2) + 1/2 α
22 |
23 | Since the smooth L1 loss already equals 1/2 (xg − xe)^2 in the first case and
24 | |xg − xe| − 1/2 in the second, both cases reduce to e^{−α} * smooth_l1 + 1/2 α.
25 |
26 | PyTorch implementation by Jasper Bakker (JappaB @github)
27 | """
28 | def __init__(self, loc_loss_weight=1.0):
29 | super(KLLoss, self).__init__()
30 |
31 | # Insert your own parameters here if you want to adjust the KL-Loss function
32 |
33 | # option to adjust the size of the loss
34 | self.loc_loss_weight = loc_loss_weight
35 |
36 | def forward(self, xg, xe, alpha):
37 |
38 | """
39 | :param xg: The ground truth of the bounding box coordinates in x1y1x2y2 format
40 | shape: [number_of_boxes, 4]
41 | :param xe: The estimated bounding box coordinates in x1y1x2y2 format
42 | shape: [number_of_boxes, 4]
43 | :param alpha: The log(sigma^2) of the bounding box coordinates in x1y1x2y2 format
44 | shape: [number_of_boxes, 4]
45 | :return: total_kl_loss
46 | """
47 |
48 | assert (xg.shape == xe.shape and xg.shape == alpha.shape), "The shapes of the input tensors must be the same"
49 |
50 |
51 | # smooth L1 equals 1/2 (xg − xe)^2 for |xg − xe| ≤ 1 and |xg − xe| − 1/2 otherwise
52 | smooth_l1 = F.smooth_l1_loss(xe, xg, reduction='none')
53 |
54 | # e^{-α}
55 | exp_min_alpha = torch.exp(-alpha)
56 |
57 | # 1/2 α
58 | half_alpha = 0.5 * alpha
59 |
60 | total_kl_loss = (exp_min_alpha * smooth_l1 + half_alpha).sum()
61 |
62 | return total_kl_loss
63 |
--------------------------------------------------------------------------------
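For orientation, a minimal sketch of calling `KLLoss` on made-up tensors, assuming the repository root is on `PYTHONPATH`; the shapes follow the docstring above and the values are random:

```Python
# A minimal usage sketch for KLLoss (layers/modules/kl_loss.py);
# tensor values below are made up for illustration.
import torch

from layers.modules.kl_loss import KLLoss

criterion = KLLoss(loc_loss_weight=1.0)

n_boxes = 8
xg = torch.rand(n_boxes, 4)                          # ground truth (x1, y1, x2, y2)
xe = xg + 0.1 * torch.randn(n_boxes, 4)              # noisy estimates
alpha = torch.zeros(n_boxes, 4, requires_grad=True)  # log(sigma^2) per coordinate

loss = criterion(xg, xe, alpha)  # e^{-alpha} * smooth_l1 + alpha/2, summed
loss.backward()                  # gradients flow into alpha
print(loss.item())
```

With `alpha` fixed at zero, the loss reduces to the plain smooth L1 sum, which makes a convenient sanity check.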
/data/6_class_seed_set.json:
--------------------------------------------------------------------------------
1 | {
2 | "dataset_name": "VOC07",
3 | "seed_set": {
4 | "image_set_idx": [
5 | "009679",
6 | "009388",
7 | "009100",
8 | "007798",
9 | "002881",
10 | "000250",
11 | "004152",
12 | "006576",
13 | "000344",
14 | "008204",
15 | "002253",
16 | "005467",
17 | "009290",
18 | "007457",
19 | "006626",
20 | "000367",
21 | "008931",
22 | "002350",
23 | "007141",
24 | "006363",
25 | "006727",
26 | "000498",
27 |
28 | "005496",
29 | "007743",
30 | "006965",
31 | "000154",
32 | "002234",
33 | "003337",
34 | "005614",
35 | "004532",
36 | "000184",
37 | "002372",
38 | "000382",
39 | "008498",
40 | "005181",
41 | "007139",
42 | "007685",
43 | "003098",
44 | "003722",
45 | "002659",
46 | "007460",
47 | "007803",
48 | "002208",
49 |
50 | "002786",
51 | "009841",
52 | "009114",
53 | "004834",
54 | "001628",
55 | "006445",
56 | "009874",
57 | "006285",
58 | "009318",
59 |
60 | "005821",
61 | "007396",
62 | "001777",
63 | "006159",
64 | "009279",
65 | "004446",
66 | "006188",
67 | "007302",
68 | "004105",
69 | "003758",
70 | "009638",
71 | "009175",
72 | "002946",
73 | "008749",
74 | "008082",
75 | "002775",
76 |
77 | "009863",
78 | "007208",
79 | "007821",
80 | "009900",
81 | "002533",
82 | "002643",
83 | "007374",
84 | "005020",
85 | "001980",
86 | "009106",
87 | "002691",
88 | "005756",
89 | "001360",
90 | "008706",
91 | "007475",
92 |
93 | "008592",
94 | "009842",
95 | "003681",
96 | "007230",
97 | "001714",
98 | "007165",
99 | "004423",
100 | "004490",
101 | "006679",
102 | "001750",
103 | "006833"
104 | ],
105 | "class_dist": []
106 | },
107 | "val_set": {
108 | "image_set_idx": [
109 | "006409",
110 | "000269",
111 | "006648",
112 | "002641",
113 | "004671",
114 | "007121",
115 | "007649",
116 | "009878",
117 | "000381",
118 | "004886",
119 |
120 | "000931",
121 | "002465",
122 | "007361",
123 | "001732",
124 | "003860",
125 | "000826",
126 | "006123",
127 | "009603",
128 | "005705",
129 | "007365",
130 |
131 | "001236",
132 | "004625",
133 | "007216",
134 | "005145",
135 | "009138",
136 | "008596",
137 | "003492",
138 | "007448",
139 | "009407",
140 | "005236",
141 |
142 | "007558",
143 | "000592",
144 | "003301",
145 | "007999",
146 | "004631",
147 | "007390",
148 | "007890",
149 | "006351",
150 | "000710",
151 | "001451",
152 |
153 | "003636",
154 | "002116",
155 | "006206",
156 | "004244",
157 | "000245",
158 | "003256",
159 | "009839",
160 | "007663",
161 | "006330",
162 | "000545",
163 |
164 | "003705",
165 | "002263",
166 | "005469",
167 | "009816",
168 | "000107",
169 | "002593",
170 | "003698",
171 | "006944",
172 | "000900",
173 | "004312"
174 |
175 | ],
176 | "class_dist": []
177 | },
178 | "train_set": [
179 | "009679",
180 | "009388",
181 | "009100",
182 | "007798",
183 | "002881",
184 | "000250",
185 | "004152",
186 | "006576",
187 | "000344",
188 | "008204",
189 | "002253",
190 | "005467",
191 | "009290",
192 | "007457",
193 | "006626",
194 | "000367",
195 | "008931",
196 | "002350",
197 | "007141",
198 | "006363",
199 | "006727",
200 | "000498",
201 |
202 | "005496",
203 | "007743",
204 | "006965",
205 | "000154",
206 | "002234",
207 | "003337",
208 | "005614",
209 | "004532",
210 | "000184",
211 | "002372",
212 | "000382",
213 | "008498",
214 | "005181",
215 | "007139",
216 | "007685",
217 | "003098",
218 | "003722",
219 | "002659",
220 | "007460",
221 | "007803",
222 | "002208",
223 |
224 | "002786",
225 | "009841",
226 | "009114",
227 | "004834",
228 | "001628",
229 | "006445",
230 | "009874",
231 | "006285",
232 | "009318",
233 |
234 | "005821",
235 | "007396",
236 | "001777",
237 | "006159",
238 | "009279",
239 | "004446",
240 | "006188",
241 | "007302",
242 | "004105",
243 | "003758",
244 | "009638",
245 | "009175",
246 | "002946",
247 | "008749",
248 | "008082",
249 | "002775",
250 |
251 | "009863",
252 | "007208",
253 | "007821",
254 | "009900",
255 | "002533",
256 | "002643",
257 | "007374",
258 | "005020",
259 | "001980",
260 | "009106",
261 | "002691",
262 | "005756",
263 | "001360",
264 | "008706",
265 | "007475",
266 |
267 | "008592",
268 | "009842",
269 | "003681",
270 | "007230",
271 | "001714",
272 | "007165",
273 | "004423",
274 | "004490",
275 | "006679",
276 | "001750",
277 | "006833"
278 | ]
279 | }
--------------------------------------------------------------------------------
/data/empty_seed.json:
--------------------------------------------------------------------------------
1 | {
2 | "dataset_name": "VOC07",
3 | "seed_set": {
4 | "image_set_idx": [],
5 | "class_dist": [
6 | ]
7 | },
8 | "val_set": {
9 | "image_set_idx": [
10 | "004857",
11 | "000702",
12 | "004895",
13 | "001012",
14 | "001149",
15 | "005138",
16 | "008688",
17 | "009098",
18 | "003085",
19 | "005812",
20 | "007813",
21 | "006681",
22 | "004872",
23 | "001510",
24 | "005903",
25 | "009762",
26 | "000023",
27 | "007486",
28 | "008456",
29 | "006939",
30 | "003824",
31 | "003889",
32 | "007154",
33 | "000997",
34 | "004100",
35 | "003603",
36 | "002957",
37 | "000814",
38 | "004753",
39 | "009813",
40 | "007691",
41 | "000078",
42 | "006869",
43 | "006866",
44 | "003696",
45 | "006038",
46 | "002965",
47 | "004423",
48 | "004548",
49 | "003162",
50 | "000541",
51 | "000777",
52 | "008031",
53 | "008744",
54 | "009805",
55 | "005006",
56 | "005895",
57 | "003165",
58 | "007074",
59 | "005676",
60 | "004146",
61 | "003118",
62 | "000337",
63 | "001563",
64 | "000501",
65 | "004797",
66 | "001383",
67 | "002987",
68 | "007208",
69 | "008292",
70 | "001455",
71 | "000661",
72 | "005405",
73 | "004003",
74 | "001768",
75 | "006275",
76 | "002512",
77 | "004450",
78 | "002323",
79 | "008453",
80 | "009897",
81 | "005101",
82 | "008514",
83 | "006844",
84 | "009726",
85 | "003331",
86 | "001053",
87 | "004565",
88 | "001978",
89 | "001442",
90 | "007398",
91 | "000637",
92 | "008503",
93 | "004842",
94 | "002595",
95 | "005263",
96 | "004837",
97 | "002471",
98 | "007449",
99 | "001989",
100 | "009368",
101 | "005461",
102 | "009153",
103 | "007297",
104 | "002476",
105 | "008883",
106 | "002801",
107 | "005257",
108 | "003455",
109 | "005672",
110 | "009822",
111 | "001203",
112 | "009619",
113 | "004902",
114 | "002362",
115 | "006233",
116 | "003847",
117 | "006968",
118 | "004242",
119 | "002348",
120 | "000394",
121 | "008628",
122 | "000050",
123 | "000889",
124 | "000438",
125 | "007089",
126 | "007614",
127 | "003912",
128 | "005868",
129 | "007090",
130 | "001881",
131 | "008132",
132 | "008940",
133 | "004359",
134 | "004105",
135 | "002540",
136 | "001247",
137 | "002477",
138 | "002658",
139 | "001004",
140 | "001408",
141 | "002815",
142 | "001944",
143 | "007432",
144 | "006911",
145 | "000713",
146 | "009695",
147 | "000771",
148 | "000917",
149 | "002569",
150 | "002934",
151 | "005719",
152 | "006747",
153 | "007054",
154 | "009373",
155 | "007590",
156 | "006718",
157 | "009060",
158 | "004727",
159 | "008968",
160 | "000060",
161 | "009105",
162 | "007915",
163 | "009270",
164 | "007600",
165 | "002284",
166 | "002226",
167 | "005047",
168 | "000513",
169 | "002134",
170 | "001738",
171 | "008638",
172 | "004768",
173 | "006848",
174 | "008526",
175 | "007902",
176 | "009446",
177 | "005039",
178 | "003363",
179 | "002256",
180 | "008730",
181 | "006515",
182 | "002696",
183 | "005156",
184 | "005655",
185 | "000892",
186 | "003259",
187 | "006251",
188 | "004326",
189 | "009422",
190 | "007490",
191 | "000219",
192 | "001011",
193 | "000153",
194 | "007300",
195 | "002779",
196 | "006066",
197 | "009413",
198 | "002151",
199 | "007540",
200 | "005208",
201 | "008900",
202 | "008160",
203 | "008268",
204 | "008211",
205 | "003335",
206 | "005114",
207 | "009692",
208 | "008144",
209 | "009955",
210 | "002359",
211 | "001273",
212 | "001707",
213 | "008523",
214 | "002366",
215 | "009351",
216 | "002450",
217 | "001484",
218 | "002337",
219 | "008933",
220 | "004879",
221 | "004936",
222 | "002901",
223 | "007025",
224 | "000387",
225 | "005588",
226 | "003382",
227 | "000171",
228 | "005841",
229 | "007968",
230 | "003116",
231 | "009911",
232 | "001250",
233 | "009268",
234 | "006341",
235 | "009215",
236 | "006739",
237 | "003137",
238 | "006825",
239 | "007394",
240 | "006609",
241 | "008341",
242 | "008873",
243 | "005327",
244 | "006437",
245 | "000032",
246 | "002501",
247 | "009405",
248 | "008454",
249 | "005599",
250 | "001148",
251 | "001724",
252 | "008452",
253 | "008833",
254 | "001486",
255 | "006240",
256 | "002935",
257 | "002183",
258 | "004011",
259 | "003253"
260 | ],
261 | "class_dist": [
262 | 17,
263 | 24,
264 | 25,
265 | 17,
266 | 38,
267 | 13,
268 | 67,
269 | 15,
270 | 33,
271 | 9,
272 | 10,
273 | 30,
274 | 22,
275 | 20,
276 | 255,
277 | 32,
278 | 21,
279 | 12,
280 | 8,
281 | 18
282 | ]
283 | },
284 | "train_set": []
285 | }
--------------------------------------------------------------------------------
/layers/modules/multibox_loss.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | from torch.autograd import Variable
6 | from ..box_utils import match, log_sum_exp
7 | from .kl_loss import KLLoss
8 | from data import voc as cfg
9 |
10 | class MultiBoxLoss(nn.Module):
11 | """SSD Weighted Loss Function
12 | Compute Targets:
13 | 1) Produce Confidence Target Indices by matching ground truth boxes
14 | with (default) 'priorboxes' that have jaccard index > threshold parameter
15 | (default threshold: 0.5).
16 | 2) Produce localization target by 'encoding' variance into offsets of ground
17 | truth boxes and their matched 'priorboxes'.
18 | 3) Hard negative mining to filter the excessive number of negative examples
19 | that comes with using a large number of default bounding boxes.
20 | (default negative:positive ratio 3:1)
21 | Objective Loss:
22 | L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
23 | Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss
24 | weighted by α which is set to 1 by cross val.
25 | Args:
26 | c: class confidences,
27 | l: predicted boxes,
28 | g: ground truth boxes
29 | N: number of matched default boxes
30 | See: https://arxiv.org/pdf/1512.02325.pdf for more details.
31 | """
32 |
33 | def __init__(self, num_classes, overlap_thresh, prior_for_matching,
34 | bkg_label, neg_mining, neg_pos, neg_overlap, encode_target,
35 | use_gpu=True, modeltype='SSD300'):
36 | super(MultiBoxLoss, self).__init__()
37 | self.use_gpu = use_gpu
38 | self.num_classes = num_classes
39 | self.threshold = overlap_thresh
40 | self.background_label = bkg_label
41 | self.encode_target = encode_target
42 | self.use_prior_for_matching = prior_for_matching
43 | self.do_neg_mining = neg_mining
44 | self.negpos_ratio = neg_pos
45 | self.neg_overlap = neg_overlap
46 | self.variance = cfg['variance']
47 | self.modeltype = modeltype
48 | if modeltype == 'SSD300KL':
49 | self.kl_loss = KLLoss(loc_loss_weight = 1.0)
50 |
51 | def forward(self, predictions, targets, args=None):  # todo: remove args
52 | """Multibox Loss
53 | Args:
54 | predictions (tuple): A tuple containing loc preds, conf preds,
55 | and prior boxes from SSD net.
56 | conf shape: torch.size(batch_size,num_priors,num_classes)
57 | loc shape: torch.size(batch_size,num_priors,4)
58 | priors shape: torch.size(num_priors,4)
59 |
60 | targets (tensor): Ground truth boxes and labels for a batch,
61 | shape: [batch_size,num_objs,5] (last idx is the label).
62 | """
63 | if self.modeltype != 'SSD300KL':
64 | loc_data, conf_data, priors = predictions
65 | else:
66 | loc_data, conf_data, priors, loc_std = predictions
67 |
68 | num = loc_data.size(0)
69 | priors = priors[:loc_data.size(1), :]
70 | num_priors = (priors.size(0))
71 |
72 | # match priors (default boxes) and ground truth boxes
73 | loc_t = torch.Tensor(num, num_priors, 4)
74 | conf_t = torch.LongTensor(num, num_priors) # just a mask if matched or not
75 |
76 | for idx in range(num):
77 | truths = targets[idx][:, :-1].data
78 | labels = targets[idx][:, -1].data
79 | defaults = priors.data
80 | match(self.threshold, truths, defaults, self.variance, labels,
81 | loc_t, conf_t, idx, self.modeltype)
82 |
83 | if self.use_gpu:
84 | loc_t = loc_t.cuda()
85 | conf_t = conf_t.cuda()
86 |
87 | # wrap targets
88 | loc_t = Variable(loc_t, requires_grad=False)
89 | conf_t = Variable(conf_t, requires_grad=False)
90 |
91 | pos = conf_t > 0
92 | num_pos = pos.sum(dim=1, keepdim=True)
93 |
94 | # Localization Loss (Smooth L1)
95 | # Shape: [batch,num_priors,4]
96 | pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
97 | loc_p = loc_data[pos_idx].view(-1, 4)
98 | loc_t = loc_t[pos_idx].view(-1, 4)
99 | if self.modeltype != 'SSD300KL':
100 | loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False).double()
101 | else:
102 | loss_l = self.kl_loss(loc_p, loc_t, loc_std[pos_idx].view(-1, 4)).double()
103 |
104 | # Compute max conf across batch for hard negative mining
105 | batch_conf = conf_data.view(-1, self.num_classes)
106 | loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))
107 |
108 | # Hard Negative Mining
109 | loss_c = loss_c.view(num, -1)
110 | loss_c[pos] = 0 # filter out pos boxes for now
111 |
112 | _, loss_idx = loss_c.sort(1, descending=True)
113 | _, idx_rank = loss_idx.sort(1)
114 | num_pos = pos.long().sum(1, keepdim=True)
115 | num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
116 | neg = idx_rank < num_neg.expand_as(idx_rank)
117 |
118 | # Confidence Loss Including Positive and Negative Examples
119 | pos_idx = pos.unsqueeze(2).expand_as(conf_data)
120 | neg_idx = neg.unsqueeze(2).expand_as(conf_data)
121 | conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1, self.num_classes)
122 | targets_weighted = conf_t[(pos+neg).gt(0)]
123 | # if num_classes == 2:
124 | # loss_c = F.binary_cross_entropy(torch.sigmoid(conf_p)[:,1], targets_weighted.float(),size_average=False).double()
125 | # else:
126 | loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False).double()
127 |
128 | # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
129 |
130 | N = num_pos.data.sum().double()
131 | loss_l /= N
132 | loss_c /= N
133 | return loss_l, loss_c
134 |
--------------------------------------------------------------------------------
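The double sort in the hard negative mining step above is easy to misread, so here is a self-contained sketch of just that rank trick, with made-up loss values:

```Python
# Standalone sketch of the double-sort rank trick used for hard negative
# mining in multibox_loss.py; the loss values are made up.
import torch

loss_c = torch.tensor([[0.2, 0.9, 0.1, 0.7, 0.4]])  # per-prior conf loss, one image
num_pos = torch.tensor([[1]])                       # pretend one positive prior

_, loss_idx = loss_c.sort(1, descending=True)  # prior indices, hardest first
_, idx_rank = loss_idx.sort(1)                 # rank of each prior in that ordering

num_neg = torch.clamp(3 * num_pos, max=loss_c.size(1) - 1)
neg = idx_rank < num_neg.expand_as(idx_rank)   # keep the 3 hardest negatives

print(idx_rank)  # [[3, 0, 4, 1, 2]]: prior 1 is hardest (rank 0), prior 2 easiest
print(neg)       # [[False, True, False, True, True]]
```

Sorting once orders the priors by loss; sorting the resulting index tensor recovers each prior's rank in that ordering, so a single `<` comparison selects the hardest negatives without a loop.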
/README.md:
--------------------------------------------------------------------------------
1 | # Active Learning for Object Detection With Localization Uncertainty from Sampling-Based Probabilistic Bounding Boxes
2 | In this repo you can find the code belonging to my master's thesis, titled "Active Learning for Object Detection With Localization Uncertainty from Sampling-Based Probabilistic Bounding Boxes". I also uploaded the pdf of my thesis [here](https://github.com/JappaB/Active_Learning_Object_Detection/blob/master/Thesis_Jasper_Bakker_Active_Deep_Learning_for_Object_Detection_With_Sampling_Based_Probabilistic_Bounding_Boxes_compressed.pdf). As a very short summary: I researched the use of localization uncertainty, obtained through an ensemble of object detectors, to select more informative images to be labeled. It shows promising results on Pascal VOC 2007, but has not been tested on other datasets. Please let me know your experiences if you use it on different datasets.
3 |
4 | As a basis for my repository I used the excellent [PyTorch implementation of the SSD detector](https://github.com/amdegroot/ssd.pytorch) by Max de Groot and Ellis Brown, retrieved on 19-02-2019. However, as I used the then newest stable version of PyTorch (1.0.1), I did change some of their code to be able to run it. Note that their repo is probably more suitable if you just want to use an SSD written in PyTorch and don't want to perform active learning. Some parts of this readme are copied directly from Max de Groot and Ellis Brown's repo, as my work is built upon their code anyway.
5 |
6 | After finishing my thesis, I cleaned the code a bit and wrote this readme to make it more usable for others. I hope this helps; however, bear in mind that the code is research code and should be viewed as such. Currently I'm traveling through Central and South America. I know the code could still be improved, but it should work if you follow the instructions below. Please post issues if you are serious about using it and don't understand certain parts. I'll see what I can do when I'm back.
7 |
8 |
9 | ### Table of Contents
10 | - Getting Started
11 | - Datasets
12 | - Reference
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 | ## SSD: Single Shot MultiBox Object Detector, in PyTorch
21 | A [PyTorch](http://pytorch.org/) implementation of [Single Shot MultiBox Detector](http://arxiv.org/abs/1512.02325) from the 2016 paper by Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, Cheng-Yang Fu, and Alexander C. Berg. The official and original Caffe code can be found [here](https://github.com/weiliu89/caffe/tree/ssd).
22 |
23 |
24 |
25 |
26 |
27 | ## Getting started
28 | - I supplied a listing of the conda environment I used for my experiments in the [requirements](https://github.com/JappaB/Active_Learning_Object_Detection/blob/master/requirements) file for reproducibility. The most important packages are probably: PyTorch, NumPy, SciPy, cv2 and hdbscan.
29 | - Clone this repository.
30 | - Then download the dataset by following the [instructions](#datasets) below. Note that the Active Learning code has only been completely implemented for the Pascal VOC 2007 dataset.
31 | - As the SSD uses a reduced VGG-16 backbone, download the fc-reduced [VGG-16](https://arxiv.org/abs/1409.1556) PyTorch base network weights at: https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth and put them in a directory called 'weights'
32 | - By default, we assume you have downloaded the file in the `Active_Learning_Object_Detection/weights` dir:
33 |
34 | ```Shell
35 | mkdir weights
36 | cd weights
37 | wget https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth
38 | ```
39 | - Note that a GPU is highly recommended for training the SSD.
40 |
41 | - As there are many parser argument options, I provide two sample run scripts in the [run_scripts](https://github.com/JappaB/Active_Learning_Object_Detection/tree/master/run_scripts/scripts) directory to get a head start. I provided one for the six classes I used in my experiments and one for a single class of interest (thus background vs non-background). To use them, you also need to copy the appropriate imageset files to the imageset folder. You can find the imageset files in `data/imageset_files` and they need to be copied to `~/data/VOCdevkit/VOC2007/ImageSets/Main/` (see the copy sketch below).
42 |
43 | - You are required to give a list of paths to the currently best networks. I provided a script `create_initial_networks.py` to generate these if you don't have any yet. The current settings of this script correspond to the sample run script with the single class. NOTE: a single saved network requires approximately 100MB of storage. Make sure you have enough disk space before running the script.
44 |
45 | - Finally, if you don't want to use one of the provided scripts, the entry point for active learning is the `active_learning_main.py` file.
46 |
47 |
48 | ## What can I find where?
49 | For active learning, the two most important folders are `active_learning_dir` and `active_learning_package`. In the first, the (intermediate) results of the runs (e.g. which images to label next) are saved; in the second, the code for the active learning can be found.
50 |
51 |
52 | ## Datasets
53 | To make things easy, we provide bash scripts to handle the dataset (Pascal VOC) downloads and setup for you. We also provide simple dataset loaders that inherit `torch.utils.data.Dataset`, making them fully compatible with the `torchvision.datasets` [API](http://pytorch.org/docs/torchvision/datasets.html).
54 |
55 |
56 | ### VOC Dataset
57 | PASCAL VOC: Visual Object Classes
58 |
59 | ##### Download VOC2007 trainval & test
60 | ```Shell
61 | # specify a directory for dataset to be downloaded into, else default is ~/data/
62 | sh data/scripts/VOC2007.sh #
63 | ```
64 |
65 | ##### Download VOC2012 trainval
66 | ```Shell
67 | # specify a directory for dataset to be downloaded into, else default is ~/data/
68 | sh data/scripts/VOC2012.sh #
69 | ```
70 |
71 | ### Use a pre-trained SSD network for detection
72 | #### Download a pre-trained network
73 | - We are trying to provide PyTorch `state_dicts` (dict of weight tensors) of the latest SSD model definitions trained on different datasets.
74 | - Currently, we provide the following PyTorch models:
75 | * SSD300 trained on VOC0712 (newest PyTorch weights)
76 | - https://s3.amazonaws.com/amdegroot-models/ssd300_mAP_77.43_v2.pth
77 | * SSD300 trained on VOC0712 (original Caffe weights)
78 | - https://s3.amazonaws.com/amdegroot-models/ssd_300_VOC0712.pth
79 |
80 | ## Authors
81 | Active learning part:
82 | * [**Jasper Bakker**](https://github.com/jappab)
83 |
84 | SSD, Dataloaders, etc. (check their excellent repo at [PyTorch implementation of the SSD detector](https://github.com/amdegroot/ssd.pytorch)):
85 | * [**Max deGroot**](https://github.com/amdegroot)
86 | * [**Ellis Brown**](http://github.com/ellisbrown)
87 |
88 |
--------------------------------------------------------------------------------
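The run-script bullet in the readme above asks you to copy the imageset files by hand; below is a small sketch of that step, assuming the default `~/data/VOCdevkit` location created by the download scripts (the destination directory must already exist):

```Python
# Sketch of the imageset copy step from the readme, assuming the default
# ~/data/VOCdevkit location created by data/scripts/VOC2007.sh.
import shutil
from pathlib import Path

src_dir = Path('data/imageset_files')  # inside this repository
dst_dir = Path.home() / 'data/VOCdevkit/VOC2007/ImageSets/Main'

for txt in sorted(src_dir.glob('*_detect.txt')):
    shutil.copy(txt, dst_dir / txt.name)  # e.g. boat_trainval_detect.txt
    print('copied', txt.name)
```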
/active_learning_package/uncertainty_helpers.py:
--------------------------------------------------------------------------------
1 | import torch.nn.functional as F
2 | import utils.augmentations as augmentations
3 | import torch
4 |
5 | def entropy(confs, already_normalized=True):
6 | """
7 | https://discuss.pytorch.org/t/calculating-the-entropy-loss/14510
8 | softmax proof: https://math.stackexchange.com/questions/331275/softmax-function-and-modelling-probability-distributions
9 |
10 | :param confs: (tensor)
11 | shape: (batch, observations, class_probabilities) where class probabilities are real probabilities (already normalized)
12 | :return: H: (tensor) entropy
13 | shape: (batch, observations)
14 | """
15 | # tested with a uniform and a peak distribution in a tensor
16 |
17 | if not already_normalized:
18 | H = F.softmax(confs, dim=2) * F.log_softmax(confs, dim=2)
19 | H = H.sum(dim=2) * -1.0
20 | else:
21 | H = confs * torch.log(confs)
22 | H = H.sum(dim=2) * -1.0
23 |
24 | return H
25 |
26 | def trace_covariance(cov_0, cov_1):
27 | """
28 | https://obilaniu6266h16.wordpress.com/2016/02/04/einstein-summation-in-numpy/
29 | see trace calculation, however, now we keep the first two dimensions (batches and observations) as free variables
30 |
31 |
32 | Each trace is the sum of the diagonal elements (xx + yy) of a 2x2 covariance matrix.
33 | args:
34 | cov_0: (tensor)
35 | shape: [batch, observations, 2 ,2] #last two dimensions are xx,xy and xy,yy
36 | cov_1: tensor)
37 | shape: [batch, observations, 2 ,2] #last two dimensions are xx,xy and xy,yy
38 | :return:
39 | traces_0: (tensor)
40 | shape: [batch, observation]
41 | traces_1: (tensor)
42 | shape: [batch, observation]
43 | """
44 |
45 | # todo: assert that the trace must be positive
46 | traces_0 = torch.einsum('boxx->bo',cov_0)
47 | traces_1 = torch.einsum('boxx->bo',cov_1)
48 |
49 | return traces_0, traces_1
50 |
51 |
52 |
53 |
54 | def dist_means_observation(mu_0, mu_1):
55 | """
56 | Calculate the (Euclidean) distance between the means of the upper left corner (mu_0) and lower right corner (mu_1) of the bounding box.
57 |
58 | args:
59 | mu_0:
60 | shape: [batch, observations, 2] where the last dim is x1y1
61 | mu_1:
62 | shape: [batch, observations, 2] where the last dim is x2y2
63 | :return:
64 | distances:
65 | shape: [batch, observations]
66 | """
67 |
68 |
69 |
70 | mu_1_minus_0 = mu_1-mu_0
71 | squared = torch.pow(mu_1_minus_0,2)
72 | summed = squared.sum(dim=2)
73 | distances = torch.pow(summed,0.5)
74 |
75 | return distances
76 |
77 | def means_observation(observations):
78 | """
79 | This function is exactly the same as the means_covs_observation below, without the cov part.
80 | """
81 | max_boxes = observations.shape[2]
82 | num_observations = observations.shape[1]
83 | num_batches = observations.shape[0]
84 |
85 | # per bounding box, sum each individual coordinate
86 | summed_coordinates = observations.sum(dim=2)
87 | zeros = observations.le(0.)
88 | zeros_per_box = zeros.sum(dim=3)
89 | N = zeros_per_box.le(3).sum(dim=2).float()
90 | mean = torch.div(summed_coordinates, N.unsqueeze(-1))
91 | return mean
92 |
93 | def means_covs_observation(observations):
94 | """
95 | For a guide on np.einsum (vs using for loops, a lot faster)
96 | (which is really similar to torch.einsum, which is used below to keep gpu speed-ups)
97 | check:
98 | - (short) http://ajcr.net/Basic-guide-to-einsum/
99 | - or (elaborate, but VERY good) https://obilaniu6266h16.wordpress.com/2016/02/04/einstein-summation-in-numpy/
100 | - or (example also involving a covariance calculation) https://medium.com/the-quarks/an-einsum-use-case-8dafcb933c66
101 |
102 | args:
103 | observations: (tensor) combined bounding boxes, only spatial information
104 | one bounding box should have the coordinates like this:
105 | [x0,y0,x1,y1], the coordinates of the upper left and lower right corners
106 | respectively. As each observation can have a variable number of bounding boxes,
107 | the observations that have less than the maximum number of bounding boxes are assumed to be padded
108 | with zeros.
109 |
110 | shape: [batch, observations, max(n_boxes_of_all_obs) ,4]
111 |
112 | :return:
113 | mean: (tensor) shape [batch, observations, 4], last dim is (x0,y0,x1,y1)
114 | cov_0, cov_1: (tensor) shape [batch, observations, 2, 2], covariance matrices of the upper left and lower right corners respectively
115 | """
116 | max_boxes = observations.shape[2]
117 | num_observations = observations.shape[1]
118 | num_batches = observations.shape[0]
119 |
120 | # per bounding box, sum each individual coordinate
121 | summed_coordinates = observations.sum(dim=2)
122 | zeros = observations.le(0.)
123 | zeros_per_box = zeros.sum(dim=3)
124 | N = zeros_per_box.le(3).sum(dim=2).float()
125 | mean = torch.div(summed_coordinates, N.unsqueeze(-1))
126 | # mean = torch.div(summed_coordinates, torch.transpose(N, 0, 1))
127 | #### covariances
128 | # must be done separately for the upper left corner (0) and lower right corner (1) of the bounding box
129 | mean_0 = mean[:, :, 0:2]
130 | mean_1 = mean[:, :, 2:4]
131 | observations_0 = observations[:, :, :, 0:2]
132 | observations_1 = observations[:, :, :, 2:4]
133 |
134 | # Batch Observation boXes coordinatesTransposed and Batch Observation boXes Coordinates
135 | cov_first_part_summed_0 = torch.einsum('boxt,boxc -> botc', observations_0, observations_0)
136 | cov_first_part_summed_1 = torch.einsum('boxt,boxc -> botc', observations_1, observations_1)
137 |
138 | # double unsqueeze to allow for batches
139 | stacked_N = N.unsqueeze(-1).unsqueeze(-1)
140 |
141 | cov_first_part_0 = torch.div(cov_first_part_summed_0, stacked_N)
142 | cov_first_part_1 = torch.div(cov_first_part_summed_1, stacked_N)
143 |
144 | cov_second_part_0 = torch.einsum('bik,bij-> bijk',mean_0, mean_0)
145 | cov_second_part_1 = torch.einsum('bik,bij-> bijk',mean_1, mean_1)
146 |
147 | cov_0 = cov_first_part_0 - cov_second_part_0
148 | cov_1 = cov_first_part_1 - cov_second_part_1
149 |
150 |
151 | return mean, cov_0, cov_1
152 |
153 |
154 | def means_observations(observations):
155 | """
156 | For a guide on np.einsum (vs using for loops, a lot faster)
157 | (which is really similar to torch.einsum, which is used below to keep gpu speed-ups)
158 | check:
159 | - (short) http://ajcr.net/Basic-guide-to-einsum/
160 | - or (elaborate, but VERY good) https://obilaniu6266h16.wordpress.com/2016/02/04/einstein-summation-in-numpy/
161 | - or (example also involving a covariance calculation) https://medium.com/the-quarks/an-einsum-use-case-8dafcb933c66
162 |
163 | args:
164 | observations: (tensor) combined bounding boxes, only spatial information
165 | one bounding box should have the coordinates like this:
166 | [x0,y0,x1,y1], the coordinates of the upper left and lower right corners
167 | respectively. As each observation can have a variable number of bounding boxes,
168 | the observations that have less than the maximum number of bounding boxes are assumed to be padded
169 | with zeros.
170 |
171 | shape: [max(n_boxes), batch, observations,4]
172 |
173 | :return:
174 | means_observation: last dim is (mu0,mu1)
175 | shape: [batch, observation, 2]
176 | """
177 |
178 | # per bounding box, sum each individual coordinate
179 | summed_coordinates = observations.sum(dim=2)
180 | zeros = observations.le(0.)
181 | zeros_per_box = zeros.sum(dim=0)
182 | N = zeros_per_box.le(3).sum(dim=2).float()
183 | mean = torch.div(summed_coordinates, torch.transpose(N, 0, 1))
184 |
185 | return mean
186 |
--------------------------------------------------------------------------------
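Two of the helpers above are easiest to verify on tiny tensors. Below is a minimal sketch, with made-up values and assuming the repository root is on `PYTHONPATH`, of `entropy` on already-normalized probabilities and of the `'boxx->bo'` einsum trace in `trace_covariance`:

```Python
# Minimal checks for entropy() and trace_covariance() from
# active_learning_package/uncertainty_helpers.py; values are made up.
import torch

from active_learning_package.uncertainty_helpers import entropy, trace_covariance

# (batch=1, observations=2, classes=3): a uniform row and a peaked row
confs = torch.tensor([[[1 / 3, 1 / 3, 1 / 3],
                       [0.98, 0.01, 0.01]]])
print(entropy(confs, already_normalized=True))  # ~[[1.099, 0.112]], i.e. ln(3) vs near 0

# (batch=1, observations=1, 2, 2): the trace sums the diagonal, here 2.0 + 3.0
cov = torch.tensor([[[[2.0, 0.5],
                      [0.5, 3.0]]]])
traces_0, traces_1 = trace_covariance(cov, cov)
print(traces_0)  # [[5.]]
```

The uniform row attains the maximum entropy ln(3) ≈ 1.099 while the peaked row is close to zero, and the trace is simply xx + yy of each 2×2 covariance matrix.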
/data/imageset_files/car_trainval_detect.txt:
--------------------------------------------------------------------------------
1 | 000007
2 | 000012
3 | 000020
4 | 000026
5 | 000047
6 | 000060
7 | 000083
8 | 000091
9 | 000131
10 | 000134
11 | 000142
12 | 000153
13 | 000156
14 | 000159
15 | 000161
16 | 000169
17 | 000180
18 | 000210
19 | 000220
20 | 000221
21 | 000233
22 | 000245
23 | 000251
24 | 000262
25 | 000263
26 | 000288
27 | 000289
28 | 000296
29 | 000302
30 | 000303
31 | 000311
32 | 000317
33 | 000318
34 | 000320
35 | 000329
36 | 000334
37 | 000338
38 | 000343
39 | 000355
40 | 000367
41 | 000373
42 | 000387
43 | 000396
44 | 000404
45 | 000406
46 | 000420
47 | 000431
48 | 000461
49 | 000463
50 | 000469
51 | 000474
52 | 000477
53 | 000494
54 | 000509
55 | 000515
56 | 000522
57 | 000541
58 | 000543
59 | 000545
60 | 000554
61 | 000579
62 | 000590
63 | 000605
64 | 000620
65 | 000628
66 | 000648
67 | 000653
68 | 000663
69 | 000672
70 | 000688
71 | 000700
72 | 000754
73 | 000776
74 | 000787
75 | 000800
76 | 000818
77 | 000823
78 | 000829
79 | 000855
80 | 000860
81 | 000871
82 | 000888
83 | 000906
84 | 000911
85 | 000935
86 | 000937
87 | 000972
88 | 000977
89 | 000987
90 | 001052
91 | 001057
92 | 001060
93 | 001069
94 | 001071
95 | 001093
96 | 001112
97 | 001119
98 | 001124
99 | 001125
100 | 001148
101 | 001237
102 | 001258
103 | 001268
104 | 001281
105 | 001290
106 | 001292
107 | 001294
108 | 001330
109 | 001334
110 | 001352
111 | 001360
112 | 001364
113 | 001371
114 | 001384
115 | 001385
116 | 001386
117 | 001409
118 | 001414
119 | 001432
120 | 001445
121 | 001455
122 | 001466
123 | 001472
124 | 001488
125 | 001492
126 | 001494
127 | 001532
128 | 001559
129 | 001561
130 | 001576
131 | 001604
132 | 001618
133 | 001622
134 | 001640
135 | 001654
136 | 001662
137 | 001676
138 | 001693
139 | 001699
140 | 001746
141 | 001780
142 | 001801
143 | 001821
144 | 001845
145 | 001862
146 | 001877
147 | 001881
148 | 001899
149 | 001902
150 | 001931
151 | 001937
152 | 001944
153 | 001950
154 | 001954
155 | 001962
156 | 001980
157 | 002019
158 | 002023
159 | 002045
160 | 002056
161 | 002116
162 | 002125
163 | 002134
164 | 002135
165 | 002153
166 | 002170
167 | 002174
168 | 002178
169 | 002180
170 | 002182
171 | 002197
172 | 002228
173 | 002241
174 | 002244
175 | 002247
176 | 002248
177 | 002281
178 | 002291
179 | 002307
180 | 002311
181 | 002340
182 | 002343
183 | 002355
184 | 002373
185 | 002374
186 | 002393
187 | 002411
188 | 002420
189 | 002436
190 | 002439
191 | 002454
192 | 002478
193 | 002490
194 | 002493
195 | 002497
196 | 002504
197 | 002533
198 | 002534
199 | 002544
200 | 002559
201 | 002563
202 | 002566
203 | 002584
204 | 002595
205 | 002606
206 | 002625
207 | 002643
208 | 002646
209 | 002647
210 | 002666
211 | 002667
212 | 002682
213 | 002691
214 | 002693
215 | 002704
216 | 002730
217 | 002734
218 | 002759
219 | 002772
220 | 002776
221 | 002779
222 | 002783
223 | 002804
224 | 002812
225 | 002833
226 | 002854
227 | 002884
228 | 002917
229 | 002937
230 | 002994
231 | 003007
232 | 003009
233 | 003013
234 | 003027
235 | 003032
236 | 003051
237 | 003053
238 | 003054
239 | 003057
240 | 003083
241 | 003092
242 | 003094
243 | 003103
244 | 003117
245 | 003165
246 | 003176
247 | 003195
248 | 003214
249 | 003228
250 | 003231
251 | 003240
252 | 003243
253 | 003256
254 | 003261
255 | 003271
256 | 003285
257 | 003313
258 | 003331
259 | 003344
260 | 003355
261 | 003359
262 | 003363
263 | 003379
264 | 003390
265 | 003397
266 | 003406
267 | 003420
268 | 003461
269 | 003484
270 | 003551
271 | 003555
272 | 003587
273 | 003596
274 | 003606
275 | 003608
276 | 003634
277 | 003636
278 | 003655
279 | 003703
280 | 003711
281 | 003713
282 | 003721
283 | 003790
284 | 003798
285 | 003806
286 | 003809
287 | 003820
288 | 003824
289 | 003835
290 | 003885
291 | 003891
292 | 003936
293 | 003954
294 | 003971
295 | 003973
296 | 003974
297 | 003987
298 | 003997
299 | 003998
300 | 004011
301 | 004019
302 | 004073
303 | 004087
304 | 004089
305 | 004091
306 | 004108
307 | 004136
308 | 004145
309 | 004186
310 | 004203
311 | 004228
312 | 004231
313 | 004242
314 | 004244
315 | 004284
316 | 004295
317 | 004303
318 | 004304
319 | 004329
320 | 004346
321 | 004365
322 | 004367
323 | 004384
324 | 004386
325 | 004387
326 | 004429
327 | 004439
328 | 004481
329 | 004488
330 | 004494
331 | 004519
332 | 004526
333 | 004539
334 | 004544
335 | 004563
336 | 004576
337 | 004581
338 | 004591
339 | 004604
340 | 004618
341 | 004660
342 | 004687
343 | 004691
344 | 004705
345 | 004719
346 | 004727
347 | 004747
348 | 004748
349 | 004750
350 | 004779
351 | 004786
352 | 004793
353 | 004805
354 | 004808
355 | 004823
356 | 004828
357 | 004830
358 | 004850
359 | 004863
360 | 004873
361 | 004890
362 | 004903
363 | 004946
364 | 004961
365 | 004962
366 | 004973
367 | 004983
368 | 005003
369 | 005020
370 | 005028
371 | 005047
372 | 005065
373 | 005067
374 | 005068
375 | 005071
376 | 005072
377 | 005090
378 | 005102
379 | 005110
380 | 005156
381 | 005159
382 | 005169
383 | 005199
384 | 005209
385 | 005259
386 | 005262
387 | 005273
388 | 005305
389 | 005318
390 | 005331
391 | 005350
392 | 005373
393 | 005387
394 | 005423
395 | 005457
396 | 005475
397 | 005481
398 | 005483
399 | 005486
400 | 005489
401 | 005499
402 | 005509
403 | 005536
404 | 005547
405 | 005549
406 | 005566
407 | 005577
408 | 005584
409 | 005585
410 | 005588
411 | 005592
412 | 005593
413 | 005609
414 | 005640
415 | 005645
416 | 005669
417 | 005679
418 | 005738
419 | 005747
420 | 005749
421 | 005756
422 | 005760
423 | 005782
424 | 005791
425 | 005806
426 | 005815
427 | 005830
428 | 005831
429 | 005839
430 | 005861
431 | 005868
432 | 005897
433 | 005899
434 | 005918
435 | 005956
436 | 005979
437 | 005988
438 | 005998
439 | 006009
440 | 006011
441 | 006018
442 | 006035
443 | 006038
444 | 006043
445 | 006058
446 | 006062
447 | 006079
448 | 006089
449 | 006097
450 | 006103
451 | 006104
452 | 006120
453 | 006124
454 | 006128
455 | 006133
456 | 006151
457 | 006196
458 | 006201
459 | 006203
460 | 006206
461 | 006210
462 | 006218
463 | 006223
464 | 006224
465 | 006225
466 | 006235
467 | 006250
468 | 006261
469 | 006277
470 | 006290
471 | 006301
472 | 006320
473 | 006325
474 | 006329
475 | 006330
476 | 006346
477 | 006362
478 | 006369
479 | 006375
480 | 006396
481 | 006417
482 | 006421
483 | 006438
484 | 006458
485 | 006459
486 | 006484
487 | 006497
488 | 006524
489 | 006588
490 | 006593
491 | 006625
492 | 006632
493 | 006654
494 | 006660
495 | 006668
496 | 006706
497 | 006719
498 | 006734
499 | 006736
500 | 006748
501 | 006766
502 | 006783
503 | 006821
504 | 006822
505 | 006858
506 | 006868
507 | 006884
508 | 006893
509 | 006900
510 | 006918
511 | 006931
512 | 006988
513 | 007003
514 | 007004
515 | 007040
516 | 007058
517 | 007068
518 | 007074
519 | 007090
520 | 007133
521 | 007153
522 | 007159
523 | 007167
524 | 007205
525 | 007208
526 | 007247
527 | 007261
528 | 007270
529 | 007279
530 | 007283
531 | 007284
532 | 007285
533 | 007294
534 | 007305
535 | 007346
536 | 007363
537 | 007374
538 | 007376
539 | 007383
540 | 007396
541 | 007414
542 | 007422
543 | 007424
544 | 007427
545 | 007446
546 | 007468
547 | 007475
548 | 007479
549 | 007490
550 | 007497
551 | 007525
552 | 007527
553 | 007566
554 | 007592
555 | 007601
556 | 007614
557 | 007622
558 | 007647
559 | 007650
560 | 007653
561 | 007663
562 | 007667
563 | 007691
564 | 007699
565 | 007709
566 | 007721
567 | 007731
568 | 007736
569 | 007745
570 | 007779
571 | 007790
572 | 007815
573 | 007819
574 | 007821
575 | 007843
576 | 007855
577 | 007856
578 | 007883
579 | 007898
580 | 007905
581 | 007921
582 | 007931
583 | 007932
584 | 007950
585 | 007963
586 | 007964
587 | 007970
588 | 007971
589 | 008001
590 | 008019
591 | 008026
592 | 008031
593 | 008037
594 | 008044
595 | 008057
596 | 008060
597 | 008079
598 | 008087
599 | 008093
600 | 008098
601 | 008105
602 | 008108
603 | 008160
604 | 008169
605 | 008174
606 | 008188
607 | 008197
608 | 008225
609 | 008232
610 | 008268
611 | 008279
612 | 008294
613 | 008296
614 | 008315
615 | 008329
616 | 008336
617 | 008359
618 | 008360
619 | 008376
620 | 008388
621 | 008391
622 | 008397
623 | 008429
624 | 008444
625 | 008449
626 | 008461
627 | 008466
628 | 008478
629 | 008482
630 | 008483
631 | 008484
632 | 008502
633 | 008503
634 | 008517
635 | 008524
636 | 008549
637 | 008550
638 | 008562
639 | 008572
640 | 008581
641 | 008586
642 | 008601
643 | 008633
644 | 008663
645 | 008665
646 | 008676
647 | 008680
648 | 008706
649 | 008716
650 | 008727
651 | 008739
652 | 008747
653 | 008750
654 | 008768
655 | 008784
656 | 008793
657 | 008794
658 | 008801
659 | 008826
660 | 008838
661 | 008843
662 | 008848
663 | 008859
664 | 008891
665 | 008892
666 | 008909
667 | 008911
668 | 008923
669 | 008929
670 | 008939
671 | 008958
672 | 008960
673 | 008966
674 | 008968
675 | 008969
676 | 008978
677 | 009000
678 | 009006
679 | 009015
680 | 009029
681 | 009045
682 | 009053
683 | 009058
684 | 009060
685 | 009064
686 | 009073
687 | 009078
688 | 009106
689 | 009116
690 | 009121
691 | 009163
692 | 009174
693 | 009178
694 | 009179
695 | 009186
696 | 009205
697 | 009213
698 | 009214
699 | 009254
700 | 009269
701 | 009282
702 | 009283
703 | 009286
704 | 009318
705 | 009326
706 | 009336
707 | 009350
708 | 009358
709 | 009368
710 | 009392
711 | 009406
712 | 009409
713 | 009411
714 | 009424
715 | 009434
716 | 009448
717 | 009469
718 | 009477
719 | 009507
720 | 009515
721 | 009517
722 | 009532
723 | 009558
724 | 009596
725 | 009614
726 | 009620
727 | 009623
728 | 009641
729 | 009644
730 | 009671
731 | 009676
732 | 009699
733 | 009711
734 | 009718
735 | 009729
736 | 009733
737 | 009737
738 | 009745
739 | 009762
740 | 009774
741 | 009776
742 | 009785
743 | 009801
744 | 009810
745 | 009822
746 | 009830
747 | 009834
748 | 009839
749 | 009845
750 | 009848
751 | 009862
752 | 009863
753 | 009879
754 | 009898
755 | 009900
756 | 009904
757 | 009913
758 | 009920
759 | 009932
760 | 009938
761 | 009959
762 |
--------------------------------------------------------------------------------
/data/imageset_files/car_test_detect.txt:
--------------------------------------------------------------------------------
1 | 000004
2 | 000014
3 | 000071
4 | 000074
5 | 000082
6 | 000103
7 | 000135
8 | 000137
9 | 000152
10 | 000172
11 | 000188
12 | 000197
13 | 000240
14 | 000252
15 | 000254
16 | 000271
17 | 000284
18 | 000293
19 | 000300
20 | 000301
21 | 000313
22 | 000341
23 | 000351
24 | 000358
25 | 000361
26 | 000390
27 | 000402
28 | 000415
29 | 000425
30 | 000440
31 | 000453
32 | 000465
33 | 000471
34 | 000488
35 | 000505
36 | 000507
37 | 000529
38 | 000548
39 | 000580
40 | 000585
41 | 000586
42 | 000593
43 | 000602
44 | 000607
45 | 000624
46 | 000634
47 | 000646
48 | 000649
49 | 000669
50 | 000679
51 | 000687
52 | 000693
53 | 000715
54 | 000719
55 | 000721
56 | 000724
57 | 000727
58 | 000736
59 | 000743
60 | 000747
61 | 000757
62 | 000778
63 | 000788
64 | 000801
65 | 000809
66 | 000844
67 | 000881
68 | 000883
69 | 000894
70 | 000932
71 | 000945
72 | 000961
73 | 000984
74 | 000985
75 | 001003
76 | 001005
77 | 001022
78 | 001034
79 | 001058
80 | 001063
81 | 001080
82 | 001085
83 | 001090
84 | 001111
85 | 001134
86 | 001135
87 | 001155
88 | 001198
89 | 001222
90 | 001252
91 | 001267
92 | 001280
93 | 001283
94 | 001291
95 | 001308
96 | 001318
97 | 001321
98 | 001328
99 | 001331
100 | 001335
101 | 001356
102 | 001358
103 | 001369
104 | 001376
105 | 001379
106 | 001382
107 | 001394
108 | 001403
109 | 001422
110 | 001428
111 | 001435
112 | 001476
113 | 001491
114 | 001511
115 | 001525
116 | 001535
117 | 001550
118 | 001552
119 | 001560
120 | 001569
121 | 001572
122 | 001605
123 | 001613
124 | 001616
125 | 001619
126 | 001623
127 | 001626
128 | 001652
129 | 001658
130 | 001700
131 | 001701
132 | 001770
133 | 001776
134 | 001804
135 | 001820
136 | 001838
137 | 001846
138 | 001851
139 | 001857
140 | 001863
141 | 001873
142 | 001883
143 | 001891
144 | 001908
145 | 001913
146 | 001919
147 | 001923
148 | 001924
149 | 001935
150 | 001942
151 | 001951
152 | 001956
153 | 001965
154 | 001991
155 | 002040
156 | 002041
157 | 002057
158 | 002118
159 | 002141
160 | 002143
161 | 002149
162 | 002154
163 | 002177
164 | 002185
165 | 002210
166 | 002223
167 | 002232
168 | 002242
169 | 002245
170 | 002271
171 | 002294
172 | 002319
173 | 002331
174 | 002346
175 | 002349
176 | 002358
177 | 002370
178 | 002383
179 | 002402
180 | 002406
181 | 002416
182 | 002418
183 | 002424
184 | 002446
185 | 002484
186 | 002517
187 | 002522
188 | 002526
189 | 002531
190 | 002532
191 | 002543
192 | 002548
193 | 002556
194 | 002562
195 | 002577
196 | 002583
197 | 002602
198 | 002607
199 | 002610
200 | 002622
201 | 002650
202 | 002681
203 | 002701
204 | 002703
205 | 002729
206 | 002733
207 | 002740
208 | 002746
209 | 002750
210 | 002752
211 | 002758
212 | 002789
213 | 002790
214 | 002793
215 | 002808
216 | 002814
217 | 002829
218 | 002840
219 | 002871
220 | 002900
221 | 002920
222 | 002927
223 | 002955
224 | 002961
225 | 002993
226 | 003006
227 | 003033
228 | 003046
229 | 003052
230 | 003055
231 | 003070
232 | 003101
233 | 003109
234 | 003128
235 | 003143
236 | 003168
237 | 003179
238 | 003217
239 | 003220
240 | 003234
241 | 003257
242 | 003265
243 | 003276
244 | 003289
245 | 003302
246 | 003306
247 | 003321
248 | 003328
249 | 003334
250 | 003348
251 | 003353
252 | 003357
253 | 003364
254 | 003375
255 | 003385
256 | 003387
257 | 003405
258 | 003414
259 | 003434
260 | 003456
261 | 003460
262 | 003476
263 | 003481
264 | 003483
265 | 003486
266 | 003501
267 | 003503
268 | 003512
269 | 003515
270 | 003517
271 | 003523
272 | 003527
273 | 003545
274 | 003552
275 | 003553
276 | 003559
277 | 003569
278 | 003570
279 | 003578
280 | 003607
281 | 003630
282 | 003631
283 | 003661
284 | 003666
285 | 003677
286 | 003683
287 | 003692
288 | 003701
289 | 003718
290 | 003719
291 | 003733
292 | 003746
293 | 003769
294 | 003810
295 | 003815
296 | 003833
297 | 003878
298 | 003893
299 | 003904
300 | 003916
301 | 003917
302 | 003940
303 | 003962
304 | 003967
305 | 004001
306 | 004026
307 | 004030
308 | 004043
309 | 004050
310 | 004071
311 | 004080
312 | 004088
313 | 004097
314 | 004104
315 | 004107
316 | 004115
317 | 004147
318 | 004154
319 | 004172
320 | 004183
321 | 004187
322 | 004188
323 | 004216
324 | 004217
325 | 004240
326 | 004245
327 | 004249
328 | 004254
329 | 004290
330 | 004305
331 | 004313
332 | 004320
333 | 004335
334 | 004337
335 | 004378
336 | 004426
337 | 004442
338 | 004453
339 | 004456
340 | 004458
341 | 004478
342 | 004492
343 | 004521
344 | 004543
345 | 004545
346 | 004554
347 | 004560
348 | 004578
349 | 004580
350 | 004586
351 | 004596
352 | 004602
353 | 004615
354 | 004650
355 | 004667
356 | 004684
357 | 004688
358 | 004697
359 | 004731
360 | 004734
361 | 004745
362 | 004755
363 | 004762
364 | 004763
365 | 004764
366 | 004780
367 | 004800
368 | 004804
369 | 004806
370 | 004810
371 | 004821
372 | 004844
373 | 004860
374 | 004870
375 | 004891
376 | 004927
377 | 004933
378 | 004940
379 | 004959
380 | 004965
381 | 004981
382 | 004989
383 | 004996
384 | 005005
385 | 005021
386 | 005030
387 | 005035
388 | 005041
389 | 005074
390 | 005083
391 | 005091
392 | 005099
393 | 005105
394 | 005125
395 | 005126
396 | 005142
397 | 005157
398 | 005158
399 | 005163
400 | 005166
401 | 005192
402 | 005204
403 | 005275
404 | 005287
405 | 005296
406 | 005316
407 | 005333
408 | 005357
409 | 005372
410 | 005381
411 | 005401
412 | 005432
413 | 005437
414 | 005443
415 | 005447
416 | 005468
417 | 005484
418 | 005493
419 | 005501
420 | 005520
421 | 005523
422 | 005546
423 | 005558
424 | 005627
425 | 005638
426 | 005649
427 | 005663
428 | 005666
429 | 005678
430 | 005694
431 | 005706
432 | 005708
433 | 005717
434 | 005739
435 | 005746
436 | 005763
437 | 005770
438 | 005775
439 | 005793
440 | 005809
441 | 005835
442 | 005842
443 | 005862
444 | 005869
445 | 005870
446 | 005904
447 | 005924
448 | 005929
449 | 005932
450 | 005953
451 | 005959
452 | 005974
453 | 005987
454 | 006006
455 | 006016
456 | 006017
457 | 006019
458 | 006024
459 | 006034
460 | 006047
461 | 006082
462 | 006094
463 | 006109
464 | 006113
465 | 006121
466 | 006137
467 | 006155
468 | 006213
469 | 006228
470 | 006242
471 | 006246
472 | 006256
473 | 006283
474 | 006324
475 | 006326
476 | 006327
477 | 006331
478 | 006333
479 | 006334
480 | 006340
481 | 006358
482 | 006376
483 | 006383
484 | 006386
485 | 006397
486 | 006405
487 | 006415
488 | 006420
489 | 006423
490 | 006435
491 | 006441
492 | 006454
493 | 006469
494 | 006481
495 | 006493
496 | 006502
497 | 006510
498 | 006525
499 | 006527
500 | 006567
501 | 006581
502 | 006590
503 | 006634
504 | 006653
505 | 006685
506 | 006693
507 | 006717
508 | 006724
509 | 006733
510 | 006741
511 | 006749
512 | 006754
513 | 006757
514 | 006785
515 | 006790
516 | 006793
517 | 006817
518 | 006853
519 | 006854
520 | 006882
521 | 006890
522 | 006907
523 | 006925
524 | 006955
525 | 006970
526 | 006974
527 | 006996
528 | 007001
529 | 007014
530 | 007015
531 | 007034
532 | 007061
533 | 007082
534 | 007085
535 | 007112
536 | 007118
537 | 007126
538 | 007143
539 | 007164
540 | 007173
541 | 007176
542 | 007179
543 | 007242
544 | 007246
545 | 007267
546 | 007273
547 | 007278
548 | 007281
549 | 007282
550 | 007288
551 | 007304
552 | 007337
553 | 007339
554 | 007347
555 | 007358
556 | 007362
557 | 007368
558 | 007386
559 | 007399
560 | 007405
561 | 007423
562 | 007429
563 | 007447
564 | 007452
565 | 007459
566 | 007478
567 | 007496
568 | 007501
569 | 007507
570 | 007510
571 | 007518
572 | 007522
573 | 007556
574 | 007562
575 | 007580
576 | 007589
577 | 007591
578 | 007613
579 | 007617
580 | 007634
581 | 007665
582 | 007676
583 | 007690
584 | 007693
585 | 007701
586 | 007714
587 | 007734
588 | 007757
589 | 007761
590 | 007797
591 | 007800
592 | 007806
593 | 007807
594 | 007818
595 | 007835
596 | 007839
597 | 007844
598 | 007861
599 | 007866
600 | 007882
601 | 007906
602 | 007927
603 | 007948
604 | 007960
605 | 007961
606 | 007967
607 | 007969
608 | 007992
609 | 008006
610 | 008020
611 | 008030
612 | 008035
613 | 008047
614 | 008052
615 | 008088
616 | 008104
617 | 008114
618 | 008120
619 | 008126
620 | 008129
621 | 008133
622 | 008135
623 | 008136
624 | 008143
625 | 008152
626 | 008158
627 | 008161
628 | 008212
629 | 008215
630 | 008231
631 | 008246
632 | 008259
633 | 008264
634 | 008270
635 | 008271
636 | 008276
637 | 008283
638 | 008289
639 | 008290
640 | 008324
641 | 008353
642 | 008357
643 | 008363
644 | 008375
645 | 008378
646 | 008383
647 | 008408
648 | 008414
649 | 008421
650 | 008432
651 | 008447
652 | 008451
653 | 008464
654 | 008479
655 | 008481
656 | 008488
657 | 008504
658 | 008548
659 | 008560
660 | 008579
661 | 008593
662 | 008609
663 | 008622
664 | 008632
665 | 008657
666 | 008658
667 | 008668
668 | 008682
669 | 008684
670 | 008693
671 | 008694
672 | 008708
673 | 008711
674 | 008715
675 | 008724
676 | 008734
677 | 008761
678 | 008777
679 | 008785
680 | 008788
681 | 008797
682 | 008800
683 | 008824
684 | 008828
685 | 008829
686 | 008895
687 | 008896
688 | 008903
689 | 008906
690 | 008910
691 | 008915
692 | 008916
693 | 008996
694 | 009008
695 | 009023
696 | 009033
697 | 009052
698 | 009071
699 | 009077
700 | 009081
701 | 009092
702 | 009096
703 | 009111
704 | 009119
705 | 009122
706 | 009125
707 | 009134
708 | 009140
709 | 009149
710 | 009156
711 | 009182
712 | 009201
713 | 009206
714 | 009210
715 | 009241
716 | 009243
717 | 009261
718 | 009267
719 | 009284
720 | 009302
721 | 009304
722 | 009321
723 | 009322
724 | 009335
725 | 009341
726 | 009360
727 | 009376
728 | 009381
729 | 009384
730 | 009387
731 | 009396
732 | 009426
733 | 009427
734 | 009430
735 | 009475
736 | 009492
737 | 009530
738 | 009536
739 | 009564
740 | 009590
741 | 009593
742 | 009599
743 | 009601
744 | 009643
745 | 009652
746 | 009675
747 | 009680
748 | 009683
749 | 009688
750 | 009694
751 | 009701
752 | 009704
753 | 009705
754 | 009723
755 | 009740
756 | 009757
757 | 009768
758 | 009770
759 | 009777
760 | 009779
761 | 009804
762 | 009806
763 | 009821
764 | 009827
765 | 009829
766 | 009847
767 | 009849
768 | 009856
769 | 009873
770 | 009883
771 | 009895
772 | 009903
773 | 009927
774 | 009943
775 | 009963
776 |
--------------------------------------------------------------------------------
/requirements:
--------------------------------------------------------------------------------
1 | # Name Version Build Channel
2 | _libgcc_mutex 0.1 main
3 | asn1crypto 0.24.0 py37_1003 conda-forge
4 | backcall 0.1.0 py37_0
5 | blas 1.0 mkl
6 | bleach 3.1.0 py37_0
7 | bzip2 1.0.6 h14c3975_5
8 | ca-certificates 2019.8.28 0
9 | cairo 1.14.12 h8948797_3
10 | certifi 2019.9.11 py37_0
11 | cffi 1.12.1 py37h2e261b9_0
12 | chardet 3.0.4 py37_1003 conda-forge
13 | cloudpickle 0.8.0 py_0 conda-forge
14 | cryptography 2.5 py37h9d9f1b6_1 conda-forge
15 | cudatoolkit 9.0 h13b8566_0
16 | cycler 0.10.0 py37_0
17 | cython 0.29.7 py37he6710b0_0
18 | cytoolz 0.9.0.1 py37h14c3975_1001 conda-forge
19 | dask-core 1.1.3 py_0 conda-forge
20 | dbus 1.13.6 h746ee38_0
21 | decorator 4.3.2 py37_0
22 | easydict 1.9 pypi_0 pypi
23 | entrypoints 0.3 py37_0
24 | expat 2.2.6 he6710b0_0
25 | ffmpeg 4.0 hcdf2ecd_0
26 | fontconfig 2.13.0 h9420a91_0
27 | freeglut 3.0.0 hf484d3e_5
28 | freetype 2.9.1 h8a8886c_1
29 | git 2.20.1 pl526hacde149_0
30 | glib 2.56.2 hd408876_0
31 | gmp 6.1.2 h6c8ec71_1
32 | graphite2 1.3.13 h23475e2_0
33 | gst-plugins-base 1.14.0 hbbd80ab_1
34 | gstreamer 1.14.0 hb453b48_1
35 | harfbuzz 1.8.8 hffaf4a1_0
36 | hdbscan 0.8.22 py37hd352d35_1 conda-forge
37 | hdf5 1.10.2 hba1933b_1
38 | icu 58.2 h9c2bf20_1
39 | idna 2.8 py37_1000 conda-forge
40 | imageio 2.5.0 py37_0 conda-forge
41 | intel-openmp 2019.1 144
42 | ipykernel 5.1.0 py37h39e3cac_0
43 | ipython 7.3.0 py37h39e3cac_0
44 | ipython_genutils 0.2.0 py37_0
45 | ipywidgets 7.4.2 py37_0
46 | jasper 2.0.14 h07fcdf6_1
47 | jedi 0.13.3 py37_0
48 | jinja2 2.10 py37_0
49 | joblib 0.13.2 py_0 conda-forge
50 | jpeg 9b h024ee3a_2
51 | jsonschema 2.6.0 py37_0
52 | jupyter 1.0.0 py37_7
53 | jupyter_client 5.2.4 py37_0
54 | jupyter_console 6.0.0 py37_0
55 | jupyter_core 4.4.0 py37_0
56 | kiwisolver 1.0.1 py37hf484d3e_0
57 | krb5 1.16.1 h173b8e3_7
58 | libcurl 7.64.1 h20c2e04_0
59 | libedit 3.1.20181209 hc058e9b_0
60 | libffi 3.2.1 hd88cf55_4
61 | libgcc-ng 8.2.0 hdf63c60_1
62 | libgfortran-ng 7.3.0 hdf63c60_0
63 | libglu 9.0.0 hf484d3e_1
64 | libopencv 3.4.2 hb342d67_1
65 | libopus 1.3 h7b6447c_0
66 | libpng 1.6.36 hbc83047_0
67 | libsodium 1.0.16 h1bed415_0
68 | libssh2 1.8.2 h1ba5d50_0
69 | libstdcxx-ng 8.2.0 hdf63c60_1
70 | libtiff 4.0.10 h2733197_2
71 | libuuid 1.0.3 h1bed415_2
72 | libvpx 1.7.0 h439df22_0
73 | libxcb 1.13 h1bed415_1
74 | libxml2 2.9.9 he19cac6_0
75 | markupsafe 1.1.1 py37h7b6447c_0
76 | matplotlib 3.0.2 py37h5429711_0
77 | matplotlib-base 3.0.2 py37h167e16e_1001 conda-forge
78 | mistune 0.8.4 py37h7b6447c_0
79 | mkl 2019.4 243
80 | mkl-service 2.3.0 py37he904b0f_0
81 | mkl_fft 1.0.10 py37ha843d7b_0
82 | mkl_random 1.0.2 py37hd81dba3_0
83 | nbconvert 5.3.1 py37_0
84 | nbformat 4.4.0 py37_0
85 | ncurses 6.1 he6710b0_1
86 | networkx 2.2 py_1 conda-forge
87 | ninja 1.8.2 py37h6bb024c_1
88 | notebook 5.7.4 py37_0
89 | numpy 1.16.1 py37h7e9f1db_0
90 | numpy-base 1.16.1 py37hde5b4d6_0
91 | olefile 0.46 py37_0
92 | opencv 3.4.2 py37h6fd60c2_1
93 | openssl 1.1.1d h7b6447c_3
94 | pandas 0.24.1 py37he6710b0_0
95 | pandoc 2.2.3.2 0
96 | pandocfilters 1.4.2 py37_1
97 | parso 0.3.4 py37_0
98 | patsy 0.5.1 py37_0
99 | pcre 8.42 h439df22_0
100 | perl 5.26.2 h14c3975_0
101 | pexpect 4.6.0 py37_0
102 | pickleshare 0.7.5 py37_0
103 | pillow 5.4.1 py37h34e0f95_0
104 | pip 19.0.3 py37_0
105 | pixman 0.36.0 h7b6447c_0
106 | prometheus_client 0.6.0 py37_0
107 | prompt_toolkit 2.0.9 py37_0
108 | ptyprocess 0.6.0 py37_0
109 | py-opencv 3.4.2 py37hb342d67_1
110 | pyclustering 0.9.0 pypi_0 pypi
111 | pycocotools 2.0.0 pypi_0 pypi
112 | pycparser 2.19 py37_0
113 | pygments 2.3.1 py37_0
114 | pyopenssl 19.0.0 py37_0 conda-forge
115 | pyparsing 2.3.1 py37_0
116 | pyqt 5.9.2 py37h05f1152_2
117 | pysocks 1.6.8 py37_1002 conda-forge
118 | python 3.7.2 h0371630_0
119 | python-dateutil 2.8.0 py37_0
120 | pytorch 1.0.1 py3.7_cuda9.0.176_cudnn7.4.2_2 pytorch
121 | pytz 2018.9 py37_0
122 | pywavelets 1.0.2 py37h3010b51_0 conda-forge
123 | pyzmq 18.0.0 py37he6710b0_0
124 | qt 5.9.7 h5867ecd_1
125 | qtconsole 4.4.3 py37_0
126 | readline 7.0 h7b6447c_5
127 | requests 2.21.0 py37_1000 conda-forge
128 | scikit-image 0.14.2 py37hf484d3e_1 conda-forge
129 | scikit-learn 0.21.3 py37hd81dba3_0
130 | scipy 1.2.1 py37h7c811a0_0
131 | seaborn 0.9.0 py37_0
132 | send2trash 1.5.0 py37_0
133 | setuptools 40.8.0 py37_0
134 | sip 4.19.8 py37hf484d3e_0
135 | six 1.12.0 py37_0
136 | sqlite 3.26.0 h7b6447c_0
137 | statsmodels 0.10.1 py37hdd07704_0
138 | terminado 0.8.1 py37_1
139 | testpath 0.4.2 py37_0
140 | tk 8.6.8 hbc83047_0
141 | toolz 0.9.0 py_1 conda-forge
142 | torchfile 0.1.0 py_0 conda-forge
143 | torchvision 0.2.2 py_2 pytorch
144 | tornado 5.1.1 py37h7b6447c_0
145 | traitlets 4.3.2 py37_0
146 | urllib3 1.24.1 py37_1000 conda-forge
147 | visdom 0.1.8.8 0 conda-forge
148 | wcwidth 0.1.7 py37_0
149 | webencodings 0.5.1 py37_1
150 | websocket-client 0.55.0 py37_0 conda-forge
151 | wheel 0.33.1 py37_0
152 | widgetsnbextension 3.4.2 py37_0
153 | xz 5.2.4 h14c3975_4
154 | zeromq 4.3.1 he6710b0_3
155 | zlib 1.2.11 h7b6447c_3
156 | zstd 1.3.7 h0b5b093_0
157 |
--------------------------------------------------------------------------------
/create_spoc_features.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle
3 | import json
4 |
5 | from sklearn.decomposition import PCA
6 | from torch.autograd import Variable
7 | import torchvision.models as models
8 |
9 | from data import *
10 | import active_learning_package.helpers as helpers
11 |
12 |
13 |
14 | def get_feature_maps(dataset,
15 | net,
16 | imageset_name,
17 | save_dir):
18 |
19 | path_to_image_feature_dir = os.path.join(save_dir, imageset_name + '586_conv5_3_features_before_relu/')
20 | # path_to_image_feature_dir = save_dir+'2012trainval586_conv5_3_features/'
21 |
22 | if not os.path.exists(path_to_image_feature_dir):
23 | os.mkdir(path_to_image_feature_dir)
24 |
25 | # go through all images in the imageset
26 | already_saved = os.listdir(path_to_image_feature_dir)
27 |
28 | transform = BaseTransform(586, (104, 117, 123))
29 |
30 | for i, idx in enumerate(dataset.ids):
31 | image_feature_path = path_to_image_feature_dir + str(idx[1]) + '.pickle'
32 | if str(idx[1]) + '.pickle' in already_saved:
33 | print(i, '/', len(dataset.ids), ' was already saved')
34 |
35 | # load feature and append it
36 | # features = helpers.unpickle(image_feature_path)
37 |
38 | # conv_feature_list.append(features)
39 |
40 | continue
41 |
42 | print(i, '/', len(dataset.ids))
43 |
44 | # load image and transform (colors in different order)
45 | img = dataset.pull_image_using_imageset_id(idx)
46 |
47 | # transform the image and reorder the color channels
48 | x = torch.from_numpy(transform(img)[0][:, :, (2, 1, 0)]).permute(2, 0, 1) # We use pre-trained model from pytorch model zoo, which is trained with RGB, cv2.imread loads in BGR
49 |
50 | x = Variable(x.unsqueeze(0))
51 |
52 | if torch.cuda.is_available():
53 | torch.cuda.empty_cache()
54 | x = x.to('cuda')
55 |
56 | # forward pass through the truncated VGG, up to conv5_3
57 | features = net(x)
58 |
59 | # sum-pool each of the 512 channels over all spatial locations (NOTE: 512 is specific to VGG16 conv5_3)
60 | features = features.reshape(1, 512, -1).sum(dim=-1)
61 |
62 | # set detections back to cpu
63 | if torch.cuda.is_available():
64 | features = features.to('cpu')
65 |
66 | # append to conv_feature list
67 | # conv_feature_list.append(features)
68 |
69 | with open(image_feature_path, 'wb') as f:
70 | pickle.dump(features, f)
71 |
72 | return
73 | def calculate_PCA_and_whitening_parameters(dataset,
74 | imageset_name,
75 | save_dir):
76 |
77 |
78 | conv_feature_list = []
79 | if '2007' in imageset_name:
80 | print('PCA should be fitted on the 2012 dataset; this failsafe prevents overwriting the 2012 PCA with a 2007 PCA')
81 | raise NotImplementedError
82 |
83 | # path_to_image_feature_dir = os.path.join(save_dir,imageset_name+'586_conv5_3_features_before_relu/')
84 | path_to_image_feature_dir = save_dir+'2012trainval586_conv5_3_features/'
85 | #
86 |
87 | # load features
88 | pca_save_path = path_to_image_feature_dir + imageset_name +'PCA.pickle'
89 |
90 | if os.path.exists(pca_save_path):
91 | print('already did this PCA')
92 | return
93 | print('load features:')
94 | for i, idx in enumerate(dataset.ids):
95 | print('load feature', i, '/', len(dataset.ids),' and L2 normalize features before PCA')
96 | image_feature_path = path_to_image_feature_dir + str(idx[1]) + '.pickle'
97 |
98 | # load feature and append it
99 | features = helpers.unpickle(image_feature_path)
100 |
101 | # L2 normalize
102 | features = features / features.norm(2)
103 |
104 |
105 | conv_feature_list.append(features)
106 |
107 | np_features = torch.cat(conv_feature_list).detach().numpy()
108 | print('loaded all features and transformed them into a numpy array')
109 |
110 | ## calculate PCA parameters (which dimensions should be kept)
111 | # numpy array
112 | print('Do PCA')
113 | pca = PCA(n_components = 256, svd_solver = 'full', random_state = 42,whiten=True)
114 | pca.fit(np_features)
115 | print('did PCA')
116 |
117 | # save PCA
118 | pca_save_path = path_to_image_feature_dir + imageset_name +'PCA.pickle'
119 |
120 | with open(pca_save_path, 'wb') as f:
121 | pickle.dump(pca, f)
122 |
123 | print('Saved PCA')
124 |
125 | return
126 |
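# A minimal sketch of how the fitted PCA is reused downstream (illustrative
# shapes; the pickle path is the save path constructed above):
# >>> pca = helpers.unpickle('data/2012trainval586_conv5_3_features/2012trainvalPCA.pickle')
# >>> feats = torch.rand(1, 512)
# >>> feats = feats / feats.norm(2)            # L2-normalize first, as above
# >>> pca.transform(feats.numpy()).shape       # whitened, reduced descriptor
# (1, 256)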
127 | def create_spoc_features(dataset,
128 | image_features_path,
129 | PCA_param_path,
130 | imageset_name,
131 | save_dir):
132 | """
133 | See Babenko 2014
134 |
135 | """
136 |
137 | # load pca and whitening parameters
138 | pca = helpers.unpickle(PCA_param_path)
139 |
140 | if not os.path.exists(save_dir):
141 | os.mkdir(save_dir)
142 |
143 | # load image features
144 | for i, idx in enumerate(dataset.ids):
145 | image_feature_path = image_features_path + str(idx[1]) + '.pickle'
146 |
147 | # load feature and append it
148 | features = helpers.unpickle(image_feature_path)
149 |
150 | print(i, '/', len(dataset.ids))
151 |
152 | # l2 normalization
153 | features = features/features.norm(2)
154 |
155 | # apply pca transform + whitening to features
156 | features = pca.transform(features.detach().numpy())
157 | features = torch.tensor(features)
158 |
159 | # l2-normalization
160 | features = features/features.norm(2)
161 | spoc_feature_path = save_dir + str(idx[1]) + '.pickle'
162 |
163 | # save SPoC representation
164 |
165 | with open(spoc_feature_path,'wb') as f:
166 | pickle.dump(features, f)
167 |
168 | print('Created and Saved all SpoC representations of images')
169 |
170 | return
171 |
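# The full SPoC recipe implemented by get_feature_maps() + create_spoc_features(),
# written out as a formula:
#   spoc(I) = l2( PCA_whiten( l2( sum over (h, w) of conv5_3(I)[c, h, w] ) ) )
# i.e. sum-pool the 512 conv5_3 channels over space, L2-normalize, apply the
# whitened 256-dim PCA, and L2-normalize again.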
172 |
173 | def calculate_scalar_product_image_similarity(tensor_a,tensor_b):
174 | """
175 | https://datascience.stackexchange.com/questions/744/cosine-similarity-versus-dot-product-as-distance-metrics
176 |
177 | calculates image similarity between two images using a simple scalar product matching kernel
178 | L. Bo and C. Sminchisescu. Efficient match kernel between
179 | sets of features for visual recognition. In Advances in Neural Information Processing Systems (NIPS), pages 135–143,
180 | 2009.
181 |
182 | :return: similarity
183 | """
184 |
185 | return torch.dot(tensor_a.squeeze(),tensor_b.squeeze())
186 |
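# Because the SPoC descriptors are L2-normalized, this scalar product equals the
# cosine similarity. A quick check with illustrative unit vectors:
# >>> a = torch.tensor([[0.6, 0.8]])
# >>> b = torch.tensor([[0.8, 0.6]])
# >>> calculate_scalar_product_image_similarity(a, b)
# tensor(0.9600)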
187 |
188 | def calculate_all_images_similarities(dataset, load_dir_spoc_features):
189 | """
190 | Compute, for each image, its similarity to all other images and pickle the resulting dict per image.
191 | :return: the directory where the per-image similarity dicts are stored
192 | """
193 |
194 | # todo: can be made faster, now doing redundant calculations (similarities of a->b and b->a)
195 |
196 | save_dir = load_dir_spoc_features + 'image_similarities/'
197 | if not os.path.exists(save_dir):
198 | os.mkdir(save_dir)
199 |
200 | already_saved = os.listdir(save_dir)
201 |
202 | # go through the dataset
203 | for i,idx in enumerate(dataset.ids):
204 | if str(idx[1]) + '.pickle' in already_saved:
205 | print(i, '/', len(dataset.ids), ' was already saved')
206 | continue
207 | print(i,'/',len(dataset.ids))
208 | # placeholder to store similarities between all images
209 | image_similarity_dir = {}
210 |
211 | # load image description
212 | image_path_a = load_dir_spoc_features+ str(idx[1]) + '.pickle'
213 | image_a = helpers.unpickle(image_path_a)
214 |
215 |
216 | # go through all OTHER images except the idx we are currently at
217 | other_images = [idj for idj in dataset.ids if idj != idx]
218 |
219 | for j, idj in enumerate(other_images):
220 |
221 | # load image description
222 | image_path_b = load_dir_spoc_features + str(idj[1]) + '.pickle'
223 | image_b = helpers.unpickle(image_path_b)
224 |
225 |
226 | # calculate similarity
227 | similarity = calculate_scalar_product_image_similarity(image_a,image_b)
228 |
229 | if similarity.shape == torch.Size([0]):
230 | print(similarity)
231 | print('similarity should be a scalar')
232 | raise NotImplementedError
233 |
234 | # store similarity
235 | image_similarity_dir[idj[1]] = similarity.item()
236 |
237 | # save image similarity dir
238 | path = save_dir + str(idx[1]) + '.pickle'
239 |
240 | with open(path,'wb') as f:
241 | pickle.dump(image_similarity_dir, f)
242 |
243 | return save_dir
244 |
245 |
246 | def calculate_density_per_imageset(dataset,load_dir_similarities):
247 | """
248 | density is the mean similarity of one image to all other images in the dataset (see Settles 2008)
249 | """
250 |
251 | # todo: can be made faster, now doing redundant calculations (similarities of a->b and b->a)
252 | # go trough dataset
253 | density = {}
254 | for i,idx in enumerate(dataset.ids):
255 | print(i,'/',len(dataset.ids))
256 | # load similarity between all images in trainval and current image (idx)
257 | path = load_dir_similarities + str(idx[1]) + '.pickle'
258 |
259 | similarities_idx = helpers.unpickle(path)
260 |
261 | # go through all OTHER images in the dataset (can be a subset of trainval, e.g. only the car images)
262 | # except the id we are currently at
263 | other_images = [idj for idj in dataset.ids if idj != idx]
264 |
265 | # placeholder
266 | density[idx[1]] = 0
267 | for j, idj in enumerate(other_images):
268 |
269 | density[idx[1]] += similarities_idx[idj[1]]
270 |
271 | # divide by number of images to get mean
272 | density[idx[1]] /= len(other_images)
273 |
274 |
275 |
276 | # save image density dir
277 | path = load_dir_similarities + dataset.image_set[0][1] + '.pickle'
278 |
279 |
280 |
281 | with open(path,'wb') as f:
282 | pickle.dump(density, f)
283 |
284 |
285 |
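# In formula form, for an imageset U and similarity sim(.,.) as above:
#   density(x) = 1 / (|U| - 1) * sum over x' in U, x' != x, of sim(x, x')
# so images that resemble many other images in the (sub)set score high.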
286 |
287 | # def create_image_affinity_propagation_clusters(features,
288 | # dataset,
289 | # imageset_name):
290 | #
291 | # return
292 |
293 | if __name__ == '__main__':
294 |
295 | save_dir = 'data/'
296 |
297 | """get feature maps"""
298 | # imagesets = [[('2012', 'trainval')],
299 | # [('2007', 'trainval')]
300 | # [('2012', 'bottle_trainval_detect')],
301 | # [('2012', 'car_trainval_detect')],
302 | # [('2012', 'horse_trainval_detect')],
303 | # [('2012', 'sheep_trainval_detect')],
304 | # [('2012', 'pottedplant_trainval_detect')]
305 | # ]
306 |
307 |
308 | # load network
309 | # vgg16 = models.vgg16(pretrained=True) #NOTE: I adjusted the source code of the vgg16 such that it only goes up to the conv5_3 layer in forward passes
310 | # vgg16.eval()
311 | #
312 | # for imageset in imagesets:
313 | # # load dataset
314 | # dataset = VOCDetection(VOC_ROOT_LOCAL, imageset, BaseTransform(300, config.voc['dataset_mean']),
315 | # VOCAnnotationTransform())
316 | #
317 | # get_feature_maps(dataset = dataset,
318 | # net = vgg16,
319 | # imageset_name=imageset[0][0] + imageset[0][1],
320 | # save_dir= save_dir)
321 |
322 | """ Get PCA and whitening params on hold-out dataset (VOC2012)"""
323 |
324 | #
325 | #
326 | # imagesets = [[('2012', 'trainval')],
327 | # [('2012', 'bottle_trainval_detect')],
328 | # [('2012', 'car_trainval_detect')],
329 | # [('2012', 'horse_trainval_detect')],
330 | # [('2012', 'sheep_trainval_detect')],
331 | # [('2012', 'pottedplant_trainval_detect')]
332 | # ]
333 | #
334 | # for imageset in imagesets:
335 | #
336 | # # load dataset
337 | # dataset = VOCDetection(VOC_ROOT_LOCAL, imageset, BaseTransform(300, config.voc['dataset_mean']), VOCAnnotationTransform())
338 | #
339 | # calculate_PCA_and_whitening_parameters(dataset=dataset,
340 | # imageset_name=imageset[0][0]+imageset[0][1],
341 | # save_dir=save_dir)
342 | #
343 | #
344 |
345 | """ Make spoc features """
346 | # Imagesets
347 | # imagesets = [[('2007', 'trainval')]]
348 | #
349 | # for imageset in imagesets:
350 | # # load dataset
351 | # dataset = VOCDetection(VOC_ROOT_LOCAL, imageset, BaseTransform(586, config.voc['dataset_mean']),
352 | # VOCAnnotationTransform())
353 | # #
354 | # # calculate_PCA_and_whitening_parameters(dataset=dataset,
355 | # # imageset_name=imageset[0][0]+imageset[0][1],
356 | # # save_dir=save_dir,
357 | # # net=vgg16)
358 | # pca_dir = save_dir+'2012trainval586_conv5_3_features_before_relu/'
359 | # PCA_param_path = pca_dir + '2012trainvalPCA.pickle' # for now only using the 2012 full trainval PCA
360 | # image_features_path = os.path.join(os.getcwd(), save_dir, '2007trainval586_conv5_3_features_before_relu/')
361 | # # path_to_image_feature_dir = os.path.join(save_dir,imageset_name+'586_conv5_3_features/')
362 | #
363 | # create_spoc_features(dataset,
364 | # image_features_path,
365 | # PCA_param_path,
366 | # imageset_name=imageset[0][0] + imageset[0][1],
367 | # save_dir=image_features_path + '2012trainvalPCA/')
368 |
369 |
370 | """ Calculate complete similarities from each image in trainval 2007 to all other images"""
371 |
372 | # dataset = VOCDetection(VOC_ROOT_LOCAL, [('2007', 'trainval')], BaseTransform(586, config.voc['dataset_mean']),
373 | # VOCAnnotationTransform())
374 | # image_features_path = os.path.join(os.getcwd(), save_dir, '2007trainval586_conv5_3_features_before_relu/')
375 | # load_dir_spoc_features = image_features_path + '2012trainvalPCA/'
376 | # similarity_dir = calculate_all_images_similarities(dataset=dataset,
377 | # load_dir_spoc_features = load_dir_spoc_features)
378 | #
379 |
380 | """ Create density per imageset """
381 | image_sim_dir = save_dir+'2007trainval586_conv5_3_features_before_relu/2012trainvalPCA/image_similarities/'
382 |
383 |
384 | imagesets = [[('2007', 'trainval')],
385 | [('2007', 'bottle_trainval_detect')],
386 | [('2007', 'car_trainval_detect')],
387 | [('2007', 'horse_trainval_detect')],
388 | [('2007', 'sheep_trainval_detect')],
389 | [('2007', 'pottedplant_trainval_detect')]
390 | ]
391 |
392 | for imageset in imagesets:
393 | print(imageset)
394 | # load dataset
395 | dataset = VOCDetection(VOC_ROOT_LOCAL, imageset, BaseTransform(300, config.voc['dataset_mean']), VOCAnnotationTransform())
396 | calculate_density_per_imageset(dataset=dataset,
397 | load_dir_similarities = image_sim_dir)
--------------------------------------------------------------------------------
/ssd.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from torch.autograd import Variable
5 | from layers import *
6 | from data import voc
7 | import os
8 |
9 |
10 | class SSD(nn.Module):
11 | """Single Shot Multibox Architecture
12 | The network is composed of a base VGG network followed by the
13 | added multibox conv layers. Each multibox layer branches into
14 | 1) conv2d for class conf scores
15 | 2) conv2d for localization predictions
16 | 3) associated priorbox layer to produce default bounding
17 | boxes specific to the layer's feature map size.
18 | See: https://arxiv.org/pdf/1512.02325.pdf for more details.
19 |
20 | Also implemented a version predicting a standard deviation per bounding box coordinate, following:
21 | CVPR 2019 paper:
22 | Bounding Box Regression with Uncertainty for Accurate Object Detection
23 | by Yihui He, Chenchen Zhu, Jianren Wang, Marios Savvides, Xiangyu Zhang
24 |
25 | Args:
26 | phase: (string) Can be "test" or "train"
27 | size: input image size
28 | base: VGG16 layers for input, size of either 300 or 512
29 | extras: extra layers that feed to multibox loc and conf layers
30 | head: "multibox head" consists of loc and conf conv layers
31 | """
32 |
33 | def __init__(self, phase, modeltype, base, extras, head, num_classes, default_forward, merging_method, sampling_strategy, sample_select_forward, sample_select_nms_conf_thresh, cfg, forward_vgg_base_only):
34 | super(SSD, self).__init__()
35 | self.phase = phase
36 | self.num_classes = num_classes
37 | self.cfg = cfg
38 | self.priorbox = PriorBox(self.cfg, modeltype)
39 | with torch.no_grad():
40 | self.priors = Variable(self.priorbox.forward())
41 |
42 | # todo: convert to x1y1x2y2 format here if necessary
43 |
44 |
45 | self.size = 300
46 |
47 | # SSD network
48 | self.vgg = nn.ModuleList(base)
49 | # Layer learns to scale the l2 normalized features from conv4_3
50 | self.L2Norm = L2Norm(512, 20)
51 | self.extras = nn.ModuleList(extras)
52 |
53 | self.loc = nn.ModuleList(head[0])
54 | self.modeltype = modeltype
55 | if self.modeltype == 'SSD300KL':
56 | self.loc_std = nn.ModuleList(head[2])
57 | self.conf = nn.ModuleList(head[1])
58 | if self.modeltype in ['SSD300','SSD300KL']:
59 | self.size = 300
60 | else:
61 | raise NotImplementedError()
62 |
63 | if phase == 'test':
64 | self.softmax = nn.Softmax(dim=-1)
65 |
66 |
67 | if sample_select_forward and merging_method in ['bsas','hbdscan','pre_nms_avg']:
68 | conf_thresh = sample_select_nms_conf_thresh # merging boxes can be expensive; with fewer boxes we can apply a more aggressive confidence threshold
69 | else:
70 | conf_thresh = 0.01
71 | # Active Learning parameters added to enable experiments with and usage of Active Learning
72 | self.detect = Detect(num_classes, 0, 200, conf_thresh, 0.45, # default values in paper: num_classes,0,200,0.01,0.45
73 | default_forward,
74 | merging_method,
75 | sampling_strategy,
76 | modeltype)
77 |
78 | self.forward_vgg_base_only = forward_vgg_base_only
79 |
80 |
81 | def forward(self, x):
82 | """Applies network layers and ops on input image(s) x.
83 |
84 | Args:
85 | x: input image or batch of images. Shape: [batch,3,300,300].
86 |
87 | Return:
88 | Depending on phase:
89 | test:
90 | Variable(tensor) of output class label predictions,
91 | confidence score, and corresponding location predictions for
92 | each object detected. Shape: [batch,topk,7]
93 |
94 | train:
95 | list of concat outputs from:
96 | 1: confidence layers, Shape: [batch*num_priors,num_classes]
97 | 2: localization layers, Shape: [batch,num_priors*4]
98 | 3: priorbox layers, Shape: [2,num_priors*4]
99 | """
100 |
101 | sources = list()
102 | loc = list()
103 | conf = list()
104 | if self.modeltype == 'SSD300KL':
105 | loc_std = list()
106 | # apply vgg up to conv4_3 relu
107 | for k in range(23):
108 | # print('debug: apply vgg')
109 | x = self.vgg[k](x)
110 |
111 | if self.forward_vgg_base_only:
112 | return x
113 | # TODO: Why apply L2norm already? => because conv4_3 has larger scale than the rest
114 | s = self.L2Norm(x)
115 | sources.append(s)
116 |
117 | # apply vgg up to fc7 (no FC layers are actually used; conv6/conv7 replace the original FC layers)
118 | for k in range(23, len(self.vgg)):
119 | # print('debug2: apply vgg')
120 | x = self.vgg[k](x)
121 | sources.append(x)
122 |
123 | # apply extra layers and cache source layer outputs
124 | for k, v in enumerate(self.extras):
125 | # print('debug3: apply extra layers')
126 | x = F.relu(v(x), inplace=True)
127 | if k % 2 == 1: # only every second extra layer is used as a source, as in the paper: the extras form blocks of two conv layers
128 | sources.append(x)
129 |
130 | if self.modeltype != 'SSD300KL':
131 | # apply multibox head to source layers
132 | for (x, l, c) in zip(sources, self.loc, self.conf):
133 | # print('debug4: apply multibox head')
134 | loc.append(l(x).permute(0, 2, 3, 1).contiguous())
135 | conf.append(c(x).permute(0, 2, 3, 1).contiguous())
136 |
137 | loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
138 | conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
139 | # print('debug foward 1')
140 | if self.phase == "test":
141 | # if self.sampling_strategy != 'p-max_localization-stability' :
142 | output = self.detect(loc.view(loc.size(0), -1, 4), # loc preds
143 | self.softmax(conf.view(conf.size(0), -1,self.num_classes)), # conf preds
144 | self.priors.type(type(x.data)), # default boxes
145 | )
146 | # else:
147 | # output = self.detect()
148 |
149 | # training phase => no merging or other forwards used
150 | else:
151 | output = (
152 | loc.view(loc.size(0), -1, 4),
153 | conf.view(conf.size(0), -1, self.num_classes),
154 | self.priors
155 | )
156 | else:
157 | # apply multibox head to source layers
158 | for (x, l, c, std) in zip(sources, self.loc, self.conf, self.loc_std):
159 | # print('debug4: apply multibox head')
160 | loc.append(l(x).permute(0, 2, 3, 1).contiguous())
161 | conf.append(c(x).permute(0, 2, 3, 1).contiguous())
162 | loc_std.append(std(x).permute(0, 2, 3, 1).contiguous())
163 |
164 | loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
165 | conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
166 | loc_std = torch.cat([o.view(o.size(0), -1) for o in loc_std], 1)
167 |
168 | if self.phase == "test":
169 | # during training alpha = log(sigma^2), during testing, this needs to be converted back
170 | loc_std = torch.exp(loc_std)
171 |
172 | output = self.detect(loc.view(loc.size(0), -1, 4), # loc preds
173 | self.softmax(conf.view(conf.size(0), -1,self.num_classes)), # conf preds
174 | self.priors.type(type(x.data)), # default boxes
175 | torch.abs(loc_std.view(loc_std.size(0), -1, 4)) # alphas (predicted log of std deviations of loc preds)
176 | )
177 | else:
178 | # during training, alpha = log(sigma^2) is predicted
179 | output = (
180 | loc.view(loc.size(0), -1, 4),
181 | conf.view(conf.size(0), -1, self.num_classes),
182 | self.priors,
183 | torch.abs(loc_std.view(loc_std.size(0), -1, 4)) #alphas
184 | )
185 |
186 | return output
187 |
188 |
189 | def load_weights(self, base_file):
190 | other, ext = os.path.splitext(base_file)
191 | if ext in ('.pkl', '.pth'):
192 | print('Loading weights into state dict...')
193 | self.load_state_dict(torch.load(base_file,
194 | map_location=lambda storage, loc: storage))
195 | print('Finished!')
196 | else:
197 | print('Sorry only .pth and .pkl files supported.')
198 | def vgg(cfg, i, batch_norm=False):
199 | layers = []
200 | in_channels = i
201 | for v in cfg:
202 | if v == 'M':
203 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
204 | elif v == 'C': #TODO: ceil mode not used in https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py => impacts output shape
205 | layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)]
206 | else:
207 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
208 | if batch_norm:
209 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
210 | else:
211 | layers += [conv2d, nn.ReLU(inplace=True)]
212 | in_channels = v
213 | pool5 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
214 | conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6) # à trous algorithm (dilated conv)
215 | conv7 = nn.Conv2d(1024, 1024, kernel_size=1)
216 | layers += [pool5, conv6,
217 | nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True)]
218 | return layers
219 |
220 |
221 | # This function is derived from torchvision VGG make_layers()
222 | # https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py
223 | def add_extras(cfg, i, batch_norm=False):
224 | # Extra layers added to VGG for feature scaling
225 | layers = []
226 | in_channels = i
227 | flag = False
228 | for k, v in enumerate(cfg):
229 | if in_channels != 'S':
230 | if v == 'S':
231 | layers += [nn.Conv2d(in_channels, cfg[k + 1],
232 | kernel_size=(1, 3)[flag], stride=2, padding=1)]
233 | else:
234 | layers += [nn.Conv2d(in_channels, v, kernel_size=(1, 3)[flag])]
235 | flag = not flag
236 | in_channels = v
237 | return layers
238 |
239 |
240 |
241 | def multibox(vgg, extra_layers, cfg, num_classes, model_type):
242 | #cfg = number of boxes per feature map location
243 |
244 | loc_layers = []
245 | conf_layers = []
246 | vgg_source = [21, -2]
247 | if model_type != 'SSD300KL':
248 | for k, v in enumerate(vgg_source):
249 | loc_layers += [nn.Conv2d(vgg[v].out_channels,
250 | cfg[k] * 4, kernel_size=3,
251 | padding=1)] # 4 is for the 4 corners of the bounding box
252 | conf_layers += [nn.Conv2d(vgg[v].out_channels,
253 | cfg[k] * num_classes, kernel_size=3,
254 | padding=1)] # out = #boxes*classes (per feature map)
255 |
256 | for k, v in enumerate(extra_layers[1::2], 2):
257 | loc_layers += [nn.Conv2d(v.out_channels, cfg[k]
258 | * 4, kernel_size=3, padding=1)]
259 | conf_layers += [nn.Conv2d(v.out_channels, cfg[k]
260 | * num_classes, kernel_size=3, padding=1)]
261 | return vgg, extra_layers, (loc_layers, conf_layers)
262 |
263 | else:
264 | """
265 | Also predict a standard deviation per bounding box coordinate, from CVPR 2019 paper:
266 | Bounding Box Regression with Uncertainty for Accurate Object Detection
267 | by Yihui He, Chenchen Zhu, Jianren Wang, Marios Savvides, Xiangyu Zhang
268 | """
269 | loc_std_layers = []
270 | for k, v in enumerate(vgg_source):
271 | loc_layers += [nn.Conv2d(vgg[v].out_channels,
272 | cfg[k] * 4, kernel_size=3,
273 | padding=1)] # 4 is for the 4 corners of the bounding box
274 | loc_std_layers += [nn.Conv2d(vgg[v].out_channels,
275 | cfg[k] * 4, kernel_size=3,
276 | padding=1)] # 4 is for the 4 corners of the bounding box
277 |
278 | conf_layers += [nn.Conv2d(vgg[v].out_channels,
279 | cfg[k] * num_classes, kernel_size=3,
280 | padding=1)] # out = #boxes*classes (per feature map)
281 |
282 | for k, v in enumerate(extra_layers[1::2], 2):
283 | loc_layers += [nn.Conv2d(v.out_channels, cfg[k]
284 | * 4, kernel_size=3, padding=1)]
285 |
286 | loc_std_layers += [nn.Conv2d(v.out_channels, cfg[k]
287 | * 4, kernel_size=3, padding=1)]
288 |
289 | conf_layers += [nn.Conv2d(v.out_channels, cfg[k]
290 | * num_classes, kernel_size=3, padding=1)]
291 |
292 | return vgg, extra_layers, (loc_layers, conf_layers, loc_std_layers)
293 |
294 |
295 | # 300D is SSD300 with dropout layers, to be able to make it Bayesian using MC-Dropout
296 | # TODO: upconvolution first and then downconvolution?? NOPE => these are the channel counts!
297 | base = {
298 | '300': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M',
299 | 512, 512, 512],
300 | '512': [],
301 | # '300D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512,'D', 'M',
302 | # 512, 512, 512,'D']
303 | }
304 | # todo: should the dropout layers be in between base and extras? and also between
305 | extras = {
306 | '300': [256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256],
307 | '512': [],
308 | '300D': [256, 'S', 512, 128, 'D', 'S', 256, 128, 256, 128, 256]
309 | }
310 | mbox = {
311 | '300': [4, 6, 6, 6, 4, 4], # number of boxes per feature map location
312 | '512': [],
313 | # '300D': [4, 6, 6, 6, 4, 4]
314 | }
315 |
316 |
317 | def build_ssd(phase, model_type='SSD300', num_classes=21, default_forward = True, merging_method = None, sampling_strategy = None, sample_select_forward = False, sample_select_nms_conf_thresh = None, cfg = None, forward_vgg_base_only = False):
318 | " Active learning parameter here is the sample selection part"
319 |
320 | if phase != "test" and phase != "train":
321 | print("ERROR: Phase: " + phase + " not recognized")
322 | return
323 | if model_type not in ['SSD300','SSD300KL']:
324 | print("ERROR: You specified size " + repr(model_type) + ". However, " +
325 | "currently only SSD300 (size=300) is supported!")
326 | return
327 |
328 | if model_type in ['SSD300','SSD300KL']: # if wished add other SSD models with input dim 300 to this list
329 | size = 300
330 |
331 | base_, extras_, head_ = multibox(vgg(base[str(size)], 3),
332 | add_extras(extras[str(size)], 1024),
333 | mbox[str(size)], num_classes, model_type) #cfg
334 | return SSD(phase, model_type, base_, extras_, head_, num_classes, default_forward, merging_method, sampling_strategy, sample_select_forward, sample_select_nms_conf_thresh, cfg, forward_vgg_base_only)
335 |
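# A minimal usage sketch (hedged: assumes the VOC config imported above is a
# valid cfg; shapes follow the forward() docstring, 8732 priors for SSD300):
# >>> net = build_ssd('train', model_type='SSD300', num_classes=21, cfg=voc)
# >>> loc, conf, priors = net(torch.randn(1, 3, 300, 300))
# >>> loc.shape, conf.shape
# (torch.Size([1, 8732, 4]), torch.Size([1, 8732, 21]))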
--------------------------------------------------------------------------------
/layers/box_utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import torch
3 |
4 |
5 | def point_form(boxes):
6 | """ Convert prior_boxes to (xmin, ymin, xmax, ymax)
7 | representation for comparison to point form ground truth data.
8 | Args:
9 | boxes: (tensor) center-size default boxes from priorbox layers.
10 | Return:
11 | boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes.
12 | """
13 | return torch.cat((boxes[:, :2] - boxes[:, 2:]/2, # xmin, ymin
14 | boxes[:, :2] + boxes[:, 2:]/2), 1) # xmax, ymax
15 |
16 |
17 | def center_size(boxes):
18 | """ Convert prior_boxes to (cx, cy, w, h)
19 | representation for comparison to center-size form ground truth data.
20 | Args:
21 | boxes: (tensor) point_form boxes
22 | Return:
23 | boxes: (tensor) Converted (cx, cy, w, h) form of boxes.
24 | """
25 | return torch.cat(((boxes[:, 2:] + boxes[:, :2])/2, # cx, cy
26 | boxes[:, 2:] - boxes[:, :2]), 1) # w, h
27 |
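# The two conversions are inverses of each other. A quick worked example:
# >>> b = torch.tensor([[0.2, 0.2, 0.6, 0.8]])   # (xmin, ymin, xmax, ymax)
# >>> center_size(b)
# tensor([[0.4000, 0.5000, 0.4000, 0.6000]])     # (cx, cy, w, h)
# >>> point_form(center_size(b))
# tensor([[0.2000, 0.2000, 0.6000, 0.8000]])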
28 | # def center_size2(boxes):
29 | # """
30 | #
31 | # :param boxes:
32 | # :return:
33 | # """
34 | #
35 | # output_boxes[:, 0] = (input_boxes[:, 2] + input_boxes[:, 0]) / 2
36 | # output_boxes[:, 1] = (input_boxes[:, 3] + input_boxes[:, 1]) / 2
37 | # output_boxes[:, 2] = input_boxes[:, 2] - input_boxes[:, 0]
38 | # output_boxes[:, 3] = input_boxes[:, 3] - input_boxes[:, 1]
39 | # return torch.cat((boxes[:, 2:] + (torch.abs(boxes[:, :2]))/2, # cx, cy
40 | # boxes[:, 2:] - boxes[:, :2]), 1) # w, h
41 |
42 | def intersect(box_a, box_b):
43 | """ We resize both tensors to [A,B,2] without new malloc:
44 | [A,2] -> [A,1,2] -> [A,B,2]
45 | [B,2] -> [1,B,2] -> [A,B,2]
46 | Then we compute the area of intersect between box_a and box_b.
47 | Args:
48 | box_a: (tensor) bounding boxes, Shape: [A,4].
49 | box_b: (tensor) bounding boxes, Shape: [B,4].
50 | Return:
51 | (tensor) intersection area, Shape: [A,B].
52 | """
53 | A = box_a.size(0)
54 | B = box_b.size(0)
55 | max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2),
56 | box_b[:, 2:].unsqueeze(0).expand(A, B, 2))
57 | min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2),
58 | box_b[:, :2].unsqueeze(0).expand(A, B, 2))
59 | inter = torch.clamp((max_xy - min_xy), min=0)
60 | return inter[:, :, 0] * inter[:, :, 1]
61 |
62 |
63 | def jaccard(box_a, box_b):
64 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap
65 | is simply the intersection over union of two boxes. Here we operate on
66 | ground truth boxes and default boxes.
67 | E.g.:
68 | A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
69 | Args:
70 | box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4]
71 | box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4]
72 | Return:
73 | jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
74 | """
75 | inter = intersect(box_a, box_b)
76 | area_a = ((box_a[:, 2]-box_a[:, 0]) *
77 | (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B]
78 | area_b = ((box_b[:, 2]-box_b[:, 0]) *
79 | (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B]
80 | union = area_a + area_b - inter
81 | return inter / union # [A,B]
82 |
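# Worked example: two unit squares offset by half a side overlap in a 0.5 x 0.5
# patch, so IoU = 0.25 / (1 + 1 - 0.25) = 1/7.
# >>> a = torch.tensor([[0.0, 0.0, 1.0, 1.0]])
# >>> b = torch.tensor([[0.5, 0.5, 1.5, 1.5]])
# >>> jaccard(a, b)
# tensor([[0.1429]])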
83 |
84 | def match(threshold, truths, priors, variances, labels, loc_t, conf_t, idx,
85 | modeltype = 'SSD300'):
86 | """Match each prior box with the ground truth box of the highest jaccard
87 | overlap, encode the bounding boxes, then return the matched indices
88 | corresponding to both confidence and location preds.
89 |
90 | For KL Loss, we need to predict everything in x1y1x2y2 format. So the matching should not transform to center form
91 |
92 | Args:
93 | threshold: (float) The overlap threshold used when matching boxes.
94 | truths: (tensor) Ground truth boxes, Shape: [num_obj, 4].
95 | priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4].
96 | variances: (tensor) Variances corresponding to each prior coord,
97 | Shape: [num_priors, 4].
98 | labels: (tensor) All the class labels for the image, Shape: [num_obj].
99 | loc_t: (tensor) Tensor to be filled w/ encoded location targets.
100 | conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds.
101 | idx: (int) current batch index
102 | Return:
103 | The matched indices corresponding to 1) location and 2) confidence preds.
104 | """
105 | # jaccard index
106 | # if modeltype != 'SSD300KL':
107 | overlaps = jaccard(
108 | truths,
109 | point_form(priors)
110 | )
111 | # else:
112 | # overlaps = jaccard(
113 | # truths,
114 | # priors # already in point form if KL-Loss is used
115 | # )
116 | # (Bipartite Matching)
117 | # [1,num_objects] best prior for each ground truth
118 | best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True)
119 | # [1,num_priors] best ground truth for each prior
120 | best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True)
121 | best_truth_idx.squeeze_(0)
122 | best_truth_overlap.squeeze_(0)
123 | best_prior_idx.squeeze_(1)
124 | best_prior_overlap.squeeze_(1)
125 | best_truth_overlap.index_fill_(0, best_prior_idx, 2) # ensure best prior
126 | # TODO refactor: index best_prior_idx with long tensor
127 | # ensure every gt matches with its prior of max overlap
128 | for j in range(best_prior_idx.size(0)):
129 | best_truth_idx[best_prior_idx[j]] = j
130 | matches = truths[best_truth_idx] # Shape: [num_priors,4]
131 | conf = labels[best_truth_idx] + 1 # Shape: [num_priors]
132 | conf[best_truth_overlap < threshold] = 0 # label as background
133 |
134 | # If KL Loss, encoding shouldn't use center offset form
135 | loc = encode(matches, priors, variances, modeltype)
136 |
137 |
138 | loc_t[idx] = loc # [num_priors,4] encoded offsets to learn
139 | conf_t[idx] = conf # [num_priors] top class label for each prior
140 |
141 |
142 | def encode(matched, priors, variances, modeltype = 'SSD300'):
143 | """
144 | Use of 'variance' not discussed in paper, see blogpost: https://leimao.github.io/blog/Bounding-Box-Encoding-Decoding/
145 | corroborated by original author: https://github.com/weiliu89/caffe/issues/155#issuecomment-243541464
146 | and more: https://github.com/rykov8/ssd_keras/issues/53
147 |
148 | Encode the variances from the priorbox layers into the ground truth boxes
149 | we have matched (based on jaccard overlap) with the prior boxes.
150 | Args:
151 | matched: (tensor) Coords of ground truth for each prior in point-form
152 | Shape: [num_priors, 4].
153 | priors: (tensor) Prior boxes in center-offset form
154 | Shape: [num_priors,4].
155 | variances: (list[float]) Variances of priorboxes
156 | Return:
157 | encoded boxes (tensor), Shape: [num_priors, 4]
158 | """
159 |
160 | # transform everything back to center-form
161 |
162 |
163 | # dist b/t match center and prior's center
164 | g_cxcy = (matched[:, :2] + matched[:, 2:])/2 - priors[:, :2]
165 | # encode variance
166 | # todo: check whether this is handled correctly, priors is in point-form
167 | g_cxcy /= (variances[0] * priors[:, 2:])
168 | # match wh / prior wh
169 | g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:]
170 | g_wh = torch.log(g_wh) / variances[1]
171 | # return target for smooth_l1_loss
172 | # todo
173 | if modeltype == 'SSD300KL':
174 | # transform to x1y1x2y2 form
175 | return point_form(torch.cat([g_cxcy, g_wh], 1)) # [num_priors,4]
176 |
177 | else:
178 | return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4]
179 |
180 |
181 | # Adapted from https://github.com/Hakuyume/chainer-ssd
182 | def decode(loc, priors, variances, modeltype = 'SSD300'):
183 | """
184 | Use of 'variance' not discussed in paper, see blogpost: https://leimao.github.io/blog/Bounding-Box-Encoding-Decoding/
185 |
186 | Decode locations from predictions using priors to undo
187 | the encoding we did for offset regression at train time.
188 | Args:
189 | loc (tensor): location predictions for loc layers,
190 | Shape: [num_priors,4]
191 | priors (tensor): Prior boxes in center-offset form.
192 | Shape: [num_priors,4].
193 | variances: (list[float]) Variances of priorboxes
194 | Return:
195 | decoded bounding box predictions; decoded in x1y1x2y2-form with x1y1 at upper left and x2y2 lower right, all in a range [0,1]
196 | """
197 |
198 | # todo: check whether converting back and forth between coordinate forms goes well
199 |
200 |
201 | if modeltype == 'SSD300KL':
202 | # transform predictions from x1y1x2y2 to cx, cy, w, h form. The variances are precalculated cx,cy,w,h variances
203 | loc = center_size(loc)
204 |
205 | boxes = torch.cat((
206 | priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
207 | priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
208 | boxes[:, :2] -= boxes[:, 2:] / 2
209 | boxes[:, 2:] += boxes[:, :2]
210 |
211 | return boxes # [num_priors,4]
212 |
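# encode() and decode() are inverses (up to floating point). A sketch assuming
# the usual SSD variances [0.1, 0.2]:
# >>> priors = torch.tensor([[0.5, 0.5, 0.4, 0.6]])   # center-offset form
# >>> gt = torch.tensor([[0.3, 0.2, 0.7, 0.8]])       # matched gt, point form
# >>> decode(encode(gt, priors, [0.1, 0.2]), priors, [0.1, 0.2])
# tensor([[0.3000, 0.2000, 0.7000, 0.8000]])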
213 |
214 | # def batch_decode(loc, priors, variances):
215 | # """
216 | # Same as decode, but adjusted to work for batches
217 | #
218 | # Decode locations from predictions using priors to undo
219 | # the encoding we did for offset regression at train time.
220 | # Args:
221 | # loc (tensor): location predictions for loc layers,
222 | # Shape: [ensemble_size,batch,num_priors,4]
223 | # priors (tensor): Prior boxes in center-offset form.
224 | # Shape: [ensemble_size,batch, num_priors,4].
225 | # variances: (list[float]) Variances of priorboxes
226 | # Return:
227 | # decoded bounding box predictions
228 | # """
229 | # boxes = torch.cat((
230 | # priors[:,:, :2] + loc[:,:, :2] * variances[0] * priors[:,:, 2:],
231 | # priors[:,:, 2:] * torch.exp(loc[:,:, 2:] * variances[1])), 1)
232 | # boxes[:,:, :2] -= boxes[:,:, 2:] / 2
233 | # boxes[:,:, 2:] += boxes[:,:, :2]
234 | # return boxes
235 |
236 | def log_sum_exp(x):
237 | """Utility function for computing log_sum_exp while determining
238 | This will be used to determine unaveraged confidence loss across
239 | all examples in a batch.
240 | Args:
241 | x (Variable(tensor)): conf_preds from conf layers
242 | """
243 | x_max = x.data.max()
244 | return torch.log(torch.sum(torch.exp(x-x_max), 1, keepdim=True)) + x_max
245 |
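# Subtracting x_max first keeps exp() from overflowing; the identity used is
#   log(sum_i exp(x_i)) = x_max + log(sum_i exp(x_i - x_max))
# >>> x = torch.tensor([[1000.0, 1000.0]])
# >>> log_sum_exp(x)      # a naive exp(1000.) would overflow to inf
# tensor([[1000.6931]])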
246 |
247 | # Original author: Francisco Massa:
248 | # https://github.com/fmassa/object-detection.torch
249 | # Ported to PyTorch by Max deGroot (02/01/2017)
250 | def nms(boxes, scores, overlap=0.5, top_k=200): # todo: overlap default in paper 0.45
251 | """Apply non-maximum suppression at test time to avoid detecting too many
252 | overlapping bounding boxes for a given object.
253 | Args:
254 | boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
255 | scores: (tensor) The class predscores for the img, Shape:[num_priors].
256 | overlap: (float) The overlap thresh for suppressing unnecessary boxes.
257 | top_k: (int) The Maximum number of box preds to consider. (default in paper = 200)
258 | Return:
259 | The indices of the kept boxes with respect to num_priors.
260 |
261 | todo: pure numpy implementation might be faster according to the issues on github
262 | possible implementation https://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/
263 | """
264 |
265 | keep = scores.new(scores.size(0)).zero_().long()
266 | if boxes.numel() == 0: #number of elements
267 | return keep # for a class, there are no bounding boxes
268 | x1 = boxes[:, 0]
269 | y1 = boxes[:, 1]
270 | x2 = boxes[:, 2]
271 | y2 = boxes[:, 3]
272 | area = torch.mul(x2 - x1, y2 - y1)
273 | v, idx = scores.sort(0) # sort in ascending order
274 | # I = I[v >= 0.01]
275 | idx = idx[-top_k:] # indices of the top-k largest vals
276 | xx1 = boxes.new()
277 | yy1 = boxes.new()
278 | xx2 = boxes.new()
279 | yy2 = boxes.new()
280 | w = boxes.new()
281 | h = boxes.new()
282 |
283 | # keep = torch.Tensor()
284 | count = 0
285 | while idx.numel() > 0:
286 | i = idx[-1] # index of current largest val
287 | # keep.append(i)
288 | keep[count] = i
289 | count += 1
290 | if idx.size(0) == 1:
291 | break
292 | idx = idx[:-1] # remove kept element from view
293 | # load bboxes of next highest vals
294 | torch.index_select(x1, 0, idx, out=xx1)
295 | torch.index_select(y1, 0, idx, out=yy1)
296 | torch.index_select(x2, 0, idx, out=xx2)
297 | torch.index_select(y2, 0, idx, out=yy2)
298 | # store element-wise max with next highest score
299 | xx1 = torch.clamp(xx1, min=x1[i])
300 | yy1 = torch.clamp(yy1, min=y1[i])
301 | xx2 = torch.clamp(xx2, max=x2[i])
302 | yy2 = torch.clamp(yy2, max=y2[i])
303 | w.resize_as_(xx2)
304 | h.resize_as_(yy2)
305 | w = xx2 - xx1
306 | h = yy2 - yy1
307 | # check sizes of xx1 and xx2.. after each iteration
308 | w = torch.clamp(w, min=0.0)
309 | h = torch.clamp(h, min=0.0)
310 | inter = w*h
311 | # IoU = i / (area(a) + area(b) - i)
312 | rem_areas = torch.index_select(area, 0, idx) # load remaining areas
313 | union = (rem_areas - inter) + area[i]
314 | IoU = inter/union # store result in iou
315 |
316 | # keep only elements with an IoU <= overlap
317 | # print(IoU.le(overlap)) #le = less or equal, creates a binary mask
318 | idx = idx[IoU.le(overlap)]
319 | return keep, count
320 |
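# A small usage sketch (illustrative values): two heavily overlapping boxes and
# one separate box; with overlap=0.5 the weaker duplicate is suppressed.
# >>> boxes = torch.tensor([[0.10, 0.1, 0.5, 0.5],
# ...                       [0.12, 0.1, 0.5, 0.5],
# ...                       [0.60, 0.6, 0.9, 0.9]])
# >>> scores = torch.tensor([0.9, 0.8, 0.7])
# >>> keep, count = nms(boxes, scores, overlap=0.5)
# >>> keep[:count]
# tensor([0, 2])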
321 | #
322 | # def nms_uncertainty_sampling(boxes, scores, overlap=0.5, top_k=200, object_treshold = None):
323 | # """
324 | # This function takes (un)certainty scores and bounding boxes, and returns the top b
325 | #
326 | #
327 | # """
328 | #
329 | # keep = scores.new(scores.size(0)).zero_().long()
330 | # if boxes.numel() == 0: #number of elements
331 | # return keep # for a class, there are no bounding boxes
332 | # x1 = boxes[:, 0]
333 | # y1 = boxes[:, 1]
334 | # x2 = boxes[:, 2]
335 | # y2 = boxes[:, 3]
336 | # area = torch.mul(x2 - x1, y2 - y1)
337 | # v, idx = scores.sort(0) # sort in ascending order
338 | # # I = I[v >= 0.01]
339 | # idx = idx[-top_k:] # indices of the top-k largest vals
340 | # xx1 = boxes.new()
341 | # yy1 = boxes.new()
342 | # xx2 = boxes.new()
343 | # yy2 = boxes.new()
344 | # w = boxes.new()
345 | # h = boxes.new()
346 | #
347 | # # keep = torch.Tensor()
348 | # count = 0
349 | # while idx.numel() > 0:
350 | # i = idx[-1] # index of current largest val
351 | # # keep.append(i)
352 | # keep[count] = i
353 | # count += 1
354 | # if idx.size(0) == 1:
355 | # break
356 | # idx = idx[:-1] # remove kept element from view
357 | # # load bboxes of next highest vals
358 | # torch.index_select(x1, 0, idx, out=xx1)
359 | # torch.index_select(y1, 0, idx, out=yy1)
360 | # torch.index_select(x2, 0, idx, out=xx2)
361 | # torch.index_select(y2, 0, idx, out=yy2)
362 | # # store element-wise max with next highest score
363 | # xx1 = torch.clamp(xx1, min=x1[i])
364 | # yy1 = torch.clamp(yy1, min=y1[i])
365 | # xx2 = torch.clamp(xx2, max=x2[i])
366 | # yy2 = torch.clamp(yy2, max=y2[i])
367 | # w.resize_as_(xx2)
368 | # h.resize_as_(yy2)
369 | # w = xx2 - xx1
370 | # h = yy2 - yy1
371 | # # check sizes of xx1 and xx2.. after each iteration
372 | # w = torch.clamp(w, min=0.0)
373 | # h = torch.clamp(h, min=0.0)
374 | # inter = w*h
375 | # # IoU = i / (area(a) + area(b) - i)
376 | # rem_areas = torch.index_select(area, 0, idx) # load remaining areas)
377 | # union = (rem_areas - inter) + area[i]
378 | # IoU = inter/union # store result in iou
379 | #
380 | # # keep only elements with an IoU <= overlap
381 | # print(IoU.le(overlap)) #le = less or equal, creates a binary mask
382 | # idx = idx[IoU.le(overlap)]
383 | # return keep, count
384 |
--------------------------------------------------------------------------------
/utils/augmentations.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torchvision import transforms
3 | import cv2
4 | import numpy as np
5 | import types
6 | from numpy import random
7 |
8 |
9 | def intersect(box_a, box_b):
10 | max_xy = np.minimum(box_a[:, 2:], box_b[2:])
11 | min_xy = np.maximum(box_a[:, :2], box_b[:2])
12 | inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf)
13 | return inter[:, 0] * inter[:, 1]
14 |
15 |
16 | def jaccard_numpy(box_a, box_b):
17 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap
18 | is simply the intersection over union of two boxes.
19 | E.g.:
20 | A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
21 | Args:
22 | box_a: Multiple bounding boxes, Shape: [num_boxes,4]
23 | box_b: Single bounding box, Shape: [4]
24 | Return:
25 | jaccard overlap: Shape: [box_a.shape[0]]
26 | """
27 | inter = intersect(box_a, box_b)
28 | area_a = ((box_a[:, 2]-box_a[:, 0]) *
29 | (box_a[:, 3]-box_a[:, 1])) # [A,B]
30 | area_b = ((box_b[2]-box_b[0]) *
31 | (box_b[3]-box_b[1])) # [A,B]
32 | union = area_a + area_b - inter
33 | return inter / union # [A,B]
34 |
35 |
36 | class Compose(object):
37 | """Composes several augmentations together.
38 | Args:
39 | transforms (List[Transform]): list of transforms to compose.
40 | Example:
41 | >>> augmentations.Compose([
42 | >>> transforms.CenterCrop(10),
43 | >>> transforms.ToTensor(),
44 | >>> ])
45 | """
46 |
47 | def __init__(self, transforms):
48 | self.transforms = transforms
49 |
50 | def __call__(self, img, boxes=None, labels=None):
51 | for t in self.transforms:
52 | img, boxes, labels = t(img, boxes, labels)
53 | return img, boxes, labels
54 |
55 |
56 | class Lambda(object):
57 | """Applies a lambda as a transform."""
58 |
59 | def __init__(self, lambd):
60 | assert isinstance(lambd, types.LambdaType)
61 | self.lambd = lambd
62 |
63 | def __call__(self, img, boxes=None, labels=None):
64 | return self.lambd(img, boxes, labels)
65 |
66 |
67 | class ConvertFromInts(object):
68 | def __call__(self, image, boxes=None, labels=None):
69 | return image.astype(np.float32), boxes, labels
70 |
71 |
72 | class SubtractMeans(object):
73 | def __init__(self, mean):
74 | self.mean = np.array(mean, dtype=np.float32)
75 |
76 | def __call__(self, image, boxes=None, labels=None):
77 | image = image.astype(np.float32)
78 | image -= self.mean
79 | return image.astype(np.float32), boxes, labels
80 |
81 |
82 | class ToAbsoluteCoords(object):
83 | def __call__(self, image, boxes=None, labels=None):
84 | height, width, channels = image.shape
85 | boxes[:, 0] *= width
86 | boxes[:, 2] *= width
87 | boxes[:, 1] *= height
88 | boxes[:, 3] *= height
89 |
90 | return image, boxes, labels
91 |
92 |
93 | class ToPercentCoords(object):
94 | def __call__(self, image, boxes=None, labels=None):
95 | height, width, channels = image.shape
96 | boxes[:, 0] /= width
97 | boxes[:, 2] /= width
98 | boxes[:, 1] /= height
99 | boxes[:, 3] /= height
100 |
101 | return image, boxes, labels
102 |
103 |
104 | class Resize(object):
105 | def __init__(self, size=300):
106 | self.size = size
107 |
108 | def __call__(self, image, boxes=None, labels=None):
109 | image = cv2.resize(image, (self.size,
110 | self.size))
111 | return image, boxes, labels
112 |
113 |
114 | class RandomSaturation(object):
115 | def __init__(self, lower=0.5, upper=1.5):
116 | self.lower = lower
117 | self.upper = upper
118 | assert self.upper >= self.lower, "saturation upper must be >= lower."
119 | assert self.lower >= 0, "saturation lower must be non-negative."
120 |
121 | def __call__(self, image, boxes=None, labels=None):
122 | if random.randint(2):
123 | image[:, :, 1] *= random.uniform(self.lower, self.upper)
124 |
125 | return image, boxes, labels
126 |
127 |
128 | class RandomHue(object):
129 | def __init__(self, delta=18.0):
130 | assert delta >= 0.0 and delta <= 360.0
131 | self.delta = delta
132 |
133 | def __call__(self, image, boxes=None, labels=None):
134 | if random.randint(2):
135 | image[:, :, 0] += random.uniform(-self.delta, self.delta)
136 | image[:, :, 0][image[:, :, 0] > 360.0] -= 360.0
137 | image[:, :, 0][image[:, :, 0] < 0.0] += 360.0
138 | return image, boxes, labels
139 |
140 |
141 | class RandomLightingNoise(object):
142 | def __init__(self):
143 | self.perms = ((0, 1, 2), (0, 2, 1),
144 | (1, 0, 2), (1, 2, 0),
145 | (2, 0, 1), (2, 1, 0))
146 |
147 | def __call__(self, image, boxes=None, labels=None):
148 | if random.randint(2):
149 | swap = self.perms[random.randint(len(self.perms))]
150 | shuffle = SwapChannels(swap) # shuffle channels
151 | image = shuffle(image)
152 | return image, boxes, labels
153 |
154 |
155 | class ConvertColor(object):
156 | def __init__(self, current='BGR', transform='HSV'):
157 | self.transform = transform
158 | self.current = current
159 |
160 | def __call__(self, image, boxes=None, labels=None):
161 | if self.current == 'BGR' and self.transform == 'HSV':
162 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
163 | elif self.current == 'HSV' and self.transform == 'BGR':
164 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
165 | else:
166 | raise NotImplementedError
167 | return image, boxes, labels
168 |
169 |
170 | class RandomContrast(object):
171 | def __init__(self, lower=0.5, upper=1.5):
172 | self.lower = lower
173 | self.upper = upper
174 | assert self.upper >= self.lower, "contrast upper must be >= lower."
175 | assert self.lower >= 0, "contrast lower must be non-negative."
176 |
177 | # expects float image
178 | def __call__(self, image, boxes=None, labels=None):
179 | if random.randint(2):
180 | alpha = random.uniform(self.lower, self.upper)
181 | image *= alpha
182 | return image, boxes, labels
183 |
184 |
185 | class RandomBrightness(object):
186 | def __init__(self, delta=32):
187 | assert delta >= 0.0
188 | assert delta <= 255.0
189 | self.delta = delta
190 |
191 | def __call__(self, image, boxes=None, labels=None):
192 | if random.randint(2):
193 | delta = random.uniform(-self.delta, self.delta)
194 | image += delta
195 | return image, boxes, labels
196 |
197 |
198 | class ToCV2Image(object):
199 | def __call__(self, tensor, boxes=None, labels=None):
200 | return tensor.cpu().numpy().astype(np.float32).transpose((1, 2, 0)), boxes, labels
201 |
202 |
203 | class ToTensor(object):
204 | def __call__(self, cvimage, boxes=None, labels=None):
205 | return torch.from_numpy(cvimage.astype(np.float32)).permute(2, 0, 1), boxes, labels
206 |
207 |
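# Illustrative round-trip sketch (an assumption for clarity, not part of the
# original file): ToTensor moves an HxWxC OpenCV image into torch's CxHxW
# layout, and ToCV2Image undoes it.
#   t, _, _ = ToTensor()(np.zeros((300, 300, 3), dtype=np.float32))  # -> (3, 300, 300)
#   img, _, _ = ToCV2Image()(t)                                      # -> (300, 300, 3)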
208 | class RandomSampleCrop(object):
209 |     """Randomly crop a patch from the image, keeping only boxes whose centers fall inside it.
210 | Arguments:
211 | img (Image): the image being input during training
212 | boxes (Tensor): the original bounding boxes in pt form
213 | labels (Tensor): the class labels for each bbox
214 | mode (float tuple): the min and max jaccard overlaps
215 | Return:
216 | (img, boxes, classes)
217 | img (Image): the cropped image
218 | boxes (Tensor): the adjusted bounding boxes in pt form
219 | labels (Tensor): the class labels for each bbox
220 | """
221 | def __init__(self):
222 | self.sample_options = (
223 | # using entire original input image
224 | None,
225 |             # sample a patch s.t. MIN jaccard w/ obj in .1,.3,.7,.9
226 | (0.1, None),
227 | (0.3, None),
228 | (0.7, None),
229 | (0.9, None),
230 | # randomly sample a patch
231 | (None, None),
232 | )
233 |
234 | def __call__(self, image, boxes=None, labels=None):
235 | height, width, _ = image.shape
236 | while True:
237 | # randomly choose a mode
238 | mode = random.choice(self.sample_options)
239 | if mode is None:
240 | return image, boxes, labels
241 |
242 | min_iou, max_iou = mode
243 | if min_iou is None:
244 | min_iou = float('-inf')
245 | if max_iou is None:
246 | max_iou = float('inf')
247 |
248 |             # max trials (50)
249 | for _ in range(50):
250 | current_image = image
251 |
252 | w = random.uniform(0.3 * width, width)
253 | h = random.uniform(0.3 * height, height)
254 |
255 | # aspect ratio constraint b/t .5 & 2
256 | if h / w < 0.5 or h / w > 2:
257 | continue
258 |
259 |                 left = random.uniform(0, width - w)
260 |                 top = random.uniform(0, height - h)
261 |
262 | # convert to integer rect x1,y1,x2,y2
263 | rect = np.array([int(left), int(top), int(left+w), int(top+h)])
264 |
265 | # calculate IoU (jaccard overlap) b/t the cropped and gt boxes
266 | overlap = jaccard_numpy(boxes, rect)
267 |
268 |                 # if the min/max overlap constraint is violated, sample again
269 |                 if overlap.min() < min_iou or overlap.max() > max_iou:
270 | continue
271 |
272 | # cut the crop from the image
273 | current_image = current_image[rect[1]:rect[3], rect[0]:rect[2],
274 | :]
275 |
276 | # keep overlap with gt box IF center in sampled patch
277 | centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0
278 |
279 |                 # mask in all gt boxes whose center lies right of and below the crop's top-left corner
280 | m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])
281 |
282 |                 # mask in all gt boxes whose center lies left of and above the crop's bottom-right corner
283 | m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])
284 |
285 |                 # keep only the boxes for which both m1 and m2 hold
286 | mask = m1 * m2
287 |
288 | # have any valid boxes? try again if not
289 | if not mask.any():
290 | continue
291 |
292 | # take only matching gt boxes
293 | current_boxes = boxes[mask, :].copy()
294 |
295 | # take only matching gt labels
296 | current_labels = labels[mask]
297 |
298 |                 # clip the box's top-left corner to the crop's top-left corner
299 | current_boxes[:, :2] = np.maximum(current_boxes[:, :2],
300 | rect[:2])
301 |                 # adjust to crop (by subtracting crop's left,top)
302 | current_boxes[:, :2] -= rect[:2]
303 |
304 | current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:],
305 | rect[2:])
306 |                 # adjust to crop (by subtracting crop's left,top)
307 | current_boxes[:, 2:] -= rect[:2]
308 |
309 | return current_image, current_boxes, current_labels
310 |
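# Illustrative usage sketch (an assumption, not from the original file): boxes
# must be in absolute point form (x1, y1, x2, y2), as ToAbsoluteCoords produces
# them earlier in the pipeline.
#   crop = RandomSampleCrop()
#   img, boxes, labels = crop(img, boxes, labels)  # may drop boxes whose
#                                                  # centers fall outside the patch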
311 |
312 | class Expand(object):
313 | def __init__(self, mean):
314 | self.mean = mean
315 |
316 | def __call__(self, image, boxes, labels):
317 | if random.randint(2):
318 | return image, boxes, labels
319 |
320 | height, width, depth = image.shape
321 | ratio = random.uniform(1, 4)
322 | left = random.uniform(0, width*ratio - width)
323 | top = random.uniform(0, height*ratio - height)
324 |
325 | expand_image = np.zeros(
326 | (int(height*ratio), int(width*ratio), depth),
327 | dtype=image.dtype)
328 | expand_image[:, :, :] = self.mean
329 | expand_image[int(top):int(top + height),
330 | int(left):int(left + width)] = image
331 | image = expand_image
332 |
333 | boxes = boxes.copy()
334 | boxes[:, :2] += (int(left), int(top))
335 | boxes[:, 2:] += (int(left), int(top))
336 |
337 | return image, boxes, labels
338 |
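# Worked example (illustrative): with ratio 2 on a 300x300 image, the canvas
# becomes 600x600 filled with the dataset mean, the original image is pasted at
# a random (left, top), and every box is shifted by that same offset.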
339 |
340 | class RandomMirror(object):
341 | def __call__(self, image, boxes, classes):
342 | _, width, _ = image.shape
343 | if random.randint(2):
344 | image = image[:, ::-1]
345 | boxes = boxes.copy()
346 | boxes[:, 0::2] = width - boxes[:, 2::-2]
347 | return image, boxes, classes
348 |
349 |
350 | class SwapChannels(object):
351 | """Transforms a tensorized image by swapping the channels in the order
352 | specified in the swap tuple.
353 | Args:
354 | swaps (int triple): final order of channels
355 | eg: (2, 1, 0)
356 | """
357 |
358 | def __init__(self, swaps):
359 | self.swaps = swaps
360 |
361 | def __call__(self, image):
362 | """
363 | Args:
364 | image (Tensor): image tensor to be transformed
365 | Return:
366 | a tensor with channels swapped according to swap
367 | """
368 | # if torch.is_tensor(image):
369 | # image = image.data.cpu().numpy()
370 | # else:
371 | # image = np.array(image)
372 | image = image[:, :, self.swaps]
373 | return image
374 |
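# Example (illustrative): SwapChannels((2, 1, 0)) reverses the channel order,
# turning a BGR image into RGB and vice versa.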
375 |
376 | class PhotometricDistort(object):
377 | def __init__(self):
378 | self.pd = [
379 | RandomContrast(),
380 | ConvertColor(transform='HSV'),
381 | RandomSaturation(),
382 | RandomHue(),
383 | ConvertColor(current='HSV', transform='BGR'),
384 | RandomContrast()
385 | ]
386 | self.rand_brightness = RandomBrightness()
387 | self.rand_light_noise = RandomLightingNoise()
388 |
389 | def __call__(self, image, boxes, labels):
390 | im = image.copy()
391 | im, boxes, labels = self.rand_brightness(im, boxes, labels)
392 | if random.randint(2):
393 | distort = Compose(self.pd[:-1])
394 | else:
395 | distort = Compose(self.pd[1:])
396 | im, boxes, labels = distort(im, boxes, labels)
397 | return self.rand_light_noise(im, boxes, labels)
398 |
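# Note (illustrative): the two Compose variants above differ only in whether
# RandomContrast runs before the BGR->HSV conversion or after the HSV->BGR
# conversion; exactly one of the two RandomContrast ops is applied per call.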
399 |
400 | # class GaussianRandomPixelNoise():
401 | # # idea taken from the paper "Localization-Aware Active Learning for Object Detection" by Kao, Lee, Sen and Liu
402 | # # http://www.merl.com/publications/docs/TR2018-166.pdf
403 | # """
404 | # To each pixel value gaussian noise is added.
405 | # The paper doesn't clearly state if the noise is different per channel per pixel or just per pixel.
406 | # Here we will do it per pixel.
407 | # """
408 | #
409 | # def __init__(self, std, image_len = 90000):# 300*300 pixels
410 | # self.std = std
411 | # self.image_len = image_len
412 | #
413 | # def __call__(self, image):
414 | #
415 | # # add noise
416 | # noise = np.random.normal(0, self.std, self.image_len)
417 | # image = image.flatten()
418 | # image = image+noise
419 | #
420 | # # set max and min values
421 | # low_values = image < 0
422 | # high_values = image > 255
423 | #
424 | # image[low_values] = 0
425 | # image[high_values] = 255
426 | #
427 | # # reshape
428 | # # image = image.reshape(300,300,:)
429 | #
430 | # return image
431 |
432 |
433 | def GaussianRandomPixelNoise(image, std, image_len=90000):  # 300*300 pixels
434 |
435 | # add noise
436 | normal_dist = torch.distributions.normal.Normal(torch.zeros(image_len), torch.ones(image_len) * std)
437 | noise = normal_dist.sample().unsqueeze(-1).to('cpu')
438 | image = torch.from_numpy(image).reshape(-1,3)
439 | image = image+noise
440 |
441 | # set max and min values
442 |     # NOTE: I didn't do this, as the (dataset) mean per channel is subtracted before the image is pulled
443 |     # through this function (the base transform); the network is also trained on this (on average) zero-mean input.
444 | # low_values = abs(image) < 0
445 | # high_values = abs(image) > 255
446 | #
447 | # image[low_values] = 0
448 | # image[high_values] = 255
449 |
450 | # reshape
451 | image = image.reshape(300,300,-1)
452 |
453 | return image
454 |
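# Illustrative usage sketch (an assumption, not from the original file):
# perturbing a 300x300 mean-subtracted image, e.g. for localization-stability
# scoring. The same noise value is added to all three channels of a pixel
# ("per pixel").
#   img = np.random.randn(300, 300, 3).astype(np.float32)   # zero-mean input
#   noisy = GaussianRandomPixelNoise(img, std=8.0)           # torch tensor, (300, 300, 3)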
455 |
462 |
463 | class SSDAugmentation(object):
464 | def __init__(self, size=300, mean=(104, 117, 123)):
465 | self.mean = mean
466 | self.size = size
467 | self.augment = Compose([
468 | ConvertFromInts(),
469 | ToAbsoluteCoords(),
470 | PhotometricDistort(),
471 | Expand(self.mean),
472 | RandomSampleCrop(),
473 | RandomMirror(),
474 | ToPercentCoords(),
475 | Resize(self.size),
476 | SubtractMeans(self.mean)
477 | ])
478 |
479 | def __call__(self, img, boxes, labels):
480 | return self.augment(img, boxes, labels)
481 |
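482 | 
483 | if __name__ == '__main__':
484 |     # Smoke-test sketch (illustrative addition, not from the original repo):
485 |     # run the full training pipeline once on a dummy sample. Input boxes are
486 |     # assumed to be in normalized [0,1] (x1, y1, x2, y2) form, as the dataset's
487 |     # target transform produces them; ToAbsoluteCoords/ToPercentCoords convert
488 |     # back and forth around the geometric ops.
489 |     aug = SSDAugmentation(size=300, mean=(104, 117, 123))
490 |     img = np.random.randint(0, 256, (480, 640, 3)).astype(np.float32)
491 |     boxes = np.array([[0.1, 0.2, 0.6, 0.8]], dtype=np.float32)
492 |     labels = np.array([7])
493 |     out_img, out_boxes, out_labels = aug(img, boxes, labels)
494 |     print(out_img.shape, out_boxes, out_labels)  # (300, 300, 3); boxes still in [0,1]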
--------------------------------------------------------------------------------
/layers/functions/detection.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Function
3 | from ..box_utils import decode, nms
4 | from data import voc as cfg
5 | from active_learning_package import uncertainty_helpers
6 | import math
7 |
8 |
9 | class Detect(Function):
10 | """At test time, Detect is the final layer of SSD. Decode location preds,
11 | apply non-maximum suppression to location predictions based on conf
12 | scores and threshold to a top_k number of output predictions for both
13 | confidence score and locations.
14 | """
15 | def __init__(self, num_classes, bkg_label, top_k, conf_thresh, nms_thresh,
16 |                  def_forward, merging_method, sampling_strategy, modeltype):
17 | self.num_classes = num_classes
18 | self.background_label = bkg_label
19 | self.top_k = top_k
20 |
21 | # Parameters used in nms.
22 | self.nms_thresh = nms_thresh
23 | if nms_thresh <= 0:
24 |             raise ValueError('nms_threshold must be positive.')
25 | self.conf_thresh = conf_thresh
26 | self.variance = cfg['variance']
27 |
28 | ## Active Learning Package variables
29 | self.def_forward = def_forward
30 | self.merging_method = merging_method
31 | self.sampling_strategy = sampling_strategy
32 | self.do_prob_dist_forward = False
33 | self.modeltype = modeltype
34 |
35 | if self.merging_method != 'pre_nms_avg' and \
36 | (self.sampling_strategy == 'p-max_localization-stability'
37 | or self.sampling_strategy == 'no_ensemble_entropy-only'
38 | or self.sampling_strategy in ['none_covariance', 'none_covariance-obj','entropy_covariance', 'entropy_covariance-obj'])\
39 | and self.modeltype != 'SSD300KL':
40 | self.do_prob_dist_forward = True
41 |
42 |
43 |
44 |
45 | def forward(self, loc_data, conf_data, prior_data, alphas = None):
46 | """
47 | Args:
48 | loc_data: (tensor) Loc preds from loc layers
49 | Shape: [batch,num_priors*4]
50 | conf_data: (tensor) Shape: Conf preds from conf layers
51 | Shape: [batch*num_priors,num_classes]
52 | prior_data: (tensor) Prior (default) boxes and variances from priorbox layers
53 | Shape: [1,num_priors,4]
54 |
55 | Only when using KL-loss:
56 |             alpha: α = log(σ^{2}) where σ^2 is the variance (σ the standard deviation) per bounding box coordinate.
57 |                 The log is used during training to avoid exploding gradients
58 | shape: [1, num_priors, 4]
59 | """
60 |         # the normal forward pass, as described in the SSD paper
61 | if self.def_forward:
62 |
63 |             # Merging method = None by default; if None has been passed through, do the standard forward
64 | output = self.default_forward(loc_data, conf_data, prior_data)
65 | return output
66 |
67 | else:
68 | # if not a regular forward -> ensemble of SSDs can be used to merge bounding boxes
69 | # into probabilistic object detections
70 |
71 | if self.merging_method == 'pre_nms_avg' and \
72 | not self.do_prob_dist_forward and\
73 | self.modeltype != 'SSD300KL':
74 | # returns all locs and preds, without applying non maximum suppression to allow for pre-nms averaging
75 | # for more information, see paper: Miller et al - Benchmarking Sampling-based Probabilistic Object Detectors
76 | output_tup = (loc_data, conf_data, prior_data)
77 | return output_tup
78 |
79 |
80 | # elif self.merging_method in ['BSAS','Hungarian'] or 'p-max_localization-stability':
81 | elif self.do_prob_dist_forward:
82 | output, num_boxes_per_class = self.full_prob_dist_forward(loc_data,conf_data, prior_data)
83 | # output_tup = (output, prior_data) # todo: do I really need prior data for BSAS merging? -> only used for nms, which is already performed here or also for IoU calculation??
84 | return output, num_boxes_per_class, prior_data
85 |
86 | elif self.modeltype == 'SSD300KL':
87 | output, num_boxes_per_class = self.full_prob_KL_forward(loc_data, conf_data, prior_data, alphas)
88 | return output, num_boxes_per_class, prior_data
89 | else:
90 | raise NotImplementedError()
91 |
92 |
93 | def full_prob_KL_forward(self, loc_data, conf_data, prior_data, alphas):
94 | """
95 |         Largely copied from the forward with the full probability distribution (full_prob_dist_forward). However,
96 |         the bounding boxes are in point-form (x1,y1,x2,y2) instead of center-form (cx, cy, w, h), and for each corner
97 |         a standard deviation is returned as well.
98 |
99 |
100 | :param loc_data: (tensor) Loc preds from loc layers
101 | Shape: [batch,num_priors*4]
102 |         :param alphas: (tensor) α = log(σ^{2}) where σ^2 is the variance (σ the standard deviation) per bounding box coordinate.
103 |             The log is used during training to avoid exploding gradients
104 | Shape: [batch,num_priors*4]
105 | :param conf_data: (tensor) Shape: Conf preds from conf layers
106 | Shape: [batch*num_priors,num_classes]
107 | :param prior_data:(tensor) Prior boxes and variances from priorbox layers
108 | Shape: [1,num_priors,4]
109 | :return:
110 |
111 | the default forward returns the top-k (200) detections PER CLASS. The probability distribution over the classes
112 | is not returned, only the probability for a given detection for a given class.
113 |
114 |         output in this function is [image_ids, class_id, detection_id, conf_dist + bb], where bb thus has 8 params (the 4 corner coordinates followed by their 4 stds)
115 | where in the default forward it is [image_ids, class_id ,detection_id,conf_score+bb]
116 | """
117 |
118 | num = loc_data.size(0) # batch size
119 | num_priors = prior_data.size(0)
120 |
121 | output = torch.zeros(num, self.num_classes, self.top_k, self.num_classes + 8)
122 |
123 | conf_preds = conf_data.view(num, num_priors,
124 | self.num_classes).transpose(2, 1)
125 |
126 |         # the confidence mask makes this a lot faster:
127 |         # nms is not applied to all boxes
128 |
129 | # very useful to filter out the nonzero boxes later
130 | num_boxes_per_class = torch.zeros(self.num_classes)
131 |
132 | # Decode predictions into bboxes.
133 | for i in range(num):
134 | # Decode locations from predictions using priors to undo the encoding we did for offset regression at train time.
135 | # These are the class agnostic bounding boxes!
136 | decoded_boxes = decode(loc_data[i], prior_data, self.variance, self.modeltype)
137 | conf_scores = conf_preds[i].clone()
138 |
139 | # For each class, perform nms
140 | for cl in range(1, self.num_classes):
141 |
142 |                 # self.conf_thresh is 0.01
143 | # gt: Computes input > other element-wise. source: https://pytorch.org/docs/stable/torch.html#torch.gt
144 | c_mask = conf_scores[cl].gt(
145 | self.conf_thresh) # confidence mask, speeds up processing by not applying nms
146 |
147 | # to all bounding boxes
148 | scores = conf_scores[cl][c_mask]
149 | if scores.size(0) == 0:
150 | continue
151 |
152 | l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
153 | boxes = decoded_boxes[l_mask].view(-1, 4)
154 | alphas_cl = alphas[i][l_mask].view(-1,4)
155 |
156 | # idx of highest scoring and non-overlapping boxes per class (nms)
157 | ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
158 |
159 |                 # use c_mask to get the conf_scores per bounding box of the other classes for all bbs that exceed the conf threshold for this class
160 | conf_scores2 = conf_scores[:, c_mask]
161 | assert math.isclose(conf_scores2.sum().item(), conf_scores2.shape[1], rel_tol=1e-03), \
162 | "Sum of the probabilities over the classes for each detection must be (relatively close to) 1"
163 |
164 | distributions = conf_scores2[:, ids[:count]]
165 | num_boxes_per_class[cl] = count
166 |
167 |                 # store, for every detection kept by nms, the full class distribution plus the
168 |                 # box coordinates and per-corner stds (IMPORTANT: background = class 0 is skipped above)
169 |
170 | # [image_id,class_id,detection_id,conf_dist+bb]
171 | # [1,1,200,21+8]
172 | output[i, cl, :count, :self.num_classes] = distributions.permute(1,
173 | 0) # permute reorders axes (here: 1 to 0 and 0 to 1)
174 | output[i, cl, :count, self.num_classes:-4] = boxes[ids[:count]]
175 | # transform alphas to variances: α = log(σ^{2}) -> σ = exp(.5 * α)
176 | output[i, cl, :count, -4:] = torch.exp(alphas_cl[ids[:count]]*.5)
177 |
178 |                 # reference (todo done): the corresponding transform
179 |                 # from the original KL-Loss code
180 | # def bbox_std_transform_xyxy(boxes, bbox_epsilon, describ=False):
181 | # # bbox_std = np.exp(bbox_epsilon)
182 | # if cfg.PRED_STD_LOG:
183 | # bbox_std = np.exp(bbox_epsilon / 2.)
184 |
185 |
186 | # use cl 5 of image 1 to check: output[0,5,:5,:21]
187 |         return output, num_boxes_per_class  # shape (PASCAL VOC): [1, 21, 200, 29] = [batch, classes + background class, top_k bounding boxes, 29 (class_dist (21) + bounding box coords (4) + coord stds (4))]
188 |
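    # Worked check of the α -> σ transform used above (illustrative): with
    # α = log(σ²) = 2·log(σ), we get exp(0.5·α) = exp(log(σ)) = σ, so α = 0
    # gives σ = 1 and α = 2·log(2) ≈ 1.386 gives σ = 2.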
189 | def full_prob_dist_forward(self,loc_data,conf_data, prior_data):
190 | """
191 | This function is largely copied from the default forward. However, the default forward returns the top-k (200)
192 | detections PER CLASS. The probability distribution over the classes is not returned, only the probability for
193 | a given detection for a given class.
194 |
195 | Args:
196 | loc_data: (tensor) Loc preds from loc layers
197 | Shape: [batch,num_priors*4]
198 | conf_data: (tensor) Shape: Conf preds from conf layers
199 | Shape: [batch*num_priors,num_classes]
200 | prior_data: (tensor) Prior boxes and variances from priorbox layers
201 | Shape: [1,num_priors,4]
202 |
203 | the default forward returns the top-k (200) detections PER CLASS. The probability distribution over the classes
204 | is not returned, only the probability for a given detection for a given class.
205 |
206 |         output in this function is [image_ids, class_id, detection_id, conf_dist + bb]
207 | where in the default forward it is [image_ids, class_id ,detection_id,conf_score+bb]
208 |
209 | """
210 | num = loc_data.size(0) # batch size
211 | num_priors = prior_data.size(0)
212 |
213 | output = torch.zeros(num, self.num_classes, self.top_k, self.num_classes + 4)
214 |
215 | conf_preds = conf_data.view(num, num_priors,
216 | self.num_classes).transpose(2, 1)
217 |
218 |         # the confidence mask makes this a lot faster:
219 |         # nms is not applied to all boxes
220 |
221 | # very useful to filter out the nonzero boxes later
222 | num_boxes_per_class = torch.zeros(self.num_classes)
223 |
224 | # Decode predictions into bboxes.
225 | for i in range(num):
226 | # Decode locations from predictions using priors to undo the encoding we did for offset regression at train time.
227 | # These are the class agnostic bounding boxes!
228 |             decoded_boxes = decode(loc_data[i], prior_data, self.variance, self.modeltype)
229 |
230 | conf_scores = conf_preds[i].clone()
231 |
232 | # For each class, perform nms
233 | for cl in range(1, self.num_classes):
234 |
235 |                 # self.conf_thresh is 0.01
236 | # gt: Computes input > other element-wise. source: https://pytorch.org/docs/stable/torch.html#torch.gt
237 | c_mask = conf_scores[cl].gt(
238 | self.conf_thresh) # confidence mask, speeds up processing by not applying nms
239 |
240 | # to all bounding boxes
241 | scores = conf_scores[cl][c_mask]
242 | if scores.size(0) == 0:
243 | continue
244 |
245 | l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
246 | boxes = decoded_boxes[l_mask].view(-1, 4)
247 |
248 |                 '''How does the ids variable returned here relate to the indices in the original conf_preds variable?
249 | 
250 |                 ids are the indices of the boxes (conf > 0.01) that are not suppressed by nms.
251 |                 count is how many boxes survive nms, i.e. how many non-overlapping objects of this
252 |                 class there are in this image: nms walks from the highest confidence down to the
253 |                 lowest and, whenever two boxes overlap enough, discards the lower-scoring one
254 |                 for this class.
255 | 
256 |                 '''
257 |
258 | # idx of highest scoring and non-overlapping boxes per class (nms)
259 | ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
260 |
261 |                 # use c_mask to get the conf_scores per bounding box of the other classes for all bbs that exceed the conf threshold for this class
262 | conf_scores2 = conf_scores[:, c_mask]
263 | assert math.isclose(conf_scores2.sum().item(), conf_scores2.shape[1], rel_tol=1e-03), \
264 | "Sum of the probabilities over the classes for each detection must be (relatively close to) 1"
265 |
266 | distributions = conf_scores2[:, ids[:count]]
267 | num_boxes_per_class[cl] = count
268 |
269 |
270 |                 # store, for every detection kept by nms, the full class distribution plus the
271 |                 # box coordinates (IMPORTANT: background = class 0 is skipped above)
272 |
273 | # [image_id,class_id,detection_id,conf_dist+bb]
274 | # [1,1,200,21+4]
275 | output[i, cl, :count, :self.num_classes] = distributions.permute(1,
276 | 0) # permute reorders axes (here: 1 to 0 and 0 to 1)
277 | output[i, cl, :count, self.num_classes:] = boxes[ids[:count]]
278 |
279 |
280 | # use cl 5 of image 1 to check: output[0,5,:5,:21]
281 |         return output, num_boxes_per_class  # shape (PASCAL VOC): [1, 21, 200, 25] = [batch, classes + background class, top_k bounding boxes, 25 (class_dist (21) + bounding box coords (4))]
282 |
283 | def default_forward(self, loc_data, conf_data, prior_data):
284 | """
285 | Args:
286 | loc_data: (tensor) Loc preds from loc layers
287 | Shape: [batch,num_priors*4]
288 | conf_data: (tensor) Shape: Conf preds from conf layers
289 | Shape: [batch*num_priors,num_classes]
290 |             prior_data: (tensor) Prior (default) boxes and variances from priorbox layers
291 | Shape: [1,num_priors,4]
292 |
293 | the default forward returns the top-k (200) detections PER CLASS. The probability distribution over the classes
294 | is not returned, only the probability for a given detection for a given class.
295 | :returns:
296 | output:
297 | shape: [image_id,class_id,detection_id,conf_score+bb]
298 |
299 | """
300 |
301 | num = loc_data.size(0) # batch size
302 | num_priors = prior_data.size(0)
303 |         output = torch.zeros(num, self.num_classes, self.top_k, 5)  # 5 = 4 box coordinates + the confidence score
304 | conf_preds = conf_data.view(num, num_priors,
305 | self.num_classes).transpose(2, 1)
306 |
307 |         # the confidence mask makes this a lot faster:
308 |         # nms is not applied to all boxes (this filtering is also used in the paper)
309 | for i in range(num):
310 | # Decode locations from predictions using priors to undo the encoding we did for offset regression at train time.
311 | # These are the class agnostic bounding boxes!
312 | #[8732,4]
313 | decoded_boxes = decode(loc_data[i], prior_data, self.variance, self.modeltype)
314 | #[21,8732]
315 | conf_scores = conf_preds[i].clone()
316 |
317 | # For each class, perform nms
318 | for cl in range(1, self.num_classes):
319 |
320 |                 # self.conf_thresh is 0.01
321 | # gt: Computes input > other element-wise. source: https://pytorch.org/docs/stable/torch.html#torch.gt
322 | c_mask = conf_scores[cl].gt(self.conf_thresh) #confidence mask, speeds up processing by not applying nms
323 |
324 | # to all bounding boxes
325 | scores = conf_scores[cl][c_mask]
326 | if scores.size(0) == 0:
327 | continue
328 |
329 |
330 | l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
331 | boxes = decoded_boxes[l_mask].view(-1, 4)
332 |
333 | # idx of highest scoring and non-overlapping boxes per class (nms)
334 | ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
335 |
336 | # [image_id,class_id,detection_id,conf+bb]
337 | output[i, cl, :count] = \
338 | torch.cat((scores[ids[:count]].unsqueeze(1),
339 | boxes[ids[:count]]), 1)
340 |
341 | flt = output.contiguous().view(num, -1, 5) # [1,4200,5]
342 | _, idx = flt[:, :, 0].sort(1, descending=True) # sort over ALL confidences (not per class)
343 | _, rank = idx.sort(1)
344 |         flt[(rank >= self.top_k).unsqueeze(-1).expand_as(flt)] = 0  # zero out everything beyond the overall top_k
345 |
346 | # use cl 5 of image 1 to check: output[0,5,:5,:21]
347 |         return output  # shape (PASCAL VOC): [1, 21, 200, 5] = [batch, classes + background class, top_k bounding boxes, 5 (confidence score + bounding box)]
348 |
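349 | 
350 | # Illustrative usage sketch (an assumption for clarity, not verified against the
351 | # full repo): a default forward pass for a VOC-configured SSD with 21 classes.
352 | #   detect = Detect(num_classes=21, bkg_label=0, top_k=200, conf_thresh=0.01,
353 | #                   nms_thresh=0.45, def_forward=True, merging_method=None,
354 | #                   sampling_strategy=None, modeltype='SSD300')
355 | #   out = detect.forward(loc, conf, priors)  # -> [batch, 21, 200, 5]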
--------------------------------------------------------------------------------
/active_learning_package/voc_eval_helpers.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import pickle
4 | import xml.etree.ElementTree as ET
5 | import time
6 | import sys
7 |
8 | import torch
9 | from torch.autograd import Variable
10 |
11 | import data
12 | from . import helpers
13 |
14 |
15 | def eval(test_dataset, args, net, al_iteration, eval_ensemble_idx = 99999, epochs_test = False, train_iters = None, use_dataset_image_ids = False):
16 | """
17 | largely copied from eval.py from the original pytorch SSD repository: https://github.com/amdegroot/ssd.pytorch
18 | Slightly adjusted to fit in this active learning module
19 | """
20 | print('start VOC eval')
21 |
22 | num_images = len(test_dataset)
23 |
24 | # all detections are collected into:
25 | # all_boxes[cls][image] = N x 5 array of detections in
26 | # (x1, y1, x2, y2, score)
27 | if args.dataset in ['VOC07', 'VOC12']:
28 | labelmap = data.VOC_CLASSES
29 | elif args.dataset == 'VOC07_1_class':
30 | labelmap = [args.relevant_class]
31 | elif args.dataset == 'VOC07_6_class':
32 | labelmap = args.labelmap
33 | else:
34 | raise NotImplementedError()
35 |
36 |
37 | args.summary['eval_model']['num_images_eval'] = num_images
38 | args.summary['eval_model']['num_objects_eval'] = 'todo'
39 | args.summary['eval_model']['APs'] = {}
40 |
41 | all_boxes = [[[] for _ in range(num_images)]
42 | for _ in range(len(labelmap) + 1)]
43 |
44 | # timers
45 | _t = {'im_detect': helpers.Timer(), 'misc': helpers.Timer()}
46 |
47 | output_dir = args.experiment_dir + 'eval/'
48 | print('output dir ', output_dir)
49 | if not os.path.exists(output_dir):
50 | os.makedirs(output_dir, exist_ok=True)
51 |
52 | if epochs_test:
53 | det_file = os.path.join(output_dir,'al-iter_'+str(al_iteration)+'_ensemble_'+str(args.eval_ensemble_idx)+'_'+str('todo')+'_detections.pkl')
54 | else:
55 |         det_file = os.path.join(output_dir,'al-iter_'+str(al_iteration)+'_ensemble_'+str(args.eval_ensemble_idx)+'_detections.pkl')
56 |
57 | # if already done the detection passes with this network.
58 | if os.path.isfile(det_file):
59 | with open(det_file, 'rb') as file:
60 | all_boxes = pickle.load(file)
61 |
62 | else:
63 | for i in range(num_images):
64 | im, gt, h, w = test_dataset.pull_item(i)
65 |
66 | x = Variable(im.unsqueeze(0))
67 |
68 | if args.cuda and torch.cuda.is_available():
69 | x = x.cuda()
70 |
71 | _t['im_detect'].tic()
72 |
73 | detections = net(x).data
74 | detect_time = _t['im_detect'].toc(average=False)
75 | # set detections back to cpu
76 | if args.cuda and torch.cuda.is_available():
77 | detections = detections.to('cpu')
78 |
79 | # skip j = 0, because it's the background class
80 | for j in range(1, detections.size(1)):
81 | dets = detections[0, j, :] # shape [200,5]
82 | mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t() # takes the detections that have confidence > 0. and expands to (5, 200) and then transposes
83 | dets = torch.masked_select(dets, mask).view(-1, 5)
84 | if dets.dim() == 0:
85 | continue
86 | boxes = dets[:, 1:]
87 | boxes[:, 0] *= w
88 | boxes[:, 2] *= w
89 | boxes[:, 1] *= h
90 | boxes[:, 3] *= h
91 | scores = dets[:, 0].cpu().numpy()
92 | cls_dets = np.hstack((boxes.cpu().numpy(),
93 | scores[:, np.newaxis])).astype(np.float32,
94 | copy=False)
95 | all_boxes[j][i] = cls_dets
96 |
97 | print('im_detect: {:d}/{:d} {:.3f}s \t al iteration: {:d} \t ensemble_idx {:d}'.format(i,
98 | num_images, detect_time, int(al_iteration), int(args.eval_ensemble_idx)))
99 |
100 | with open(det_file, 'wb') as f:
101 | pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
102 |
103 |
104 | print('Evaluating detections')
105 | evaluate_detections(all_boxes,
106 | output_dir,
107 | test_dataset,
108 | args,
109 | labelmap,
110 | use_dataset_image_ids)
111 |
112 |
113 | def evaluate_detections(box_list, output_dir, dataset, args, labelmap, use_dataset_image_ids):
114 | """
115 | largely copied from eval.py from the original pytorch SSD repository: https://github.com/amdegroot/ssd.pytorch
116 | Slightly adjusted to fit in this active learning module
117 | """
118 | if args.dataset in ['VOC07','VOC07_1_class','VOC07_6_class']:
119 |
120 | YEAR = '2007'
121 | devkit_path = args.dataset_root + 'VOC' + YEAR
122 |
123 | write_voc_results_file(box_list,
124 | dataset,
125 | labelmap,
126 | devkit_path,
127 | args)
128 |
129 | do_python_eval(output_dir,
130 | False, # use VOC07 metrics
131 | devkit_path,
132 | labelmap,
133 | args,
134 | dataset,
135 | use_dataset_image_ids)
136 | else:
137 | raise NotImplementedError()
138 |
139 | def write_voc_results_file(all_boxes,
140 | dataset,
141 | labelmap,
142 | devkit_path,
143 | args):
144 | """
145 | largely copied from eval.py from the original pytorch SSD repository: https://github.com/amdegroot/ssd.pytorch
146 | """
147 |
148 | for cls_ind, cls in enumerate(labelmap):
149 | print('Writing {:s} VOC results file'.format(cls))
150 | filename = get_voc_results_file_template('test',
151 | cls,
152 | devkit_path,
153 | args)
154 |
155 | # if already made the results files with this network.
156 | if os.path.isfile(filename):
157 | continue
158 |
159 | else:
160 | with open(filename, 'wt') as f:
161 | for im_ind, index in enumerate(dataset.ids):
162 | dets = all_boxes[cls_ind+1][im_ind]
163 |                     if len(dets) == 0:
164 | continue
165 | # the VOCdevkit expects 1-based indices
166 | for k in range(dets.shape[0]):
167 | f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
168 | format(index[1], dets[k, -1],
169 | dets[k, 0] + 1, dets[k, 1] + 1,
170 | dets[k, 2] + 1, dets[k, 3] + 1))
171 |
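# Example of one line in the resulting det_test_<class>.txt file (illustrative,
# following the format string above): "<image id> <confidence> <x1> <y1> <x2> <y2>"
# with 1-based pixel coordinates, e.g.:
#   000005 0.953 23.0 4.0 317.0 241.0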
172 |
173 | def do_python_eval(output_dir,
174 | use_07,
175 | devkit_path,
176 | labelmap,
177 | args,
178 | dataset,
179 | use_dataset_image_ids):
180 | """
181 | largely copied from eval.py from the original pytorch SSD repository: https://github.com/amdegroot/ssd.pytorch
182 | Slightly adjusted to fit in this active learning module
183 | """
184 | annopath = os.path.join(args.dataset_root, 'VOC2007', 'Annotations', '%s.xml')
185 | if type(args.imageset_test) == list and len(args.imageset_test) == 1:
186 | imagesetfile = args.imageset_test[0][1]
187 | else:
188 | imagesetfile = args.imageset_test
189 | imgsetpath = os.path.join(args.dataset_root, 'VOC2007', 'ImageSets',
190 | 'Main', '{:s}.txt')
191 | cachedir = os.path.join(devkit_path, 'annotations_cache')
192 |
193 | # The PASCAL VOC metric changed in 2010
194 | use_07_metric = use_07
195 | print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
196 |
197 | iou_thresholds = [0.3]
198 | iou_thresholds.extend(list(np.linspace(0.5,0.95,10)))
199 |
200 | for iou_threshold in iou_thresholds:
201 | print('IoU threshold: ',str(iou_threshold),'\n_______________\n')
202 | args.summary['eval_model']['APs'][str(iou_threshold)] = {}
203 |
204 | aps = []
205 | if not os.path.isdir(output_dir):
206 | os.mkdir(output_dir)
207 | for i, cls in enumerate(labelmap):
208 | filename = get_voc_results_file_template('test', cls, devkit_path, args) # results file
209 | rec, prec, ap = voc_eval(
210 | filename, annopath, imgsetpath.format(imagesetfile), cls, cachedir,
211 | ovthresh=iou_threshold, use_07_metric=use_07_metric, dataset= dataset, use_dataset_image_ids=use_dataset_image_ids) # todo: imageset_file: '/home/jasper/data/VOCdevkit/VOC2007/ImageSets/Main/test.txt'
212 | # rec,prec,ap = 0.1,0.2,0.3
213 |
214 | aps += [ap]
215 | print('AP for {} = {:.4f}'.format(cls, ap))
216 | with open(os.path.join(output_dir, cls + '_pr.pkl'), 'wb') as f:
217 | pickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f)
218 |
219 |             # write summary average precisions
220 | args.summary['eval_model']['APs'][str(iou_threshold)][str(cls)] = ap
221 |
222 | # exclude classes without predictions
223 | aps = [ap for ap in aps if ap != -1.]
224 | args.summary['eval_model']['APs'][str(iou_threshold)]['mAP'] = np.mean(aps)
225 | print('Mean AP = {:.4f}'.format(np.mean(aps)))
226 | print('~~~~~~~~')
227 | print('Results:')
228 | for ap in aps:
229 | print('{:.3f}'.format(ap))
230 | print('{:.3f}'.format(np.mean(aps)))
231 | print('~~~~~~~~')
232 | print('')
233 | print('--------------------------------------------------------------')
234 | print('Results computed with the **unofficial** Python eval code.')
235 | print('Results should be very close to the official MATLAB eval code.')
236 | print('--------------------------------------------------------------')
237 | print('\n\n\n')
238 |
239 |     # calculate mmAP (COCO-style mAP: average the per-threshold mAPs over the
240 |     # 10 IoU thresholds 0.5:0.95; the extra 0.3 threshold is excluded)
241 |     args.summary['eval_model']['APs']['mmAP'] = 0
242 |     for key, value in args.summary['eval_model']['APs'].items():
243 |         if key not in ('mmAP', '0.3'):
244 |             args.summary['eval_model']['APs']['mmAP'] += args.summary['eval_model']['APs'][key]['mAP']
245 |     args.summary['eval_model']['APs']['mmAP'] /= 10
245 |
246 |
247 | def get_voc_results_file_template(image_set, cls, devkit_path, args):
248 | """
249 | largely copied from eval.py from the original pytorch SSD repository: https://github.com/amdegroot/ssd.pytorch
250 | Slightly adjusted to fit in this active learning module
251 | """
252 |
253 | # VOCdevkit/VOC2007/results/det_test_aeroplane.txt
254 | filename = 'det_' + image_set + '_%s.txt' % (cls)
255 |
256 | filedir = args.experiment_dir + 'eval/results/al-iter_'+str(args.al_iteration)+'/ensemble_idx_'+args.eval_ensemble_idx
257 | # filedir = os.path.join(devkit_path, 'results') # old filedir from Max De Groot
258 | if not os.path.exists(filedir):
259 | os.makedirs(filedir, exist_ok = True)
260 | path = os.path.join(filedir, filename)
261 | return path
262 |
263 | def voc_eval(detpath,
264 | annopath,
265 | imagesetfile,
266 | classname,
267 | cachedir,
268 | ovthresh=0.5,
269 | use_07_metric=True,
270 | dataset = None,
271 | use_dataset_image_ids = False):
272 | """rec, prec, ap = voc_eval(detpath,
273 | annopath,
274 | imagesetfile,
275 | classname,
276 | [ovthresh],
277 | [use_07_metric])
278 | Top level function that does the PASCAL VOC evaluation.
279 | detpath: Path to detections
280 | detpath.format(classname) should produce the detection results file.
281 | annopath: Path to annotations
282 | annopath.format(imagename) should be the xml annotations file.
283 | imagesetfile: Text file containing the list of images, one image per line.
284 | classname: Category name (duh)
285 | cachedir: Directory for caching the annotations
286 | [ovthresh]: Overlap threshold (default = 0.5)
287 | [use_07_metric]: Whether to use VOC07's 11 point AP computation
288 | (default True)
289 |
290 |
291 | NOTE: largely copied from eval.py from the original pytorch SSD repository: https://github.com/amdegroot/ssd.pytorch
292 | Slightly adjusted to fit in this active learning module
293 | """
294 |
295 | # assumes detections are in detpath.format(classname)
296 | # assumes annotations are in annopath.format(imagename)
297 | # assumes imagesetfile is a text file with each line an image name
298 | # cachedir caches the annotations in a pickle file
299 | # first load gt
300 | if not os.path.isdir(cachedir):
301 | os.mkdir(cachedir)
302 | cachefile = os.path.join(cachedir, 'annots.pkl') # cachefile of correct annotations/truth values.
303 | # read list of images
304 | with open(imagesetfile, 'r') as f:
305 | lines = f.readlines()
306 | imagenames = [x.strip() for x in lines]
307 | if not os.path.isfile(cachefile):
308 | # load annots
309 | recs = {}
310 | for i, imagename in enumerate(imagenames):
311 | recs[imagename] = parse_rec(annopath % (imagename))
312 | if i % 100 == 0:
313 | print('Reading annotation for {:d}/{:d}'.format(
314 | i + 1, len(imagenames)))
315 | # save
316 | print('Saving cached annotations to {:s}'.format(cachefile))
317 | with open(cachefile, 'wb') as f:
318 | pickle.dump(recs, f)
319 | else:
320 | # load
321 | with open(cachefile, 'rb') as f:
322 | recs = pickle.load(f)
323 |
324 | # extract gt objects for this class
325 | class_recs = {}
326 | npos = 0
327 |
328 | if use_dataset_image_ids:
329 | for imagename in dataset.ids:
330 | imagename = imagename[1]
331 | R = [obj for obj in recs[imagename] if obj['name'] == classname]
332 | bbox = np.array([x['bbox'] for x in R])
333 |             difficult = np.array([x['difficult'] for x in R]).astype(bool)
334 | det = [False] * len(R)
335 | npos = npos + sum(~difficult)
336 | class_recs[imagename] = {'bbox': bbox,
337 | 'difficult': difficult,
338 | 'det': det}
339 | else:
340 | for imagename in imagenames:
341 | R = [obj for obj in recs[imagename] if obj['name'] == classname]
342 | bbox = np.array([x['bbox'] for x in R])
343 |             difficult = np.array([x['difficult'] for x in R]).astype(bool)
344 | det = [False] * len(R)
345 | npos = npos + sum(~difficult)
346 | class_recs[imagename] = {'bbox': bbox,
347 | 'difficult': difficult,
348 | 'det': det}
349 |
350 | # read detections (see results folder in VOCDevkit)
351 | detfile = detpath.format(classname)
352 | with open(detfile, 'r') as f:
353 | lines = f.readlines()
354 |     if any(lines):
355 |
356 | splitlines = [x.strip().split(' ') for x in lines]
357 | image_ids = [x[0] for x in splitlines]
358 | confidence = np.array([float(x[1]) for x in splitlines])
359 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
360 |
361 | # sort by confidence
362 | sorted_ind = np.argsort(-confidence)
363 | sorted_scores = np.sort(-confidence)
364 | BB = BB[sorted_ind, :]
365 | image_ids = [image_ids[x] for x in sorted_ind]
366 |
367 | # go down dets and mark TPs and FPs
368 | nd = len(image_ids)
369 | tp = np.zeros(nd)
370 | fp = np.zeros(nd)
371 | for d in range(nd):
372 |             R = class_recs[image_ids[d]]  # can raise a KeyError if class_recs doesn't contain this image_id (class_recs holds the gt for the images in imagenames, while recs is taken from the cache file)
373 | bb = BB[d, :].astype(float)
374 | ovmax = -np.inf
375 | BBGT = R['bbox'].astype(float)
376 | if BBGT.size > 0:
377 | # compute overlaps
378 | # intersection
379 | ixmin = np.maximum(BBGT[:, 0], bb[0])
380 | iymin = np.maximum(BBGT[:, 1], bb[1])
381 | ixmax = np.minimum(BBGT[:, 2], bb[2])
382 | iymax = np.minimum(BBGT[:, 3], bb[3])
383 | iw = np.maximum(ixmax - ixmin, 0.)
384 | ih = np.maximum(iymax - iymin, 0.)
385 | inters = iw * ih
386 | uni = ((bb[2] - bb[0]) * (bb[3] - bb[1]) +
387 | (BBGT[:, 2] - BBGT[:, 0]) *
388 | (BBGT[:, 3] - BBGT[:, 1]) - inters)
389 | overlaps = inters / uni
390 | ovmax = np.max(overlaps)
391 | jmax = np.argmax(overlaps)
392 |
393 | if ovmax > ovthresh:
394 | if not R['difficult'][jmax]:
395 | if not R['det'][jmax]:
396 | tp[d] = 1.
397 | R['det'][jmax] = 1
398 | else:
399 | fp[d] = 1.
400 | else:
401 | fp[d] = 1.
402 |
403 | # compute precision recall
404 | fp = np.cumsum(fp)
405 | tp = np.cumsum(tp)
406 | rec = tp / float(npos)
407 | # avoid divide by zero in case the first detection matches a difficult
408 | # ground truth
409 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
410 | ap = voc_ap(rec, prec, use_07_metric)
411 | else:
412 |         # note: the original default values of -1 (below) can cause negative mAPs; not sure why you would want that anyway
413 | # rec = -1.
414 | # prec = -1.
415 | # ap = -1.
416 | rec = 0.
417 | prec = 0.
418 | ap = 0.
419 | return rec, prec, ap
420 |
421 |
422 | def parse_rec(filename):
423 | """ Parse a PASCAL VOC xml file
424 | largely copied from eval.py from the original pytorch SSD repository: https://github.com/amdegroot/ssd.pytorch
425 | """
426 | tree = ET.parse(filename)
427 | objects = []
428 | for obj in tree.findall('object'):
429 | obj_struct = {}
430 | obj_struct['name'] = obj.find('name').text
431 | obj_struct['pose'] = obj.find('pose').text
432 | obj_struct['truncated'] = int(obj.find('truncated').text)
433 | obj_struct['difficult'] = int(obj.find('difficult').text)
434 | bbox = obj.find('bndbox')
435 | obj_struct['bbox'] = [int(bbox.find('xmin').text) - 1,
436 | int(bbox.find('ymin').text) - 1,
437 | int(bbox.find('xmax').text) - 1,
438 | int(bbox.find('ymax').text) - 1]
439 | objects.append(obj_struct)
440 |
441 | return objects
442 |
443 |
444 | def voc_ap(rec, prec, use_07_metric=True):
445 | """ ap = voc_ap(rec, prec, [use_07_metric])
446 | Compute VOC AP given precision and recall.
447 | If use_07_metric is true, uses the
448 | VOC 07 11 point method (default:True).
449 | """
450 | if use_07_metric:
451 | # 11 point metric
452 | ap = 0.
453 | for t in np.arange(0., 1.1, 0.1):
454 | if np.sum(rec >= t) == 0:
455 | p = 0
456 | else:
457 | p = np.max(prec[rec >= t])
458 | ap = ap + p / 11.
459 | else:
460 | # correct AP calculation
461 | # first append sentinel values at the end
462 | mrec = np.concatenate(([0.], rec, [1.]))
463 | mpre = np.concatenate(([0.], prec, [0.]))
464 |
465 | # compute the precision envelope
466 | for i in range(mpre.size - 1, 0, -1):
467 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
468 |
469 | # to calculate area under PR curve, look for points
470 | # where X axis (recall) changes value
471 | i = np.where(mrec[1:] != mrec[:-1])[0]
472 |
473 | # and sum (\Delta recall) * prec
474 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
475 | return ap
476 |
477 |
478 |
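479 | 
480 | if __name__ == '__main__':
481 |     # Minimal self-check of the two AP definitions (an illustrative addition,
482 |     # not from the original repo): a detector with perfect precision up to
483 |     # recall 0.5 and no detections beyond that.
484 |     rec = np.array([0.1, 0.2, 0.3, 0.4, 0.5])
485 |     prec = np.ones_like(rec)
486 |     print('VOC07 11-point AP:', voc_ap(rec, prec, use_07_metric=True))   # 6/11 ~ 0.545
487 |     print('VOC10+  area  AP:', voc_ap(rec, prec, use_07_metric=False))  # 0.5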
--------------------------------------------------------------------------------