├── active_learning_package
│   ├── __init__.py
│   ├── human_active_learning.py
│   ├── uncertainty_helpers.py
│   └── voc_eval_helpers.py
├── utils
│   ├── __init__.py
│   └── augmentations.py
├── layers
│   ├── __init__.py
│   ├── functions
│   │   ├── __init__.py
│   │   ├── prior_box.py
│   │   └── detection.py
│   ├── modules
│   │   ├── __init__.py
│   │   ├── l2norm.py
│   │   ├── kl_loss.py
│   │   └── multibox_loss.py
│   └── box_utils.py
├── doc
│   ├── SSD.jpg
│   ├── ssd.png
│   ├── detection_example.png
│   ├── detection_example2.png
│   └── detection_examples.png
├── Thesis_Jasper_Bakker_Active_Deep_Learning_for_Object_Detection_With_Sampling_Based_Probabilistic_Bounding_Boxes_compressed.pdf
├── run_scripts
│   └── scripts
│       ├── run_script.py
│       ├── boat_image_budget_pre-nms-avg_42_200.job
│       └── 6class_image_budget_pre-nms-avg_42_200.job
├── data
│   ├── horse_seed_set.json
│   ├── sheep_seed_set.json
│   ├── scripts
│   │   ├── VOC2012.sh
│   │   ├── VOC2007.sh
│   │   └── COCO2014.sh
│   ├── car_seed_set.json
│   ├── pottedplant_seed_set.json
│   ├── imageset_files
│   │   ├── sheep_trainval_detect.txt
│   │   ├── sheep_test_detect.txt
│   │   ├── boat_test_detect.txt
│   │   ├── boat_trainval_detect.txt
│   │   ├── bottle_test_detect.txt
│   │   ├── pottedplant_test_detect.txt
│   │   ├── bottle_trainval_detect.txt
│   │   ├── pottedplant_trainval_detect.txt
│   │   ├── horse_test_detect.txt
│   │   ├── horse_trainval_detect.txt
│   │   ├── car_trainval_detect.txt
│   │   └── car_test_detect.txt
│   ├── boat_seed_set.json
│   ├── bottle_seed_set.json
│   ├── __init__.py
│   ├── config.py
│   ├── 6_class_seed_set.json
│   └── empty_seed.json
├── LICENSE
├── .gitignore
├── create_initial_networks.py
├── README.md
├── requirements
├── create_spoc_features.py
└── ssd.py

/active_learning_package/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .augmentations import SSDAugmentation
--------------------------------------------------------------------------------
/layers/__init__.py:
--------------------------------------------------------------------------------
1 | from .functions import *
2 | from .modules import *
3 |
--------------------------------------------------------------------------------
/doc/SSD.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JappaB/Active_Learning_Object_Detection/HEAD/doc/SSD.jpg
--------------------------------------------------------------------------------
/doc/ssd.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JappaB/Active_Learning_Object_Detection/HEAD/doc/ssd.png
--------------------------------------------------------------------------------
/active_learning_package/human_active_learning.py:
--------------------------------------------------------------------------------
1 | def human_active_learning():
2 |     raise NotImplementedError()
--------------------------------------------------------------------------------
/doc/detection_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JappaB/Active_Learning_Object_Detection/HEAD/doc/detection_example.png
--------------------------------------------------------------------------------
/doc/detection_example2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JappaB/Active_Learning_Object_Detection/HEAD/doc/detection_example2.png
--------------------------------------------------------------------------------
/doc/detection_examples.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JappaB/Active_Learning_Object_Detection/HEAD/doc/detection_examples.png
--------------------------------------------------------------------------------
/layers/functions/__init__.py:
--------------------------------------------------------------------------------
1 | from .detection import Detect
2 | from .prior_box import PriorBox
3 |
4 |
5 | __all__ = ['Detect', 'PriorBox']
6 |
--------------------------------------------------------------------------------
/layers/modules/__init__.py:
--------------------------------------------------------------------------------
1 | from .l2norm import L2Norm
2 | from .multibox_loss import MultiBoxLoss
3 | from .kl_loss import KLLoss
4 |
5 | __all__ = ['L2Norm', 'MultiBoxLoss','KLLoss']
6 |
--------------------------------------------------------------------------------
/Thesis_Jasper_Bakker_Active_Deep_Learning_for_Object_Detection_With_Sampling_Based_Probabilistic_Bounding_Boxes_compressed.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JappaB/Active_Learning_Object_Detection/HEAD/Thesis_Jasper_Bakker_Active_Deep_Learning_for_Object_Detection_With_Sampling_Based_Probabilistic_Bounding_Boxes_compressed.pdf
--------------------------------------------------------------------------------
/run_scripts/scripts/run_script.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | import datetime
4 |
5 |
6 |
7 | def main():
8 |     run_scripts = [
9 |         'boat_image_budget_pre-nms-avg_42_200.job',
10 |         # '6class_image_budget_pre-nms-avg_42_200.job',
11 |     ]
12 |
13 |
14 |     curr_dir = os.getcwd()
15 |
16 |
17 |     for script in run_scripts:
18 |         print('Starting script: ', script)
19 |         print('Time start: ', datetime.datetime.now())
20 |         subprocess.call(['bash', script])
21 |
22 |         print('Time stop: ', datetime.datetime.now())
23 |         print('finished script :)!')
24 |         print('________________________\n\n\n\n\n\n\n')
25 |
26 | if __name__ == '__main__':
27 |     main()
28 |
--------------------------------------------------------------------------------
/data/horse_seed_set.json:
--------------------------------------------------------------------------------
1 | {
2 |     "dataset_name": "VOC07",
3 |     "seed_set": {
4 |         "image_set_idx": [
5 |             "002786",
6 |             "009841",
7 |             "009114",
8 |             "004834",
9 |             "001628",
10 |             "006445",
11 |             "009874",
12 |             "006285",
13 |             "009318"
14 |         ],
15 |         "class_dist": []
16 |     },
17 |     "val_set": {
18 |         "image_set_idx": [
19 |             "001236",
20 |             "004625",
21 |             "007216",
22 |             "005145",
23 |             "009138",
24 |             "008596",
25 |             "003492",
26 |             "007448",
27 |             "009407",
28 |             "005236"
29 |         ],
30 |         "class_dist": []
31 |     },
32 |     "train_set": [
33 |         "002786",
34 |         "009841",
35 |         "009114",
36 |         "004834",
37 |         "001628",
38 |         "006445",
39 |         "009874",
40 |         "006285",
41 |         "009318"
42 |     ]
43 | }
--------------------------------------------------------------------------------
/layers/modules/l2norm.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.autograd import Function
4 | from torch.autograd import Variable
5 | import torch.nn.init as init
6 |
7 | class L2Norm(nn.Module):
8 |     def __init__(self,n_channels, scale):
9 |         super(L2Norm,self).__init__()
10 |         self.n_channels = n_channels
11 |         self.gamma = scale or None
12 |         self.eps = 1e-10
13 |         self.weight = nn.Parameter(torch.Tensor(self.n_channels))
14 |         self.reset_parameters()
15 |
16 |     def reset_parameters(self):
17 |         init.constant_(self.weight,self.gamma)
18 |
19 |     def forward(self, x):
20 |         norm = x.pow(2).sum(dim=1, keepdim=True).sqrt()+self.eps
21 |         #x /= norm
22 |         x = torch.div(x,norm)
23 |         out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x
24 |         return out
25 |
--------------------------------------------------------------------------------
/data/sheep_seed_set.json:
--------------------------------------------------------------------------------
1 | {
2 |     "dataset_name": "VOC07",
3 |     "seed_set": {
4 |         "image_set_idx": [
5 |             "008592",
6 |             "009842",
7 |             "003681",
8 |             "007230",
9 |             "001714",
10 |             "007165",
11 |             "004423",
12 |             "004490",
13 |             "006679",
14 |             "001750",
15 |             "006833"
16 |         ],
17 |         "class_dist": []
18 |     },
19 |     "val_set": {
20 |         "image_set_idx": [
21 |             "003705",
22 |             "002263",
23 |             "005469",
24 |             "009816",
25 |             "000107",
26 |             "002593",
27 |             "003698",
28 |             "006944",
29 |             "000900",
30 |             "004312"
31 |         ],
32 |         "class_dist": []
33 |     },
34 |     "train_set": [
35 |         "008592",
36 |         "009842",
37 |         "003681",
38 |         "007230",
39 |         "001714",
40 |         "007165",
41 |         "004423",
42 |         "004490",
43 |         "006679",
44 |         "001750",
45 |         "006833"
46 |     ]
47 | }
--------------------------------------------------------------------------------
/data/scripts/VOC2012.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Ellis Brown
3 |
4 | start=`date +%s`
5 |
6 | # handle optional download dir
7 | if [ -z "$1" ]
8 |   then
9 |     # navigate to ~/data
10 |     echo "navigating to ~/data/ ..."
11 |     mkdir -p ~/data
12 |     cd ~/data/
13 |   else
14 |     # check if is valid directory
15 |     if [ ! -d $1 ]; then
16 |         echo $1 "is not a valid directory"
17 |         exit 0
18 |     fi
19 |     echo "navigating to" $1 "..."
20 |     cd $1
21 | fi
22 |
23 | echo "Downloading VOC2012 trainval ..."
24 | # Download the data.
25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
26 | echo "Done downloading."
27 |
28 |
29 | # Extract data
30 | echo "Extracting trainval ..."
31 | tar -xvf VOCtrainval_11-May-2012.tar
32 | echo "removing tar ..."
33 | rm VOCtrainval_11-May-2012.tar
34 |
35 | end=`date +%s`
36 | runtime=$((end-start))
37 |
38 | echo "Completed in" $runtime "seconds"
--------------------------------------------------------------------------------
/data/car_seed_set.json:
--------------------------------------------------------------------------------
1 | {
2 |     "dataset_name": "VOC07",
3 |     "seed_set": {
4 |         "image_set_idx": [
5 |             "009863",
6 |             "007208",
7 |             "007821",
8 |             "009900",
9 |             "002533",
10 |             "002643",
11 |             "007374",
12 |             "005020",
13 |             "001980",
14 |             "009106",
15 |             "002691",
16 |             "005756",
17 |             "001360",
18 |             "008706",
19 |             "007475"
20 |         ],
21 |         "class_dist": []
22 |     },
23 |     "val_set": {
24 |         "image_set_idx": [
25 |             "003636",
26 |             "002116",
27 |             "006206",
28 |             "004244",
29 |             "000245",
30 |             "003256",
31 |             "009839",
32 |             "007663",
33 |             "006330",
34 |             "000545"
35 |         ],
36 |         "class_dist": []
37 |     },
38 |     "train_set": [
39 |         "009863",
40 |         "007208",
41 |         "007821",
42 |         "009900",
43 |         "002533",
44 |         "002643",
45 |         "007374",
46 |         "005020",
47 |         "001980",
48 |         "009106",
49 |         "002691",
50 |         "005756",
51 |         "001360",
52 |         "008706",
53 |         "007475"
54 |     ]
55 | }
--------------------------------------------------------------------------------
/data/pottedplant_seed_set.json:
--------------------------------------------------------------------------------
1 | {
2 |     "dataset_name": "VOC07",
3 |     "seed_set": {
4 |         "image_set_idx": [
5 |             "005821",
6 |             "007396",
7 |             "001777",
8 |             "006159",
9 |             "009279",
10 |             "004446",
11 |             "006188",
12 |             "007302",
13 |             "004105",
14 |             "003758",
15 |             "009638",
16 |             "009175",
17 |             "002946",
18 |             "008749",
19 |             "008082",
20 |             "002775"
21 |         ],
22 |         "class_dist": []
23 |     },
24 |     "val_set": {
25 |         "image_set_idx": [
26 |             "007558",
27 |             "000592",
28 |             "003301",
29 |             "007999",
30 |             "004631",
31 |             "007390",
32 |             "007890",
33 |             "006351",
34 |             "000710",
35 |             "001451"
36 |         ],
37 |         "class_dist": []
38 |     },
39 |     "train_set": [
40 |         "005821",
41 |         "007396",
42 |         "001777",
43 |         "006159",
44 |         "009279",
45 |         "004446",
46 |         "006188",
47 |         "007302",
48 |         "004105",
49 |         "003758",
50 |         "009638",
51 |         "009175",
52 |         "002946",
53 |         "008749",
54 |         "008082",
55 |         "002775"
56 |     ]
57 | }
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 JappaB
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /data/scripts/VOC2007.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ellis Brown 3 | 4 | start=`date +%s` 5 | 6 | # handle optional download dir 7 | if [ -z "$1" ] 8 | then 9 | # navigate to ~/data 10 | echo "navigating to ~/data/ ..." 11 | mkdir -p ~/data 12 | cd ~/data/ 13 | else 14 | # check if is valid directory 15 | if [ ! -d $1 ]; then 16 | echo $1 "is not a valid directory" 17 | exit 0 18 | fi 19 | echo "navigating to" $1 "..." 20 | cd $1 21 | fi 22 | 23 | echo "Downloading VOC2007 trainval ..." 24 | # Download the data. 25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar 26 | echo "Downloading VOC2007 test data ..." 27 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar 28 | echo "Done downloading." 29 | 30 | # Extract data 31 | echo "Extracting trainval ..." 32 | tar -xvf VOCtrainval_06-Nov-2007.tar 33 | echo "Extracting test ..." 34 | tar -xvf VOCtest_06-Nov-2007.tar 35 | echo "removing tars ..." 36 | rm VOCtrainval_06-Nov-2007.tar 37 | rm VOCtest_06-Nov-2007.tar 38 | 39 | end=`date +%s` 40 | runtime=$((end-start)) 41 | 42 | echo "Completed in" $runtime "seconds" -------------------------------------------------------------------------------- /data/imageset_files/sheep_trainval_detect.txt: -------------------------------------------------------------------------------- 1 | 000107 2 | 000225 3 | 000233 4 | 000244 5 | 000416 6 | 000581 7 | 000588 8 | 000654 9 | 000676 10 | 000820 11 | 000900 12 | 001191 13 | 001310 14 | 001594 15 | 001598 16 | 001661 17 | 001714 18 | 001733 19 | 001734 20 | 001750 21 | 001872 22 | 002209 23 | 002263 24 | 002267 25 | 002330 26 | 002452 27 | 002593 28 | 002615 29 | 002618 30 | 002648 31 | 002683 32 | 002845 33 | 002873 34 | 002963 35 | 002986 36 | 003161 37 | 003170 38 | 003382 39 | 003593 40 | 003681 41 | 003698 42 | 003705 43 | 003714 44 | 003874 45 | 004239 46 | 004312 47 | 004423 48 | 004490 49 | 004527 50 | 004768 51 | 004842 52 | 004954 53 | 005001 54 | 005058 55 | 005223 56 | 005288 57 | 005319 58 | 005363 59 | 005395 60 | 005469 61 | 005947 62 | 005964 63 | 006128 64 | 006136 65 | 006247 66 | 006251 67 | 006534 68 | 006678 69 | 006679 70 | 006833 71 | 006899 72 | 006944 73 | 007048 74 | 007165 75 | 007217 76 | 007230 77 | 007621 78 | 007859 79 | 008116 80 | 008150 81 | 008223 82 | 008292 83 | 008342 84 | 008349 85 | 008592 86 | 008618 87 | 008871 88 | 008920 89 | 008942 90 | 009227 91 | 009271 92 | 009349 93 | 009417 94 | 009700 95 | 009816 96 | 009842 97 | 009945 98 | -------------------------------------------------------------------------------- /data/imageset_files/sheep_test_detect.txt: -------------------------------------------------------------------------------- 1 | 000062 2 | 000175 3 | 000176 4 | 000376 5 | 000451 6 | 000458 7 | 000475 8 | 000574 9 | 000596 10 | 000627 11 | 000779 12 | 000789 13 | 000875 14 | 000884 15 | 000925 16 | 000992 17 | 001271 18 | 001344 19 | 001416 20 | 001477 21 | 001625 22 | 001645 23 | 001671 24 | 001852 25 | 002033 26 | 002121 27 | 002133 28 | 002137 29 | 002338 30 | 002802 31 | 002882 32 | 003025 33 | 003050 34 | 003069 35 | 003087 36 | 003152 37 | 003190 38 | 003241 39 | 003318 40 | 003366 41 | 003471 42 | 003595 43 | 003914 44 | 004072 45 | 004357 46 | 004582 47 | 004610 48 | 004617 49 | 004642 50 | 004646 51 | 004669 52 | 004774 53 | 004827 54 | 004854 55 | 004969 56 | 005137 57 | 005147 
58 | 005164 59 | 005324 60 | 005578 61 | 005795 62 | 005886 63 | 005915 64 | 005916 65 | 006217 66 | 006491 67 | 006557 68 | 006815 69 | 006832 70 | 006837 71 | 007028 72 | 007076 73 | 007402 74 | 007440 75 | 007444 76 | 007462 77 | 007534 78 | 007596 79 | 007894 80 | 008187 81 | 008192 82 | 008215 83 | 008333 84 | 008334 85 | 008343 86 | 008344 87 | 008598 88 | 008685 89 | 008795 90 | 008912 91 | 008992 92 | 009031 93 | 009169 94 | 009320 95 | 009451 96 | 009569 97 | 009818 98 | 009861 99 | -------------------------------------------------------------------------------- /data/boat_seed_set.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_name": "VOC07", 3 | "seed_set": { 4 | "image_set_idx": [ 5 | "005496", 6 | "007743", 7 | "006965", 8 | "000154", 9 | "002234", 10 | "003337", 11 | "005614", 12 | "004532", 13 | "000184", 14 | "002372", 15 | "000382", 16 | "008498", 17 | "005181", 18 | "007139", 19 | "007685", 20 | "003098", 21 | "003722", 22 | "002659", 23 | "007460", 24 | "007803", 25 | "002208" 26 | ], 27 | "class_dist": [] 28 | }, 29 | "val_set": { 30 | "image_set_idx": [ 31 | "000931", 32 | "002465", 33 | "007361", 34 | "001732", 35 | "003860", 36 | "000826", 37 | "006123", 38 | "009603", 39 | "005705", 40 | "007365" 41 | ], 42 | "class_dist": [] 43 | }, 44 | "train_set": [ 45 | "005496", 46 | "007743", 47 | "006965", 48 | "000154", 49 | "002234", 50 | "003337", 51 | "005614", 52 | "004532", 53 | "000184", 54 | "002372", 55 | "000382", 56 | "008498", 57 | "005181", 58 | "007139", 59 | "007685", 60 | "003098", 61 | "003722", 62 | "002659", 63 | "007460", 64 | "007803", 65 | "002208" 66 | ] 67 | } -------------------------------------------------------------------------------- /data/bottle_seed_set.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_name": "VOC07", 3 | "seed_set": { 4 | "image_set_idx": [ 5 | "009679", 6 | "009388", 7 | "009100", 8 | "007798", 9 | "002881", 10 | "000250", 11 | "004152", 12 | "006576", 13 | "000344", 14 | "008204", 15 | "002253", 16 | "005467", 17 | "009290", 18 | "007457", 19 | "006626", 20 | "000367", 21 | "008931", 22 | "002350", 23 | "007141", 24 | "006363", 25 | "006727", 26 | "000498" 27 | ], 28 | "class_dist": [] 29 | }, 30 | "val_set": { 31 | "image_set_idx": [ 32 | "006409", 33 | "000269", 34 | "006648", 35 | "002641", 36 | "004671", 37 | "007121", 38 | "007649", 39 | "009878", 40 | "000381", 41 | "004886" 42 | ], 43 | "class_dist": [] 44 | }, 45 | "train_set": [ 46 | "009679", 47 | "009388", 48 | "009100", 49 | "007798", 50 | "002881", 51 | "000250", 52 | "004152", 53 | "006576", 54 | "000344", 55 | "008204", 56 | "002253", 57 | "005467", 58 | "009290", 59 | "007457", 60 | "006626", 61 | "000367", 62 | "008931", 63 | "002350", 64 | "007141", 65 | "006363", 66 | "006727", 67 | "000498" 68 | ] 69 | } -------------------------------------------------------------------------------- /run_scripts/scripts/boat_image_budget_pre-nms-avg_42_200.job: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd ../../ 4 | 5 | BASE_DIR=$PWD 6 | echo $BASE_DIR 7 | 8 | EXPERIMENT_DIR=test_boat/ 9 | mkdir "$BASE_DIR"/active_learning_dir/"$EXPERIMENT_DIR" 10 | 11 | 12 | python active_learning_main.py \ 13 | --budget_measured_in_objects False \ 14 | --rescaled_foreground_probs True \ 15 | --sample_select_nms_conf_thresh 0.01 \ 16 | --skip_sample_selection_first_iter False \ 17 | --batch_size 8 \ 18 | 
--sampling_strategy none_covariance-obj \
19 |     --merging_method pre_nms_avg \
20 |     --lr 1e-4 \
21 |     --ensemble_size 3 \
22 |     --annotate_all_objects True \
23 |     --experiment_dir "$EXPERIMENT_DIR" \
24 |     --train_from_basenet_every_iter True \
25 |     --paths_to_weights \
26 |         weights/initial_net_0 \
27 |         weights/initial_net_1 \
28 |         weights/initial_net_2 \
29 |     --trained_models \
30 |         weights/initial_net_0 \
31 |         weights/initial_net_1 \
32 |         weights/initial_net_2 \
33 |     --samples_per_iter 10 10 10 10 10 \
34 |     --eval_every_iter False \
35 |     --debug False \
36 |     --fixed_number_of_epochs 200 \
37 |     --seed 92 \
38 |     --dataset VOC07_1_class \
39 |     --imageset_train boat_trainval_detect \
40 |     --imageset_test boat_test_detect \
41 |     --seed_set_file data/boat_seed_set.json \
42 |     --optimizer SGD \
43 |     --skip_sample_selection_first_iter False \
44 |     --relevant_class boat \
45 |
46 |
47 | wait
48 |
49 | echo end of job
50 |
--------------------------------------------------------------------------------
/run_scripts/scripts/6class_image_budget_pre-nms-avg_42_200.job:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | cd ../../
4 |
5 | BASE_DIR=$PWD
6 | echo $BASE_DIR
7 |
8 | EXPERIMENT_DIR=test_6class/
9 | mkdir "$BASE_DIR"/active_learning_dir/"$EXPERIMENT_DIR"
10 |
11 |
12 | python active_learning_main.py \
13 |     --budget_measured_in_objects False \
14 |     --rescaled_foreground_probs True \
15 |     --sample_select_nms_conf_thresh 0.01 \
16 |     --skip_sample_selection_first_iter False \
17 |     --batch_size 8 \
18 |     --sampling_strategy none_covariance-obj \
19 |     --merging_method pre_nms_avg \
20 |     --lr 1e-4 \
21 |     --ensemble_size 6 \
22 |     --annotate_all_objects True \
23 |     --experiment_dir "$EXPERIMENT_DIR" \
24 |     --train_from_basenet_every_iter True \
25 |     --paths_to_weights \
26 |         weights/initial_net_0 \
27 |         weights/initial_net_1 \
28 |         weights/initial_net_2 \
29 |     --trained_models \
30 |         weights/initial_net_0 \
31 |         weights/initial_net_1 \
32 |         weights/initial_net_2 \
33 |     --samples_per_iter 25 25 25 25 25 25 \
34 |     --eval_every_iter True \
35 |     --debug False \
36 |     --short_gpu False \
37 |     --fixed_number_of_epochs 200 \
38 |     --seed 42 \
39 |     --dataset VOC07_6_class \
40 |     --imageset_train 6_class_trainval_detect \
41 |     --imageset_test 6_class_test_detect \
42 |     --seed_set_file data/6_class_seed_set.json \
43 |     --optimizer SGD \
44 |     --skip_sample_selection_first_iter False
45 |
46 |
47 | wait
48 |
49 | echo end of job
50 |
--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
1 | from .voc0712 import VOCDetection, VOCAnnotationTransform, VOC_CLASSES, VOC_ROOT, VOC_ROOT_LOCAL
2 |
3 | from .config import *
4 | import torch
5 | import cv2
6 | import numpy as np
7 |
8 | def detection_collate(batch):
9 |     """Custom collate fn for dealing with batches of images that have a different
10 |     number of associated object annotations (bounding boxes).
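    (Why a custom collate_fn is needed here: the DataLoader default tries to stack every field of the batch into a single tensor, which fails when images carry different numbers of ground-truth boxes, so the targets are returned as a plain list of per-image tensors instead of being stacked.)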
11 | 12 | Arguments: 13 | batch: (tuple) A tuple of tensor images and lists of annotations 14 | 15 | Return: 16 | A tuple containing: 17 | 1) (tensor) batch of images stacked on their 0 dim 18 | 2) (list of tensors) annotations for a given image are stacked on 19 | 0 dim 20 | """ 21 | targets = [] 22 | imgs = [] 23 | for sample in batch: 24 | imgs.append(sample[0]) 25 | targets.append(torch.FloatTensor(sample[1])) 26 | return torch.stack(imgs, 0), targets 27 | 28 | 29 | def base_transform(image, size, mean): 30 | # cv2.resize: default uses linear interpolation, and doesnt preserve aspact ratio: https://docs.opencv.org/2.4/modules/imgproc/doc/geometric_transformations.html?highlight=resize#void%20resize(InputArray%20src,%20OutputArray%20dst,%20Size%20dsize,%20double%20fx,%20double%20fy,%20int%20interpolation) 31 | x = cv2.resize(image, (size, size)).astype(np.float32) 32 | x -= mean 33 | x = x.astype(np.float32) 34 | return x 35 | 36 | 37 | class BaseTransform: 38 | def __init__(self, size, mean): 39 | # if modeltype == 'SSD300': 40 | # size = 300 41 | self.size = size 42 | self.mean = np.array(mean, dtype=np.float32) 43 | 44 | def __call__(self, image, boxes=None, labels=None): 45 | return base_transform(image, self.size, self.mean), boxes, labels 46 | -------------------------------------------------------------------------------- /data/imageset_files/boat_test_detect.txt: -------------------------------------------------------------------------------- 1 | 000069 2 | 000080 3 | 000105 4 | 000179 5 | 000240 6 | 000295 7 | 000350 8 | 000371 9 | 000375 10 | 000444 11 | 000449 12 | 000481 13 | 000529 14 | 000538 15 | 000558 16 | 000576 17 | 000631 18 | 000792 19 | 000811 20 | 000837 21 | 000907 22 | 000914 23 | 000995 24 | 001049 25 | 001059 26 | 001076 27 | 001141 28 | 001155 29 | 001188 30 | 001227 31 | 001342 32 | 001355 33 | 001394 34 | 001410 35 | 001474 36 | 001487 37 | 001527 38 | 001591 39 | 001634 40 | 001646 41 | 001698 42 | 001705 43 | 001770 44 | 001815 45 | 001822 46 | 001895 47 | 001974 48 | 002029 49 | 002085 50 | 002175 51 | 002225 52 | 002274 53 | 002390 54 | 002395 55 | 002449 56 | 002474 57 | 002580 58 | 002628 59 | 002719 60 | 002764 61 | 002811 62 | 002821 63 | 002948 64 | 002983 65 | 003012 66 | 003073 67 | 003111 68 | 003131 69 | 003409 70 | 003498 71 | 003543 72 | 003571 73 | 003652 74 | 003799 75 | 003836 76 | 003892 77 | 003900 78 | 003910 79 | 003929 80 | 003952 81 | 004123 82 | 004160 83 | 004184 84 | 004225 85 | 004299 86 | 004374 87 | 004382 88 | 004443 89 | 004573 90 | 004613 91 | 004665 92 | 004843 93 | 004899 94 | 004914 95 | 004937 96 | 005022 97 | 005074 98 | 005098 99 | 005112 100 | 005213 101 | 005243 102 | 005272 103 | 005329 104 | 005382 105 | 005427 106 | 005604 107 | 005616 108 | 005670 109 | 005720 110 | 005771 111 | 005849 112 | 005950 113 | 006014 114 | 006048 115 | 006110 116 | 006160 117 | 006164 118 | 006205 119 | 006302 120 | 006332 121 | 006408 122 | 006490 123 | 006528 124 | 006604 125 | 006659 126 | 006889 127 | 006977 128 | 006997 129 | 007066 130 | 007091 131 | 007173 132 | 007233 133 | 007357 134 | 007377 135 | 007400 136 | 007415 137 | 007428 138 | 007455 139 | 007464 140 | 007635 141 | 007660 142 | 007788 143 | 007850 144 | 007952 145 | 007990 146 | 008099 147 | 008145 148 | 008217 149 | 008219 150 | 008249 151 | 008278 152 | 008369 153 | 008373 154 | 008545 155 | 008571 156 | 008578 157 | 008590 158 | 008643 159 | 008704 160 | 008820 161 | 008868 162 | 008869 163 | 008998 164 | 009001 165 | 009026 166 | 009102 167 | 009120 168 | 009137 
169 | 009240
170 | 009423
171 | 009538
172 | 009727
173 | 009728
174 | 009793
175 | 009811
176 | 009885
177 |
--------------------------------------------------------------------------------
/data/config.py:
--------------------------------------------------------------------------------
1 | # config.py
2 | import os.path
3 |
4 | # gets home dir cross platform
5 | HOME = os.path.expanduser("~")
6 |
7 | # for making bounding boxes pretty
8 | COLORS = ((255, 0, 0, 128), (0, 255, 0, 128), (0, 0, 255, 128),
9 |           (0, 255, 255, 128), (255, 0, 255, 128), (255, 255, 0, 128))
10 | MEANS = (104, 117, 123)  # channel means for Pascal VOC
11 |
12 | # SSD300 CONFIGS
13 | voc = {
14 |     'num_classes': 21,
15 |     'dataset_mean': (104, 117, 123),
16 |     'lr_steps': (80000, 100000, 120000),
17 |     'max_iter': 120000,
18 |     'feature_maps': [38, 19, 10, 5, 3, 1],
19 |     'min_dim': 300,
20 |     'steps': [8, 16, 32, 64, 100, 300],
21 |     'min_sizes': [30, 60, 111, 162, 213, 264],
22 |     'max_sizes': [60, 111, 162, 213, 264, 315],
23 |     'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
24 |     'variance': [0.1, 0.2],  # Not discussed in paper, see blogpost: https://leimao.github.io/blog/Bounding-Box-Encoding-Decoding/
25 |     'clip': True,
26 |     'name': 'VOC',
27 | }
28 |
29 |
30 | voc_1_class = {
31 |     'num_classes': 2,
32 |     'dataset_mean': (104, 117, 123),
33 |     'lr_steps': (80000, 100000, 120000),
34 |     'max_iter': 120000,
35 |     'feature_maps': [38, 19, 10, 5, 3, 1],
36 |     'min_dim': 300,
37 |     'steps': [8, 16, 32, 64, 100, 300],
38 |     'min_sizes': [30, 60, 111, 162, 213, 264],
39 |     'max_sizes': [60, 111, 162, 213, 264, 315],
40 |     'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
41 |     'variance': [0.1, 0.2],  # Not discussed in paper, see blogpost: https://leimao.github.io/blog/Bounding-Box-Encoding-Decoding/
42 |     'clip': True,
43 |     'name': 'VOC_1_class',
44 | }
45 |
46 | voc_6_class = {
47 |     'num_classes': 7,
48 |     'dataset_mean': (104, 117, 123),
49 |     'lr_steps': (80000, 100000, 120000),
50 |     'max_iter': 120000,
51 |     'feature_maps': [38, 19, 10, 5, 3, 1],
52 |     'min_dim': 300,
53 |     'steps': [8, 16, 32, 64, 100, 300],
54 |     'min_sizes': [30, 60, 111, 162, 213, 264],
55 |     'max_sizes': [60, 111, 162, 213, 264, 315],
56 |     'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
57 |     'variance': [0.1, 0.2],  # Not discussed in paper, see blogpost: https://leimao.github.io/blog/Bounding-Box-Encoding-Decoding/
58 |     'clip': True,
59 |     'name': 'VOC_6_class',
60 | }
61 |
--------------------------------------------------------------------------------
/data/scripts/COCO2014.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | start=`date +%s`
4 |
5 | # handle optional download dir
6 | if [ -z "$1" ]
7 |   then
8 |     # navigate to ~/data
9 |     echo "navigating to ~/data/ ..."
10 |     mkdir -p ~/data
11 |     cd ~/data/
12 |     mkdir -p ./coco
13 |     cd ./coco
14 |     mkdir -p ./images
15 |     mkdir -p ./annotations
16 |   else
17 |     # check if specified dir is valid
18 |     if [ ! -d $1 ]; then
19 |         echo $1 " is not a valid directory"
20 |         exit 0
21 |     fi
22 |     echo "navigating to " $1 " ..."
23 |     cd $1
24 | fi
25 |
26 | if [ ! -d images ]
27 |   then
28 |     mkdir -p ./images
29 |   fi
30 |
31 | # Download the image data.
32 | cd ./images
33 | echo "Downloading MSCOCO train images ..."
34 | curl -LO http://images.cocodataset.org/zips/train2014.zip
35 | echo "Downloading MSCOCO val images ..."
36 | curl -LO http://images.cocodataset.org/zips/val2014.zip
37 |
38 | cd ../
39 | if [ !
-d annotations] 40 | then 41 | mkdir -p ./annotations 42 | fi 43 | 44 | # Download the annotation data. 45 | cd ./annotations 46 | echo "Downloading MSCOCO train/val annotations ..." 47 | curl -LO http://images.cocodataset.org/annotations/annotations_trainval2014.zip 48 | echo "Finished downloading. Now extracting ..." 49 | 50 | # Unzip data 51 | echo "Extracting train images ..." 52 | unzip ../images/train2014.zip -d ../images 53 | echo "Extracting val images ..." 54 | unzip ../images/val2014.zip -d ../images 55 | echo "Extracting annotations ..." 56 | unzip ./annotations_trainval2014.zip 57 | 58 | echo "Removing zip files ..." 59 | rm ../images/train2014.zip 60 | rm ../images/val2014.zip 61 | rm ./annotations_trainval2014.zip 62 | 63 | echo "Creating trainval35k dataset..." 64 | 65 | # Download annotations json 66 | echo "Downloading trainval35k annotations from S3" 67 | curl -LO https://s3.amazonaws.com/amdegroot-datasets/instances_trainval35k.json.zip 68 | 69 | # combine train and val 70 | echo "Combining train and val images" 71 | mkdir ../images/trainval35k 72 | cd ../images/train2014 73 | find -maxdepth 1 -name '*.jpg' -exec cp -t ../trainval35k {} + # dir too large for cp 74 | cd ../val2014 75 | find -maxdepth 1 -name '*.jpg' -exec cp -t ../trainval35k {} + 76 | 77 | 78 | end=`date +%s` 79 | runtime=$((end-start)) 80 | 81 | echo "Completed in " $runtime " seconds" 82 | -------------------------------------------------------------------------------- /data/imageset_files/boat_trainval_detect.txt: -------------------------------------------------------------------------------- 1 | 000061 2 | 000073 3 | 000130 4 | 000154 5 | 000184 6 | 000232 7 | 000241 8 | 000296 9 | 000370 10 | 000382 11 | 000474 12 | 000513 13 | 000519 14 | 000563 15 | 000608 16 | 000713 17 | 000740 18 | 000742 19 | 000826 20 | 000862 21 | 000906 22 | 000931 23 | 001002 24 | 001121 25 | 001136 26 | 001176 27 | 001205 28 | 001247 29 | 001298 30 | 001325 31 | 001463 32 | 001481 33 | 001484 34 | 001497 35 | 001732 36 | 001847 37 | 001887 38 | 001976 39 | 002000 40 | 002039 41 | 002091 42 | 002208 43 | 002234 44 | 002288 45 | 002302 46 | 002372 47 | 002392 48 | 002403 49 | 002435 50 | 002465 51 | 002479 52 | 002500 53 | 002605 54 | 002653 55 | 002659 56 | 002738 57 | 002838 58 | 003008 59 | 003065 60 | 003098 61 | 003223 62 | 003284 63 | 003337 64 | 003404 65 | 003413 66 | 003639 67 | 003657 68 | 003669 69 | 003722 70 | 003797 71 | 003811 72 | 003860 73 | 003861 74 | 003863 75 | 003919 76 | 003974 77 | 004170 78 | 004194 79 | 004241 80 | 004246 81 | 004258 82 | 004325 83 | 004371 84 | 004387 85 | 004464 86 | 004498 87 | 004532 88 | 004592 89 | 004651 90 | 004839 91 | 004885 92 | 004931 93 | 004958 94 | 005073 95 | 005108 96 | 005128 97 | 005150 98 | 005171 99 | 005181 100 | 005217 101 | 005257 102 | 005320 103 | 005325 104 | 005337 105 | 005340 106 | 005358 107 | 005369 108 | 005370 109 | 005413 110 | 005420 111 | 005431 112 | 005496 113 | 005614 114 | 005699 115 | 005705 116 | 005713 117 | 005714 118 | 005742 119 | 005825 120 | 005860 121 | 005914 122 | 006046 123 | 006079 124 | 006098 125 | 006123 126 | 006215 127 | 006281 128 | 006304 129 | 006357 130 | 006436 131 | 006542 132 | 006549 133 | 006660 134 | 006673 135 | 006773 136 | 006867 137 | 006874 138 | 006953 139 | 006965 140 | 006983 141 | 007052 142 | 007125 143 | 007139 144 | 007289 145 | 007361 146 | 007365 147 | 007449 148 | 007460 149 | 007477 150 | 007521 151 | 007533 152 | 007685 153 | 007718 154 | 007743 155 | 007803 156 | 007833 157 | 007980 158 | 008091 
159 | 008112
160 | 008163
161 | 008166
162 | 008251
163 | 008261
164 | 008306
165 | 008449
166 | 008454
167 | 008456
168 | 008498
169 | 008568
170 | 008584
171 | 008604
172 | 008645
173 | 008725
174 | 008730
175 | 008784
176 | 008884
177 | 008900
178 | 008999
179 | 009131
180 | 009177
181 | 009347
182 | 009527
183 | 009533
184 | 009603
185 | 009717
186 | 009836
187 | 009947
188 | 009955
189 |
--------------------------------------------------------------------------------
/layers/functions/prior_box.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from math import sqrt as sqrt
3 | from itertools import product as product
4 | import torch
5 |
6 | import layers.box_utils as box_utils
7 |
8 |
9 | class PriorBox(object):
10 |     """Compute priorbox coordinates in center-offset form for each source
11 |     feature map.
12 |     """
13 |     def __init__(self, cfg, modeltype = 'SSD300'):
14 |         super(PriorBox, self).__init__()
15 |         self.image_size = cfg['min_dim']
16 |         # number of priors for feature map location (either 4 or 6)
17 |         self.num_priors = len(cfg['aspect_ratios'])
18 |         self.variance = cfg['variance'] or [0.1]
19 |         self.feature_maps = cfg['feature_maps']
20 |         self.min_sizes = cfg['min_sizes']
21 |         self.max_sizes = cfg['max_sizes']
22 |         self.steps = cfg['steps']
23 |         self.aspect_ratios = cfg['aspect_ratios']
24 |         self.clip = cfg['clip']
25 |         self.version = cfg['name']
26 |         self.modeltype = modeltype
27 |         for v in self.variance:
28 |             if v <= 0:
29 |                 raise ValueError('Variances must be greater than 0')
30 |
31 |     def forward(self):
32 |         mean = []
33 |         for k, f in enumerate(self.feature_maps):
34 |             for i, j in product(range(f), repeat=2):
35 |                 f_k = self.image_size / self.steps[k]
36 |                 # unit center x,y
37 |                 cx = (j + 0.5) / f_k
38 |                 cy = (i + 0.5) / f_k
39 |
40 |                 # aspect_ratio: 1
41 |                 # rel size: min_size
42 |                 s_k = self.min_sizes[k]/self.image_size
43 |                 mean += [cx, cy, s_k, s_k]
44 |
45 |                 # aspect_ratio: 1
46 |                 # rel size: sqrt(s_k * s_(k+1))
47 |                 s_k_prime = sqrt(s_k * (self.max_sizes[k]/self.image_size))
48 |                 mean += [cx, cy, s_k_prime, s_k_prime]
49 |
50 |                 # rest of aspect ratios
51 |                 for ar in self.aspect_ratios[k]:
52 |                     mean += [cx, cy, s_k*sqrt(ar), s_k/sqrt(ar)]
53 |                     mean += [cx, cy, s_k/sqrt(ar), s_k*sqrt(ar)]
54 |
55 |         # back to torch land
56 |         output = torch.Tensor(mean).view(-1, 4)
57 |         if self.clip:
58 |             output.clamp_(max=1, min=0)
59 |
60 |         # # todo
61 |         # if self.modeltype == 'SSD300KL':
62 |         #     # transform to x1y1x2y2 form
63 |         #     output = box_utils.point_form(output)
64 |         #     # print('todo')
65 |         #
66 |         #     if self.clip:
67 |         #         output.clamp_(max=1, min=0)
68 |
69 |         return output
70 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | 91 | # atom remote-sync package 92 | .remote-sync.json 93 | 94 | # weights 95 | weights/ 96 | 97 | #DS_Store 98 | .DS_Store 99 | 100 | # dev stuff 101 | eval/*/ 102 | eval.ipynb 103 | dev.ipynb 104 | .vscode/ 105 | 106 | # not ready 107 | videos/ 108 | templates/ 109 | data/ssd_dataloader.py 110 | data/datasets/ 111 | doc/visualize.py 112 | read_results.py 113 | ssd300_120000/ 114 | demos/live 115 | webdemo.py 116 | test_data_aug.py 117 | 118 | # attributes 119 | # pycharm 120 | .idea/ 121 | 122 | # temp checkout soln 123 | data/datasets/ 124 | data/ssd_dataloader.py 125 | 126 | # pylint 127 | .pylintrc 128 | 129 | ###### Above gitignores are simply copied from the SSD.pytorch repo by AMDegroot###### 130 | ###### Below are my own (Jasper Bakker) ###### 131 | 132 | 133 | # Coco dataset 134 | data/images/coco 135 | 136 | # weights of experiments 137 | active_learning_dir/*/weights/ 138 | active_learning_dir/*/optimizers/ 139 | 140 | #some large files 141 | **/sample_selection/ 142 | **/eval/*detections* 143 | active_learning_dir/* 144 | 145 | # server stuff 146 | .nfs* 147 | 148 | # pandas core differs locally 149 | data/stats*.pickle 150 | 151 | #some local folder that sometimes contains large files 152 | debug2/* 153 | active_learning_dir/ 154 | -------------------------------------------------------------------------------- /data/imageset_files/bottle_test_detect.txt: -------------------------------------------------------------------------------- 1 | 000136 2 | 000144 3 | 000151 4 | 000178 5 | 000202 6 | 000277 7 | 000280 8 | 000299 9 | 000327 10 | 000335 11 | 000346 12 | 000369 13 | 000414 14 | 000447 15 | 000517 16 | 000587 17 | 000606 18 | 000611 19 | 000737 20 | 000762 21 | 000825 22 | 000893 23 | 001021 24 | 001023 25 | 001035 26 | 001047 27 | 001086 28 | 001105 29 | 001167 30 | 001177 31 | 001244 32 | 001295 33 | 001349 34 | 001354 35 | 001407 36 | 001429 37 | 001431 38 | 001456 39 | 001489 40 | 001569 41 | 001585 42 | 001602 43 | 001631 44 | 001652 45 | 001720 46 | 001812 47 | 001829 48 | 001921 49 | 001925 50 | 001957 51 | 001975 52 | 001992 53 | 002026 54 | 002207 55 | 002231 56 | 002235 57 | 002389 58 | 002429 59 | 002536 60 | 002538 61 | 002560 62 | 002575 63 | 002614 64 | 002656 65 | 002705 66 | 002711 67 | 002712 68 | 002743 69 | 002773 70 | 002857 71 | 002904 72 | 002905 73 | 002945 74 | 002950 75 | 002951 76 | 002982 77 | 003010 78 | 003067 79 | 003071 80 | 003076 81 | 003249 82 | 003278 83 | 003309 84 | 003431 85 | 003446 86 | 003459 87 | 003488 88 | 003544 89 | 003590 90 | 003643 91 | 003649 92 | 003665 93 | 003697 94 | 003707 95 | 003725 96 | 003736 97 | 003776 98 | 
003802 99 | 003842 100 | 003867 101 | 003906 102 | 003928 103 | 003942 104 | 003943 105 | 003944 106 | 004032 107 | 004042 108 | 004055 109 | 004056 110 | 004068 111 | 004072 112 | 004118 113 | 004127 114 | 004134 115 | 004162 116 | 004180 117 | 004236 118 | 004268 119 | 004311 120 | 004348 121 | 004355 122 | 004422 123 | 004469 124 | 004476 125 | 004546 126 | 004599 127 | 004640 128 | 004712 129 | 004717 130 | 004720 131 | 004721 132 | 004819 133 | 004880 134 | 004919 135 | 004922 136 | 005002 137 | 005096 138 | 005216 139 | 005226 140 | 005233 141 | 005276 142 | 005294 143 | 005428 144 | 005442 145 | 005491 146 | 005570 147 | 005622 148 | 005635 149 | 005721 150 | 005926 151 | 005936 152 | 005942 153 | 006003 154 | 006056 155 | 006057 156 | 006072 157 | 006086 158 | 006093 159 | 006152 160 | 006195 161 | 006248 162 | 006359 163 | 006360 164 | 006380 165 | 006390 166 | 006402 167 | 006432 168 | 006452 169 | 006500 170 | 006644 171 | 006662 172 | 006732 173 | 007225 174 | 007237 175 | 007404 176 | 007456 177 | 007494 178 | 007500 179 | 007504 180 | 007532 181 | 007569 182 | 007598 183 | 007652 184 | 007700 185 | 007717 186 | 007739 187 | 007744 188 | 007778 189 | 007783 190 | 007862 191 | 007937 192 | 008089 193 | 008094 194 | 008134 195 | 008182 196 | 008192 197 | 008287 198 | 008330 199 | 008379 200 | 008382 201 | 008400 202 | 008405 203 | 008414 204 | 008458 205 | 008516 206 | 008520 207 | 008555 208 | 008583 209 | 008591 210 | 008627 211 | 008686 212 | 008697 213 | 008740 214 | 008778 215 | 008894 216 | 008902 217 | 008922 218 | 008925 219 | 009012 220 | 009075 221 | 009164 222 | 009171 223 | 009257 224 | 009297 225 | 009311 226 | 009366 227 | 009431 228 | 009482 229 | 009547 230 | 009570 231 | 009626 232 | 009630 233 | 009633 234 | 009645 235 | 009782 236 | 009798 237 | 009840 238 | 009871 239 | 009891 240 | 009929 241 | -------------------------------------------------------------------------------- /create_initial_networks.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import torch 4 | 5 | import active_learning_package.helpers as helpers 6 | from data import config 7 | 8 | def str2bool(v): 9 | return v.lower() in ("yes", "true", "t", "1") 10 | 11 | # parser arguments from train.py 12 | parser = argparse.ArgumentParser(description='Active Learning With Single Shot MultiBox Detector Training With Pytorch') 13 | 14 | parser.add_argument('--modeltype', default='SSD300',choices=['SSD300', 'SSD300KL'], 15 | help='Which model to use: standard SSD or the SSD with uncertainty in the bounding box regression and KL loss ') #SSD300KL doesn't work well 16 | parser.add_argument('--dataset', default='VOC07_1_class', choices=['VOC07', 'VOC12','VOC07_1_class','VOC07_1_class','VOC07_6_class'], 17 | type=str, help='VOC07_1_class is with one class of interest and the background class') 18 | parser.add_argument('--sample_select_nms_conf_thresh', default = 0.01, type = float, 19 | help = 'The conf threshold used in before non maximum suppression. Only detections with a confidence above ' 20 | 'this threshold for a certain class will go trough nms') 21 | parser.add_argument('--paths_to_weights', default=None,type=str, nargs='+', 22 | help='These are the weights that ere used the initial evaluation of the unlabeled dataset') # if no trained model is given, this will return an error when loading the model. 
23 | parser.add_argument('--basenet', default='weights/vgg16_reducedfc.pth', 24 | help='Pretrained base model') 25 | 26 | parser.add_argument('--ensemble_size', default=3,type=int) 27 | parser.add_argument('--num_classes', default=1,type=int, 28 | help='number of classes of interest (so excluding background class') 29 | 30 | 31 | if __name__ == '__main__': 32 | args = parser.parse_args() 33 | if args.dataset in ['VOC12','VOC07']: 34 | args.cfg = config.voc # adapted from pytorch SSD code 35 | 36 | elif args.dataset == 'VOC07_1_class': 37 | args.cfg = config.voc_1_class 38 | 39 | elif args.dataset == 'VOC07_6_class': 40 | args.cfg = config.voc_6_class 41 | 42 | if torch.cuda.is_available(): 43 | device = 'cuda' 44 | torch.set_default_tensor_type('torch.cuda.FloatTensor') 45 | else: 46 | device = 'cpu' 47 | torch.set_default_tensor_type('torch.FloatTensor') 48 | 49 | args.device = device 50 | 51 | args.num_classes = args.num_classes + 1 52 | print('Creating ',args.ensemble_size,' number of SSDs for ',args.num_classes,' (+ 1 background class) number of classes') 53 | print('...') 54 | 55 | 56 | 57 | 58 | for i in range(args.ensemble_size): 59 | # make net 60 | net = helpers.build_sample_selection_net(args, 61 | args.num_classes) 62 | 63 | args.experiment_dir = os.getcwd()+'/' 64 | path = 'weights/initial_net_'+str(i) 65 | 66 | # save net 67 | helpers.save_weights(weights=net, 68 | args=args, 69 | path=path) 70 | print() 71 | 72 | 73 | print('Initial nets created!') -------------------------------------------------------------------------------- /data/imageset_files/pottedplant_test_detect.txt: -------------------------------------------------------------------------------- 1 | 000006 2 | 000070 3 | 000116 4 | 000124 5 | 000149 6 | 000196 7 | 000234 8 | 000389 9 | 000397 10 | 000429 11 | 000575 12 | 000606 13 | 000650 14 | 000692 15 | 000698 16 | 000737 17 | 000784 18 | 000788 19 | 000840 20 | 000890 21 | 000933 22 | 000953 23 | 000983 24 | 001023 25 | 001035 26 | 001070 27 | 001141 28 | 001180 29 | 001255 30 | 001276 31 | 001380 32 | 001391 33 | 001411 34 | 001437 35 | 001469 36 | 001478 37 | 001540 38 | 001631 39 | 001648 40 | 001652 41 | 001665 42 | 001666 43 | 001712 44 | 001715 45 | 001779 46 | 001786 47 | 001811 48 | 001812 49 | 001867 50 | 001869 51 | 001880 52 | 001905 53 | 002018 54 | 002026 55 | 002035 56 | 002074 57 | 002123 58 | 002161 59 | 002240 60 | 002297 61 | 002339 62 | 002381 63 | 002414 64 | 002422 65 | 002451 66 | 002482 67 | 002489 68 | 002510 69 | 002527 70 | 002557 71 | 002604 72 | 002708 73 | 002743 74 | 002753 75 | 002777 76 | 002887 77 | 002925 78 | 002945 79 | 003014 80 | 003020 81 | 003029 82 | 003060 83 | 003062 84 | 003113 85 | 003192 86 | 003221 87 | 003281 88 | 003297 89 | 003323 90 | 003442 91 | 003448 92 | 003488 93 | 003572 94 | 003591 95 | 003624 96 | 003626 97 | 003649 98 | 003665 99 | 003676 100 | 003756 101 | 003761 102 | 003778 103 | 003787 104 | 003882 105 | 003934 106 | 003943 107 | 004027 108 | 004038 109 | 004045 110 | 004081 111 | 004116 112 | 004124 113 | 004155 114 | 004159 115 | 004173 116 | 004211 117 | 004222 118 | 004276 119 | 004311 120 | 004422 121 | 004451 122 | 004486 123 | 004712 124 | 004778 125 | 004780 126 | 004858 127 | 004862 128 | 004893 129 | 004941 130 | 005088 131 | 005092 132 | 005174 133 | 005180 134 | 005184 135 | 005196 136 | 005266 137 | 005284 138 | 005494 139 | 005525 140 | 005556 141 | 005567 142 | 005650 143 | 005661 144 | 005673 145 | 005703 146 | 005754 147 | 005804 148 | 005857 149 | 005866 150 | 005922 151 | 005926 
152 | 005937 153 | 005941 154 | 005943 155 | 005976 156 | 006022 157 | 006049 158 | 006083 159 | 006115 160 | 006231 161 | 006297 162 | 006303 163 | 006307 164 | 006311 165 | 006359 166 | 006426 167 | 006496 168 | 006504 169 | 006646 170 | 006691 171 | 006700 172 | 006712 173 | 006750 174 | 006776 175 | 006792 176 | 006795 177 | 006798 178 | 006807 179 | 006816 180 | 006843 181 | 006861 182 | 006902 183 | 006937 184 | 006964 185 | 007012 186 | 007057 187 | 007106 188 | 007164 189 | 007237 190 | 007248 191 | 007272 192 | 007319 193 | 007332 194 | 007335 195 | 007364 196 | 007406 197 | 007473 198 | 007499 199 | 007549 200 | 007643 201 | 007644 202 | 007722 203 | 007752 204 | 007839 205 | 007913 206 | 008010 207 | 008039 208 | 008071 209 | 008155 210 | 008156 211 | 008162 212 | 008167 213 | 008185 214 | 008196 215 | 008206 216 | 008207 217 | 008239 218 | 008245 219 | 008271 220 | 008380 221 | 008382 222 | 008389 223 | 008457 224 | 008458 225 | 008471 226 | 008547 227 | 008570 228 | 008599 229 | 008626 230 | 008661 231 | 008754 232 | 008821 233 | 008846 234 | 008850 235 | 008904 236 | 008937 237 | 008947 238 | 009172 239 | 009261 240 | 009313 241 | 009367 242 | 009416 243 | 009462 244 | 009514 245 | 009581 246 | 009602 247 | 009606 248 | 009630 249 | 009741 250 | 009750 251 | 009787 252 | 009854 253 | 009925 254 | 009928 255 | -------------------------------------------------------------------------------- /data/imageset_files/bottle_trainval_detect.txt: -------------------------------------------------------------------------------- 1 | 000050 2 | 000170 3 | 000200 4 | 000222 5 | 000224 6 | 000229 7 | 000250 8 | 000269 9 | 000344 10 | 000367 11 | 000381 12 | 000498 13 | 000531 14 | 000559 15 | 000564 16 | 000684 17 | 000726 18 | 000753 19 | 000865 20 | 000929 21 | 000951 22 | 000999 23 | 001045 24 | 001185 25 | 001229 26 | 001272 27 | 001292 28 | 001346 29 | 001393 30 | 001408 31 | 001444 32 | 001451 33 | 001465 34 | 001490 35 | 001493 36 | 001522 37 | 001537 38 | 001563 39 | 001580 40 | 001590 41 | 001673 42 | 001682 43 | 001754 44 | 001756 45 | 001777 46 | 001843 47 | 001963 48 | 001970 49 | 002030 50 | 002098 51 | 002112 52 | 002139 53 | 002221 54 | 002226 55 | 002253 56 | 002287 57 | 002350 58 | 002404 59 | 002442 60 | 002444 61 | 002518 62 | 002545 63 | 002572 64 | 002613 65 | 002641 66 | 002658 67 | 002745 68 | 002881 69 | 002931 70 | 002939 71 | 002941 72 | 002953 73 | 002965 74 | 003056 75 | 003088 76 | 003108 77 | 003122 78 | 003149 79 | 003159 80 | 003181 81 | 003185 82 | 003207 83 | 003219 84 | 003247 85 | 003259 86 | 003262 87 | 003269 88 | 003376 89 | 003449 90 | 003451 91 | 003496 92 | 003556 93 | 003566 94 | 003577 95 | 003678 96 | 003729 97 | 003783 98 | 003859 99 | 003865 100 | 003932 101 | 004075 102 | 004110 103 | 004140 104 | 004152 105 | 004158 106 | 004193 107 | 004279 108 | 004280 109 | 004291 110 | 004296 111 | 004322 112 | 004333 113 | 004370 114 | 004466 115 | 004468 116 | 004479 117 | 004493 118 | 004495 119 | 004508 120 | 004588 121 | 004671 122 | 004701 123 | 004710 124 | 004718 125 | 004742 126 | 004753 127 | 004886 128 | 004916 129 | 004926 130 | 004943 131 | 005052 132 | 005122 133 | 005160 134 | 005195 135 | 005242 136 | 005245 137 | 005304 138 | 005345 139 | 005367 140 | 005385 141 | 005388 142 | 005430 143 | 005441 144 | 005467 145 | 005471 146 | 005541 147 | 005563 148 | 005605 149 | 005606 150 | 005662 151 | 005672 152 | 005814 153 | 005908 154 | 005919 155 | 006061 156 | 006141 157 | 006180 158 | 006189 159 | 006241 160 | 006299 161 | 006335 162 | 006363 
163 | 006381 164 | 006398 165 | 006409 166 | 006448 167 | 006483 168 | 006486 169 | 006503 170 | 006519 171 | 006576 172 | 006610 173 | 006626 174 | 006648 175 | 006687 176 | 006727 177 | 006731 178 | 006828 179 | 006858 180 | 006892 181 | 006908 182 | 006912 183 | 006917 184 | 006919 185 | 006939 186 | 006948 187 | 007007 188 | 007021 189 | 007048 190 | 007056 191 | 007104 192 | 007117 193 | 007121 194 | 007141 195 | 007177 196 | 007191 197 | 007250 198 | 007323 199 | 007351 200 | 007370 201 | 007410 202 | 007457 203 | 007461 204 | 007493 205 | 007511 206 | 007558 207 | 007572 208 | 007631 209 | 007649 210 | 007740 211 | 007751 212 | 007762 213 | 007790 214 | 007798 215 | 007899 216 | 007987 217 | 008042 218 | 008061 219 | 008137 220 | 008139 221 | 008141 222 | 008164 223 | 008204 224 | 008213 225 | 008319 226 | 008467 227 | 008592 228 | 008783 229 | 008841 230 | 008931 231 | 008953 232 | 008962 233 | 008967 234 | 008982 235 | 008989 236 | 009016 237 | 009027 238 | 009100 239 | 009108 240 | 009174 241 | 009242 242 | 009290 243 | 009299 244 | 009375 245 | 009388 246 | 009420 247 | 009458 248 | 009481 249 | 009497 250 | 009585 251 | 009613 252 | 009647 253 | 009654 254 | 009664 255 | 009666 256 | 009679 257 | 009687 258 | 009709 259 | 009726 260 | 009878 261 | 009902 262 | 009917 263 | -------------------------------------------------------------------------------- /data/imageset_files/pottedplant_trainval_detect.txt: -------------------------------------------------------------------------------- 1 | 000052 2 | 000073 3 | 000235 4 | 000307 5 | 000391 6 | 000484 7 | 000592 8 | 000601 9 | 000628 10 | 000661 11 | 000710 12 | 000711 13 | 000729 14 | 000780 15 | 000854 16 | 000865 17 | 000867 18 | 000868 19 | 000889 20 | 000917 21 | 000923 22 | 000962 23 | 001079 24 | 001082 25 | 001091 26 | 001112 27 | 001142 28 | 001149 29 | 001154 30 | 001164 31 | 001212 32 | 001408 33 | 001420 34 | 001430 35 | 001444 36 | 001451 37 | 001464 38 | 001479 39 | 001580 40 | 001593 41 | 001729 42 | 001754 43 | 001777 44 | 001898 45 | 002015 46 | 002049 47 | 002108 48 | 002116 49 | 002176 50 | 002179 51 | 002192 52 | 002284 53 | 002305 54 | 002320 55 | 002347 56 | 002361 57 | 002362 58 | 002378 59 | 002427 60 | 002483 61 | 002542 62 | 002559 63 | 002569 64 | 002594 65 | 002598 66 | 002658 67 | 002668 68 | 002745 69 | 002775 70 | 002798 71 | 002817 72 | 002820 73 | 002924 74 | 002946 75 | 002967 76 | 003002 77 | 003004 78 | 003024 79 | 003034 80 | 003044 81 | 003093 82 | 003146 83 | 003186 84 | 003189 85 | 003202 86 | 003211 87 | 003239 88 | 003284 89 | 003301 90 | 003367 91 | 003374 92 | 003419 93 | 003436 94 | 003450 95 | 003462 96 | 003466 97 | 003506 98 | 003509 99 | 003555 100 | 003622 101 | 003629 102 | 003694 103 | 003699 104 | 003758 105 | 003791 106 | 003796 107 | 003834 108 | 003877 109 | 003924 110 | 003932 111 | 003956 112 | 004017 113 | 004028 114 | 004037 115 | 004105 116 | 004121 117 | 004212 118 | 004274 119 | 004275 120 | 004292 121 | 004364 122 | 004392 123 | 004433 124 | 004446 125 | 004455 126 | 004484 127 | 004558 128 | 004631 129 | 004682 130 | 004699 131 | 004707 132 | 004714 133 | 004742 134 | 004825 135 | 004895 136 | 004898 137 | 004939 138 | 004948 139 | 004960 140 | 004991 141 | 005061 142 | 005129 143 | 005143 144 | 005145 145 | 005160 146 | 005183 147 | 005186 148 | 005190 149 | 005224 150 | 005346 151 | 005515 152 | 005517 153 | 005531 154 | 005605 155 | 005641 156 | 005676 157 | 005682 158 | 005687 159 | 005762 160 | 005794 161 | 005813 162 | 005817 163 | 005821 164 | 005874 165 | 
005894 166 | 005919 167 | 005923 168 | 005985 169 | 005991 170 | 006029 171 | 006069 172 | 006088 173 | 006100 174 | 006159 175 | 006188 176 | 006189 177 | 006270 178 | 006299 179 | 006319 180 | 006351 181 | 006447 182 | 006575 183 | 006605 184 | 006627 185 | 006681 186 | 006726 187 | 006755 188 | 006765 189 | 006777 190 | 006806 191 | 006859 192 | 006956 193 | 007078 194 | 007130 195 | 007180 196 | 007302 197 | 007344 198 | 007356 199 | 007390 200 | 007396 201 | 007451 202 | 007511 203 | 007519 204 | 007558 205 | 007600 206 | 007619 207 | 007624 208 | 007664 209 | 007673 210 | 007704 211 | 007715 212 | 007781 213 | 007795 214 | 007814 215 | 007865 216 | 007890 217 | 007909 218 | 007925 219 | 007956 220 | 007999 221 | 008012 222 | 008029 223 | 008043 224 | 008064 225 | 008082 226 | 008106 227 | 008127 228 | 008140 229 | 008171 230 | 008199 231 | 008216 232 | 008318 233 | 008341 234 | 008424 235 | 008465 236 | 008468 237 | 008536 238 | 008549 239 | 008557 240 | 008587 241 | 008655 242 | 008688 243 | 008733 244 | 008749 245 | 008806 246 | 008835 247 | 008933 248 | 008970 249 | 008987 250 | 008997 251 | 009032 252 | 009068 253 | 009078 254 | 009087 255 | 009123 256 | 009175 257 | 009181 258 | 009194 259 | 009215 260 | 009252 261 | 009279 262 | 009342 263 | 009419 264 | 009443 265 | 009491 266 | 009519 267 | 009634 268 | 009638 269 | 009647 270 | 009686 271 | 009878 272 | 009884 273 | 009887 274 | -------------------------------------------------------------------------------- /data/imageset_files/horse_test_detect.txt: -------------------------------------------------------------------------------- 1 | 000010 2 | 000022 3 | 000056 4 | 000166 5 | 000168 6 | 000237 7 | 000248 8 | 000267 9 | 000319 10 | 000330 11 | 000356 12 | 000378 13 | 000392 14 | 000393 15 | 000410 16 | 000412 17 | 000413 18 | 000434 19 | 000445 20 | 000455 21 | 000475 22 | 000604 23 | 000616 24 | 000623 25 | 000666 26 | 000704 27 | 000737 28 | 000783 29 | 000836 30 | 000866 31 | 000978 32 | 000986 33 | 001013 34 | 001063 35 | 001114 36 | 001183 37 | 001245 38 | 001300 39 | 001317 40 | 001320 41 | 001398 42 | 001412 43 | 001417 44 | 001452 45 | 001574 46 | 001584 47 | 001624 48 | 001635 49 | 001692 50 | 001703 51 | 001769 52 | 001794 53 | 001819 54 | 001823 55 | 001865 56 | 001900 57 | 001939 58 | 001955 59 | 002017 60 | 002031 61 | 002046 62 | 002050 63 | 002071 64 | 002072 65 | 002204 66 | 002269 67 | 002283 68 | 002331 69 | 002360 70 | 002398 71 | 002409 72 | 002421 73 | 002511 74 | 002553 75 | 002573 76 | 002596 77 | 002655 78 | 002692 79 | 002780 80 | 002790 81 | 002819 82 | 002828 83 | 002837 84 | 002909 85 | 002921 86 | 003022 87 | 003041 88 | 003084 89 | 003173 90 | 003182 91 | 003187 92 | 003193 93 | 003222 94 | 003232 95 | 003235 96 | 003237 97 | 003304 98 | 003473 99 | 003531 100 | 003557 101 | 003562 102 | 003579 103 | 003583 104 | 003584 105 | 003653 106 | 003680 107 | 003720 108 | 003734 109 | 003764 110 | 003832 111 | 003852 112 | 003922 113 | 003951 114 | 003955 115 | 003978 116 | 004006 117 | 004084 118 | 004103 119 | 004126 120 | 004177 121 | 004234 122 | 004266 123 | 004294 124 | 004301 125 | 004309 126 | 004381 127 | 004407 128 | 004492 129 | 004522 130 | 004538 131 | 004545 132 | 004561 133 | 004564 134 | 004589 135 | 004629 136 | 004749 137 | 004756 138 | 004803 139 | 004804 140 | 004807 141 | 004865 142 | 004874 143 | 004932 144 | 004957 145 | 004970 146 | 005053 147 | 005132 148 | 005139 149 | 005182 150 | 005198 151 | 005299 152 | 005334 153 | 005339 154 | 005342 155 | 005409 156 | 005456 157 | 
005505 158 | 005513 159 | 005529 160 | 005564 161 | 005572 162 | 005610 163 | 005677 164 | 005722 165 | 005733 166 | 005822 167 | 005882 168 | 005890 169 | 005944 170 | 005967 171 | 005973 172 | 006002 173 | 006106 174 | 006122 175 | 006143 176 | 006207 177 | 006237 178 | 006361 179 | 006364 180 | 006365 181 | 006368 182 | 006401 183 | 006478 184 | 006511 185 | 006521 186 | 006522 187 | 006540 188 | 006555 189 | 006586 190 | 006623 191 | 006649 192 | 006672 193 | 006728 194 | 006756 195 | 006779 196 | 006793 197 | 006811 198 | 006863 199 | 006873 200 | 006875 201 | 006897 202 | 006936 203 | 006982 204 | 007055 205 | 007134 206 | 007229 207 | 007252 208 | 007253 209 | 007265 210 | 007293 211 | 007303 212 | 007310 213 | 007392 214 | 007412 215 | 007509 216 | 007587 217 | 007609 218 | 007616 219 | 007620 220 | 007623 221 | 007636 222 | 007750 223 | 007766 224 | 007774 225 | 007789 226 | 007805 227 | 007874 228 | 007942 229 | 007972 230 | 007975 231 | 008010 232 | 008027 233 | 008059 234 | 008109 235 | 008124 236 | 008147 237 | 008183 238 | 008192 239 | 008205 240 | 008228 241 | 008256 242 | 008259 243 | 008298 244 | 008303 245 | 008314 246 | 008354 247 | 008399 248 | 008476 249 | 008574 250 | 008593 251 | 008605 252 | 008786 253 | 008802 254 | 008804 255 | 008812 256 | 008882 257 | 008899 258 | 009046 259 | 009067 260 | 009074 261 | 009101 262 | 009130 263 | 009231 264 | 009369 265 | 009380 266 | 009505 267 | 009525 268 | 009535 269 | 009569 270 | 009661 271 | 009665 272 | 009722 273 | 009768 274 | 009803 275 | 009903 276 | 009931 277 | 009933 278 | 009937 279 | 009957 280 | -------------------------------------------------------------------------------- /data/imageset_files/horse_trainval_detect.txt: -------------------------------------------------------------------------------- 1 | 000009 2 | 000017 3 | 000133 4 | 000150 5 | 000173 6 | 000194 7 | 000214 8 | 000275 9 | 000328 10 | 000332 11 | 000347 12 | 000372 13 | 000407 14 | 000435 15 | 000470 16 | 000483 17 | 000523 18 | 000524 19 | 000577 20 | 000667 21 | 000695 22 | 000702 23 | 000760 24 | 000764 25 | 000799 26 | 000879 27 | 000904 28 | 000991 29 | 001064 30 | 001071 31 | 001084 32 | 001236 33 | 001241 34 | 001254 35 | 001287 36 | 001337 37 | 001405 38 | 001420 39 | 001515 40 | 001521 41 | 001523 42 | 001556 43 | 001586 44 | 001628 45 | 001632 46 | 001690 47 | 001699 48 | 001711 49 | 001724 50 | 001727 51 | 001730 52 | 001766 53 | 001807 54 | 001894 55 | 001927 56 | 001945 57 | 001960 58 | 002042 59 | 002043 60 | 002183 61 | 002187 62 | 002190 63 | 002213 64 | 002238 65 | 002249 66 | 002261 67 | 002273 68 | 002329 69 | 002405 70 | 002448 71 | 002471 72 | 002520 73 | 002554 74 | 002555 75 | 002633 76 | 002657 77 | 002678 78 | 002684 79 | 002713 80 | 002735 81 | 002774 82 | 002786 83 | 002800 84 | 002804 85 | 002842 86 | 002858 87 | 003015 88 | 003017 89 | 003027 90 | 003121 91 | 003189 92 | 003210 93 | 003290 94 | 003294 95 | 003303 96 | 003307 97 | 003362 98 | 003403 99 | 003424 100 | 003429 101 | 003436 102 | 003492 103 | 003611 104 | 003618 105 | 003625 106 | 003645 107 | 003691 108 | 003696 109 | 003732 110 | 003735 111 | 003754 112 | 003772 113 | 003779 114 | 003889 115 | 003939 116 | 003948 117 | 003960 118 | 003970 119 | 004051 120 | 004077 121 | 004117 122 | 004146 123 | 004168 124 | 004195 125 | 004307 126 | 004323 127 | 004339 128 | 004372 129 | 004389 130 | 004450 131 | 004487 132 | 004535 133 | 004537 134 | 004553 135 | 004625 136 | 004634 137 | 004656 138 | 004662 139 | 004686 140 | 004722 141 | 004788 142 | 004789 143 | 
004815 144 | 004834 145 | 004872 146 | 004902 147 | 004935 148 | 004950 149 | 004968 150 | 004995 151 | 005081 152 | 005094 153 | 005114 154 | 005145 155 | 005161 156 | 005236 157 | 005248 158 | 005278 159 | 005306 160 | 005331 161 | 005351 162 | 005419 163 | 005453 164 | 005461 165 | 005511 166 | 005550 167 | 005568 168 | 005611 169 | 005636 170 | 005700 171 | 005728 172 | 005732 173 | 005741 174 | 005877 175 | 005948 176 | 006095 177 | 006134 178 | 006151 179 | 006249 180 | 006276 181 | 006285 182 | 006295 183 | 006382 184 | 006392 185 | 006428 186 | 006429 187 | 006445 188 | 006455 189 | 006459 190 | 006506 191 | 006572 192 | 006611 193 | 006617 194 | 006645 195 | 006722 196 | 006769 197 | 006789 198 | 006797 199 | 006836 200 | 006847 201 | 006850 202 | 006858 203 | 006862 204 | 006866 205 | 006911 206 | 006933 207 | 006944 208 | 007018 209 | 007035 210 | 007109 211 | 007138 212 | 007163 213 | 007185 214 | 007216 215 | 007234 216 | 007243 217 | 007260 218 | 007308 219 | 007325 220 | 007439 221 | 007448 222 | 007526 223 | 007547 224 | 007570 225 | 007576 226 | 007594 227 | 007603 228 | 007611 229 | 007637 230 | 007697 231 | 007705 232 | 007727 233 | 007748 234 | 007809 235 | 007919 236 | 008019 237 | 008040 238 | 008069 239 | 008142 240 | 008191 241 | 008209 242 | 008248 243 | 008279 244 | 008307 245 | 008311 246 | 008320 247 | 008427 248 | 008437 249 | 008509 250 | 008524 251 | 008526 252 | 008564 253 | 008596 254 | 008610 255 | 008612 256 | 008653 257 | 008699 258 | 008731 259 | 008744 260 | 008805 261 | 008862 262 | 008867 263 | 008876 264 | 008886 265 | 008914 266 | 008919 267 | 009020 268 | 009024 269 | 009114 270 | 009138 271 | 009148 272 | 009202 273 | 009208 274 | 009236 275 | 009251 276 | 009255 277 | 009318 278 | 009331 279 | 009337 280 | 009407 281 | 009438 282 | 009465 283 | 009468 284 | 009512 285 | 009698 286 | 009719 287 | 009732 288 | 009767 289 | 009807 290 | 009808 291 | 009841 292 | 009851 293 | 009874 294 | 009954 295 | -------------------------------------------------------------------------------- /layers/modules/kl_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | 6 | 7 | class KLLoss(nn.Module): 8 | # def KLLoss(xg,xe,alpha): 9 | """ 10 | Kl-loss function for bounding box regression from CVPR 2019 paper: 11 | Bounding Box Regression with Uncertainty for Accurate Object Detection 12 | by Yihui He, Chenchen Zhu, Jianren Wang. Marios Savvides, Xiangyu Zhang 13 | 14 | It is a replacement for the Smooth L1 loss often used in bounding box regression. 
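In the implementation below, both cases are computed at once as e^{−α} * SmoothL1(xg, xe) + 1/2α, since PyTorch's smooth L1 loss already equals 1/2(xg − xe)^2 when |xg − xe| ≤ 1 and |xg − xe| − 1/2 otherwise.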
15 | 16 | The regression loss for a coordinate depends on |xg − xe| > 1 or not: 17 | 18 | Loss |xg − xe| ≤ 1: 19 | 20 | Lreg1 ∝ e^{−α} * 1/2(xg − xe)^2 + 1/2α 21 | 22 | and if |xg − xe| > 1, Loss: 23 | 24 | Lreg2 = e^{−α} (|xg − xe| − 1/2) + 1/2α 25 | 26 | PyTorch implementation by Jasper Bakker (JappaB @github) 27 | """ 28 | def __init__(self, loc_loss_weight=1.0): 29 | super(KLLoss, self).__init__() 30 | 31 | # Insert your own parameters here if you want to adjust the KL-Loss function 32 | 33 | # option to adjust the size of the loss 34 | self.loc_loss_weight = loc_loss_weight 35 | 36 | def forward(self,xg,xe,alpha): 37 | 38 | """ 39 | :param xg: The ground truth of the bounding box coordinates in x1y1x2y2 format 40 | shape: [number_of_boxes, 4] 41 | :param xe: The estimated bounding box coordinates in x1y1x2y2 format 42 | shape: [number_of_boxes, 4] 43 | :param alpha: The log(sigma^2) of the bounding box coordinates in x1y1x2y2 format 44 | shape: [number_of_boxes, 4] 45 | :return: total_kl_loss 46 | """ 47 | 48 | assert (xg.shape == xe.shape and xg.shape == alpha.shape),"The shapes of the input tensors must be the same" 49 | 50 | 51 | smooth_l1 = F.smooth_l1_loss(xe,xg, reduction='none') 52 | 53 | # e^{-α} 54 | exp_min_alpha = torch.exp(-alpha) 55 | 56 | # 1/2α 57 | half_alpha = 0.5*alpha 58 | 59 | total_kl_loss = (exp_min_alpha * smooth_l1 + half_alpha).sum() 60 | # total_kl_loss = total_kl_loss.sum() 61 | 62 | # 63 | # # xg − xe 64 | # delta = xg-xe 65 | # 66 | # # |xg − xe| 67 | # abs_delta = torch.abs(delta) 68 | # 69 | # ## mask for Lreg1 and Lreg2 70 | # Lreg1_mask = abs_delta.le(1.0) # |xg − xe| ≤ 1 71 | # Lreg2_mask = abs_delta.gt(1.0) # |xg − xe| > 1 72 | # 73 | # ## calculate all elements for Lreg1 74 | # # (xg − xe) for Lreg1 75 | # delta_Lreg1 = delta[Lreg1_mask] 76 | # 77 | # # e^{-α} 78 | # exp_min_alpha1 = torch.exp(-alpha[Lreg1_mask]) 79 | # 80 | # # 1/2α 81 | # half_alpha1 = 0.5*alpha[Lreg1_mask] 82 | # 83 | # L_reg1 = exp_min_alpha1 * 0.5 * torch.pow(delta_Lreg1,2) + half_alpha1 84 | # L_reg1 = L_reg1.sum() 85 | # 86 | # ## calculate all elements for Lreg2 87 | # # |xg − xe| for Lreg2 88 | # 89 | # 90 | # 91 | # abs_delta_Lreg2 = abs_delta[Lreg2_mask] 92 | # 93 | # # e^{-α} 94 | # exp_min_alpha2 = torch.exp(-alpha[Lreg2_mask]) 95 | # 96 | # # 1/2α 97 | # half_alpha2 = 0.5*alpha[Lreg2_mask] 98 | # 99 | # L_reg2 = exp_min_alpha2 * (abs_delta_Lreg2 - 0.5) + half_alpha2 100 | # L_reg2 = L_reg2.sum() 101 | # 102 | # 103 | # ## total 104 | # total_kl_loss = L_reg1+L_reg2 105 | # # total_kl_loss *= self.loc_loss_weight 106 | 107 | # todo: remove after debugging 108 | # print() 109 | # print('Debug kl-loss: ') 110 | # print('delta', delta) 111 | # print('abs_delta', abs_delta) 112 | # print('alpha', alpha) 113 | # print('exp_min_alpha1', exp_min_alpha1) 114 | # print('exp_min_alpha1', exp_min_alpha2) 115 | # print('Lreg1mask', Lreg1_mask.sum()) 116 | # print('Lreg2mask', Lreg2_mask.sum()) 117 | 118 | return total_kl_loss 119 | 120 | 121 | -------------------------------------------------------------------------------- /data/6_class_seed_set.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_name": "VOC07", 3 | "seed_set": { 4 | "image_set_idx": [ 5 | "009679", 6 | "009388", 7 | "009100", 8 | "007798", 9 | "002881", 10 | "000250", 11 | "004152", 12 | "006576", 13 | "000344", 14 | "008204", 15 | "002253", 16 | "005467", 17 | "009290", 18 | "007457", 19 | "006626", 20 | "000367", 21 | "008931", 22 | "002350", 23 | "007141", 
24 | "006363", 25 | "006727", 26 | "000498", 27 | 28 | "005496", 29 | "007743", 30 | "006965", 31 | "000154", 32 | "002234", 33 | "003337", 34 | "005614", 35 | "004532", 36 | "000184", 37 | "002372", 38 | "000382", 39 | "008498", 40 | "005181", 41 | "007139", 42 | "007685", 43 | "003098", 44 | "003722", 45 | "002659", 46 | "007460", 47 | "007803", 48 | "002208", 49 | 50 | "002786", 51 | "009841", 52 | "009114", 53 | "004834", 54 | "001628", 55 | "006445", 56 | "009874", 57 | "006285", 58 | "009318", 59 | 60 | "005821", 61 | "007396", 62 | "001777", 63 | "006159", 64 | "009279", 65 | "004446", 66 | "006188", 67 | "007302", 68 | "004105", 69 | "003758", 70 | "009638", 71 | "009175", 72 | "002946", 73 | "008749", 74 | "008082", 75 | "002775", 76 | 77 | "009863", 78 | "007208", 79 | "007821", 80 | "009900", 81 | "002533", 82 | "002643", 83 | "007374", 84 | "005020", 85 | "001980", 86 | "009106", 87 | "002691", 88 | "005756", 89 | "001360", 90 | "008706", 91 | "007475", 92 | 93 | "008592", 94 | "009842", 95 | "003681", 96 | "007230", 97 | "001714", 98 | "007165", 99 | "004423", 100 | "004490", 101 | "006679", 102 | "001750", 103 | "006833" 104 | ], 105 | "class_dist": [] 106 | }, 107 | "val_set": { 108 | "image_set_idx": [ 109 | "006409", 110 | "000269", 111 | "006648", 112 | "002641", 113 | "004671", 114 | "007121", 115 | "007649", 116 | "009878", 117 | "000381", 118 | "004886", 119 | 120 | "000931", 121 | "002465", 122 | "007361", 123 | "001732", 124 | "003860", 125 | "000826", 126 | "006123", 127 | "009603", 128 | "005705", 129 | "007365", 130 | 131 | "001236", 132 | "004625", 133 | "007216", 134 | "005145", 135 | "009138", 136 | "008596", 137 | "003492", 138 | "007448", 139 | "009407", 140 | "005236", 141 | 142 | "007558", 143 | "000592", 144 | "003301", 145 | "007999", 146 | "004631", 147 | "007390", 148 | "007890", 149 | "006351", 150 | "000710", 151 | "001451", 152 | 153 | "003636", 154 | "002116", 155 | "006206", 156 | "004244", 157 | "000245", 158 | "003256", 159 | "009839", 160 | "007663", 161 | "006330", 162 | "000545", 163 | 164 | "003705", 165 | "002263", 166 | "005469", 167 | "009816", 168 | "000107", 169 | "002593", 170 | "003698", 171 | "006944", 172 | "000900", 173 | "004312" 174 | 175 | ], 176 | "class_dist": [] 177 | }, 178 | "train_set": [ 179 | "009679", 180 | "009388", 181 | "009100", 182 | "007798", 183 | "002881", 184 | "000250", 185 | "004152", 186 | "006576", 187 | "000344", 188 | "008204", 189 | "002253", 190 | "005467", 191 | "009290", 192 | "007457", 193 | "006626", 194 | "000367", 195 | "008931", 196 | "002350", 197 | "007141", 198 | "006363", 199 | "006727", 200 | "000498", 201 | 202 | "005496", 203 | "007743", 204 | "006965", 205 | "000154", 206 | "002234", 207 | "003337", 208 | "005614", 209 | "004532", 210 | "000184", 211 | "002372", 212 | "000382", 213 | "008498", 214 | "005181", 215 | "007139", 216 | "007685", 217 | "003098", 218 | "003722", 219 | "002659", 220 | "007460", 221 | "007803", 222 | "002208", 223 | 224 | "002786", 225 | "009841", 226 | "009114", 227 | "004834", 228 | "001628", 229 | "006445", 230 | "009874", 231 | "006285", 232 | "009318", 233 | 234 | "005821", 235 | "007396", 236 | "001777", 237 | "006159", 238 | "009279", 239 | "004446", 240 | "006188", 241 | "007302", 242 | "004105", 243 | "003758", 244 | "009638", 245 | "009175", 246 | "002946", 247 | "008749", 248 | "008082", 249 | "002775", 250 | 251 | "009863", 252 | "007208", 253 | "007821", 254 | "009900", 255 | "002533", 256 | "002643", 257 | "007374", 258 | "005020", 259 | "001980", 260 
| "009106", 261 | "002691", 262 | "005756", 263 | "001360", 264 | "008706", 265 | "007475", 266 | 267 | "008592", 268 | "009842", 269 | "003681", 270 | "007230", 271 | "001714", 272 | "007165", 273 | "004423", 274 | "004490", 275 | "006679", 276 | "001750", 277 | "006833" 278 | ] 279 | } -------------------------------------------------------------------------------- /data/empty_seed.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_name": "VOC07", 3 | "seed_set": { 4 | "image_set_idx": [], 5 | "class_dist": [ 6 | ] 7 | }, 8 | "val_set": { 9 | "image_set_idx": [ 10 | "004857", 11 | "000702", 12 | "004895", 13 | "001012", 14 | "001149", 15 | "005138", 16 | "008688", 17 | "009098", 18 | "003085", 19 | "005812", 20 | "007813", 21 | "006681", 22 | "004872", 23 | "001510", 24 | "005903", 25 | "009762", 26 | "000023", 27 | "007486", 28 | "008456", 29 | "006939", 30 | "003824", 31 | "003889", 32 | "007154", 33 | "000997", 34 | "004100", 35 | "003603", 36 | "002957", 37 | "000814", 38 | "004753", 39 | "009813", 40 | "007691", 41 | "000078", 42 | "006869", 43 | "006866", 44 | "003696", 45 | "006038", 46 | "002965", 47 | "004423", 48 | "004548", 49 | "003162", 50 | "000541", 51 | "000777", 52 | "008031", 53 | "008744", 54 | "009805", 55 | "005006", 56 | "005895", 57 | "003165", 58 | "007074", 59 | "005676", 60 | "004146", 61 | "003118", 62 | "000337", 63 | "001563", 64 | "000501", 65 | "004797", 66 | "001383", 67 | "002987", 68 | "007208", 69 | "008292", 70 | "001455", 71 | "000661", 72 | "005405", 73 | "004003", 74 | "001768", 75 | "006275", 76 | "002512", 77 | "004450", 78 | "002323", 79 | "008453", 80 | "009897", 81 | "005101", 82 | "008514", 83 | "006844", 84 | "009726", 85 | "003331", 86 | "001053", 87 | "004565", 88 | "001978", 89 | "001442", 90 | "007398", 91 | "000637", 92 | "008503", 93 | "004842", 94 | "002595", 95 | "005263", 96 | "004837", 97 | "002471", 98 | "007449", 99 | "001989", 100 | "009368", 101 | "005461", 102 | "009153", 103 | "007297", 104 | "002476", 105 | "008883", 106 | "002801", 107 | "005257", 108 | "003455", 109 | "005672", 110 | "009822", 111 | "001203", 112 | "009619", 113 | "004902", 114 | "002362", 115 | "006233", 116 | "003847", 117 | "006968", 118 | "004242", 119 | "002348", 120 | "000394", 121 | "008628", 122 | "000050", 123 | "000889", 124 | "000438", 125 | "007089", 126 | "007614", 127 | "003912", 128 | "005868", 129 | "007090", 130 | "001881", 131 | "008132", 132 | "008940", 133 | "004359", 134 | "004105", 135 | "002540", 136 | "001247", 137 | "002477", 138 | "002658", 139 | "001004", 140 | "001408", 141 | "002815", 142 | "001944", 143 | "007432", 144 | "006911", 145 | "000713", 146 | "009695", 147 | "000771", 148 | "000917", 149 | "002569", 150 | "002934", 151 | "005719", 152 | "006747", 153 | "007054", 154 | "009373", 155 | "007590", 156 | "006718", 157 | "009060", 158 | "004727", 159 | "008968", 160 | "000060", 161 | "009105", 162 | "007915", 163 | "009270", 164 | "007600", 165 | "002284", 166 | "002226", 167 | "005047", 168 | "000513", 169 | "002134", 170 | "001738", 171 | "008638", 172 | "004768", 173 | "006848", 174 | "008526", 175 | "007902", 176 | "009446", 177 | "005039", 178 | "003363", 179 | "002256", 180 | "008730", 181 | "006515", 182 | "002696", 183 | "005156", 184 | "005655", 185 | "000892", 186 | "003259", 187 | "006251", 188 | "004326", 189 | "009422", 190 | "007490", 191 | "000219", 192 | "001011", 193 | "000153", 194 | "007300", 195 | "002779", 196 | "006066", 197 | "009413", 198 | 
"002151", 199 | "007540", 200 | "005208", 201 | "008900", 202 | "008160", 203 | "008268", 204 | "008211", 205 | "003335", 206 | "005114", 207 | "009692", 208 | "008144", 209 | "009955", 210 | "002359", 211 | "001273", 212 | "001707", 213 | "008523", 214 | "002366", 215 | "009351", 216 | "002450", 217 | "001484", 218 | "002337", 219 | "008933", 220 | "004879", 221 | "004936", 222 | "002901", 223 | "007025", 224 | "000387", 225 | "005588", 226 | "003382", 227 | "000171", 228 | "005841", 229 | "007968", 230 | "003116", 231 | "009911", 232 | "001250", 233 | "009268", 234 | "006341", 235 | "009215", 236 | "006739", 237 | "003137", 238 | "006825", 239 | "007394", 240 | "006609", 241 | "008341", 242 | "008873", 243 | "005327", 244 | "006437", 245 | "000032", 246 | "002501", 247 | "009405", 248 | "008454", 249 | "005599", 250 | "001148", 251 | "001724", 252 | "008452", 253 | "008833", 254 | "001486", 255 | "006240", 256 | "002935", 257 | "002183", 258 | "004011", 259 | "003253" 260 | ], 261 | "class_dist": [ 262 | 17, 263 | 24, 264 | 25, 265 | 17, 266 | 38, 267 | 13, 268 | 67, 269 | 15, 270 | 33, 271 | 9, 272 | 10, 273 | 30, 274 | 22, 275 | 20, 276 | 255, 277 | 32, 278 | 21, 279 | 12, 280 | 8, 281 | 18 282 | ] 283 | }, 284 | "train_set": [] 285 | } -------------------------------------------------------------------------------- /layers/modules/multibox_loss.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.autograd import Variable 6 | from ..box_utils import match, log_sum_exp 7 | from .kl_loss import KLLoss 8 | from data import voc as cfg 9 | 10 | class MultiBoxLoss(nn.Module): 11 | """SSD Weighted Loss Function 12 | Compute Targets: 13 | 1) Produce Confidence Target Indices by matching ground truth boxes 14 | with (default) 'priorboxes' that have jaccard index > threshold parameter 15 | (default threshold: 0.5). 16 | 2) Produce localization target by 'encoding' variance into offsets of ground 17 | truth boxes and their matched 'priorboxes'. 18 | 3) Hard negative mining to filter the excessive number of negative examples 19 | that comes with using a large number of default bounding boxes. 20 | (default negative:positive ratio 3:1) 21 | Objective Loss: 22 | L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 23 | Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss 24 | weighted by α which is set to 1 by cross val. 25 | Args: 26 | c: class confidences, 27 | l: predicted boxes, 28 | g: ground truth boxes 29 | N: number of matched default boxes 30 | See: https://arxiv.org/pdf/1512.02325.pdf for more details. 
31 | """ 32 | 33 | def __init__(self, num_classes, overlap_thresh, prior_for_matching, 34 | bkg_label, neg_mining, neg_pos, neg_overlap, encode_target, 35 | use_gpu=True, modeltype='SSD300'): 36 | super(MultiBoxLoss, self).__init__() 37 | self.use_gpu = use_gpu 38 | self.num_classes = num_classes 39 | self.threshold = overlap_thresh 40 | self.background_label = bkg_label 41 | self.encode_target = encode_target 42 | self.use_prior_for_matching = prior_for_matching 43 | self.do_neg_mining = neg_mining 44 | self.negpos_ratio = neg_pos 45 | self.neg_overlap = neg_overlap 46 | self.variance = cfg['variance'] 47 | self.modeltype = modeltype 48 | if modeltype == 'SSD300KL': 49 | self.kl_loss = KLLoss(loc_loss_weight = 1.0) 50 | 51 | def forward(self, predictions, targets,args = None): #todo: remove args 52 | """Multibox Loss 53 | Args: 54 | predictions (tuple): A tuple containing loc preds, conf preds, 55 | and prior boxes from SSD net. 56 | conf shape: torch.size(batch_size,num_priors,num_classes) 57 | loc shape: torch.size(batch_size,num_priors,4) 58 | priors shape: torch.size(num_priors,4) 59 | 60 | targets (tensor): Ground truth boxes and labels for a batch, 61 | shape: [batch_size,num_objs,5] (last idx is the label). 62 | """ 63 | if self.modeltype != 'SSD300KL': 64 | loc_data, conf_data, priors = predictions 65 | else: 66 | loc_data, conf_data, priors, loc_std = predictions 67 | 68 | num = loc_data.size(0) 69 | priors = priors[:loc_data.size(1), :] 70 | num_priors = (priors.size(0)) 71 | 72 | # match priors (default boxes) and ground truth boxes 73 | loc_t = torch.Tensor(num, num_priors, 4) 74 | conf_t = torch.LongTensor(num, num_priors) # just a mask if matched or not 75 | 76 | for idx in range(num): 77 | truths = targets[idx][:, :-1].data 78 | labels = targets[idx][:, -1].data 79 | defaults = priors.data 80 | match(self.threshold, truths, defaults, self.variance, labels, 81 | loc_t, conf_t, idx, self.modeltype) 82 | 83 | if self.use_gpu: 84 | loc_t = loc_t.cuda() 85 | conf_t = conf_t.cuda() 86 | 87 | # wrap targets 88 | loc_t = Variable(loc_t, requires_grad=False) 89 | conf_t = Variable(conf_t, requires_grad=False) 90 | 91 | pos = conf_t > 0 92 | num_pos = pos.sum(dim=1, keepdim=True) 93 | 94 | # Localization Loss (Smooth L1) 95 | # Shape: [batch,num_priors,4] 96 | pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) 97 | loc_p = loc_data[pos_idx].view(-1, 4) 98 | loc_t = loc_t[pos_idx].view(-1, 4) 99 | if self.modeltype != 'SSD300KL': 100 | loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False).double() 101 | else: 102 | loss_l = self.kl_loss(loc_p, loc_t, loc_std[pos_idx].view(-1, 4)).double() 103 | 104 | # Compute max conf across batch for hard negative mining 105 | batch_conf = conf_data.view(-1, self.num_classes) 106 | loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1)) 107 | 108 | # Hard Negative Mining 109 | loss_c = loss_c.view(num, -1) 110 | loss_c[pos] = 0 # filter out pos boxes for now 111 | 112 | _, loss_idx = loss_c.sort(1, descending=True) 113 | _, idx_rank = loss_idx.sort(1) 114 | num_pos = pos.long().sum(1, keepdim=True) 115 | num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1) 116 | neg = idx_rank < num_neg.expand_as(idx_rank) 117 | 118 | # Confidence Loss Including Positive and Negative Examples 119 | pos_idx = pos.unsqueeze(2).expand_as(conf_data) 120 | neg_idx = neg.unsqueeze(2).expand_as(conf_data) 121 | conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1, self.num_classes) 122 | targets_weighted = conf_t[(pos+neg).gt(0)] 
123 | # if num_classes == 2: 124 | # loss_c = F.binary_cross_entropy(torch.sigmoid(conf_p)[:,1], targets_weighted.float(),size_average=False).double() 125 | # else: 126 | loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False).double() 127 | 128 | # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 129 | 130 | N = num_pos.data.sum().double() 131 | loss_l /= N 132 | loss_c /= N 133 | return loss_l, loss_c 134 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Active Learning for Object Detection With Localization Uncertainty from Sampling-Based Probabilistic Bounding Boxes 2 | This repo contains the code belonging to my master's thesis, titled "Active Learning for Object Detection With Localization Uncertainty from Sampling-Based Probabilistic Bounding Boxes". I also uploaded the pdf of my thesis [here](https://github.com/JappaB/Active_Learning_Object_Detection/blob/master/Thesis_Jasper_Bakker_Active_Deep_Learning_for_Object_Detection_With_Sampling_Based_Probabilistic_Bounding_Boxes_compressed.pdf). As a very short summary: I researched the use of localization uncertainty, obtained through an ensemble of object detectors, to select more informative images for labeling. It shows promising results on Pascal VOC 2007, but has not been used on other datasets. Please let me know your experiences if you use it on different datasets. 3 | 4 | As a basis for my repository I used the excellent [PyTorch implementation of the SSD detector](https://github.com/amdegroot/ssd.pytorch) by Max de Groot and Ellis Brown, retrieved on 19-02-2019. However, as I used the then-newest stable version of PyTorch (1.0.1), I changed some of their code to get it running. Note that their repo is probably more suitable if you just want to use an SSD written in PyTorch and don't want to perform active learning. Some parts of this readme are copied directly from Max de Groot and Ellis Brown's repo, as my work is built upon their code anyway. 5 | 6 | After finishing my thesis, I cleaned the code a bit and wrote this readme to make it more usable for others. I hope this helps; however, bear in mind that this is research code and should be viewed as such. Currently I'm traveling through Central and South America. I know the code could still be improved, but it should work if you follow the instructions below. Please post issues if you are serious about using it and don't understand certain parts. I'll see what I can do when I'm back. 7 | 8 | 9 | ### Table of Contents 10 | - Getting Started 11 | - Datasets 12 | - Authors 13 | 14 |   15 |   16 |   17 |   18 | 19 | 20 | ## SSD: Single Shot MultiBox Object Detector, in PyTorch 21 | A [PyTorch](http://pytorch.org/) implementation of [Single Shot MultiBox Detector](http://arxiv.org/abs/1512.02325) from the 2016 paper by Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, Cheng-Yang Fu, and Alexander C. Berg. The official and original Caffe code can be found [here](https://github.com/weiliu89/caffe/tree/ssd). 22 | 23 | 24 | 25 | 26 | 27 | ## Getting started 28 | - For reproducibility, I supplied a listing of the conda environment I used for my experiments in the [requirements](https://github.com/JappaB/Active_Learning_Object_Detection/blob/master/requirements) file. The most important packages are probably: PyTorch, NumPy, SciPy, cv2 and hdbscan. One way you might recreate a similar environment is sketched below.
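A minimal sketch of recreating a similar environment (assumptions: conda is installed, and the environment name is arbitrary; the requirements file is a plain `conda list` dump, so it cannot be passed to `conda create --file` as-is):

```Shell
conda create --name al_ssd python=3.7
conda activate al_ssd
# versions as pinned in the requirements file
conda install pytorch=1.0.1 torchvision=0.2.2 cudatoolkit=9.0 -c pytorch
conda install numpy scipy opencv matplotlib
conda install -c conda-forge hdbscan
```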
29 | - Clone this repository. 30 | - Then download the dataset by following the [instructions](#datasets) below. Note that the active learning code has only been completely implemented for the Pascal VOC 2007 dataset. 31 | - As the SSD uses a reduced VGG-16 backbone, download the fc-reduced [VGG-16](https://arxiv.org/abs/1409.1556) PyTorch base network weights at https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth and put them in a directory called 'weights'. 32 | - By default, we assume you have downloaded the file into the `Active_Learning_Object_Detection/weights` dir: 33 | 34 | ```Shell 35 | mkdir weights 36 | cd weights 37 | wget https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth 38 | ``` 39 | - Note that a GPU is highly recommended for training the SSD. 40 | 41 | - As there are many parser argument options, I provide two sample run scripts in the [run_scripts](https://github.com/JappaB/Active_Learning_Object_Detection/tree/master/run_scripts/scripts) directory to get a head start: one for the six classes I used in my experiments and one for a single class of interest (thus background vs. non-background). To use them, you also need to copy the appropriate imageset files from `data/imageset_files` to `~/data/VOCdevkit/VOC2007/ImageSets/Main/`. 42 | 43 | - You are required to give a list of paths to the currently best networks. I provided a script `create_initial_networks.py` to generate these if you don't have any yet. The current settings of this script correspond to the single-class sample run script. NOTE: a single saved network requires approximately 100 MB of storage, so make sure you have enough disk space before running the script. 44 | 45 | - Finally, if you don't want to use one of the provided scripts, the entry point for active learning is the `active_learning_main.py` file. 46 | 47 | 48 | ## What can I find where? 49 | For active learning, the two most important folders are `active_learning_dir` and `active_learning_package`: the first holds the (intermediate) results of the runs (e.g. which images to label next), and the second contains the active learning code. 50 | 51 | 52 | ## Datasets 53 | To make things easy, we provide bash scripts to handle the dataset (Pascal VOC) downloads and setup for you. We also provide simple dataset loaders that inherit `torch.utils.data.Dataset`, making them fully compatible with the `torchvision.datasets` [API](http://pytorch.org/docs/torchvision/datasets.html). 54 | 55 | 56 | ### VOC Dataset 57 | PASCAL VOC: Visual Object Classes 58 | 59 | ##### Download VOC2007 trainval & test 60 | ```Shell 61 | # specify a directory for the dataset to be downloaded into, else the default is ~/data/ 62 | sh data/scripts/VOC2007.sh # <directory> 63 | ``` 64 | 65 | ##### Download VOC2012 trainval 66 | ```Shell 67 | # specify a directory for the dataset to be downloaded into, else the default is ~/data/ 68 | sh data/scripts/VOC2012.sh # <directory> 69 | ``` 70 | 71 | ### Use a pre-trained SSD network for detection 72 | #### Download a pre-trained network 73 | - We are trying to provide PyTorch `state_dicts` (dicts of weight tensors) of the latest SSD model definitions trained on different datasets.
74 | - Currently, we provide the following PyTorch models: 75 | * SSD300 trained on VOC0712 (newest PyTorch weights) 76 | - https://s3.amazonaws.com/amdegroot-models/ssd300_mAP_77.43_v2.pth 77 | * SSD300 trained on VOC0712 (original Caffe weights) 78 | - https://s3.amazonaws.com/amdegroot-models/ssd_300_VOC0712.pth 79 | 80 | ## Authors 81 | Active learning part: 82 | * [**Jasper Bakker**](https://github.com/jappab) 83 | 84 | SSD, Dataloaders, etc. (check their excellent repo at [PyTorch implementation of the SSD detector](https://github.com/amdegroot/ssd.pytorch)): 85 | * [**Max de Groot**](https://github.com/amdegroot) 86 | * [**Ellis Brown**](http://github.com/ellisbrown) 87 | 88 | -------------------------------------------------------------------------------- /active_learning_package/uncertainty_helpers.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | import utils.augmentations as augmentations 3 | import torch 4 | 5 | def entropy(confs, already_normalized=True): 6 | """ 7 | https://discuss.pytorch.org/t/calculating-the-entropy-loss/14510 8 | softmax proof: https://math.stackexchange.com/questions/331275/softmax-function-and-modelling-probability-distributions 9 | 10 | :param confs: (tensor) 11 | shape: (batch, observations, class_probabilities) where class probabilities are real probabilities (already normalized) 12 | :return: H: (tensor) entropy 13 | shape: (batch, observations) 14 | """ 15 | # tested with a uniform and a peak distribution in a tensor 16 | 17 | if not already_normalized: 18 | H = F.softmax(confs, dim=2) * F.log_softmax(confs, dim=2) 19 | H = H.sum(dim=2) * -1.0 20 | else: 21 | H = confs * torch.log(confs) 22 | H = H.sum(dim=2) * -1.0 23 | 24 | return H 25 | 26 | def trace_covariance(cov_0, cov_1): 27 | """ 28 | https://obilaniu6266h16.wordpress.com/2016/02/04/einstein-summation-in-numpy/ 29 | see the trace calculation; however, here we keep the first two dimensions (batches and observations) as free variables 30 | 31 | 32 | 33 | args: 34 | cov_0: (tensor) 35 | shape: [batch, observations, 2, 2] # last two dimensions are xx,xy and xy,yy 36 | cov_1: (tensor) 37 | shape: [batch, observations, 2, 2] # last two dimensions are xx,xy and xy,yy 38 | :return: 39 | traces_0: (tensor) 40 | shape: [batch, observations] 41 | traces_1: (tensor) 42 | shape: [batch, observations] 43 | """ 44 | 45 | # todo: assert that the trace must be positive 46 | traces_0 = torch.einsum('boxx->bo',cov_0) 47 | traces_1 = torch.einsum('boxx->bo',cov_1) 48 | 49 | return traces_0, traces_1 50 | 51 | 52 | 53 | 54 | def dist_means_observation(mu_0,mu_1): 55 | """ 56 | Calculate the (Euclidean) distance between the mean of the upper left corner (mu_0) and lower right corner (mu_1) of the bounding box 57 | 58 | args: 59 | mu_0: 60 | shape: [batch, observations, 2] where the last dim is x1y1 61 | mu_1: 62 | shape: [batch, observations, 2] where the last dim is x2y2 63 | :return: 64 | distances: 65 | shape: [batch, observations] 66 | """ 67 | 68 | 69 | 70 | mu_1_minus_0 = mu_1-mu_0 71 | squared = torch.pow(mu_1_minus_0,2) 72 | summed = squared.sum(dim=2) 73 | distances = torch.pow(summed,0.5) 74 | 75 | return distances 76 | 77 | def means_observation(observations): 78 | """ 79 | This function is exactly the same as the means_covs_observation below, without the cov part.
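        args: observations: (tensor) zero-padded boxes, shape: [batch, observations, max(n_boxes_of_all_obs), 4] (same layout as in means_covs_observation) :return: mean: (tensor) shape: [batch, observations, 4]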
80 | """ 81 | max_boxes = observations.shape[2] 82 | num_observations = observations.shape[1] 83 | num_batches = observations.shape[0] 84 | 85 | # per bounding box, sum each individual coordinate 86 | summed_coordinates = observations.sum(dim=2) 87 | zeros = observations.le(0.) 88 | zeros_per_box = zeros.sum(dim=3) 89 | N = zeros_per_box.le(3).sum(dim=2).float() 90 | mean = torch.div(summed_coordinates, N.unsqueeze(-1)) 91 | return mean 92 | 93 | def means_covs_observation(observations): 94 | """ 95 | For a guide on np.einsum (vs using for loops, a lot faster) 96 | (which is really similar to torch.einsum, which is used below to keep gpu speed-ups) 97 | check: 98 | - (short) http://ajcr.net/Basic-guide-to-einsum/ 99 | - or (eleborate, but VERY good) https://obilaniu6266h16.wordpress.com/2016/02/04/einstein-summation-in-numpy/ 100 | - or (example also involving a covariance calculation) https://medium.com/the-quarks/an-einsum-use-case-8dafcb933c66 101 | 102 | args: 103 | observations: (tensor) combined bounding boxes, only spatial information 104 | one bounding box shnumber ofould have the coordinates like this: 105 | [x0,y0,x1,y1], the coordinates of the upper left and lower right corners 106 | respectively. As each observation can have a variable number of bounding boxes, 107 | the observations that have less than the maximum number of bounding are assumed to be padded 108 | with zeros. 109 | 110 | shape: [batch, observations, max(n_boxes_of_all_obs) ,4] 111 | 112 | :return: 113 | means_covs_observation: last dim is (mu0,mu1,cov0,cov1) 114 | shape: [batch, observation, 4] 115 | """ 116 | max_boxes = observations.shape[2] 117 | num_observations = observations.shape[1] 118 | num_batches = observations.shape[0] 119 | 120 | # per bounding box, sum each individual coordinate 121 | summed_coordinates = observations.sum(dim=2) 122 | zeros = observations.le(0.) 
123 | zeros_per_box = zeros.sum(dim=3) 124 | N = zeros_per_box.le(3).sum(dim=2).float() 125 | mean = torch.div(summed_coordinates, N.unsqueeze(-1)) 126 | # mean = torch.div(summed_coordinates, torch.transpose(N, 0, 1)) 127 | #### covariances 128 | # must be done separately for the upper left corner (0) and lower right corner (1) of the bounding box 129 | mean_0 = mean[:, :, 0:2] 130 | mean_1 = mean[:, :, 2:4] 131 | observations_0 = observations[:, :, :, 0:2] 132 | observations_1 = observations[:, :, :, 2:4] 133 | 134 | # Batch Observation boXes coordinatesTransposed and Batch Observation boXes Coordinates 135 | cov_first_part_summed_0 = torch.einsum('boxt,boxc -> botc', observations_0, observations_0) 136 | cov_first_part_summed_1 = torch.einsum('boxt,boxc -> botc', observations_1, observations_1) 137 | 138 | # double unsqueeze to allow for batches 139 | stacked_N = N.unsqueeze(-1).unsqueeze(-1) 140 | 141 | cov_first_part_0 = torch.div(cov_first_part_summed_0, stacked_N) 142 | cov_first_part_1 = torch.div(cov_first_part_summed_1, stacked_N) 143 | 144 | cov_second_part_0 = torch.einsum('bik,bij-> bijk',mean_0, mean_0) 145 | cov_second_part_1 = torch.einsum('bik,bij-> bijk',mean_1, mean_1) 146 | 147 | cov_0 = cov_first_part_0 - cov_second_part_0 148 | cov_1 = cov_first_part_1 - cov_second_part_1 149 | 150 | 151 | return mean, cov_0, cov_1 152 | 153 | 154 | def means_observations(observations): 155 | """ 156 | For a guide on np.einsum (vs using for loops, a lot faster) 157 | (which is really similar to torch.einsum, which is used below to keep gpu speed-ups) 158 | check: 159 | - (short) http://ajcr.net/Basic-guide-to-einsum/ 160 | - or (elaborate, but VERY good) https://obilaniu6266h16.wordpress.com/2016/02/04/einstein-summation-in-numpy/ 161 | - or (example also involving a covariance calculation) https://medium.com/the-quarks/an-einsum-use-case-8dafcb933c66 162 | 163 | args: 164 | observations: (tensor) combined bounding boxes, only spatial information 165 | one bounding box should have the coordinates like this: 166 | [x0,y0,x1,y1], the coordinates of the upper left and lower right corners 167 | respectively. As each observation can have a variable number of bounding boxes, 168 | the observations that have fewer than the maximum number of bounding boxes are assumed to be padded 169 | with zeros. 170 | 171 | shape: [max(n_boxes), batch, observations,4] 172 | 173 | :return: 174 | means_observation: last dim is (mu0,mu1) 175 | shape: [batch, observation, 2] 176 | """ 177 | 178 | # per bounding box, sum each individual coordinate 179 | summed_coordinates = observations.sum(dim=2) 180 | zeros = observations.le(0.)
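    # As above, N is meant to count the real (non-padded) boxes so the coordinate
    # sums can be turned into means; note that here the box axis is dim 0 (see the
    # docstring shape), unlike in means_covs_observation.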
181 | zeros_per_box = zeros.sum(dim=0) 182 | N = zeros_per_box.le(3).sum(dim=2).float() 183 | mean = torch.div(summed_coordinates, torch.transpose(N, 0, 1)) 184 | 185 | return mean 186 | -------------------------------------------------------------------------------- /data/imageset_files/car_trainval_detect.txt: -------------------------------------------------------------------------------- 1 | 000007 2 | 000012 3 | 000020 4 | 000026 5 | 000047 6 | 000060 7 | 000083 8 | 000091 9 | 000131 10 | 000134 11 | 000142 12 | 000153 13 | 000156 14 | 000159 15 | 000161 16 | 000169 17 | 000180 18 | 000210 19 | 000220 20 | 000221 21 | 000233 22 | 000245 23 | 000251 24 | 000262 25 | 000263 26 | 000288 27 | 000289 28 | 000296 29 | 000302 30 | 000303 31 | 000311 32 | 000317 33 | 000318 34 | 000320 35 | 000329 36 | 000334 37 | 000338 38 | 000343 39 | 000355 40 | 000367 41 | 000373 42 | 000387 43 | 000396 44 | 000404 45 | 000406 46 | 000420 47 | 000431 48 | 000461 49 | 000463 50 | 000469 51 | 000474 52 | 000477 53 | 000494 54 | 000509 55 | 000515 56 | 000522 57 | 000541 58 | 000543 59 | 000545 60 | 000554 61 | 000579 62 | 000590 63 | 000605 64 | 000620 65 | 000628 66 | 000648 67 | 000653 68 | 000663 69 | 000672 70 | 000688 71 | 000700 72 | 000754 73 | 000776 74 | 000787 75 | 000800 76 | 000818 77 | 000823 78 | 000829 79 | 000855 80 | 000860 81 | 000871 82 | 000888 83 | 000906 84 | 000911 85 | 000935 86 | 000937 87 | 000972 88 | 000977 89 | 000987 90 | 001052 91 | 001057 92 | 001060 93 | 001069 94 | 001071 95 | 001093 96 | 001112 97 | 001119 98 | 001124 99 | 001125 100 | 001148 101 | 001237 102 | 001258 103 | 001268 104 | 001281 105 | 001290 106 | 001292 107 | 001294 108 | 001330 109 | 001334 110 | 001352 111 | 001360 112 | 001364 113 | 001371 114 | 001384 115 | 001385 116 | 001386 117 | 001409 118 | 001414 119 | 001432 120 | 001445 121 | 001455 122 | 001466 123 | 001472 124 | 001488 125 | 001492 126 | 001494 127 | 001532 128 | 001559 129 | 001561 130 | 001576 131 | 001604 132 | 001618 133 | 001622 134 | 001640 135 | 001654 136 | 001662 137 | 001676 138 | 001693 139 | 001699 140 | 001746 141 | 001780 142 | 001801 143 | 001821 144 | 001845 145 | 001862 146 | 001877 147 | 001881 148 | 001899 149 | 001902 150 | 001931 151 | 001937 152 | 001944 153 | 001950 154 | 001954 155 | 001962 156 | 001980 157 | 002019 158 | 002023 159 | 002045 160 | 002056 161 | 002116 162 | 002125 163 | 002134 164 | 002135 165 | 002153 166 | 002170 167 | 002174 168 | 002178 169 | 002180 170 | 002182 171 | 002197 172 | 002228 173 | 002241 174 | 002244 175 | 002247 176 | 002248 177 | 002281 178 | 002291 179 | 002307 180 | 002311 181 | 002340 182 | 002343 183 | 002355 184 | 002373 185 | 002374 186 | 002393 187 | 002411 188 | 002420 189 | 002436 190 | 002439 191 | 002454 192 | 002478 193 | 002490 194 | 002493 195 | 002497 196 | 002504 197 | 002533 198 | 002534 199 | 002544 200 | 002559 201 | 002563 202 | 002566 203 | 002584 204 | 002595 205 | 002606 206 | 002625 207 | 002643 208 | 002646 209 | 002647 210 | 002666 211 | 002667 212 | 002682 213 | 002691 214 | 002693 215 | 002704 216 | 002730 217 | 002734 218 | 002759 219 | 002772 220 | 002776 221 | 002779 222 | 002783 223 | 002804 224 | 002812 225 | 002833 226 | 002854 227 | 002884 228 | 002917 229 | 002937 230 | 002994 231 | 003007 232 | 003009 233 | 003013 234 | 003027 235 | 003032 236 | 003051 237 | 003053 238 | 003054 239 | 003057 240 | 003083 241 | 003092 242 | 003094 243 | 003103 244 | 003117 245 | 003165 246 | 003176 247 | 003195 248 | 003214 249 | 003228 250 | 003231 251 | 003240 252 
| 003243 253 | 003256 254 | 003261 255 | 003271 256 | 003285 257 | 003313 258 | 003331 259 | 003344 260 | 003355 261 | 003359 262 | 003363 263 | 003379 264 | 003390 265 | 003397 266 | 003406 267 | 003420 268 | 003461 269 | 003484 270 | 003551 271 | 003555 272 | 003587 273 | 003596 274 | 003606 275 | 003608 276 | 003634 277 | 003636 278 | 003655 279 | 003703 280 | 003711 281 | 003713 282 | 003721 283 | 003790 284 | 003798 285 | 003806 286 | 003809 287 | 003820 288 | 003824 289 | 003835 290 | 003885 291 | 003891 292 | 003936 293 | 003954 294 | 003971 295 | 003973 296 | 003974 297 | 003987 298 | 003997 299 | 003998 300 | 004011 301 | 004019 302 | 004073 303 | 004087 304 | 004089 305 | 004091 306 | 004108 307 | 004136 308 | 004145 309 | 004186 310 | 004203 311 | 004228 312 | 004231 313 | 004242 314 | 004244 315 | 004284 316 | 004295 317 | 004303 318 | 004304 319 | 004329 320 | 004346 321 | 004365 322 | 004367 323 | 004384 324 | 004386 325 | 004387 326 | 004429 327 | 004439 328 | 004481 329 | 004488 330 | 004494 331 | 004519 332 | 004526 333 | 004539 334 | 004544 335 | 004563 336 | 004576 337 | 004581 338 | 004591 339 | 004604 340 | 004618 341 | 004660 342 | 004687 343 | 004691 344 | 004705 345 | 004719 346 | 004727 347 | 004747 348 | 004748 349 | 004750 350 | 004779 351 | 004786 352 | 004793 353 | 004805 354 | 004808 355 | 004823 356 | 004828 357 | 004830 358 | 004850 359 | 004863 360 | 004873 361 | 004890 362 | 004903 363 | 004946 364 | 004961 365 | 004962 366 | 004973 367 | 004983 368 | 005003 369 | 005020 370 | 005028 371 | 005047 372 | 005065 373 | 005067 374 | 005068 375 | 005071 376 | 005072 377 | 005090 378 | 005102 379 | 005110 380 | 005156 381 | 005159 382 | 005169 383 | 005199 384 | 005209 385 | 005259 386 | 005262 387 | 005273 388 | 005305 389 | 005318 390 | 005331 391 | 005350 392 | 005373 393 | 005387 394 | 005423 395 | 005457 396 | 005475 397 | 005481 398 | 005483 399 | 005486 400 | 005489 401 | 005499 402 | 005509 403 | 005536 404 | 005547 405 | 005549 406 | 005566 407 | 005577 408 | 005584 409 | 005585 410 | 005588 411 | 005592 412 | 005593 413 | 005609 414 | 005640 415 | 005645 416 | 005669 417 | 005679 418 | 005738 419 | 005747 420 | 005749 421 | 005756 422 | 005760 423 | 005782 424 | 005791 425 | 005806 426 | 005815 427 | 005830 428 | 005831 429 | 005839 430 | 005861 431 | 005868 432 | 005897 433 | 005899 434 | 005918 435 | 005956 436 | 005979 437 | 005988 438 | 005998 439 | 006009 440 | 006011 441 | 006018 442 | 006035 443 | 006038 444 | 006043 445 | 006058 446 | 006062 447 | 006079 448 | 006089 449 | 006097 450 | 006103 451 | 006104 452 | 006120 453 | 006124 454 | 006128 455 | 006133 456 | 006151 457 | 006196 458 | 006201 459 | 006203 460 | 006206 461 | 006210 462 | 006218 463 | 006223 464 | 006224 465 | 006225 466 | 006235 467 | 006250 468 | 006261 469 | 006277 470 | 006290 471 | 006301 472 | 006320 473 | 006325 474 | 006329 475 | 006330 476 | 006346 477 | 006362 478 | 006369 479 | 006375 480 | 006396 481 | 006417 482 | 006421 483 | 006438 484 | 006458 485 | 006459 486 | 006484 487 | 006497 488 | 006524 489 | 006588 490 | 006593 491 | 006625 492 | 006632 493 | 006654 494 | 006660 495 | 006668 496 | 006706 497 | 006719 498 | 006734 499 | 006736 500 | 006748 501 | 006766 502 | 006783 503 | 006821 504 | 006822 505 | 006858 506 | 006868 507 | 006884 508 | 006893 509 | 006900 510 | 006918 511 | 006931 512 | 006988 513 | 007003 514 | 007004 515 | 007040 516 | 007058 517 | 007068 518 | 007074 519 | 007090 520 | 007133 521 | 007153 522 | 007159 523 | 007167 524 | 007205 525 | 
007208 526 | 007247 527 | 007261 528 | 007270 529 | 007279 530 | 007283 531 | 007284 532 | 007285 533 | 007294 534 | 007305 535 | 007346 536 | 007363 537 | 007374 538 | 007376 539 | 007383 540 | 007396 541 | 007414 542 | 007422 543 | 007424 544 | 007427 545 | 007446 546 | 007468 547 | 007475 548 | 007479 549 | 007490 550 | 007497 551 | 007525 552 | 007527 553 | 007566 554 | 007592 555 | 007601 556 | 007614 557 | 007622 558 | 007647 559 | 007650 560 | 007653 561 | 007663 562 | 007667 563 | 007691 564 | 007699 565 | 007709 566 | 007721 567 | 007731 568 | 007736 569 | 007745 570 | 007779 571 | 007790 572 | 007815 573 | 007819 574 | 007821 575 | 007843 576 | 007855 577 | 007856 578 | 007883 579 | 007898 580 | 007905 581 | 007921 582 | 007931 583 | 007932 584 | 007950 585 | 007963 586 | 007964 587 | 007970 588 | 007971 589 | 008001 590 | 008019 591 | 008026 592 | 008031 593 | 008037 594 | 008044 595 | 008057 596 | 008060 597 | 008079 598 | 008087 599 | 008093 600 | 008098 601 | 008105 602 | 008108 603 | 008160 604 | 008169 605 | 008174 606 | 008188 607 | 008197 608 | 008225 609 | 008232 610 | 008268 611 | 008279 612 | 008294 613 | 008296 614 | 008315 615 | 008329 616 | 008336 617 | 008359 618 | 008360 619 | 008376 620 | 008388 621 | 008391 622 | 008397 623 | 008429 624 | 008444 625 | 008449 626 | 008461 627 | 008466 628 | 008478 629 | 008482 630 | 008483 631 | 008484 632 | 008502 633 | 008503 634 | 008517 635 | 008524 636 | 008549 637 | 008550 638 | 008562 639 | 008572 640 | 008581 641 | 008586 642 | 008601 643 | 008633 644 | 008663 645 | 008665 646 | 008676 647 | 008680 648 | 008706 649 | 008716 650 | 008727 651 | 008739 652 | 008747 653 | 008750 654 | 008768 655 | 008784 656 | 008793 657 | 008794 658 | 008801 659 | 008826 660 | 008838 661 | 008843 662 | 008848 663 | 008859 664 | 008891 665 | 008892 666 | 008909 667 | 008911 668 | 008923 669 | 008929 670 | 008939 671 | 008958 672 | 008960 673 | 008966 674 | 008968 675 | 008969 676 | 008978 677 | 009000 678 | 009006 679 | 009015 680 | 009029 681 | 009045 682 | 009053 683 | 009058 684 | 009060 685 | 009064 686 | 009073 687 | 009078 688 | 009106 689 | 009116 690 | 009121 691 | 009163 692 | 009174 693 | 009178 694 | 009179 695 | 009186 696 | 009205 697 | 009213 698 | 009214 699 | 009254 700 | 009269 701 | 009282 702 | 009283 703 | 009286 704 | 009318 705 | 009326 706 | 009336 707 | 009350 708 | 009358 709 | 009368 710 | 009392 711 | 009406 712 | 009409 713 | 009411 714 | 009424 715 | 009434 716 | 009448 717 | 009469 718 | 009477 719 | 009507 720 | 009515 721 | 009517 722 | 009532 723 | 009558 724 | 009596 725 | 009614 726 | 009620 727 | 009623 728 | 009641 729 | 009644 730 | 009671 731 | 009676 732 | 009699 733 | 009711 734 | 009718 735 | 009729 736 | 009733 737 | 009737 738 | 009745 739 | 009762 740 | 009774 741 | 009776 742 | 009785 743 | 009801 744 | 009810 745 | 009822 746 | 009830 747 | 009834 748 | 009839 749 | 009845 750 | 009848 751 | 009862 752 | 009863 753 | 009879 754 | 009898 755 | 009900 756 | 009904 757 | 009913 758 | 009920 759 | 009932 760 | 009938 761 | 009959 762 | -------------------------------------------------------------------------------- /data/imageset_files/car_test_detect.txt: -------------------------------------------------------------------------------- 1 | 000004 2 | 000014 3 | 000071 4 | 000074 5 | 000082 6 | 000103 7 | 000135 8 | 000137 9 | 000152 10 | 000172 11 | 000188 12 | 000197 13 | 000240 14 | 000252 15 | 000254 16 | 000271 17 | 000284 18 | 000293 19 | 000300 20 | 000301 21 | 000313 22 | 000341 23 | 000351 24 
| 000358 25 | 000361 26 | 000390 27 | 000402 28 | 000415 29 | 000425 30 | 000440 31 | 000453 32 | 000465 33 | 000471 34 | 000488 35 | 000505 36 | 000507 37 | 000529 38 | 000548 39 | 000580 40 | 000585 41 | 000586 42 | 000593 43 | 000602 44 | 000607 45 | 000624 46 | 000634 47 | 000646 48 | 000649 49 | 000669 50 | 000679 51 | 000687 52 | 000693 53 | 000715 54 | 000719 55 | 000721 56 | 000724 57 | 000727 58 | 000736 59 | 000743 60 | 000747 61 | 000757 62 | 000778 63 | 000788 64 | 000801 65 | 000809 66 | 000844 67 | 000881 68 | 000883 69 | 000894 70 | 000932 71 | 000945 72 | 000961 73 | 000984 74 | 000985 75 | 001003 76 | 001005 77 | 001022 78 | 001034 79 | 001058 80 | 001063 81 | 001080 82 | 001085 83 | 001090 84 | 001111 85 | 001134 86 | 001135 87 | 001155 88 | 001198 89 | 001222 90 | 001252 91 | 001267 92 | 001280 93 | 001283 94 | 001291 95 | 001308 96 | 001318 97 | 001321 98 | 001328 99 | 001331 100 | 001335 101 | 001356 102 | 001358 103 | 001369 104 | 001376 105 | 001379 106 | 001382 107 | 001394 108 | 001403 109 | 001422 110 | 001428 111 | 001435 112 | 001476 113 | 001491 114 | 001511 115 | 001525 116 | 001535 117 | 001550 118 | 001552 119 | 001560 120 | 001569 121 | 001572 122 | 001605 123 | 001613 124 | 001616 125 | 001619 126 | 001623 127 | 001626 128 | 001652 129 | 001658 130 | 001700 131 | 001701 132 | 001770 133 | 001776 134 | 001804 135 | 001820 136 | 001838 137 | 001846 138 | 001851 139 | 001857 140 | 001863 141 | 001873 142 | 001883 143 | 001891 144 | 001908 145 | 001913 146 | 001919 147 | 001923 148 | 001924 149 | 001935 150 | 001942 151 | 001951 152 | 001956 153 | 001965 154 | 001991 155 | 002040 156 | 002041 157 | 002057 158 | 002118 159 | 002141 160 | 002143 161 | 002149 162 | 002154 163 | 002177 164 | 002185 165 | 002210 166 | 002223 167 | 002232 168 | 002242 169 | 002245 170 | 002271 171 | 002294 172 | 002319 173 | 002331 174 | 002346 175 | 002349 176 | 002358 177 | 002370 178 | 002383 179 | 002402 180 | 002406 181 | 002416 182 | 002418 183 | 002424 184 | 002446 185 | 002484 186 | 002517 187 | 002522 188 | 002526 189 | 002531 190 | 002532 191 | 002543 192 | 002548 193 | 002556 194 | 002562 195 | 002577 196 | 002583 197 | 002602 198 | 002607 199 | 002610 200 | 002622 201 | 002650 202 | 002681 203 | 002701 204 | 002703 205 | 002729 206 | 002733 207 | 002740 208 | 002746 209 | 002750 210 | 002752 211 | 002758 212 | 002789 213 | 002790 214 | 002793 215 | 002808 216 | 002814 217 | 002829 218 | 002840 219 | 002871 220 | 002900 221 | 002920 222 | 002927 223 | 002955 224 | 002961 225 | 002993 226 | 003006 227 | 003033 228 | 003046 229 | 003052 230 | 003055 231 | 003070 232 | 003101 233 | 003109 234 | 003128 235 | 003143 236 | 003168 237 | 003179 238 | 003217 239 | 003220 240 | 003234 241 | 003257 242 | 003265 243 | 003276 244 | 003289 245 | 003302 246 | 003306 247 | 003321 248 | 003328 249 | 003334 250 | 003348 251 | 003353 252 | 003357 253 | 003364 254 | 003375 255 | 003385 256 | 003387 257 | 003405 258 | 003414 259 | 003434 260 | 003456 261 | 003460 262 | 003476 263 | 003481 264 | 003483 265 | 003486 266 | 003501 267 | 003503 268 | 003512 269 | 003515 270 | 003517 271 | 003523 272 | 003527 273 | 003545 274 | 003552 275 | 003553 276 | 003559 277 | 003569 278 | 003570 279 | 003578 280 | 003607 281 | 003630 282 | 003631 283 | 003661 284 | 003666 285 | 003677 286 | 003683 287 | 003692 288 | 003701 289 | 003718 290 | 003719 291 | 003733 292 | 003746 293 | 003769 294 | 003810 295 | 003815 296 | 003833 297 | 003878 298 | 003893 299 | 003904 300 | 003916 301 | 003917 302 | 003940 303 | 
003962 304 | 003967 305 | 004001 306 | 004026 307 | 004030 308 | 004043 309 | 004050 310 | 004071 311 | 004080 312 | 004088 313 | 004097 314 | 004104 315 | 004107 316 | 004115 317 | 004147 318 | 004154 319 | 004172 320 | 004183 321 | 004187 322 | 004188 323 | 004216 324 | 004217 325 | 004240 326 | 004245 327 | 004249 328 | 004254 329 | 004290 330 | 004305 331 | 004313 332 | 004320 333 | 004335 334 | 004337 335 | 004378 336 | 004426 337 | 004442 338 | 004453 339 | 004456 340 | 004458 341 | 004478 342 | 004492 343 | 004521 344 | 004543 345 | 004545 346 | 004554 347 | 004560 348 | 004578 349 | 004580 350 | 004586 351 | 004596 352 | 004602 353 | 004615 354 | 004650 355 | 004667 356 | 004684 357 | 004688 358 | 004697 359 | 004731 360 | 004734 361 | 004745 362 | 004755 363 | 004762 364 | 004763 365 | 004764 366 | 004780 367 | 004800 368 | 004804 369 | 004806 370 | 004810 371 | 004821 372 | 004844 373 | 004860 374 | 004870 375 | 004891 376 | 004927 377 | 004933 378 | 004940 379 | 004959 380 | 004965 381 | 004981 382 | 004989 383 | 004996 384 | 005005 385 | 005021 386 | 005030 387 | 005035 388 | 005041 389 | 005074 390 | 005083 391 | 005091 392 | 005099 393 | 005105 394 | 005125 395 | 005126 396 | 005142 397 | 005157 398 | 005158 399 | 005163 400 | 005166 401 | 005192 402 | 005204 403 | 005275 404 | 005287 405 | 005296 406 | 005316 407 | 005333 408 | 005357 409 | 005372 410 | 005381 411 | 005401 412 | 005432 413 | 005437 414 | 005443 415 | 005447 416 | 005468 417 | 005484 418 | 005493 419 | 005501 420 | 005520 421 | 005523 422 | 005546 423 | 005558 424 | 005627 425 | 005638 426 | 005649 427 | 005663 428 | 005666 429 | 005678 430 | 005694 431 | 005706 432 | 005708 433 | 005717 434 | 005739 435 | 005746 436 | 005763 437 | 005770 438 | 005775 439 | 005793 440 | 005809 441 | 005835 442 | 005842 443 | 005862 444 | 005869 445 | 005870 446 | 005904 447 | 005924 448 | 005929 449 | 005932 450 | 005953 451 | 005959 452 | 005974 453 | 005987 454 | 006006 455 | 006016 456 | 006017 457 | 006019 458 | 006024 459 | 006034 460 | 006047 461 | 006082 462 | 006094 463 | 006109 464 | 006113 465 | 006121 466 | 006137 467 | 006155 468 | 006213 469 | 006228 470 | 006242 471 | 006246 472 | 006256 473 | 006283 474 | 006324 475 | 006326 476 | 006327 477 | 006331 478 | 006333 479 | 006334 480 | 006340 481 | 006358 482 | 006376 483 | 006383 484 | 006386 485 | 006397 486 | 006405 487 | 006415 488 | 006420 489 | 006423 490 | 006435 491 | 006441 492 | 006454 493 | 006469 494 | 006481 495 | 006493 496 | 006502 497 | 006510 498 | 006525 499 | 006527 500 | 006567 501 | 006581 502 | 006590 503 | 006634 504 | 006653 505 | 006685 506 | 006693 507 | 006717 508 | 006724 509 | 006733 510 | 006741 511 | 006749 512 | 006754 513 | 006757 514 | 006785 515 | 006790 516 | 006793 517 | 006817 518 | 006853 519 | 006854 520 | 006882 521 | 006890 522 | 006907 523 | 006925 524 | 006955 525 | 006970 526 | 006974 527 | 006996 528 | 007001 529 | 007014 530 | 007015 531 | 007034 532 | 007061 533 | 007082 534 | 007085 535 | 007112 536 | 007118 537 | 007126 538 | 007143 539 | 007164 540 | 007173 541 | 007176 542 | 007179 543 | 007242 544 | 007246 545 | 007267 546 | 007273 547 | 007278 548 | 007281 549 | 007282 550 | 007288 551 | 007304 552 | 007337 553 | 007339 554 | 007347 555 | 007358 556 | 007362 557 | 007368 558 | 007386 559 | 007399 560 | 007405 561 | 007423 562 | 007429 563 | 007447 564 | 007452 565 | 007459 566 | 007478 567 | 007496 568 | 007501 569 | 007507 570 | 007510 571 | 007518 572 | 007522 573 | 007556 574 | 007562 575 | 007580 576 | 
007589 577 | 007591 578 | 007613 579 | 007617 580 | 007634 581 | 007665 582 | 007676 583 | 007690 584 | 007693 585 | 007701 586 | 007714 587 | 007734 588 | 007757 589 | 007761 590 | 007797 591 | 007800 592 | 007806 593 | 007807 594 | 007818 595 | 007835 596 | 007839 597 | 007844 598 | 007861 599 | 007866 600 | 007882 601 | 007906 602 | 007927 603 | 007948 604 | 007960 605 | 007961 606 | 007967 607 | 007969 608 | 007992 609 | 008006 610 | 008020 611 | 008030 612 | 008035 613 | 008047 614 | 008052 615 | 008088 616 | 008104 617 | 008114 618 | 008120 619 | 008126 620 | 008129 621 | 008133 622 | 008135 623 | 008136 624 | 008143 625 | 008152 626 | 008158 627 | 008161 628 | 008212 629 | 008215 630 | 008231 631 | 008246 632 | 008259 633 | 008264 634 | 008270 635 | 008271 636 | 008276 637 | 008283 638 | 008289 639 | 008290 640 | 008324 641 | 008353 642 | 008357 643 | 008363 644 | 008375 645 | 008378 646 | 008383 647 | 008408 648 | 008414 649 | 008421 650 | 008432 651 | 008447 652 | 008451 653 | 008464 654 | 008479 655 | 008481 656 | 008488 657 | 008504 658 | 008548 659 | 008560 660 | 008579 661 | 008593 662 | 008609 663 | 008622 664 | 008632 665 | 008657 666 | 008658 667 | 008668 668 | 008682 669 | 008684 670 | 008693 671 | 008694 672 | 008708 673 | 008711 674 | 008715 675 | 008724 676 | 008734 677 | 008761 678 | 008777 679 | 008785 680 | 008788 681 | 008797 682 | 008800 683 | 008824 684 | 008828 685 | 008829 686 | 008895 687 | 008896 688 | 008903 689 | 008906 690 | 008910 691 | 008915 692 | 008916 693 | 008996 694 | 009008 695 | 009023 696 | 009033 697 | 009052 698 | 009071 699 | 009077 700 | 009081 701 | 009092 702 | 009096 703 | 009111 704 | 009119 705 | 009122 706 | 009125 707 | 009134 708 | 009140 709 | 009149 710 | 009156 711 | 009182 712 | 009201 713 | 009206 714 | 009210 715 | 009241 716 | 009243 717 | 009261 718 | 009267 719 | 009284 720 | 009302 721 | 009304 722 | 009321 723 | 009322 724 | 009335 725 | 009341 726 | 009360 727 | 009376 728 | 009381 729 | 009384 730 | 009387 731 | 009396 732 | 009426 733 | 009427 734 | 009430 735 | 009475 736 | 009492 737 | 009530 738 | 009536 739 | 009564 740 | 009590 741 | 009593 742 | 009599 743 | 009601 744 | 009643 745 | 009652 746 | 009675 747 | 009680 748 | 009683 749 | 009688 750 | 009694 751 | 009701 752 | 009704 753 | 009705 754 | 009723 755 | 009740 756 | 009757 757 | 009768 758 | 009770 759 | 009777 760 | 009779 761 | 009804 762 | 009806 763 | 009821 764 | 009827 765 | 009829 766 | 009847 767 | 009849 768 | 009856 769 | 009873 770 | 009883 771 | 009895 772 | 009903 773 | 009927 774 | 009943 775 | 009963 776 | -------------------------------------------------------------------------------- /requirements: -------------------------------------------------------------------------------- 1 | # Name Version Build Channel 2 | _libgcc_mutex 0.1 main 3 | asn1crypto 0.24.0 py37_1003 conda-forge 4 | backcall 0.1.0 py37_0 5 | blas 1.0 mkl 6 | bleach 3.1.0 py37_0 7 | bzip2 1.0.6 h14c3975_5 8 | ca-certificates 2019.8.28 0 9 | cairo 1.14.12 h8948797_3 10 | certifi 2019.9.11 py37_0 11 | cffi 1.12.1 py37h2e261b9_0 12 | chardet 3.0.4 py37_1003 conda-forge 13 | cloudpickle 0.8.0 py_0 conda-forge 14 | cryptography 2.5 py37h9d9f1b6_1 conda-forge 15 | cudatoolkit 9.0 h13b8566_0 16 | cycler 0.10.0 py37_0 17 | cython 0.29.7 py37he6710b0_0 18 | cytoolz 0.9.0.1 py37h14c3975_1001 conda-forge 19 | dask-core 1.1.3 py_0 conda-forge 20 | dbus 1.13.6 h746ee38_0 21 | decorator 4.3.2 py37_0 22 | easydict 1.9 pypi_0 pypi 23 | entrypoints 0.3 py37_0 24 | expat 2.2.6 he6710b0_0 25 
| ffmpeg 4.0 hcdf2ecd_0 26 | fontconfig 2.13.0 h9420a91_0 27 | freeglut 3.0.0 hf484d3e_5 28 | freetype 2.9.1 h8a8886c_1 29 | git 2.20.1 pl526hacde149_0 30 | glib 2.56.2 hd408876_0 31 | gmp 6.1.2 h6c8ec71_1 32 | graphite2 1.3.13 h23475e2_0 33 | gst-plugins-base 1.14.0 hbbd80ab_1 34 | gstreamer 1.14.0 hb453b48_1 35 | harfbuzz 1.8.8 hffaf4a1_0 36 | hdbscan 0.8.22 py37hd352d35_1 conda-forge 37 | hdf5 1.10.2 hba1933b_1 38 | icu 58.2 h9c2bf20_1 39 | idna 2.8 py37_1000 conda-forge 40 | imageio 2.5.0 py37_0 conda-forge 41 | intel-openmp 2019.1 144 42 | ipykernel 5.1.0 py37h39e3cac_0 43 | ipython 7.3.0 py37h39e3cac_0 44 | ipython_genutils 0.2.0 py37_0 45 | ipywidgets 7.4.2 py37_0 46 | jasper 2.0.14 h07fcdf6_1 47 | jedi 0.13.3 py37_0 48 | jinja2 2.10 py37_0 49 | joblib 0.13.2 py_0 conda-forge 50 | jpeg 9b h024ee3a_2 51 | jsonschema 2.6.0 py37_0 52 | jupyter 1.0.0 py37_7 53 | jupyter_client 5.2.4 py37_0 54 | jupyter_console 6.0.0 py37_0 55 | jupyter_core 4.4.0 py37_0 56 | kiwisolver 1.0.1 py37hf484d3e_0 57 | krb5 1.16.1 h173b8e3_7 58 | libcurl 7.64.1 h20c2e04_0 59 | libedit 3.1.20181209 hc058e9b_0 60 | libffi 3.2.1 hd88cf55_4 61 | libgcc-ng 8.2.0 hdf63c60_1 62 | libgfortran-ng 7.3.0 hdf63c60_0 63 | libglu 9.0.0 hf484d3e_1 64 | libopencv 3.4.2 hb342d67_1 65 | libopus 1.3 h7b6447c_0 66 | libpng 1.6.36 hbc83047_0 67 | libsodium 1.0.16 h1bed415_0 68 | libssh2 1.8.2 h1ba5d50_0 69 | libstdcxx-ng 8.2.0 hdf63c60_1 70 | libtiff 4.0.10 h2733197_2 71 | libuuid 1.0.3 h1bed415_2 72 | libvpx 1.7.0 h439df22_0 73 | libxcb 1.13 h1bed415_1 74 | libxml2 2.9.9 he19cac6_0 75 | markupsafe 1.1.1 py37h7b6447c_0 76 | matplotlib 3.0.2 py37h5429711_0 77 | matplotlib-base 3.0.2 py37h167e16e_1001 conda-forge 78 | mistune 0.8.4 py37h7b6447c_0 79 | mkl 2019.4 243 80 | mkl-service 2.3.0 py37he904b0f_0 81 | mkl_fft 1.0.10 py37ha843d7b_0 82 | mkl_random 1.0.2 py37hd81dba3_0 83 | nbconvert 5.3.1 py37_0 84 | nbformat 4.4.0 py37_0 85 | ncurses 6.1 he6710b0_1 86 | networkx 2.2 py_1 conda-forge 87 | ninja 1.8.2 py37h6bb024c_1 88 | notebook 5.7.4 py37_0 89 | numpy 1.16.1 py37h7e9f1db_0 90 | numpy-base 1.16.1 py37hde5b4d6_0 91 | olefile 0.46 py37_0 92 | opencv 3.4.2 py37h6fd60c2_1 93 | openssl 1.1.1d h7b6447c_3 94 | pandas 0.24.1 py37he6710b0_0 95 | pandoc 2.2.3.2 0 96 | pandocfilters 1.4.2 py37_1 97 | parso 0.3.4 py37_0 98 | patsy 0.5.1 py37_0 99 | pcre 8.42 h439df22_0 100 | perl 5.26.2 h14c3975_0 101 | pexpect 4.6.0 py37_0 102 | pickleshare 0.7.5 py37_0 103 | pillow 5.4.1 py37h34e0f95_0 104 | pip 19.0.3 py37_0 105 | pixman 0.36.0 h7b6447c_0 106 | prometheus_client 0.6.0 py37_0 107 | prompt_toolkit 2.0.9 py37_0 108 | ptyprocess 0.6.0 py37_0 109 | py-opencv 3.4.2 py37hb342d67_1 110 | pyclustering 0.9.0 pypi_0 pypi 111 | pycocotools 2.0.0 pypi_0 pypi 112 | pycparser 2.19 py37_0 113 | pygments 2.3.1 py37_0 114 | pyopenssl 19.0.0 py37_0 conda-forge 115 | pyparsing 2.3.1 py37_0 116 | pyqt 5.9.2 py37h05f1152_2 117 | pysocks 1.6.8 py37_1002 conda-forge 118 | python 3.7.2 h0371630_0 119 | python-dateutil 2.8.0 py37_0 120 | pytorch 1.0.1 py3.7_cuda9.0.176_cudnn7.4.2_2 pytorch 121 | pytz 2018.9 py37_0 122 | pywavelets 1.0.2 py37h3010b51_0 conda-forge 123 | pyzmq 18.0.0 py37he6710b0_0 124 | qt 5.9.7 h5867ecd_1 125 | qtconsole 4.4.3 py37_0 126 | readline 7.0 h7b6447c_5 127 | requests 2.21.0 py37_1000 conda-forge 128 | scikit-image 0.14.2 py37hf484d3e_1 conda-forge 129 | scikit-learn 0.21.3 py37hd81dba3_0 130 | scipy 1.2.1 py37h7c811a0_0 131 | seaborn 0.9.0 py37_0 132 | send2trash 1.5.0 py37_0 133 | setuptools 40.8.0 py37_0 134 | sip 4.19.8 
py37hf484d3e_0 135 | six 1.12.0 py37_0 136 | sqlite 3.26.0 h7b6447c_0 137 | statsmodels 0.10.1 py37hdd07704_0 138 | terminado 0.8.1 py37_1 139 | testpath 0.4.2 py37_0 140 | tk 8.6.8 hbc83047_0 141 | toolz 0.9.0 py_1 conda-forge 142 | torchfile 0.1.0 py_0 conda-forge 143 | torchvision 0.2.2 py_2 pytorch 144 | tornado 5.1.1 py37h7b6447c_0 145 | traitlets 4.3.2 py37_0 146 | urllib3 1.24.1 py37_1000 conda-forge 147 | visdom 0.1.8.8 0 conda-forge 148 | wcwidth 0.1.7 py37_0 149 | webencodings 0.5.1 py37_1 150 | websocket-client 0.55.0 py37_0 conda-forge 151 | wheel 0.33.1 py37_0 152 | widgetsnbextension 3.4.2 py37_0 153 | xz 5.2.4 h14c3975_4 154 | zeromq 4.3.1 he6710b0_3 155 | zlib 1.2.11 h7b6447c_3 156 | zstd 1.3.7 h0b5b093_0 157 | -------------------------------------------------------------------------------- /create_spoc_features.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import json 4 | 5 | from sklearn.decomposition import PCA 6 | from torch.autograd import Variable 7 | import torchvision.models as models 8 | 9 | from data import * 10 | import active_learning_package.helpers as helpers 11 | 12 | 13 | 14 | def get_feature_maps(dataset, 15 | net, 16 | imageset_name, 17 | save_dir): 18 | 19 | path_to_image_feature_dir = os.path.join(save_dir, imageset_name + '586_conv5_3_features_before_relu/') 20 | # path_to_image_feature_dir = save_dir+'2012trainval586_conv5_3_features/' 21 | 22 | if not os.path.exists(path_to_image_feature_dir): 23 | os.mkdir(path_to_image_feature_dir) 24 | 25 | # go through all images in the imageset 26 | already_saved = os.listdir(path_to_image_feature_dir) 27 | 28 | transform = BaseTransform(586, (104, 117, 123)) 29 | 30 | for i, idx in enumerate(dataset.ids): 31 | image_feature_path = path_to_image_feature_dir + str(idx[1]) + '.pickle' 32 | if str(idx[1]) + '.pickle' in already_saved: 33 | print(i, '/', len(dataset.ids), ' was already saved') 34 | 35 | # load feature and append it 36 | # features = helpers.unpickle(image_feature_path) 37 | 38 | # conv_feature_list.append(features) 39 | 40 | continue 41 | 42 | print(i, '/', len(dataset.ids)) 43 | 44 | # load image and transform (colors in a different order) 45 | img = dataset.pull_image_using_imageset_id(idx) 46 | 47 | # convert BGR -> RGB and HWC -> CHW 48 | x = torch.from_numpy(transform(img)[0][:, :, (2, 1, 0)]).permute(2, 0, 1) # We use a pre-trained model from the pytorch model zoo, which is trained with RGB; cv2.imread loads in BGR 49 | 50 | x = Variable(x.unsqueeze(0)) 51 | 52 | if torch.cuda.is_available(): 53 | torch.cuda.empty_cache() 54 | x = x.to('cuda') 55 | 56 | # forward pass up to conv5_3 57 | features = net(x) 58 | 59 | # sum-pool each of the 512 channels over all spatial locations (NOTE: 512 is specific to VGG16 conv5_3) 60 | features = features.reshape(1, 512, -1).sum(dim=-1) 61 | 62 | # move features back to cpu 63 | if torch.cuda.is_available(): 64 | features = features.to('cpu') 65 | 66 | # append to conv_feature list 67 | # conv_feature_list.append(features) 68 | 69 | with open(image_feature_path, 'wb') as f: 70 | pickle.dump(features, f) 71 | 72 | return
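# Added note (hedged): a minimal sketch of the sum-pooling step performed by
# get_feature_maps() above, on random data instead of real conv5_3 activations,
# to make the tensor shapes explicit. All names below are illustrative only:
#
#   import torch
#   fmap = torch.randn(1, 512, 37, 37)           # stand-in conv5_3 activations (spatial size depends on the input)
#   spoc = fmap.reshape(1, 512, -1).sum(dim=-1)  # sum over all spatial positions
#   assert spoc.shape == (1, 512)                # one 512-D descriptor per image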
73 | def calculate_PCA_and_whitening_parameters(dataset, 74 | imageset_name, 75 | save_dir): 76 | 77 | 78 | conv_feature_list = [] 79 | if '2007' in imageset_name: 80 | print('PCA should be computed on the 2012 dataset; this failsafe prevents overwriting the 2012 PCA with a 2007 PCA') 81 | raise NotImplementedError 82 | 83 | # path_to_image_feature_dir = os.path.join(save_dir,imageset_name+'586_conv5_3_features_before_relu/') 84 | path_to_image_feature_dir = save_dir+'2012trainval586_conv5_3_features/' 85 | # 86 | 87 | # load features 88 | pca_save_path = path_to_image_feature_dir + imageset_name +'PCA.pickle' 89 | 90 | if os.path.exists(pca_save_path): 91 | print('already did this PCA') 92 | return 93 | print('load features:') 94 | for i, idx in enumerate(dataset.ids): 95 | print('load feature', i, '/', len(dataset.ids),' and L2 normalize features before PCA') 96 | image_feature_path = path_to_image_feature_dir + str(idx[1]) + '.pickle' 97 | 98 | # load feature and append it 99 | features = helpers.unpickle(image_feature_path) 100 | 101 | # L2 normalize 102 | features = features / features.norm(2) 103 | 104 | 105 | conv_feature_list.append(features) 106 | 107 | np_features = torch.cat(conv_feature_list).detach().numpy() 108 | print('loaded all features and transformed them into a numpy array') 109 | 110 | ## calculate PCA parameters (which dimensions should be kept) 111 | # numpy array 112 | print('Do PCA') 113 | pca = PCA(n_components = 256, svd_solver = 'full', random_state = 42,whiten=True) 114 | pca.fit(np_features) 115 | print('did PCA') 116 | 117 | # save PCA 118 | pca_save_path = path_to_image_feature_dir + imageset_name +'PCA.pickle' 119 | 120 | with open(pca_save_path, 'wb') as f: 121 | pickle.dump(pca, f) 122 | 123 | print('Saved PCA') 124 | 125 | return 126 | 127 | def create_spoc_features(dataset, 128 | image_features_path, 129 | PCA_param_path, 130 | imageset_name, 131 | save_dir): 132 | """ 133 | See Babenko 2014 134 | 135 | """ 136 | 137 | # load pca and whitening parameters 138 | pca = helpers.unpickle(PCA_param_path) 139 | 140 | if not os.path.exists(save_dir): 141 | os.mkdir(save_dir) 142 | 143 | # load image features 144 | for i, idx in enumerate(dataset.ids): 145 | image_feature_path = image_features_path + str(idx[1]) + '.pickle' 146 | 147 | # load feature and append it 148 | features = helpers.unpickle(image_feature_path) 149 | 150 | print(i, '/', len(dataset.ids)) 151 | 152 | # l2 normalization 153 | features = features/features.norm(2) 154 | 155 | # apply pca transform + whitening to features 156 | features = pca.transform(features.detach().numpy()) 157 | features = torch.tensor(features) 158 | 159 | # l2-normalization 160 | features = features/features.norm(2) 161 | spoc_feature_path = save_dir + str(idx[1]) + '.pickle' 162 | 163 | # save SPoC representation 164 | 165 | with open(spoc_feature_path,'wb') as f: 166 | pickle.dump(features, f) 167 | 168 | print('Created and saved all SPoC representations of images') 169 | 170 | return 171 | 172 | 173 | def calculate_scalar_product_image_similarity(tensor_a,tensor_b): 174 | """ 175 | https://datascience.stackexchange.com/questions/744/cosine-similarity-versus-dot-product-as-distance-metrics 176 | 177 | calculates image similarity between two images using a simple scalar product matching kernel: 178 | L. Bo and C. Sminchisescu. Efficient match kernel between 179 | sets of features for visual recognition. In Advances in Neural Information Processing Systems (NIPS), pages 135–143, 180 | 2009.
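Added note (hedged): because the SPoC descriptors produced above are
L2-normalized before this kernel is applied, the scalar product coincides
with the cosine similarity of the two images (both vectors have unit norm).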
181 | 182 | :return: similarity 183 | """ 184 | 185 | return torch.dot(tensor_a.squeeze(),tensor_b.squeeze()) 186 | 187 | 188 | def calculate_all_images_similarities(dataset, load_dir_spoc_features): 189 | """ 190 | 191 | :return: 192 | """ 193 | 194 | # todo: can be made faster, now doing redundant calculations (similarities of a->b and b->a) 195 | 196 | save_dir = load_dir_spoc_features + 'image_similarities/' 197 | if not os.path.exists(save_dir): 198 | os.mkdir(save_dir) 199 | 200 | already_saved = os.listdir(save_dir) 201 | 202 | # go through the dataset 203 | for i,idx in enumerate(dataset.ids): 204 | if str(idx[1]) + '.pickle' in already_saved: 205 | print(i, '/', len(dataset.ids), ' was already saved') 206 | continue 207 | 208 | print(i,'/',len(dataset.ids)) 209 | # placeholder to store similarities between all images 210 | image_similarity_dir = {} 211 | 212 | # load image description 213 | image_path_a = load_dir_spoc_features+ str(idx[1]) + '.pickle' 214 | image_a = helpers.unpickle(image_path_a) 215 | 216 | # go through all OTHER images except the idx we are currently at 217 | other_images = [idj for idj in dataset.ids if idj != idx] 218 | 219 | for j, idj in enumerate(other_images): 220 | 221 | # load image description 222 | image_path_b = load_dir_spoc_features + str(idj[1]) + '.pickle' 223 | image_b = helpers.unpickle(image_path_b) 224 | 225 | 226 | # calculate similarity 227 | similarity = calculate_scalar_product_image_similarity(image_a,image_b) 228 | 229 | if similarity.shape != torch.Size([]): # torch.dot returns a 0-dim tensor 230 | print(similarity) 231 | print('similarity should be a scalar') 232 | raise NotImplementedError 233 | 234 | # store similarity 235 | image_similarity_dir[idj[1]] = similarity.item() 236 | 237 | # save image similarity dir 238 | path = save_dir + str(idx[1]) + '.pickle' 239 | 240 | with open(path,'wb') as f: 241 | pickle.dump(image_similarity_dir, f) 242 | 243 | return save_dir 244 | 245 | 246 | def calculate_density_per_imageset(dataset,load_dir_similarities): 247 | """ 248 | density is the mean similarity of one image to all other images in the dataset (see Settles 2008) 249 | """ 250 | 251 | # todo: can be made faster, now doing redundant calculations (similarities of a->b and b->a) 252 | # go through the dataset 253 | density = {} 254 | for i,idx in enumerate(dataset.ids): 255 | print(i,'/',len(dataset.ids)) 256 | # load similarity between all images in trainval and the current image (idx) 257 | path = load_dir_similarities + str(idx[1]) + '.pickle' 258 | 259 | similarities_idx = helpers.unpickle(path) 260 | 261 | # go through all OTHER images in the dataset (can be a subset of trainval, e.g. only the car images) 262 | # except the idx we are currently at
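# Added note (hedged): the loop below computes the information-density term
# of Settles & Craven (2008):
#     density(x) = (1 / (N - 1)) * sum_{x' != x} sim(x, x')
# i.e. the mean SPoC similarity of image x to every other image in the imageset.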
263 | other_images = [idj for idj in dataset.ids if idj != idx] 264 | 265 | # placeholder 266 | density[idx[1]] = 0 267 | for j, idj in enumerate(other_images): 268 | 269 | density[idx[1]] += similarities_idx[idj[1]] 270 | 271 | # divide by the number of images to get the mean 272 | density[idx[1]] /= len(other_images) 273 | 274 | 275 | 276 | # save image density dir 277 | path = load_dir_similarities + dataset.image_set[0][1] + '.pickle' 278 | 279 | 280 | 281 | with open(path,'wb') as f: 282 | pickle.dump(density, f) 283 | 284 | 285 | 286 | 287 | # def create_image_affinity_propagation_clusters(features, 288 | # dataset, 289 | # imageset_name): 290 | # 291 | # return 292 | 293 | if __name__ == '__main__': 294 | 295 | save_dir = 'data/' 296 | 297 | """get feature maps""" 298 | # imagesets = [[('2012', 'trainval')], 299 | # [('2007', 'trainval')], 300 | # [('2012', 'bottle_trainval_detect')], 301 | # [('2012', 'car_trainval_detect')], 302 | # [('2012', 'horse_trainval_detect')], 303 | # [('2012', 'sheep_trainval_detect')], 304 | # [('2012', 'pottedplant_trainval_detect')] 305 | # ] 306 | 307 | 308 | # load network 309 | # vgg16 = models.vgg16(pretrained=True) #NOTE: I adjusted the source code of the vgg16 such that it only goes up to the conv5_3 layer in forward passes 310 | # vgg16.eval() 311 | # 312 | # for imageset in imagesets: 313 | # # load dataset 314 | # dataset = VOCDetection(VOC_ROOT_LOCAL, imageset, BaseTransform(300, config.voc['dataset_mean']), 315 | # VOCAnnotationTransform()) 316 | # 317 | # get_feature_maps(dataset = dataset, 318 | # net = vgg16, 319 | # imageset_name=imageset[0][0] + imageset[0][1], 320 | # save_dir= save_dir) 321 | 322 | """ Get PCA and whitening params on hold-out dataset (VOC2012)""" 323 | 324 | # 325 | # 326 | # imagesets = [[('2012', 'trainval')], 327 | # [('2012', 'bottle_trainval_detect')], 328 | # [('2012', 'car_trainval_detect')], 329 | # [('2012', 'horse_trainval_detect')], 330 | # [('2012', 'sheep_trainval_detect')], 331 | # [('2012', 'pottedplant_trainval_detect')] 332 | # ] 333 | # 334 | # for imageset in imagesets: 335 | # 336 | # # load dataset 337 | # dataset = VOCDetection(VOC_ROOT_LOCAL, imageset, BaseTransform(300, config.voc['dataset_mean']), VOCAnnotationTransform()) 338 | # 339 | # calculate_PCA_and_whitening_parameters(dataset=dataset, 340 | # imageset_name=imageset[0][0]+imageset[0][1], 341 | # save_dir=save_dir) 342 | # 343 | # 344 | 345 | """ Make SPoC features """ 346 | # Imagesets 347 | # imagesets = [[('2007', 'trainval')]] 348 | # 349 | # for imageset in imagesets: 350 | # # load dataset 351 | # dataset = VOCDetection(VOC_ROOT_LOCAL, imageset, BaseTransform(586, config.voc['dataset_mean']), 352 | # VOCAnnotationTransform()) 353 | # # 354 | # # calculate_PCA_and_whitening_parameters(dataset=dataset, 355 | # # imageset_name=imageset[0][0]+imageset[0][1], 356 | # # save_dir=save_dir, 357 | # # net=vgg16) 358 | # pca_dir = save_dir+'2012trainval586_conv5_3_features_before_relu/' 359 | # PCA_param_path = pca_dir + '2012trainvalPCA.pickle' # for now only using the 2012 full trainval PCA 360 | # image_features_path = os.path.join(os.getcwd(), save_dir, '2007trainval586_conv5_3_features_before_relu/') 361 | # # path_to_image_feature_dir = os.path.join(save_dir,imageset_name+'586_conv5_3_features/') 362 | # 363 | # create_spoc_features(dataset, 364 | # image_features_path, 365 | # PCA_param_path, 366 | # imageset_name=imageset[0][0] + imageset[0][1], 367 | # save_dir=image_features_path + '2012trainvalPCA/')
368 | 369 | 370 | """ Calculate complete similarities from each image in trainval 2007 to all other images""" 371 | 372 | # dataset = VOCDetection(VOC_ROOT_LOCAL, [('2007', 'trainval')], BaseTransform(586, config.voc['dataset_mean']), 373 | # VOCAnnotationTransform()) 374 | # image_features_path = os.path.join(os.getcwd(), save_dir, '2007trainval586_conv5_3_features_before_relu/') 375 | # load_dir_spoc_features = image_features_path + '2012trainvalPCA/' 376 | # similarity_dir = calculate_all_images_similarities(dataset=dataset, 377 | # load_dir_spoc_features = load_dir_spoc_features) 378 | # 379 | 380 | """ Create density per imageset """ 381 | image_sim_dir = save_dir+'2007trainval586_conv5_3_features_before_relu/2012trainvalPCA/image_similarities/' 382 | 383 | 384 | imagesets = [[('2007', 'trainval')], 385 | [('2007', 'bottle_trainval_detect')], 386 | [('2007', 'car_trainval_detect')], 387 | [('2007', 'horse_trainval_detect')], 388 | [('2007', 'sheep_trainval_detect')], 389 | [('2007', 'pottedplant_trainval_detect')] 390 | ] 391 | 392 | for imageset in imagesets: 393 | print(imageset) 394 | # load dataset 395 | dataset = VOCDetection(VOC_ROOT_LOCAL, imageset, BaseTransform(300, config.voc['dataset_mean']), VOCAnnotationTransform()) 396 | calculate_density_per_imageset(dataset=dataset, 397 | load_dir_similarities = image_sim_dir) -------------------------------------------------------------------------------- /ssd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | from layers import * 6 | from data import voc 7 | import os 8 | 9 | 10 | class SSD(nn.Module): 11 | """Single Shot Multibox Architecture 12 | The network is composed of a base VGG network followed by the 13 | added multibox conv layers. Each multibox layer branches into 14 | 1) conv2d for class conf scores 15 | 2) conv2d for localization predictions 16 | 3) associated priorbox layer to produce default bounding 17 | boxes specific to the layer's feature map size. 18 | See: https://arxiv.org/pdf/1512.02325.pdf for more details. 19 | 20 | Also implemented a version predicting a standard deviation per bounding box coordinate, following: 21 | CVPR 2019 paper: 22 | Bounding Box Regression with Uncertainty for Accurate Object Detection 23 | by Yihui He, Chenchen Zhu, Jianren Wang,
Marios Savvides, Xiangyu Zhang 24 | 25 | Args: 26 | phase: (string) Can be "test" or "train" 27 | size: input image size 28 | base: VGG16 layers for input, size of either 300 or 512 29 | extras: extra layers that feed to multibox loc and conf layers 30 | head: "multibox head" consists of loc and conf conv layers 31 | """ 32 | 33 | def __init__(self, phase, modeltype, base, extras, head, num_classes, default_forward, merging_method, sampling_strategy, sample_select_forward, sample_select_nms_conf_thresh, cfg, forward_vgg_base_only): 34 | super(SSD, self).__init__() 35 | self.phase = phase 36 | self.num_classes = num_classes 37 | self.cfg = cfg 38 | self.priorbox = PriorBox(self.cfg, modeltype) 39 | with torch.no_grad(): 40 | self.priors = Variable(self.priorbox.forward()) 41 | 42 | # todo: convert to x1y1x2y2 format here if necessary 43 | 44 | 45 | self.size = 300 46 | 47 | # SSD network 48 | self.vgg = nn.ModuleList(base) 49 | # Layer learns to scale the l2 normalized features from conv4_3 50 | self.L2Norm = L2Norm(512, 20) 51 | self.extras = nn.ModuleList(extras) 52 | 53 | self.loc = nn.ModuleList(head[0]) 54 | self.modeltype = modeltype 55 | if self.modeltype == 'SSD300KL': 56 | self.loc_std = nn.ModuleList(head[2]) 57 | self.conf = nn.ModuleList(head[1]) 58 | if self.modeltype in ['SSD300','SSD300KL']: 59 | self.size = 300 60 | else: 61 | raise NotImplementedError() 62 | 63 | if phase == 'test': 64 | self.softmax = nn.Softmax(dim=-1) 65 | 66 | 67 | if sample_select_forward and merging_method in ['bsas','hbdscan','pre_nms_avg']: 68 | conf_thresh = sample_select_nms_conf_thresh # merging boxes can be expensive; with fewer boxes we can apply a more aggressive confidence threshold 69 | else: 70 | conf_thresh = 0.01 71 | # Active Learning parameters added to enable active learning experiments and usage 72 | self.detect = Detect(num_classes, 0, 200, conf_thresh, 0.45, # default values in paper: num_classes,0,200,0.01,0.45 73 | default_forward, 74 | merging_method, 75 | sampling_strategy, 76 | modeltype) 77 | 78 | self.forward_vgg_base_only = forward_vgg_base_only 79 | 80 | 81 | def forward(self, x): 82 | """Applies network layers and ops on input image(s) x. 83 | 84 | Args: 85 | x: input image or batch of images. Shape: [batch,3,300,300]. 86 | 87 | Return: 88 | Depending on phase: 89 | test: 90 | Variable(tensor) of output class label predictions, 91 | confidence score, and corresponding location predictions for 92 | each object detected. Shape: [batch,topk,7] 93 | 94 | train: 95 | list of concat outputs from: 96 | 1: confidence layers, Shape: [batch*num_priors,num_classes] 97 | 2: localization layers, Shape: [batch,num_priors*4] 98 | 3: priorbox layers, Shape: [2,num_priors*4] 99 | """ 100 | 101 | sources = list() 102 | loc = list() 103 | conf = list() 104 | if self.modeltype == 'SSD300KL': 105 | loc_std = list() 106 | # apply vgg up to conv4_3 relu 107 | for k in range(23): 108 | # print('debug: apply vgg') 109 | x = self.vgg[k](x) 110 | 111 | if self.forward_vgg_base_only: 112 | return x 113 | # TODO: Why apply L2norm already? => because conv4_3 has a larger scale than the rest 114 | s = self.L2Norm(x) 115 | sources.append(s) 116 | 117 | # apply vgg up to fc7 TODO: Why FC layers? => It doesn't use FC layers; it runs up to where the FC layers were (fc6/fc7 are converted to conv6/conv7, see vgg() below)
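# Added note (hedged): for a 300x300 input the six source maps appended to
# `sources` are conv4_3 38x38 (4 default boxes per location), conv7 19x19 (6),
# conv8_2 10x10 (6), conv9_2 5x5 (6), conv10_2 3x3 (4) and conv11_2 1x1 (4),
# giving 38^2*4 + 19^2*6 + 10^2*6 + 5^2*6 + 3^2*4 + 1^2*4 = 8732 priors in total.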
118 | for k in range(23, len(self.vgg)): 119 | # print('debug2: apply vgg') 120 | x = self.vgg[k](x) 121 | sources.append(x) 122 | 123 | # apply extra layers and cache source layer outputs 124 | for k, v in enumerate(self.extras): 125 | # print('debug3: apply extra layers') 126 | x = F.relu(v(x), inplace=True) 127 | if k % 2 == 1: #TODO: Why only every second layer of the extra layers? => because that's how the paper states it. It has conv blocks of 2 conv layers 128 | sources.append(x) 129 | 130 | if self.modeltype != 'SSD300KL': 131 | # apply multibox head to source layers 132 | for (x, l, c) in zip(sources, self.loc, self.conf): 133 | # print('debug4: apply multibox head') 134 | loc.append(l(x).permute(0, 2, 3, 1).contiguous()) 135 | conf.append(c(x).permute(0, 2, 3, 1).contiguous()) 136 | 137 | loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1) 138 | conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1) 139 | # print('debug forward 1') 140 | if self.phase == "test": 141 | # if self.sampling_strategy != 'p-max_localization-stability' : 142 | output = self.detect(loc.view(loc.size(0), -1, 4), # loc preds 143 | self.softmax(conf.view(conf.size(0), -1,self.num_classes)), # conf preds 144 | self.priors.type(type(x.data)), # default boxes 145 | ) 146 | # else: 147 | # output = self.detect() 148 | 149 | # training phase => no merging or other forwards used 150 | else: 151 | output = ( 152 | loc.view(loc.size(0), -1, 4), 153 | conf.view(conf.size(0), -1, self.num_classes), 154 | self.priors 155 | ) 156 | else: 157 | # apply multibox head to source layers 158 | for (x, l, c, std) in zip(sources, self.loc, self.conf, self.loc_std): 159 | # print('debug4: apply multibox head') 160 | loc.append(l(x).permute(0, 2, 3, 1).contiguous()) 161 | conf.append(c(x).permute(0, 2, 3, 1).contiguous()) 162 | loc_std.append(std(x).permute(0, 2, 3, 1).contiguous()) 163 | 164 | loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1) 165 | conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1) 166 | loc_std = torch.cat([o.view(o.size(0), -1) for o in loc_std], 1) 167 | 168 | if self.phase == "test": 169 | # during training alpha = log(sigma^2) is predicted; at test time exp(alpha) converts it back to sigma^2 170 | loc_std = torch.exp(loc_std) 171 | 172 | output = self.detect(loc.view(loc.size(0), -1, 4), # loc preds 173 | self.softmax(conf.view(conf.size(0), -1,self.num_classes)), # conf preds 174 | self.priors.type(type(x.data)), # default boxes 175 | torch.abs(loc_std.view(loc_std.size(0), -1, 4)) # exp(alpha) = sigma^2, the predicted variances of the loc preds 176 | ) 177 | else: 178 | # during training, alpha = log(sigma^2) is predicted 179 | output = ( 180 | loc.view(loc.size(0), -1, 4), 181 | conf.view(conf.size(0), -1, self.num_classes), 182 | self.priors, 183 | torch.abs(loc_std.view(loc_std.size(0), -1, 4)) # alphas 184 | ) 185 | 186 | return output 187 | 188 | 189 | def load_weights(self, base_file): 190 | other, ext = os.path.splitext(base_file) 191 | if ext in ('.pkl', '.pth'): 192 | print('Loading weights into state dict...') 193 | self.load_state_dict(torch.load(base_file, 194 | map_location=lambda storage, loc: storage)) 195 | print('Finished!') 196 | else: 197 | print('Sorry only .pth and .pkl files supported.') 198 | def vgg(cfg, i, batch_norm=False): 199 | layers = [] 200 | in_channels = i 201 | for v in cfg: 202 | if v == 'M': 203 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 204 | elif v == 'C': #TODO: ceil mode not used in https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py => impacts output shape
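# Added note (hedged): ceil_mode is why SSD300's conv4_3 map is 38x38: after
# two stride-2 pools a 300x300 input is 75x75, and ceil(75/2) = 38, whereas the
# default floor pooling would yield 37x37.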
205 | layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)] 206 | else: 207 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) 208 | if batch_norm: 209 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] 210 | else: 211 | layers += [conv2d, nn.ReLU(inplace=True)] 212 | in_channels = v 213 | pool5 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1) 214 | conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6) # à trous algorithm (dilated conv) 215 | conv7 = nn.Conv2d(1024, 1024, kernel_size=1) 216 | layers += [pool5, conv6, 217 | nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True)] 218 | return layers 219 | 220 | 221 | # This function is derived from torchvision VGG make_layers() 222 | # https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py 223 | def add_extras(cfg, i, batch_norm=False): 224 | # Extra layers added to VGG for feature scaling 225 | layers = [] 226 | in_channels = i 227 | flag = False 228 | for k, v in enumerate(cfg): 229 | if in_channels != 'S': 230 | if v == 'S': 231 | layers += [nn.Conv2d(in_channels, cfg[k + 1], 232 | kernel_size=(1, 3)[flag], stride=2, padding=1)] 233 | else: 234 | layers += [nn.Conv2d(in_channels, v, kernel_size=(1, 3)[flag])] 235 | flag = not flag 236 | in_channels = v 237 | return layers 238 | 239 | 240 | 241 | def multibox(vgg, extra_layers, cfg, num_classes, model_type): 242 | #cfg = number of boxes per feature map location 243 | 244 | loc_layers = [] 245 | conf_layers = [] 246 | vgg_source = [21, -2] 247 | if model_type != 'SSD300KL': 248 | for k, v in enumerate(vgg_source): 249 | loc_layers += [nn.Conv2d(vgg[v].out_channels, 250 | cfg[k] * 4, kernel_size=3, 251 | padding=1)] # 4 offsets per default box (cx, cy, w, h) 252 | conf_layers += [nn.Conv2d(vgg[v].out_channels, 253 | cfg[k] * num_classes, kernel_size=3, 254 | padding=1)] # out = #boxes*classes (per feature map) 255 | 256 | for k, v in enumerate(extra_layers[1::2], 2): 257 | loc_layers += [nn.Conv2d(v.out_channels, cfg[k] 258 | * 4, kernel_size=3, padding=1)] 259 | conf_layers += [nn.Conv2d(v.out_channels, cfg[k] 260 | * num_classes, kernel_size=3, padding=1)] 261 | return vgg, extra_layers, (loc_layers, conf_layers) 262 | 263 | else: 264 | """ 265 | Also predict a standard deviation per bounding box coordinate, from CVPR 2019 paper: 266 | Bounding Box Regression with Uncertainty for Accurate Object Detection 267 | by Yihui He, Chenchen Zhu, Jianren Wang, Marios Savvides, Xiangyu Zhang 268 | """
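# Added note (hedged): the loc_std head below mirrors the loc head
# layer-for-layer: for every source map it predicts cfg[k] * 4 extra channels,
# one alpha = log(sigma^2) per box coordinate (e.g. 4 * 4 = 16 channels on conv4_3).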
269 | loc_std_layers = [] 270 | for k, v in enumerate(vgg_source): 271 | loc_layers += [nn.Conv2d(vgg[v].out_channels, 272 | cfg[k] * 4, kernel_size=3, 273 | padding=1)] # 4 offsets per default box (cx, cy, w, h) 274 | loc_std_layers += [nn.Conv2d(vgg[v].out_channels, 275 | cfg[k] * 4, kernel_size=3, 276 | padding=1)] # one alpha per box coordinate 277 | 278 | conf_layers += [nn.Conv2d(vgg[v].out_channels, 279 | cfg[k] * num_classes, kernel_size=3, 280 | padding=1)] # out = #boxes*classes (per feature map) 281 | 282 | for k, v in enumerate(extra_layers[1::2], 2): 283 | loc_layers += [nn.Conv2d(v.out_channels, cfg[k] 284 | * 4, kernel_size=3, padding=1)] 285 | 286 | loc_std_layers += [nn.Conv2d(v.out_channels, cfg[k] 287 | * 4, kernel_size=3, padding=1)] 288 | 289 | conf_layers += [nn.Conv2d(v.out_channels, cfg[k] 290 | * num_classes, kernel_size=3, padding=1)] 291 | 292 | return vgg, extra_layers, (loc_layers, conf_layers, loc_std_layers) 293 | 294 | 295 | # 300D is SSD300 with dropout layers, to be able to make it Bayesian using MC-Dropout 296 | # TODO: upconvolution first and then downconvolution?? NOPE => those are the channels! 297 | base = { 298 | '300': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M', 299 | 512, 512, 512], 300 | '512': [], 301 | # '300D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512,'D', 'M', 302 | # 512, 512, 512,'D'] 303 | } 304 | #todo: should the dropout layers be in between base and extras? and also between 305 | extras = { 306 | '300': [256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256], 307 | '512': [], 308 | '300D': [256, 'S', 512, 128, 'D', 'S', 256, 128, 256, 128, 256] 309 | } 310 | mbox = { 311 | '300': [4, 6, 6, 6, 4, 4], # number of boxes per feature map location 312 | '512': [], 313 | # '300D': [4, 6, 6, 6, 4, 4] 314 | } 315 | 316 | 317 | def build_ssd(phase, model_type='SSD300', num_classes=21, default_forward = True, merging_method = None, sampling_strategy = None, sample_select_forward = False, sample_select_nms_conf_thresh = None, cfg = None, forward_vgg_base_only = False): 318 | " The active learning parameter here is the sample selection part" 319 | 320 | if phase != "test" and phase != "train": 321 | print("ERROR: Phase: " + phase + " not recognized") 322 | return 323 | if model_type not in ['SSD300','SSD300KL']: 324 | print("ERROR: You specified model_type " + repr(model_type) + ". However, " + 325 | "currently only SSD300 and SSD300KL (input size 300) are supported!") 326 | return 327 | 328 | if model_type in ['SSD300','SSD300KL']: # add other SSD models with input dim 300 to this list if desired 329 | size = 300 330 | 331 | base_, extras_, head_ = multibox(vgg(base[str(size)], 3), 332 | add_extras(extras[str(size)], 1024), 333 | mbox[str(size)], num_classes, model_type) #cfg 334 | return SSD(phase, model_type, base_, extras_, head_, num_classes, default_forward, merging_method, sampling_strategy, sample_select_forward, sample_select_nms_conf_thresh, cfg, forward_vgg_base_only) -------------------------------------------------------------------------------- /layers/box_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | 4 | 5 | def point_form(boxes): 6 | """ Convert prior_boxes to (xmin, ymin, xmax, ymax) 7 | representation for comparison to point form ground truth data. 8 | Args: 9 | boxes: (tensor) center-size default boxes from priorbox layers.
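Added example (hedged): a center-size box (cx, cy, w, h) = (0.5, 0.5, 0.2, 0.2)
becomes (xmin, ymin, xmax, ymax) = (0.4, 0.4, 0.6, 0.6).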
10 | Return: 11 | boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. 12 | """ 13 | return torch.cat((boxes[:, :2] - boxes[:, 2:]/2, # xmin, ymin 14 | boxes[:, :2] + boxes[:, 2:]/2), 1) # xmax, ymax 15 | 16 | 17 | def center_size(boxes): 18 | """ Convert prior_boxes to (cx, cy, w, h) 19 | representation for comparison to center-size form ground truth data. 20 | Args: 21 | boxes: (tensor) point_form boxes 22 | Return: 23 | boxes: (tensor) Converted (cx, cy, w, h) form of boxes. 24 | """ 25 | return torch.cat(((boxes[:, 2:] + boxes[:, :2])/2, # cx, cy 26 | boxes[:, 2:] - boxes[:, :2]), 1) # w, h 27 | 28 | # def center_size2(boxes): 29 | # """ 30 | # 31 | # :param boxes: 32 | # :return: 33 | # """ 34 | # 35 | # output_boxes[:, 0] = (input_boxes[:, 2] + input_boxes[:, 0]) / 2 36 | # output_boxes[:, 1] = (input_boxes[:, 3] + input_boxes[:, 1]) / 2 37 | # output_boxes[:, 2] = input_boxes[:, 2] - input_boxes[:, 0] 38 | # output_boxes[:, 3] = input_boxes[:, 3] - input_boxes[:, 1] 39 | # return torch.cat((boxes[:, 2:] + (torch.abs(boxes[:, :2]))/2, # cx, cy 40 | # boxes[:, 2:] - boxes[:, :2]), 1) # w, h 41 | 42 | def intersect(box_a, box_b): 43 | """ We resize both tensors to [A,B,2] without new malloc: 44 | [A,2] -> [A,1,2] -> [A,B,2] 45 | [B,2] -> [1,B,2] -> [A,B,2] 46 | Then we compute the area of intersect between box_a and box_b. 47 | Args: 48 | box_a: (tensor) bounding boxes, Shape: [A,4]. 49 | box_b: (tensor) bounding boxes, Shape: [B,4]. 50 | Return: 51 | (tensor) intersection area, Shape: [A,B]. 52 | """ 53 | A = box_a.size(0) 54 | B = box_b.size(0) 55 | max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), 56 | box_b[:, 2:].unsqueeze(0).expand(A, B, 2)) 57 | min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), 58 | box_b[:, :2].unsqueeze(0).expand(A, B, 2)) 59 | inter = torch.clamp((max_xy - min_xy), min=0) 60 | return inter[:, :, 0] * inter[:, :, 1] 61 | 62 | 63 | def jaccard(box_a, box_b): 64 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap 65 | is simply the intersection over union of two boxes. Here we operate on 66 | ground truth boxes and default boxes. 67 | E.g.: 68 | A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) 69 | Args: 70 | box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4] 71 | box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4] 72 | Return: 73 | jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)] 74 | """ 75 | inter = intersect(box_a, box_b) 76 | area_a = ((box_a[:, 2]-box_a[:, 0]) * 77 | (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B] 78 | area_b = ((box_b[:, 2]-box_b[:, 0]) * 79 | (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B] 80 | union = area_a + area_b - inter 81 | return inter / union # [A,B] 82 | 83 | 84 | def match(threshold, truths, priors, variances, labels, loc_t, conf_t, idx, 85 | modeltype = 'SSD300'): 86 | """Match each prior box with the ground truth box of the highest jaccard 87 | overlap, encode the bounding boxes, then return the matched indices 88 | corresponding to both confidence and location preds. 89 | 90 | For KL Loss, we need to predict everything in x1y1x2y2 format, so the matching should not transform to center form. 91 | 92 | Args: 93 | threshold: (float) The overlap threshold used when matching boxes. 94 | truths: (tensor) Ground truth boxes, Shape: [num_obj, 4]. 95 | priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4]. 96 | variances: (tensor) Variances corresponding to each prior coord, 97 | Shape: [num_priors, 4]. 98 | labels: (tensor) All the class labels for the image, Shape: [num_obj]. 99 | loc_t: (tensor) Tensor to be filled w/ encoded location targets. 100 | conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds. 101 | idx: (int) current batch index 102 | Return: 103 | The matched indices corresponding to 1) location and 2) confidence preds. 104 | """
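# Added note (hedged): the matching below is two-way. Every prior is assigned
# its best-overlapping ground truth, and every ground truth additionally claims
# its single best prior (whose overlap is force-set to 2 via index_fill_ so it
# can never fall below the threshold). Priors whose best overlap is still below
# the threshold are labelled 0 (background).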
105 | # jaccard index 106 | # if modeltype != 'SSD300KL': 107 | overlaps = jaccard( 108 | truths, 109 | point_form(priors) 110 | ) 111 | # else: 112 | # overlaps = jaccard( 113 | # truths, 114 | # priors # already in point form if KL-Loss is used 115 | # ) 116 | # (Bipartite Matching) 117 | # [1,num_objects] best prior for each ground truth 118 | best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True) 119 | # [1,num_priors] best ground truth for each prior 120 | best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True) 121 | best_truth_idx.squeeze_(0) 122 | best_truth_overlap.squeeze_(0) 123 | best_prior_idx.squeeze_(1) 124 | best_prior_overlap.squeeze_(1) 125 | best_truth_overlap.index_fill_(0, best_prior_idx, 2) # ensure best prior 126 | # TODO refactor: index best_prior_idx with long tensor 127 | # ensure every gt matches with its prior of max overlap 128 | for j in range(best_prior_idx.size(0)): 129 | best_truth_idx[best_prior_idx[j]] = j 130 | matches = truths[best_truth_idx] # Shape: [num_priors,4] 131 | conf = labels[best_truth_idx] + 1 # Shape: [num_priors] 132 | conf[best_truth_overlap < threshold] = 0 # label as background 133 | 134 | # If KL Loss, encoding shouldn't use center-offset form 135 | loc = encode(matches, priors, variances, modeltype) 136 | 137 | 138 | loc_t[idx] = loc # [num_priors,4] encoded offsets to learn 139 | conf_t[idx] = conf # [num_priors] top class label for each prior 140 | 141 | 142 | def encode(matched, priors, variances, modeltype = 'SSD300'): 143 | """ 144 | Use of 'variance' not discussed in paper, see blogpost: https://leimao.github.io/blog/Bounding-Box-Encoding-Decoding/ 145 | corroborated by original author: https://github.com/weiliu89/caffe/issues/155#issuecomment-243541464 146 | and more: https://github.com/rykov8/ssd_keras/issues/53 147 | 148 | Encode the variances from the priorbox layers into the ground truth boxes 149 | we have matched (based on jaccard overlap) with the prior boxes. 150 | Args: 151 | matched: (tensor) Coords of ground truth for each prior in point-form 152 | Shape: [num_priors, 4]. 153 | priors: (tensor) Prior boxes in center-offset form 154 | Shape: [num_priors,4]. 155 | variances: (list[float]) Variances of priorboxes 156 | Return: 157 | encoded boxes (tensor), Shape: [num_priors, 4] 158 | """
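# Added note (hedged): worked example with variances = [0.1, 0.2]. For
# matched (point-form) = (0.4, 0.4, 0.6, 0.6) and prior (center-size) =
# (0.5, 0.5, 0.25, 0.25) the centers coincide, so g_cxcy = (0, 0);
# g_wh = log(0.2 / 0.25) / 0.2 ≈ -1.116 for both width and height.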
159 | 160 | # transform everything back to center-form 161 | 162 | 163 | # dist b/t match center and prior's center 164 | g_cxcy = (matched[:, :2] + matched[:, 2:])/2 - priors[:, :2] 165 | # encode variance 166 | # todo: check that this works correctly; priors is in point-form 167 | g_cxcy /= (variances[0] * priors[:, 2:]) 168 | # match wh / prior wh 169 | g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:] 170 | g_wh = torch.log(g_wh) / variances[1] 171 | # return target for smooth_l1_loss 172 | # todo 173 | if modeltype == 'SSD300KL': 174 | # transform to x1y1x2y2 form 175 | return point_form(torch.cat([g_cxcy, g_wh], 1)) # [num_priors,4] 176 | 177 | else: 178 | return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4] 179 | 180 | 181 | # Adapted from https://github.com/Hakuyume/chainer-ssd 182 | def decode(loc, priors, variances, modeltype = 'SSD300'): 183 | """ 184 | Use of 'variance' not discussed in paper, see blogpost: https://leimao.github.io/blog/Bounding-Box-Encoding-Decoding/ 185 | 186 | Decode locations from predictions using priors to undo 187 | the encoding we did for offset regression at train time. 188 | Args: 189 | loc (tensor): location predictions for loc layers, 190 | Shape: [num_priors,4] 191 | priors (tensor): Prior boxes in center-offset form. 192 | Shape: [num_priors,4]. 193 | variances: (list[float]) Variances of priorboxes 194 | Return: 195 | decoded bounding box predictions; decoded in x1y1x2y2-form with x1y1 at the upper left and x2y2 at the lower right, all in the range [0,1] 196 | """ 197 | 198 | # todo: check that converting back and forth between box formats works correctly 199 | 200 | 201 | if modeltype == 'SSD300KL': 202 | # transform predictions from x1y1x2y2 to cx, cy, w, h form. The variances are precalculated cx,cy,w,h variances 203 | loc = center_size(loc) 204 | 205 | boxes = torch.cat(( 206 | priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], 207 | priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) 208 | boxes[:, :2] -= boxes[:, 2:] / 2 209 | boxes[:, 2:] += boxes[:, :2] 210 | 211 | return boxes # [num_priors,4] 212 | 213 | 214 | # def batch_decode(loc, priors, variances): 215 | # """ 216 | # Same as decode, but adjusted to work for batches 217 | # 218 | # Decode locations from predictions using priors to undo 219 | # the encoding we did for offset regression at train time. 220 | # Args: 221 | # loc (tensor): location predictions for loc layers, 222 | # Shape: [ensemble_size,batch,num_priors,4] 223 | # priors (tensor): Prior boxes in center-offset form. 224 | # Shape: [ensemble_size,batch, num_priors,4]. 225 | # variances: (list[float]) Variances of priorboxes 226 | # Return: 227 | # decoded bounding box predictions 228 | # """ 229 | # boxes = torch.cat(( 230 | # priors[:,:, :2] + loc[:,:, :2] * variances[0] * priors[:,:, 2:], 231 | # priors[:,:, 2:] * torch.exp(loc[:,:, 2:] * variances[1])), 1) 232 | # boxes[:,:, :2] -= boxes[:,:, 2:] / 2 233 | # boxes[:,:, 2:] += boxes[:,:, :2] 234 | # return boxes 235 | 236 | def log_sum_exp(x): 237 | """Utility function for computing log_sum_exp. 238 | This will be used to determine the unaveraged confidence loss across 239 | all examples in a batch.
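Added note: the implementation below uses the max-subtraction trick for
numerical stability, log sum_j exp(x_j) = x_max + log sum_j exp(x_j - x_max),
so no exponential can overflow.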
240 | Args: 241 | x (Variable(tensor)): conf_preds from conf layers 242 | """ 243 | x_max = x.data.max() 244 | return torch.log(torch.sum(torch.exp(x-x_max), 1, keepdim=True)) + x_max 245 | 246 | 247 | # Original author: Francisco Massa: 248 | # https://github.com/fmassa/object-detection.torch 249 | # Ported to PyTorch by Max deGroot (02/01/2017) 250 | def nms(boxes, scores, overlap=0.5, top_k=200): # todo: overlap default in paper 0.45 251 | """Apply non-maximum suppression at test time to avoid detecting too many 252 | overlapping bounding boxes for a given object. 253 | Args: 254 | boxes: (tensor) The location preds for the img, Shape: [num_priors,4]. 255 | scores: (tensor) The class prediction scores for the img, Shape:[num_priors]. 256 | overlap: (float) The overlap thresh for suppressing unnecessary boxes. 257 | top_k: (int) The maximum number of box preds to consider. (default in paper = 200) 258 | Return: 259 | The indices of the kept boxes with respect to num_priors. 260 | 261 | todo: a pure numpy implementation might be faster according to the issues on github 262 | possible implementation https://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/ 263 | """ 264 | 265 | keep = scores.new(scores.size(0)).zero_().long() 266 | if boxes.numel() == 0: # number of elements 267 | return keep, 0 # no bounding boxes for this class; callers unpack (keep, count) 268 | x1 = boxes[:, 0] 269 | y1 = boxes[:, 1] 270 | x2 = boxes[:, 2] 271 | y2 = boxes[:, 3] 272 | area = torch.mul(x2 - x1, y2 - y1) 273 | v, idx = scores.sort(0) # sort in ascending order 274 | # I = I[v >= 0.01] 275 | idx = idx[-top_k:] # indices of the top-k largest vals 276 | xx1 = boxes.new() 277 | yy1 = boxes.new() 278 | xx2 = boxes.new() 279 | yy2 = boxes.new() 280 | w = boxes.new() 281 | h = boxes.new() 282 | 283 | # keep = torch.Tensor() 284 | count = 0 285 | while idx.numel() > 0: 286 | i = idx[-1] # index of current largest val 287 | # keep.append(i) 288 | keep[count] = i 289 | count += 1 290 | if idx.size(0) == 1: 291 | break 292 | idx = idx[:-1] # remove kept element from view 293 | # load bboxes of next highest vals 294 | torch.index_select(x1, 0, idx, out=xx1) 295 | torch.index_select(y1, 0, idx, out=yy1) 296 | torch.index_select(x2, 0, idx, out=xx2) 297 | torch.index_select(y2, 0, idx, out=yy2) 298 | # store element-wise max with next highest score 299 | xx1 = torch.clamp(xx1, min=x1[i]) 300 | yy1 = torch.clamp(yy1, min=y1[i]) 301 | xx2 = torch.clamp(xx2, max=x2[i]) 302 | yy2 = torch.clamp(yy2, max=y2[i]) 303 | w.resize_as_(xx2) 304 | h.resize_as_(yy2) 305 | w = xx2 - xx1 306 | h = yy2 - yy1 307 | # check sizes of xx1 and xx2.. after each iteration
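# Added note (hedged): worked IoU example for the computation below. Boxes
# [0, 0, 2, 2] and [1, 1, 3, 3] give intersection 1*1 = 1 and union
# 4 + 4 - 1 = 7, so IoU = 1/7 ≈ 0.14; that is below the 0.5 default overlap
# threshold, so the lower-scoring box would survive this suppression round.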
308 | w = torch.clamp(w, min=0.0) 309 | h = torch.clamp(h, min=0.0) 310 | inter = w*h 311 | # IoU = i / (area(a) + area(b) - i) 312 | rem_areas = torch.index_select(area, 0, idx) # load remaining areas 313 | union = (rem_areas - inter) + area[i] 314 | IoU = inter/union # store result in iou 315 | 316 | # keep only elements with an IoU <= overlap 317 | # print(IoU.le(overlap)) # le = less or equal, creates a binary mask 318 | idx = idx[IoU.le(overlap)] 319 | return keep, count 320 | 321 | # 322 | # def nms_uncertainty_sampling(boxes, scores, overlap=0.5, top_k=200, object_treshold = None): 323 | # """ 324 | # This function takes (un)certainty scores and bounding boxes, and returns the top b 325 | # 326 | # 327 | # """ 328 | # 329 | # keep = scores.new(scores.size(0)).zero_().long() 330 | # if boxes.numel() == 0: #number of elements 331 | # return keep # for a class, there are no bounding boxes 332 | # x1 = boxes[:, 0] 333 | # y1 = boxes[:, 1] 334 | # x2 = boxes[:, 2] 335 | # y2 = boxes[:, 3] 336 | # area = torch.mul(x2 - x1, y2 - y1) 337 | # v, idx = scores.sort(0) # sort in ascending order 338 | # # I = I[v >= 0.01] 339 | # idx = idx[-top_k:] # indices of the top-k largest vals 340 | # xx1 = boxes.new() 341 | # yy1 = boxes.new() 342 | # xx2 = boxes.new() 343 | # yy2 = boxes.new() 344 | # w = boxes.new() 345 | # h = boxes.new() 346 | # 347 | # # keep = torch.Tensor() 348 | # count = 0 349 | # while idx.numel() > 0: 350 | # i = idx[-1] # index of current largest val 351 | # # keep.append(i) 352 | # keep[count] = i 353 | # count += 1 354 | # if idx.size(0) == 1: 355 | # break 356 | # idx = idx[:-1] # remove kept element from view 357 | # # load bboxes of next highest vals 358 | # torch.index_select(x1, 0, idx, out=xx1) 359 | # torch.index_select(y1, 0, idx, out=yy1) 360 | # torch.index_select(x2, 0, idx, out=xx2) 361 | # torch.index_select(y2, 0, idx, out=yy2) 362 | # # store element-wise max with next highest score 363 | # xx1 = torch.clamp(xx1, min=x1[i]) 364 | # yy1 = torch.clamp(yy1, min=y1[i]) 365 | # xx2 = torch.clamp(xx2, max=x2[i]) 366 | # yy2 = torch.clamp(yy2, max=y2[i]) 367 | # w.resize_as_(xx2) 368 | # h.resize_as_(yy2) 369 | # w = xx2 - xx1 370 | # h = yy2 - yy1 371 | # # check sizes of xx1 and xx2.. after each iteration 372 | # w = torch.clamp(w, min=0.0) 373 | # h = torch.clamp(h, min=0.0) 374 | # inter = w*h 375 | # # IoU = i / (area(a) + area(b) - i) 376 | # rem_areas = torch.index_select(area, 0, idx) # load remaining areas 377 | # union = (rem_areas - inter) + area[i] 378 | # IoU = inter/union # store result in iou 379 | # 380 | # # keep only elements with an IoU <= overlap 381 | # print(IoU.le(overlap)) #le = less or equal, creates a binary mask 382 | # idx = idx[IoU.le(overlap)] 383 | # return keep, count 384 | -------------------------------------------------------------------------------- /utils/augmentations.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision import transforms 3 | import cv2 4 | import numpy as np 5 | import types 6 | from numpy import random 7 | 8 | 9 | def intersect(box_a, box_b): 10 | max_xy = np.minimum(box_a[:, 2:], box_b[2:]) 11 | min_xy = np.maximum(box_a[:, :2], box_b[:2]) 12 | inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf) 13 | return inter[:, 0] * inter[:, 1] 14 | 15 | 16 | def jaccard_numpy(box_a, box_b): 17 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap 18 | is simply the intersection over union of two boxes.
19 | E.g.: 20 | A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) 21 | Args: 22 | box_a: Multiple bounding boxes, Shape: [num_boxes,4] 23 | box_b: Single bounding box, Shape: [4] 24 | Return: 25 | jaccard overlap: Shape: [box_a.shape[0]] 26 | """ 27 | inter = intersect(box_a, box_b) 28 | area_a = ((box_a[:, 2]-box_a[:, 0]) * 29 | (box_a[:, 3]-box_a[:, 1])) # [A] 30 | area_b = ((box_b[2]-box_b[0]) * 31 | (box_b[3]-box_b[1])) # scalar 32 | union = area_a + area_b - inter 33 | return inter / union # [A] 34 | 35 | 36 | class Compose(object): 37 | """Composes several augmentations together. 38 | Args: 39 | transforms (List[Transform]): list of transforms to compose. 40 | Example: 41 | >>> augmentations.Compose([ 42 | >>> transforms.CenterCrop(10), 43 | >>> transforms.ToTensor(), 44 | >>> ]) 45 | """ 46 | 47 | def __init__(self, transforms): 48 | self.transforms = transforms 49 | 50 | def __call__(self, img, boxes=None, labels=None): 51 | for t in self.transforms: 52 | img, boxes, labels = t(img, boxes, labels) 53 | return img, boxes, labels 54 | 55 | 56 | class Lambda(object): 57 | """Applies a lambda as a transform.""" 58 | 59 | def __init__(self, lambd): 60 | assert isinstance(lambd, types.LambdaType) 61 | self.lambd = lambd 62 | 63 | def __call__(self, img, boxes=None, labels=None): 64 | return self.lambd(img, boxes, labels) 65 | 66 | 67 | class ConvertFromInts(object): 68 | def __call__(self, image, boxes=None, labels=None): 69 | return image.astype(np.float32), boxes, labels 70 | 71 | 72 | class SubtractMeans(object): 73 | def __init__(self, mean): 74 | self.mean = np.array(mean, dtype=np.float32) 75 | 76 | def __call__(self, image, boxes=None, labels=None): 77 | image = image.astype(np.float32) 78 | image -= self.mean 79 | return image.astype(np.float32), boxes, labels 80 | 81 | 82 | class ToAbsoluteCoords(object): 83 | def __call__(self, image, boxes=None, labels=None): 84 | height, width, channels = image.shape 85 | boxes[:, 0] *= width 86 | boxes[:, 2] *= width 87 | boxes[:, 1] *= height 88 | boxes[:, 3] *= height 89 | 90 | return image, boxes, labels 91 | 92 | 93 | class ToPercentCoords(object): 94 | def __call__(self, image, boxes=None, labels=None): 95 | height, width, channels = image.shape 96 | boxes[:, 0] /= width 97 | boxes[:, 2] /= width 98 | boxes[:, 1] /= height 99 | boxes[:, 3] /= height 100 | 101 | return image, boxes, labels 102 | 103 | 104 | class Resize(object): 105 | def __init__(self, size=300): 106 | self.size = size 107 | 108 | def __call__(self, image, boxes=None, labels=None): 109 | image = cv2.resize(image, (self.size, 110 | self.size)) 111 | return image, boxes, labels 112 | 113 | 114 | class RandomSaturation(object): 115 | def __init__(self, lower=0.5, upper=1.5): 116 | self.lower = lower 117 | self.upper = upper 118 | assert self.upper >= self.lower, "saturation upper must be >= lower." 119 | assert self.lower >= 0, "saturation lower must be non-negative."
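# Added note (hedged): this transform scales the saturation (S) channel of an
# HSV float image (image[:, :, 1] below); PhotometricDistort therefore applies
# it between ConvertColor(BGR->HSV) and ConvertColor(HSV->BGR).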
120 | 121 | def __call__(self, image, boxes=None, labels=None): 122 | if random.randint(2): 123 | image[:, :, 1] *= random.uniform(self.lower, self.upper) 124 | 125 | return image, boxes, labels 126 | 127 | 128 | class RandomHue(object): 129 | def __init__(self, delta=18.0): 130 | assert delta >= 0.0 and delta <= 360.0 131 | self.delta = delta 132 | 133 | def __call__(self, image, boxes=None, labels=None): 134 | if random.randint(2): 135 | image[:, :, 0] += random.uniform(-self.delta, self.delta) 136 | image[:, :, 0][image[:, :, 0] > 360.0] -= 360.0 137 | image[:, :, 0][image[:, :, 0] < 0.0] += 360.0 138 | return image, boxes, labels 139 | 140 | 141 | class RandomLightingNoise(object): 142 | def __init__(self): 143 | self.perms = ((0, 1, 2), (0, 2, 1), 144 | (1, 0, 2), (1, 2, 0), 145 | (2, 0, 1), (2, 1, 0)) 146 | 147 | def __call__(self, image, boxes=None, labels=None): 148 | if random.randint(2): 149 | swap = self.perms[random.randint(len(self.perms))] 150 | shuffle = SwapChannels(swap) # shuffle channels 151 | image = shuffle(image) 152 | return image, boxes, labels 153 | 154 | 155 | class ConvertColor(object): 156 | def __init__(self, current='BGR', transform='HSV'): 157 | self.transform = transform 158 | self.current = current 159 | 160 | def __call__(self, image, boxes=None, labels=None): 161 | if self.current == 'BGR' and self.transform == 'HSV': 162 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) 163 | elif self.current == 'HSV' and self.transform == 'BGR': 164 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) 165 | else: 166 | raise NotImplementedError 167 | return image, boxes, labels 168 | 169 | 170 | class RandomContrast(object): 171 | def __init__(self, lower=0.5, upper=1.5): 172 | self.lower = lower 173 | self.upper = upper 174 | assert self.upper >= self.lower, "contrast upper must be >= lower." 175 | assert self.lower >= 0, "contrast lower must be non-negative." 176 | 177 | # expects float image 178 | def __call__(self, image, boxes=None, labels=None): 179 | if random.randint(2): 180 | alpha = random.uniform(self.lower, self.upper) 181 | image *= alpha 182 | return image, boxes, labels 183 | 184 | 185 | class RandomBrightness(object): 186 | def __init__(self, delta=32): 187 | assert delta >= 0.0 188 | assert delta <= 255.0 189 | self.delta = delta 190 | 191 | def __call__(self, image, boxes=None, labels=None): 192 | if random.randint(2): 193 | delta = random.uniform(-self.delta, self.delta) 194 | image += delta 195 | return image, boxes, labels 196 | 197 | 198 | class ToCV2Image(object): 199 | def __call__(self, tensor, boxes=None, labels=None): 200 | return tensor.cpu().numpy().astype(np.float32).transpose((1, 2, 0)), boxes, labels 201 | 202 | 203 | class ToTensor(object): 204 | def __call__(self, cvimage, boxes=None, labels=None): 205 | return torch.from_numpy(cvimage.astype(np.float32)).permute(2, 0, 1), boxes, labels 206 | 207 | 208 | class RandomSampleCrop(object): 209 | """Crop 210 | Arguments: 211 | img (Image): the image being input during training 212 | boxes (Tensor): the original bounding boxes in pt form 213 | labels (Tensor): the class labels for each bbox 214 | mode (float tuple): the min and max jaccard overlaps 215 | Return: 216 | (img, boxes, classes) 217 | img (Image): the cropped image 218 | boxes (Tensor): the adjusted bounding boxes in pt form 219 | labels (Tensor): the class labels for each bbox 220 | """ 221 | def __init__(self): 222 | self.sample_options = ( 223 | # using entire original input image 224 | None, 225 | # sample a patch s.t. 
MIN jaccard w/ obj in .1,.3,.7,.9 226 | (0.1, None), 227 | (0.3, None), 228 | (0.7, None), 229 | (0.9, None), 230 | # randomly sample a patch 231 | (None, None), 232 | ) 233 | 234 | def __call__(self, image, boxes=None, labels=None): 235 | height, width, _ = image.shape 236 | while True: 237 | # randomly choose a mode 238 | mode = random.choice(self.sample_options) 239 | if mode is None: 240 | return image, boxes, labels 241 | 242 | min_iou, max_iou = mode 243 | if min_iou is None: 244 | min_iou = float('-inf') 245 | if max_iou is None: 246 | max_iou = float('inf') 247 | 248 | # max trials (50) 249 | for _ in range(50): 250 | current_image = image 251 | 252 | w = random.uniform(0.3 * width, width) 253 | h = random.uniform(0.3 * height, height) 254 | 255 | # aspect ratio constraint b/t .5 & 2 256 | if h / w < 0.5 or h / w > 2: 257 | continue 258 | 259 | left = random.uniform(width - w) 260 | top = random.uniform(height - h) 261 | 262 | # convert to integer rect x1,y1,x2,y2 263 | rect = np.array([int(left), int(top), int(left+w), int(top+h)]) 264 | 265 | # calculate IoU (jaccard overlap) b/t the cropped and gt boxes 266 | overlap = jaccard_numpy(boxes, rect) 267 | 268 | # retry if either the min or the max overlap constraint is violated 269 | if overlap.min() < min_iou or max_iou < overlap.max(): 270 | continue 271 | 272 | # cut the crop from the image 273 | current_image = current_image[rect[1]:rect[3], rect[0]:rect[2], 274 | :] 275 | 276 | # keep overlap with gt box IF center in sampled patch 277 | centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0 278 | 279 | # mask in all gt boxes whose center is below and to the right of the rect's top-left corner 280 | m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1]) 281 | 282 | # mask in all gt boxes whose center is above and to the left of the rect's bottom-right corner 283 | m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1]) 284 | 285 | # mask in that both m1 and m2 are true 286 | mask = m1 * m2 287 | 288 | # have any valid boxes? try again if not
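# Added note (hedged): example of the center rule above. With
# rect = [50, 50, 200, 200], a gt box [180, 180, 260, 260] has center (220, 220),
# which lies outside the rect, so the box is dropped even though it partially
# overlaps the crop.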
289 | if not mask.any(): 290 | continue 291 | 292 | # take only matching gt boxes 293 | current_boxes = boxes[mask, :].copy() 294 | 295 | # take only matching gt labels 296 | current_labels = labels[mask] 297 | 298 | # should we use the box's left and top corner or the crop's? 299 | current_boxes[:, :2] = np.maximum(current_boxes[:, :2], 300 | rect[:2]) 301 | # adjust to crop (by subtracting crop's left,top) 302 | current_boxes[:, :2] -= rect[:2] 303 | 304 | current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:], 305 | rect[2:]) 306 | # adjust to crop (by subtracting crop's left,top) 307 | current_boxes[:, 2:] -= rect[:2] 308 | 309 | return current_image, current_boxes, current_labels 310 | 311 | 312 | class Expand(object): 313 | def __init__(self, mean): 314 | self.mean = mean 315 | 316 | def __call__(self, image, boxes, labels): 317 | if random.randint(2): 318 | return image, boxes, labels 319 | 320 | height, width, depth = image.shape 321 | ratio = random.uniform(1, 4) 322 | left = random.uniform(0, width*ratio - width) 323 | top = random.uniform(0, height*ratio - height) 324 | 325 | expand_image = np.zeros( 326 | (int(height*ratio), int(width*ratio), depth), 327 | dtype=image.dtype) 328 | expand_image[:, :, :] = self.mean 329 | expand_image[int(top):int(top + height), 330 | int(left):int(left + width)] = image 331 | image = expand_image 332 | 333 | boxes = boxes.copy() 334 | boxes[:, :2] += (int(left), int(top)) 335 | boxes[:, 2:] += (int(left), int(top)) 336 | 337 | return image, boxes, labels 338 | 339 | 340 | class RandomMirror(object): 341 | def __call__(self, image, boxes, classes): 342 | _, width, _ = image.shape 343 | if random.randint(2): 344 | image = image[:, ::-1] 345 | boxes = boxes.copy() 346 | boxes[:, 0::2] = width - boxes[:, 2::-2] # new x1 = width - old x2, new x2 = width - old x1 (cols 2::-2 select [x2, x1]) 347 | return image, boxes, classes 348 | 349 | 350 | class SwapChannels(object): 351 | """Transforms a tensorized image by swapping the channels in the order 352 | specified in the swap tuple.
353 |     Args:
354 |         swaps (int triple): final order of channels
355 |             eg: (2, 1, 0)
356 |     """
357 | 
358 |     def __init__(self, swaps):
359 |         self.swaps = swaps
360 | 
361 |     def __call__(self, image):
362 |         """
363 |         Args:
364 |             image (Tensor): image tensor to be transformed
365 |         Return:
366 |             a tensor with channels swapped according to swap
367 |         """
368 |         # if torch.is_tensor(image):
369 |         #     image = image.data.cpu().numpy()
370 |         # else:
371 |         #     image = np.array(image)
372 |         image = image[:, :, self.swaps]
373 |         return image
374 | 
375 | 
376 | class PhotometricDistort(object):
377 |     def __init__(self):
378 |         self.pd = [
379 |             RandomContrast(),
380 |             ConvertColor(transform='HSV'),
381 |             RandomSaturation(),
382 |             RandomHue(),
383 |             ConvertColor(current='HSV', transform='BGR'),
384 |             RandomContrast()
385 |         ]
386 |         self.rand_brightness = RandomBrightness()
387 |         self.rand_light_noise = RandomLightingNoise()
388 | 
389 |     def __call__(self, image, boxes, labels):
390 |         im = image.copy()
391 |         im, boxes, labels = self.rand_brightness(im, boxes, labels)
392 |         if random.randint(2):
393 |             distort = Compose(self.pd[:-1])
394 |         else:
395 |             distort = Compose(self.pd[1:])
396 |         im, boxes, labels = distort(im, boxes, labels)
397 |         return self.rand_light_noise(im, boxes, labels)
398 | 
399 | 
400 | # class GaussianRandomPixelNoise():
401 | #     # idea taken from the paper "Localization-Aware Active Learning for Object Detection" by Kao, Lee, Sen and Liu
402 | #     # http://www.merl.com/publications/docs/TR2018-166.pdf
403 | #     """
404 | #     To each pixel value gaussian noise is added.
405 | #     The paper doesn't clearly state if the noise differs per channel per pixel or only per pixel.
406 | #     Here we will do it per pixel.
407 | #     """
408 | #
409 | #     def __init__(self, std, image_len = 90000):  # 300*300 pixels
410 | #         self.std = std
411 | #         self.image_len = image_len
412 | #
413 | #     def __call__(self, image):
414 | #
415 | #         # add noise
416 | #         noise = np.random.normal(0, self.std, self.image_len)
417 | #         image = image.flatten()
418 | #         image = image+noise
419 | #
420 | #         # set max and min values
421 | #         low_values = image < 0
422 | #         high_values = image > 255
423 | #
424 | #         image[low_values] = 0
425 | #         image[high_values] = 255
426 | #
427 | #         # reshape
428 | #         # image = image.reshape(300,300,:)
429 | #
430 | #         return image
431 | 
432 | 
433 | def GaussianRandomPixelNoise(image, std, image_len = 90000):  # 300*300 pixels
434 | 
435 |     # add noise (the same noise value is broadcast over the channels of each pixel)
436 |     normal_dist = torch.distributions.normal.Normal(torch.zeros(image_len), torch.ones(image_len) * std)
437 |     noise = normal_dist.sample().unsqueeze(-1).to('cpu')
438 |     image = torch.from_numpy(image).reshape(-1,3)
439 |     image = image+noise
440 | 
441 |     # set max and min values
442 |     # NOTE: clipping is skipped here, as the (dataset) mean per channel is subtracted before the image is pulled
443 |     # through this function (the base transform); the network is also trained on this (on average) zero-mean input.
444 |     # low_values = abs(image) < 0
445 |     # high_values = abs(image) > 255
446 |     #
447 |     # image[low_values] = 0
448 |     # image[high_values] = 255
449 | 
450 |     # reshape
451 |     image = image.reshape(300,300,-1)
452 | 
453 |     return image
454 | 
455 | 
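# Minimal usage sketch for GaussianRandomPixelNoise (illustrative values; it
# assumes a mean-subtracted 300x300 BGR image as a float32 NumPy array and
# returns a torch tensor with the same noise added to every channel of a pixel):
#
#   img = np.random.randn(300, 300, 3).astype(np.float32)  # stand-in image
#   noisy = GaussianRandomPixelNoise(img, std=8.0)          # shape (300, 300, 3)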
462 | 
463 | class SSDAugmentation(object):
464 |     def __init__(self, size=300, mean=(104, 117, 123)):
465 |         self.mean = mean
466 |         self.size = size
467 |         self.augment = Compose([
468 |             ConvertFromInts(),
469 |             ToAbsoluteCoords(),
470 |             PhotometricDistort(),
471 |             Expand(self.mean),
472 |             RandomSampleCrop(),
473 |             RandomMirror(),
474 |             ToPercentCoords(),
475 |             Resize(self.size),
476 |             SubtractMeans(self.mean)
477 |         ])
478 | 
479 |     def __call__(self, img, boxes, labels):
480 |         return self.augment(img, boxes, labels)
481 | 
--------------------------------------------------------------------------------
/layers/functions/detection.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Function
3 | from ..box_utils import decode, nms
4 | from data import voc as cfg
5 | from active_learning_package import uncertainty_helpers
6 | import math
7 | 
8 | 
9 | class Detect(Function):
10 |     """At test time, Detect is the final layer of SSD. Decode location preds,
11 |     apply non-maximum suppression to location predictions based on conf
12 |     scores and threshold to a top_k number of output predictions for both
13 |     confidence score and locations.
14 |     """
15 |     def __init__(self, num_classes, bkg_label, top_k, conf_thresh, nms_thresh,
16 |                  def_forward, merging_method, sampling_strategy, modeltype):
17 |         self.num_classes = num_classes
18 |         self.background_label = bkg_label
19 |         self.top_k = top_k
20 | 
21 |         # Parameters used in nms.
22 |         self.nms_thresh = nms_thresh
23 |         if nms_thresh <= 0:
24 |             raise ValueError('nms_threshold must be positive.')
25 |         self.conf_thresh = conf_thresh
26 |         self.variance = cfg['variance']
27 | 
28 |         ## Active Learning Package variables
29 |         self.def_forward = def_forward
30 |         self.merging_method = merging_method
31 |         self.sampling_strategy = sampling_strategy
32 |         self.do_prob_dist_forward = False
33 |         self.modeltype = modeltype
34 | 
35 |         if self.merging_method != 'pre_nms_avg' and \
36 |            (self.sampling_strategy == 'p-max_localization-stability'
37 |             or self.sampling_strategy == 'no_ensemble_entropy-only'
38 |             or self.sampling_strategy in ['none_covariance', 'none_covariance-obj', 'entropy_covariance', 'entropy_covariance-obj'])\
39 |            and self.modeltype != 'SSD300KL':
40 |             self.do_prob_dist_forward = True
41 | 
42 | 
43 | 
44 | 
45 |     def forward(self, loc_data, conf_data, prior_data, alphas = None):
46 |         """
47 |         Args:
48 |             loc_data: (tensor) Loc preds from loc layers
49 |                 Shape: [batch,num_priors*4]
50 |             conf_data: (tensor) Conf preds from conf layers
51 |                 Shape: [batch*num_priors,num_classes]
52 |             prior_data: (tensor) Prior (default) boxes and variances from priorbox layers
53 |                 Shape: [1,num_priors,4]
54 | 
55 |             Only when using KL-loss:
56 |             alpha: α = log(σ^{2}) where σ^{2} is the variance per bounding box coordinate. The log is used during
57 |                 training to avoid exploding gradients
58 |                 shape: [1, num_priors, 4]
59 |         """
60 |         # the normal forward pass, as described in the SSD paper
61 |         if self.def_forward:
62 | 
63 |             # Merging method = None by default, if None has been passed through
64 |             output = self.default_forward(loc_data, conf_data, prior_data)
65 |             return output
66 | 
67 |         else:
68 |             # if not a regular forward -> an ensemble of SSDs can be used to merge bounding boxes
69 |             # into probabilistic object detections
70 | 
71 |             if self.merging_method == 'pre_nms_avg' and \
72 |                not self.do_prob_dist_forward and\
73 |                self.modeltype != 'SSD300KL':
74 |                 # returns all locs and preds, without applying non-maximum suppression, to allow for pre-nms averaging
75 |                 # for more information, see paper: Miller et al. - Benchmarking Sampling-based Probabilistic Object Detectors
76 |                 output_tup = (loc_data, conf_data, prior_data)
77 |                 return output_tup
78 | 
79 | 
80 |             # elif self.merging_method in ['BSAS','Hungarian'] or 'p-max_localization-stability':
81 |             elif self.do_prob_dist_forward:
82 |                 output, num_boxes_per_class = self.full_prob_dist_forward(loc_data, conf_data, prior_data)
83 |                 # output_tup = (output, prior_data) # todo: do I really need prior data for BSAS merging? -> only used for nms, which is already performed here, or also for IoU calculation?
84 |                 return output, num_boxes_per_class, prior_data
85 | 
86 |             elif self.modeltype == 'SSD300KL':
87 |                 output, num_boxes_per_class = self.full_prob_KL_forward(loc_data, conf_data, prior_data, alphas)
88 |                 return output, num_boxes_per_class, prior_data
89 |             else:
90 |                 raise NotImplementedError()
91 | 
92 | 
93 |     def full_prob_KL_forward(self, loc_data, conf_data, prior_data, alphas):
94 |         """
95 |         Largely copied from the forward with the full probability distribution (full_prob_dist_forward). However,
96 |         the bounding boxes are in point-form (x1,y1,x2,y2) instead of center-form (cx, cy, w, h), and for each corner
97 |         a standard deviation is also returned.
98 | 
99 | 
100 |         :param loc_data: (tensor) Loc preds from loc layers
101 |             Shape: [batch,num_priors*4]
102 |         :param alphas: (tensor) α = log(σ^{2}) where σ^{2} is the variance per bounding box coordinate.
103 |             The log is used during training to avoid exploding gradients
104 |             Shape: [batch,num_priors*4]
105 |         :param conf_data: (tensor) Conf preds from conf layers
106 |             Shape: [batch*num_priors,num_classes]
107 |         :param prior_data: (tensor) Prior boxes and variances from priorbox layers
108 |             Shape: [1,num_priors,4]
109 |         :return:
110 | 
111 |         the default forward returns the top-k (200) detections PER CLASS. The probability distribution over the classes
112 |         is not returned, only the probability for a given detection for a given class.
113 | 
114 |         output in this function is [image_ids, class_id, detection_id, conf_dist + bb], where bb has 8 params
115 |         (x1, y1, x2, y2, std_x1, std_y1, std_x2, std_y2), where in the default forward it is [image_ids, class_id, detection_id, conf_score + bb]
116 |         """
117 | 
118 |         num = loc_data.size(0)  # batch size
119 |         num_priors = prior_data.size(0)
120 | 
121 |         output = torch.zeros(num, self.num_classes, self.top_k, self.num_classes + 8)
122 | 
123 |         conf_preds = conf_data.view(num, num_priors,
124 |                                     self.num_classes).transpose(2, 1)
125 | 
126 |         # why use the confidence mask? => it makes things a lot faster: nms is not applied to all boxes
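        # Shape sketch for PASCAL VOC (illustrative: num_classes = 21,
        # num_priors = 8732, top_k = 200):
        #   conf_preds:        [batch, 21, 8732]     class probabilities per prior
        #   decoded_boxes (i): [8732, 4]             (x1, y1, x2, y2) per image
        #   output:            [batch, 21, 200, 29]  21 class probs + 4 coords + 4 stds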
127 | 
128 | 
129 |         # very useful to filter out the nonzero boxes later
130 |         num_boxes_per_class = torch.zeros(self.num_classes)
131 | 
132 |         # Decode predictions into bboxes.
133 |         for i in range(num):
134 |             # Decode locations from predictions using priors to undo the encoding we did for offset regression at train time.
135 |             # These are the class agnostic bounding boxes!
136 |             decoded_boxes = decode(loc_data[i], prior_data, self.variance, self.modeltype)
137 |             conf_scores = conf_preds[i].clone()
138 | 
139 |             # For each class, perform nms
140 |             for cl in range(1, self.num_classes):
141 | 
142 |                 # self.conf_thresh is 0.01
143 |                 # gt: Computes input > other element-wise. source: https://pytorch.org/docs/stable/torch.html#torch.gt
144 |                 c_mask = conf_scores[cl].gt(
145 |                     self.conf_thresh)  # confidence mask, speeds up processing by not applying nms
146 | 
147 |                 # to all bounding boxes
148 |                 scores = conf_scores[cl][c_mask]
149 |                 if scores.size(0) == 0:
150 |                     continue
151 | 
152 |                 l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
153 |                 boxes = decoded_boxes[l_mask].view(-1, 4)
154 |                 alphas_cl = alphas[i][l_mask].view(-1,4)
155 | 
156 |                 # idx of highest scoring and non-overlapping boxes per class (nms)
157 |                 ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
158 | 
159 |                 # use c_mask to get the conf_scores per bounding box of the other classes for all bbs that exceed the conf threshold for this class
160 |                 conf_scores2 = conf_scores[:, c_mask]
161 |                 assert math.isclose(conf_scores2.sum().item(), conf_scores2.shape[1], rel_tol=1e-03), \
162 |                     "Sum of the probabilities over the classes for each detection must be (relatively close to) 1"
163 | 
164 |                 distributions = conf_scores2[:, ids[:count]]
165 |                 num_boxes_per_class[cl] = count
166 | 
167 |                 # idx of LOWEST scoring and non-overlapping boxes per class for boxes that don't belong
168 |                 # to the background class with a probability larger than the object threshold (IMPORTANT: Background = class 0)
169 | 
170 |                 # [image_id,class_id,detection_id,conf_dist+bb]
171 |                 # [1,1,200,21+8]
172 |                 output[i, cl, :count, :self.num_classes] = distributions.permute(1,
173 |                                                                                  0)  # permute reorders axes (here: 1 to 0 and 0 to 1)
174 |                 output[i, cl, :count, self.num_classes:-4] = boxes[ids[:count]]
175 |                 # transform alphas to standard deviations: α = log(σ^{2}) -> σ = exp(.5 * α)
176 |                 output[i, cl, :count, -4:] = torch.exp(alphas_cl[ids[:count]]*.5)
177 | 
178 |                 #todo [DONE]:
179 |                 # Example from original KL-Loss
180 |                 # def bbox_std_transform_xyxy(boxes, bbox_epsilon, describ=False):
181 |                 #     # bbox_std = np.exp(bbox_epsilon)
182 |                 #     if cfg.PRED_STD_LOG:
183 |                 #         bbox_std = np.exp(bbox_epsilon / 2.)
184 | 
185 | 
186 |         # use cl 5 of image 1 to check: output[0,5,:5,:21]
187 |         return output, num_boxes_per_class  # shape (PASCAL VOC) [1,21,200,29] = [1 = batch, classes + background class, top_k bounding boxes, 29 (class_dist + bounding box coords + coord stds)]
188 | 
189 |     def full_prob_dist_forward(self, loc_data, conf_data, prior_data):
190 |         """
191 |         This function is largely copied from the default forward. However, the default forward returns the top-k (200)
192 |         detections PER CLASS. The probability distribution over the classes is not returned, only the probability for
193 |         a given detection for a given class.
194 | 
195 |         Args:
196 |             loc_data: (tensor) Loc preds from loc layers
197 |                 Shape: [batch,num_priors*4]
198 |             conf_data: (tensor) Conf preds from conf layers
199 |                 Shape: [batch*num_priors,num_classes]
200 |             prior_data: (tensor) Prior boxes and variances from priorbox layers
201 |                 Shape: [1,num_priors,4]
202 | 
203 |         the default forward returns the top-k (200) detections PER CLASS. The probability distribution over the classes
204 |         is not returned, only the probability for a given detection for a given class.
205 | 
206 |         output in this function is [image_ids, class_id, detection_id, conf_dist + bb]
207 |         where in the default forward it is [image_ids, class_id, detection_id, conf_score + bb]
208 | 
209 |         """
210 |         num = loc_data.size(0)  # batch size
211 |         num_priors = prior_data.size(0)
212 | 
213 |         output = torch.zeros(num, self.num_classes, self.top_k, self.num_classes + 4)
214 | 
215 |         conf_preds = conf_data.view(num, num_priors,
216 |                                     self.num_classes).transpose(2, 1)
217 | 
218 |         # why use the confidence mask?
219 |         # => makes it a lot faster, no nms for all boxes
220 | 
221 |         # very useful to filter out the nonzero boxes later
222 |         num_boxes_per_class = torch.zeros(self.num_classes)
223 | 
224 |         # Decode predictions into bboxes.
225 |         for i in range(num):
226 |             # Decode locations from predictions using priors to undo the encoding we did for offset regression at train time.
227 |             # These are the class agnostic bounding boxes!
228 |             decoded_boxes = decode(loc_data[i], prior_data, self.variance)
229 | 
230 |             conf_scores = conf_preds[i].clone()
231 | 
232 |             # For each class, perform nms
233 |             for cl in range(1, self.num_classes):
234 | 
235 |                 # self.conf_thresh is 0.01
236 |                 # gt: Computes input > other element-wise. source: https://pytorch.org/docs/stable/torch.html#torch.gt
237 |                 c_mask = conf_scores[cl].gt(
238 |                     self.conf_thresh)  # confidence mask, speeds up processing by not applying nms
239 | 
240 |                 # to all bounding boxes
241 |                 scores = conf_scores[cl][c_mask]
242 |                 if scores.size(0) == 0:
243 |                     continue
244 | 
245 |                 l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
246 |                 boxes = decoded_boxes[l_mask].view(-1, 4)
247 | 
248 |                 '''What is the ids variable returned here, in relation to the indices in the original conf_preds variable?
249 | 
250 |                 ids are the indices of the boxes (score > 0.01) that survive nms.
251 |                 count is how many boxes are not suppressed by nms, i.e. how many
252 |                 non-overlapping objects of this class there are in this image.
253 |                 nms goes from the highest confidence to the lowest and, when two boxes
254 |                 overlap enough, the lower-scoring one is discarded for this class.
255 | 
256 |                 '''
257 | 
258 |                 # idx of highest scoring and non-overlapping boxes per class (nms)
259 |                 ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
260 | 
261 |                 # use c_mask to get the conf_scores per bounding box of the other classes for all bbs that exceed the conf threshold for this class
262 |                 conf_scores2 = conf_scores[:, c_mask]
263 |                 assert math.isclose(conf_scores2.sum().item(), conf_scores2.shape[1], rel_tol=1e-03), \
264 |                     "Sum of the probabilities over the classes for each detection must be (relatively close to) 1"
265 | 
266 |                 distributions = conf_scores2[:, ids[:count]]
267 |                 num_boxes_per_class[cl] = count
268 | 
269 | 
270 |                 # idx of LOWEST scoring and non-overlapping boxes per class for boxes that don't belong
271 |                 # to the background class with a probability larger than the object threshold (IMPORTANT: Background = class 0)
272 | 
273 |                 # [image_id,class_id,detection_id,conf_dist+bb]
274 |                 # [1,1,200,21+4]
275 |                 output[i, cl, :count, :self.num_classes] = distributions.permute(1,
276 |                                                                                  0)  # permute reorders axes (here: 1 to 0 and 0 to 1)
277 |                 output[i, cl, :count, self.num_classes:] = boxes[ids[:count]]
278 | 
279 | 
280 |         # use cl 5 of image 1 to check: output[0,5,:5,:21]
281 |         return output, num_boxes_per_class  # shape (PASCAL VOC) [1,21,200,25] = [1 = batch, classes + background class, top_k bounding boxes, 25 (class_dist + bounding box)]
282 | 
283 |     def default_forward(self, loc_data, conf_data, prior_data):
284 |         """
285 |         Args:
286 |             loc_data: (tensor) Loc preds from loc layers
287 |                 Shape: [batch,num_priors*4]
288 |             conf_data: (tensor) Conf preds from conf layers
289 |                 Shape: [batch*num_priors,num_classes]
290 |             prior_data: (tensor) Prior boxes and variances from priorbox layers  todo: prior box variances?
291 |                 Shape: [1,num_priors,4]
292 | 
293 |         the default forward returns the top-k (200) detections PER CLASS. The probability distribution over the classes
294 |         is not returned, only the probability for a given detection for a given class.
295 |         :returns:
296 |             output:
297 |                 shape: [image_id,class_id,detection_id,conf_score+bb]
298 | 
299 |         """
300 | 
301 |         num = loc_data.size(0)  # batch size
302 |         num_priors = prior_data.size(0)
303 |         output = torch.zeros(num, self.num_classes, self.top_k, 5)  # 5 = 4 box coordinates + 1 confidence score
304 |         conf_preds = conf_data.view(num, num_priors,
305 |                                     self.num_classes).transpose(2, 1)
306 | 
307 |         # why use the confidence mask?
308 |         # => makes it a lot faster, no nms for all boxes => also used in paper
309 |         for i in range(num):
310 |             # Decode locations from predictions using priors to undo the encoding we did for offset regression at train time.
311 |             # These are the class agnostic bounding boxes!
312 |             # [8732,4]
313 |             decoded_boxes = decode(loc_data[i], prior_data, self.variance, self.modeltype)
314 |             # [21,8732]
315 |             conf_scores = conf_preds[i].clone()
316 | 
317 |             # For each class, perform nms
318 |             for cl in range(1, self.num_classes):
319 | 
320 |                 # self.conf_thresh is 0.01
321 |                 # gt: Computes input > other element-wise. source: https://pytorch.org/docs/stable/torch.html#torch.gt
322 |                 c_mask = conf_scores[cl].gt(self.conf_thresh)  # confidence mask, speeds up processing by not applying nms
323 | 
324 |                 # to all bounding boxes
325 |                 scores = conf_scores[cl][c_mask]
326 |                 if scores.size(0) == 0:
327 |                     continue
328 | 
329 | 
330 |                 l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
331 |                 boxes = decoded_boxes[l_mask].view(-1, 4)
332 | 
333 |                 # idx of highest scoring and non-overlapping boxes per class (nms)
334 |                 ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
335 | 
336 |                 # [image_id,class_id,detection_id,conf+bb]
337 |                 output[i, cl, :count] = \
338 |                     torch.cat((scores[ids[:count]].unsqueeze(1),
339 |                                boxes[ids[:count]]), 1)
340 | 
341 |         flt = output.contiguous().view(num, -1, 5)  # [1,4200,5]
342 |         _, idx = flt[:, :, 0].sort(1, descending=True)  # sort over ALL confidences (not per class)
343 |         _, rank = idx.sort(1)
344 |         flt[(rank >= self.top_k).unsqueeze(-1).expand_as(flt)] = 0  # keep the overall top_k, zero out the rest
345 | 
346 |         # use cl 5 of image 1 to check: output[0,5,:5,:21]
347 |         return output  # shape (PASCAL VOC) [1,21,200,5] = [1 = batch, classes + background class, top_k bounding boxes, 5 (confidence score + bounding box)]
--------------------------------------------------------------------------------
/active_learning_package/voc_eval_helpers.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import pickle
4 | import xml.etree.ElementTree as ET
5 | import time
6 | import sys
7 | 
8 | import torch
9 | from torch.autograd import Variable
10 | 
11 | import data
12 | from . import helpers
13 | 
14 | 
15 | def eval(test_dataset, args, net, al_iteration, eval_ensemble_idx = 99999, epochs_test = False, train_iters = None, use_dataset_image_ids = False):
16 |     """
17 |     largely copied from eval.py from the original pytorch SSD repository: https://github.com/amdegroot/ssd.pytorch
18 |     Slightly adjusted to fit in this active learning module
19 |     """
20 |     print('start VOC eval')
21 | 
22 |     num_images = len(test_dataset)
23 | 
24 |     # all detections are collected into:
25 |     #    all_boxes[cls][image] = N x 5 array of detections in
26 |     #    (x1, y1, x2, y2, score)
27 |     if args.dataset in ['VOC07', 'VOC12']:
28 |         labelmap = data.VOC_CLASSES
29 |     elif args.dataset == 'VOC07_1_class':
30 |         labelmap = [args.relevant_class]
31 |     elif args.dataset == 'VOC07_6_class':
32 |         labelmap = args.labelmap
33 |     else:
34 |         raise NotImplementedError()
35 | 
36 | 
37 |     args.summary['eval_model']['num_images_eval'] = num_images
38 |     args.summary['eval_model']['num_objects_eval'] = 'todo'
39 |     args.summary['eval_model']['APs'] = {}
40 | 
41 |     all_boxes = [[[] for _ in range(num_images)]
42 |                  for _ in range(len(labelmap) + 1)]
43 | 
44 |     # timers
45 |     _t = {'im_detect': helpers.Timer(), 'misc': helpers.Timer()}
46 | 
47 |     output_dir = args.experiment_dir + 'eval/'
48 |     print('output dir ', output_dir)
49 |     if not os.path.exists(output_dir):
50 |         os.makedirs(output_dir, exist_ok=True)
51 | 
52 |     if epochs_test:
53 |         det_file = os.path.join(output_dir, 'al-iter_'+str(al_iteration)+'_ensemble_'+str(args.eval_ensemble_idx)+'_'+str('todo')+'_detections.pkl')
54 |     else:
55 |         det_file = os.path.join(output_dir, 'al-iter_'+str(al_iteration)+'_ensemble_'+str(args.eval_ensemble_idx)+'_detections.pkl')
56 | 
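    # Example det_file name this produces (illustrative values: al_iteration = 3,
    # eval_ensemble_idx = 0): <output_dir>/al-iter_3_ensemble_0_detections.pkl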
57 |     # if the detection passes have already been done with this network, reuse the cached detections
58 |     if os.path.isfile(det_file):
59 |         with open(det_file, 'rb') as file:
60 |             all_boxes = pickle.load(file)
61 | 
62 |     else:
63 |         for i in range(num_images):
64 |             im, gt, h, w = test_dataset.pull_item(i)
65 | 
66 |             x = Variable(im.unsqueeze(0))
67 | 
68 |             if args.cuda and torch.cuda.is_available():
69 |                 x = x.cuda()
70 | 
71 |             _t['im_detect'].tic()
72 | 
73 |             detections = net(x).data
74 |             detect_time = _t['im_detect'].toc(average=False)
75 |             # set detections back to cpu
76 |             if args.cuda and torch.cuda.is_available():
77 |                 detections = detections.to('cpu')
78 | 
79 |             # skip j = 0, because it's the background class
80 |             for j in range(1, detections.size(1)):
81 |                 dets = detections[0, j, :]  # shape [200,5]
82 |                 mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t()  # takes the detections that have confidence > 0., expands to (5, 200) and then transposes
83 |                 dets = torch.masked_select(dets, mask).view(-1, 5)
84 |                 if dets.size(0) == 0:
85 |                     continue
86 |                 boxes = dets[:, 1:]
87 |                 boxes[:, 0] *= w
88 |                 boxes[:, 2] *= w
89 |                 boxes[:, 1] *= h
90 |                 boxes[:, 3] *= h
91 |                 scores = dets[:, 0].cpu().numpy()
92 |                 cls_dets = np.hstack((boxes.cpu().numpy(),
93 |                                       scores[:, np.newaxis])).astype(np.float32,
94 |                                                                      copy=False)
95 |                 all_boxes[j][i] = cls_dets
96 | 
97 |             print('im_detect: {:d}/{:d} {:.3f}s \t al iteration: {:d} \t ensemble_idx {:d}'.format(i,
98 |                 num_images, detect_time, int(al_iteration), int(args.eval_ensemble_idx)))
99 | 
100 |         with open(det_file, 'wb') as f:
101 |             pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
102 | 
103 | 
104 |     print('Evaluating detections')
105 |     evaluate_detections(all_boxes,
106 |                         output_dir,
107 |                         test_dataset,
108 |                         args,
109 |                         labelmap,
110 |                         use_dataset_image_ids)
111 | 
112 | 
113 | def evaluate_detections(box_list, output_dir, dataset, args, labelmap, use_dataset_image_ids):
114 |     """
115 |     largely copied from eval.py from the original pytorch SSD repository: https://github.com/amdegroot/ssd.pytorch
116 |     Slightly adjusted to fit in this active learning module
117 |     """
118 |     if args.dataset in ['VOC07', 'VOC07_1_class', 'VOC07_6_class']:
119 | 
120 |         YEAR = '2007'
121 |         devkit_path = args.dataset_root + 'VOC' + YEAR
122 | 
123 |         write_voc_results_file(box_list,
124 |                                dataset,
125 |                                labelmap,
126 |                                devkit_path,
127 |                                args)
128 | 
129 |         do_python_eval(output_dir,
130 |                        False,  # use VOC07 metrics
131 |                        devkit_path,
132 |                        labelmap,
133 |                        args,
134 |                        dataset,
135 |                        use_dataset_image_ids)
136 |     else:
137 |         raise NotImplementedError()
138 | 
139 | def write_voc_results_file(all_boxes,
140 |                            dataset,
141 |                            labelmap,
142 |                            devkit_path,
143 |                            args):
144 |     """
145 |     largely copied from eval.py from the original pytorch SSD repository: https://github.com/amdegroot/ssd.pytorch
146 |     """
147 | 
148 |     for cls_ind, cls in enumerate(labelmap):
149 |         print('Writing {:s} VOC results file'.format(cls))
150 |         filename = get_voc_results_file_template('test',
151 |                                                  cls,
152 |                                                  devkit_path,
153 |                                                  args)
154 | 
155 |         # if the results files were already made with this network, keep them
156 |         if os.path.isfile(filename):
157 |             continue
158 | 
159 |         else:
160 |             with open(filename, 'wt') as f:
161 |                 for im_ind, index in enumerate(dataset.ids):
162 |                     dets = all_boxes[cls_ind+1][im_ind]
163 |                     if len(dets) == 0:
164 |                         continue
165 |                     # the VOCdevkit expects 1-based indices
166 |                     for k in range(dets.shape[0]):
167 |                         f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
168 |                                 format(index[1], dets[k, -1],
169 |                                        dets[k, 0] + 1, dets[k, 1] + 1,
170 |                                        dets[k, 2] + 1, dets[k, 3] + 1))
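                        # Each written line looks like (illustrative values):
                        #   000012 0.912 46.0 68.0 201.0 314.0
                        # i.e. <image_id> <confidence> <x1> <y1> <x2> <y2> in 1-based pixel coords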
171 | 
172 | 
173 | def do_python_eval(output_dir,
174 |                    use_07,
175 |                    devkit_path,
176 |                    labelmap,
177 |                    args,
178 |                    dataset,
179 |                    use_dataset_image_ids):
180 |     """
181 |     largely copied from eval.py from the original pytorch SSD repository: https://github.com/amdegroot/ssd.pytorch
182 |     Slightly adjusted to fit in this active learning module
183 |     """
184 |     annopath = os.path.join(args.dataset_root, 'VOC2007', 'Annotations', '%s.xml')
185 |     if type(args.imageset_test) == list and len(args.imageset_test) == 1:
186 |         imagesetfile = args.imageset_test[0][1]
187 |     else:
188 |         imagesetfile = args.imageset_test
189 |     imgsetpath = os.path.join(args.dataset_root, 'VOC2007', 'ImageSets',
190 |                               'Main', '{:s}.txt')
191 |     cachedir = os.path.join(devkit_path, 'annotations_cache')
192 | 
193 |     # The PASCAL VOC metric changed in 2010
194 |     use_07_metric = use_07
195 |     print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
196 | 
197 |     iou_thresholds = [0.3]
198 |     iou_thresholds.extend(list(np.linspace(0.5, 0.95, 10)))
199 | 
200 |     for iou_threshold in iou_thresholds:
201 |         print('IoU threshold: ', str(iou_threshold), '\n_______________\n')
202 |         args.summary['eval_model']['APs'][str(iou_threshold)] = {}
203 | 
204 |         aps = []
205 |         if not os.path.isdir(output_dir):
206 |             os.mkdir(output_dir)
207 |         for i, cls in enumerate(labelmap):
208 |             filename = get_voc_results_file_template('test', cls, devkit_path, args)  # results file
209 |             rec, prec, ap = voc_eval(
210 |                 filename, annopath, imgsetpath.format(imagesetfile), cls, cachedir,
211 |                 ovthresh=iou_threshold, use_07_metric=use_07_metric, dataset=dataset, use_dataset_image_ids=use_dataset_image_ids)  # todo: imageset_file: '/home/jasper/data/VOCdevkit/VOC2007/ImageSets/Main/test.txt'
212 |             # rec,prec,ap = 0.1,0.2,0.3
213 | 
214 |             aps += [ap]
215 |             print('AP for {} = {:.4f}'.format(cls, ap))
216 |             with open(os.path.join(output_dir, cls + '_pr.pkl'), 'wb') as f:
217 |                 pickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f)
218 | 
219 |             # write summary average precisions
220 |             args.summary['eval_model']['APs'][str(iou_threshold)][str(cls)] = ap
221 | 
222 |         # exclude classes without predictions
223 |         aps = [ap for ap in aps if ap != -1.]
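        # Sketch of the resulting args.summary['eval_model']['APs'] structure
        # (illustrative values):
        #   {'0.3': {'aeroplane': 0.71, ..., 'mAP': 0.64},
        #    '0.5': {...}, ..., '0.95': {...}, 'mmAP': 0.41}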
224 |         args.summary['eval_model']['APs'][str(iou_threshold)]['mAP'] = np.mean(aps)
225 |         print('Mean AP = {:.4f}'.format(np.mean(aps)))
226 |         print('~~~~~~~~')
227 |         print('Results:')
228 |         for ap in aps:
229 |             print('{:.3f}'.format(ap))
230 |         print('{:.3f}'.format(np.mean(aps)))
231 |         print('~~~~~~~~')
232 |         print('')
233 |         print('--------------------------------------------------------------')
234 |         print('Results computed with the **unofficial** Python eval code.')
235 |         print('Results should be very close to the official MATLAB eval code.')
236 |         print('--------------------------------------------------------------')
237 |         print('\n\n\n')
238 | 
239 |     # calculate mmAP (COCO-style mAP, averaged over the 10 IoU thresholds 0.5:0.95)
240 |     args.summary['eval_model']['APs']['mmAP'] = 0
241 |     for key, value in args.summary['eval_model']['APs'].items():
242 |         if key not in ('mmAP', '0.3'):  # the extra 0.3 threshold is not part of the COCO definition
243 |             args.summary['eval_model']['APs']['mmAP'] += args.summary['eval_model']['APs'][key]['mAP']
244 |     args.summary['eval_model']['APs']['mmAP'] /= 10
245 | 
246 | 
247 | def get_voc_results_file_template(image_set, cls, devkit_path, args):
248 |     """
249 |     largely copied from eval.py from the original pytorch SSD repository: https://github.com/amdegroot/ssd.pytorch
250 |     Slightly adjusted to fit in this active learning module
251 |     """
252 | 
253 |     # VOCdevkit/VOC2007/results/det_test_aeroplane.txt
254 |     filename = 'det_' + image_set + '_%s.txt' % (cls)
255 | 
256 |     filedir = args.experiment_dir + 'eval/results/al-iter_' + str(args.al_iteration) + '/ensemble_idx_' + str(args.eval_ensemble_idx)
257 |     # filedir = os.path.join(devkit_path, 'results')  # old filedir from Max De Groot
258 |     if not os.path.exists(filedir):
259 |         os.makedirs(filedir, exist_ok = True)
260 |     path = os.path.join(filedir, filename)
261 |     return path
262 | 
263 | def voc_eval(detpath,
264 |              annopath,
265 |              imagesetfile,
266 |              classname,
267 |              cachedir,
268 |              ovthresh=0.5,
269 |              use_07_metric=True,
270 |              dataset = None,
271 |              use_dataset_image_ids = False):
272 |     """rec, prec, ap = voc_eval(detpath,
273 |                                 annopath,
274 |                                 imagesetfile,
275 |                                 classname,
276 |                                 [ovthresh],
277 |                                 [use_07_metric])
278 |     Top level function that does the PASCAL VOC evaluation.
279 |     detpath: Path to detections
280 |         detpath.format(classname) should produce the detection results file.
281 |     annopath: Path to annotations
282 |         annopath.format(imagename) should be the xml annotations file.
283 |     imagesetfile: Text file containing the list of images, one image per line.
284 |     classname: Category name (duh)
285 |     cachedir: Directory for caching the annotations
286 |     [ovthresh]: Overlap threshold (default = 0.5)
287 |     [use_07_metric]: Whether to use VOC07's 11 point AP computation
288 |         (default True)
289 | 
290 | 
291 |     NOTE: largely copied from eval.py from the original pytorch SSD repository: https://github.com/amdegroot/ssd.pytorch
292 |     Slightly adjusted to fit in this active learning module
293 |     """
294 | 
295 |     # assumes detections are in detpath.format(classname)
296 |     # assumes annotations are in annopath.format(imagename)
297 |     # assumes imagesetfile is a text file with each line an image name
298 |     # cachedir caches the annotations in a pickle file
299 |     # first load gt
300 |     if not os.path.isdir(cachedir):
301 |         os.mkdir(cachedir)
302 |     cachefile = os.path.join(cachedir, 'annots.pkl')  # cachefile of correct annotations/truth values
303 |     # read list of images
304 |     with open(imagesetfile, 'r') as f:
305 |         lines = f.readlines()
306 |     imagenames = [x.strip() for x in lines]
307 |     if not os.path.isfile(cachefile):
308 |         # load annots
309 |         recs = {}
310 |         for i, imagename in enumerate(imagenames):
311 |             recs[imagename] = parse_rec(annopath % (imagename))
312 |             if i % 100 == 0:
313 |                 print('Reading annotation for {:d}/{:d}'.format(
314 |                     i + 1, len(imagenames)))
315 |         # save
316 |         print('Saving cached annotations to {:s}'.format(cachefile))
317 |         with open(cachefile, 'wb') as f:
318 |             pickle.dump(recs, f)
319 |     else:
320 |         # load
321 |         with open(cachefile, 'rb') as f:
322 |             recs = pickle.load(f)
323 | 
324 |     # extract gt objects for this class
325 |     class_recs = {}
326 |     npos = 0
327 | 
328 |     if use_dataset_image_ids:
329 |         for imagename in dataset.ids:
330 |             imagename = imagename[1]
331 |             R = [obj for obj in recs[imagename] if obj['name'] == classname]
332 |             bbox = np.array([x['bbox'] for x in R])
333 |             difficult = np.array([x['difficult'] for x in R]).astype(bool)
334 |             det = [False] * len(R)
335 |             npos = npos + sum(~difficult)
336 |             class_recs[imagename] = {'bbox': bbox,
337 |                                      'difficult': difficult,
338 |                                      'det': det}
339 |     else:
340 |         for imagename in imagenames:
341 |             R = [obj for obj in recs[imagename] if obj['name'] == classname]
342 |             bbox = np.array([x['bbox'] for x in R])
343 |             difficult = np.array([x['difficult'] for x in R]).astype(bool)
344 |             det = [False] * len(R)
345 |             npos = npos + sum(~difficult)
346 |             class_recs[imagename] = {'bbox': bbox,
347 |                                      'difficult': difficult,
348 |                                      'det': det}
349 | 
350 |     # read detections (see results folder in VOCDevkit)
351 |     detfile = detpath.format(classname)
352 |     with open(detfile, 'r') as f:
353 |         lines = f.readlines()
354 |     if any(lines):
355 | 
356 |         splitlines = [x.strip().split(' ') for x in lines]
357 |         image_ids = [x[0] for x in splitlines]
358 |         confidence = np.array([float(x[1]) for x in splitlines])
359 |         BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
360 | 
361 |         # sort by confidence
362 |         sorted_ind = np.argsort(-confidence)
363 |         sorted_scores = np.sort(-confidence)
364 |         BB = BB[sorted_ind, :]
365 |         image_ids = [image_ids[x] for x in sorted_ind]
366 | 
367 |         # go down dets and mark TPs and FPs
368 |         nd = len(image_ids)
369 |         tp = np.zeros(nd)
370 |         fp = np.zeros(nd)
371 |         for d in range(nd):
372 |             R = class_recs[image_ids[d]]  # can result in a KeyError if class_recs doesn't have the image_id (class_recs holds gt for all images in imagenames, while recs is taken from the cache file) todo
373 |             bb = BB[d, :].astype(float)
374 |             ovmax = -np.inf
375 |             BBGT = R['bbox'].astype(float)
376 |             if BBGT.size > 0:
377 |                 # compute overlaps
378 |                 # intersection
379 |                 ixmin = np.maximum(BBGT[:, 0], bb[0])
380 |                 iymin = np.maximum(BBGT[:, 1], bb[1])
381 |                 ixmax = np.minimum(BBGT[:, 2], bb[2])
382 |                 iymax = np.minimum(BBGT[:, 3], bb[3])
383 |                 iw = np.maximum(ixmax - ixmin, 0.)
384 |                 ih = np.maximum(iymax - iymin, 0.)
385 |                 inters = iw * ih
386 |                 uni = ((bb[2] - bb[0]) * (bb[3] - bb[1]) +
387 |                        (BBGT[:, 2] - BBGT[:, 0]) *
388 |                        (BBGT[:, 3] - BBGT[:, 1]) - inters)
389 |                 overlaps = inters / uni
390 |                 ovmax = np.max(overlaps)
391 |                 jmax = np.argmax(overlaps)
392 | 
393 |             if ovmax > ovthresh:
394 |                 if not R['difficult'][jmax]:
395 |                     if not R['det'][jmax]:
396 |                         tp[d] = 1.
397 |                         R['det'][jmax] = 1
398 |                     else:
399 |                         fp[d] = 1.
400 |             else:
401 |                 fp[d] = 1.
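        # Worked example (illustrative): with detections already sorted by
        # confidence, tp = [1, 0, 1], fp = [0, 1, 0] and npos = 2 give
        #   cumsum -> tp = [1, 1, 2], fp = [0, 1, 1]
        #   rec  = [0.5, 0.5, 1.0]
        #   prec = [1.0, 0.5, 0.667]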
402 | 
403 |         # compute precision recall
404 |         fp = np.cumsum(fp)
405 |         tp = np.cumsum(tp)
406 |         rec = tp / float(npos)
407 |         # avoid divide by zero in case the first detection matches a difficult
408 |         # ground truth
409 |         prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
410 |         ap = voc_ap(rec, prec, use_07_metric)
411 |     else:
412 |         # note: the original default values of -1 (below) can cause negative mAPs, so zeros are used instead
413 |         # rec = -1.
414 |         # prec = -1.
415 |         # ap = -1.
416 |         rec = 0.
417 |         prec = 0.
418 |         ap = 0.
419 |     return rec, prec, ap
420 | 
421 | 
422 | def parse_rec(filename):
423 |     """ Parse a PASCAL VOC xml file
424 |     largely copied from eval.py from the original pytorch SSD repository: https://github.com/amdegroot/ssd.pytorch
425 |     """
426 |     tree = ET.parse(filename)
427 |     objects = []
428 |     for obj in tree.findall('object'):
429 |         obj_struct = {}
430 |         obj_struct['name'] = obj.find('name').text
431 |         obj_struct['pose'] = obj.find('pose').text
432 |         obj_struct['truncated'] = int(obj.find('truncated').text)
433 |         obj_struct['difficult'] = int(obj.find('difficult').text)
434 |         bbox = obj.find('bndbox')
435 |         obj_struct['bbox'] = [int(bbox.find('xmin').text) - 1,
436 |                               int(bbox.find('ymin').text) - 1,
437 |                               int(bbox.find('xmax').text) - 1,
438 |                               int(bbox.find('ymax').text) - 1]
439 |         objects.append(obj_struct)
440 | 
441 |     return objects
442 | 
443 | 
444 | def voc_ap(rec, prec, use_07_metric=True):
445 |     """ ap = voc_ap(rec, prec, [use_07_metric])
446 |     Compute VOC AP given precision and recall.
447 |     If use_07_metric is true, uses the
448 |     VOC 07 11 point method (default: True).
449 |     """
450 |     if use_07_metric:
451 |         # 11 point metric
452 |         ap = 0.
453 |         for t in np.arange(0., 1.1, 0.1):
454 |             if np.sum(rec >= t) == 0:
455 |                 p = 0
456 |             else:
457 |                 p = np.max(prec[rec >= t])
458 |             ap = ap + p / 11.
459 |     else:
460 |         # correct AP calculation
461 |         # first append sentinel values at the end
462 |         mrec = np.concatenate(([0.], rec, [1.]))
463 |         mpre = np.concatenate(([0.], prec, [0.]))
464 | 
465 |         # compute the precision envelope
466 |         for i in range(mpre.size - 1, 0, -1):
467 |             mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
468 | 
469 |         # to calculate area under PR curve, look for points
470 |         # where X axis (recall) changes value
471 |         i = np.where(mrec[1:] != mrec[:-1])[0]
472 | 
473 |         # and sum (\Delta recall) * prec
474 |         ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
475 |     return ap
476 | 
477 | 
--------------------------------------------------------------------------------