├── active_learning_package
│   ├── __init__.py
│   ├── human_active_learning.py
│   ├── uncertainty_helpers.py
│   └── voc_eval_helpers.py
├── utils
│   ├── __init__.py
│   └── augmentations.py
├── layers
│   ├── __init__.py
│   ├── functions
│   │   ├── __init__.py
│   │   ├── prior_box.py
│   │   └── detection.py
│   ├── modules
│   │   ├── __init__.py
│   │   ├── l2norm.py
│   │   ├── kl_loss.py
│   │   └── multibox_loss.py
│   └── box_utils.py
├── doc
│   ├── SSD.jpg
│   ├── ssd.png
│   ├── detection_example.png
│   ├── detection_example2.png
│   └── detection_examples.png
├── Thesis_Jasper_Bakker_Active_Deep_Learning_for_Object_Detection_With_Sampling_Based_Probabilistic_Bounding_Boxes_compressed.pdf
├── run_scripts
│   └── scripts
│       ├── run_script.py
│       ├── boat_image_budget_pre-nms-avg_42_200.job
│       └── 6class_image_budget_pre-nms-avg_42_200.job
├── data
│   ├── horse_seed_set.json
│   ├── sheep_seed_set.json
│   ├── scripts
│   │   ├── VOC2012.sh
│   │   ├── VOC2007.sh
│   │   └── COCO2014.sh
│   ├── car_seed_set.json
│   ├── pottedplant_seed_set.json
│   ├── imageset_files
│   │   ├── sheep_trainval_detect.txt
│   │   ├── sheep_test_detect.txt
│   │   ├── boat_test_detect.txt
│   │   ├── boat_trainval_detect.txt
│   │   ├── bottle_test_detect.txt
│   │   ├── pottedplant_test_detect.txt
│   │   ├── bottle_trainval_detect.txt
│   │   ├── pottedplant_trainval_detect.txt
│   │   ├── horse_test_detect.txt
│   │   ├── horse_trainval_detect.txt
│   │   ├── car_trainval_detect.txt
│   │   └── car_test_detect.txt
│   ├── boat_seed_set.json
│   ├── bottle_seed_set.json
│   ├── __init__.py
│   ├── config.py
│   ├── 6_class_seed_set.json
│   └── empty_seed.json
├── LICENSE
├── .gitignore
├── create_initial_networks.py
├── README.md
├── requirements
├── create_spoc_features.py
└── ssd.py

/active_learning_package/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .augmentations import SSDAugmentation
--------------------------------------------------------------------------------
/layers/__init__.py:
--------------------------------------------------------------------------------
1 | from .functions import *
2 | from .modules import *
3 |
--------------------------------------------------------------------------------
/doc/SSD.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JappaB/Active_Learning_Object_Detection/HEAD/doc/SSD.jpg
--------------------------------------------------------------------------------
/doc/ssd.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JappaB/Active_Learning_Object_Detection/HEAD/doc/ssd.png
--------------------------------------------------------------------------------
/active_learning_package/human_active_learning.py:
--------------------------------------------------------------------------------
1 | def human_active_learning():
2 |     raise NotImplementedError()
--------------------------------------------------------------------------------
/doc/detection_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JappaB/Active_Learning_Object_Detection/HEAD/doc/detection_example.png
--------------------------------------------------------------------------------
/doc/detection_example2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JappaB/Active_Learning_Object_Detection/HEAD/doc/detection_example2.png
--------------------------------------------------------------------------------
/doc/detection_examples.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JappaB/Active_Learning_Object_Detection/HEAD/doc/detection_examples.png
--------------------------------------------------------------------------------
/layers/functions/__init__.py:
--------------------------------------------------------------------------------
1 | from .detection import Detect
2 | from .prior_box import PriorBox
3 |
4 |
5 | __all__ = ['Detect', 'PriorBox']
6 |
--------------------------------------------------------------------------------
/layers/modules/__init__.py:
--------------------------------------------------------------------------------
1 | from .l2norm import L2Norm
2 | from .multibox_loss import MultiBoxLoss
3 | from .kl_loss import KLLoss
4 |
5 | __all__ = ['L2Norm', 'MultiBoxLoss','KLLoss']
6 |
--------------------------------------------------------------------------------
/Thesis_Jasper_Bakker_Active_Deep_Learning_for_Object_Detection_With_Sampling_Based_Probabilistic_Bounding_Boxes_compressed.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JappaB/Active_Learning_Object_Detection/HEAD/Thesis_Jasper_Bakker_Active_Deep_Learning_for_Object_Detection_With_Sampling_Based_Probabilistic_Bounding_Boxes_compressed.pdf
--------------------------------------------------------------------------------
/run_scripts/scripts/run_script.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | import datetime
4 |
5 |
6 |
7 | def main():
8 |     run_scripts = [
9 |         'boat_image_budget_pre-nms-avg_42_200.job',
10 |         # '6class_image_budget_pre-nms-avg_42_200.job',
11 |     ]
12 |
13 |
14 |     curr_dir = os.getcwd()
15 |
16 |
17 |     for script in run_scripts:
18 |         print('Starting script: ', script)
19 |         print('Time start: ', datetime.datetime.now())
20 |         subprocess.call(['bash', script])
21 |
22 |         print('Time stop: ', datetime.datetime.now())
23 |         print('finished script :)!')
24 |         print('________________________\n\n\n\n\n\n\n')
25 |
26 | if __name__ == '__main__':
27 |     main()
28 |
--------------------------------------------------------------------------------
/data/horse_seed_set.json:
--------------------------------------------------------------------------------
1 | {
2 |     "dataset_name": "VOC07",
3 |     "seed_set": {
4 |         "image_set_idx": [
5 |             "002786",
6 |             "009841",
7 |             "009114",
8 |             "004834",
9 |             "001628",
10 |             "006445",
11 |             "009874",
12 |             "006285",
13 |             "009318"
14 |         ],
15 |         "class_dist": []
16 |     },
17 |     "val_set": {
18 |         "image_set_idx": [
19 |             "001236",
20 |             "004625",
21 |             "007216",
22 |             "005145",
23 |             "009138",
24 |             "008596",
25 |             "003492",
26 |             "007448",
27 |             "009407",
28 |             "005236"
29 |         ],
30 |         "class_dist": []
31 |     },
32 |     "train_set": [
33 |         "002786",
34 |         "009841",
35 |         "009114",
36 |         "004834",
37 |         "001628",
38 |         "006445",
39 |         "009874",
40 |         "006285",
41 |         "009318"
42 |     ]
43 | }
--------------------------------------------------------------------------------
/layers/modules/l2norm.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.autograd import Function
4 | from torch.autograd import Variable
5 | import torch.nn.init as init
6 |
7 | class L2Norm(nn.Module):
8 |     def __init__(self,n_channels, scale):
9 |         super(L2Norm,self).__init__()
10 |         self.n_channels = n_channels
11 |         self.gamma = scale or None
12 |         self.eps = 1e-10
13 |         self.weight = nn.Parameter(torch.Tensor(self.n_channels))
14 |         self.reset_parameters()
15 |
16 |     def reset_parameters(self):
17 |         init.constant_(self.weight,self.gamma)
18 |
19 |     def forward(self, x):
20 |         norm = x.pow(2).sum(dim=1, keepdim=True).sqrt()+self.eps
21 |         #x /= norm
22 |         x = torch.div(x,norm)
23 |         out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x
24 |         return out
25 |
--------------------------------------------------------------------------------
/data/sheep_seed_set.json:
--------------------------------------------------------------------------------
1 | {
2 |     "dataset_name": "VOC07",
3 |     "seed_set": {
4 |         "image_set_idx": [
5 |             "008592",
6 |             "009842",
7 |             "003681",
8 |             "007230",
9 |             "001714",
10 |             "007165",
11 |             "004423",
12 |             "004490",
13 |             "006679",
14 |             "001750",
15 |             "006833"
16 |         ],
17 |         "class_dist": []
18 |     },
19 |     "val_set": {
20 |         "image_set_idx": [
21 |             "003705",
22 |             "002263",
23 |             "005469",
24 |             "009816",
25 |             "000107",
26 |             "002593",
27 |             "003698",
28 |             "006944",
29 |             "000900",
30 |             "004312"
31 |         ],
32 |         "class_dist": []
33 |     },
34 |     "train_set": [
35 |         "008592",
36 |         "009842",
37 |         "003681",
38 |         "007230",
39 |         "001714",
40 |         "007165",
41 |         "004423",
42 |         "004490",
43 |         "006679",
44 |         "001750",
45 |         "006833"
46 |     ]
47 | }
--------------------------------------------------------------------------------
/data/scripts/VOC2012.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Ellis Brown
3 |
4 | start=`date +%s`
5 |
6 | # handle optional download dir
7 | if [ -z "$1" ]
8 |   then
9 |     # navigate to ~/data
10 |     echo "navigating to ~/data/ ..."
11 |     mkdir -p ~/data
12 |     cd ~/data/
13 |   else
14 |     # check if is valid directory
15 |     if [ ! -d $1 ]; then
16 |         echo $1 "is not a valid directory"
17 |         exit 0
18 |     fi
19 |     echo "navigating to" $1 "..."
20 |     cd $1
21 | fi
22 |
23 | echo "Downloading VOC2012 trainval ..."
24 | # Download the data.
25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
26 | echo "Done downloading."
27 |
28 |
29 | # Extract data
30 | echo "Extracting trainval ..."
31 | tar -xvf VOCtrainval_11-May-2012.tar
32 | echo "removing tar ..."
33 | rm VOCtrainval_11-May-2012.tar
34 |
35 | end=`date +%s`
36 | runtime=$((end-start))
37 |
38 | echo "Completed in" $runtime "seconds"
--------------------------------------------------------------------------------
/data/car_seed_set.json:
--------------------------------------------------------------------------------
1 | {
2 |     "dataset_name": "VOC07",
3 |     "seed_set": {
4 |         "image_set_idx": [
5 |             "009863",
6 |             "007208",
7 |             "007821",
8 |             "009900",
9 |             "002533",
10 |             "002643",
11 |             "007374",
12 |             "005020",
13 |             "001980",
14 |             "009106",
15 |             "002691",
16 |             "005756",
17 |             "001360",
18 |             "008706",
19 |             "007475"
20 |         ],
21 |         "class_dist": []
22 |     },
23 |     "val_set": {
24 |         "image_set_idx": [
25 |             "003636",
26 |             "002116",
27 |             "006206",
28 |             "004244",
29 |             "000245",
30 |             "003256",
31 |             "009839",
32 |             "007663",
33 |             "006330",
34 |             "000545"
35 |         ],
36 |         "class_dist": []
37 |     },
38 |     "train_set": [
39 |         "009863",
40 |         "007208",
41 |         "007821",
42 |         "009900",
43 |         "002533",
44 |         "002643",
45 |         "007374",
46 |         "005020",
47 |         "001980",
48 |         "009106",
49 |         "002691",
50 |         "005756",
51 |         "001360",
52 |         "008706",
53 |         "007475"
54 |     ]
55 | }
--------------------------------------------------------------------------------
/data/pottedplant_seed_set.json:
--------------------------------------------------------------------------------
1 | {
2 |     "dataset_name": "VOC07",
3 |     "seed_set": {
4 |         "image_set_idx": [
5 |             "005821",
6 |             "007396",
7 |             "001777",
8 |             "006159",
9 |             "009279",
10 |             "004446",
11 |             "006188",
12 |             "007302",
13 |             "004105",
14 |             "003758",
15 |             "009638",
16 |             "009175",
17 |             "002946",
18 |             "008749",
19 |             "008082",
20 |             "002775"
21 |         ],
22 |         "class_dist": []
23 |     },
24 |     "val_set": {
25 |         "image_set_idx": [
26 |             "007558",
27 |             "000592",
28 |             "003301",
29 |             "007999",
30 |             "004631",
31 |             "007390",
32 |             "007890",
33 |             "006351",
34 |             "000710",
35 |             "001451"
36 |         ],
37 |         "class_dist": []
38 |     },
39 |     "train_set": [
40 |         "005821",
41 |         "007396",
42 |         "001777",
43 |         "006159",
44 |         "009279",
45 |         "004446",
46 |         "006188",
47 |         "007302",
48 |         "004105",
49 |         "003758",
50 |         "009638",
51 |         "009175",
52 |         "002946",
53 |         "008749",
54 |         "008082",
55 |         "002775"
56 |     ]
57 | }
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 JappaB
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /data/scripts/VOC2007.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ellis Brown 3 | 4 | start=`date +%s` 5 | 6 | # handle optional download dir 7 | if [ -z "$1" ] 8 | then 9 | # navigate to ~/data 10 | echo "navigating to ~/data/ ..." 11 | mkdir -p ~/data 12 | cd ~/data/ 13 | else 14 | # check if is valid directory 15 | if [ ! -d $1 ]; then 16 | echo $1 "is not a valid directory" 17 | exit 0 18 | fi 19 | echo "navigating to" $1 "..." 20 | cd $1 21 | fi 22 | 23 | echo "Downloading VOC2007 trainval ..." 24 | # Download the data. 25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar 26 | echo "Downloading VOC2007 test data ..." 27 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar 28 | echo "Done downloading." 29 | 30 | # Extract data 31 | echo "Extracting trainval ..." 32 | tar -xvf VOCtrainval_06-Nov-2007.tar 33 | echo "Extracting test ..." 34 | tar -xvf VOCtest_06-Nov-2007.tar 35 | echo "removing tars ..." 36 | rm VOCtrainval_06-Nov-2007.tar 37 | rm VOCtest_06-Nov-2007.tar 38 | 39 | end=`date +%s` 40 | runtime=$((end-start)) 41 | 42 | echo "Completed in" $runtime "seconds" -------------------------------------------------------------------------------- /data/imageset_files/sheep_trainval_detect.txt: -------------------------------------------------------------------------------- 1 | 000107 2 | 000225 3 | 000233 4 | 000244 5 | 000416 6 | 000581 7 | 000588 8 | 000654 9 | 000676 10 | 000820 11 | 000900 12 | 001191 13 | 001310 14 | 001594 15 | 001598 16 | 001661 17 | 001714 18 | 001733 19 | 001734 20 | 001750 21 | 001872 22 | 002209 23 | 002263 24 | 002267 25 | 002330 26 | 002452 27 | 002593 28 | 002615 29 | 002618 30 | 002648 31 | 002683 32 | 002845 33 | 002873 34 | 002963 35 | 002986 36 | 003161 37 | 003170 38 | 003382 39 | 003593 40 | 003681 41 | 003698 42 | 003705 43 | 003714 44 | 003874 45 | 004239 46 | 004312 47 | 004423 48 | 004490 49 | 004527 50 | 004768 51 | 004842 52 | 004954 53 | 005001 54 | 005058 55 | 005223 56 | 005288 57 | 005319 58 | 005363 59 | 005395 60 | 005469 61 | 005947 62 | 005964 63 | 006128 64 | 006136 65 | 006247 66 | 006251 67 | 006534 68 | 006678 69 | 006679 70 | 006833 71 | 006899 72 | 006944 73 | 007048 74 | 007165 75 | 007217 76 | 007230 77 | 007621 78 | 007859 79 | 008116 80 | 008150 81 | 008223 82 | 008292 83 | 008342 84 | 008349 85 | 008592 86 | 008618 87 | 008871 88 | 008920 89 | 008942 90 | 009227 91 | 009271 92 | 009349 93 | 009417 94 | 009700 95 | 009816 96 | 009842 97 | 009945 98 | -------------------------------------------------------------------------------- /data/imageset_files/sheep_test_detect.txt: -------------------------------------------------------------------------------- 1 | 000062 2 | 000175 3 | 000176 4 | 000376 5 | 000451 6 | 000458 7 | 000475 8 | 000574 9 | 000596 10 | 000627 11 | 000779 12 | 000789 13 | 000875 14 | 000884 15 | 000925 16 | 000992 17 | 001271 18 | 001344 19 | 001416 20 | 001477 21 | 001625 22 | 001645 23 | 001671 24 | 001852 25 | 002033 26 | 002121 27 | 002133 28 | 002137 29 | 002338 30 | 002802 31 | 002882 32 | 003025 33 | 003050 34 | 003069 35 | 003087 36 | 003152 37 | 003190 38 | 003241 39 | 003318 40 | 003366 41 | 003471 42 | 003595 43 | 003914 44 | 004072 45 | 004357 46 | 004582 47 | 004610 48 | 004617 49 | 004642 50 | 004646 51 | 004669 52 | 004774 53 | 004827 54 | 004854 55 | 004969 56 | 005137 57 | 005147 
58 | 005164 59 | 005324 60 | 005578 61 | 005795 62 | 005886 63 | 005915 64 | 005916 65 | 006217 66 | 006491 67 | 006557 68 | 006815 69 | 006832 70 | 006837 71 | 007028 72 | 007076 73 | 007402 74 | 007440 75 | 007444 76 | 007462 77 | 007534 78 | 007596 79 | 007894 80 | 008187 81 | 008192 82 | 008215 83 | 008333 84 | 008334 85 | 008343 86 | 008344 87 | 008598 88 | 008685 89 | 008795 90 | 008912 91 | 008992 92 | 009031 93 | 009169 94 | 009320 95 | 009451 96 | 009569 97 | 009818 98 | 009861 99 | -------------------------------------------------------------------------------- /data/boat_seed_set.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_name": "VOC07", 3 | "seed_set": { 4 | "image_set_idx": [ 5 | "005496", 6 | "007743", 7 | "006965", 8 | "000154", 9 | "002234", 10 | "003337", 11 | "005614", 12 | "004532", 13 | "000184", 14 | "002372", 15 | "000382", 16 | "008498", 17 | "005181", 18 | "007139", 19 | "007685", 20 | "003098", 21 | "003722", 22 | "002659", 23 | "007460", 24 | "007803", 25 | "002208" 26 | ], 27 | "class_dist": [] 28 | }, 29 | "val_set": { 30 | "image_set_idx": [ 31 | "000931", 32 | "002465", 33 | "007361", 34 | "001732", 35 | "003860", 36 | "000826", 37 | "006123", 38 | "009603", 39 | "005705", 40 | "007365" 41 | ], 42 | "class_dist": [] 43 | }, 44 | "train_set": [ 45 | "005496", 46 | "007743", 47 | "006965", 48 | "000154", 49 | "002234", 50 | "003337", 51 | "005614", 52 | "004532", 53 | "000184", 54 | "002372", 55 | "000382", 56 | "008498", 57 | "005181", 58 | "007139", 59 | "007685", 60 | "003098", 61 | "003722", 62 | "002659", 63 | "007460", 64 | "007803", 65 | "002208" 66 | ] 67 | } -------------------------------------------------------------------------------- /data/bottle_seed_set.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_name": "VOC07", 3 | "seed_set": { 4 | "image_set_idx": [ 5 | "009679", 6 | "009388", 7 | "009100", 8 | "007798", 9 | "002881", 10 | "000250", 11 | "004152", 12 | "006576", 13 | "000344", 14 | "008204", 15 | "002253", 16 | "005467", 17 | "009290", 18 | "007457", 19 | "006626", 20 | "000367", 21 | "008931", 22 | "002350", 23 | "007141", 24 | "006363", 25 | "006727", 26 | "000498" 27 | ], 28 | "class_dist": [] 29 | }, 30 | "val_set": { 31 | "image_set_idx": [ 32 | "006409", 33 | "000269", 34 | "006648", 35 | "002641", 36 | "004671", 37 | "007121", 38 | "007649", 39 | "009878", 40 | "000381", 41 | "004886" 42 | ], 43 | "class_dist": [] 44 | }, 45 | "train_set": [ 46 | "009679", 47 | "009388", 48 | "009100", 49 | "007798", 50 | "002881", 51 | "000250", 52 | "004152", 53 | "006576", 54 | "000344", 55 | "008204", 56 | "002253", 57 | "005467", 58 | "009290", 59 | "007457", 60 | "006626", 61 | "000367", 62 | "008931", 63 | "002350", 64 | "007141", 65 | "006363", 66 | "006727", 67 | "000498" 68 | ] 69 | } -------------------------------------------------------------------------------- /run_scripts/scripts/boat_image_budget_pre-nms-avg_42_200.job: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd ../../ 4 | 5 | BASE_DIR=$PWD 6 | echo $BASE_DIR 7 | 8 | EXPERIMENT_DIR=test_boat/ 9 | mkdir "$BASE_DIR"/active_learning_dir/"$EXPERIMENT_DIR" 10 | 11 | 12 | python active_learning_main.py \ 13 | --budget_measured_in_objects False \ 14 | --rescaled_foreground_probs True \ 15 | --sample_select_nms_conf_thresh 0.01 \ 16 | --skip_sample_selection_first_iter False \ 17 | --batch_size 8 \ 18 | 
--sampling_strategy none_covariance-obj \
19 |     --merging_method pre_nms_avg \
20 |     --lr 1e-4 \
21 |     --ensemble_size 3 \
22 |     --annotate_all_objects True \
23 |     --experiment_dir "$EXPERIMENT_DIR" \
24 |     --train_from_basenet_every_iter True \
25 |     --paths_to_weights \
26 |         weights/initial_net_0 \
27 |         weights/initial_net_1 \
28 |         weights/initial_net_2 \
29 |     --trained_models \
30 |         weights/initial_net_0 \
31 |         weights/initial_net_1 \
32 |         weights/initial_net_2 \
33 |     --samples_per_iter 10 10 10 10 10 \
34 |     --eval_every_iter False \
35 |     --debug False \
36 |     --fixed_number_of_epochs 200 \
37 |     --seed 92 \
38 |     --dataset VOC07_1_class \
39 |     --imageset_train boat_trainval_detect \
40 |     --imageset_test boat_test_detect \
41 |     --seed_set_file data/boat_seed_set.json \
42 |     --optimizer SGD \
43 |     --skip_sample_selection_first_iter False \
44 |     --relevant_class boat \
45 |
46 |
47 | wait
48 |
49 | echo end of job
50 |
--------------------------------------------------------------------------------
/run_scripts/scripts/6class_image_budget_pre-nms-avg_42_200.job:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | cd ../../
4 |
5 | BASE_DIR=$PWD
6 | echo $BASE_DIR
7 |
8 | EXPERIMENT_DIR=test_6class/
9 | mkdir "$BASE_DIR"/active_learning_dir/"$EXPERIMENT_DIR"
10 |
11 |
12 | python active_learning_main.py \
13 |     --budget_measured_in_objects False \
14 |     --rescaled_foreground_probs True \
15 |     --sample_select_nms_conf_thresh 0.01 \
16 |     --skip_sample_selection_first_iter False \
17 |     --batch_size 8 \
18 |     --sampling_strategy none_covariance-obj \
19 |     --merging_method pre_nms_avg \
20 |     --lr 1e-4 \
21 |     --ensemble_size 6 \
22 |     --annotate_all_objects True \
23 |     --experiment_dir "$EXPERIMENT_DIR" \
24 |     --train_from_basenet_every_iter True \
25 |     --paths_to_weights \
26 |         weights/initial_net_0 \
27 |         weights/initial_net_1 \
28 |         weights/initial_net_2 \
29 |     --trained_models \
30 |         weights/initial_net_0 \
31 |         weights/initial_net_1 \
32 |         weights/initial_net_2 \
33 |     --samples_per_iter 25 25 25 25 25 25 \
34 |     --eval_every_iter True \
35 |     --debug False \
36 |     --short_gpu False \
37 |     --fixed_number_of_epochs 200 \
38 |     --seed 42 \
39 |     --dataset VOC07_6_class \
40 |     --imageset_train 6_class_trainval_detect \
41 |     --imageset_test 6_class_test_detect \
42 |     --seed_set_file data/6_class_seed_set.json \
43 |     --optimizer SGD \
44 |     --skip_sample_selection_first_iter False
45 |
46 |
47 | wait
48 |
49 | echo end of job
50 |
--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
1 | from .voc0712 import VOCDetection, VOCAnnotationTransform, VOC_CLASSES, VOC_ROOT, VOC_ROOT_LOCAL
2 |
3 | from .config import *
4 | import torch
5 | import cv2
6 | import numpy as np
7 |
8 | def detection_collate(batch):
9 |     """Custom collate fn for dealing with batches of images that have a different
10 |     number of associated object annotations (bounding boxes).
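    (Why a custom collate_fn is needed here: the DataLoader default tries to stack every field of the batch into a single tensor, which fails when images carry different numbers of ground-truth boxes, so the targets are returned as a plain list of per-image tensors instead of being stacked.)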
11 | 12 | Arguments: 13 | batch: (tuple) A tuple of tensor images and lists of annotations 14 | 15 | Return: 16 | A tuple containing: 17 | 1) (tensor) batch of images stacked on their 0 dim 18 | 2) (list of tensors) annotations for a given image are stacked on 19 | 0 dim 20 | """ 21 | targets = [] 22 | imgs = [] 23 | for sample in batch: 24 | imgs.append(sample[0]) 25 | targets.append(torch.FloatTensor(sample[1])) 26 | return torch.stack(imgs, 0), targets 27 | 28 | 29 | def base_transform(image, size, mean): 30 | # cv2.resize: default uses linear interpolation, and doesnt preserve aspact ratio: https://docs.opencv.org/2.4/modules/imgproc/doc/geometric_transformations.html?highlight=resize#void%20resize(InputArray%20src,%20OutputArray%20dst,%20Size%20dsize,%20double%20fx,%20double%20fy,%20int%20interpolation) 31 | x = cv2.resize(image, (size, size)).astype(np.float32) 32 | x -= mean 33 | x = x.astype(np.float32) 34 | return x 35 | 36 | 37 | class BaseTransform: 38 | def __init__(self, size, mean): 39 | # if modeltype == 'SSD300': 40 | # size = 300 41 | self.size = size 42 | self.mean = np.array(mean, dtype=np.float32) 43 | 44 | def __call__(self, image, boxes=None, labels=None): 45 | return base_transform(image, self.size, self.mean), boxes, labels 46 | -------------------------------------------------------------------------------- /data/imageset_files/boat_test_detect.txt: -------------------------------------------------------------------------------- 1 | 000069 2 | 000080 3 | 000105 4 | 000179 5 | 000240 6 | 000295 7 | 000350 8 | 000371 9 | 000375 10 | 000444 11 | 000449 12 | 000481 13 | 000529 14 | 000538 15 | 000558 16 | 000576 17 | 000631 18 | 000792 19 | 000811 20 | 000837 21 | 000907 22 | 000914 23 | 000995 24 | 001049 25 | 001059 26 | 001076 27 | 001141 28 | 001155 29 | 001188 30 | 001227 31 | 001342 32 | 001355 33 | 001394 34 | 001410 35 | 001474 36 | 001487 37 | 001527 38 | 001591 39 | 001634 40 | 001646 41 | 001698 42 | 001705 43 | 001770 44 | 001815 45 | 001822 46 | 001895 47 | 001974 48 | 002029 49 | 002085 50 | 002175 51 | 002225 52 | 002274 53 | 002390 54 | 002395 55 | 002449 56 | 002474 57 | 002580 58 | 002628 59 | 002719 60 | 002764 61 | 002811 62 | 002821 63 | 002948 64 | 002983 65 | 003012 66 | 003073 67 | 003111 68 | 003131 69 | 003409 70 | 003498 71 | 003543 72 | 003571 73 | 003652 74 | 003799 75 | 003836 76 | 003892 77 | 003900 78 | 003910 79 | 003929 80 | 003952 81 | 004123 82 | 004160 83 | 004184 84 | 004225 85 | 004299 86 | 004374 87 | 004382 88 | 004443 89 | 004573 90 | 004613 91 | 004665 92 | 004843 93 | 004899 94 | 004914 95 | 004937 96 | 005022 97 | 005074 98 | 005098 99 | 005112 100 | 005213 101 | 005243 102 | 005272 103 | 005329 104 | 005382 105 | 005427 106 | 005604 107 | 005616 108 | 005670 109 | 005720 110 | 005771 111 | 005849 112 | 005950 113 | 006014 114 | 006048 115 | 006110 116 | 006160 117 | 006164 118 | 006205 119 | 006302 120 | 006332 121 | 006408 122 | 006490 123 | 006528 124 | 006604 125 | 006659 126 | 006889 127 | 006977 128 | 006997 129 | 007066 130 | 007091 131 | 007173 132 | 007233 133 | 007357 134 | 007377 135 | 007400 136 | 007415 137 | 007428 138 | 007455 139 | 007464 140 | 007635 141 | 007660 142 | 007788 143 | 007850 144 | 007952 145 | 007990 146 | 008099 147 | 008145 148 | 008217 149 | 008219 150 | 008249 151 | 008278 152 | 008369 153 | 008373 154 | 008545 155 | 008571 156 | 008578 157 | 008590 158 | 008643 159 | 008704 160 | 008820 161 | 008868 162 | 008869 163 | 008998 164 | 009001 165 | 009026 166 | 009102 167 | 009120 168 | 009137 
169 | 009240
170 | 009423
171 | 009538
172 | 009727
173 | 009728
174 | 009793
175 | 009811
176 | 009885
177 |
--------------------------------------------------------------------------------
/data/config.py:
--------------------------------------------------------------------------------
1 | # config.py
2 | import os.path
3 |
4 | # gets home dir cross platform
5 | HOME = os.path.expanduser("~")
6 |
7 | # for making bounding boxes pretty
8 | COLORS = ((255, 0, 0, 128), (0, 255, 0, 128), (0, 0, 255, 128),
9 |           (0, 255, 255, 128), (255, 0, 255, 128), (255, 255, 0, 128))
10 | MEANS = (104, 117, 123)  # channel means for Pascal VOC
11 |
12 | # SSD300 CONFIGS
13 | voc = {
14 |     'num_classes': 21,
15 |     'dataset_mean': (104, 117, 123),
16 |     'lr_steps': (80000, 100000, 120000),
17 |     'max_iter': 120000,
18 |     'feature_maps': [38, 19, 10, 5, 3, 1],
19 |     'min_dim': 300,
20 |     'steps': [8, 16, 32, 64, 100, 300],
21 |     'min_sizes': [30, 60, 111, 162, 213, 264],
22 |     'max_sizes': [60, 111, 162, 213, 264, 315],
23 |     'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
24 |     'variance': [0.1, 0.2],  # Not discussed in paper, see blogpost: https://leimao.github.io/blog/Bounding-Box-Encoding-Decoding/
25 |     'clip': True,
26 |     'name': 'VOC',
27 | }
28 |
29 |
30 | voc_1_class = {
31 |     'num_classes': 2,
32 |     'dataset_mean': (104, 117, 123),
33 |     'lr_steps': (80000, 100000, 120000),
34 |     'max_iter': 120000,
35 |     'feature_maps': [38, 19, 10, 5, 3, 1],
36 |     'min_dim': 300,
37 |     'steps': [8, 16, 32, 64, 100, 300],
38 |     'min_sizes': [30, 60, 111, 162, 213, 264],
39 |     'max_sizes': [60, 111, 162, 213, 264, 315],
40 |     'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
41 |     'variance': [0.1, 0.2],  # Not discussed in paper, see blogpost: https://leimao.github.io/blog/Bounding-Box-Encoding-Decoding/
42 |     'clip': True,
43 |     'name': 'VOC_1_class',
44 | }
45 |
46 | voc_6_class = {
47 |     'num_classes': 7,
48 |     'dataset_mean': (104, 117, 123),
49 |     'lr_steps': (80000, 100000, 120000),
50 |     'max_iter': 120000,
51 |     'feature_maps': [38, 19, 10, 5, 3, 1],
52 |     'min_dim': 300,
53 |     'steps': [8, 16, 32, 64, 100, 300],
54 |     'min_sizes': [30, 60, 111, 162, 213, 264],
55 |     'max_sizes': [60, 111, 162, 213, 264, 315],
56 |     'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
57 |     'variance': [0.1, 0.2],  # Not discussed in paper, see blogpost: https://leimao.github.io/blog/Bounding-Box-Encoding-Decoding/
58 |     'clip': True,
59 |     'name': 'VOC_6_class',
60 | }
61 |
--------------------------------------------------------------------------------
/data/scripts/COCO2014.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | start=`date +%s`
4 |
5 | # handle optional download dir
6 | if [ -z "$1" ]
7 |   then
8 |     # navigate to ~/data
9 |     echo "navigating to ~/data/ ..."
10 |     mkdir -p ~/data
11 |     cd ~/data/
12 |     mkdir -p ./coco
13 |     cd ./coco
14 |     mkdir -p ./images
15 |     mkdir -p ./annotations
16 |   else
17 |     # check if specified dir is valid
18 |     if [ ! -d $1 ]; then
19 |         echo $1 " is not a valid directory"
20 |         exit 0
21 |     fi
22 |     echo "navigating to " $1 " ..."
23 |     cd $1
24 | fi
25 |
26 | if [ ! -d images ]
27 |   then
28 |     mkdir -p ./images
29 |   fi
30 |
31 | # Download the image data.
32 | cd ./images
33 | echo "Downloading MSCOCO train images ..."
34 | curl -LO http://images.cocodataset.org/zips/train2014.zip
35 | echo "Downloading MSCOCO val images ..."
36 | curl -LO http://images.cocodataset.org/zips/val2014.zip
37 |
38 | cd ../
39 | if [ !
-d annotations] 40 | then 41 | mkdir -p ./annotations 42 | fi 43 | 44 | # Download the annotation data. 45 | cd ./annotations 46 | echo "Downloading MSCOCO train/val annotations ..." 47 | curl -LO http://images.cocodataset.org/annotations/annotations_trainval2014.zip 48 | echo "Finished downloading. Now extracting ..." 49 | 50 | # Unzip data 51 | echo "Extracting train images ..." 52 | unzip ../images/train2014.zip -d ../images 53 | echo "Extracting val images ..." 54 | unzip ../images/val2014.zip -d ../images 55 | echo "Extracting annotations ..." 56 | unzip ./annotations_trainval2014.zip 57 | 58 | echo "Removing zip files ..." 59 | rm ../images/train2014.zip 60 | rm ../images/val2014.zip 61 | rm ./annotations_trainval2014.zip 62 | 63 | echo "Creating trainval35k dataset..." 64 | 65 | # Download annotations json 66 | echo "Downloading trainval35k annotations from S3" 67 | curl -LO https://s3.amazonaws.com/amdegroot-datasets/instances_trainval35k.json.zip 68 | 69 | # combine train and val 70 | echo "Combining train and val images" 71 | mkdir ../images/trainval35k 72 | cd ../images/train2014 73 | find -maxdepth 1 -name '*.jpg' -exec cp -t ../trainval35k {} + # dir too large for cp 74 | cd ../val2014 75 | find -maxdepth 1 -name '*.jpg' -exec cp -t ../trainval35k {} + 76 | 77 | 78 | end=`date +%s` 79 | runtime=$((end-start)) 80 | 81 | echo "Completed in " $runtime " seconds" 82 | -------------------------------------------------------------------------------- /data/imageset_files/boat_trainval_detect.txt: -------------------------------------------------------------------------------- 1 | 000061 2 | 000073 3 | 000130 4 | 000154 5 | 000184 6 | 000232 7 | 000241 8 | 000296 9 | 000370 10 | 000382 11 | 000474 12 | 000513 13 | 000519 14 | 000563 15 | 000608 16 | 000713 17 | 000740 18 | 000742 19 | 000826 20 | 000862 21 | 000906 22 | 000931 23 | 001002 24 | 001121 25 | 001136 26 | 001176 27 | 001205 28 | 001247 29 | 001298 30 | 001325 31 | 001463 32 | 001481 33 | 001484 34 | 001497 35 | 001732 36 | 001847 37 | 001887 38 | 001976 39 | 002000 40 | 002039 41 | 002091 42 | 002208 43 | 002234 44 | 002288 45 | 002302 46 | 002372 47 | 002392 48 | 002403 49 | 002435 50 | 002465 51 | 002479 52 | 002500 53 | 002605 54 | 002653 55 | 002659 56 | 002738 57 | 002838 58 | 003008 59 | 003065 60 | 003098 61 | 003223 62 | 003284 63 | 003337 64 | 003404 65 | 003413 66 | 003639 67 | 003657 68 | 003669 69 | 003722 70 | 003797 71 | 003811 72 | 003860 73 | 003861 74 | 003863 75 | 003919 76 | 003974 77 | 004170 78 | 004194 79 | 004241 80 | 004246 81 | 004258 82 | 004325 83 | 004371 84 | 004387 85 | 004464 86 | 004498 87 | 004532 88 | 004592 89 | 004651 90 | 004839 91 | 004885 92 | 004931 93 | 004958 94 | 005073 95 | 005108 96 | 005128 97 | 005150 98 | 005171 99 | 005181 100 | 005217 101 | 005257 102 | 005320 103 | 005325 104 | 005337 105 | 005340 106 | 005358 107 | 005369 108 | 005370 109 | 005413 110 | 005420 111 | 005431 112 | 005496 113 | 005614 114 | 005699 115 | 005705 116 | 005713 117 | 005714 118 | 005742 119 | 005825 120 | 005860 121 | 005914 122 | 006046 123 | 006079 124 | 006098 125 | 006123 126 | 006215 127 | 006281 128 | 006304 129 | 006357 130 | 006436 131 | 006542 132 | 006549 133 | 006660 134 | 006673 135 | 006773 136 | 006867 137 | 006874 138 | 006953 139 | 006965 140 | 006983 141 | 007052 142 | 007125 143 | 007139 144 | 007289 145 | 007361 146 | 007365 147 | 007449 148 | 007460 149 | 007477 150 | 007521 151 | 007533 152 | 007685 153 | 007718 154 | 007743 155 | 007803 156 | 007833 157 | 007980 158 | 008091 
159 | 008112
160 | 008163
161 | 008166
162 | 008251
163 | 008261
164 | 008306
165 | 008449
166 | 008454
167 | 008456
168 | 008498
169 | 008568
170 | 008584
171 | 008604
172 | 008645
173 | 008725
174 | 008730
175 | 008784
176 | 008884
177 | 008900
178 | 008999
179 | 009131
180 | 009177
181 | 009347
182 | 009527
183 | 009533
184 | 009603
185 | 009717
186 | 009836
187 | 009947
188 | 009955
189 |
--------------------------------------------------------------------------------
/layers/functions/prior_box.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from math import sqrt as sqrt
3 | from itertools import product as product
4 | import torch
5 |
6 | import layers.box_utils as box_utils
7 |
8 |
9 | class PriorBox(object):
10 |     """Compute priorbox coordinates in center-offset form for each source
11 |     feature map.
12 |     """
13 |     def __init__(self, cfg, modeltype = 'SSD300'):
14 |         super(PriorBox, self).__init__()
15 |         self.image_size = cfg['min_dim']
16 |         # number of priors for feature map location (either 4 or 6)
17 |         self.num_priors = len(cfg['aspect_ratios'])
18 |         self.variance = cfg['variance'] or [0.1]
19 |         self.feature_maps = cfg['feature_maps']
20 |         self.min_sizes = cfg['min_sizes']
21 |         self.max_sizes = cfg['max_sizes']
22 |         self.steps = cfg['steps']
23 |         self.aspect_ratios = cfg['aspect_ratios']
24 |         self.clip = cfg['clip']
25 |         self.version = cfg['name']
26 |         self.modeltype = modeltype
27 |         for v in self.variance:
28 |             if v <= 0:
29 |                 raise ValueError('Variances must be greater than 0')
30 |
31 |     def forward(self):
32 |         mean = []
33 |         for k, f in enumerate(self.feature_maps):
34 |             for i, j in product(range(f), repeat=2):
35 |                 f_k = self.image_size / self.steps[k]
36 |                 # unit center x,y
37 |                 cx = (j + 0.5) / f_k
38 |                 cy = (i + 0.5) / f_k
39 |
40 |                 # aspect_ratio: 1
41 |                 # rel size: min_size
42 |                 s_k = self.min_sizes[k]/self.image_size
43 |                 mean += [cx, cy, s_k, s_k]
44 |
45 |                 # aspect_ratio: 1
46 |                 # rel size: sqrt(s_k * s_(k+1))
47 |                 s_k_prime = sqrt(s_k * (self.max_sizes[k]/self.image_size))
48 |                 mean += [cx, cy, s_k_prime, s_k_prime]
49 |
50 |                 # rest of aspect ratios
51 |                 for ar in self.aspect_ratios[k]:
52 |                     mean += [cx, cy, s_k*sqrt(ar), s_k/sqrt(ar)]
53 |                     mean += [cx, cy, s_k/sqrt(ar), s_k*sqrt(ar)]
54 |
55 |         # back to torch land
56 |         output = torch.Tensor(mean).view(-1, 4)
57 |         if self.clip:
58 |             output.clamp_(max=1, min=0)
59 |
60 |         # # todo
61 |         # if self.modeltype == 'SSD300KL':
62 |         #     # transform to x1y1x2y2 form
63 |         #     output = box_utils.point_form(output)
64 |         #     # print('todo')
65 |         #
66 |         #     if self.clip:
67 |         #         output.clamp_(max=1, min=0)
68 |
69 |         return output
70 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | 91 | # atom remote-sync package 92 | .remote-sync.json 93 | 94 | # weights 95 | weights/ 96 | 97 | #DS_Store 98 | .DS_Store 99 | 100 | # dev stuff 101 | eval/*/ 102 | eval.ipynb 103 | dev.ipynb 104 | .vscode/ 105 | 106 | # not ready 107 | videos/ 108 | templates/ 109 | data/ssd_dataloader.py 110 | data/datasets/ 111 | doc/visualize.py 112 | read_results.py 113 | ssd300_120000/ 114 | demos/live 115 | webdemo.py 116 | test_data_aug.py 117 | 118 | # attributes 119 | # pycharm 120 | .idea/ 121 | 122 | # temp checkout soln 123 | data/datasets/ 124 | data/ssd_dataloader.py 125 | 126 | # pylint 127 | .pylintrc 128 | 129 | ###### Above gitignores are simply copied from the SSD.pytorch repo by AMDegroot###### 130 | ###### Below are my own (Jasper Bakker) ###### 131 | 132 | 133 | # Coco dataset 134 | data/images/coco 135 | 136 | # weights of experiments 137 | active_learning_dir/*/weights/ 138 | active_learning_dir/*/optimizers/ 139 | 140 | #some large files 141 | **/sample_selection/ 142 | **/eval/*detections* 143 | active_learning_dir/* 144 | 145 | # server stuff 146 | .nfs* 147 | 148 | # pandas core differs locally 149 | data/stats*.pickle 150 | 151 | #some local folder that sometimes contains large files 152 | debug2/* 153 | active_learning_dir/ 154 | -------------------------------------------------------------------------------- /data/imageset_files/bottle_test_detect.txt: -------------------------------------------------------------------------------- 1 | 000136 2 | 000144 3 | 000151 4 | 000178 5 | 000202 6 | 000277 7 | 000280 8 | 000299 9 | 000327 10 | 000335 11 | 000346 12 | 000369 13 | 000414 14 | 000447 15 | 000517 16 | 000587 17 | 000606 18 | 000611 19 | 000737 20 | 000762 21 | 000825 22 | 000893 23 | 001021 24 | 001023 25 | 001035 26 | 001047 27 | 001086 28 | 001105 29 | 001167 30 | 001177 31 | 001244 32 | 001295 33 | 001349 34 | 001354 35 | 001407 36 | 001429 37 | 001431 38 | 001456 39 | 001489 40 | 001569 41 | 001585 42 | 001602 43 | 001631 44 | 001652 45 | 001720 46 | 001812 47 | 001829 48 | 001921 49 | 001925 50 | 001957 51 | 001975 52 | 001992 53 | 002026 54 | 002207 55 | 002231 56 | 002235 57 | 002389 58 | 002429 59 | 002536 60 | 002538 61 | 002560 62 | 002575 63 | 002614 64 | 002656 65 | 002705 66 | 002711 67 | 002712 68 | 002743 69 | 002773 70 | 002857 71 | 002904 72 | 002905 73 | 002945 74 | 002950 75 | 002951 76 | 002982 77 | 003010 78 | 003067 79 | 003071 80 | 003076 81 | 003249 82 | 003278 83 | 003309 84 | 003431 85 | 003446 86 | 003459 87 | 003488 88 | 003544 89 | 003590 90 | 003643 91 | 003649 92 | 003665 93 | 003697 94 | 003707 95 | 003725 96 | 003736 97 | 003776 98 | 
003802 99 | 003842 100 | 003867 101 | 003906 102 | 003928 103 | 003942 104 | 003943 105 | 003944 106 | 004032 107 | 004042 108 | 004055 109 | 004056 110 | 004068 111 | 004072 112 | 004118 113 | 004127 114 | 004134 115 | 004162 116 | 004180 117 | 004236 118 | 004268 119 | 004311 120 | 004348 121 | 004355 122 | 004422 123 | 004469 124 | 004476 125 | 004546 126 | 004599 127 | 004640 128 | 004712 129 | 004717 130 | 004720 131 | 004721 132 | 004819 133 | 004880 134 | 004919 135 | 004922 136 | 005002 137 | 005096 138 | 005216 139 | 005226 140 | 005233 141 | 005276 142 | 005294 143 | 005428 144 | 005442 145 | 005491 146 | 005570 147 | 005622 148 | 005635 149 | 005721 150 | 005926 151 | 005936 152 | 005942 153 | 006003 154 | 006056 155 | 006057 156 | 006072 157 | 006086 158 | 006093 159 | 006152 160 | 006195 161 | 006248 162 | 006359 163 | 006360 164 | 006380 165 | 006390 166 | 006402 167 | 006432 168 | 006452 169 | 006500 170 | 006644 171 | 006662 172 | 006732 173 | 007225 174 | 007237 175 | 007404 176 | 007456 177 | 007494 178 | 007500 179 | 007504 180 | 007532 181 | 007569 182 | 007598 183 | 007652 184 | 007700 185 | 007717 186 | 007739 187 | 007744 188 | 007778 189 | 007783 190 | 007862 191 | 007937 192 | 008089 193 | 008094 194 | 008134 195 | 008182 196 | 008192 197 | 008287 198 | 008330 199 | 008379 200 | 008382 201 | 008400 202 | 008405 203 | 008414 204 | 008458 205 | 008516 206 | 008520 207 | 008555 208 | 008583 209 | 008591 210 | 008627 211 | 008686 212 | 008697 213 | 008740 214 | 008778 215 | 008894 216 | 008902 217 | 008922 218 | 008925 219 | 009012 220 | 009075 221 | 009164 222 | 009171 223 | 009257 224 | 009297 225 | 009311 226 | 009366 227 | 009431 228 | 009482 229 | 009547 230 | 009570 231 | 009626 232 | 009630 233 | 009633 234 | 009645 235 | 009782 236 | 009798 237 | 009840 238 | 009871 239 | 009891 240 | 009929 241 | -------------------------------------------------------------------------------- /create_initial_networks.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import torch 4 | 5 | import active_learning_package.helpers as helpers 6 | from data import config 7 | 8 | def str2bool(v): 9 | return v.lower() in ("yes", "true", "t", "1") 10 | 11 | # parser arguments from train.py 12 | parser = argparse.ArgumentParser(description='Active Learning With Single Shot MultiBox Detector Training With Pytorch') 13 | 14 | parser.add_argument('--modeltype', default='SSD300',choices=['SSD300', 'SSD300KL'], 15 | help='Which model to use: standard SSD or the SSD with uncertainty in the bounding box regression and KL loss ') #SSD300KL doesn't work well 16 | parser.add_argument('--dataset', default='VOC07_1_class', choices=['VOC07', 'VOC12','VOC07_1_class','VOC07_1_class','VOC07_6_class'], 17 | type=str, help='VOC07_1_class is with one class of interest and the background class') 18 | parser.add_argument('--sample_select_nms_conf_thresh', default = 0.01, type = float, 19 | help = 'The conf threshold used in before non maximum suppression. Only detections with a confidence above ' 20 | 'this threshold for a certain class will go trough nms') 21 | parser.add_argument('--paths_to_weights', default=None,type=str, nargs='+', 22 | help='These are the weights that ere used the initial evaluation of the unlabeled dataset') # if no trained model is given, this will return an error when loading the model. 
23 | parser.add_argument('--basenet', default='weights/vgg16_reducedfc.pth', 24 | help='Pretrained base model') 25 | 26 | parser.add_argument('--ensemble_size', default=3,type=int) 27 | parser.add_argument('--num_classes', default=1,type=int, 28 | help='number of classes of interest (so excluding background class') 29 | 30 | 31 | if __name__ == '__main__': 32 | args = parser.parse_args() 33 | if args.dataset in ['VOC12','VOC07']: 34 | args.cfg = config.voc # adapted from pytorch SSD code 35 | 36 | elif args.dataset == 'VOC07_1_class': 37 | args.cfg = config.voc_1_class 38 | 39 | elif args.dataset == 'VOC07_6_class': 40 | args.cfg = config.voc_6_class 41 | 42 | if torch.cuda.is_available(): 43 | device = 'cuda' 44 | torch.set_default_tensor_type('torch.cuda.FloatTensor') 45 | else: 46 | device = 'cpu' 47 | torch.set_default_tensor_type('torch.FloatTensor') 48 | 49 | args.device = device 50 | 51 | args.num_classes = args.num_classes + 1 52 | print('Creating ',args.ensemble_size,' number of SSDs for ',args.num_classes,' (+ 1 background class) number of classes') 53 | print('...') 54 | 55 | 56 | 57 | 58 | for i in range(args.ensemble_size): 59 | # make net 60 | net = helpers.build_sample_selection_net(args, 61 | args.num_classes) 62 | 63 | args.experiment_dir = os.getcwd()+'/' 64 | path = 'weights/initial_net_'+str(i) 65 | 66 | # save net 67 | helpers.save_weights(weights=net, 68 | args=args, 69 | path=path) 70 | print() 71 | 72 | 73 | print('Initial nets created!') -------------------------------------------------------------------------------- /data/imageset_files/pottedplant_test_detect.txt: -------------------------------------------------------------------------------- 1 | 000006 2 | 000070 3 | 000116 4 | 000124 5 | 000149 6 | 000196 7 | 000234 8 | 000389 9 | 000397 10 | 000429 11 | 000575 12 | 000606 13 | 000650 14 | 000692 15 | 000698 16 | 000737 17 | 000784 18 | 000788 19 | 000840 20 | 000890 21 | 000933 22 | 000953 23 | 000983 24 | 001023 25 | 001035 26 | 001070 27 | 001141 28 | 001180 29 | 001255 30 | 001276 31 | 001380 32 | 001391 33 | 001411 34 | 001437 35 | 001469 36 | 001478 37 | 001540 38 | 001631 39 | 001648 40 | 001652 41 | 001665 42 | 001666 43 | 001712 44 | 001715 45 | 001779 46 | 001786 47 | 001811 48 | 001812 49 | 001867 50 | 001869 51 | 001880 52 | 001905 53 | 002018 54 | 002026 55 | 002035 56 | 002074 57 | 002123 58 | 002161 59 | 002240 60 | 002297 61 | 002339 62 | 002381 63 | 002414 64 | 002422 65 | 002451 66 | 002482 67 | 002489 68 | 002510 69 | 002527 70 | 002557 71 | 002604 72 | 002708 73 | 002743 74 | 002753 75 | 002777 76 | 002887 77 | 002925 78 | 002945 79 | 003014 80 | 003020 81 | 003029 82 | 003060 83 | 003062 84 | 003113 85 | 003192 86 | 003221 87 | 003281 88 | 003297 89 | 003323 90 | 003442 91 | 003448 92 | 003488 93 | 003572 94 | 003591 95 | 003624 96 | 003626 97 | 003649 98 | 003665 99 | 003676 100 | 003756 101 | 003761 102 | 003778 103 | 003787 104 | 003882 105 | 003934 106 | 003943 107 | 004027 108 | 004038 109 | 004045 110 | 004081 111 | 004116 112 | 004124 113 | 004155 114 | 004159 115 | 004173 116 | 004211 117 | 004222 118 | 004276 119 | 004311 120 | 004422 121 | 004451 122 | 004486 123 | 004712 124 | 004778 125 | 004780 126 | 004858 127 | 004862 128 | 004893 129 | 004941 130 | 005088 131 | 005092 132 | 005174 133 | 005180 134 | 005184 135 | 005196 136 | 005266 137 | 005284 138 | 005494 139 | 005525 140 | 005556 141 | 005567 142 | 005650 143 | 005661 144 | 005673 145 | 005703 146 | 005754 147 | 005804 148 | 005857 149 | 005866 150 | 005922 151 | 005926 
152 | 005937 153 | 005941 154 | 005943 155 | 005976 156 | 006022 157 | 006049 158 | 006083 159 | 006115 160 | 006231 161 | 006297 162 | 006303 163 | 006307 164 | 006311 165 | 006359 166 | 006426 167 | 006496 168 | 006504 169 | 006646 170 | 006691 171 | 006700 172 | 006712 173 | 006750 174 | 006776 175 | 006792 176 | 006795 177 | 006798 178 | 006807 179 | 006816 180 | 006843 181 | 006861 182 | 006902 183 | 006937 184 | 006964 185 | 007012 186 | 007057 187 | 007106 188 | 007164 189 | 007237 190 | 007248 191 | 007272 192 | 007319 193 | 007332 194 | 007335 195 | 007364 196 | 007406 197 | 007473 198 | 007499 199 | 007549 200 | 007643 201 | 007644 202 | 007722 203 | 007752 204 | 007839 205 | 007913 206 | 008010 207 | 008039 208 | 008071 209 | 008155 210 | 008156 211 | 008162 212 | 008167 213 | 008185 214 | 008196 215 | 008206 216 | 008207 217 | 008239 218 | 008245 219 | 008271 220 | 008380 221 | 008382 222 | 008389 223 | 008457 224 | 008458 225 | 008471 226 | 008547 227 | 008570 228 | 008599 229 | 008626 230 | 008661 231 | 008754 232 | 008821 233 | 008846 234 | 008850 235 | 008904 236 | 008937 237 | 008947 238 | 009172 239 | 009261 240 | 009313 241 | 009367 242 | 009416 243 | 009462 244 | 009514 245 | 009581 246 | 009602 247 | 009606 248 | 009630 249 | 009741 250 | 009750 251 | 009787 252 | 009854 253 | 009925 254 | 009928 255 | -------------------------------------------------------------------------------- /data/imageset_files/bottle_trainval_detect.txt: -------------------------------------------------------------------------------- 1 | 000050 2 | 000170 3 | 000200 4 | 000222 5 | 000224 6 | 000229 7 | 000250 8 | 000269 9 | 000344 10 | 000367 11 | 000381 12 | 000498 13 | 000531 14 | 000559 15 | 000564 16 | 000684 17 | 000726 18 | 000753 19 | 000865 20 | 000929 21 | 000951 22 | 000999 23 | 001045 24 | 001185 25 | 001229 26 | 001272 27 | 001292 28 | 001346 29 | 001393 30 | 001408 31 | 001444 32 | 001451 33 | 001465 34 | 001490 35 | 001493 36 | 001522 37 | 001537 38 | 001563 39 | 001580 40 | 001590 41 | 001673 42 | 001682 43 | 001754 44 | 001756 45 | 001777 46 | 001843 47 | 001963 48 | 001970 49 | 002030 50 | 002098 51 | 002112 52 | 002139 53 | 002221 54 | 002226 55 | 002253 56 | 002287 57 | 002350 58 | 002404 59 | 002442 60 | 002444 61 | 002518 62 | 002545 63 | 002572 64 | 002613 65 | 002641 66 | 002658 67 | 002745 68 | 002881 69 | 002931 70 | 002939 71 | 002941 72 | 002953 73 | 002965 74 | 003056 75 | 003088 76 | 003108 77 | 003122 78 | 003149 79 | 003159 80 | 003181 81 | 003185 82 | 003207 83 | 003219 84 | 003247 85 | 003259 86 | 003262 87 | 003269 88 | 003376 89 | 003449 90 | 003451 91 | 003496 92 | 003556 93 | 003566 94 | 003577 95 | 003678 96 | 003729 97 | 003783 98 | 003859 99 | 003865 100 | 003932 101 | 004075 102 | 004110 103 | 004140 104 | 004152 105 | 004158 106 | 004193 107 | 004279 108 | 004280 109 | 004291 110 | 004296 111 | 004322 112 | 004333 113 | 004370 114 | 004466 115 | 004468 116 | 004479 117 | 004493 118 | 004495 119 | 004508 120 | 004588 121 | 004671 122 | 004701 123 | 004710 124 | 004718 125 | 004742 126 | 004753 127 | 004886 128 | 004916 129 | 004926 130 | 004943 131 | 005052 132 | 005122 133 | 005160 134 | 005195 135 | 005242 136 | 005245 137 | 005304 138 | 005345 139 | 005367 140 | 005385 141 | 005388 142 | 005430 143 | 005441 144 | 005467 145 | 005471 146 | 005541 147 | 005563 148 | 005605 149 | 005606 150 | 005662 151 | 005672 152 | 005814 153 | 005908 154 | 005919 155 | 006061 156 | 006141 157 | 006180 158 | 006189 159 | 006241 160 | 006299 161 | 006335 162 | 006363 
163 | 006381 164 | 006398 165 | 006409 166 | 006448 167 | 006483 168 | 006486 169 | 006503 170 | 006519 171 | 006576 172 | 006610 173 | 006626 174 | 006648 175 | 006687 176 | 006727 177 | 006731 178 | 006828 179 | 006858 180 | 006892 181 | 006908 182 | 006912 183 | 006917 184 | 006919 185 | 006939 186 | 006948 187 | 007007 188 | 007021 189 | 007048 190 | 007056 191 | 007104 192 | 007117 193 | 007121 194 | 007141 195 | 007177 196 | 007191 197 | 007250 198 | 007323 199 | 007351 200 | 007370 201 | 007410 202 | 007457 203 | 007461 204 | 007493 205 | 007511 206 | 007558 207 | 007572 208 | 007631 209 | 007649 210 | 007740 211 | 007751 212 | 007762 213 | 007790 214 | 007798 215 | 007899 216 | 007987 217 | 008042 218 | 008061 219 | 008137 220 | 008139 221 | 008141 222 | 008164 223 | 008204 224 | 008213 225 | 008319 226 | 008467 227 | 008592 228 | 008783 229 | 008841 230 | 008931 231 | 008953 232 | 008962 233 | 008967 234 | 008982 235 | 008989 236 | 009016 237 | 009027 238 | 009100 239 | 009108 240 | 009174 241 | 009242 242 | 009290 243 | 009299 244 | 009375 245 | 009388 246 | 009420 247 | 009458 248 | 009481 249 | 009497 250 | 009585 251 | 009613 252 | 009647 253 | 009654 254 | 009664 255 | 009666 256 | 009679 257 | 009687 258 | 009709 259 | 009726 260 | 009878 261 | 009902 262 | 009917 263 | -------------------------------------------------------------------------------- /data/imageset_files/pottedplant_trainval_detect.txt: -------------------------------------------------------------------------------- 1 | 000052 2 | 000073 3 | 000235 4 | 000307 5 | 000391 6 | 000484 7 | 000592 8 | 000601 9 | 000628 10 | 000661 11 | 000710 12 | 000711 13 | 000729 14 | 000780 15 | 000854 16 | 000865 17 | 000867 18 | 000868 19 | 000889 20 | 000917 21 | 000923 22 | 000962 23 | 001079 24 | 001082 25 | 001091 26 | 001112 27 | 001142 28 | 001149 29 | 001154 30 | 001164 31 | 001212 32 | 001408 33 | 001420 34 | 001430 35 | 001444 36 | 001451 37 | 001464 38 | 001479 39 | 001580 40 | 001593 41 | 001729 42 | 001754 43 | 001777 44 | 001898 45 | 002015 46 | 002049 47 | 002108 48 | 002116 49 | 002176 50 | 002179 51 | 002192 52 | 002284 53 | 002305 54 | 002320 55 | 002347 56 | 002361 57 | 002362 58 | 002378 59 | 002427 60 | 002483 61 | 002542 62 | 002559 63 | 002569 64 | 002594 65 | 002598 66 | 002658 67 | 002668 68 | 002745 69 | 002775 70 | 002798 71 | 002817 72 | 002820 73 | 002924 74 | 002946 75 | 002967 76 | 003002 77 | 003004 78 | 003024 79 | 003034 80 | 003044 81 | 003093 82 | 003146 83 | 003186 84 | 003189 85 | 003202 86 | 003211 87 | 003239 88 | 003284 89 | 003301 90 | 003367 91 | 003374 92 | 003419 93 | 003436 94 | 003450 95 | 003462 96 | 003466 97 | 003506 98 | 003509 99 | 003555 100 | 003622 101 | 003629 102 | 003694 103 | 003699 104 | 003758 105 | 003791 106 | 003796 107 | 003834 108 | 003877 109 | 003924 110 | 003932 111 | 003956 112 | 004017 113 | 004028 114 | 004037 115 | 004105 116 | 004121 117 | 004212 118 | 004274 119 | 004275 120 | 004292 121 | 004364 122 | 004392 123 | 004433 124 | 004446 125 | 004455 126 | 004484 127 | 004558 128 | 004631 129 | 004682 130 | 004699 131 | 004707 132 | 004714 133 | 004742 134 | 004825 135 | 004895 136 | 004898 137 | 004939 138 | 004948 139 | 004960 140 | 004991 141 | 005061 142 | 005129 143 | 005143 144 | 005145 145 | 005160 146 | 005183 147 | 005186 148 | 005190 149 | 005224 150 | 005346 151 | 005515 152 | 005517 153 | 005531 154 | 005605 155 | 005641 156 | 005676 157 | 005682 158 | 005687 159 | 005762 160 | 005794 161 | 005813 162 | 005817 163 | 005821 164 | 005874 165 | 
005894 166 | 005919 167 | 005923 168 | 005985 169 | 005991 170 | 006029 171 | 006069 172 | 006088 173 | 006100 174 | 006159 175 | 006188 176 | 006189 177 | 006270 178 | 006299 179 | 006319 180 | 006351 181 | 006447 182 | 006575 183 | 006605 184 | 006627 185 | 006681 186 | 006726 187 | 006755 188 | 006765 189 | 006777 190 | 006806 191 | 006859 192 | 006956 193 | 007078 194 | 007130 195 | 007180 196 | 007302 197 | 007344 198 | 007356 199 | 007390 200 | 007396 201 | 007451 202 | 007511 203 | 007519 204 | 007558 205 | 007600 206 | 007619 207 | 007624 208 | 007664 209 | 007673 210 | 007704 211 | 007715 212 | 007781 213 | 007795 214 | 007814 215 | 007865 216 | 007890 217 | 007909 218 | 007925 219 | 007956 220 | 007999 221 | 008012 222 | 008029 223 | 008043 224 | 008064 225 | 008082 226 | 008106 227 | 008127 228 | 008140 229 | 008171 230 | 008199 231 | 008216 232 | 008318 233 | 008341 234 | 008424 235 | 008465 236 | 008468 237 | 008536 238 | 008549 239 | 008557 240 | 008587 241 | 008655 242 | 008688 243 | 008733 244 | 008749 245 | 008806 246 | 008835 247 | 008933 248 | 008970 249 | 008987 250 | 008997 251 | 009032 252 | 009068 253 | 009078 254 | 009087 255 | 009123 256 | 009175 257 | 009181 258 | 009194 259 | 009215 260 | 009252 261 | 009279 262 | 009342 263 | 009419 264 | 009443 265 | 009491 266 | 009519 267 | 009634 268 | 009638 269 | 009647 270 | 009686 271 | 009878 272 | 009884 273 | 009887 274 | -------------------------------------------------------------------------------- /data/imageset_files/horse_test_detect.txt: -------------------------------------------------------------------------------- 1 | 000010 2 | 000022 3 | 000056 4 | 000166 5 | 000168 6 | 000237 7 | 000248 8 | 000267 9 | 000319 10 | 000330 11 | 000356 12 | 000378 13 | 000392 14 | 000393 15 | 000410 16 | 000412 17 | 000413 18 | 000434 19 | 000445 20 | 000455 21 | 000475 22 | 000604 23 | 000616 24 | 000623 25 | 000666 26 | 000704 27 | 000737 28 | 000783 29 | 000836 30 | 000866 31 | 000978 32 | 000986 33 | 001013 34 | 001063 35 | 001114 36 | 001183 37 | 001245 38 | 001300 39 | 001317 40 | 001320 41 | 001398 42 | 001412 43 | 001417 44 | 001452 45 | 001574 46 | 001584 47 | 001624 48 | 001635 49 | 001692 50 | 001703 51 | 001769 52 | 001794 53 | 001819 54 | 001823 55 | 001865 56 | 001900 57 | 001939 58 | 001955 59 | 002017 60 | 002031 61 | 002046 62 | 002050 63 | 002071 64 | 002072 65 | 002204 66 | 002269 67 | 002283 68 | 002331 69 | 002360 70 | 002398 71 | 002409 72 | 002421 73 | 002511 74 | 002553 75 | 002573 76 | 002596 77 | 002655 78 | 002692 79 | 002780 80 | 002790 81 | 002819 82 | 002828 83 | 002837 84 | 002909 85 | 002921 86 | 003022 87 | 003041 88 | 003084 89 | 003173 90 | 003182 91 | 003187 92 | 003193 93 | 003222 94 | 003232 95 | 003235 96 | 003237 97 | 003304 98 | 003473 99 | 003531 100 | 003557 101 | 003562 102 | 003579 103 | 003583 104 | 003584 105 | 003653 106 | 003680 107 | 003720 108 | 003734 109 | 003764 110 | 003832 111 | 003852 112 | 003922 113 | 003951 114 | 003955 115 | 003978 116 | 004006 117 | 004084 118 | 004103 119 | 004126 120 | 004177 121 | 004234 122 | 004266 123 | 004294 124 | 004301 125 | 004309 126 | 004381 127 | 004407 128 | 004492 129 | 004522 130 | 004538 131 | 004545 132 | 004561 133 | 004564 134 | 004589 135 | 004629 136 | 004749 137 | 004756 138 | 004803 139 | 004804 140 | 004807 141 | 004865 142 | 004874 143 | 004932 144 | 004957 145 | 004970 146 | 005053 147 | 005132 148 | 005139 149 | 005182 150 | 005198 151 | 005299 152 | 005334 153 | 005339 154 | 005342 155 | 005409 156 | 005456 157 | 
005505 158 | 005513 159 | 005529 160 | 005564 161 | 005572 162 | 005610 163 | 005677 164 | 005722 165 | 005733 166 | 005822 167 | 005882 168 | 005890 169 | 005944 170 | 005967 171 | 005973 172 | 006002 173 | 006106 174 | 006122 175 | 006143 176 | 006207 177 | 006237 178 | 006361 179 | 006364 180 | 006365 181 | 006368 182 | 006401 183 | 006478 184 | 006511 185 | 006521 186 | 006522 187 | 006540 188 | 006555 189 | 006586 190 | 006623 191 | 006649 192 | 006672 193 | 006728 194 | 006756 195 | 006779 196 | 006793 197 | 006811 198 | 006863 199 | 006873 200 | 006875 201 | 006897 202 | 006936 203 | 006982 204 | 007055 205 | 007134 206 | 007229 207 | 007252 208 | 007253 209 | 007265 210 | 007293 211 | 007303 212 | 007310 213 | 007392 214 | 007412 215 | 007509 216 | 007587 217 | 007609 218 | 007616 219 | 007620 220 | 007623 221 | 007636 222 | 007750 223 | 007766 224 | 007774 225 | 007789 226 | 007805 227 | 007874 228 | 007942 229 | 007972 230 | 007975 231 | 008010 232 | 008027 233 | 008059 234 | 008109 235 | 008124 236 | 008147 237 | 008183 238 | 008192 239 | 008205 240 | 008228 241 | 008256 242 | 008259 243 | 008298 244 | 008303 245 | 008314 246 | 008354 247 | 008399 248 | 008476 249 | 008574 250 | 008593 251 | 008605 252 | 008786 253 | 008802 254 | 008804 255 | 008812 256 | 008882 257 | 008899 258 | 009046 259 | 009067 260 | 009074 261 | 009101 262 | 009130 263 | 009231 264 | 009369 265 | 009380 266 | 009505 267 | 009525 268 | 009535 269 | 009569 270 | 009661 271 | 009665 272 | 009722 273 | 009768 274 | 009803 275 | 009903 276 | 009931 277 | 009933 278 | 009937 279 | 009957 280 | -------------------------------------------------------------------------------- /data/imageset_files/horse_trainval_detect.txt: -------------------------------------------------------------------------------- 1 | 000009 2 | 000017 3 | 000133 4 | 000150 5 | 000173 6 | 000194 7 | 000214 8 | 000275 9 | 000328 10 | 000332 11 | 000347 12 | 000372 13 | 000407 14 | 000435 15 | 000470 16 | 000483 17 | 000523 18 | 000524 19 | 000577 20 | 000667 21 | 000695 22 | 000702 23 | 000760 24 | 000764 25 | 000799 26 | 000879 27 | 000904 28 | 000991 29 | 001064 30 | 001071 31 | 001084 32 | 001236 33 | 001241 34 | 001254 35 | 001287 36 | 001337 37 | 001405 38 | 001420 39 | 001515 40 | 001521 41 | 001523 42 | 001556 43 | 001586 44 | 001628 45 | 001632 46 | 001690 47 | 001699 48 | 001711 49 | 001724 50 | 001727 51 | 001730 52 | 001766 53 | 001807 54 | 001894 55 | 001927 56 | 001945 57 | 001960 58 | 002042 59 | 002043 60 | 002183 61 | 002187 62 | 002190 63 | 002213 64 | 002238 65 | 002249 66 | 002261 67 | 002273 68 | 002329 69 | 002405 70 | 002448 71 | 002471 72 | 002520 73 | 002554 74 | 002555 75 | 002633 76 | 002657 77 | 002678 78 | 002684 79 | 002713 80 | 002735 81 | 002774 82 | 002786 83 | 002800 84 | 002804 85 | 002842 86 | 002858 87 | 003015 88 | 003017 89 | 003027 90 | 003121 91 | 003189 92 | 003210 93 | 003290 94 | 003294 95 | 003303 96 | 003307 97 | 003362 98 | 003403 99 | 003424 100 | 003429 101 | 003436 102 | 003492 103 | 003611 104 | 003618 105 | 003625 106 | 003645 107 | 003691 108 | 003696 109 | 003732 110 | 003735 111 | 003754 112 | 003772 113 | 003779 114 | 003889 115 | 003939 116 | 003948 117 | 003960 118 | 003970 119 | 004051 120 | 004077 121 | 004117 122 | 004146 123 | 004168 124 | 004195 125 | 004307 126 | 004323 127 | 004339 128 | 004372 129 | 004389 130 | 004450 131 | 004487 132 | 004535 133 | 004537 134 | 004553 135 | 004625 136 | 004634 137 | 004656 138 | 004662 139 | 004686 140 | 004722 141 | 004788 142 | 004789 143 | 
004815 144 | 004834 145 | 004872 146 | 004902 147 | 004935 148 | 004950 149 | 004968 150 | 004995 151 | 005081 152 | 005094 153 | 005114 154 | 005145 155 | 005161 156 | 005236 157 | 005248 158 | 005278 159 | 005306 160 | 005331 161 | 005351 162 | 005419 163 | 005453 164 | 005461 165 | 005511 166 | 005550 167 | 005568 168 | 005611 169 | 005636 170 | 005700 171 | 005728 172 | 005732 173 | 005741 174 | 005877 175 | 005948 176 | 006095 177 | 006134 178 | 006151 179 | 006249 180 | 006276 181 | 006285 182 | 006295 183 | 006382 184 | 006392 185 | 006428 186 | 006429 187 | 006445 188 | 006455 189 | 006459 190 | 006506 191 | 006572 192 | 006611 193 | 006617 194 | 006645 195 | 006722 196 | 006769 197 | 006789 198 | 006797 199 | 006836 200 | 006847 201 | 006850 202 | 006858 203 | 006862 204 | 006866 205 | 006911 206 | 006933 207 | 006944 208 | 007018 209 | 007035 210 | 007109 211 | 007138 212 | 007163 213 | 007185 214 | 007216 215 | 007234 216 | 007243 217 | 007260 218 | 007308 219 | 007325 220 | 007439 221 | 007448 222 | 007526 223 | 007547 224 | 007570 225 | 007576 226 | 007594 227 | 007603 228 | 007611 229 | 007637 230 | 007697 231 | 007705 232 | 007727 233 | 007748 234 | 007809 235 | 007919 236 | 008019 237 | 008040 238 | 008069 239 | 008142 240 | 008191 241 | 008209 242 | 008248 243 | 008279 244 | 008307 245 | 008311 246 | 008320 247 | 008427 248 | 008437 249 | 008509 250 | 008524 251 | 008526 252 | 008564 253 | 008596 254 | 008610 255 | 008612 256 | 008653 257 | 008699 258 | 008731 259 | 008744 260 | 008805 261 | 008862 262 | 008867 263 | 008876 264 | 008886 265 | 008914 266 | 008919 267 | 009020 268 | 009024 269 | 009114 270 | 009138 271 | 009148 272 | 009202 273 | 009208 274 | 009236 275 | 009251 276 | 009255 277 | 009318 278 | 009331 279 | 009337 280 | 009407 281 | 009438 282 | 009465 283 | 009468 284 | 009512 285 | 009698 286 | 009719 287 | 009732 288 | 009767 289 | 009807 290 | 009808 291 | 009841 292 | 009851 293 | 009874 294 | 009954 295 | -------------------------------------------------------------------------------- /layers/modules/kl_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | 6 | 7 | class KLLoss(nn.Module): 8 | # def KLLoss(xg,xe,alpha): 9 | """ 10 | Kl-loss function for bounding box regression from CVPR 2019 paper: 11 | Bounding Box Regression with Uncertainty for Accurate Object Detection 12 | by Yihui He, Chenchen Zhu, Jianren Wang. Marios Savvides, Xiangyu Zhang 13 | 14 | It is a replacement for the Smooth L1 loss often used in bounding box regression. 
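In the implementation below, both cases are computed at once as e^{−α} * SmoothL1(xg, xe) + 1/2α, since PyTorch's smooth L1 loss already equals 1/2(xg − xe)^2 when |xg − xe| ≤ 1 and |xg − xe| − 1/2 otherwise.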
15 | 16 | The regression loss for a coordinate depends on |xg − xe| > 1 or not: 17 | 18 | Loss |xg − xe| ≤ 1: 19 | 20 | Lreg1 ∝ e^{−α} * 1/2(xg − xe)^2 + 1/2α 21 | 22 | and if |xg − xe| > 1, Loss: 23 | 24 | Lreg2 = e^{−α} (|xg − xe| − 1/2) + 1/2α 25 | 26 | PyTorch implementation by Jasper Bakker (JappaB @github) 27 | """ 28 | def __init__(self, loc_loss_weight=1.0): 29 | super(KLLoss, self).__init__() 30 | 31 | # Insert your own parameters here if you want to adjust the KL-Loss function 32 | 33 | # option to adjust the size of the loss 34 | self.loc_loss_weight = loc_loss_weight 35 | 36 | def forward(self,xg,xe,alpha): 37 | 38 | """ 39 | :param xg: The ground truth of the bounding box coordinates in x1y1x2y2 format 40 | shape: [number_of_boxes, 4] 41 | :param xe: The estimated bounding box coordinates in x1y1x2y2 format 42 | shape: [number_of_boxes, 4] 43 | :param alpha: The log(sigma^2) of the bounding box coordinates in x1y1x2y2 format 44 | shape: [number_of_boxes, 4] 45 | :return: total_kl_loss 46 | """ 47 | 48 | assert (xg.shape == xe.shape and xg.shape == alpha.shape),"The shapes of the input tensors must be the same" 49 | 50 | 51 | smooth_l1 = F.smooth_l1_loss(xe,xg, reduction='none') 52 | 53 | # e^{-α} 54 | exp_min_alpha = torch.exp(-alpha) 55 | 56 | # 1/2α 57 | half_alpha = 0.5*alpha 58 | 59 | total_kl_loss = (exp_min_alpha * smooth_l1 + half_alpha).sum() 60 | # total_kl_loss = total_kl_loss.sum() 61 | 62 | # 63 | # # xg − xe 64 | # delta = xg-xe 65 | # 66 | # # |xg − xe| 67 | # abs_delta = torch.abs(delta) 68 | # 69 | # ## mask for Lreg1 and Lreg2 70 | # Lreg1_mask = abs_delta.le(1.0) # |xg − xe| ≤ 1 71 | # Lreg2_mask = abs_delta.gt(1.0) # |xg − xe| > 1 72 | # 73 | # ## calculate all elements for Lreg1 74 | # # (xg − xe) for Lreg1 75 | # delta_Lreg1 = delta[Lreg1_mask] 76 | # 77 | # # e^{-α} 78 | # exp_min_alpha1 = torch.exp(-alpha[Lreg1_mask]) 79 | # 80 | # # 1/2α 81 | # half_alpha1 = 0.5*alpha[Lreg1_mask] 82 | # 83 | # L_reg1 = exp_min_alpha1 * 0.5 * torch.pow(delta_Lreg1,2) + half_alpha1 84 | # L_reg1 = L_reg1.sum() 85 | # 86 | # ## calculate all elements for Lreg2 87 | # # |xg − xe| for Lreg2 88 | # 89 | # 90 | # 91 | # abs_delta_Lreg2 = abs_delta[Lreg2_mask] 92 | # 93 | # # e^{-α} 94 | # exp_min_alpha2 = torch.exp(-alpha[Lreg2_mask]) 95 | # 96 | # # 1/2α 97 | # half_alpha2 = 0.5*alpha[Lreg2_mask] 98 | # 99 | # L_reg2 = exp_min_alpha2 * (abs_delta_Lreg2 - 0.5) + half_alpha2 100 | # L_reg2 = L_reg2.sum() 101 | # 102 | # 103 | # ## total 104 | # total_kl_loss = L_reg1+L_reg2 105 | # # total_kl_loss *= self.loc_loss_weight 106 | 107 | # todo: remove after debugging 108 | # print() 109 | # print('Debug kl-loss: ') 110 | # print('delta', delta) 111 | # print('abs_delta', abs_delta) 112 | # print('alpha', alpha) 113 | # print('exp_min_alpha1', exp_min_alpha1) 114 | # print('exp_min_alpha1', exp_min_alpha2) 115 | # print('Lreg1mask', Lreg1_mask.sum()) 116 | # print('Lreg2mask', Lreg2_mask.sum()) 117 | 118 | return total_kl_loss 119 | 120 | 121 | -------------------------------------------------------------------------------- /data/6_class_seed_set.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_name": "VOC07", 3 | "seed_set": { 4 | "image_set_idx": [ 5 | "009679", 6 | "009388", 7 | "009100", 8 | "007798", 9 | "002881", 10 | "000250", 11 | "004152", 12 | "006576", 13 | "000344", 14 | "008204", 15 | "002253", 16 | "005467", 17 | "009290", 18 | "007457", 19 | "006626", 20 | "000367", 21 | "008931", 22 | "002350", 23 | "007141", 
24 | "006363", 25 | "006727", 26 | "000498", 27 | 28 | "005496", 29 | "007743", 30 | "006965", 31 | "000154", 32 | "002234", 33 | "003337", 34 | "005614", 35 | "004532", 36 | "000184", 37 | "002372", 38 | "000382", 39 | "008498", 40 | "005181", 41 | "007139", 42 | "007685", 43 | "003098", 44 | "003722", 45 | "002659", 46 | "007460", 47 | "007803", 48 | "002208", 49 | 50 | "002786", 51 | "009841", 52 | "009114", 53 | "004834", 54 | "001628", 55 | "006445", 56 | "009874", 57 | "006285", 58 | "009318", 59 | 60 | "005821", 61 | "007396", 62 | "001777", 63 | "006159", 64 | "009279", 65 | "004446", 66 | "006188", 67 | "007302", 68 | "004105", 69 | "003758", 70 | "009638", 71 | "009175", 72 | "002946", 73 | "008749", 74 | "008082", 75 | "002775", 76 | 77 | "009863", 78 | "007208", 79 | "007821", 80 | "009900", 81 | "002533", 82 | "002643", 83 | "007374", 84 | "005020", 85 | "001980", 86 | "009106", 87 | "002691", 88 | "005756", 89 | "001360", 90 | "008706", 91 | "007475", 92 | 93 | "008592", 94 | "009842", 95 | "003681", 96 | "007230", 97 | "001714", 98 | "007165", 99 | "004423", 100 | "004490", 101 | "006679", 102 | "001750", 103 | "006833" 104 | ], 105 | "class_dist": [] 106 | }, 107 | "val_set": { 108 | "image_set_idx": [ 109 | "006409", 110 | "000269", 111 | "006648", 112 | "002641", 113 | "004671", 114 | "007121", 115 | "007649", 116 | "009878", 117 | "000381", 118 | "004886", 119 | 120 | "000931", 121 | "002465", 122 | "007361", 123 | "001732", 124 | "003860", 125 | "000826", 126 | "006123", 127 | "009603", 128 | "005705", 129 | "007365", 130 | 131 | "001236", 132 | "004625", 133 | "007216", 134 | "005145", 135 | "009138", 136 | "008596", 137 | "003492", 138 | "007448", 139 | "009407", 140 | "005236", 141 | 142 | "007558", 143 | "000592", 144 | "003301", 145 | "007999", 146 | "004631", 147 | "007390", 148 | "007890", 149 | "006351", 150 | "000710", 151 | "001451", 152 | 153 | "003636", 154 | "002116", 155 | "006206", 156 | "004244", 157 | "000245", 158 | "003256", 159 | "009839", 160 | "007663", 161 | "006330", 162 | "000545", 163 | 164 | "003705", 165 | "002263", 166 | "005469", 167 | "009816", 168 | "000107", 169 | "002593", 170 | "003698", 171 | "006944", 172 | "000900", 173 | "004312" 174 | 175 | ], 176 | "class_dist": [] 177 | }, 178 | "train_set": [ 179 | "009679", 180 | "009388", 181 | "009100", 182 | "007798", 183 | "002881", 184 | "000250", 185 | "004152", 186 | "006576", 187 | "000344", 188 | "008204", 189 | "002253", 190 | "005467", 191 | "009290", 192 | "007457", 193 | "006626", 194 | "000367", 195 | "008931", 196 | "002350", 197 | "007141", 198 | "006363", 199 | "006727", 200 | "000498", 201 | 202 | "005496", 203 | "007743", 204 | "006965", 205 | "000154", 206 | "002234", 207 | "003337", 208 | "005614", 209 | "004532", 210 | "000184", 211 | "002372", 212 | "000382", 213 | "008498", 214 | "005181", 215 | "007139", 216 | "007685", 217 | "003098", 218 | "003722", 219 | "002659", 220 | "007460", 221 | "007803", 222 | "002208", 223 | 224 | "002786", 225 | "009841", 226 | "009114", 227 | "004834", 228 | "001628", 229 | "006445", 230 | "009874", 231 | "006285", 232 | "009318", 233 | 234 | "005821", 235 | "007396", 236 | "001777", 237 | "006159", 238 | "009279", 239 | "004446", 240 | "006188", 241 | "007302", 242 | "004105", 243 | "003758", 244 | "009638", 245 | "009175", 246 | "002946", 247 | "008749", 248 | "008082", 249 | "002775", 250 | 251 | "009863", 252 | "007208", 253 | "007821", 254 | "009900", 255 | "002533", 256 | "002643", 257 | "007374", 258 | "005020", 259 | "001980", 260 
| "009106", 261 | "002691", 262 | "005756", 263 | "001360", 264 | "008706", 265 | "007475", 266 | 267 | "008592", 268 | "009842", 269 | "003681", 270 | "007230", 271 | "001714", 272 | "007165", 273 | "004423", 274 | "004490", 275 | "006679", 276 | "001750", 277 | "006833" 278 | ] 279 | } -------------------------------------------------------------------------------- /data/empty_seed.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_name": "VOC07", 3 | "seed_set": { 4 | "image_set_idx": [], 5 | "class_dist": [ 6 | ] 7 | }, 8 | "val_set": { 9 | "image_set_idx": [ 10 | "004857", 11 | "000702", 12 | "004895", 13 | "001012", 14 | "001149", 15 | "005138", 16 | "008688", 17 | "009098", 18 | "003085", 19 | "005812", 20 | "007813", 21 | "006681", 22 | "004872", 23 | "001510", 24 | "005903", 25 | "009762", 26 | "000023", 27 | "007486", 28 | "008456", 29 | "006939", 30 | "003824", 31 | "003889", 32 | "007154", 33 | "000997", 34 | "004100", 35 | "003603", 36 | "002957", 37 | "000814", 38 | "004753", 39 | "009813", 40 | "007691", 41 | "000078", 42 | "006869", 43 | "006866", 44 | "003696", 45 | "006038", 46 | "002965", 47 | "004423", 48 | "004548", 49 | "003162", 50 | "000541", 51 | "000777", 52 | "008031", 53 | "008744", 54 | "009805", 55 | "005006", 56 | "005895", 57 | "003165", 58 | "007074", 59 | "005676", 60 | "004146", 61 | "003118", 62 | "000337", 63 | "001563", 64 | "000501", 65 | "004797", 66 | "001383", 67 | "002987", 68 | "007208", 69 | "008292", 70 | "001455", 71 | "000661", 72 | "005405", 73 | "004003", 74 | "001768", 75 | "006275", 76 | "002512", 77 | "004450", 78 | "002323", 79 | "008453", 80 | "009897", 81 | "005101", 82 | "008514", 83 | "006844", 84 | "009726", 85 | "003331", 86 | "001053", 87 | "004565", 88 | "001978", 89 | "001442", 90 | "007398", 91 | "000637", 92 | "008503", 93 | "004842", 94 | "002595", 95 | "005263", 96 | "004837", 97 | "002471", 98 | "007449", 99 | "001989", 100 | "009368", 101 | "005461", 102 | "009153", 103 | "007297", 104 | "002476", 105 | "008883", 106 | "002801", 107 | "005257", 108 | "003455", 109 | "005672", 110 | "009822", 111 | "001203", 112 | "009619", 113 | "004902", 114 | "002362", 115 | "006233", 116 | "003847", 117 | "006968", 118 | "004242", 119 | "002348", 120 | "000394", 121 | "008628", 122 | "000050", 123 | "000889", 124 | "000438", 125 | "007089", 126 | "007614", 127 | "003912", 128 | "005868", 129 | "007090", 130 | "001881", 131 | "008132", 132 | "008940", 133 | "004359", 134 | "004105", 135 | "002540", 136 | "001247", 137 | "002477", 138 | "002658", 139 | "001004", 140 | "001408", 141 | "002815", 142 | "001944", 143 | "007432", 144 | "006911", 145 | "000713", 146 | "009695", 147 | "000771", 148 | "000917", 149 | "002569", 150 | "002934", 151 | "005719", 152 | "006747", 153 | "007054", 154 | "009373", 155 | "007590", 156 | "006718", 157 | "009060", 158 | "004727", 159 | "008968", 160 | "000060", 161 | "009105", 162 | "007915", 163 | "009270", 164 | "007600", 165 | "002284", 166 | "002226", 167 | "005047", 168 | "000513", 169 | "002134", 170 | "001738", 171 | "008638", 172 | "004768", 173 | "006848", 174 | "008526", 175 | "007902", 176 | "009446", 177 | "005039", 178 | "003363", 179 | "002256", 180 | "008730", 181 | "006515", 182 | "002696", 183 | "005156", 184 | "005655", 185 | "000892", 186 | "003259", 187 | "006251", 188 | "004326", 189 | "009422", 190 | "007490", 191 | "000219", 192 | "001011", 193 | "000153", 194 | "007300", 195 | "002779", 196 | "006066", 197 | "009413", 198 | 
"002151", 199 | "007540", 200 | "005208", 201 | "008900", 202 | "008160", 203 | "008268", 204 | "008211", 205 | "003335", 206 | "005114", 207 | "009692", 208 | "008144", 209 | "009955", 210 | "002359", 211 | "001273", 212 | "001707", 213 | "008523", 214 | "002366", 215 | "009351", 216 | "002450", 217 | "001484", 218 | "002337", 219 | "008933", 220 | "004879", 221 | "004936", 222 | "002901", 223 | "007025", 224 | "000387", 225 | "005588", 226 | "003382", 227 | "000171", 228 | "005841", 229 | "007968", 230 | "003116", 231 | "009911", 232 | "001250", 233 | "009268", 234 | "006341", 235 | "009215", 236 | "006739", 237 | "003137", 238 | "006825", 239 | "007394", 240 | "006609", 241 | "008341", 242 | "008873", 243 | "005327", 244 | "006437", 245 | "000032", 246 | "002501", 247 | "009405", 248 | "008454", 249 | "005599", 250 | "001148", 251 | "001724", 252 | "008452", 253 | "008833", 254 | "001486", 255 | "006240", 256 | "002935", 257 | "002183", 258 | "004011", 259 | "003253" 260 | ], 261 | "class_dist": [ 262 | 17, 263 | 24, 264 | 25, 265 | 17, 266 | 38, 267 | 13, 268 | 67, 269 | 15, 270 | 33, 271 | 9, 272 | 10, 273 | 30, 274 | 22, 275 | 20, 276 | 255, 277 | 32, 278 | 21, 279 | 12, 280 | 8, 281 | 18 282 | ] 283 | }, 284 | "train_set": [] 285 | } -------------------------------------------------------------------------------- /layers/modules/multibox_loss.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.autograd import Variable 6 | from ..box_utils import match, log_sum_exp 7 | from .kl_loss import KLLoss 8 | from data import voc as cfg 9 | 10 | class MultiBoxLoss(nn.Module): 11 | """SSD Weighted Loss Function 12 | Compute Targets: 13 | 1) Produce Confidence Target Indices by matching ground truth boxes 14 | with (default) 'priorboxes' that have jaccard index > threshold parameter 15 | (default threshold: 0.5). 16 | 2) Produce localization target by 'encoding' variance into offsets of ground 17 | truth boxes and their matched 'priorboxes'. 18 | 3) Hard negative mining to filter the excessive number of negative examples 19 | that comes with using a large number of default bounding boxes. 20 | (default negative:positive ratio 3:1) 21 | Objective Loss: 22 | L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 23 | Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss 24 | weighted by α which is set to 1 by cross val. 25 | Args: 26 | c: class confidences, 27 | l: predicted boxes, 28 | g: ground truth boxes 29 | N: number of matched default boxes 30 | See: https://arxiv.org/pdf/1512.02325.pdf for more details. 
31 | """ 32 | 33 | def __init__(self, num_classes, overlap_thresh, prior_for_matching, 34 | bkg_label, neg_mining, neg_pos, neg_overlap, encode_target, 35 | use_gpu=True, modeltype='SSD300'): 36 | super(MultiBoxLoss, self).__init__() 37 | self.use_gpu = use_gpu 38 | self.num_classes = num_classes 39 | self.threshold = overlap_thresh 40 | self.background_label = bkg_label 41 | self.encode_target = encode_target 42 | self.use_prior_for_matching = prior_for_matching 43 | self.do_neg_mining = neg_mining 44 | self.negpos_ratio = neg_pos 45 | self.neg_overlap = neg_overlap 46 | self.variance = cfg['variance'] 47 | self.modeltype = modeltype 48 | if modeltype == 'SSD300KL': 49 | self.kl_loss = KLLoss(loc_loss_weight = 1.0) 50 | 51 | def forward(self, predictions, targets,args = None): #todo: remove args 52 | """Multibox Loss 53 | Args: 54 | predictions (tuple): A tuple containing loc preds, conf preds, 55 | and prior boxes from SSD net. 56 | conf shape: torch.size(batch_size,num_priors,num_classes) 57 | loc shape: torch.size(batch_size,num_priors,4) 58 | priors shape: torch.size(num_priors,4) 59 | 60 | targets (tensor): Ground truth boxes and labels for a batch, 61 | shape: [batch_size,num_objs,5] (last idx is the label). 62 | """ 63 | if self.modeltype != 'SSD300KL': 64 | loc_data, conf_data, priors = predictions 65 | else: 66 | loc_data, conf_data, priors, loc_std = predictions 67 | 68 | num = loc_data.size(0) 69 | priors = priors[:loc_data.size(1), :] 70 | num_priors = (priors.size(0)) 71 | 72 | # match priors (default boxes) and ground truth boxes 73 | loc_t = torch.Tensor(num, num_priors, 4) 74 | conf_t = torch.LongTensor(num, num_priors) # just a mask if matched or not 75 | 76 | for idx in range(num): 77 | truths = targets[idx][:, :-1].data 78 | labels = targets[idx][:, -1].data 79 | defaults = priors.data 80 | match(self.threshold, truths, defaults, self.variance, labels, 81 | loc_t, conf_t, idx, self.modeltype) 82 | 83 | if self.use_gpu: 84 | loc_t = loc_t.cuda() 85 | conf_t = conf_t.cuda() 86 | 87 | # wrap targets 88 | loc_t = Variable(loc_t, requires_grad=False) 89 | conf_t = Variable(conf_t, requires_grad=False) 90 | 91 | pos = conf_t > 0 92 | num_pos = pos.sum(dim=1, keepdim=True) 93 | 94 | # Localization Loss (Smooth L1) 95 | # Shape: [batch,num_priors,4] 96 | pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) 97 | loc_p = loc_data[pos_idx].view(-1, 4) 98 | loc_t = loc_t[pos_idx].view(-1, 4) 99 | if self.modeltype != 'SSD300KL': 100 | loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False).double() 101 | else: 102 | loss_l = self.kl_loss(loc_p, loc_t, loc_std[pos_idx].view(-1, 4)).double() 103 | 104 | # Compute max conf across batch for hard negative mining 105 | batch_conf = conf_data.view(-1, self.num_classes) 106 | loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1)) 107 | 108 | # Hard Negative Mining 109 | loss_c = loss_c.view(num, -1) 110 | loss_c[pos] = 0 # filter out pos boxes for now 111 | 112 | _, loss_idx = loss_c.sort(1, descending=True) 113 | _, idx_rank = loss_idx.sort(1) 114 | num_pos = pos.long().sum(1, keepdim=True) 115 | num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1) 116 | neg = idx_rank < num_neg.expand_as(idx_rank) 117 | 118 | # Confidence Loss Including Positive and Negative Examples 119 | pos_idx = pos.unsqueeze(2).expand_as(conf_data) 120 | neg_idx = neg.unsqueeze(2).expand_as(conf_data) 121 | conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1, self.num_classes) 122 | targets_weighted = conf_t[(pos+neg).gt(0)] 
123 | # if num_classes == 2: 124 | # loss_c = F.binary_cross_entropy(torch.sigmoid(conf_p)[:,1], targets_weighted.float(),size_average=False).double() 125 | # else: 126 | loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False).double() 127 | 128 | # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 129 | 130 | N = num_pos.data.sum().double() 131 | loss_l /= N 132 | loss_c /= N 133 | return loss_l, loss_c 134 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Active Learning for Object Detection With Localization Uncertainty from Sampling-Based Probabilistic Bounding Boxes 2 | This repo contains the code belonging to my master's thesis, titled "Active Learning for Object Detection With Localization Uncertainty from Sampling-Based Probabilistic Bounding Boxes". I also uploaded the pdf of my thesis [here](https://github.com/JappaB/Active_Learning_Object_Detection/blob/master/Thesis_Jasper_Bakker_Active_Deep_Learning_for_Object_Detection_With_Sampling_Based_Probabilistic_Bounding_Boxes_compressed.pdf). As a very short summary: I researched the use of localization uncertainty, obtained through an ensemble of object detectors, to select more informative images for labeling. It shows promising results on Pascal VOC 2007, but has not been used on other datasets. Please let me know your experiences if you use it on different datasets. 3 | 4 | As a basis for my repository I used the excellent [PyTorch implementation of the SSD detector](https://github.com/amdegroot/ssd.pytorch) by Max de Groot and Ellis Brown, retrieved on 19-02-2019. However, as I used the then-newest stable version of PyTorch (1.0.1), I changed some of their code to get it running. Note that their repo is probably more suitable if you just want to use an SSD written in PyTorch and don't want to perform active learning. Some parts of this readme are copied directly from Max de Groot and Ellis Brown's repo, as my work is built upon their code anyway. 5 | 6 | After finishing my thesis, I cleaned the code a bit and wrote this readme to make it more usable for others. I hope this helps; however, bear in mind that this is research code and should be viewed as such. Currently I'm traveling through Central and South America. I know the code could still be improved, but it should work if you follow the instructions below. Please post issues if you are serious about using it and don't understand certain parts. I'll see what I can do when I'm back. 7 | 8 | 9 | ### Table of Contents 10 | - Getting Started 11 | - Datasets 12 | - Authors 13 | 14 |   15 |   16 |   17 |   18 | 19 | 20 | ## SSD: Single Shot MultiBox Object Detector, in PyTorch 21 | A [PyTorch](http://pytorch.org/) implementation of [Single Shot MultiBox Detector](http://arxiv.org/abs/1512.02325) from the 2016 paper by Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, Cheng-Yang Fu, and Alexander C. Berg. The official and original Caffe code can be found [here](https://github.com/weiliu89/caffe/tree/ssd). 22 | 23 | 24 | 25 | 26 | 27 | ## Getting started 28 | - For reproducibility, I supplied a listing of the conda environment I used for my experiments in the [requirements](https://github.com/JappaB/Active_Learning_Object_Detection/blob/master/requirements) file. The most important packages are probably: PyTorch, NumPy, SciPy, cv2 and hdbscan. One way you might recreate a similar environment is sketched below.
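A minimal sketch of recreating a similar environment (assumptions: conda is installed, and the environment name is arbitrary; the requirements file is a plain `conda list` dump, so it cannot be passed to `conda create --file` as-is):

```Shell
conda create --name al_ssd python=3.7
conda activate al_ssd
# versions as pinned in the requirements file
conda install pytorch=1.0.1 torchvision=0.2.2 cudatoolkit=9.0 -c pytorch
conda install numpy scipy opencv matplotlib
conda install -c conda-forge hdbscan
```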
29 | - Clone this repository. 30 | - Then download the dataset by following the [instructions](#datasets) below. Note that the active learning code has only been completely implemented for the Pascal VOC 2007 dataset. 31 | - As the SSD uses a reduced VGG-16 backbone, download the fc-reduced [VGG-16](https://arxiv.org/abs/1409.1556) PyTorch base network weights at https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth and put them in a directory called 'weights'. 32 | - By default, we assume you have downloaded the file into the `Active_Learning_Object_Detection/weights` dir: 33 | 34 | ```Shell 35 | mkdir weights 36 | cd weights 37 | wget https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth 38 | ``` 39 | - Note that a GPU is highly recommended for training the SSD. 40 | 41 | - As there are many parser argument options, I provide two sample run scripts in the [run_scripts](https://github.com/JappaB/Active_Learning_Object_Detection/tree/master/run_scripts/scripts) directory to get a head start: one for the six classes I used in my experiments and one for a single class of interest (thus background vs. non-background). To use them, you also need to copy the appropriate imageset files from `data/imageset_files` to `~/data/VOCdevkit/VOC2007/ImageSets/Main/`. 42 | 43 | - You are required to give a list of paths to the currently best networks. I provided a script `create_initial_networks.py` to generate these if you don't have any yet. The current settings of this script correspond to the single-class sample run script. NOTE: a single saved network requires approximately 100 MB of storage, so make sure you have enough disk space before running the script. 44 | 45 | - Finally, if you don't want to use one of the provided scripts, the entry point for active learning is the `active_learning_main.py` file. 46 | 47 | 48 | ## What can I find where? 49 | For active learning, the two most important folders are `active_learning_dir` and `active_learning_package`: the first holds the (intermediate) results of the runs (e.g. which images to label next), and the second contains the active learning code. 50 | 51 | 52 | ## Datasets 53 | To make things easy, we provide bash scripts to handle the dataset (Pascal VOC) downloads and setup for you. We also provide simple dataset loaders that inherit `torch.utils.data.Dataset`, making them fully compatible with the `torchvision.datasets` [API](http://pytorch.org/docs/torchvision/datasets.html). 54 | 55 | 56 | ### VOC Dataset 57 | PASCAL VOC: Visual Object Classes 58 | 59 | ##### Download VOC2007 trainval & test 60 | ```Shell 61 | # specify a directory for the dataset to be downloaded into, else the default is ~/data/ 62 | sh data/scripts/VOC2007.sh # <directory> 63 | ``` 64 | 65 | ##### Download VOC2012 trainval 66 | ```Shell 67 | # specify a directory for the dataset to be downloaded into, else the default is ~/data/ 68 | sh data/scripts/VOC2012.sh # <directory> 69 | ``` 70 | 71 | ### Use a pre-trained SSD network for detection 72 | #### Download a pre-trained network 73 | - We are trying to provide PyTorch `state_dicts` (dicts of weight tensors) of the latest SSD model definitions trained on different datasets.
74 | - Currently, we provide the following PyTorch models: 75 | * SSD300 trained on VOC0712 (newest PyTorch weights) 76 | - https://s3.amazonaws.com/amdegroot-models/ssd300_mAP_77.43_v2.pth 77 | * SSD300 trained on VOC0712 (original Caffe weights) 78 | - https://s3.amazonaws.com/amdegroot-models/ssd_300_VOC0712.pth 79 | 80 | ## Authors 81 | Active learning part: 82 | * [**Jasper Bakker**](https://github.com/jappab) 83 | 84 | SSD, Dataloaders, etc. (check their excellent repo at [PyTorch implementation of the SSD detector](https://github.com/amdegroot/ssd.pytorch)): 85 | * [**Max de Groot**](https://github.com/amdegroot) 86 | * [**Ellis Brown**](http://github.com/ellisbrown) 87 | 88 | -------------------------------------------------------------------------------- /active_learning_package/uncertainty_helpers.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | import utils.augmentations as augmentations 3 | import torch 4 | 5 | def entropy(confs, already_normalized=True): 6 | """ 7 | https://discuss.pytorch.org/t/calculating-the-entropy-loss/14510 8 | softmax proof: https://math.stackexchange.com/questions/331275/softmax-function-and-modelling-probability-distributions 9 | 10 | :param confs: (tensor) 11 | shape: (batch, observations, class_probabilities) where class probabilities are real probabilities (already normalized) 12 | :return: H: (tensor) entropy 13 | shape: (batch, observations) 14 | """ 15 | # tested with a uniform and a peak distribution in a tensor 16 | 17 | if not already_normalized: 18 | H = F.softmax(confs, dim=2) * F.log_softmax(confs, dim=2) 19 | H = H.sum(dim=2) * -1.0 20 | else: 21 | H = confs * torch.log(confs) 22 | H = H.sum(dim=2) * -1.0 23 | 24 | return H 25 | 26 | def trace_covariance(cov_0, cov_1): 27 | """ 28 | https://obilaniu6266h16.wordpress.com/2016/02/04/einstein-summation-in-numpy/ 29 | see the trace calculation; however, here we keep the first two dimensions (batches and observations) as free variables 30 | 31 | 32 | 33 | args: 34 | cov_0: (tensor) 35 | shape: [batch, observations, 2, 2] # last two dimensions are xx,xy and xy,yy 36 | cov_1: (tensor) 37 | shape: [batch, observations, 2, 2] # last two dimensions are xx,xy and xy,yy 38 | :return: 39 | traces_0: (tensor) 40 | shape: [batch, observations] 41 | traces_1: (tensor) 42 | shape: [batch, observations] 43 | """ 44 | 45 | # todo: assert that the trace must be positive 46 | traces_0 = torch.einsum('boxx->bo',cov_0) 47 | traces_1 = torch.einsum('boxx->bo',cov_1) 48 | 49 | return traces_0, traces_1 50 | 51 | 52 | 53 | 54 | def dist_means_observation(mu_0,mu_1): 55 | """ 56 | Calculate the (Euclidean) distance between the mean of the upper left corner (mu_0) and lower right corner (mu_1) of the bounding box 57 | 58 | args: 59 | mu_0: 60 | shape: [batch, observations, 2] where the last dim is x1y1 61 | mu_1: 62 | shape: [batch, observations, 2] where the last dim is x2y2 63 | :return: 64 | distances: 65 | shape: [batch, observations] 66 | """ 67 | 68 | 69 | 70 | mu_1_minus_0 = mu_1-mu_0 71 | squared = torch.pow(mu_1_minus_0,2) 72 | summed = squared.sum(dim=2) 73 | distances = torch.pow(summed,0.5) 74 | 75 | return distances 76 | 77 | def means_observation(observations): 78 | """ 79 | This function is exactly the same as the means_covs_observation below, without the cov part.
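        args: observations: (tensor) zero-padded boxes, shape: [batch, observations, max(n_boxes_of_all_obs), 4] (same layout as in means_covs_observation) :return: mean: (tensor) shape: [batch, observations, 4]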
80 | """ 81 | max_boxes = observations.shape[2] 82 | num_observations = observations.shape[1] 83 | num_batches = observations.shape[0] 84 | 85 | # per bounding box, sum each individual coordinate 86 | summed_coordinates = observations.sum(dim=2) 87 | zeros = observations.le(0.) 88 | zeros_per_box = zeros.sum(dim=3) 89 | N = zeros_per_box.le(3).sum(dim=2).float() 90 | mean = torch.div(summed_coordinates, N.unsqueeze(-1)) 91 | return mean 92 | 93 | def means_covs_observation(observations): 94 | """ 95 | For a guide on np.einsum (vs using for loops, a lot faster) 96 | (which is really similar to torch.einsum, which is used below to keep gpu speed-ups) 97 | check: 98 | - (short) http://ajcr.net/Basic-guide-to-einsum/ 99 | - or (eleborate, but VERY good) https://obilaniu6266h16.wordpress.com/2016/02/04/einstein-summation-in-numpy/ 100 | - or (example also involving a covariance calculation) https://medium.com/the-quarks/an-einsum-use-case-8dafcb933c66 101 | 102 | args: 103 | observations: (tensor) combined bounding boxes, only spatial information 104 | one bounding box shnumber ofould have the coordinates like this: 105 | [x0,y0,x1,y1], the coordinates of the upper left and lower right corners 106 | respectively. As each observation can have a variable number of bounding boxes, 107 | the observations that have less than the maximum number of bounding are assumed to be padded 108 | with zeros. 109 | 110 | shape: [batch, observations, max(n_boxes_of_all_obs) ,4] 111 | 112 | :return: 113 | means_covs_observation: last dim is (mu0,mu1,cov0,cov1) 114 | shape: [batch, observation, 4] 115 | """ 116 | max_boxes = observations.shape[2] 117 | num_observations = observations.shape[1] 118 | num_batches = observations.shape[0] 119 | 120 | # per bounding box, sum each individual coordinate 121 | summed_coordinates = observations.sum(dim=2) 122 | zeros = observations.le(0.) 
123 | zeros_per_box = zeros.sum(dim=3) 124 | N = zeros_per_box.le(3).sum(dim=2).float() 125 | mean = torch.div(summed_coordinates, N.unsqueeze(-1)) 126 | # mean = torch.div(summed_coordinates, torch.transpose(N, 0, 1)) 127 | #### covariances 128 | # must be done separately for the upper left corner (0) and lower right corner (1) of the bounding box 129 | mean_0 = mean[:, :, 0:2] 130 | mean_1 = mean[:, :, 2:4] 131 | observations_0 = observations[:, :, :, 0:2] 132 | observations_1 = observations[:, :, :, 2:4] 133 | 134 | # Batch Observation boXes coordinatesTransposed and Batch Observation boXes Coordinates 135 | cov_first_part_summed_0 = torch.einsum('boxt,boxc -> botc', observations_0, observations_0) 136 | cov_first_part_summed_1 = torch.einsum('boxt,boxc -> botc', observations_1, observations_1) 137 | 138 | # double unsqueeze to allow for batches 139 | stacked_N = N.unsqueeze(-1).unsqueeze(-1) 140 | 141 | cov_first_part_0 = torch.div(cov_first_part_summed_0, stacked_N) 142 | cov_first_part_1 = torch.div(cov_first_part_summed_1, stacked_N) 143 | 144 | cov_second_part_0 = torch.einsum('bik,bij-> bijk',mean_0, mean_0) 145 | cov_second_part_1 = torch.einsum('bik,bij-> bijk',mean_1, mean_1) 146 | 147 | cov_0 = cov_first_part_0 - cov_second_part_0 148 | cov_1 = cov_first_part_1 - cov_second_part_1 149 | 150 | 151 | return mean, cov_0, cov_1 152 | 153 | 154 | def means_observations(observations): 155 | """ 156 | For a guide on np.einsum (vs using for loops, a lot faster) 157 | (which is really similar to torch.einsum, which is used below to keep gpu speed-ups) 158 | check: 159 | - (short) http://ajcr.net/Basic-guide-to-einsum/ 160 | - or (elaborate, but VERY good) https://obilaniu6266h16.wordpress.com/2016/02/04/einstein-summation-in-numpy/ 161 | - or (example also involving a covariance calculation) https://medium.com/the-quarks/an-einsum-use-case-8dafcb933c66 162 | 163 | args: 164 | observations: (tensor) combined bounding boxes, only spatial information 165 | one bounding box should have the coordinates like this: 166 | [x0,y0,x1,y1], the coordinates of the upper left and lower right corners 167 | respectively. As each observation can have a variable number of bounding boxes, 168 | the observations that have fewer than the maximum number of bounding boxes are assumed to be padded 169 | with zeros. 170 | 171 | shape: [max(n_boxes), batch, observations,4] 172 | 173 | :return: 174 | means_observation: last dim is (mu0,mu1) 175 | shape: [batch, observation, 2] 176 | """ 177 | 178 | # per bounding box, sum each individual coordinate 179 | summed_coordinates = observations.sum(dim=2) 180 | zeros = observations.le(0.)
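    # As above, N is meant to count the real (non-padded) boxes so the coordinate
    # sums can be turned into means; note that here the box axis is dim 0 (see the
    # docstring shape), unlike in means_covs_observation.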
181 | zeros_per_box = zeros.sum(dim=0) 182 | N = zeros_per_box.le(3).sum(dim=2).float() 183 | mean = torch.div(summed_coordinates, torch.transpose(N, 0, 1)) 184 | 185 | return mean 186 | -------------------------------------------------------------------------------- /data/imageset_files/car_trainval_detect.txt: -------------------------------------------------------------------------------- 1 | 000007 2 | 000012 3 | 000020 4 | 000026 5 | 000047 6 | 000060 7 | 000083 8 | 000091 9 | 000131 10 | 000134 11 | 000142 12 | 000153 13 | 000156 14 | 000159 15 | 000161 16 | 000169 17 | 000180 18 | 000210 19 | 000220 20 | 000221 21 | 000233 22 | 000245 23 | 000251 24 | 000262 25 | 000263 26 | 000288 27 | 000289 28 | 000296 29 | 000302 30 | 000303 31 | 000311 32 | 000317 33 | 000318 34 | 000320 35 | 000329 36 | 000334 37 | 000338 38 | 000343 39 | 000355 40 | 000367 41 | 000373 42 | 000387 43 | 000396 44 | 000404 45 | 000406 46 | 000420 47 | 000431 48 | 000461 49 | 000463 50 | 000469 51 | 000474 52 | 000477 53 | 000494 54 | 000509 55 | 000515 56 | 000522 57 | 000541 58 | 000543 59 | 000545 60 | 000554 61 | 000579 62 | 000590 63 | 000605 64 | 000620 65 | 000628 66 | 000648 67 | 000653 68 | 000663 69 | 000672 70 | 000688 71 | 000700 72 | 000754 73 | 000776 74 | 000787 75 | 000800 76 | 000818 77 | 000823 78 | 000829 79 | 000855 80 | 000860 81 | 000871 82 | 000888 83 | 000906 84 | 000911 85 | 000935 86 | 000937 87 | 000972 88 | 000977 89 | 000987 90 | 001052 91 | 001057 92 | 001060 93 | 001069 94 | 001071 95 | 001093 96 | 001112 97 | 001119 98 | 001124 99 | 001125 100 | 001148 101 | 001237 102 | 001258 103 | 001268 104 | 001281 105 | 001290 106 | 001292 107 | 001294 108 | 001330 109 | 001334 110 | 001352 111 | 001360 112 | 001364 113 | 001371 114 | 001384 115 | 001385 116 | 001386 117 | 001409 118 | 001414 119 | 001432 120 | 001445 121 | 001455 122 | 001466 123 | 001472 124 | 001488 125 | 001492 126 | 001494 127 | 001532 128 | 001559 129 | 001561 130 | 001576 131 | 001604 132 | 001618 133 | 001622 134 | 001640 135 | 001654 136 | 001662 137 | 001676 138 | 001693 139 | 001699 140 | 001746 141 | 001780 142 | 001801 143 | 001821 144 | 001845 145 | 001862 146 | 001877 147 | 001881 148 | 001899 149 | 001902 150 | 001931 151 | 001937 152 | 001944 153 | 001950 154 | 001954 155 | 001962 156 | 001980 157 | 002019 158 | 002023 159 | 002045 160 | 002056 161 | 002116 162 | 002125 163 | 002134 164 | 002135 165 | 002153 166 | 002170 167 | 002174 168 | 002178 169 | 002180 170 | 002182 171 | 002197 172 | 002228 173 | 002241 174 | 002244 175 | 002247 176 | 002248 177 | 002281 178 | 002291 179 | 002307 180 | 002311 181 | 002340 182 | 002343 183 | 002355 184 | 002373 185 | 002374 186 | 002393 187 | 002411 188 | 002420 189 | 002436 190 | 002439 191 | 002454 192 | 002478 193 | 002490 194 | 002493 195 | 002497 196 | 002504 197 | 002533 198 | 002534 199 | 002544 200 | 002559 201 | 002563 202 | 002566 203 | 002584 204 | 002595 205 | 002606 206 | 002625 207 | 002643 208 | 002646 209 | 002647 210 | 002666 211 | 002667 212 | 002682 213 | 002691 214 | 002693 215 | 002704 216 | 002730 217 | 002734 218 | 002759 219 | 002772 220 | 002776 221 | 002779 222 | 002783 223 | 002804 224 | 002812 225 | 002833 226 | 002854 227 | 002884 228 | 002917 229 | 002937 230 | 002994 231 | 003007 232 | 003009 233 | 003013 234 | 003027 235 | 003032 236 | 003051 237 | 003053 238 | 003054 239 | 003057 240 | 003083 241 | 003092 242 | 003094 243 | 003103 244 | 003117 245 | 003165 246 | 003176 247 | 003195 248 | 003214 249 | 003228 250 | 003231 251 | 003240 252 
| 003243 253 | 003256 254 | 003261 255 | 003271 256 | 003285 257 | 003313 258 | 003331 259 | 003344 260 | 003355 261 | 003359 262 | 003363 263 | 003379 264 | 003390 265 | 003397 266 | 003406 267 | 003420 268 | 003461 269 | 003484 270 | 003551 271 | 003555 272 | 003587 273 | 003596 274 | 003606 275 | 003608 276 | 003634 277 | 003636 278 | 003655 279 | 003703 280 | 003711 281 | 003713 282 | 003721 283 | 003790 284 | 003798 285 | 003806 286 | 003809 287 | 003820 288 | 003824 289 | 003835 290 | 003885 291 | 003891 292 | 003936 293 | 003954 294 | 003971 295 | 003973 296 | 003974 297 | 003987 298 | 003997 299 | 003998 300 | 004011 301 | 004019 302 | 004073 303 | 004087 304 | 004089 305 | 004091 306 | 004108 307 | 004136 308 | 004145 309 | 004186 310 | 004203 311 | 004228 312 | 004231 313 | 004242 314 | 004244 315 | 004284 316 | 004295 317 | 004303 318 | 004304 319 | 004329 320 | 004346 321 | 004365 322 | 004367 323 | 004384 324 | 004386 325 | 004387 326 | 004429 327 | 004439 328 | 004481 329 | 004488 330 | 004494 331 | 004519 332 | 004526 333 | 004539 334 | 004544 335 | 004563 336 | 004576 337 | 004581 338 | 004591 339 | 004604 340 | 004618 341 | 004660 342 | 004687 343 | 004691 344 | 004705 345 | 004719 346 | 004727 347 | 004747 348 | 004748 349 | 004750 350 | 004779 351 | 004786 352 | 004793 353 | 004805 354 | 004808 355 | 004823 356 | 004828 357 | 004830 358 | 004850 359 | 004863 360 | 004873 361 | 004890 362 | 004903 363 | 004946 364 | 004961 365 | 004962 366 | 004973 367 | 004983 368 | 005003 369 | 005020 370 | 005028 371 | 005047 372 | 005065 373 | 005067 374 | 005068 375 | 005071 376 | 005072 377 | 005090 378 | 005102 379 | 005110 380 | 005156 381 | 005159 382 | 005169 383 | 005199 384 | 005209 385 | 005259 386 | 005262 387 | 005273 388 | 005305 389 | 005318 390 | 005331 391 | 005350 392 | 005373 393 | 005387 394 | 005423 395 | 005457 396 | 005475 397 | 005481 398 | 005483 399 | 005486 400 | 005489 401 | 005499 402 | 005509 403 | 005536 404 | 005547 405 | 005549 406 | 005566 407 | 005577 408 | 005584 409 | 005585 410 | 005588 411 | 005592 412 | 005593 413 | 005609 414 | 005640 415 | 005645 416 | 005669 417 | 005679 418 | 005738 419 | 005747 420 | 005749 421 | 005756 422 | 005760 423 | 005782 424 | 005791 425 | 005806 426 | 005815 427 | 005830 428 | 005831 429 | 005839 430 | 005861 431 | 005868 432 | 005897 433 | 005899 434 | 005918 435 | 005956 436 | 005979 437 | 005988 438 | 005998 439 | 006009 440 | 006011 441 | 006018 442 | 006035 443 | 006038 444 | 006043 445 | 006058 446 | 006062 447 | 006079 448 | 006089 449 | 006097 450 | 006103 451 | 006104 452 | 006120 453 | 006124 454 | 006128 455 | 006133 456 | 006151 457 | 006196 458 | 006201 459 | 006203 460 | 006206 461 | 006210 462 | 006218 463 | 006223 464 | 006224 465 | 006225 466 | 006235 467 | 006250 468 | 006261 469 | 006277 470 | 006290 471 | 006301 472 | 006320 473 | 006325 474 | 006329 475 | 006330 476 | 006346 477 | 006362 478 | 006369 479 | 006375 480 | 006396 481 | 006417 482 | 006421 483 | 006438 484 | 006458 485 | 006459 486 | 006484 487 | 006497 488 | 006524 489 | 006588 490 | 006593 491 | 006625 492 | 006632 493 | 006654 494 | 006660 495 | 006668 496 | 006706 497 | 006719 498 | 006734 499 | 006736 500 | 006748 501 | 006766 502 | 006783 503 | 006821 504 | 006822 505 | 006858 506 | 006868 507 | 006884 508 | 006893 509 | 006900 510 | 006918 511 | 006931 512 | 006988 513 | 007003 514 | 007004 515 | 007040 516 | 007058 517 | 007068 518 | 007074 519 | 007090 520 | 007133 521 | 007153 522 | 007159 523 | 007167 524 | 007205 525 | 
007208 526 | 007247 527 | 007261 528 | 007270 529 | 007279 530 | 007283 531 | 007284 532 | 007285 533 | 007294 534 | 007305 535 | 007346 536 | 007363 537 | 007374 538 | 007376 539 | 007383 540 | 007396 541 | 007414 542 | 007422 543 | 007424 544 | 007427 545 | 007446 546 | 007468 547 | 007475 548 | 007479 549 | 007490 550 | 007497 551 | 007525 552 | 007527 553 | 007566 554 | 007592 555 | 007601 556 | 007614 557 | 007622 558 | 007647 559 | 007650 560 | 007653 561 | 007663 562 | 007667 563 | 007691 564 | 007699 565 | 007709 566 | 007721 567 | 007731 568 | 007736 569 | 007745 570 | 007779 571 | 007790 572 | 007815 573 | 007819 574 | 007821 575 | 007843 576 | 007855 577 | 007856 578 | 007883 579 | 007898 580 | 007905 581 | 007921 582 | 007931 583 | 007932 584 | 007950 585 | 007963 586 | 007964 587 | 007970 588 | 007971 589 | 008001 590 | 008019 591 | 008026 592 | 008031 593 | 008037 594 | 008044 595 | 008057 596 | 008060 597 | 008079 598 | 008087 599 | 008093 600 | 008098 601 | 008105 602 | 008108 603 | 008160 604 | 008169 605 | 008174 606 | 008188 607 | 008197 608 | 008225 609 | 008232 610 | 008268 611 | 008279 612 | 008294 613 | 008296 614 | 008315 615 | 008329 616 | 008336 617 | 008359 618 | 008360 619 | 008376 620 | 008388 621 | 008391 622 | 008397 623 | 008429 624 | 008444 625 | 008449 626 | 008461 627 | 008466 628 | 008478 629 | 008482 630 | 008483 631 | 008484 632 | 008502 633 | 008503 634 | 008517 635 | 008524 636 | 008549 637 | 008550 638 | 008562 639 | 008572 640 | 008581 641 | 008586 642 | 008601 643 | 008633 644 | 008663 645 | 008665 646 | 008676 647 | 008680 648 | 008706 649 | 008716 650 | 008727 651 | 008739 652 | 008747 653 | 008750 654 | 008768 655 | 008784 656 | 008793 657 | 008794 658 | 008801 659 | 008826 660 | 008838 661 | 008843 662 | 008848 663 | 008859 664 | 008891 665 | 008892 666 | 008909 667 | 008911 668 | 008923 669 | 008929 670 | 008939 671 | 008958 672 | 008960 673 | 008966 674 | 008968 675 | 008969 676 | 008978 677 | 009000 678 | 009006 679 | 009015 680 | 009029 681 | 009045 682 | 009053 683 | 009058 684 | 009060 685 | 009064 686 | 009073 687 | 009078 688 | 009106 689 | 009116 690 | 009121 691 | 009163 692 | 009174 693 | 009178 694 | 009179 695 | 009186 696 | 009205 697 | 009213 698 | 009214 699 | 009254 700 | 009269 701 | 009282 702 | 009283 703 | 009286 704 | 009318 705 | 009326 706 | 009336 707 | 009350 708 | 009358 709 | 009368 710 | 009392 711 | 009406 712 | 009409 713 | 009411 714 | 009424 715 | 009434 716 | 009448 717 | 009469 718 | 009477 719 | 009507 720 | 009515 721 | 009517 722 | 009532 723 | 009558 724 | 009596 725 | 009614 726 | 009620 727 | 009623 728 | 009641 729 | 009644 730 | 009671 731 | 009676 732 | 009699 733 | 009711 734 | 009718 735 | 009729 736 | 009733 737 | 009737 738 | 009745 739 | 009762 740 | 009774 741 | 009776 742 | 009785 743 | 009801 744 | 009810 745 | 009822 746 | 009830 747 | 009834 748 | 009839 749 | 009845 750 | 009848 751 | 009862 752 | 009863 753 | 009879 754 | 009898 755 | 009900 756 | 009904 757 | 009913 758 | 009920 759 | 009932 760 | 009938 761 | 009959 762 | -------------------------------------------------------------------------------- /data/imageset_files/car_test_detect.txt: -------------------------------------------------------------------------------- 1 | 000004 2 | 000014 3 | 000071 4 | 000074 5 | 000082 6 | 000103 7 | 000135 8 | 000137 9 | 000152 10 | 000172 11 | 000188 12 | 000197 13 | 000240 14 | 000252 15 | 000254 16 | 000271 17 | 000284 18 | 000293 19 | 000300 20 | 000301 21 | 000313 22 | 000341 23 | 000351 24 
| 000358 25 | 000361 26 | 000390 27 | 000402 28 | 000415 29 | 000425 30 | 000440 31 | 000453 32 | 000465 33 | 000471 34 | 000488 35 | 000505 36 | 000507 37 | 000529 38 | 000548 39 | 000580 40 | 000585 41 | 000586 42 | 000593 43 | 000602 44 | 000607 45 | 000624 46 | 000634 47 | 000646 48 | 000649 49 | 000669 50 | 000679 51 | 000687 52 | 000693 53 | 000715 54 | 000719 55 | 000721 56 | 000724 57 | 000727 58 | 000736 59 | 000743 60 | 000747 61 | 000757 62 | 000778 63 | 000788 64 | 000801 65 | 000809 66 | 000844 67 | 000881 68 | 000883 69 | 000894 70 | 000932 71 | 000945 72 | 000961 73 | 000984 74 | 000985 75 | 001003 76 | 001005 77 | 001022 78 | 001034 79 | 001058 80 | 001063 81 | 001080 82 | 001085 83 | 001090 84 | 001111 85 | 001134 86 | 001135 87 | 001155 88 | 001198 89 | 001222 90 | 001252 91 | 001267 92 | 001280 93 | 001283 94 | 001291 95 | 001308 96 | 001318 97 | 001321 98 | 001328 99 | 001331 100 | 001335 101 | 001356 102 | 001358 103 | 001369 104 | 001376 105 | 001379 106 | 001382 107 | 001394 108 | 001403 109 | 001422 110 | 001428 111 | 001435 112 | 001476 113 | 001491 114 | 001511 115 | 001525 116 | 001535 117 | 001550 118 | 001552 119 | 001560 120 | 001569 121 | 001572 122 | 001605 123 | 001613 124 | 001616 125 | 001619 126 | 001623 127 | 001626 128 | 001652 129 | 001658 130 | 001700 131 | 001701 132 | 001770 133 | 001776 134 | 001804 135 | 001820 136 | 001838 137 | 001846 138 | 001851 139 | 001857 140 | 001863 141 | 001873 142 | 001883 143 | 001891 144 | 001908 145 | 001913 146 | 001919 147 | 001923 148 | 001924 149 | 001935 150 | 001942 151 | 001951 152 | 001956 153 | 001965 154 | 001991 155 | 002040 156 | 002041 157 | 002057 158 | 002118 159 | 002141 160 | 002143 161 | 002149 162 | 002154 163 | 002177 164 | 002185 165 | 002210 166 | 002223 167 | 002232 168 | 002242 169 | 002245 170 | 002271 171 | 002294 172 | 002319 173 | 002331 174 | 002346 175 | 002349 176 | 002358 177 | 002370 178 | 002383 179 | 002402 180 | 002406 181 | 002416 182 | 002418 183 | 002424 184 | 002446 185 | 002484 186 | 002517 187 | 002522 188 | 002526 189 | 002531 190 | 002532 191 | 002543 192 | 002548 193 | 002556 194 | 002562 195 | 002577 196 | 002583 197 | 002602 198 | 002607 199 | 002610 200 | 002622 201 | 002650 202 | 002681 203 | 002701 204 | 002703 205 | 002729 206 | 002733 207 | 002740 208 | 002746 209 | 002750 210 | 002752 211 | 002758 212 | 002789 213 | 002790 214 | 002793 215 | 002808 216 | 002814 217 | 002829 218 | 002840 219 | 002871 220 | 002900 221 | 002920 222 | 002927 223 | 002955 224 | 002961 225 | 002993 226 | 003006 227 | 003033 228 | 003046 229 | 003052 230 | 003055 231 | 003070 232 | 003101 233 | 003109 234 | 003128 235 | 003143 236 | 003168 237 | 003179 238 | 003217 239 | 003220 240 | 003234 241 | 003257 242 | 003265 243 | 003276 244 | 003289 245 | 003302 246 | 003306 247 | 003321 248 | 003328 249 | 003334 250 | 003348 251 | 003353 252 | 003357 253 | 003364 254 | 003375 255 | 003385 256 | 003387 257 | 003405 258 | 003414 259 | 003434 260 | 003456 261 | 003460 262 | 003476 263 | 003481 264 | 003483 265 | 003486 266 | 003501 267 | 003503 268 | 003512 269 | 003515 270 | 003517 271 | 003523 272 | 003527 273 | 003545 274 | 003552 275 | 003553 276 | 003559 277 | 003569 278 | 003570 279 | 003578 280 | 003607 281 | 003630 282 | 003631 283 | 003661 284 | 003666 285 | 003677 286 | 003683 287 | 003692 288 | 003701 289 | 003718 290 | 003719 291 | 003733 292 | 003746 293 | 003769 294 | 003810 295 | 003815 296 | 003833 297 | 003878 298 | 003893 299 | 003904 300 | 003916 301 | 003917 302 | 003940 303 | 
003962 304 | 003967 305 | 004001 306 | 004026 307 | 004030 308 | 004043 309 | 004050 310 | 004071 311 | 004080 312 | 004088 313 | 004097 314 | 004104 315 | 004107 316 | 004115 317 | 004147 318 | 004154 319 | 004172 320 | 004183 321 | 004187 322 | 004188 323 | 004216 324 | 004217 325 | 004240 326 | 004245 327 | 004249 328 | 004254 329 | 004290 330 | 004305 331 | 004313 332 | 004320 333 | 004335 334 | 004337 335 | 004378 336 | 004426 337 | 004442 338 | 004453 339 | 004456 340 | 004458 341 | 004478 342 | 004492 343 | 004521 344 | 004543 345 | 004545 346 | 004554 347 | 004560 348 | 004578 349 | 004580 350 | 004586 351 | 004596 352 | 004602 353 | 004615 354 | 004650 355 | 004667 356 | 004684 357 | 004688 358 | 004697 359 | 004731 360 | 004734 361 | 004745 362 | 004755 363 | 004762 364 | 004763 365 | 004764 366 | 004780 367 | 004800 368 | 004804 369 | 004806 370 | 004810 371 | 004821 372 | 004844 373 | 004860 374 | 004870 375 | 004891 376 | 004927 377 | 004933 378 | 004940 379 | 004959 380 | 004965 381 | 004981 382 | 004989 383 | 004996 384 | 005005 385 | 005021 386 | 005030 387 | 005035 388 | 005041 389 | 005074 390 | 005083 391 | 005091 392 | 005099 393 | 005105 394 | 005125 395 | 005126 396 | 005142 397 | 005157 398 | 005158 399 | 005163 400 | 005166 401 | 005192 402 | 005204 403 | 005275 404 | 005287 405 | 005296 406 | 005316 407 | 005333 408 | 005357 409 | 005372 410 | 005381 411 | 005401 412 | 005432 413 | 005437 414 | 005443 415 | 005447 416 | 005468 417 | 005484 418 | 005493 419 | 005501 420 | 005520 421 | 005523 422 | 005546 423 | 005558 424 | 005627 425 | 005638 426 | 005649 427 | 005663 428 | 005666 429 | 005678 430 | 005694 431 | 005706 432 | 005708 433 | 005717 434 | 005739 435 | 005746 436 | 005763 437 | 005770 438 | 005775 439 | 005793 440 | 005809 441 | 005835 442 | 005842 443 | 005862 444 | 005869 445 | 005870 446 | 005904 447 | 005924 448 | 005929 449 | 005932 450 | 005953 451 | 005959 452 | 005974 453 | 005987 454 | 006006 455 | 006016 456 | 006017 457 | 006019 458 | 006024 459 | 006034 460 | 006047 461 | 006082 462 | 006094 463 | 006109 464 | 006113 465 | 006121 466 | 006137 467 | 006155 468 | 006213 469 | 006228 470 | 006242 471 | 006246 472 | 006256 473 | 006283 474 | 006324 475 | 006326 476 | 006327 477 | 006331 478 | 006333 479 | 006334 480 | 006340 481 | 006358 482 | 006376 483 | 006383 484 | 006386 485 | 006397 486 | 006405 487 | 006415 488 | 006420 489 | 006423 490 | 006435 491 | 006441 492 | 006454 493 | 006469 494 | 006481 495 | 006493 496 | 006502 497 | 006510 498 | 006525 499 | 006527 500 | 006567 501 | 006581 502 | 006590 503 | 006634 504 | 006653 505 | 006685 506 | 006693 507 | 006717 508 | 006724 509 | 006733 510 | 006741 511 | 006749 512 | 006754 513 | 006757 514 | 006785 515 | 006790 516 | 006793 517 | 006817 518 | 006853 519 | 006854 520 | 006882 521 | 006890 522 | 006907 523 | 006925 524 | 006955 525 | 006970 526 | 006974 527 | 006996 528 | 007001 529 | 007014 530 | 007015 531 | 007034 532 | 007061 533 | 007082 534 | 007085 535 | 007112 536 | 007118 537 | 007126 538 | 007143 539 | 007164 540 | 007173 541 | 007176 542 | 007179 543 | 007242 544 | 007246 545 | 007267 546 | 007273 547 | 007278 548 | 007281 549 | 007282 550 | 007288 551 | 007304 552 | 007337 553 | 007339 554 | 007347 555 | 007358 556 | 007362 557 | 007368 558 | 007386 559 | 007399 560 | 007405 561 | 007423 562 | 007429 563 | 007447 564 | 007452 565 | 007459 566 | 007478 567 | 007496 568 | 007501 569 | 007507 570 | 007510 571 | 007518 572 | 007522 573 | 007556 574 | 007562 575 | 007580 576 | 
007589 577 | 007591 578 | 007613 579 | 007617 580 | 007634 581 | 007665 582 | 007676 583 | 007690 584 | 007693 585 | 007701 586 | 007714 587 | 007734 588 | 007757 589 | 007761 590 | 007797 591 | 007800 592 | 007806 593 | 007807 594 | 007818 595 | 007835 596 | 007839 597 | 007844 598 | 007861 599 | 007866 600 | 007882 601 | 007906 602 | 007927 603 | 007948 604 | 007960 605 | 007961 606 | 007967 607 | 007969 608 | 007992 609 | 008006 610 | 008020 611 | 008030 612 | 008035 613 | 008047 614 | 008052 615 | 008088 616 | 008104 617 | 008114 618 | 008120 619 | 008126 620 | 008129 621 | 008133 622 | 008135 623 | 008136 624 | 008143 625 | 008152 626 | 008158 627 | 008161 628 | 008212 629 | 008215 630 | 008231 631 | 008246 632 | 008259 633 | 008264 634 | 008270 635 | 008271 636 | 008276 637 | 008283 638 | 008289 639 | 008290 640 | 008324 641 | 008353 642 | 008357 643 | 008363 644 | 008375 645 | 008378 646 | 008383 647 | 008408 648 | 008414 649 | 008421 650 | 008432 651 | 008447 652 | 008451 653 | 008464 654 | 008479 655 | 008481 656 | 008488 657 | 008504 658 | 008548 659 | 008560 660 | 008579 661 | 008593 662 | 008609 663 | 008622 664 | 008632 665 | 008657 666 | 008658 667 | 008668 668 | 008682 669 | 008684 670 | 008693 671 | 008694 672 | 008708 673 | 008711 674 | 008715 675 | 008724 676 | 008734 677 | 008761 678 | 008777 679 | 008785 680 | 008788 681 | 008797 682 | 008800 683 | 008824 684 | 008828 685 | 008829 686 | 008895 687 | 008896 688 | 008903 689 | 008906 690 | 008910 691 | 008915 692 | 008916 693 | 008996 694 | 009008 695 | 009023 696 | 009033 697 | 009052 698 | 009071 699 | 009077 700 | 009081 701 | 009092 702 | 009096 703 | 009111 704 | 009119 705 | 009122 706 | 009125 707 | 009134 708 | 009140 709 | 009149 710 | 009156 711 | 009182 712 | 009201 713 | 009206 714 | 009210 715 | 009241 716 | 009243 717 | 009261 718 | 009267 719 | 009284 720 | 009302 721 | 009304 722 | 009321 723 | 009322 724 | 009335 725 | 009341 726 | 009360 727 | 009376 728 | 009381 729 | 009384 730 | 009387 731 | 009396 732 | 009426 733 | 009427 734 | 009430 735 | 009475 736 | 009492 737 | 009530 738 | 009536 739 | 009564 740 | 009590 741 | 009593 742 | 009599 743 | 009601 744 | 009643 745 | 009652 746 | 009675 747 | 009680 748 | 009683 749 | 009688 750 | 009694 751 | 009701 752 | 009704 753 | 009705 754 | 009723 755 | 009740 756 | 009757 757 | 009768 758 | 009770 759 | 009777 760 | 009779 761 | 009804 762 | 009806 763 | 009821 764 | 009827 765 | 009829 766 | 009847 767 | 009849 768 | 009856 769 | 009873 770 | 009883 771 | 009895 772 | 009903 773 | 009927 774 | 009943 775 | 009963 776 | -------------------------------------------------------------------------------- /requirements: -------------------------------------------------------------------------------- 1 | # Name Version Build Channel 2 | _libgcc_mutex 0.1 main 3 | asn1crypto 0.24.0 py37_1003 conda-forge 4 | backcall 0.1.0 py37_0 5 | blas 1.0 mkl 6 | bleach 3.1.0 py37_0 7 | bzip2 1.0.6 h14c3975_5 8 | ca-certificates 2019.8.28 0 9 | cairo 1.14.12 h8948797_3 10 | certifi 2019.9.11 py37_0 11 | cffi 1.12.1 py37h2e261b9_0 12 | chardet 3.0.4 py37_1003 conda-forge 13 | cloudpickle 0.8.0 py_0 conda-forge 14 | cryptography 2.5 py37h9d9f1b6_1 conda-forge 15 | cudatoolkit 9.0 h13b8566_0 16 | cycler 0.10.0 py37_0 17 | cython 0.29.7 py37he6710b0_0 18 | cytoolz 0.9.0.1 py37h14c3975_1001 conda-forge 19 | dask-core 1.1.3 py_0 conda-forge 20 | dbus 1.13.6 h746ee38_0 21 | decorator 4.3.2 py37_0 22 | easydict 1.9 pypi_0 pypi 23 | entrypoints 0.3 py37_0 24 | expat 2.2.6 he6710b0_0 25 
| ffmpeg 4.0 hcdf2ecd_0 26 | fontconfig 2.13.0 h9420a91_0 27 | freeglut 3.0.0 hf484d3e_5 28 | freetype 2.9.1 h8a8886c_1 29 | git 2.20.1 pl526hacde149_0 30 | glib 2.56.2 hd408876_0 31 | gmp 6.1.2 h6c8ec71_1 32 | graphite2 1.3.13 h23475e2_0 33 | gst-plugins-base 1.14.0 hbbd80ab_1 34 | gstreamer 1.14.0 hb453b48_1 35 | harfbuzz 1.8.8 hffaf4a1_0 36 | hdbscan 0.8.22 py37hd352d35_1 conda-forge 37 | hdf5 1.10.2 hba1933b_1 38 | icu 58.2 h9c2bf20_1 39 | idna 2.8 py37_1000 conda-forge 40 | imageio 2.5.0 py37_0 conda-forge 41 | intel-openmp 2019.1 144 42 | ipykernel 5.1.0 py37h39e3cac_0 43 | ipython 7.3.0 py37h39e3cac_0 44 | ipython_genutils 0.2.0 py37_0 45 | ipywidgets 7.4.2 py37_0 46 | jasper 2.0.14 h07fcdf6_1 47 | jedi 0.13.3 py37_0 48 | jinja2 2.10 py37_0 49 | joblib 0.13.2 py_0 conda-forge 50 | jpeg 9b h024ee3a_2 51 | jsonschema 2.6.0 py37_0 52 | jupyter 1.0.0 py37_7 53 | jupyter_client 5.2.4 py37_0 54 | jupyter_console 6.0.0 py37_0 55 | jupyter_core 4.4.0 py37_0 56 | kiwisolver 1.0.1 py37hf484d3e_0 57 | krb5 1.16.1 h173b8e3_7 58 | libcurl 7.64.1 h20c2e04_0 59 | libedit 3.1.20181209 hc058e9b_0 60 | libffi 3.2.1 hd88cf55_4 61 | libgcc-ng 8.2.0 hdf63c60_1 62 | libgfortran-ng 7.3.0 hdf63c60_0 63 | libglu 9.0.0 hf484d3e_1 64 | libopencv 3.4.2 hb342d67_1 65 | libopus 1.3 h7b6447c_0 66 | libpng 1.6.36 hbc83047_0 67 | libsodium 1.0.16 h1bed415_0 68 | libssh2 1.8.2 h1ba5d50_0 69 | libstdcxx-ng 8.2.0 hdf63c60_1 70 | libtiff 4.0.10 h2733197_2 71 | libuuid 1.0.3 h1bed415_2 72 | libvpx 1.7.0 h439df22_0 73 | libxcb 1.13 h1bed415_1 74 | libxml2 2.9.9 he19cac6_0 75 | markupsafe 1.1.1 py37h7b6447c_0 76 | matplotlib 3.0.2 py37h5429711_0 77 | matplotlib-base 3.0.2 py37h167e16e_1001 conda-forge 78 | mistune 0.8.4 py37h7b6447c_0 79 | mkl 2019.4 243 80 | mkl-service 2.3.0 py37he904b0f_0 81 | mkl_fft 1.0.10 py37ha843d7b_0 82 | mkl_random 1.0.2 py37hd81dba3_0 83 | nbconvert 5.3.1 py37_0 84 | nbformat 4.4.0 py37_0 85 | ncurses 6.1 he6710b0_1 86 | networkx 2.2 py_1 conda-forge 87 | ninja 1.8.2 py37h6bb024c_1 88 | notebook 5.7.4 py37_0 89 | numpy 1.16.1 py37h7e9f1db_0 90 | numpy-base 1.16.1 py37hde5b4d6_0 91 | olefile 0.46 py37_0 92 | opencv 3.4.2 py37h6fd60c2_1 93 | openssl 1.1.1d h7b6447c_3 94 | pandas 0.24.1 py37he6710b0_0 95 | pandoc 2.2.3.2 0 96 | pandocfilters 1.4.2 py37_1 97 | parso 0.3.4 py37_0 98 | patsy 0.5.1 py37_0 99 | pcre 8.42 h439df22_0 100 | perl 5.26.2 h14c3975_0 101 | pexpect 4.6.0 py37_0 102 | pickleshare 0.7.5 py37_0 103 | pillow 5.4.1 py37h34e0f95_0 104 | pip 19.0.3 py37_0 105 | pixman 0.36.0 h7b6447c_0 106 | prometheus_client 0.6.0 py37_0 107 | prompt_toolkit 2.0.9 py37_0 108 | ptyprocess 0.6.0 py37_0 109 | py-opencv 3.4.2 py37hb342d67_1 110 | pyclustering 0.9.0 pypi_0 pypi 111 | pycocotools 2.0.0 pypi_0 pypi 112 | pycparser 2.19 py37_0 113 | pygments 2.3.1 py37_0 114 | pyopenssl 19.0.0 py37_0 conda-forge 115 | pyparsing 2.3.1 py37_0 116 | pyqt 5.9.2 py37h05f1152_2 117 | pysocks 1.6.8 py37_1002 conda-forge 118 | python 3.7.2 h0371630_0 119 | python-dateutil 2.8.0 py37_0 120 | pytorch 1.0.1 py3.7_cuda9.0.176_cudnn7.4.2_2 pytorch 121 | pytz 2018.9 py37_0 122 | pywavelets 1.0.2 py37h3010b51_0 conda-forge 123 | pyzmq 18.0.0 py37he6710b0_0 124 | qt 5.9.7 h5867ecd_1 125 | qtconsole 4.4.3 py37_0 126 | readline 7.0 h7b6447c_5 127 | requests 2.21.0 py37_1000 conda-forge 128 | scikit-image 0.14.2 py37hf484d3e_1 conda-forge 129 | scikit-learn 0.21.3 py37hd81dba3_0 130 | scipy 1.2.1 py37h7c811a0_0 131 | seaborn 0.9.0 py37_0 132 | send2trash 1.5.0 py37_0 133 | setuptools 40.8.0 py37_0 134 | sip 4.19.8 
py37hf484d3e_0 135 | six 1.12.0 py37_0 136 | sqlite 3.26.0 h7b6447c_0 137 | statsmodels 0.10.1 py37hdd07704_0 138 | terminado 0.8.1 py37_1 139 | testpath 0.4.2 py37_0 140 | tk 8.6.8 hbc83047_0 141 | toolz 0.9.0 py_1 conda-forge 142 | torchfile 0.1.0 py_0 conda-forge 143 | torchvision 0.2.2 py_2 pytorch 144 | tornado 5.1.1 py37h7b6447c_0 145 | traitlets 4.3.2 py37_0 146 | urllib3 1.24.1 py37_1000 conda-forge 147 | visdom 0.1.8.8 0 conda-forge 148 | wcwidth 0.1.7 py37_0 149 | webencodings 0.5.1 py37_1 150 | websocket-client 0.55.0 py37_0 conda-forge 151 | wheel 0.33.1 py37_0 152 | widgetsnbextension 3.4.2 py37_0 153 | xz 5.2.4 h14c3975_4 154 | zeromq 4.3.1 he6710b0_3 155 | zlib 1.2.11 h7b6447c_3 156 | zstd 1.3.7 h0b5b093_0 157 | -------------------------------------------------------------------------------- /create_spoc_features.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import json 4 | 5 | from sklearn.decomposition import PCA 6 | from torch.autograd import Variable 7 | import torchvision.models as models 8 | 9 | from data import * 10 | import active_learning_package.helpers as helpers 11 | 12 | 13 | 14 | def get_feature_maps(dataset, 15 | net, 16 | imageset_name, 17 | save_dir): 18 | 19 | path_to_image_feature_dir = os.path.join(save_dir, imageset_name + '586_conv5_3_features_before_relu/') 20 | # path_to_image_feature_dir = save_dir+'2012trainval586_conv5_3_features/' 21 | 22 | if not os.path.exists(path_to_image_feature_dir): 23 | os.mkdir(path_to_image_feature_dir) 24 | 25 | # go through all images in the imageset 26 | already_saved = os.listdir(path_to_image_feature_dir) 27 | 28 | transform = BaseTransform(586, (104, 117, 123)) 29 | 30 | for i, idx in enumerate(dataset.ids): 31 | image_feature_path = path_to_image_feature_dir + str(idx[1]) + '.pickle' 32 | if str(idx[1]) + '.pickle' in already_saved: 33 | print(i, '/', len(dataset.ids), ' was already saved') 34 | 35 | # load feature and append it 36 | # features = helpers.unpickle(image_feature_path) 37 | 38 | # conv_feature_list.append(features) 39 | 40 | continue 41 | 42 | print(i, '/', len(dataset.ids)) 43 | 44 | # load image and transform (colors in a different order) 45 | img = dataset.pull_image_using_imageset_id(idx) 46 | 47 | # convert BGR -> RGB and HWC -> CHW 48 | x = torch.from_numpy(transform(img)[0][:, :, (2, 1, 0)]).permute(2, 0, 1) # We use a pre-trained model from the pytorch model zoo, which is trained with RGB; cv2.imread loads in BGR 49 | 50 | x = Variable(x.unsqueeze(0)) 51 | 52 | if torch.cuda.is_available(): 53 | torch.cuda.empty_cache() 54 | x = x.to('cuda') 55 | 56 | # forward pass up to conv5_3 57 | features = net(x) 58 | 59 | # sum-pool each of the 512 channels over all spatial locations (NOTE: 512 is specific to VGG16 conv5_3) 60 | features = features.reshape(1, 512, -1).sum(dim=-1) 61 | 62 | # move features back to cpu 63 | if torch.cuda.is_available(): 64 | features = features.to('cpu') 65 | 66 | # append to conv_feature list 67 | # conv_feature_list.append(features) 68 | 69 | with open(image_feature_path, 'wb') as f: 70 | pickle.dump(features, f) 71 | 72 | return
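# Added note (hedged): a minimal sketch of the sum-pooling step performed by
# get_feature_maps() above, on random data instead of real conv5_3 activations,
# to make the tensor shapes explicit. All names below are illustrative only:
#
#   import torch
#   fmap = torch.randn(1, 512, 37, 37)           # stand-in conv5_3 activations (spatial size depends on the input)
#   spoc = fmap.reshape(1, 512, -1).sum(dim=-1)  # sum over all spatial positions
#   assert spoc.shape == (1, 512)                # one 512-D descriptor per image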
73 | def calculate_PCA_and_whitening_parameters(dataset, 74 | imageset_name, 75 | save_dir): 76 | 77 | 78 | conv_feature_list = [] 79 | if '2007' in imageset_name: 80 | print('PCA should be computed on the 2012 dataset; this failsafe prevents overwriting the 2012 PCA with a 2007 PCA') 81 | raise NotImplementedError 82 | 83 | # path_to_image_feature_dir = os.path.join(save_dir,imageset_name+'586_conv5_3_features_before_relu/') 84 | path_to_image_feature_dir = save_dir+'2012trainval586_conv5_3_features/' 85 | # 86 | 87 | # load features 88 | pca_save_path = path_to_image_feature_dir + imageset_name +'PCA.pickle' 89 | 90 | if os.path.exists(pca_save_path): 91 | print('already did this PCA') 92 | return 93 | print('load features:') 94 | for i, idx in enumerate(dataset.ids): 95 | print('load feature', i, '/', len(dataset.ids),' and L2 normalize features before PCA') 96 | image_feature_path = path_to_image_feature_dir + str(idx[1]) + '.pickle' 97 | 98 | # load feature and append it 99 | features = helpers.unpickle(image_feature_path) 100 | 101 | # L2 normalize 102 | features = features / features.norm(2) 103 | 104 | 105 | conv_feature_list.append(features) 106 | 107 | np_features = torch.cat(conv_feature_list).detach().numpy() 108 | print('loaded all features and transformed them into a numpy array') 109 | 110 | ## calculate PCA parameters (which dimensions should be kept) 111 | # numpy array 112 | print('Do PCA') 113 | pca = PCA(n_components = 256, svd_solver = 'full', random_state = 42,whiten=True) 114 | pca.fit(np_features) 115 | print('did PCA') 116 | 117 | # save PCA 118 | pca_save_path = path_to_image_feature_dir + imageset_name +'PCA.pickle' 119 | 120 | with open(pca_save_path, 'wb') as f: 121 | pickle.dump(pca, f) 122 | 123 | print('Saved PCA') 124 | 125 | return 126 | 127 | def create_spoc_features(dataset, 128 | image_features_path, 129 | PCA_param_path, 130 | imageset_name, 131 | save_dir): 132 | """ 133 | See Babenko 2014 134 | 135 | """ 136 | 137 | # load pca and whitening parameters 138 | pca = helpers.unpickle(PCA_param_path) 139 | 140 | if not os.path.exists(save_dir): 141 | os.mkdir(save_dir) 142 | 143 | # load image features 144 | for i, idx in enumerate(dataset.ids): 145 | image_feature_path = image_features_path + str(idx[1]) + '.pickle' 146 | 147 | # load feature and append it 148 | features = helpers.unpickle(image_feature_path) 149 | 150 | print(i, '/', len(dataset.ids)) 151 | 152 | # l2 normalization 153 | features = features/features.norm(2) 154 | 155 | # apply pca transform + whitening to features 156 | features = pca.transform(features.detach().numpy()) 157 | features = torch.tensor(features) 158 | 159 | # l2-normalization 160 | features = features/features.norm(2) 161 | spoc_feature_path = save_dir + str(idx[1]) + '.pickle' 162 | 163 | # save SPoC representation 164 | 165 | with open(spoc_feature_path,'wb') as f: 166 | pickle.dump(features, f) 167 | 168 | print('Created and saved all SPoC representations of images') 169 | 170 | return 171 | 172 | 173 | def calculate_scalar_product_image_similarity(tensor_a,tensor_b): 174 | """ 175 | https://datascience.stackexchange.com/questions/744/cosine-similarity-versus-dot-product-as-distance-metrics 176 | 177 | calculates image similarity between two images using a simple scalar product matching kernel: 178 | L. Bo and C. Sminchisescu. Efficient match kernel between 179 | sets of features for visual recognition. In Advances in Neural Information Processing Systems (NIPS), pages 135–143, 180 | 2009.
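Added note (hedged): because the SPoC descriptors produced above are
L2-normalized before this kernel is applied, the scalar product coincides
with the cosine similarity of the two images (both vectors have unit norm).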
181 | 182 | :return: similarity 183 | """ 184 | 185 | return torch.dot(tensor_a.squeeze(),tensor_b.squeeze()) 186 | 187 | 188 | def calculate_all_images_similarities(dataset, load_dir_spoc_features): 189 | """ 190 | 191 | :return: 192 | """ 193 | 194 | # todo: can be made faster, now doing redundant calculations (similarities of a->b and b->a) 195 | 196 | save_dir = load_dir_spoc_features + 'image_similarities/' 197 | if not os.path.exists(save_dir): 198 | os.mkdir(save_dir) 199 | 200 | already_saved = os.listdir(save_dir) 201 | 202 | # go through the dataset 203 | for i,idx in enumerate(dataset.ids): 204 | if str(idx[1]) + '.pickle' in already_saved: 205 | print(i, '/', len(dataset.ids), ' was already saved') 206 | continue 207 | 208 | print(i,'/',len(dataset.ids)) 209 | # placeholder to store similarities between all images 210 | image_similarity_dir = {} 211 | 212 | # load image description 213 | image_path_a = load_dir_spoc_features+ str(idx[1]) + '.pickle' 214 | image_a = helpers.unpickle(image_path_a) 215 | 216 | # go through all OTHER images except the idx we are currently at 217 | other_images = [idj for idj in dataset.ids if idj != idx] 218 | 219 | for j, idj in enumerate(other_images): 220 | 221 | # load image description 222 | image_path_b = load_dir_spoc_features + str(idj[1]) + '.pickle' 223 | image_b = helpers.unpickle(image_path_b) 224 | 225 | 226 | # calculate similarity 227 | similarity = calculate_scalar_product_image_similarity(image_a,image_b) 228 | 229 | if similarity.shape != torch.Size([]): # torch.dot returns a 0-dim tensor 230 | print(similarity) 231 | print('similarity should be a scalar') 232 | raise NotImplementedError 233 | 234 | # store similarity 235 | image_similarity_dir[idj[1]] = similarity.item() 236 | 237 | # save image similarity dir 238 | path = save_dir + str(idx[1]) + '.pickle' 239 | 240 | with open(path,'wb') as f: 241 | pickle.dump(image_similarity_dir, f) 242 | 243 | return save_dir 244 | 245 | 246 | def calculate_density_per_imageset(dataset,load_dir_similarities): 247 | """ 248 | density is the mean similarity of one image to all other images in the dataset (see Settles 2008) 249 | """ 250 | 251 | # todo: can be made faster, now doing redundant calculations (similarities of a->b and b->a) 252 | # go through the dataset 253 | density = {} 254 | for i,idx in enumerate(dataset.ids): 255 | print(i,'/',len(dataset.ids)) 256 | # load similarity between all images in trainval and the current image (idx) 257 | path = load_dir_similarities + str(idx[1]) + '.pickle' 258 | 259 | similarities_idx = helpers.unpickle(path) 260 | 261 | # go through all OTHER images in the dataset (can be a subset of trainval, e.g. only the car images) 262 | # except the idx we are currently at
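# Added note (hedged): the loop below computes the information-density term
# of Settles & Craven (2008):
#     density(x) = (1 / (N - 1)) * sum_{x' != x} sim(x, x')
# i.e. the mean SPoC similarity of image x to every other image in the imageset.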
263 | other_images = [idj for idj in dataset.ids if idj != idx] 264 | 265 | # placeholder 266 | density[idx[1]] = 0 267 | for j, idj in enumerate(other_images): 268 | 269 | density[idx[1]] += similarities_idx[idj[1]] 270 | 271 | # divide by the number of images to get the mean 272 | density[idx[1]] /= len(other_images) 273 | 274 | 275 | 276 | # save image density dir 277 | path = load_dir_similarities + dataset.image_set[0][1] + '.pickle' 278 | 279 | 280 | 281 | with open(path,'wb') as f: 282 | pickle.dump(density, f) 283 | 284 | 285 | 286 | 287 | # def create_image_affinity_propagation_clusters(features, 288 | # dataset, 289 | # imageset_name): 290 | # 291 | # return 292 | 293 | if __name__ == '__main__': 294 | 295 | save_dir = 'data/' 296 | 297 | """get feature maps""" 298 | # imagesets = [[('2012', 'trainval')], 299 | # [('2007', 'trainval')], 300 | # [('2012', 'bottle_trainval_detect')], 301 | # [('2012', 'car_trainval_detect')], 302 | # [('2012', 'horse_trainval_detect')], 303 | # [('2012', 'sheep_trainval_detect')], 304 | # [('2012', 'pottedplant_trainval_detect')] 305 | # ] 306 | 307 | 308 | # load network 309 | # vgg16 = models.vgg16(pretrained=True) #NOTE: I adjusted the source code of the vgg16 such that it only goes up to the conv5_3 layer in forward passes 310 | # vgg16.eval() 311 | # 312 | # for imageset in imagesets: 313 | # # load dataset 314 | # dataset = VOCDetection(VOC_ROOT_LOCAL, imageset, BaseTransform(300, config.voc['dataset_mean']), 315 | # VOCAnnotationTransform()) 316 | # 317 | # get_feature_maps(dataset = dataset, 318 | # net = vgg16, 319 | # imageset_name=imageset[0][0] + imageset[0][1], 320 | # save_dir= save_dir) 321 | 322 | """ Get PCA and whitening params on hold-out dataset (VOC2012)""" 323 | 324 | # 325 | # 326 | # imagesets = [[('2012', 'trainval')], 327 | # [('2012', 'bottle_trainval_detect')], 328 | # [('2012', 'car_trainval_detect')], 329 | # [('2012', 'horse_trainval_detect')], 330 | # [('2012', 'sheep_trainval_detect')], 331 | # [('2012', 'pottedplant_trainval_detect')] 332 | # ] 333 | # 334 | # for imageset in imagesets: 335 | # 336 | # # load dataset 337 | # dataset = VOCDetection(VOC_ROOT_LOCAL, imageset, BaseTransform(300, config.voc['dataset_mean']), VOCAnnotationTransform()) 338 | # 339 | # calculate_PCA_and_whitening_parameters(dataset=dataset, 340 | # imageset_name=imageset[0][0]+imageset[0][1], 341 | # save_dir=save_dir) 342 | # 343 | # 344 | 345 | """ Make SPoC features """ 346 | # Imagesets 347 | # imagesets = [[('2007', 'trainval')]] 348 | # 349 | # for imageset in imagesets: 350 | # # load dataset 351 | # dataset = VOCDetection(VOC_ROOT_LOCAL, imageset, BaseTransform(586, config.voc['dataset_mean']), 352 | # VOCAnnotationTransform()) 353 | # # 354 | # # calculate_PCA_and_whitening_parameters(dataset=dataset, 355 | # # imageset_name=imageset[0][0]+imageset[0][1], 356 | # # save_dir=save_dir, 357 | # # net=vgg16) 358 | # pca_dir = save_dir+'2012trainval586_conv5_3_features_before_relu/' 359 | # PCA_param_path = pca_dir + '2012trainvalPCA.pickle' # for now only using the 2012 full trainval PCA 360 | # image_features_path = os.path.join(os.getcwd(), save_dir, '2007trainval586_conv5_3_features_before_relu/') 361 | # # path_to_image_feature_dir = os.path.join(save_dir,imageset_name+'586_conv5_3_features/') 362 | # 363 | # create_spoc_features(dataset, 364 | # image_features_path, 365 | # PCA_param_path, 366 | # imageset_name=imageset[0][0] + imageset[0][1], 367 | # save_dir=image_features_path + '2012trainvalPCA/')
368 | 369 | 370 | """ Calculate complete similarities from each image in trainval 2007 to all other images""" 371 | 372 | # dataset = VOCDetection(VOC_ROOT_LOCAL, [('2007', 'trainval')], BaseTransform(586, config.voc['dataset_mean']), 373 | # VOCAnnotationTransform()) 374 | # image_features_path = os.path.join(os.getcwd(), save_dir, '2007trainval586_conv5_3_features_before_relu/') 375 | # load_dir_spoc_features = image_features_path + '2012trainvalPCA/' 376 | # similarity_dir = calculate_all_images_similarities(dataset=dataset, 377 | # load_dir_spoc_features = load_dir_spoc_features) 378 | # 379 | 380 | """ Create density per imageset """ 381 | image_sim_dir = save_dir+'2007trainval586_conv5_3_features_before_relu/2012trainvalPCA/image_similarities/' 382 | 383 | 384 | imagesets = [[('2007', 'trainval')], 385 | [('2007', 'bottle_trainval_detect')], 386 | [('2007', 'car_trainval_detect')], 387 | [('2007', 'horse_trainval_detect')], 388 | [('2007', 'sheep_trainval_detect')], 389 | [('2007', 'pottedplant_trainval_detect')] 390 | ] 391 | 392 | for imageset in imagesets: 393 | print(imageset) 394 | # load dataset 395 | dataset = VOCDetection(VOC_ROOT_LOCAL, imageset, BaseTransform(300, config.voc['dataset_mean']), VOCAnnotationTransform()) 396 | calculate_density_per_imageset(dataset=dataset, 397 | load_dir_similarities = image_sim_dir) -------------------------------------------------------------------------------- /ssd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | from layers import * 6 | from data import voc 7 | import os 8 | 9 | 10 | class SSD(nn.Module): 11 | """Single Shot Multibox Architecture 12 | The network is composed of a base VGG network followed by the 13 | added multibox conv layers. Each multibox layer branches into 14 | 1) conv2d for class conf scores 15 | 2) conv2d for localization predictions 16 | 3) associated priorbox layer to produce default bounding 17 | boxes specific to the layer's feature map size. 18 | See: https://arxiv.org/pdf/1512.02325.pdf for more details. 19 | 20 | Also implemented a version predicting a standard deviation per bounding box coordinate, following: 21 | CVPR 2019 paper: 22 | Bounding Box Regression with Uncertainty for Accurate Object Detection 23 | by Yihui He, Chenchen Zhu, Jianren Wang,
Marios Savvides, Xiangyu Zhang 24 | 25 | Args: 26 | phase: (string) Can be "test" or "train" 27 | size: input image size 28 | base: VGG16 layers for input, size of either 300 or 512 29 | extras: extra layers that feed to multibox loc and conf layers 30 | head: "multibox head" consists of loc and conf conv layers 31 | """ 32 | 33 | def __init__(self, phase, modeltype, base, extras, head, num_classes, default_forward, merging_method, sampling_strategy, sample_select_forward, sample_select_nms_conf_thresh, cfg, forward_vgg_base_only): 34 | super(SSD, self).__init__() 35 | self.phase = phase 36 | self.num_classes = num_classes 37 | self.cfg = cfg 38 | self.priorbox = PriorBox(self.cfg, modeltype) 39 | with torch.no_grad(): 40 | self.priors = Variable(self.priorbox.forward()) 41 | 42 | # todo: convert to x1y1x2y2 format here if necessary 43 | 44 | 45 | self.size = 300 46 | 47 | # SSD network 48 | self.vgg = nn.ModuleList(base) 49 | # Layer learns to scale the l2 normalized features from conv4_3 50 | self.L2Norm = L2Norm(512, 20) 51 | self.extras = nn.ModuleList(extras) 52 | 53 | self.loc = nn.ModuleList(head[0]) 54 | self.modeltype = modeltype 55 | if self.modeltype == 'SSD300KL': 56 | self.loc_std = nn.ModuleList(head[2]) 57 | self.conf = nn.ModuleList(head[1]) 58 | if self.modeltype in ['SSD300','SSD300KL']: 59 | self.size = 300 60 | else: 61 | raise NotImplementedError() 62 | 63 | if phase == 'test': 64 | self.softmax = nn.Softmax(dim=-1) 65 | 66 | 67 | if sample_select_forward and merging_method in ['bsas','hbdscan','pre_nms_avg']: 68 | conf_thresh = sample_select_nms_conf_thresh # merging boxes can be expensive; with fewer boxes we can apply a more aggressive confidence threshold 69 | else: 70 | conf_thresh = 0.01 71 | # Active Learning parameters added to enable active learning experiments and usage 72 | self.detect = Detect(num_classes, 0, 200, conf_thresh, 0.45, # default values in paper: num_classes,0,200,0.01,0.45 73 | default_forward, 74 | merging_method, 75 | sampling_strategy, 76 | modeltype) 77 | 78 | self.forward_vgg_base_only = forward_vgg_base_only 79 | 80 | 81 | def forward(self, x): 82 | """Applies network layers and ops on input image(s) x. 83 | 84 | Args: 85 | x: input image or batch of images. Shape: [batch,3,300,300]. 86 | 87 | Return: 88 | Depending on phase: 89 | test: 90 | Variable(tensor) of output class label predictions, 91 | confidence score, and corresponding location predictions for 92 | each object detected. Shape: [batch,topk,7] 93 | 94 | train: 95 | list of concat outputs from: 96 | 1: confidence layers, Shape: [batch*num_priors,num_classes] 97 | 2: localization layers, Shape: [batch,num_priors*4] 98 | 3: priorbox layers, Shape: [2,num_priors*4] 99 | """ 100 | 101 | sources = list() 102 | loc = list() 103 | conf = list() 104 | if self.modeltype == 'SSD300KL': 105 | loc_std = list() 106 | # apply vgg up to conv4_3 relu 107 | for k in range(23): 108 | # print('debug: apply vgg') 109 | x = self.vgg[k](x) 110 | 111 | if self.forward_vgg_base_only: 112 | return x 113 | # TODO: Why apply L2norm already? => because conv4_3 has a larger scale than the rest 114 | s = self.L2Norm(x) 115 | sources.append(s) 116 | 117 | # apply vgg up to fc7 TODO: Why FC layers? => It doesn't use FC layers; it runs up to where the FC layers were (fc6/fc7 are converted to conv6/conv7, see vgg() below)
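# Added note (hedged): for a 300x300 input the six source maps appended to
# `sources` are conv4_3 38x38 (4 default boxes per location), conv7 19x19 (6),
# conv8_2 10x10 (6), conv9_2 5x5 (6), conv10_2 3x3 (4) and conv11_2 1x1 (4),
# giving 38^2*4 + 19^2*6 + 10^2*6 + 5^2*6 + 3^2*4 + 1^2*4 = 8732 priors in total.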
118 | for k in range(23, len(self.vgg)): 119 | # print('debug2: apply vgg') 120 | x = self.vgg[k](x) 121 | sources.append(x) 122 | 123 | # apply extra layers and cache source layer outputs 124 | for k, v in enumerate(self.extras): 125 | # print('debug3: apply extra layers') 126 | x = F.relu(v(x), inplace=True) 127 | if k % 2 == 1: #TODO: Why only every second layer of the extra layers? => because that's how the paper states it. It has conv blocks of 2 conv layers 128 | sources.append(x) 129 | 130 | if self.modeltype != 'SSD300KL': 131 | # apply multibox head to source layers 132 | for (x, l, c) in zip(sources, self.loc, self.conf): 133 | # print('debug4: apply multibox head') 134 | loc.append(l(x).permute(0, 2, 3, 1).contiguous()) 135 | conf.append(c(x).permute(0, 2, 3, 1).contiguous()) 136 | 137 | loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1) 138 | conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1) 139 | # print('debug forward 1') 140 | if self.phase == "test": 141 | # if self.sampling_strategy != 'p-max_localization-stability' : 142 | output = self.detect(loc.view(loc.size(0), -1, 4), # loc preds 143 | self.softmax(conf.view(conf.size(0), -1,self.num_classes)), # conf preds 144 | self.priors.type(type(x.data)), # default boxes 145 | ) 146 | # else: 147 | # output = self.detect() 148 | 149 | # training phase => no merging or other forwards used 150 | else: 151 | output = ( 152 | loc.view(loc.size(0), -1, 4), 153 | conf.view(conf.size(0), -1, self.num_classes), 154 | self.priors 155 | ) 156 | else: 157 | # apply multibox head to source layers 158 | for (x, l, c, std) in zip(sources, self.loc, self.conf, self.loc_std): 159 | # print('debug4: apply multibox head') 160 | loc.append(l(x).permute(0, 2, 3, 1).contiguous()) 161 | conf.append(c(x).permute(0, 2, 3, 1).contiguous()) 162 | loc_std.append(std(x).permute(0, 2, 3, 1).contiguous()) 163 | 164 | loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1) 165 | conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1) 166 | loc_std = torch.cat([o.view(o.size(0), -1) for o in loc_std], 1) 167 | 168 | if self.phase == "test": 169 | # during training alpha = log(sigma^2) is predicted; at test time exp(alpha) converts it back to sigma^2 170 | loc_std = torch.exp(loc_std) 171 | 172 | output = self.detect(loc.view(loc.size(0), -1, 4), # loc preds 173 | self.softmax(conf.view(conf.size(0), -1,self.num_classes)), # conf preds 174 | self.priors.type(type(x.data)), # default boxes 175 | torch.abs(loc_std.view(loc_std.size(0), -1, 4)) # exp(alpha) = sigma^2, the predicted variances of the loc preds 176 | ) 177 | else: 178 | # during training, alpha = log(sigma^2) is predicted 179 | output = ( 180 | loc.view(loc.size(0), -1, 4), 181 | conf.view(conf.size(0), -1, self.num_classes), 182 | self.priors, 183 | torch.abs(loc_std.view(loc_std.size(0), -1, 4)) # alphas 184 | ) 185 | 186 | return output 187 | 188 | 189 | def load_weights(self, base_file): 190 | other, ext = os.path.splitext(base_file) 191 | if ext in ('.pkl', '.pth'): 192 | print('Loading weights into state dict...') 193 | self.load_state_dict(torch.load(base_file, 194 | map_location=lambda storage, loc: storage)) 195 | print('Finished!') 196 | else: 197 | print('Sorry only .pth and .pkl files supported.') 198 | def vgg(cfg, i, batch_norm=False): 199 | layers = [] 200 | in_channels = i 201 | for v in cfg: 202 | if v == 'M': 203 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 204 | elif v == 'C': #TODO: ceil mode not used in https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py => impacts output shape
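# Added note (hedged): ceil_mode is why SSD300's conv4_3 map is 38x38: after
# two stride-2 pools a 300x300 input is 75x75, and ceil(75/2) = 38, whereas the
# default floor pooling would yield 37x37.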
205 | layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)] 206 | else: 207 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) 208 | if batch_norm: 209 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] 210 | else: 211 | layers += [conv2d, nn.ReLU(inplace=True)] 212 | in_channels = v 213 | pool5 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1) 214 | conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6) # à trous algorithm (dilated conv) 215 | conv7 = nn.Conv2d(1024, 1024, kernel_size=1) 216 | layers += [pool5, conv6, 217 | nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True)] 218 | return layers 219 | 220 | 221 | # This function is derived from torchvision VGG make_layers() 222 | # https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py 223 | def add_extras(cfg, i, batch_norm=False): 224 | # Extra layers added to VGG for feature scaling 225 | layers = [] 226 | in_channels = i 227 | flag = False 228 | for k, v in enumerate(cfg): 229 | if in_channels != 'S': 230 | if v == 'S': 231 | layers += [nn.Conv2d(in_channels, cfg[k + 1], 232 | kernel_size=(1, 3)[flag], stride=2, padding=1)] 233 | else: 234 | layers += [nn.Conv2d(in_channels, v, kernel_size=(1, 3)[flag])] 235 | flag = not flag 236 | in_channels = v 237 | return layers 238 | 239 | 240 | 241 | def multibox(vgg, extra_layers, cfg, num_classes, model_type): 242 | #cfg = number of boxes per feature map location 243 | 244 | loc_layers = [] 245 | conf_layers = [] 246 | vgg_source = [21, -2] 247 | if model_type != 'SSD300KL': 248 | for k, v in enumerate(vgg_source): 249 | loc_layers += [nn.Conv2d(vgg[v].out_channels, 250 | cfg[k] * 4, kernel_size=3, 251 | padding=1)] # 4 offsets per default box (cx, cy, w, h) 252 | conf_layers += [nn.Conv2d(vgg[v].out_channels, 253 | cfg[k] * num_classes, kernel_size=3, 254 | padding=1)] # out = #boxes*classes (per feature map) 255 | 256 | for k, v in enumerate(extra_layers[1::2], 2): 257 | loc_layers += [nn.Conv2d(v.out_channels, cfg[k] 258 | * 4, kernel_size=3, padding=1)] 259 | conf_layers += [nn.Conv2d(v.out_channels, cfg[k] 260 | * num_classes, kernel_size=3, padding=1)] 261 | return vgg, extra_layers, (loc_layers, conf_layers) 262 | 263 | else: 264 | """ 265 | Also predict a standard deviation per bounding box coordinate, from CVPR 2019 paper: 266 | Bounding Box Regression with Uncertainty for Accurate Object Detection 267 | by Yihui He, Chenchen Zhu, Jianren Wang, Marios Savvides, Xiangyu Zhang 268 | """
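# Added note (hedged): the loc_std head below mirrors the loc head
# layer-for-layer: for every source map it predicts cfg[k] * 4 extra channels,
# one alpha = log(sigma^2) per box coordinate (e.g. 4 * 4 = 16 channels on conv4_3).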
269 | loc_std_layers = [] 270 | for k, v in enumerate(vgg_source): 271 | loc_layers += [nn.Conv2d(vgg[v].out_channels, 272 | cfg[k] * 4, kernel_size=3, 273 | padding=1)] # 4 offsets per default box (cx, cy, w, h) 274 | loc_std_layers += [nn.Conv2d(vgg[v].out_channels, 275 | cfg[k] * 4, kernel_size=3, 276 | padding=1)] # one alpha per box coordinate 277 | 278 | conf_layers += [nn.Conv2d(vgg[v].out_channels, 279 | cfg[k] * num_classes, kernel_size=3, 280 | padding=1)] # out = #boxes*classes (per feature map) 281 | 282 | for k, v in enumerate(extra_layers[1::2], 2): 283 | loc_layers += [nn.Conv2d(v.out_channels, cfg[k] 284 | * 4, kernel_size=3, padding=1)] 285 | 286 | loc_std_layers += [nn.Conv2d(v.out_channels, cfg[k] 287 | * 4, kernel_size=3, padding=1)] 288 | 289 | conf_layers += [nn.Conv2d(v.out_channels, cfg[k] 290 | * num_classes, kernel_size=3, padding=1)] 291 | 292 | return vgg, extra_layers, (loc_layers, conf_layers, loc_std_layers) 293 | 294 | 295 | # 300D is SSD300 with dropout layers, to be able to make it Bayesian using MC-Dropout 296 | # TODO: upconvolution first and then downconvolution?? NOPE => those are the channels! 297 | base = { 298 | '300': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M', 299 | 512, 512, 512], 300 | '512': [], 301 | # '300D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512,'D', 'M', 302 | # 512, 512, 512,'D'] 303 | } 304 | #todo: should the dropout layers be in between base and extras? and also between 305 | extras = { 306 | '300': [256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256], 307 | '512': [], 308 | '300D': [256, 'S', 512, 128, 'D', 'S', 256, 128, 256, 128, 256] 309 | } 310 | mbox = { 311 | '300': [4, 6, 6, 6, 4, 4], # number of boxes per feature map location 312 | '512': [], 313 | # '300D': [4, 6, 6, 6, 4, 4] 314 | } 315 | 316 | 317 | def build_ssd(phase, model_type='SSD300', num_classes=21, default_forward = True, merging_method = None, sampling_strategy = None, sample_select_forward = False, sample_select_nms_conf_thresh = None, cfg = None, forward_vgg_base_only = False): 318 | " The active learning parameter here is the sample selection part" 319 | 320 | if phase != "test" and phase != "train": 321 | print("ERROR: Phase: " + phase + " not recognized") 322 | return 323 | if model_type not in ['SSD300','SSD300KL']: 324 | print("ERROR: You specified model_type " + repr(model_type) + ". However, " + 325 | "currently only SSD300 and SSD300KL (input size 300) are supported!") 326 | return 327 | 328 | if model_type in ['SSD300','SSD300KL']: # add other SSD models with input dim 300 to this list if desired 329 | size = 300 330 | 331 | base_, extras_, head_ = multibox(vgg(base[str(size)], 3), 332 | add_extras(extras[str(size)], 1024), 333 | mbox[str(size)], num_classes, model_type) #cfg 334 | return SSD(phase, model_type, base_, extras_, head_, num_classes, default_forward, merging_method, sampling_strategy, sample_select_forward, sample_select_nms_conf_thresh, cfg, forward_vgg_base_only) -------------------------------------------------------------------------------- /layers/box_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | 4 | 5 | def point_form(boxes): 6 | """ Convert prior_boxes to (xmin, ymin, xmax, ymax) 7 | representation for comparison to point form ground truth data. 8 | Args: 9 | boxes: (tensor) center-size default boxes from priorbox layers.
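Added example (hedged): a center-size box (cx, cy, w, h) = (0.5, 0.5, 0.2, 0.2)
becomes (xmin, ymin, xmax, ymax) = (0.4, 0.4, 0.6, 0.6).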
10 | Return: 11 | boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. 12 | """ 13 | return torch.cat((boxes[:, :2] - boxes[:, 2:]/2, # xmin, ymin 14 | boxes[:, :2] + boxes[:, 2:]/2), 1) # xmax, ymax 15 | 16 | 17 | def center_size(boxes): 18 | """ Convert prior_boxes to (cx, cy, w, h) 19 | representation for comparison to center-size form ground truth data. 20 | Args: 21 | boxes: (tensor) point_form boxes 22 | Return: 23 | boxes: (tensor) Converted (cx, cy, w, h) form of boxes. 24 | """ 25 | return torch.cat(((boxes[:, 2:] + boxes[:, :2])/2, # cx, cy 26 | boxes[:, 2:] - boxes[:, :2]), 1) # w, h 27 | 28 | # def center_size2(boxes): 29 | # """ 30 | # 31 | # :param boxes: 32 | # :return: 33 | # """ 34 | # 35 | # output_boxes[:, 0] = (input_boxes[:, 2] + input_boxes[:, 0]) / 2 36 | # output_boxes[:, 1] = (input_boxes[:, 3] + input_boxes[:, 1]) / 2 37 | # output_boxes[:, 2] = input_boxes[:, 2] - input_boxes[:, 0] 38 | # output_boxes[:, 3] = input_boxes[:, 3] - input_boxes[:, 1] 39 | # return torch.cat((boxes[:, 2:] + (torch.abs(boxes[:, :2]))/2, # cx, cy 40 | # boxes[:, 2:] - boxes[:, :2]), 1) # w, h 41 | 42 | def intersect(box_a, box_b): 43 | """ We resize both tensors to [A,B,2] without new malloc: 44 | [A,2] -> [A,1,2] -> [A,B,2] 45 | [B,2] -> [1,B,2] -> [A,B,2] 46 | Then we compute the area of intersect between box_a and box_b. 47 | Args: 48 | box_a: (tensor) bounding boxes, Shape: [A,4]. 49 | box_b: (tensor) bounding boxes, Shape: [B,4]. 50 | Return: 51 | (tensor) intersection area, Shape: [A,B]. 52 | """ 53 | A = box_a.size(0) 54 | B = box_b.size(0) 55 | max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), 56 | box_b[:, 2:].unsqueeze(0).expand(A, B, 2)) 57 | min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), 58 | box_b[:, :2].unsqueeze(0).expand(A, B, 2)) 59 | inter = torch.clamp((max_xy - min_xy), min=0) 60 | return inter[:, :, 0] * inter[:, :, 1] 61 | 62 | 63 | def jaccard(box_a, box_b): 64 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap 65 | is simply the intersection over union of two boxes. Here we operate on 66 | ground truth boxes and default boxes. 67 | E.g.: 68 | A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) 69 | Args: 70 | box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4] 71 | box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4] 72 | Return: 73 | jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)] 74 | """ 75 | inter = intersect(box_a, box_b) 76 | area_a = ((box_a[:, 2]-box_a[:, 0]) * 77 | (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B] 78 | area_b = ((box_b[:, 2]-box_b[:, 0]) * 79 | (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B] 80 | union = area_a + area_b - inter 81 | return inter / union # [A,B] 82 | 83 | 84 | def match(threshold, truths, priors, variances, labels, loc_t, conf_t, idx, 85 | modeltype = 'SSD300'): 86 | """Match each prior box with the ground truth box of the highest jaccard 87 | overlap, encode the bounding boxes, then return the matched indices 88 | corresponding to both confidence and location preds. 89 | 90 | For KL Loss, we need to predict everything in x1y1x2y2 format, so the matching should not transform to center form. 91 | 92 | Args: 93 | threshold: (float) The overlap threshold used when matching boxes. 94 | truths: (tensor) Ground truth boxes, Shape: [num_obj, 4]. 95 | priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4]. 96 | variances: (tensor) Variances corresponding to each prior coord, 97 | Shape: [num_priors, 4]. 98 | labels: (tensor) All the class labels for the image, Shape: [num_obj]. 99 | loc_t: (tensor) Tensor to be filled w/ encoded location targets. 100 | conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds. 101 | idx: (int) current batch index 102 | Return: 103 | The matched indices corresponding to 1) location and 2) confidence preds. 104 | """
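# Added note (hedged): the matching below is two-way. Every prior is assigned
# its best-overlapping ground truth, and every ground truth additionally claims
# its single best prior (whose overlap is force-set to 2 via index_fill_ so it
# can never fall below the threshold). Priors whose best overlap is still below
# the threshold are labelled 0 (background).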
105 | # jaccard index 106 | # if modeltype != 'SSD300KL': 107 | overlaps = jaccard( 108 | truths, 109 | point_form(priors) 110 | ) 111 | # else: 112 | # overlaps = jaccard( 113 | # truths, 114 | # priors # already in point form if KL-Loss is used 115 | # ) 116 | # (Bipartite Matching) 117 | # [1,num_objects] best prior for each ground truth 118 | best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True) 119 | # [1,num_priors] best ground truth for each prior 120 | best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True) 121 | best_truth_idx.squeeze_(0) 122 | best_truth_overlap.squeeze_(0) 123 | best_prior_idx.squeeze_(1) 124 | best_prior_overlap.squeeze_(1) 125 | best_truth_overlap.index_fill_(0, best_prior_idx, 2) # ensure best prior 126 | # TODO refactor: index best_prior_idx with long tensor 127 | # ensure every gt matches with its prior of max overlap 128 | for j in range(best_prior_idx.size(0)): 129 | best_truth_idx[best_prior_idx[j]] = j 130 | matches = truths[best_truth_idx] # Shape: [num_priors,4] 131 | conf = labels[best_truth_idx] + 1 # Shape: [num_priors] 132 | conf[best_truth_overlap < threshold] = 0 # label as background 133 | 134 | # If KL Loss, encoding shouldn't use center-offset form 135 | loc = encode(matches, priors, variances, modeltype) 136 | 137 | 138 | loc_t[idx] = loc # [num_priors,4] encoded offsets to learn 139 | conf_t[idx] = conf # [num_priors] top class label for each prior 140 | 141 | 142 | def encode(matched, priors, variances, modeltype = 'SSD300'): 143 | """ 144 | Use of 'variance' not discussed in paper, see blogpost: https://leimao.github.io/blog/Bounding-Box-Encoding-Decoding/ 145 | corroborated by original author: https://github.com/weiliu89/caffe/issues/155#issuecomment-243541464 146 | and more: https://github.com/rykov8/ssd_keras/issues/53 147 | 148 | Encode the variances from the priorbox layers into the ground truth boxes 149 | we have matched (based on jaccard overlap) with the prior boxes. 150 | Args: 151 | matched: (tensor) Coords of ground truth for each prior in point-form 152 | Shape: [num_priors, 4]. 153 | priors: (tensor) Prior boxes in center-offset form 154 | Shape: [num_priors,4]. 155 | variances: (list[float]) Variances of priorboxes 156 | Return: 157 | encoded boxes (tensor), Shape: [num_priors, 4] 158 | """
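# Added note (hedged): worked example with variances = [0.1, 0.2]. For
# matched (point-form) = (0.4, 0.4, 0.6, 0.6) and prior (center-size) =
# (0.5, 0.5, 0.25, 0.25) the centers coincide, so g_cxcy = (0, 0);
# g_wh = log(0.2 / 0.25) / 0.2 ≈ -1.116 for both width and height.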
159 | 160 | # transform everything back to center-form 161 | 162 | 163 | # dist b/t match center and prior's center 164 | g_cxcy = (matched[:, :2] + matched[:, 2:])/2 - priors[:, :2] 165 | # encode variance 166 | # todo: check that this works correctly; priors is in point-form 167 | g_cxcy /= (variances[0] * priors[:, 2:]) 168 | # match wh / prior wh 169 | g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:] 170 | g_wh = torch.log(g_wh) / variances[1] 171 | # return target for smooth_l1_loss 172 | # todo 173 | if modeltype == 'SSD300KL': 174 | # transform to x1y1x2y2 form 175 | return point_form(torch.cat([g_cxcy, g_wh], 1)) # [num_priors,4] 176 | 177 | else: 178 | return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4] 179 | 180 | 181 | # Adapted from https://github.com/Hakuyume/chainer-ssd 182 | def decode(loc, priors, variances, modeltype = 'SSD300'): 183 | """ 184 | Use of 'variance' not discussed in paper, see blogpost: https://leimao.github.io/blog/Bounding-Box-Encoding-Decoding/ 185 | 186 | Decode locations from predictions using priors to undo 187 | the encoding we did for offset regression at train time. 188 | Args: 189 | loc (tensor): location predictions for loc layers, 190 | Shape: [num_priors,4] 191 | priors (tensor): Prior boxes in center-offset form. 192 | Shape: [num_priors,4]. 193 | variances: (list[float]) Variances of priorboxes 194 | Return: 195 | decoded bounding box predictions; decoded in x1y1x2y2-form with x1y1 at the upper left and x2y2 at the lower right, all in the range [0,1] 196 | """ 197 | 198 | # todo: check that converting back and forth between box formats works correctly 199 | 200 | 201 | if modeltype == 'SSD300KL': 202 | # transform predictions from x1y1x2y2 to cx, cy, w, h form. The variances are precalculated cx,cy,w,h variances 203 | loc = center_size(loc) 204 | 205 | boxes = torch.cat(( 206 | priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], 207 | priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) 208 | boxes[:, :2] -= boxes[:, 2:] / 2 209 | boxes[:, 2:] += boxes[:, :2] 210 | 211 | return boxes # [num_priors,4] 212 | 213 | 214 | # def batch_decode(loc, priors, variances): 215 | # """ 216 | # Same as decode, but adjusted to work for batches 217 | # 218 | # Decode locations from predictions using priors to undo 219 | # the encoding we did for offset regression at train time. 220 | # Args: 221 | # loc (tensor): location predictions for loc layers, 222 | # Shape: [ensemble_size,batch,num_priors,4] 223 | # priors (tensor): Prior boxes in center-offset form. 224 | # Shape: [ensemble_size,batch, num_priors,4]. 225 | # variances: (list[float]) Variances of priorboxes 226 | # Return: 227 | # decoded bounding box predictions 228 | # """ 229 | # boxes = torch.cat(( 230 | # priors[:,:, :2] + loc[:,:, :2] * variances[0] * priors[:,:, 2:], 231 | # priors[:,:, 2:] * torch.exp(loc[:,:, 2:] * variances[1])), 1) 232 | # boxes[:,:, :2] -= boxes[:,:, 2:] / 2 233 | # boxes[:,:, 2:] += boxes[:,:, :2] 234 | # return boxes 235 | 236 | def log_sum_exp(x): 237 | """Utility function for computing log_sum_exp. 238 | This will be used to determine the unaveraged confidence loss across 239 | all examples in a batch.
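Added note: the implementation below uses the max-subtraction trick for
numerical stability, log sum_j exp(x_j) = x_max + log sum_j exp(x_j - x_max),
so no exponential can overflow.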
240 | Args: 241 | x (Variable(tensor)): conf_preds from conf layers 242 | """ 243 | x_max = x.data.max() 244 | return torch.log(torch.sum(torch.exp(x-x_max), 1, keepdim=True)) + x_max 245 | 246 | 247 | # Original author: Francisco Massa: 248 | # https://github.com/fmassa/object-detection.torch 249 | # Ported to PyTorch by Max deGroot (02/01/2017) 250 | def nms(boxes, scores, overlap=0.5, top_k=200): # todo: overlap default in paper 0.45 251 | """Apply non-maximum suppression at test time to avoid detecting too many 252 | overlapping bounding boxes for a given object. 253 | Args: 254 | boxes: (tensor) The location preds for the img, Shape: [num_priors,4]. 255 | scores: (tensor) The class prediction scores for the img, Shape:[num_priors]. 256 | overlap: (float) The overlap thresh for suppressing unnecessary boxes. 257 | top_k: (int) The maximum number of box preds to consider. (default in paper = 200) 258 | Return: 259 | The indices of the kept boxes with respect to num_priors. 260 | 261 | todo: a pure numpy implementation might be faster according to the issues on github 262 | possible implementation https://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/ 263 | """ 264 | 265 | keep = scores.new(scores.size(0)).zero_().long() 266 | if boxes.numel() == 0: # number of elements 267 | return keep, 0 # no bounding boxes for this class; callers unpack (keep, count) 268 | x1 = boxes[:, 0] 269 | y1 = boxes[:, 1] 270 | x2 = boxes[:, 2] 271 | y2 = boxes[:, 3] 272 | area = torch.mul(x2 - x1, y2 - y1) 273 | v, idx = scores.sort(0) # sort in ascending order 274 | # I = I[v >= 0.01] 275 | idx = idx[-top_k:] # indices of the top-k largest vals 276 | xx1 = boxes.new() 277 | yy1 = boxes.new() 278 | xx2 = boxes.new() 279 | yy2 = boxes.new() 280 | w = boxes.new() 281 | h = boxes.new() 282 | 283 | # keep = torch.Tensor() 284 | count = 0 285 | while idx.numel() > 0: 286 | i = idx[-1] # index of current largest val 287 | # keep.append(i) 288 | keep[count] = i 289 | count += 1 290 | if idx.size(0) == 1: 291 | break 292 | idx = idx[:-1] # remove kept element from view 293 | # load bboxes of next highest vals 294 | torch.index_select(x1, 0, idx, out=xx1) 295 | torch.index_select(y1, 0, idx, out=yy1) 296 | torch.index_select(x2, 0, idx, out=xx2) 297 | torch.index_select(y2, 0, idx, out=yy2) 298 | # store element-wise max with next highest score 299 | xx1 = torch.clamp(xx1, min=x1[i]) 300 | yy1 = torch.clamp(yy1, min=y1[i]) 301 | xx2 = torch.clamp(xx2, max=x2[i]) 302 | yy2 = torch.clamp(yy2, max=y2[i]) 303 | w.resize_as_(xx2) 304 | h.resize_as_(yy2) 305 | w = xx2 - xx1 306 | h = yy2 - yy1 307 | # check sizes of xx1 and xx2.. after each iteration
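# Added note (hedged): worked IoU example for the computation below. Boxes
# [0, 0, 2, 2] and [1, 1, 3, 3] give intersection 1*1 = 1 and union
# 4 + 4 - 1 = 7, so IoU = 1/7 ≈ 0.14; that is below the 0.5 default overlap
# threshold, so the lower-scoring box would survive this suppression round.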
308 | w = torch.clamp(w, min=0.0) 309 | h = torch.clamp(h, min=0.0) 310 | inter = w*h 311 | # IoU = i / (area(a) + area(b) - i) 312 | rem_areas = torch.index_select(area, 0, idx) # load remaining areas 313 | union = (rem_areas - inter) + area[i] 314 | IoU = inter/union # store result in iou 315 | 316 | # keep only elements with an IoU <= overlap 317 | # print(IoU.le(overlap)) # le = less or equal, creates a binary mask 318 | idx = idx[IoU.le(overlap)] 319 | return keep, count 320 | 321 | # 322 | # def nms_uncertainty_sampling(boxes, scores, overlap=0.5, top_k=200, object_treshold = None): 323 | # """ 324 | # This function takes (un)certainty scores and bounding boxes, and returns the top b 325 | # 326 | # 327 | # """ 328 | # 329 | # keep = scores.new(scores.size(0)).zero_().long() 330 | # if boxes.numel() == 0: #number of elements 331 | # return keep # for a class, there are no bounding boxes 332 | # x1 = boxes[:, 0] 333 | # y1 = boxes[:, 1] 334 | # x2 = boxes[:, 2] 335 | # y2 = boxes[:, 3] 336 | # area = torch.mul(x2 - x1, y2 - y1) 337 | # v, idx = scores.sort(0) # sort in ascending order 338 | # # I = I[v >= 0.01] 339 | # idx = idx[-top_k:] # indices of the top-k largest vals 340 | # xx1 = boxes.new() 341 | # yy1 = boxes.new() 342 | # xx2 = boxes.new() 343 | # yy2 = boxes.new() 344 | # w = boxes.new() 345 | # h = boxes.new() 346 | # 347 | # # keep = torch.Tensor() 348 | # count = 0 349 | # while idx.numel() > 0: 350 | # i = idx[-1] # index of current largest val 351 | # # keep.append(i) 352 | # keep[count] = i 353 | # count += 1 354 | # if idx.size(0) == 1: 355 | # break 356 | # idx = idx[:-1] # remove kept element from view 357 | # # load bboxes of next highest vals 358 | # torch.index_select(x1, 0, idx, out=xx1) 359 | # torch.index_select(y1, 0, idx, out=yy1) 360 | # torch.index_select(x2, 0, idx, out=xx2) 361 | # torch.index_select(y2, 0, idx, out=yy2) 362 | # # store element-wise max with next highest score 363 | # xx1 = torch.clamp(xx1, min=x1[i]) 364 | # yy1 = torch.clamp(yy1, min=y1[i]) 365 | # xx2 = torch.clamp(xx2, max=x2[i]) 366 | # yy2 = torch.clamp(yy2, max=y2[i]) 367 | # w.resize_as_(xx2) 368 | # h.resize_as_(yy2) 369 | # w = xx2 - xx1 370 | # h = yy2 - yy1 371 | # # check sizes of xx1 and xx2.. after each iteration 372 | # w = torch.clamp(w, min=0.0) 373 | # h = torch.clamp(h, min=0.0) 374 | # inter = w*h 375 | # # IoU = i / (area(a) + area(b) - i) 376 | # rem_areas = torch.index_select(area, 0, idx) # load remaining areas 377 | # union = (rem_areas - inter) + area[i] 378 | # IoU = inter/union # store result in iou 379 | # 380 | # # keep only elements with an IoU <= overlap 381 | # print(IoU.le(overlap)) #le = less or equal, creates a binary mask 382 | # idx = idx[IoU.le(overlap)] 383 | # return keep, count 384 | -------------------------------------------------------------------------------- /utils/augmentations.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision import transforms 3 | import cv2 4 | import numpy as np 5 | import types 6 | from numpy import random 7 | 8 | 9 | def intersect(box_a, box_b): 10 | max_xy = np.minimum(box_a[:, 2:], box_b[2:]) 11 | min_xy = np.maximum(box_a[:, :2], box_b[:2]) 12 | inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf) 13 | return inter[:, 0] * inter[:, 1] 14 | 15 | 16 | def jaccard_numpy(box_a, box_b): 17 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap 18 | is simply the intersection over union of two boxes.
19 | E.g.: 20 | A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) 21 | Args: 22 | box_a: Multiple bounding boxes, Shape: [num_boxes,4] 23 | box_b: Single bounding box, Shape: [4] 24 | Return: 25 | jaccard overlap: Shape: [box_a.shape[0]] 26 | """ 27 | inter = intersect(box_a, box_b) 28 | area_a = ((box_a[:, 2]-box_a[:, 0]) * 29 | (box_a[:, 3]-box_a[:, 1])) # [A] 30 | area_b = ((box_b[2]-box_b[0]) * 31 | (box_b[3]-box_b[1])) # scalar 32 | union = area_a + area_b - inter 33 | return inter / union # [A] 34 | 35 | 36 | class Compose(object): 37 | """Composes several augmentations together. 38 | Args: 39 | transforms (List[Transform]): list of transforms to compose. 40 | Example: 41 | >>> augmentations.Compose([ 42 | >>> transforms.CenterCrop(10), 43 | >>> transforms.ToTensor(), 44 | >>> ]) 45 | """ 46 | 47 | def __init__(self, transforms): 48 | self.transforms = transforms 49 | 50 | def __call__(self, img, boxes=None, labels=None): 51 | for t in self.transforms: 52 | img, boxes, labels = t(img, boxes, labels) 53 | return img, boxes, labels 54 | 55 | 56 | class Lambda(object): 57 | """Applies a lambda as a transform.""" 58 | 59 | def __init__(self, lambd): 60 | assert isinstance(lambd, types.LambdaType) 61 | self.lambd = lambd 62 | 63 | def __call__(self, img, boxes=None, labels=None): 64 | return self.lambd(img, boxes, labels) 65 | 66 | 67 | class ConvertFromInts(object): 68 | def __call__(self, image, boxes=None, labels=None): 69 | return image.astype(np.float32), boxes, labels 70 | 71 | 72 | class SubtractMeans(object): 73 | def __init__(self, mean): 74 | self.mean = np.array(mean, dtype=np.float32) 75 | 76 | def __call__(self, image, boxes=None, labels=None): 77 | image = image.astype(np.float32) 78 | image -= self.mean 79 | return image.astype(np.float32), boxes, labels 80 | 81 | 82 | class ToAbsoluteCoords(object): 83 | def __call__(self, image, boxes=None, labels=None): 84 | height, width, channels = image.shape 85 | boxes[:, 0] *= width 86 | boxes[:, 2] *= width 87 | boxes[:, 1] *= height 88 | boxes[:, 3] *= height 89 | 90 | return image, boxes, labels 91 | 92 | 93 | class ToPercentCoords(object): 94 | def __call__(self, image, boxes=None, labels=None): 95 | height, width, channels = image.shape 96 | boxes[:, 0] /= width 97 | boxes[:, 2] /= width 98 | boxes[:, 1] /= height 99 | boxes[:, 3] /= height 100 | 101 | return image, boxes, labels 102 | 103 | 104 | class Resize(object): 105 | def __init__(self, size=300): 106 | self.size = size 107 | 108 | def __call__(self, image, boxes=None, labels=None): 109 | image = cv2.resize(image, (self.size, 110 | self.size)) 111 | return image, boxes, labels 112 | 113 | 114 | class RandomSaturation(object): 115 | def __init__(self, lower=0.5, upper=1.5): 116 | self.lower = lower 117 | self.upper = upper 118 | assert self.upper >= self.lower, "saturation upper must be >= lower." 119 | assert self.lower >= 0, "saturation lower must be non-negative."
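# Added note (hedged): this transform scales the saturation (S) channel of an
# HSV float image (image[:, :, 1] below); PhotometricDistort therefore applies
# it between ConvertColor(BGR->HSV) and ConvertColor(HSV->BGR).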
120 | 121 | def __call__(self, image, boxes=None, labels=None): 122 | if random.randint(2): 123 | image[:, :, 1] *= random.uniform(self.lower, self.upper) 124 | 125 | return image, boxes, labels 126 | 127 | 128 | class RandomHue(object): 129 | def __init__(self, delta=18.0): 130 | assert delta >= 0.0 and delta <= 360.0 131 | self.delta = delta 132 | 133 | def __call__(self, image, boxes=None, labels=None): 134 | if random.randint(2): 135 | image[:, :, 0] += random.uniform(-self.delta, self.delta) 136 | image[:, :, 0][image[:, :, 0] > 360.0] -= 360.0 137 | image[:, :, 0][image[:, :, 0] < 0.0] += 360.0 138 | return image, boxes, labels 139 | 140 | 141 | class RandomLightingNoise(object): 142 | def __init__(self): 143 | self.perms = ((0, 1, 2), (0, 2, 1), 144 | (1, 0, 2), (1, 2, 0), 145 | (2, 0, 1), (2, 1, 0)) 146 | 147 | def __call__(self, image, boxes=None, labels=None): 148 | if random.randint(2): 149 | swap = self.perms[random.randint(len(self.perms))] 150 | shuffle = SwapChannels(swap) # shuffle channels 151 | image = shuffle(image) 152 | return image, boxes, labels 153 | 154 | 155 | class ConvertColor(object): 156 | def __init__(self, current='BGR', transform='HSV'): 157 | self.transform = transform 158 | self.current = current 159 | 160 | def __call__(self, image, boxes=None, labels=None): 161 | if self.current == 'BGR' and self.transform == 'HSV': 162 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) 163 | elif self.current == 'HSV' and self.transform == 'BGR': 164 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) 165 | else: 166 | raise NotImplementedError 167 | return image, boxes, labels 168 | 169 | 170 | class RandomContrast(object): 171 | def __init__(self, lower=0.5, upper=1.5): 172 | self.lower = lower 173 | self.upper = upper 174 | assert self.upper >= self.lower, "contrast upper must be >= lower." 175 | assert self.lower >= 0, "contrast lower must be non-negative." 176 | 177 | # expects float image 178 | def __call__(self, image, boxes=None, labels=None): 179 | if random.randint(2): 180 | alpha = random.uniform(self.lower, self.upper) 181 | image *= alpha 182 | return image, boxes, labels 183 | 184 | 185 | class RandomBrightness(object): 186 | def __init__(self, delta=32): 187 | assert delta >= 0.0 188 | assert delta <= 255.0 189 | self.delta = delta 190 | 191 | def __call__(self, image, boxes=None, labels=None): 192 | if random.randint(2): 193 | delta = random.uniform(-self.delta, self.delta) 194 | image += delta 195 | return image, boxes, labels 196 | 197 | 198 | class ToCV2Image(object): 199 | def __call__(self, tensor, boxes=None, labels=None): 200 | return tensor.cpu().numpy().astype(np.float32).transpose((1, 2, 0)), boxes, labels 201 | 202 | 203 | class ToTensor(object): 204 | def __call__(self, cvimage, boxes=None, labels=None): 205 | return torch.from_numpy(cvimage.astype(np.float32)).permute(2, 0, 1), boxes, labels 206 | 207 | 208 | class RandomSampleCrop(object): 209 | """Crop 210 | Arguments: 211 | img (Image): the image being input during training 212 | boxes (Tensor): the original bounding boxes in pt form 213 | labels (Tensor): the class labels for each bbox 214 | mode (float tuple): the min and max jaccard overlaps 215 | Return: 216 | (img, boxes, classes) 217 | img (Image): the cropped image 218 | boxes (Tensor): the adjusted bounding boxes in pt form 219 | labels (Tensor): the class labels for each bbox 220 | """ 221 | def __init__(self): 222 | self.sample_options = ( 223 | # using entire original input image 224 | None, 225 | # sample a patch s.t. 
MIN jaccard w/ obj in .1,.3,.7,.9 226 | (0.1, None), 227 | (0.3, None), 228 | (0.7, None), 229 | (0.9, None), 230 | # randomly sample a patch 231 | (None, None), 232 | ) 233 | 234 | def __call__(self, image, boxes=None, labels=None): 235 | height, width, _ = image.shape 236 | while True: 237 | # randomly choose a mode 238 | mode = random.choice(self.sample_options) 239 | if mode is None: 240 | return image, boxes, labels 241 | 242 | min_iou, max_iou = mode 243 | if min_iou is None: 244 | min_iou = float('-inf') 245 | if max_iou is None: 246 | max_iou = float('inf') 247 | 248 | # max trials (50) 249 | for _ in range(50): 250 | current_image = image 251 | 252 | w = random.uniform(0.3 * width, width) 253 | h = random.uniform(0.3 * height, height) 254 | 255 | # aspect ratio constraint b/t .5 & 2 256 | if h / w < 0.5 or h / w > 2: 257 | continue 258 | 259 | left = random.uniform(width - w) 260 | top = random.uniform(height - h) 261 | 262 | # convert to integer rect x1,y1,x2,y2 263 | rect = np.array([int(left), int(top), int(left+w), int(top+h)]) 264 | 265 | # calculate IoU (jaccard overlap) b/t the cropped and gt boxes 266 | overlap = jaccard_numpy(boxes, rect) 267 | 268 | # retry if either the min or the max overlap constraint is violated 269 | if overlap.min() < min_iou or max_iou < overlap.max(): 270 | continue 271 | 272 | # cut the crop from the image 273 | current_image = current_image[rect[1]:rect[3], rect[0]:rect[2], 274 | :] 275 | 276 | # keep overlap with gt box IF center in sampled patch 277 | centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0 278 | 279 | # mask in all gt boxes whose center is below and to the right of the rect's top-left corner 280 | m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1]) 281 | 282 | # mask in all gt boxes whose center is above and to the left of the rect's bottom-right corner 283 | m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1]) 284 | 285 | # mask in that both m1 and m2 are true 286 | mask = m1 * m2 287 | 288 | # have any valid boxes? try again if not
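# Added note (hedged): example of the center rule above. With
# rect = [50, 50, 200, 200], a gt box [180, 180, 260, 260] has center (220, 220),
# which lies outside the rect, so the box is dropped even though it partially
# overlaps the crop.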
289 | if not mask.any(): 290 | continue 291 | 292 | # take only matching gt boxes 293 | current_boxes = boxes[mask, :].copy() 294 | 295 | # take only matching gt labels 296 | current_labels = labels[mask] 297 | 298 | # should we use the box's left and top corner or the crop's? 299 | current_boxes[:, :2] = np.maximum(current_boxes[:, :2], 300 | rect[:2]) 301 | # adjust to crop (by subtracting crop's left,top) 302 | current_boxes[:, :2] -= rect[:2] 303 | 304 | current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:], 305 | rect[2:]) 306 | # adjust to crop (by subtracting crop's left,top) 307 | current_boxes[:, 2:] -= rect[:2] 308 | 309 | return current_image, current_boxes, current_labels 310 | 311 | 312 | class Expand(object): 313 | def __init__(self, mean): 314 | self.mean = mean 315 | 316 | def __call__(self, image, boxes, labels): 317 | if random.randint(2): 318 | return image, boxes, labels 319 | 320 | height, width, depth = image.shape 321 | ratio = random.uniform(1, 4) 322 | left = random.uniform(0, width*ratio - width) 323 | top = random.uniform(0, height*ratio - height) 324 | 325 | expand_image = np.zeros( 326 | (int(height*ratio), int(width*ratio), depth), 327 | dtype=image.dtype) 328 | expand_image[:, :, :] = self.mean 329 | expand_image[int(top):int(top + height), 330 | int(left):int(left + width)] = image 331 | image = expand_image 332 | 333 | boxes = boxes.copy() 334 | boxes[:, :2] += (int(left), int(top)) 335 | boxes[:, 2:] += (int(left), int(top)) 336 | 337 | return image, boxes, labels 338 | 339 | 340 | class RandomMirror(object): 341 | def __call__(self, image, boxes, classes): 342 | _, width, _ = image.shape 343 | if random.randint(2): 344 | image = image[:, ::-1] 345 | boxes = boxes.copy() 346 | boxes[:, 0::2] = width - boxes[:, 2::-2] # new x1 = width - old x2, new x2 = width - old x1 (cols 2::-2 select [x2, x1]) 347 | return image, boxes, classes 348 | 349 | 350 | class SwapChannels(object): 351 | """Transforms a tensorized image by swapping the channels in the order 352 | specified in the swap tuple.
353 |     Args:
354 |         swaps (int triple): final order of channels
355 |             eg: (2, 1, 0)
356 |     """
357 | 
358 |     def __init__(self, swaps):
359 |         self.swaps = swaps
360 | 
361 |     def __call__(self, image):
362 |         """
363 |         Args:
364 |             image (Tensor): image tensor to be transformed
365 |         Return:
366 |             a tensor with channels swapped according to swap
367 |         """
368 |         # if torch.is_tensor(image):
369 |         #     image = image.data.cpu().numpy()
370 |         # else:
371 |         #     image = np.array(image)
372 |         image = image[:, :, self.swaps]
373 |         return image
374 | 
375 | 
376 | class PhotometricDistort(object):
377 |     def __init__(self):
378 |         self.pd = [
379 |             RandomContrast(),
380 |             ConvertColor(transform='HSV'),
381 |             RandomSaturation(),
382 |             RandomHue(),
383 |             ConvertColor(current='HSV', transform='BGR'),
384 |             RandomContrast()
385 |         ]
386 |         self.rand_brightness = RandomBrightness()
387 |         self.rand_light_noise = RandomLightingNoise()
388 | 
389 |     def __call__(self, image, boxes, labels):
390 |         im = image.copy()
391 |         im, boxes, labels = self.rand_brightness(im, boxes, labels)
392 |         if random.randint(2):
393 |             distort = Compose(self.pd[:-1])
394 |         else:
395 |             distort = Compose(self.pd[1:])
396 |         im, boxes, labels = distort(im, boxes, labels)
397 |         return self.rand_light_noise(im, boxes, labels)
398 | 
399 | 
400 | # class GaussianRandomPixelNoise():
401 | #     # idea taken from the paper "Localization-Aware Active Learning for Object Detection" by Kao, Lee, Sen and Liu
402 | #     # http://www.merl.com/publications/docs/TR2018-166.pdf
403 | #     """
404 | #     To each pixel value gaussian noise is added.
405 | #     The paper doesn't clearly state if the noise differs per channel per pixel or only per pixel.
406 | #     Here we will do it per pixel.
407 | #     """
408 | #
409 | #     def __init__(self, std, image_len = 90000):  # 300*300 pixels
410 | #         self.std = std
411 | #         self.image_len = image_len
412 | #
413 | #     def __call__(self, image):
414 | #
415 | #         # add noise
416 | #         noise = np.random.normal(0, self.std, self.image_len)
417 | #         image = image.flatten()
418 | #         image = image+noise
419 | #
420 | #         # set max and min values
421 | #         low_values = image < 0
422 | #         high_values = image > 255
423 | #
424 | #         image[low_values] = 0
425 | #         image[high_values] = 255
426 | #
427 | #         # reshape
428 | #         # image = image.reshape(300,300,:)
429 | #
430 | #         return image
431 | 
432 | 
433 | def GaussianRandomPixelNoise(image, std, image_len = 90000):  # 300*300 pixels
434 | 
435 |     # add noise (the same noise value is broadcast over the channels of each pixel)
436 |     normal_dist = torch.distributions.normal.Normal(torch.zeros(image_len), torch.ones(image_len) * std)
437 |     noise = normal_dist.sample().unsqueeze(-1).to('cpu')
438 |     image = torch.from_numpy(image).reshape(-1,3)
439 |     image = image+noise
440 | 
441 |     # set max and min values
442 |     # NOTE: clipping is skipped here, as the (dataset) mean per channel is subtracted before the image is pulled
443 |     # through this function (the base transform); the network is also trained on this (on average) zero-mean input.
444 |     # low_values = abs(image) < 0
445 |     # high_values = abs(image) > 255
446 |     #
447 |     # image[low_values] = 0
448 |     # image[high_values] = 255
449 | 
450 |     # reshape
451 |     image = image.reshape(300,300,-1)
452 | 
453 |     return image
454 | 
455 | 
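# Minimal usage sketch for GaussianRandomPixelNoise (illustrative values; it
# assumes a mean-subtracted 300x300 BGR image as a float32 NumPy array and
# returns a torch tensor with the same noise added to every channel of a pixel):
#
#   img = np.random.randn(300, 300, 3).astype(np.float32)  # stand-in image
#   noisy = GaussianRandomPixelNoise(img, std=8.0)          # shape (300, 300, 3)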
462 | 
463 | class SSDAugmentation(object):
464 |     def __init__(self, size=300, mean=(104, 117, 123)):
465 |         self.mean = mean
466 |         self.size = size
467 |         self.augment = Compose([
468 |             ConvertFromInts(),
469 |             ToAbsoluteCoords(),
470 |             PhotometricDistort(),
471 |             Expand(self.mean),
472 |             RandomSampleCrop(),
473 |             RandomMirror(),
474 |             ToPercentCoords(),
475 |             Resize(self.size),
476 |             SubtractMeans(self.mean)
477 |         ])
478 | 
479 |     def __call__(self, img, boxes, labels):
480 |         return self.augment(img, boxes, labels)
481 | 
--------------------------------------------------------------------------------
/layers/functions/detection.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Function
3 | from ..box_utils import decode, nms
4 | from data import voc as cfg
5 | from active_learning_package import uncertainty_helpers
6 | import math
7 | 
8 | 
9 | class Detect(Function):
10 |     """At test time, Detect is the final layer of SSD. Decode location preds,
11 |     apply non-maximum suppression to location predictions based on conf
12 |     scores and threshold to a top_k number of output predictions for both
13 |     confidence score and locations.
14 |     """
15 |     def __init__(self, num_classes, bkg_label, top_k, conf_thresh, nms_thresh,
16 |                  def_forward, merging_method, sampling_strategy, modeltype):
17 |         self.num_classes = num_classes
18 |         self.background_label = bkg_label
19 |         self.top_k = top_k
20 | 
21 |         # Parameters used in nms.
22 |         self.nms_thresh = nms_thresh
23 |         if nms_thresh <= 0:
24 |             raise ValueError('nms_threshold must be positive.')
25 |         self.conf_thresh = conf_thresh
26 |         self.variance = cfg['variance']
27 | 
28 |         ## Active Learning Package variables
29 |         self.def_forward = def_forward
30 |         self.merging_method = merging_method
31 |         self.sampling_strategy = sampling_strategy
32 |         self.do_prob_dist_forward = False
33 |         self.modeltype = modeltype
34 | 
35 |         if self.merging_method != 'pre_nms_avg' and \
36 |            (self.sampling_strategy == 'p-max_localization-stability'
37 |             or self.sampling_strategy == 'no_ensemble_entropy-only'
38 |             or self.sampling_strategy in ['none_covariance', 'none_covariance-obj', 'entropy_covariance', 'entropy_covariance-obj'])\
39 |            and self.modeltype != 'SSD300KL':
40 |             self.do_prob_dist_forward = True
41 | 
42 | 
43 | 
44 | 
45 |     def forward(self, loc_data, conf_data, prior_data, alphas = None):
46 |         """
47 |         Args:
48 |             loc_data: (tensor) Loc preds from loc layers
49 |                 Shape: [batch,num_priors*4]
50 |             conf_data: (tensor) Conf preds from conf layers
51 |                 Shape: [batch*num_priors,num_classes]
52 |             prior_data: (tensor) Prior (default) boxes and variances from priorbox layers
53 |                 Shape: [1,num_priors,4]
54 | 
55 |             Only when using KL-loss:
56 |             alpha: α = log(σ^{2}) where σ^{2} is the variance per bounding box coordinate. The log is used during
57 |                 training to avoid exploding gradients
58 |                 shape: [1, num_priors, 4]
59 |         """
60 |         # the normal forward pass, as described in the SSD paper
61 |         if self.def_forward:
62 | 
63 |             # Merging method = None by default, if None has been passed through
64 |             output = self.default_forward(loc_data, conf_data, prior_data)
65 |             return output
66 | 
67 |         else:
68 |             # if not a regular forward -> an ensemble of SSDs can be used to merge bounding boxes
69 |             # into probabilistic object detections
70 | 
71 |             if self.merging_method == 'pre_nms_avg' and \
72 |                not self.do_prob_dist_forward and\
73 |                self.modeltype != 'SSD300KL':
74 |                 # returns all locs and preds, without applying non-maximum suppression, to allow for pre-nms averaging
75 |                 # for more information, see paper: Miller et al. - Benchmarking Sampling-based Probabilistic Object Detectors
76 |                 output_tup = (loc_data, conf_data, prior_data)
77 |                 return output_tup
78 | 
79 | 
80 |             # elif self.merging_method in ['BSAS','Hungarian'] or 'p-max_localization-stability':
81 |             elif self.do_prob_dist_forward:
82 |                 output, num_boxes_per_class = self.full_prob_dist_forward(loc_data, conf_data, prior_data)
83 |                 # output_tup = (output, prior_data) # todo: do I really need prior data for BSAS merging? -> only used for nms, which is already performed here, or also for IoU calculation?
84 |                 return output, num_boxes_per_class, prior_data
85 | 
86 |             elif self.modeltype == 'SSD300KL':
87 |                 output, num_boxes_per_class = self.full_prob_KL_forward(loc_data, conf_data, prior_data, alphas)
88 |                 return output, num_boxes_per_class, prior_data
89 |             else:
90 |                 raise NotImplementedError()
91 | 
92 | 
93 |     def full_prob_KL_forward(self, loc_data, conf_data, prior_data, alphas):
94 |         """
95 |         Largely copied from the forward with the full probability distribution (full_prob_dist_forward). However,
96 |         the bounding boxes are in point-form (x1,y1,x2,y2) instead of center-form (cx, cy, w, h), and for each corner
97 |         a standard deviation is also returned.
98 | 
99 | 
100 |         :param loc_data: (tensor) Loc preds from loc layers
101 |             Shape: [batch,num_priors*4]
102 |         :param alphas: (tensor) α = log(σ^{2}) where σ^{2} is the variance per bounding box coordinate.
103 |             The log is used during training to avoid exploding gradients
104 |             Shape: [batch,num_priors*4]
105 |         :param conf_data: (tensor) Conf preds from conf layers
106 |             Shape: [batch*num_priors,num_classes]
107 |         :param prior_data: (tensor) Prior boxes and variances from priorbox layers
108 |             Shape: [1,num_priors,4]
109 |         :return:
110 | 
111 |         the default forward returns the top-k (200) detections PER CLASS. The probability distribution over the classes
112 |         is not returned, only the probability for a given detection for a given class.
113 | 
114 |         output in this function is [image_ids, class_id, detection_id, conf_dist + bb], where bb has 8 params
115 |         (x1, y1, x2, y2, std_x1, std_y1, std_x2, std_y2), where in the default forward it is [image_ids, class_id, detection_id, conf_score + bb]
116 |         """
117 | 
118 |         num = loc_data.size(0)  # batch size
119 |         num_priors = prior_data.size(0)
120 | 
121 |         output = torch.zeros(num, self.num_classes, self.top_k, self.num_classes + 8)
122 | 
123 |         conf_preds = conf_data.view(num, num_priors,
124 |                                     self.num_classes).transpose(2, 1)
125 | 
126 |         # why use the confidence mask? => it makes things a lot faster: nms is not applied to all boxes
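        # Shape sketch for PASCAL VOC (illustrative: num_classes = 21,
        # num_priors = 8732, top_k = 200):
        #   conf_preds:        [batch, 21, 8732]     class probabilities per prior
        #   decoded_boxes (i): [8732, 4]             (x1, y1, x2, y2) per image
        #   output:            [batch, 21, 200, 29]  21 class probs + 4 coords + 4 stds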
127 | 
128 | 
129 |         # very useful to filter out the nonzero boxes later
130 |         num_boxes_per_class = torch.zeros(self.num_classes)
131 | 
132 |         # Decode predictions into bboxes.
133 |         for i in range(num):
134 |             # Decode locations from predictions using priors to undo the encoding we did for offset regression at train time.
135 |             # These are the class agnostic bounding boxes!
136 |             decoded_boxes = decode(loc_data[i], prior_data, self.variance, self.modeltype)
137 |             conf_scores = conf_preds[i].clone()
138 | 
139 |             # For each class, perform nms
140 |             for cl in range(1, self.num_classes):
141 | 
142 |                 # self.conf_thresh is 0.01
143 |                 # gt: Computes input > other element-wise. source: https://pytorch.org/docs/stable/torch.html#torch.gt
144 |                 c_mask = conf_scores[cl].gt(
145 |                     self.conf_thresh)  # confidence mask, speeds up processing by not applying nms
146 | 
147 |                 # to all bounding boxes
148 |                 scores = conf_scores[cl][c_mask]
149 |                 if scores.size(0) == 0:
150 |                     continue
151 | 
152 |                 l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
153 |                 boxes = decoded_boxes[l_mask].view(-1, 4)
154 |                 alphas_cl = alphas[i][l_mask].view(-1,4)
155 | 
156 |                 # idx of highest scoring and non-overlapping boxes per class (nms)
157 |                 ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
158 | 
159 |                 # use c_mask to get the conf_scores per bounding box of the other classes for all bbs that exceed the conf threshold for this class
160 |                 conf_scores2 = conf_scores[:, c_mask]
161 |                 assert math.isclose(conf_scores2.sum().item(), conf_scores2.shape[1], rel_tol=1e-03), \
162 |                     "Sum of the probabilities over the classes for each detection must be (relatively close to) 1"
163 | 
164 |                 distributions = conf_scores2[:, ids[:count]]
165 |                 num_boxes_per_class[cl] = count
166 | 
167 |                 # idx of LOWEST scoring and non-overlapping boxes per class for boxes that don't belong
168 |                 # to the background class with a probability larger than the object threshold (IMPORTANT: Background = class 0)
169 | 
170 |                 # [image_id,class_id,detection_id,conf_dist+bb]
171 |                 # [1,1,200,21+8]
172 |                 output[i, cl, :count, :self.num_classes] = distributions.permute(1,
173 |                                                                                  0)  # permute reorders axes (here: 1 to 0 and 0 to 1)
174 |                 output[i, cl, :count, self.num_classes:-4] = boxes[ids[:count]]
175 |                 # transform alphas to standard deviations: α = log(σ^{2}) -> σ = exp(.5 * α)
176 |                 output[i, cl, :count, -4:] = torch.exp(alphas_cl[ids[:count]]*.5)
177 | 
178 |                 #todo [DONE]:
179 |                 # Example from original KL-Loss
180 |                 # def bbox_std_transform_xyxy(boxes, bbox_epsilon, describ=False):
181 |                 #     # bbox_std = np.exp(bbox_epsilon)
182 |                 #     if cfg.PRED_STD_LOG:
183 |                 #         bbox_std = np.exp(bbox_epsilon / 2.)
184 | 
185 | 
186 |         # use cl 5 of image 1 to check: output[0,5,:5,:21]
187 |         return output, num_boxes_per_class  # shape (PASCAL VOC) [1,21,200,29] = [1 = batch, classes + background class, top_k bounding boxes, 29 (class_dist + bounding box coords + coord stds)]
188 | 
189 |     def full_prob_dist_forward(self, loc_data, conf_data, prior_data):
190 |         """
191 |         This function is largely copied from the default forward. However, the default forward returns the top-k (200)
192 |         detections PER CLASS. The probability distribution over the classes is not returned, only the probability for
193 |         a given detection for a given class.
194 | 
195 |         Args:
196 |             loc_data: (tensor) Loc preds from loc layers
197 |                 Shape: [batch,num_priors*4]
198 |             conf_data: (tensor) Conf preds from conf layers
199 |                 Shape: [batch*num_priors,num_classes]
200 |             prior_data: (tensor) Prior boxes and variances from priorbox layers
201 |                 Shape: [1,num_priors,4]
202 | 
203 |         the default forward returns the top-k (200) detections PER CLASS. The probability distribution over the classes
204 |         is not returned, only the probability for a given detection for a given class.
205 | 
206 |         output in this function is [image_ids, class_id, detection_id, conf_dist + bb]
207 |         where in the default forward it is [image_ids, class_id, detection_id, conf_score + bb]
208 | 
209 |         """
210 |         num = loc_data.size(0)  # batch size
211 |         num_priors = prior_data.size(0)
212 | 
213 |         output = torch.zeros(num, self.num_classes, self.top_k, self.num_classes + 4)
214 | 
215 |         conf_preds = conf_data.view(num, num_priors,
216 |                                     self.num_classes).transpose(2, 1)
217 | 
218 |         # why use the confidence mask?
219 |         # => makes it a lot faster, no nms for all boxes
220 | 
221 |         # very useful to filter out the nonzero boxes later
222 |         num_boxes_per_class = torch.zeros(self.num_classes)
223 | 
224 |         # Decode predictions into bboxes.
225 |         for i in range(num):
226 |             # Decode locations from predictions using priors to undo the encoding we did for offset regression at train time.
227 |             # These are the class agnostic bounding boxes!
228 |             decoded_boxes = decode(loc_data[i], prior_data, self.variance)
229 | 
230 |             conf_scores = conf_preds[i].clone()
231 | 
232 |             # For each class, perform nms
233 |             for cl in range(1, self.num_classes):
234 | 
235 |                 # self.conf_thresh is 0.01
236 |                 # gt: Computes input > other element-wise. source: https://pytorch.org/docs/stable/torch.html#torch.gt
237 |                 c_mask = conf_scores[cl].gt(
238 |                     self.conf_thresh)  # confidence mask, speeds up processing by not applying nms
239 | 
240 |                 # to all bounding boxes
241 |                 scores = conf_scores[cl][c_mask]
242 |                 if scores.size(0) == 0:
243 |                     continue
244 | 
245 |                 l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
246 |                 boxes = decoded_boxes[l_mask].view(-1, 4)
247 | 
248 |                 '''What is the ids variable returned here, in relation to the indices in the original conf_preds variable?
249 | 
250 |                 ids are the indices of the boxes (score > 0.01) that survive nms.
251 |                 count is how many boxes are not suppressed by nms, i.e. how many
252 |                 non-overlapping objects of this class there are in this image.
253 |                 nms goes from the highest confidence to the lowest and, when two boxes
254 |                 overlap enough, the lower-scoring one is discarded for this class.
255 | 
256 |                 '''
257 | 
258 |                 # idx of highest scoring and non-overlapping boxes per class (nms)
259 |                 ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
260 | 
261 |                 # use c_mask to get the conf_scores per bounding box of the other classes for all bbs that exceed the conf threshold for this class
262 |                 conf_scores2 = conf_scores[:, c_mask]
263 |                 assert math.isclose(conf_scores2.sum().item(), conf_scores2.shape[1], rel_tol=1e-03), \
264 |                     "Sum of the probabilities over the classes for each detection must be (relatively close to) 1"
265 | 
266 |                 distributions = conf_scores2[:, ids[:count]]
267 |                 num_boxes_per_class[cl] = count
268 | 
269 | 
270 |                 # idx of LOWEST scoring and non-overlapping boxes per class for boxes that don't belong
271 |                 # to the background class with a probability larger than the object threshold (IMPORTANT: Background = class 0)
272 | 
273 |                 # [image_id,class_id,detection_id,conf_dist+bb]
274 |                 # [1,1,200,21+4]
275 |                 output[i, cl, :count, :self.num_classes] = distributions.permute(1,
276 |                                                                                  0)  # permute reorders axes (here: 1 to 0 and 0 to 1)
277 |                 output[i, cl, :count, self.num_classes:] = boxes[ids[:count]]
278 | 
279 | 
280 |         # use cl 5 of image 1 to check: output[0,5,:5,:21]
281 |         return output, num_boxes_per_class  # shape (PASCAL VOC) [1,21,200,25] = [1 = batch, classes + background class, top_k bounding boxes, 25 (class_dist + bounding box)]
282 | 
283 |     def default_forward(self, loc_data, conf_data, prior_data):
284 |         """
285 |         Args:
286 |             loc_data: (tensor) Loc preds from loc layers
287 |                 Shape: [batch,num_priors*4]
288 |             conf_data: (tensor) Conf preds from conf layers
289 |                 Shape: [batch*num_priors,num_classes]
290 |             prior_data: (tensor) Prior boxes and variances from priorbox layers  todo: prior box variances?
291 |                 Shape: [1,num_priors,4]
292 | 
293 |         the default forward returns the top-k (200) detections PER CLASS. The probability distribution over the classes
294 |         is not returned, only the probability for a given detection for a given class.
295 |         :returns:
296 |             output:
297 |                 shape: [image_id,class_id,detection_id,conf_score+bb]
298 | 
299 |         """
300 | 
301 |         num = loc_data.size(0)  # batch size
302 |         num_priors = prior_data.size(0)
303 |         output = torch.zeros(num, self.num_classes, self.top_k, 5)  # 5 = 4 box coordinates + 1 confidence score
304 |         conf_preds = conf_data.view(num, num_priors,
305 |                                     self.num_classes).transpose(2, 1)
306 | 
307 |         # why use the confidence mask?
308 |         # => makes it a lot faster, no nms for all boxes => also used in paper
309 |         for i in range(num):
310 |             # Decode locations from predictions using priors to undo the encoding we did for offset regression at train time.
311 |             # These are the class agnostic bounding boxes!
312 |             # [8732,4]
313 |             decoded_boxes = decode(loc_data[i], prior_data, self.variance, self.modeltype)
314 |             # [21,8732]
315 |             conf_scores = conf_preds[i].clone()
316 | 
317 |             # For each class, perform nms
318 |             for cl in range(1, self.num_classes):
319 | 
320 |                 # self.conf_thresh is 0.01
321 |                 # gt: Computes input > other element-wise. source: https://pytorch.org/docs/stable/torch.html#torch.gt
322 |                 c_mask = conf_scores[cl].gt(self.conf_thresh)  # confidence mask, speeds up processing by not applying nms
323 | 
324 |                 # to all bounding boxes
325 |                 scores = conf_scores[cl][c_mask]
326 |                 if scores.size(0) == 0:
327 |                     continue
328 | 
329 | 
330 |                 l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
331 |                 boxes = decoded_boxes[l_mask].view(-1, 4)
332 | 
333 |                 # idx of highest scoring and non-overlapping boxes per class (nms)
334 |                 ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
335 | 
336 |                 # [image_id,class_id,detection_id,conf+bb]
337 |                 output[i, cl, :count] = \
338 |                     torch.cat((scores[ids[:count]].unsqueeze(1),
339 |                                boxes[ids[:count]]), 1)
340 | 
341 |         flt = output.contiguous().view(num, -1, 5)  # [1,4200,5]
342 |         _, idx = flt[:, :, 0].sort(1, descending=True)  # sort over ALL confidences (not per class)
343 |         _, rank = idx.sort(1)
344 |         flt[(rank >= self.top_k).unsqueeze(-1).expand_as(flt)] = 0  # keep the overall top_k, zero out the rest
345 | 
346 |         # use cl 5 of image 1 to check: output[0,5,:5,:21]
347 |         return output  # shape (PASCAL VOC) [1,21,200,5] = [1 = batch, classes + background class, top_k bounding boxes, 5 (confidence score + bounding box)]
--------------------------------------------------------------------------------
/active_learning_package/voc_eval_helpers.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import pickle
4 | import xml.etree.ElementTree as ET
5 | import time
6 | import sys
7 | 
8 | import torch
9 | from torch.autograd import Variable
10 | 
11 | import data
12 | from . import helpers
13 | 
14 | 
15 | def eval(test_dataset, args, net, al_iteration, eval_ensemble_idx = 99999, epochs_test = False, train_iters = None, use_dataset_image_ids = False):
16 |     """
17 |     largely copied from eval.py from the original pytorch SSD repository: https://github.com/amdegroot/ssd.pytorch
18 |     Slightly adjusted to fit in this active learning module
19 |     """
20 |     print('start VOC eval')
21 | 
22 |     num_images = len(test_dataset)
23 | 
24 |     # all detections are collected into:
25 |     #    all_boxes[cls][image] = N x 5 array of detections in
26 |     #    (x1, y1, x2, y2, score)
27 |     if args.dataset in ['VOC07', 'VOC12']:
28 |         labelmap = data.VOC_CLASSES
29 |     elif args.dataset == 'VOC07_1_class':
30 |         labelmap = [args.relevant_class]
31 |     elif args.dataset == 'VOC07_6_class':
32 |         labelmap = args.labelmap
33 |     else:
34 |         raise NotImplementedError()
35 | 
36 | 
37 |     args.summary['eval_model']['num_images_eval'] = num_images
38 |     args.summary['eval_model']['num_objects_eval'] = 'todo'
39 |     args.summary['eval_model']['APs'] = {}
40 | 
41 |     all_boxes = [[[] for _ in range(num_images)]
42 |                  for _ in range(len(labelmap) + 1)]
43 | 
44 |     # timers
45 |     _t = {'im_detect': helpers.Timer(), 'misc': helpers.Timer()}
46 | 
47 |     output_dir = args.experiment_dir + 'eval/'
48 |     print('output dir ', output_dir)
49 |     if not os.path.exists(output_dir):
50 |         os.makedirs(output_dir, exist_ok=True)
51 | 
52 |     if epochs_test:
53 |         det_file = os.path.join(output_dir, 'al-iter_'+str(al_iteration)+'_ensemble_'+str(args.eval_ensemble_idx)+'_'+str('todo')+'_detections.pkl')
54 |     else:
55 |         det_file = os.path.join(output_dir, 'al-iter_'+str(al_iteration)+'_ensemble_'+str(args.eval_ensemble_idx)+'_detections.pkl')
56 | 
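    # Example det_file name this produces (illustrative values: al_iteration = 3,
    # eval_ensemble_idx = 0): <output_dir>/al-iter_3_ensemble_0_detections.pkl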
57 |     # if the detection passes have already been done with this network, reuse the cached detections
58 |     if os.path.isfile(det_file):
59 |         with open(det_file, 'rb') as file:
60 |             all_boxes = pickle.load(file)
61 | 
62 |     else:
63 |         for i in range(num_images):
64 |             im, gt, h, w = test_dataset.pull_item(i)
65 | 
66 |             x = Variable(im.unsqueeze(0))
67 | 
68 |             if args.cuda and torch.cuda.is_available():
69 |                 x = x.cuda()
70 | 
71 |             _t['im_detect'].tic()
72 | 
73 |             detections = net(x).data
74 |             detect_time = _t['im_detect'].toc(average=False)
75 |             # set detections back to cpu
76 |             if args.cuda and torch.cuda.is_available():
77 |                 detections = detections.to('cpu')
78 | 
79 |             # skip j = 0, because it's the background class
80 |             for j in range(1, detections.size(1)):
81 |                 dets = detections[0, j, :]  # shape [200,5]
82 |                 mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t()  # takes the detections that have confidence > 0., expands to (5, 200) and then transposes
83 |                 dets = torch.masked_select(dets, mask).view(-1, 5)
84 |                 if dets.size(0) == 0:
85 |                     continue
86 |                 boxes = dets[:, 1:]
87 |                 boxes[:, 0] *= w
88 |                 boxes[:, 2] *= w
89 |                 boxes[:, 1] *= h
90 |                 boxes[:, 3] *= h
91 |                 scores = dets[:, 0].cpu().numpy()
92 |                 cls_dets = np.hstack((boxes.cpu().numpy(),
93 |                                       scores[:, np.newaxis])).astype(np.float32,
94 |                                                                      copy=False)
95 |                 all_boxes[j][i] = cls_dets
96 | 
97 |             print('im_detect: {:d}/{:d} {:.3f}s \t al iteration: {:d} \t ensemble_idx {:d}'.format(i,
98 |                 num_images, detect_time, int(al_iteration), int(args.eval_ensemble_idx)))
99 | 
100 |         with open(det_file, 'wb') as f:
101 |             pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
102 | 
103 | 
104 |     print('Evaluating detections')
105 |     evaluate_detections(all_boxes,
106 |                         output_dir,
107 |                         test_dataset,
108 |                         args,
109 |                         labelmap,
110 |                         use_dataset_image_ids)
111 | 
112 | 
113 | def evaluate_detections(box_list, output_dir, dataset, args, labelmap, use_dataset_image_ids):
114 |     """
115 |     largely copied from eval.py from the original pytorch SSD repository: https://github.com/amdegroot/ssd.pytorch
116 |     Slightly adjusted to fit in this active learning module
117 |     """
118 |     if args.dataset in ['VOC07', 'VOC07_1_class', 'VOC07_6_class']:
119 | 
120 |         YEAR = '2007'
121 |         devkit_path = args.dataset_root + 'VOC' + YEAR
122 | 
123 |         write_voc_results_file(box_list,
124 |                                dataset,
125 |                                labelmap,
126 |                                devkit_path,
127 |                                args)
128 | 
129 |         do_python_eval(output_dir,
130 |                        False,  # use VOC07 metrics
131 |                        devkit_path,
132 |                        labelmap,
133 |                        args,
134 |                        dataset,
135 |                        use_dataset_image_ids)
136 |     else:
137 |         raise NotImplementedError()
138 | 
139 | def write_voc_results_file(all_boxes,
140 |                            dataset,
141 |                            labelmap,
142 |                            devkit_path,
143 |                            args):
144 |     """
145 |     largely copied from eval.py from the original pytorch SSD repository: https://github.com/amdegroot/ssd.pytorch
146 |     """
147 | 
148 |     for cls_ind, cls in enumerate(labelmap):
149 |         print('Writing {:s} VOC results file'.format(cls))
150 |         filename = get_voc_results_file_template('test',
151 |                                                  cls,
152 |                                                  devkit_path,
153 |                                                  args)
154 | 
155 |         # if the results files were already made with this network, keep them
156 |         if os.path.isfile(filename):
157 |             continue
158 | 
159 |         else:
160 |             with open(filename, 'wt') as f:
161 |                 for im_ind, index in enumerate(dataset.ids):
162 |                     dets = all_boxes[cls_ind+1][im_ind]
163 |                     if len(dets) == 0:
164 |                         continue
165 |                     # the VOCdevkit expects 1-based indices
166 |                     for k in range(dets.shape[0]):
167 |                         f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
168 |                                 format(index[1], dets[k, -1],
169 |                                        dets[k, 0] + 1, dets[k, 1] + 1,
170 |                                        dets[k, 2] + 1, dets[k, 3] + 1))
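                        # Each written line looks like (illustrative values):
                        #   000012 0.912 46.0 68.0 201.0 314.0
                        # i.e. <image_id> <confidence> <x1> <y1> <x2> <y2> in 1-based pixel coords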
171 | 
172 | 
173 | def do_python_eval(output_dir,
174 |                    use_07,
175 |                    devkit_path,
176 |                    labelmap,
177 |                    args,
178 |                    dataset,
179 |                    use_dataset_image_ids):
180 |     """
181 |     largely copied from eval.py from the original pytorch SSD repository: https://github.com/amdegroot/ssd.pytorch
182 |     Slightly adjusted to fit in this active learning module
183 |     """
184 |     annopath = os.path.join(args.dataset_root, 'VOC2007', 'Annotations', '%s.xml')
185 |     if type(args.imageset_test) == list and len(args.imageset_test) == 1:
186 |         imagesetfile = args.imageset_test[0][1]
187 |     else:
188 |         imagesetfile = args.imageset_test
189 |     imgsetpath = os.path.join(args.dataset_root, 'VOC2007', 'ImageSets',
190 |                               'Main', '{:s}.txt')
191 |     cachedir = os.path.join(devkit_path, 'annotations_cache')
192 | 
193 |     # The PASCAL VOC metric changed in 2010
194 |     use_07_metric = use_07
195 |     print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
196 | 
197 |     iou_thresholds = [0.3]
198 |     iou_thresholds.extend(list(np.linspace(0.5, 0.95, 10)))
199 | 
200 |     for iou_threshold in iou_thresholds:
201 |         print('IoU threshold: ', str(iou_threshold), '\n_______________\n')
202 |         args.summary['eval_model']['APs'][str(iou_threshold)] = {}
203 | 
204 |         aps = []
205 |         if not os.path.isdir(output_dir):
206 |             os.mkdir(output_dir)
207 |         for i, cls in enumerate(labelmap):
208 |             filename = get_voc_results_file_template('test', cls, devkit_path, args)  # results file
209 |             rec, prec, ap = voc_eval(
210 |                 filename, annopath, imgsetpath.format(imagesetfile), cls, cachedir,
211 |                 ovthresh=iou_threshold, use_07_metric=use_07_metric, dataset=dataset, use_dataset_image_ids=use_dataset_image_ids)  # todo: imageset_file: '/home/jasper/data/VOCdevkit/VOC2007/ImageSets/Main/test.txt'
212 |             # rec,prec,ap = 0.1,0.2,0.3
213 | 
214 |             aps += [ap]
215 |             print('AP for {} = {:.4f}'.format(cls, ap))
216 |             with open(os.path.join(output_dir, cls + '_pr.pkl'), 'wb') as f:
217 |                 pickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f)
218 | 
219 |             # write summary average precisions
220 |             args.summary['eval_model']['APs'][str(iou_threshold)][str(cls)] = ap
221 | 
222 |         # exclude classes without predictions
223 |         aps = [ap for ap in aps if ap != -1.]
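        # Sketch of the resulting args.summary['eval_model']['APs'] structure
        # (illustrative values):
        #   {'0.3': {'aeroplane': 0.71, ..., 'mAP': 0.64},
        #    '0.5': {...}, ..., '0.95': {...}, 'mmAP': 0.41}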
224 |         args.summary['eval_model']['APs'][str(iou_threshold)]['mAP'] = np.mean(aps)
225 |         print('Mean AP = {:.4f}'.format(np.mean(aps)))
226 |         print('~~~~~~~~')
227 |         print('Results:')
228 |         for ap in aps:
229 |             print('{:.3f}'.format(ap))
230 |         print('{:.3f}'.format(np.mean(aps)))
231 |         print('~~~~~~~~')
232 |         print('')
233 |         print('--------------------------------------------------------------')
234 |         print('Results computed with the **unofficial** Python eval code.')
235 |         print('Results should be very close to the official MATLAB eval code.')
236 |         print('--------------------------------------------------------------')
237 |         print('\n\n\n')
238 | 
239 |     # calculate mmAP (COCO-style mAP, averaged over the 10 IoU thresholds 0.5:0.95)
240 |     args.summary['eval_model']['APs']['mmAP'] = 0
241 |     for key, value in args.summary['eval_model']['APs'].items():
242 |         if key not in ('mmAP', '0.3'):  # the extra 0.3 threshold is not part of the COCO definition
243 |             args.summary['eval_model']['APs']['mmAP'] += args.summary['eval_model']['APs'][key]['mAP']
244 |     args.summary['eval_model']['APs']['mmAP'] /= 10
245 | 
246 | 
247 | def get_voc_results_file_template(image_set, cls, devkit_path, args):
248 |     """
249 |     largely copied from eval.py from the original pytorch SSD repository: https://github.com/amdegroot/ssd.pytorch
250 |     Slightly adjusted to fit in this active learning module
251 |     """
252 | 
253 |     # VOCdevkit/VOC2007/results/det_test_aeroplane.txt
254 |     filename = 'det_' + image_set + '_%s.txt' % (cls)
255 | 
256 |     filedir = args.experiment_dir + 'eval/results/al-iter_' + str(args.al_iteration) + '/ensemble_idx_' + str(args.eval_ensemble_idx)
257 |     # filedir = os.path.join(devkit_path, 'results')  # old filedir from Max De Groot
258 |     if not os.path.exists(filedir):
259 |         os.makedirs(filedir, exist_ok = True)
260 |     path = os.path.join(filedir, filename)
261 |     return path
262 | 
263 | def voc_eval(detpath,
264 |              annopath,
265 |              imagesetfile,
266 |              classname,
267 |              cachedir,
268 |              ovthresh=0.5,
269 |              use_07_metric=True,
270 |              dataset = None,
271 |              use_dataset_image_ids = False):
272 |     """rec, prec, ap = voc_eval(detpath,
273 |                                 annopath,
274 |                                 imagesetfile,
275 |                                 classname,
276 |                                 [ovthresh],
277 |                                 [use_07_metric])
278 |     Top level function that does the PASCAL VOC evaluation.
279 |     detpath: Path to detections
280 |         detpath.format(classname) should produce the detection results file.
281 |     annopath: Path to annotations
282 |         annopath.format(imagename) should be the xml annotations file.
283 |     imagesetfile: Text file containing the list of images, one image per line.
284 |     classname: Category name (duh)
285 |     cachedir: Directory for caching the annotations
286 |     [ovthresh]: Overlap threshold (default = 0.5)
287 |     [use_07_metric]: Whether to use VOC07's 11 point AP computation
288 |         (default True)
289 | 
290 | 
291 |     NOTE: largely copied from eval.py from the original pytorch SSD repository: https://github.com/amdegroot/ssd.pytorch
292 |     Slightly adjusted to fit in this active learning module
293 |     """
294 | 
295 |     # assumes detections are in detpath.format(classname)
296 |     # assumes annotations are in annopath.format(imagename)
297 |     # assumes imagesetfile is a text file with each line an image name
298 |     # cachedir caches the annotations in a pickle file
299 |     # first load gt
300 |     if not os.path.isdir(cachedir):
301 |         os.mkdir(cachedir)
302 |     cachefile = os.path.join(cachedir, 'annots.pkl')  # cachefile of correct annotations/truth values
303 |     # read list of images
304 |     with open(imagesetfile, 'r') as f:
305 |         lines = f.readlines()
306 |     imagenames = [x.strip() for x in lines]
307 |     if not os.path.isfile(cachefile):
308 |         # load annots
309 |         recs = {}
310 |         for i, imagename in enumerate(imagenames):
311 |             recs[imagename] = parse_rec(annopath % (imagename))
312 |             if i % 100 == 0:
313 |                 print('Reading annotation for {:d}/{:d}'.format(
314 |                     i + 1, len(imagenames)))
315 |         # save
316 |         print('Saving cached annotations to {:s}'.format(cachefile))
317 |         with open(cachefile, 'wb') as f:
318 |             pickle.dump(recs, f)
319 |     else:
320 |         # load
321 |         with open(cachefile, 'rb') as f:
322 |             recs = pickle.load(f)
323 | 
324 |     # extract gt objects for this class
325 |     class_recs = {}
326 |     npos = 0
327 | 
328 |     if use_dataset_image_ids:
329 |         for imagename in dataset.ids:
330 |             imagename = imagename[1]
331 |             R = [obj for obj in recs[imagename] if obj['name'] == classname]
332 |             bbox = np.array([x['bbox'] for x in R])
333 |             difficult = np.array([x['difficult'] for x in R]).astype(bool)
334 |             det = [False] * len(R)
335 |             npos = npos + sum(~difficult)
336 |             class_recs[imagename] = {'bbox': bbox,
337 |                                      'difficult': difficult,
338 |                                      'det': det}
339 |     else:
340 |         for imagename in imagenames:
341 |             R = [obj for obj in recs[imagename] if obj['name'] == classname]
342 |             bbox = np.array([x['bbox'] for x in R])
343 |             difficult = np.array([x['difficult'] for x in R]).astype(bool)
344 |             det = [False] * len(R)
345 |             npos = npos + sum(~difficult)
346 |             class_recs[imagename] = {'bbox': bbox,
347 |                                      'difficult': difficult,
348 |                                      'det': det}
349 | 
350 |     # read detections (see results folder in VOCDevkit)
351 |     detfile = detpath.format(classname)
352 |     with open(detfile, 'r') as f:
353 |         lines = f.readlines()
354 |     if any(lines):
355 | 
356 |         splitlines = [x.strip().split(' ') for x in lines]
357 |         image_ids = [x[0] for x in splitlines]
358 |         confidence = np.array([float(x[1]) for x in splitlines])
359 |         BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
360 | 
361 |         # sort by confidence
362 |         sorted_ind = np.argsort(-confidence)
363 |         sorted_scores = np.sort(-confidence)
364 |         BB = BB[sorted_ind, :]
365 |         image_ids = [image_ids[x] for x in sorted_ind]
366 | 
367 |         # go down dets and mark TPs and FPs
368 |         nd = len(image_ids)
369 |         tp = np.zeros(nd)
370 |         fp = np.zeros(nd)
371 |         for d in range(nd):
372 |             R = class_recs[image_ids[d]]  # can result in a KeyError if class_recs doesn't have the image_id (class_recs holds gt for all images in imagenames, while recs is taken from the cache file) todo
373 |             bb = BB[d, :].astype(float)
374 |             ovmax = -np.inf
375 |             BBGT = R['bbox'].astype(float)
376 |             if BBGT.size > 0:
377 |                 # compute overlaps
378 |                 # intersection
379 |                 ixmin = np.maximum(BBGT[:, 0], bb[0])
380 |                 iymin = np.maximum(BBGT[:, 1], bb[1])
381 |                 ixmax = np.minimum(BBGT[:, 2], bb[2])
382 |                 iymax = np.minimum(BBGT[:, 3], bb[3])
383 |                 iw = np.maximum(ixmax - ixmin, 0.)
384 |                 ih = np.maximum(iymax - iymin, 0.)
385 |                 inters = iw * ih
386 |                 uni = ((bb[2] - bb[0]) * (bb[3] - bb[1]) +
387 |                        (BBGT[:, 2] - BBGT[:, 0]) *
388 |                        (BBGT[:, 3] - BBGT[:, 1]) - inters)
389 |                 overlaps = inters / uni
390 |                 ovmax = np.max(overlaps)
391 |                 jmax = np.argmax(overlaps)
392 | 
393 |             if ovmax > ovthresh:
394 |                 if not R['difficult'][jmax]:
395 |                     if not R['det'][jmax]:
396 |                         tp[d] = 1.
397 |                         R['det'][jmax] = 1
398 |                     else:
399 |                         fp[d] = 1.
400 |             else:
401 |                 fp[d] = 1.
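        # Worked example (illustrative): with detections already sorted by
        # confidence, tp = [1, 0, 1], fp = [0, 1, 0] and npos = 2 give
        #   cumsum -> tp = [1, 1, 2], fp = [0, 1, 1]
        #   rec  = [0.5, 0.5, 1.0]
        #   prec = [1.0, 0.5, 0.667]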
402 | 
403 |         # compute precision recall
404 |         fp = np.cumsum(fp)
405 |         tp = np.cumsum(tp)
406 |         rec = tp / float(npos)
407 |         # avoid divide by zero in case the first detection matches a difficult
408 |         # ground truth
409 |         prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
410 |         ap = voc_ap(rec, prec, use_07_metric)
411 |     else:
412 |         # note: the original default values of -1 (below) can cause negative mAPs, so zeros are used instead
413 |         # rec = -1.
414 |         # prec = -1.
415 |         # ap = -1.
416 |         rec = 0.
417 |         prec = 0.
418 |         ap = 0.
419 |     return rec, prec, ap
420 | 
421 | 
422 | def parse_rec(filename):
423 |     """ Parse a PASCAL VOC xml file
424 |     largely copied from eval.py from the original pytorch SSD repository: https://github.com/amdegroot/ssd.pytorch
425 |     """
426 |     tree = ET.parse(filename)
427 |     objects = []
428 |     for obj in tree.findall('object'):
429 |         obj_struct = {}
430 |         obj_struct['name'] = obj.find('name').text
431 |         obj_struct['pose'] = obj.find('pose').text
432 |         obj_struct['truncated'] = int(obj.find('truncated').text)
433 |         obj_struct['difficult'] = int(obj.find('difficult').text)
434 |         bbox = obj.find('bndbox')
435 |         obj_struct['bbox'] = [int(bbox.find('xmin').text) - 1,
436 |                               int(bbox.find('ymin').text) - 1,
437 |                               int(bbox.find('xmax').text) - 1,
438 |                               int(bbox.find('ymax').text) - 1]
439 |         objects.append(obj_struct)
440 | 
441 |     return objects
442 | 
443 | 
444 | def voc_ap(rec, prec, use_07_metric=True):
445 |     """ ap = voc_ap(rec, prec, [use_07_metric])
446 |     Compute VOC AP given precision and recall.
447 |     If use_07_metric is true, uses the
448 |     VOC 07 11 point method (default: True).
449 |     """
450 |     if use_07_metric:
451 |         # 11 point metric
452 |         ap = 0.
453 |         for t in np.arange(0., 1.1, 0.1):
454 |             if np.sum(rec >= t) == 0:
455 |                 p = 0
456 |             else:
457 |                 p = np.max(prec[rec >= t])
458 |             ap = ap + p / 11.
459 |     else:
460 |         # correct AP calculation
461 |         # first append sentinel values at the end
462 |         mrec = np.concatenate(([0.], rec, [1.]))
463 |         mpre = np.concatenate(([0.], prec, [0.]))
464 | 
465 |         # compute the precision envelope
466 |         for i in range(mpre.size - 1, 0, -1):
467 |             mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
468 | 
469 |         # to calculate area under PR curve, look for points
470 |         # where X axis (recall) changes value
471 |         i = np.where(mrec[1:] != mrec[:-1])[0]
472 | 
473 |         # and sum (\Delta recall) * prec
474 |         ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
475 |     return ap
476 | 
477 | 
--------------------------------------------------------------------------------