├── LICENSE
├── README.md
├── __init__.py
├── configs
    ├── .DS_Store
    ├── BDD100k
    │   ├── R50_FPN_all.yaml
    │   ├── R50_FPN_all_ood.yaml
    │   ├── R50_FPN_all_ood_nu.yaml
    │   ├── stud_regnet.yaml
    │   ├── stud_regnet_ood_coco.yaml
    │   ├── stud_regnet_ood_nu.yaml
    │   ├── stud_resnet.yaml
    │   ├── stud_resnet_ood_coco.yaml
    │   └── stud_resnet_ood_nu.yaml
    ├── Base-RCNN-FPN.yaml
    └── VIS
    │   ├── R50_FPN_all.yaml
    │   ├── R50_FPN_all_ood_coco.yaml
    │   ├── R50_FPN_all_ood_nu.yaml
    │   ├── stud_regnet.yaml
    │   ├── stud_regnet_ood_coco.yaml
    │   ├── stud_regnet_ood_nu.yaml
    │   ├── stud_resnet.yaml
    │   ├── stud_resnet_ood_coco.yaml
    │   └── stud_resnet_ood_nu.yaml
├── datasets
    ├── __init__.py
    ├── bdd100k2coco.py
    ├── convert_bdd_3cls.py
    ├── convert_bdd_ood.py
    ├── convert_city.py
    ├── convert_coco_vis.py
    ├── convert_nu.py
    ├── convert_nu_ood.py
    ├── convert_openimages_vis.py
    ├── convert_vg_ood.py
    ├── convert_vis_val.py
    ├── convert_vis_val1.py
    ├── convert_vis_val_new.py
    ├── domain_splits_bdd100k.py
    ├── metadata.py
    ├── vg_classes.py
    └── waymo2coco.py
├── figs
    ├── .DS_Store
    └── cycle_confusion_arch.png
├── permutations
    ├── permutations_hamming_all_24.npy
    ├── permutations_hamming_max_1000.npy
    ├── permutations_hamming_max_2.npy
    ├── permutations_hamming_max_24.npy
    └── permutations_hamming_max_35.npy
├── pyproject.toml
├── requirements.txt
├── setup.cfg
├── src
    ├── __init__.py
    ├── config
    │   ├── __init__.py
    │   ├── config.py
    │   └── defaults.py
    ├── data
    │   ├── __init__.py
    │   ├── build.py
    │   ├── builtin.py
    │   ├── coco.py
    │   ├── pair_all_sampler.py
    │   ├── pair_fix_sampler.py
    │   ├── pair_sampler.py
    │   ├── pair_sampler_multi_interval.py
    │   └── pair_sampler_multi_random.py
    ├── engine
    │   ├── __init__.py
    │   ├── defaults.py
    │   ├── evaluate.py
    │   ├── evaluator.py
    │   ├── myvisualizer.py
    │   └── train_loop.py
    └── modeling
    │   ├── __init__.py
    │   ├── meta_arch
    │       ├── GAN.py
    │       ├── Imagelist.py
    │       ├── __init__.py
    │       ├── backbone.py
    │       ├── build.py
    │       ├── dla.py
    │       ├── eff.py
    │       ├── fpn.py
    │       ├── layers
    │       │   ├── __init__.py
    │       │   ├── activations.py
    │       │   └── wrappers.py
    │       ├── rcnn.py
    │       ├── rcnn_csi.py
    │       ├── rcnn_gan.py
    │       ├── rcnn_ss.py
    │       ├── rcnn_ss_add.py
    │       ├── rcnn_ss_cache.py
    │       ├── rcnn_ss_cheap.py
    │       ├── rcnn_ss_gene.py
    │       ├── rcnn_ss_mixup.py
    │       ├── rcnn_ss_remove.py
    │       ├── rcnn_ss_single.py
    │       ├── regnet.py
    │       ├── regnet_model.py
    │       ├── resnest.py
    │       ├── resnest1.py
    │       ├── splat.py
    │       └── vovnet.py
    │   ├── roi_heads
    │       ├── __init__.py
    │       ├── fast_rcnn.py
    │       ├── roi_heads.py
    │       ├── roi_heads_add.py
    │       ├── roi_heads_csi.py
    │       ├── roi_heads_gan.py
    │       └── roi_heads_godinc.py
    │   ├── self_supervised
    │       ├── __init__.py
    │       ├── build.py
    │       ├── cycle.py
    │       ├── cycle_energy.py
    │       ├── cycle_energy_1024_latter.py
    │       ├── cycle_energy_direct.py
    │       ├── cycle_energy_direct_add.py
    │       ├── cycle_energy_direct_add_all.py
    │       ├── cycle_energy_direct_add_all_cache.py
    │       ├── cycle_energy_direct_add_all_cache_new.py
    │       ├── cycle_energy_direct_add_all_max.py
    │       ├── cycle_energy_direct_add_all_mild_energy.py
    │       ├── cycle_energy_direct_add_all_noise.py
    │       ├── cycle_energy_direct_add_all_random.py
    │       ├── cycle_energy_direct_add_att.py
    │       ├── cycle_energy_direct_add_att_neg.py
    │       ├── cycle_energy_direct_add_cache.py
    │       ├── cycle_energy_direct_add_cache_new.py
    │       ├── cycle_energy_direct_max.py
    │       ├── cycle_energy_direct_no.py
    │       ├── cycle_energy_direct_random.py
    │       ├── jigsaw.py
    │       ├── leftright.py
    │       ├── rotation.py
    │       └── ss_layers.py
    │   ├── utils
    │       ├── __init__.py
    │       └── image_list.py
    │   └── vit
    │       ├── __init__.py
    │       ├── config.py
    │       ├── dataset_mapper.py
    │       ├── linformer.py
    │       ├── longformer2d.py
    │       ├── msvit.py
    │       ├── performer.py
    │       ├── slidingchunk_2d.py
    │       └── srformer.py
└── tools
    ├── __init__.py
    ├── agg_results.py
    ├── analyze_bdd_fea.py
    ├── analyze_energy.py
    ├── bdd_coco.py
    ├── bdd_heatmap.py
    ├── ckpt_surgery.py
    ├── convert_weight.py
    ├── count.py
    ├── metric_utils.py
    ├── plot_frame_interval.py
    ├── plot_frame_range.py
    ├── plot_loss.py
    ├── plot_vos.py
    ├── select_permutation.py
    ├── simple_permutation.py
    ├── train_net.py
    ├── vis_coco.py
    ├── visualize_data.py
    ├── visualize_json_results.py
    └── waymo_bdd.py


/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeplearning-wisc/stud/21b9492c63804d7acf41fefd0d6ad40cf29975a5/__init__.py


--------------------------------------------------------------------------------
/configs/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeplearning-wisc/stud/21b9492c63804d7acf41fefd0d6ad40cf29975a5/configs/.DS_Store


--------------------------------------------------------------------------------
/configs/BDD100k/R50_FPN_all.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "GeneralizedRCNN1"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   MASK_ON: False
 6 |   RESNETS:
 7 |     DEPTH: 50
 8 |   ROI_HEADS:
 9 |     NUM_CLASSES: 10
10 | DATASETS:
11 | #  TRAIN: ("waymo_all_train", )
12 |   TRAIN:  ("bdd_tracking_2k_train",)
13 |   TEST: ("bdd_tracking_2k_val",)
14 | SOLVER:
15 |   # the size of the BDD tracking daytime is about 2x of the BDD100k dataset
16 |   # for initial experiments, this schedule will be approximately, 0.5X schedule
17 |   # ~5 epochs
18 |   IMS_PER_BATCH: 16
19 |   BASE_LR: 0.02
20 |   STEPS: (36000, 48000,)
21 |   MAX_ITER: 52500
22 | INPUT:
23 |   MIN_SIZE_TRAIN: (800,)
24 |   CROP:
25 |     ENABLED: True
26 | OUTPUT_DIR: "/nobackup-slow/dataset/video/bdd/bdd100k/checkpoints/bdd_tracking_2k/daytime/R50_FPN_all_vanilla"


--------------------------------------------------------------------------------
/configs/BDD100k/R50_FPN_all_ood.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "GeneralizedRCNN1"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   MASK_ON: False
 6 |   RESNETS:
 7 |     DEPTH: 50
 8 |   ROI_HEADS:
 9 |     NUM_CLASSES: 10
10 | DATASETS:
11 | #  TRAIN: ("waymo_all_train", )
12 |   TRAIN:  ("bdd_tracking_2k_train",)
13 |   TEST: ("coco_2017_val_ood_wrt_bdd",)
14 | SOLVER:
15 |   # the size of the BDD tracking daytime is about 2x of the BDD100k dataset
16 |   # for initial experiments, this schedule will be approximately, 0.5X schedule
17 |   # ~5 epochs
18 |   IMS_PER_BATCH: 16
19 |   BASE_LR: 0.02
20 |   STEPS: (36000, 48000,)
21 |   MAX_ITER: 52500
22 | INPUT:
23 |   MIN_SIZE_TRAIN: (800,)
24 |   CROP:
25 |     ENABLED: True
26 | OUTPUT_DIR: "/nobackup-slow/dataset/video/bdd/bdd100k/checkpoints/bdd_tracking_2k/daytime/R50_FPN_all_vanilla"


--------------------------------------------------------------------------------
/configs/BDD100k/R50_FPN_all_ood_nu.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "GeneralizedRCNN1"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   MASK_ON: False
 6 |   RESNETS:
 7 |     DEPTH: 50
 8 |   ROI_HEADS:
 9 |     NUM_CLASSES: 10
10 | DATASETS:
11 | #  TRAIN: ("waymo_all_train", )
12 |   TRAIN:  ("bdd_tracking_2k_train",)
13 |   TEST: ("nu_bdd_ood",)
14 | SOLVER:
15 |   # the size of the BDD tracking daytime is about 2x of the BDD100k dataset
16 |   # for initial experiments, this schedule will be approximately, 0.5X schedule
17 |   # ~5 epochs
18 |   IMS_PER_BATCH: 16
19 |   BASE_LR: 0.02
20 |   STEPS: (36000, 48000,)
21 |   MAX_ITER: 52500
22 | INPUT:
23 |   MIN_SIZE_TRAIN: (800,)
24 |   CROP:
25 |     ENABLED: True
26 | OUTPUT_DIR: "/nobackup-slow/dataset/video/bdd/bdd100k/checkpoints/bdd_tracking_2k/daytime/R50_FPN_all_vanilla"


--------------------------------------------------------------------------------
/configs/BDD100k/stud_regnet.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   PIXEL_STD: [57.375, 57.120, 58.395]
 4 |   BACKBONE:
 5 |     NAME: "build_regnetx_fpn_backbone"
 6 |   WEIGHTS: "RegNetX-4.0GF_dds_8gpu.pyth" #"regnetx_detectron2.pth"
 7 |   META_ARCHITECTURE: "SSRCNN"
 8 | #  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 9 |   MASK_ON: False
10 |   RESNETS:
11 |     DEPTH: 50
12 |   ROI_HEADS:
13 |     NAME: "StandardROIHeadsSS"
14 |     NUM_CLASSES: 10
15 |   FPN:
16 |     IN_FEATURES: [ "s1", "s2", "s3", "s4" ]
17 |   SS:
18 |     FEAT_LEVEL: "res4"
19 |     NAME: ["build_cycle_energy_direct_add_all_head"]
20 |     LOSS_SCALE: 0.0
21 |     RATIO: 2.0
22 |     COEF: 1.0
23 |     FILTERING1: 0.4
24 |     FILTERING2: 0.6
25 |     ENERGY_WEIGHT: 0.05
26 | DATASETS:
27 |   TRAIN: ("bdd_tracking_2k_train",)
28 |   TEST: ("bdd_tracking_2k_val", )
29 | SOLVER:
30 |   IMS_PER_BATCH: 4
31 |   WEIGHT_DECAY: 5e-5
32 | #   IMS_PER_BATCH: 16
33 |   BASE_LR: 0.02
34 | #  STEPS: (31200, 41600,)
35 | #  MAX_ITER: 45500
36 |   STEPS: (36000, 48000,)
37 |   MAX_ITER: 52500
38 |   WARMUP_ITERS: 5000
39 | INPUT:
40 |   MIN_SIZE_TRAIN: (800,)
41 |   RANDOM_FLIP: "none"
42 |   CROP:
43 |     ENABLED: True
44 | SEED: 34112225
45 | DATALOADER:
46 |   SAMPLER_TRAIN: "PairTrainingMultiRandomSampler"
47 |   PAIR_OFFSET_RANGE: 9
48 |   SELCTED_NUMBER: 3
49 | OUTPUT_DIR: "/nobackup-slow/dataset/video/bdd/bdd100k/checkpoints/bdd_tracking_2k/daytime/energy_no_original_loss_direct_add_frame_9_revise_4to6_multi_random_regnet_longer"


--------------------------------------------------------------------------------
/configs/BDD100k/stud_regnet_ood_coco.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   PIXEL_STD: [57.375, 57.120, 58.395]
 4 |   BACKBONE:
 5 |     NAME: "build_regnetx_fpn_backbone"
 6 |   WEIGHTS: "RegNetX-4.0GF_dds_8gpu.pyth" #"regnetx_detectron2.pth"
 7 |   META_ARCHITECTURE: "SSRCNN"
 8 | #  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 9 |   MASK_ON: False
10 |   RESNETS:
11 |     DEPTH: 50
12 |   ROI_HEADS:
13 |     NAME: "StandardROIHeadsSS"
14 |     NUM_CLASSES: 10
15 |   FPN:
16 |     IN_FEATURES: [ "s1", "s2", "s3", "s4" ]
17 |   SS:
18 |     FEAT_LEVEL: "res4"
19 |     NAME: ["build_cycle_energy_direct_add_all_head"]
20 |     LOSS_SCALE: 0.0
21 |     RATIO: 2.0
22 |     COEF: 1.0
23 |     FILTERING1: 0.4
24 |     FILTERING2: 0.6
25 |     ENERGY_WEIGHT: 0.05
26 | DATASETS:
27 |   TRAIN: ("bdd_tracking_2k_train",)
28 |   TEST: ("nu_bdd_ood", ) #coco_2017_val_ood_wrt_bdd
29 | SOLVER:
30 |   IMS_PER_BATCH: 4
31 |   WEIGHT_DECAY: 5e-5
32 | #   IMS_PER_BATCH: 16
33 |   BASE_LR: 0.02
34 | #  STEPS: (31200, 41600,)
35 | #  MAX_ITER: 45500
36 |   STEPS: (36000, 48000,)
37 |   MAX_ITER: 52500
38 |   WARMUP_ITERS: 5000
39 | INPUT:
40 |   MIN_SIZE_TRAIN: (800,)
41 |   RANDOM_FLIP: "none"
42 |   CROP:
43 |     ENABLED: True
44 | SEED: 34112225
45 | DATALOADER:
46 |   SAMPLER_TRAIN: "PairTrainingMultiRandomSampler"
47 |   PAIR_OFFSET_RANGE: 9
48 |   SELCTED_NUMBER: 3
49 | OUTPUT_DIR: "/nobackup-slow/dataset/video/bdd/bdd100k/checkpoints/bdd_tracking_2k/daytime/energy_no_original_loss_direct_add_frame_9_revise_4to6_multi_random_regnet_random_scaling1"


--------------------------------------------------------------------------------
/configs/BDD100k/stud_regnet_ood_nu.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   PIXEL_STD: [57.375, 57.120, 58.395]
 4 |   BACKBONE:
 5 |     NAME: "build_regnetx_fpn_backbone"
 6 |   WEIGHTS: "RegNetX-4.0GF_dds_8gpu.pyth"
 7 |   META_ARCHITECTURE: "SSRCNN"
 8 | #  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 9 |   MASK_ON: False
10 |   RESNETS:
11 |     DEPTH: 50
12 |   ROI_HEADS:
13 |     NAME: "StandardROIHeadsSS"
14 |     NUM_CLASSES: 10
15 |   FPN:
16 |     IN_FEATURES: [ "s1", "s2", "s3", "s4" ]
17 |   SS:
18 |     FEAT_LEVEL: "res4"
19 |     NAME: ["build_cycle_energy_direct_add_all_head"]
20 |     LOSS_SCALE: 0.0
21 |     RATIO: 2.0
22 |     COEF: 1.0
23 |     FILTERING1: 0.4
24 |     FILTERING2: 0.6
25 |     ENERGY_WEIGHT: 0.05
26 | DATASETS:
27 |   TRAIN: ("bdd_tracking_2k_train",)
28 |   TEST: ("nu_bdd_ood", )
29 | SOLVER:
30 |   IMS_PER_BATCH: 4
31 |   WEIGHT_DECAY: 5e-5
32 | #   IMS_PER_BATCH: 16
33 |   BASE_LR: 0.02
34 | #  STEPS: (31200, 41600,)
35 | #  MAX_ITER: 45500
36 |   STEPS: (36000, 48000,)
37 |   MAX_ITER: 52500
38 |   WARMUP_ITERS: 5000
39 | INPUT:
40 |   MIN_SIZE_TRAIN: (800,)
41 |   RANDOM_FLIP: "none"
42 |   CROP:
43 |     ENABLED: True
44 | SEED: 34112225
45 | DATALOADER:
46 |   SAMPLER_TRAIN: "PairTrainingMultiRandomSampler"
47 |   PAIR_OFFSET_RANGE: 9
48 |   SELCTED_NUMBER: 3
49 | OUTPUT_DIR: "/nobackup/dataset/video/bdd/bdd100k/checkpoints/bdd_tracking_2k/daytime/energy_no_original_loss_direct_add_frame_9_revise_4to6_multi_random_regnet"


--------------------------------------------------------------------------------
/configs/BDD100k/stud_resnet.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "SSRCNN"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   MASK_ON: False
 6 |   RESNETS:
 7 |     DEPTH: 50
 8 |   ROI_HEADS:
 9 |     NAME: "StandardROIHeadsSS"
10 |     NUM_CLASSES: 10
11 |   SS:
12 |     FEAT_LEVEL: "res4"
13 |     NAME: ["build_cycle_energy_direct_add_all_head"]
14 |     LOSS_SCALE: 0.0
15 |     RATIO: 2.0
16 |     COEF: 1.0
17 |     FILTERING1: 0.4
18 |     FILTERING2: 0.6
19 |     ENERGY_WEIGHT: 0.05
20 | DATASETS:
21 |   TRAIN: ("bdd_tracking_2k_train",)
22 |   TEST: ("bdd_tracking_2k_val", )
23 | SOLVER:
24 |   IMS_PER_BATCH: 4
25 | #   IMS_PER_BATCH: 16
26 |   BASE_LR: 0.02
27 | #  STEPS: (31200, 41600,)
28 | #  MAX_ITER: 45500
29 |   STEPS: (36000, 48000,)
30 |   MAX_ITER: 52500
31 |   WARMUP_ITERS: 5000
32 | INPUT:
33 |   MIN_SIZE_TRAIN: (800,)
34 |   RANDOM_FLIP: "none"
35 |   CROP:
36 |     ENABLED: True
37 | SEED: 34112225
38 | DATALOADER:
39 |   SAMPLER_TRAIN: "PairTrainingMultiRandomSampler"
40 |   PAIR_OFFSET_RANGE: 9
41 |   SELCTED_NUMBER: 3
42 | OUTPUT_DIR: "/nobackup-slow/dataset/my_xfdu/video/bdd/bdd100k/checkpoints/bdd_tracking_2k/daytime/energy_no_original_loss_direct_add_frame_9_revise_4to6_multi_random_seed_refactor"


--------------------------------------------------------------------------------
/configs/BDD100k/stud_resnet_ood_coco.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "SSRCNN"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   MASK_ON: False
 6 |   RESNETS:
 7 |     DEPTH: 50
 8 |   ROI_HEADS:
 9 |     NAME: "StandardROIHeadsSS"
10 |     NUM_CLASSES: 10
11 |   SS:
12 |     FEAT_LEVEL: "res4"
13 |     NAME: ["build_cycle_energy_direct_add_all_head"]
14 |     LOSS_SCALE: 0.0
15 |     RATIO: 2.0
16 |     COEF: 1.0
17 |     FILTERING1: 0.4
18 |     FILTERING2: 0.6
19 |     ENERGY_WEIGHT: 0.05
20 | DATASETS:
21 |   TRAIN: ("bdd_tracking_2k_train",)
22 |   TEST: ("coco_2017_val_ood_wrt_bdd", )
23 | SOLVER:
24 |   IMS_PER_BATCH: 4
25 | #   IMS_PER_BATCH: 16
26 |   BASE_LR: 0.02
27 | #  STEPS: (31200, 41600,)
28 | #  MAX_ITER: 45500
29 |   STEPS: (36000, 48000,)
30 |   MAX_ITER: 52500
31 |   WARMUP_ITERS: 5000
32 | INPUT:
33 |   MIN_SIZE_TRAIN: (800,)
34 |   RANDOM_FLIP: "none"
35 |   CROP:
36 |     ENABLED: True
37 | SEED: 34112225
38 | DATALOADER:
39 |   SAMPLER_TRAIN: "PairTrainingMultiRandomSampler"
40 |   PAIR_OFFSET_RANGE: 9
41 |   SELCTED_NUMBER: 3
42 | OUTPUT_DIR: "/nobackup-slow/dataset/my_xfdu/video/bdd/bdd100k/checkpoints/bdd_tracking_2k/daytime/energy_no_original_loss_direct_add_frame_9_revise_4to6_multi_random_seed_refactor"


--------------------------------------------------------------------------------
/configs/BDD100k/stud_resnet_ood_nu.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "SSRCNN"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   MASK_ON: False
 6 |   RESNETS:
 7 |     DEPTH: 50
 8 |   ROI_HEADS:
 9 |     NAME: "StandardROIHeadsSS"
10 |     NUM_CLASSES: 10
11 |   SS:
12 |     FEAT_LEVEL: "res4"
13 |     NAME: ["build_cycle_energy_direct_add_all_head"]
14 |     LOSS_SCALE: 0.0
15 |     RATIO: 2.0
16 |     COEF: 1.0
17 |     FILTERING1: 0.4
18 |     FILTERING2: 0.6
19 |     ENERGY_WEIGHT: 0.05
20 | DATASETS:
21 |   TRAIN: ("bdd_tracking_2k_train",)
22 |   TEST: ("nu_bdd_ood", )
23 | SOLVER:
24 |   IMS_PER_BATCH: 4
25 | #   IMS_PER_BATCH: 16
26 |   BASE_LR: 0.02
27 | #  STEPS: (31200, 41600,)
28 | #  MAX_ITER: 45500
29 |   STEPS: (36000, 48000,)
30 |   MAX_ITER: 52500
31 |   WARMUP_ITERS: 5000
32 | INPUT:
33 |   MIN_SIZE_TRAIN: (800,)
34 |   RANDOM_FLIP: "none"
35 |   CROP:
36 |     ENABLED: True
37 | SEED: 34112225
38 | DATALOADER:
39 |   SAMPLER_TRAIN: "PairTrainingMultiRandomSampler"
40 |   PAIR_OFFSET_RANGE: 9
41 |   SELCTED_NUMBER: 3
42 | OUTPUT_DIR: "/nobackup-slow/dataset/my_xfdu/video/bdd/bdd100k/checkpoints/bdd_tracking_2k/daytime/energy_no_original_loss_direct_add_frame_9_revise_4to6_multi_random_seed_refactor"


--------------------------------------------------------------------------------
/configs/Base-RCNN-FPN.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   BACKBONE:
 4 |     NAME: "build_resnet_fpn_backbone"
 5 |   RESNETS:
 6 |     OUT_FEATURES: ["res2", "res3", "res4", "res5"]
 7 |   FPN:
 8 |     IN_FEATURES: ["res2", "res3", "res4", "res5"]
 9 |   ANCHOR_GENERATOR:
10 |     SIZES: [[32], [64], [128], [256], [512]]  # One size for each in feature map
11 |     ASPECT_RATIOS: [[0.5, 1.0, 2.0]]  # Three aspect ratios (same for all in feature maps)
12 |   RPN:
13 |     IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
14 |     PRE_NMS_TOPK_TRAIN: 2000  # Per FPN level
15 |     PRE_NMS_TOPK_TEST: 1000  # Per FPN level
16 |     # Detectron1 uses 2000 proposals per-batch,
17 |     # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
18 |     # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
19 |     POST_NMS_TOPK_TRAIN: 1000
20 |     POST_NMS_TOPK_TEST: 1000
21 |   ROI_HEADS:
22 |     NAME: "StandardROIHeads"
23 |     IN_FEATURES: ["p2", "p3", "p4", "p5"]
24 |   ROI_BOX_HEAD:
25 |     NAME: "FastRCNNConvFCHead"
26 |     NUM_FC: 2
27 |     POOLER_RESOLUTION: 7
28 |   ROI_MASK_HEAD:
29 |     NAME: "MaskRCNNConvUpsampleHead"
30 |     NUM_CONV: 4
31 |     POOLER_RESOLUTION: 14
32 | DATASETS:
33 |   TRAIN: ("coco_2017_train",)
34 |   TEST: ("coco_2017_val",)
35 | SOLVER:
36 |   IMS_PER_BATCH: 16
37 |   BASE_LR: 0.02
38 |   STEPS: (60000, 80000)
39 |   MAX_ITER: 90000
40 | INPUT:
41 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
42 | 


--------------------------------------------------------------------------------
/configs/VIS/R50_FPN_all.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "GeneralizedRCNN1"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   MASK_ON: False
 6 |   RESNETS:
 7 |     DEPTH: 50
 8 |   ROI_HEADS:
 9 |     NUM_CLASSES: 40
10 | DATASETS:
11 | #  TRAIN: ("waymo_all_train", )
12 |   TRAIN:  ("vis21_train",)
13 |   TEST: ("vis21_val",)
14 | SOLVER:
15 |   # the size of the BDD tracking daytime is about 2x of the BDD100k dataset
16 |   # for initial experiments, this schedule will be approximately, 0.5X schedule
17 |   # ~5 epochs
18 |   IMS_PER_BATCH: 16
19 |   BASE_LR: 0.02
20 |   STEPS: (36000, 48000,)
21 |   MAX_ITER: 52500
22 | INPUT:
23 |   MIN_SIZE_TRAIN: (800,)
24 |   CROP:
25 |     ENABLED: True
26 | OUTPUT_DIR: "/nobackup-slow/dataset/video/vis/checkpoints/VIS/R50_FPN_all_vanilla"


--------------------------------------------------------------------------------
/configs/VIS/R50_FPN_all_ood_coco.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "GeneralizedRCNN1"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   MASK_ON: False
 6 |   RESNETS:
 7 |     DEPTH: 50
 8 |   ROI_HEADS:
 9 |     NUM_CLASSES: 40
10 | DATASETS:
11 | #  TRAIN: ("waymo_all_train", )
12 |   TRAIN:  ("vis21_train",)
13 |   TEST: ("vis_coco_ood",)
14 | SOLVER:
15 |   # the size of the BDD tracking daytime is about 2x of the BDD100k dataset
16 |   # for initial experiments, this schedule will be approximately, 0.5X schedule
17 |   # ~5 epochs
18 |   IMS_PER_BATCH: 16
19 |   BASE_LR: 0.02
20 |   STEPS: (36000, 48000,)
21 |   MAX_ITER: 52500
22 | INPUT:
23 |   MIN_SIZE_TRAIN: (800,)
24 |   CROP:
25 |     ENABLED: True
26 | OUTPUT_DIR: "/nobackup-slow/dataset/video/vis/checkpoints/VIS/R50_FPN_all_vanilla"


--------------------------------------------------------------------------------
/configs/VIS/R50_FPN_all_ood_nu.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "GeneralizedRCNN1"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   MASK_ON: False
 6 |   RESNETS:
 7 |     DEPTH: 50
 8 |   ROI_HEADS:
 9 |     NUM_CLASSES: 40
10 | DATASETS:
11 | #  TRAIN: ("waymo_all_train", )
12 |   TRAIN:  ("vis21_train",)
13 |   TEST: ("nu_bdd_ood",)
14 | SOLVER:
15 |   # the size of the BDD tracking daytime is about 2x of the BDD100k dataset
16 |   # for initial experiments, this schedule will be approximately, 0.5X schedule
17 |   # ~5 epochs
18 |   IMS_PER_BATCH: 16
19 |   BASE_LR: 0.02
20 |   STEPS: (36000, 48000,)
21 |   MAX_ITER: 52500
22 | INPUT:
23 |   MIN_SIZE_TRAIN: (800,)
24 |   CROP:
25 |     ENABLED: True
26 | OUTPUT_DIR: "/nobackup-slow/dataset/video/vis/checkpoints/VIS/R50_FPN_all_vanilla"


--------------------------------------------------------------------------------
/configs/VIS/stud_regnet.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   PIXEL_STD: [ 57.375, 57.120, 58.395 ]
 4 |   BACKBONE:
 5 |     NAME: "build_regnetx_fpn_backbone"
 6 |   WEIGHTS: "RegNetX-4.0GF_dds_8gpu.pyth" #"regnetx_detectron2.pth"
 7 |   META_ARCHITECTURE: "SSRCNN"
 8 |   MASK_ON: False
 9 |   RESNETS:
10 |     DEPTH: 50
11 |   ROI_HEADS:
12 |     NAME: "StandardROIHeadsSS"
13 |     NUM_CLASSES: 40
14 |   FPN:
15 |     IN_FEATURES: [ "s1", "s2", "s3", "s4" ]
16 |   SS:
17 |     FEAT_LEVEL: "res4"
18 |     NAME: ["build_cycle_energy_direct_add_all_head"]
19 |     LOSS_SCALE: 0.0
20 |     RATIO: 2.0
21 |     COEF: 1.0
22 |     FILTERING1: 0.4
23 |     FILTERING2: 0.6
24 |     ENERGY_WEIGHT: 0.02
25 | DATASETS:
26 |   TRAIN: ("vis21_train",)
27 |   TEST: ("vis21_val", )
28 | SOLVER:
29 |   IMS_PER_BATCH: 4
30 |   WEIGHT_DECAY: 5e-5
31 | #   IMS_PER_BATCH: 16
32 |   BASE_LR: 0.02
33 | #  STEPS: (31200, 41600,)
34 | #  MAX_ITER: 45500
35 |   STEPS: (36000, 48000,)
36 |   MAX_ITER: 52500
37 |   WARMUP_ITERS: 5000
38 | INPUT:
39 |   MIN_SIZE_TRAIN: (800,)
40 |   RANDOM_FLIP: "none"
41 |   CROP:
42 |     ENABLED: True
43 | SEED: 1998
44 | DATALOADER:
45 |   SAMPLER_TRAIN: "PairTrainingMultiRandomSampler"
46 |   SELCTED_NUMBER: 3
47 |   PAIR_OFFSET_RANGE: 9
48 | OUTPUT_DIR: "/nobackup-slow/dataset/video/vis/checkpoints/VIS/energy_no_original_loss_direct_add_0_02_frame_9_revise_4to6_multi_random_seed_regnet_longer"


--------------------------------------------------------------------------------
/configs/VIS/stud_regnet_ood_coco.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   PIXEL_STD: [ 57.375, 57.120, 58.395 ]
 4 |   BACKBONE:
 5 |     NAME: "build_regnetx_fpn_backbone"
 6 |   WEIGHTS: "RegNetX-4.0GF_dds_8gpu.pyth" #"regnetx_detectron2.pth"
 7 |   META_ARCHITECTURE: "SSRCNN"
 8 |   MASK_ON: False
 9 |   RESNETS:
10 |     DEPTH: 50
11 |   ROI_HEADS:
12 |     NAME: "StandardROIHeadsSS"
13 |     NUM_CLASSES: 40
14 |   FPN:
15 |     IN_FEATURES: [ "s1", "s2", "s3", "s4" ]
16 |   SS:
17 |     FEAT_LEVEL: "res4"
18 |     NAME: ["build_cycle_energy_direct_add_all_head"]
19 |     LOSS_SCALE: 0.0
20 |     RATIO: 2.0
21 |     COEF: 1.0
22 |     FILTERING1: 0.4
23 |     FILTERING2: 0.6
24 |     ENERGY_WEIGHT: 0.02
25 | DATASETS:
26 |   TRAIN: ("vis21_train",)
27 |   TEST: ("vis_coco_ood", )
28 | SOLVER:
29 |   IMS_PER_BATCH: 4
30 |   WEIGHT_DECAY: 5e-5
31 | #   IMS_PER_BATCH: 16
32 |   BASE_LR: 0.02
33 | #  STEPS: (31200, 41600,)
34 | #  MAX_ITER: 45500
35 |   STEPS: (36000, 48000,)
36 |   MAX_ITER: 52500
37 |   WARMUP_ITERS: 5000
38 | INPUT:
39 |   MIN_SIZE_TRAIN: (800,)
40 |   RANDOM_FLIP: "none"
41 |   CROP:
42 |     ENABLED: True
43 | SEED: 1998
44 | DATALOADER:
45 |   SAMPLER_TRAIN: "PairTrainingMultiRandomSampler"
46 |   SELCTED_NUMBER: 3
47 |   PAIR_OFFSET_RANGE: 9
48 | OUTPUT_DIR: "/nobackup-slow/dataset/video/vis/checkpoints/VIS/energy_no_original_loss_direct_add_0_02_frame_9_revise_4to6_multi_random_seed_regnet_longer"


--------------------------------------------------------------------------------
/configs/VIS/stud_regnet_ood_nu.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   PIXEL_STD: [ 57.375, 57.120, 58.395 ]
 4 |   BACKBONE:
 5 |     NAME: "build_regnetx_fpn_backbone"
 6 |   WEIGHTS: "RegNetX-4.0GF_dds_8gpu.pyth" #"regnetx_detectron2.pth"
 7 |   META_ARCHITECTURE: "SSRCNN"
 8 |   MASK_ON: False
 9 |   RESNETS:
10 |     DEPTH: 50
11 |   ROI_HEADS:
12 |     NAME: "StandardROIHeadsSS"
13 |     NUM_CLASSES: 40
14 |   FPN:
15 |     IN_FEATURES: [ "s1", "s2", "s3", "s4" ]
16 |   SS:
17 |     FEAT_LEVEL: "res4"
18 |     NAME: ["build_cycle_energy_direct_add_all_head"]
19 |     LOSS_SCALE: 0.0
20 |     RATIO: 2.0
21 |     COEF: 1.0
22 |     FILTERING1: 0.4
23 |     FILTERING2: 0.6
24 |     ENERGY_WEIGHT: 0.02
25 | DATASETS:
26 |   TRAIN: ("vis21_train",)
27 |   TEST: ("nu_bdd_ood", )
28 | SOLVER:
29 |   IMS_PER_BATCH: 4
30 |   WEIGHT_DECAY: 5e-5
31 | #   IMS_PER_BATCH: 16
32 |   BASE_LR: 0.02
33 | #  STEPS: (31200, 41600,)
34 | #  MAX_ITER: 45500
35 |   STEPS: (36000, 48000,)
36 |   MAX_ITER: 52500
37 |   WARMUP_ITERS: 5000
38 | INPUT:
39 |   MIN_SIZE_TRAIN: (800,)
40 |   RANDOM_FLIP: "none"
41 |   CROP:
42 |     ENABLED: True
43 | SEED: 1998
44 | DATALOADER:
45 |   SAMPLER_TRAIN: "PairTrainingMultiRandomSampler"
46 |   SELCTED_NUMBER: 3
47 |   PAIR_OFFSET_RANGE: 9
48 | OUTPUT_DIR: "/nobackup-slow/dataset/video/vis/checkpoints/VIS/energy_no_original_loss_direct_add_0_02_frame_9_revise_4to6_multi_random_seed_regnet_longer"


--------------------------------------------------------------------------------
/configs/VIS/stud_resnet.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "SSRCNN"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   MASK_ON: False
 6 |   RESNETS:
 7 |     DEPTH: 50
 8 |   ROI_HEADS:
 9 |     NAME: "StandardROIHeadsSS"
10 |     NUM_CLASSES: 40
11 |   SS:
12 |     FEAT_LEVEL: "res4"
13 |     NAME: ["build_cycle_energy_direct_add_all_head"]
14 |     LOSS_SCALE: 0.0
15 |     RATIO: 2.0
16 |     COEF: 1.0
17 |     FILTERING1: 0.4
18 |     FILTERING2: 0.6
19 |     ENERGY_WEIGHT: 0.02
20 | DATASETS:
21 |   TRAIN: ("vis21_train",)
22 |   TEST: ("vis21_val", )
23 | SOLVER:
24 |   IMS_PER_BATCH: 4
25 | #   IMS_PER_BATCH: 16
26 |   BASE_LR: 0.02
27 | #  STEPS: (31200, 41600,)
28 | #  MAX_ITER: 45500
29 |   STEPS: (36000, 48000,)
30 |   MAX_ITER: 52500
31 |   WARMUP_ITERS: 5000
32 | INPUT:
33 |   MIN_SIZE_TRAIN: (800,)
34 |   RANDOM_FLIP: "none"
35 |   CROP:
36 |     ENABLED: True
37 | SEED: 1998
38 | DATALOADER:
39 |   SAMPLER_TRAIN: "PairTrainingMultiRandomSampler"
40 |   SELCTED_NUMBER: 3
41 |   PAIR_OFFSET_RANGE: 9
42 | OUTPUT_DIR: "/nobackup-slow/dataset/video/vis/checkpoints/VIS/energy_no_original_loss_direct_add_0_02_frame_9_revise_4to6_multi_random_seed1"


--------------------------------------------------------------------------------
/configs/VIS/stud_resnet_ood_coco.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "SSRCNN"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   MASK_ON: False
 6 |   RESNETS:
 7 |     DEPTH: 50
 8 |   ROI_HEADS:
 9 |     NAME: "StandardROIHeadsSS"
10 |     NUM_CLASSES: 40
11 |   SS:
12 |     FEAT_LEVEL: "res4"
13 |     NAME: ["build_cycle_energy_direct_add_all_head"]
14 |     LOSS_SCALE: 0.0
15 |     RATIO: 2.0
16 |     COEF: 1.0
17 |     FILTERING1: 0.4
18 |     FILTERING2: 0.6
19 |     ENERGY_WEIGHT: 0.02
20 | DATASETS:
21 |   TRAIN: ("vis21_train",)
22 |   TEST: ("vis_coco_ood", ) #vis_coco_ood
23 | SOLVER:
24 |   IMS_PER_BATCH: 4
25 | #   IMS_PER_BATCH: 16
26 |   BASE_LR: 0.02
27 | #  STEPS: (31200, 41600,)
28 | #  MAX_ITER: 45500
29 |   STEPS: (36000, 48000,)
30 |   MAX_ITER: 52500
31 |   WARMUP_ITERS: 5000
32 | INPUT:
33 |   MIN_SIZE_TRAIN: (800,)
34 |   RANDOM_FLIP: "none"
35 |   CROP:
36 |     ENABLED: True
37 | SEED: 1998
38 | DATALOADER:
39 |   SAMPLER_TRAIN: "PairTrainingMultiRandomSampler"
40 |   SELCTED_NUMBER: 3
41 |   PAIR_OFFSET_RANGE: 9
42 | OUTPUT_DIR: "/nobackup-slow/dataset/video/vis/checkpoints/VIS/energy_no_original_loss_direct_add_0_02_frame_9_revise_4to6_multi_random_seed_regnet_longer"


--------------------------------------------------------------------------------
/configs/VIS/stud_resnet_ood_nu.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "SSRCNN"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   MASK_ON: False
 6 |   RESNETS:
 7 |     DEPTH: 50
 8 |   ROI_HEADS:
 9 |     NAME: "StandardROIHeadsSS"
10 |     NUM_CLASSES: 40
11 |   SS:
12 |     FEAT_LEVEL: "res4"
13 |     NAME: ["build_cycle_energy_direct_add_all_head"]
14 |     LOSS_SCALE: 0.0
15 |     RATIO: 2.0
16 |     COEF: 1.0
17 |     FILTERING1: 0.4
18 |     FILTERING2: 0.6
19 |     ENERGY_WEIGHT: 0.02
20 | DATASETS:
21 |   TRAIN: ("vis21_train",)
22 |   TEST: ("nu_bdd_ood", ) #vis_coco_ood
23 | SOLVER:
24 |   IMS_PER_BATCH: 4
25 | #   IMS_PER_BATCH: 16
26 |   BASE_LR: 0.02
27 | #  STEPS: (31200, 41600,)
28 | #  MAX_ITER: 45500
29 |   STEPS: (36000, 48000,)
30 |   MAX_ITER: 52500
31 |   WARMUP_ITERS: 5000
32 | INPUT:
33 |   MIN_SIZE_TRAIN: (800,)
34 |   RANDOM_FLIP: "none"
35 |   CROP:
36 |     ENABLED: True
37 | SEED: 1998
38 | DATALOADER:
39 |   SAMPLER_TRAIN: "PairTrainingMultiRandomSampler"
40 |   SELCTED_NUMBER: 3
41 |   PAIR_OFFSET_RANGE: 9
42 | OUTPUT_DIR: "/nobackup-slow/dataset/video/vis/checkpoints/VIS/energy_no_original_loss_direct_add_0_02_frame_9_revise_4to6_multi_random_seed_regnet_longer"


--------------------------------------------------------------------------------
/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/datasets/convert_bdd_3cls.py:
--------------------------------------------------------------------------------
  1 | """BDD .
  2 | - Converting BDD labels to 3 classes to match the Waymo dataset.
  3 | 
  4 | Run `python3 -m datasets.convert_bdd_3cls`
  5 | """
  6 | 
  7 | import json
  8 | import os
  9 | import os.path as osp
 10 | from collections import defaultdict
 11 | 
 12 | 
 13 | def load_json(filename):
 14 |     with open(filename, "r") as fp:
 15 |         reg_file = json.load(fp)
 16 |     return reg_file
 17 | 
 18 | 
 19 | # BDD100K MOT set domain splits.
 20 | _PREDEFINED_SPLITS_BDDT = {
 21 |     "bdd_tracking_2k": {
 22 |         "bdd_tracking_2k_train": (
 23 |             "bdd100k/images/track/train",
 24 |             "bdd100k/labels/track/bdd100k_mot_train_coco.json",
 25 |         ),
 26 |         "bdd_tracking_2k_val": (
 27 |             "bdd100k/images/track/val",
 28 |             "bdd100k/labels/track/bdd100k_mot_val_coco.json",
 29 |         ),
 30 |     },
 31 | }
 32 | 
 33 | # Register data for different domains as well as different sequence.
 34 | domain_path = "bdd100k/labels/box_track_20/domain_splits/"
 35 | train_splits = load_json(
 36 |     osp.join("/nobackup-slow/dataset/my_xfdu/video/bdd/", domain_path, "bdd100k_mot_domain_splits_train.json")
 37 | )
 38 | val_splits = load_json(
 39 |     osp.join("/nobackup-slow/dataset/my_xfdu/video/bdd/", domain_path, "bdd100k_mot_domain_splits_val.json")
 40 | )
 41 | 
 42 | 
 43 | # per_seq_{split}_{key}_{_attr}: [dataset_names]
 44 | per_seq_maps = defaultdict(list)
 45 | 
 46 | # register the BDD100K per domain sets
 47 | for split, result in [("train", train_splits), ("val", val_splits)]:
 48 |     for key, values in result.items():
 49 |         # key is ["timeofday", "scene", "weather"]
 50 |         for attr, seqs in values.items():
 51 |             # attr is the actual attribute under each category like
 52 |             # `daytime`, `night`, etc. Values are list of sequence names.
 53 |             if "/" in attr or " " in attr:
 54 |                 if "/" in attr:
 55 |                     _attr = attr.replace("/", "-")
 56 |                 if " " in attr:
 57 |                     _attr = attr.replace(" ", "-")
 58 |             else:
 59 |                 _attr = attr
 60 | 
 61 |             # register per domain values.
 62 |             _PREDEFINED_SPLITS_BDDT["bdd_tracking_2k"][
 63 |                 "bdd_tracking_2k_{}_{}".format(split, _attr)
 64 |             ] = (
 65 |                 "bdd100k/images/track/{}".format(split),
 66 |                 osp.join(
 67 |                     domain_path,
 68 |                     "labels",
 69 |                     split,
 70 |                     "{}_{}_{}_coco.json".format(split, key, _attr),
 71 |                 ),
 72 |             )
 73 | 
 74 | MAPPING = {1: 2, 2: 3, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1}
 75 | os.makedirs(osp.join('/nobackup-slow/dataset/my_xfdu/video/bdd/', 'bdd100k/labels/track_3cls'), exist_ok=True)
 76 | os.makedirs(
 77 |     osp.join('/nobackup-slow/dataset/my_xfdu/video/bdd/', 'bdd100k/labels/box_track_20_3cls'), exist_ok=True)
 78 | 
 79 | datasets = _PREDEFINED_SPLITS_BDDT['bdd_tracking_2k']
 80 | files = [datasets[k][1] for k in datasets.keys()]
 81 | 
 82 | for json_file in files:
 83 |     print(json_file)
 84 |     data_path = osp.join('/nobackup-slow/dataset/my_xfdu/video/bdd/', json_file)
 85 |     prefix = json_file.split('/')[-1]
 86 |     data = json.load(open(data_path))
 87 |     new_cats = [
 88 |         {'supercategory': 'none', 'id': 1, 'name': 'vehicle'},
 89 |         {'supercategory': 'none', 'id': 2, 'name': 'pedestrian'},
 90 |         {'supercategory': 'none', 'id': 3, 'name': 'cyclist'},
 91 |     ]
 92 |     new_annos = []
 93 |     for anno in data['annotations']:
 94 |         anno['category_id'] = MAPPING[anno['category_id']]
 95 |         new_annos.append(anno)
 96 |     new_labels = {
 97 |         'categories': new_cats,
 98 |         'images': data['images'],
 99 |         'annotations': new_annos,
100 |         'videos': data['videos'],
101 |     }
102 |     if 'box_track_20' in data_path:
103 |         save_path = osp.dirname(data_path).replace(
104 |             'box_track_20', 'box_track_20_3cls')
105 |     elif 'track' in data_path:
106 |         save_path = osp.dirname(data_path).replace(
107 |             'track', 'track_3cls')
108 |     else:
109 |         assert False
110 |     os.makedirs(save_path, exist_ok=True)
111 |     save_path = osp.join(save_path, prefix)
112 |     with open(save_path, 'w') as fp:
113 |         json.dump(new_labels, fp)
114 | 


--------------------------------------------------------------------------------
/datasets/convert_bdd_ood.py:
--------------------------------------------------------------------------------
  1 | """BDD .
  2 | - Converting BDD labels to 3 classes to match the Waymo dataset.
  3 | 
  4 | Run `python3 -m datasets.convert_bdd_3cls`
  5 | """
  6 | 
  7 | import json
  8 | import os
  9 | import os.path as osp
 10 | from collections import defaultdict
 11 | 
 12 | 
 13 | def load_json(filename):
 14 |     with open(filename, "r") as fp:
 15 |         reg_file = json.load(fp)
 16 |     return reg_file
 17 | 
 18 | 
 19 | # BDD100K MOT set domain splits.
 20 | _PREDEFINED_SPLITS_BDDT = {
 21 |     "bdd_tracking_2k": {
 22 |         "bdd_tracking_2k_train": (
 23 |             "bdd100k/images/track/train",
 24 |             "bdd100k/labels/track/bdd100k_mot_train_coco.json",
 25 |         ),
 26 |         "bdd_tracking_2k_val": (
 27 |             "bdd100k/images/track/val",
 28 |             "bdd100k/labels/track/bdd100k_mot_val_coco.json",
 29 |         ),
 30 |     },
 31 | }
 32 | 
 33 | # Register data for different domains as well as different sequence.
 34 | domain_path = "bdd100k/labels/box_track_20/domain_splits/"
 35 | train_splits = load_json(
 36 |     osp.join("/nobackup-slow/dataset/my_xfdu/video/bdd/", domain_path, "bdd100k_mot_domain_splits_train.json")
 37 | )
 38 | val_splits = load_json(
 39 |     osp.join("/nobackup-slow/dataset/my_xfdu/video/bdd/", domain_path, "bdd100k_mot_domain_splits_val.json")
 40 | )
 41 | 
 42 | 
 43 | # per_seq_{split}_{key}_{_attr}: [dataset_names]
 44 | per_seq_maps = defaultdict(list)
 45 | 
 46 | # register the BDD100K per domain sets
 47 | for split, result in [("train", train_splits), ("val", val_splits)]:
 48 |     for key, values in result.items():
 49 |         # key is ["timeofday", "scene", "weather"]
 50 |         for attr, seqs in values.items():
 51 |             # attr is the actual attribute under each category like
 52 |             # `daytime`, `night`, etc. Values are list of sequence names.
 53 |             if "/" in attr or " " in attr:
 54 |                 if "/" in attr:
 55 |                     _attr = attr.replace("/", "-")
 56 |                 if " " in attr:
 57 |                     _attr = attr.replace(" ", "-")
 58 |             else:
 59 |                 _attr = attr
 60 | 
 61 |             # register per domain values.
 62 |             _PREDEFINED_SPLITS_BDDT["bdd_tracking_2k"][
 63 |                 "bdd_tracking_2k_{}_{}".format(split, _attr)
 64 |             ] = (
 65 |                 "bdd100k/images/track/{}".format(split),
 66 |                 osp.join(
 67 |                     domain_path,
 68 |                     "labels",
 69 |                     split,
 70 |                     "{}_{}_{}_coco.json".format(split, key, _attr),
 71 |                 ),
 72 |             )
 73 | 
 74 | '''
 75 | {"supercategory": "human", "id": 1, "name": "pedestrian"},
 76 |         {"supercategory": "human", "id": 2, "name": "rider"},
 77 |         {"supercategory": "vehicle", "id": 3, "name": "car"},
 78 |         {"supercategory": "vehicle", "id": 4, "name": "truck"},
 79 |         {"supercategory": "vehicle", "id": 5, "name": "bus"},
 80 |         {"supercategory": "vehicle", "id": 6, "name": "train"},
 81 |         {"supercategory": "bike", "id": 7, "name": "motorcycle"},
 82 |         {"supercategory": "bike", "id": 8, "name": "bicycle"},
 83 | ["vehicle", "pedestrian", "cyclist"]
 84 | '''
 85 | 
 86 | MAPPING = {1: 2, 2: 3, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1}
 87 | 
 88 | 
 89 | datasets = _PREDEFINED_SPLITS_BDDT['bdd_tracking_2k']
 90 | files = [datasets[k][1] for k in datasets.keys()]
 91 | 
 92 | for json_file in [files[0]]:
 93 |     print(json_file)
 94 |     data_path = osp.join('/nobackup-slow/dataset/my_xfdu/video/bdd/', json_file)
 95 |     prefix = json_file.split('/')[-1]
 96 |     data = json.load(open(data_path))
 97 |     # new_cats = [
 98 |     #     {'supercategory': 'none', 'id': 1, 'name': 'vehicle'},
 99 |     #     {'supercategory': 'none', 'id': 2, 'name': 'pedestrian'},
100 |     #     {'supercategory': 'none', 'id': 3, 'name': 'cyclist'},
101 |     # ]
102 |     new_cats = [{"supercategory": "human", "id": 1, "name": "pedestrian"},
103 |         {"supercategory": "human", "id": 2, "name": "rider"},
104 |         {"supercategory": "vehicle", "id": 3, "name": "car"},
105 |         {"supercategory": "vehicle", "id": 4, "name": "truck"},
106 |         {"supercategory": "vehicle", "id": 5, "name": "bus"},
107 |         {"supercategory": "vehicle", "id": 6, "name": "train"},
108 |         {"supercategory": "bike", "id": 7, "name": "motorcycle"},
109 |         {"supercategory": "bike", "id": 8, "name": "bicycle"}]
110 | 
111 |     new_annos = []
112 |     remove_image_id = []
113 |     # breakpoint()
114 |     for anno in data['annotations']:
115 |         if anno['category_id'] not in [4, 5, 6, 7, 8]:
116 |             remove_image_id.append(anno['image_id'])
117 |             continue
118 |         else:
119 |             # anno['category_id'] = MAPPING[anno['category_id']]
120 |             new_annos.append(anno)
121 |     # import numpy as np
122 |     all_image_id = range(1, len(data['images'])+1)
123 |     kept_image_id = set(all_image_id).difference(set(remove_image_id))
124 |     # kept_image_id = [item for item in all_image_id if item not in remove_image_id]
125 |     kept_video_id = []
126 |     for index in range(len(data['images'])):
127 |         if index + 1 in kept_image_id:
128 |             kept_video_id.append(data['images'][index]['video_id'])
129 |     kept_video_id = list(set(kept_video_id))
130 | 
131 | 
132 |     kept_images = []
133 |     for index in range(len(data['images'])):
134 |         if index + 1 in kept_image_id:
135 |             kept_images.append(data['images'][index])
136 |     kept_videos = []
137 |     for index in range(len(data['videos'])):
138 |         if index + 1 in kept_video_id:
139 |             kept_videos.append(data['videos'][index])
140 |     # breakpoint()
141 |     # breakpoint()
142 | 
143 |     new_labels = {
144 |         'categories': new_cats,
145 |         'images': kept_images,#data['images'],
146 |         'annotations': new_annos,
147 |         'videos': kept_videos,
148 |     }
149 | 
150 |     save_path = '/nobackup-slow/dataset/my_xfdu/video/bdd/bdd100k/labels/track/'
151 |     prefix = 'bdd_ood.json'
152 | 
153 |     save_path = osp.join(save_path, prefix)
154 |     with open(save_path, 'w') as fp:
155 |         json.dump(new_labels, fp)
156 | 


--------------------------------------------------------------------------------
/datasets/convert_city.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) OpenMMLab. All rights reserved.
  2 | import argparse
  3 | import glob
  4 | import os.path as osp
  5 | 
  6 | import cityscapesscripts.helpers.labels as CSLabels
  7 | import mmcv
  8 | import numpy as np
  9 | import pycocotools.mask as maskUtils
 10 | 
 11 | 
 12 | def collect_files(img_dir, gt_dir):
 13 |     suffix = 'leftImg8bit.png'
 14 |     files = []
 15 |     for img_file in glob.glob(osp.join(img_dir, '**/*.png')):
 16 |         assert img_file.endswith(suffix), img_file
 17 |         inst_file = gt_dir + img_file[
 18 |             len(img_dir):-len(suffix)] + 'gtFine_instanceIds.png'
 19 |         # Note that labelIds are not converted to trainId for seg map
 20 |         segm_file = gt_dir + img_file[
 21 |             len(img_dir):-len(suffix)] + 'gtFine_labelIds.png'
 22 |         files.append((img_file, inst_file, segm_file))
 23 |     assert len(files), f'No images found in {img_dir}'
 24 |     print(f'Loaded {len(files)} images from {img_dir}')
 25 | 
 26 |     return files
 27 | 
 28 | 
 29 | def collect_annotations(files, nproc=1):
 30 |     print('Loading annotation images')
 31 |     if nproc > 1:
 32 |         images = mmcv.track_parallel_progress(
 33 |             load_img_info, files, nproc=nproc)
 34 |     else:
 35 |         images = mmcv.track_progress(load_img_info, files)
 36 | 
 37 |     return images
 38 | 
 39 | 
 40 | def load_img_info(files):
 41 |     img_file, inst_file, segm_file = files
 42 |     inst_img = mmcv.imread(inst_file, 'unchanged')
 43 |     # ids < 24 are stuff labels (filtering them first is about 5% faster)
 44 |     unique_inst_ids = np.unique(inst_img[inst_img >= 24])
 45 |     anno_info = []
 46 |     for inst_id in unique_inst_ids:
 47 |         # For non-crowd annotations, inst_id // 1000 is the label_id
 48 |         # Crowd annotations have <1000 instance ids
 49 |         label_id = inst_id // 1000 if inst_id >= 1000 else inst_id
 50 |         label = CSLabels.id2label[label_id]
 51 |         if not label.hasInstances or label.ignoreInEval:
 52 |             continue
 53 | 
 54 |         category_id = label.id
 55 |         iscrowd = int(inst_id < 1000)
 56 |         mask = np.asarray(inst_img == inst_id, dtype=np.uint8, order='F')
 57 |         mask_rle = maskUtils.encode(mask[:, :, None])[0]
 58 | 
 59 |         area = maskUtils.area(mask_rle)
 60 |         # convert to COCO style XYWH format
 61 |         bbox = maskUtils.toBbox(mask_rle)
 62 | 
 63 |         # for json encoding
 64 |         mask_rle['counts'] = mask_rle['counts'].decode()
 65 | 
 66 |         anno = dict(
 67 |             iscrowd=iscrowd,
 68 |             category_id=category_id,
 69 |             bbox=bbox.tolist(),
 70 |             area=area.tolist(),
 71 |             segmentation=mask_rle)
 72 |         anno_info.append(anno)
 73 |     video_name = osp.basename(osp.dirname(img_file))
 74 |     img_info = dict(
 75 |         # remove img_prefix for filename
 76 |         file_name=osp.join(video_name, osp.basename(img_file)),
 77 |         height=inst_img.shape[0],
 78 |         width=inst_img.shape[1],
 79 |         anno_info=anno_info,
 80 |         segm_file=osp.join(video_name, osp.basename(segm_file)))
 81 | 
 82 |     return img_info
 83 | 
 84 | 
 85 | def cvt_annotations(image_infos, out_json_name):
 86 |     out_json = dict()
 87 |     img_id = 0
 88 |     ann_id = 0
 89 |     out_json['images'] = []
 90 |     out_json['categories'] = []
 91 |     out_json['annotations'] = []
 92 |     for image_info in image_infos:
 93 |         image_info['id'] = img_id
 94 |         anno_infos = image_info.pop('anno_info')
 95 |         out_json['images'].append(image_info)
 96 |         for anno_info in anno_infos:
 97 |             anno_info['image_id'] = img_id
 98 |             anno_info['id'] = ann_id
 99 |             out_json['annotations'].append(anno_info)
100 |             ann_id += 1
101 |         img_id += 1
102 |     for label in CSLabels.labels:
103 |         if label.hasInstances and not label.ignoreInEval:
104 |             cat = dict(id=label.id, name=label.name)
105 |             out_json['categories'].append(cat)
106 | 
107 |     if len(out_json['annotations']) == 0:
108 |         out_json.pop('annotations')
109 | 
110 |     mmcv.dump(out_json, out_json_name)
111 |     return out_json
112 | 
113 | 
114 | def parse_args():
115 |     parser = argparse.ArgumentParser(
116 |         description='Convert Cityscapes annotations to COCO format')
117 |     parser.add_argument('--cityscapes_path', default='/nobackup-slow/dataset/my_xfdu/video/city/')
118 |     parser.add_argument('--img-dir', default='leftImg8bit', type=str)
119 |     parser.add_argument('--gt-dir', default='gtFine', type=str)
120 |     parser.add_argument('-o', '--out-dir', help='output path')
121 |     parser.add_argument(
122 |         '--nproc', default=1, type=int, help='number of process')
123 |     args = parser.parse_args()
124 |     return args
125 | 
126 | 
127 | def main():
128 |     args = parse_args()
129 |     cityscapes_path = args.cityscapes_path
130 |     out_dir = args.out_dir if args.out_dir else cityscapes_path
131 |     mmcv.mkdir_or_exist(out_dir)
132 | 
133 |     img_dir = osp.join(cityscapes_path, args.img_dir)
134 |     gt_dir = osp.join(cityscapes_path, args.gt_dir)
135 | 
136 |     set_name = dict(
137 |         train='instancesonly_filtered_gtFine_train.json',
138 |         val='instancesonly_filtered_gtFine_val.json',
139 |         test='instancesonly_filtered_gtFine_test.json')
140 | 
141 |     for split, json_name in set_name.items():
142 |         print(f'Converting {split} into {json_name}')
143 |         with mmcv.Timer(
144 |                 print_tmpl='It took {}s to convert Cityscapes annotation'):
145 |             files = collect_files(
146 |                 osp.join(img_dir, split), osp.join(gt_dir, split))
147 |             image_infos = collect_annotations(files, nproc=args.nproc)
148 |             cvt_annotations(image_infos, osp.join(out_dir, json_name))
149 | 
150 | 
151 | if __name__ == '__main__':
152 |     main()


--------------------------------------------------------------------------------
/datasets/convert_coco_vis.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from pycocotools.coco import COCO
 3 | 
 4 | data = json.load(open('/nobackup-slow/dataset/my_xfdu/coco2017/annotations/instances_train2017.json'))
 5 | new_dict = dict()
 6 | 
 7 | new_dict['info'] = data['info']
 8 | new_dict['licenses'] = data['licenses']
 9 | new_dict['categories'] = data['categories']
10 | 
11 | images = []
12 | annotations = []
13 | keep_image_ids = []
14 | 
15 | 
16 | coco = COCO('/nobackup-slow/dataset/my_xfdu/coco2017/annotations/instances_train2017.json')
17 | # import ipdb; ipdb.set_trace()
18 | #
19 | CLASSES = ['bicycle',
20 |                           'bus', 'traffic light',
21 |                           'fire hydrant', 'stop sign', 'parking meter', 'bench',
22 |                        'sheep',
23 |                             'backpack', 'umbrella', 'handbag',
24 |                           'tie', 'suitcase', 'skis', 'sports ball',
25 |                           'kite', 'baseball bat', 'baseball glove'
26 |                           , 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
27 |                           'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
28 |                           'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant',
29 |                           'bed', 'dining table', 'toilet', 'tv', 'laptop', 'remote',
30 |                           'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
31 |                           'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
32 |                           'hair drier', 'toothbrush']
33 | 
34 | 
35 | cat_ids = coco.get_cat_ids(cat_names=CLASSES)
36 | cat2label = {cat_id: i for i, cat_id in enumerate(cat_ids)}
37 | img_ids = coco.get_img_ids()
38 | # import ipdb; ipdb.set_trace()
39 | 
40 | 
41 | for i in img_ids:
42 |     mark = 0
43 |     info = coco.load_imgs([i])[0]
44 |     # info['filename'] = info['file_name']
45 | 
46 |     # added part.
47 |     ann_ids = coco.get_ann_ids(img_ids=[info['id']])
48 |     ann_info = coco.load_anns(ann_ids)
49 |     for object1 in ann_info:
50 |         if object1['category_id'] not in cat_ids:
51 |             mark = 1
52 |             continue
53 |     if mark == 0:
54 |         keep_image_ids.append(i)
55 | # import ipdb; ipdb.set_trace()
56 | # for index in keep_image_ids:
57 | #     annotations.append()
58 | for annotations1 in data['annotations']:
59 |     if annotations1['image_id'] in keep_image_ids:
60 |         annotations.append(annotations1)
61 |         # keep_image_ids.append(annotations1['image_id'])
62 | 
63 | for image_info in data['images']:
64 |     if image_info['id'] in keep_image_ids:
65 |         images.append(image_info)
66 | 
67 | new_dict['images'] = images
68 | new_dict['annotations'] = annotations
69 | 
70 | with open('/nobackup-slow/dataset/my_xfdu/coco2017/annotations/instances_val2017_ood_wrt_vis.json', 'w') as file:
71 |     json.dump(new_dict, file)


--------------------------------------------------------------------------------
/datasets/convert_nu_ood.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from pycocotools.coco import COCO
 3 | from os import path as osp
 4 | 
 5 | data = json.load(open('/nobackup-slow/dataset/my_xfdu/video/nuscene/nuimages_v1.0-val.json'))
 6 | 
 7 | 
 8 | new_annos = []
 9 | remove_image_id = []
10 | # breakpoint()
11 | for anno in data['annotations']:
12 |     if anno['category_id'] not in [8, 9, 10, 11, 12, 13, 14, 15]:
13 |         remove_image_id.append(anno['image_id'])
14 |         continue
15 |     else:
16 |         # anno['category_id'] = MAPPING[anno['category_id']]
17 |         new_annos.append(anno)
18 | # import numpy as np
19 | all_image_id = range(0, len(data['images']))
20 | # breakpoint()
21 | kept_image_id = set(all_image_id).difference(set(remove_image_id))
22 | 
23 | kept_images = []
24 | for index in range(len(data['images'])):
25 |     if index in kept_image_id:
26 |         kept_images.append(data['images'][index])
27 | 
28 | 
29 | new_labels = {
30 |     'categories': data['categories'],
31 |     'images': kept_images,#data['images'],
32 |     'annotations': new_annos
33 | }
34 | 
35 | save_path = '/nobackup-slow/dataset/my_xfdu/video/nuscene/'
36 | prefix = 'nu_ood.json'
37 | 
38 | save_path = osp.join(save_path, prefix)
39 | with open(save_path, 'w') as fp:
40 |     json.dump(new_labels, fp)
41 | 
42 | 
43 | 


--------------------------------------------------------------------------------
/datasets/convert_openimages_vis.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | data = json.load(open('/nobackup-slow/dataset/my_xfdu/OpenImages/coco_classes/COCO-Format/val_coco_format.json','rb'))
 4 | all_classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane',
 5 |                           'bus', 'train', 'truck', 'boat', 'traffic light',
 6 |                           'fire hydrant', 'stop sign', 'parking meter', 'bench',
 7 |                           'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
 8 |                           'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
 9 |                           'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
10 |                           'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
11 |                           'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
12 |                           'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
13 |                           'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant',
14 |                           'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
15 |                           'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
16 |                           'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
17 |                           'hair drier', 'toothbrush']
18 | all_dict = {}
19 | for i in range(len(all_classes)):
20 |     all_dict[all_classes[i]] = i + 1
21 | 
22 | not_vis_classes = ['bicycle',
23 |                           'bus', 'traffic light',
24 |                           'fire hydrant', 'stop sign', 'parking meter', 'bench',
25 |                        'sheep',
26 |                             'backpack', 'umbrella', 'handbag',
27 |                           'tie', 'suitcase', 'skis', 'sports ball',
28 |                           'kite', 'baseball bat', 'baseball glove'
29 |                           , 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
30 |                           'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
31 |                           'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant',
32 |                           'bed', 'dining table', 'toilet', 'tv', 'laptop', 'remote',
33 |                           'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
34 |                           'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
35 |                           'hair drier', 'toothbrush']
36 | not_vis_id = []
37 | for item in not_vis_classes:
38 |     not_vis_id.append(all_dict[item])
39 | remove_image_id = []
40 | # breakpoint()
41 | for annotation in data['annotations']:
42 |     if annotation['category_id'] not in not_vis_id:
43 |         remove_image_id.append(annotation['image_id'])
44 | remove_image_id = list(set(remove_image_id))
45 | new_annotation = []
46 | new_image_id = []
47 | for annotation in data['annotations']:
48 |     if annotation['image_id'] not in remove_image_id:
49 |         new_annotation.append(annotation)
50 | for image in data['images']:
51 |     if image['id'] not in remove_image_id:
52 |         new_image_id.append(image)
53 | # breakpoint()
54 | new_annotation_all = data
55 | new_annotation_all['annotations'] = new_annotation
56 | new_annotation_all['images'] = new_image_id
57 | breakpoint()
58 | json.dump(new_annotation_all, open('/nobackup-slow/dataset/my_xfdu/OpenImages/coco_classes/COCO-Format/vis_open_ood.json','w'))
59 | 
60 | 
61 | 


--------------------------------------------------------------------------------
/datasets/convert_vg_ood.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from pycocotools.coco import COCO
 3 | from os import path as osp
 4 | 
 5 | data = json.load(open('/nobackup-slow/dataset/my_xfdu/video/vg/anno/visual_genome_val.json'))
 6 | 
 7 | 
 8 | new_annos = []
 9 | remove_image_id = []
10 | # breakpoint()
11 | for anno in data['annotations']:
12 |     if anno['category_id'] in [131, 488, 110,130,146,218,343,646,180,999,58,157,233,52,625,685,954,1181,1478,53,
13 |                                    184,97,150,744,117,337,341,351,83,141,992,1509,444,35,37,470,42,186,1388,639,127,
14 |                                    9,364,19,86,297,1223,138,258,135,350,59,68,70,566,814,898,1181,1447,155,810,838,
15 |                                    85,87]:
16 |         remove_image_id.append(anno['image_id'])
17 |         continue
18 |     else:
19 |         # anno['category_id'] = MAPPING[anno['category_id']]
20 |         new_annos.append(anno)
21 | # import numpy as np
22 | all_image_id = range(0, len(data['images']))
23 | # breakpoint()
24 | kept_image_id = set(all_image_id).difference(set(remove_image_id))
25 | 
26 | kept_images = []
27 | for index in range(len(data['images'])):
28 |     if index in kept_image_id:
29 |         kept_images.append(data['images'][index])
30 | 
31 | 
32 | new_labels = {
33 |     'categories': data['categories'],
34 |     'images': kept_images,#data['images'],
35 |     'annotations': new_annos
36 | }
37 | 
38 | save_path = '/nobackup-slow/dataset/my_xfdu/video/vg/anno'
39 | prefix = 'vg_ood.json'
40 | # breakpoint()
41 | save_path = osp.join(save_path, prefix)
42 | with open(save_path, 'w') as fp:
43 |     json.dump(new_labels, fp)
44 | 
45 | 
46 | 


--------------------------------------------------------------------------------
/datasets/metadata.py:
--------------------------------------------------------------------------------
 1 | from collections import ChainMap
 2 | 
 3 | # Detectron imports
 4 | from detectron2.data import MetadataCatalog
 5 | 
 6 | # Useful Dicts for OpenImages Conversion
 7 | OPEN_IMAGES_TO_COCO = {'Person': 'person',
 8 |                        'Bicycle': 'bicycle',
 9 |                        'Car': 'car',
10 |                        'Motorcycle': 'motorcycle',
11 |                        'Airplane': 'airplane',
12 |                        'Bus': 'bus',
13 |                        'Train': 'train',
14 |                        'Truck': 'truck',
15 |                        'Boat': 'boat',
16 |                        'Traffic light': 'traffic light',
17 |                        'Fire hydrant': 'fire hydrant',
18 |                        'Stop sign': 'stop sign',
19 |                        'Parking meter': 'parking meter',
20 |                        'Bench': 'bench',
21 |                        'Bird': 'bird',
22 |                        'Cat': 'cat',
23 |                        'Dog': 'dog',
24 |                        'Horse': 'horse',
25 |                        'Sheep': 'sheep',
26 |                        'Elephant': 'cow',
27 |                        'Cattle': 'elephant',
28 |                        'Bear': 'bear',
29 |                        'Zebra': 'zebra',
30 |                        'Giraffe': 'giraffe',
31 |                        'Backpack': 'backpack',
32 |                        'Umbrella': 'umbrella',
33 |                        'Handbag': 'handbag',
34 |                        'Tie': 'tie',
35 |                        'Suitcase': 'suitcase',
36 |                        'Flying disc': 'frisbee',
37 |                        'Ski': 'skis',
38 |                        'Snowboard': 'snowboard',
39 |                        'Ball': 'sports ball',
40 |                        'Kite': 'kite',
41 |                        'Baseball bat': 'baseball bat',
42 |                        'Baseball glove': 'baseball glove',
43 |                        'Skateboard': 'skateboard',
44 |                        'Surfboard': 'surfboard',
45 |                        'Tennis racket': 'tennis racket',
46 |                        'Bottle': 'bottle',
47 |                        'Wine glass': 'wine glass',
48 |                        'Coffee cup': 'cup',
49 |                        'Fork': 'fork',
50 |                        'Knife': 'knife',
51 |                        'Spoon': 'spoon',
52 |                        'Bowl': 'bowl',
53 |                        'Banana': 'banana',
54 |                        'Apple': 'apple',
55 |                        'Sandwich': 'sandwich',
56 |                        'Orange': 'orange',
57 |                        'Broccoli': 'broccoli',
58 |                        'Carrot': 'carrot',
59 |                        'Hot dog': 'hot dog',
60 |                        'Pizza': 'pizza',
61 |                        'Doughnut': 'donut',
62 |                        'Cake': 'cake',
63 |                        'Chair': 'chair',
64 |                        'Couch': 'couch',
65 |                        'Houseplant': 'potted plant',
66 |                        'Bed': 'bed',
67 |                        'Table': 'dining table',
68 |                        'Toilet': 'toilet',
69 |                        'Television': 'tv',
70 |                        'Laptop': 'laptop',
71 |                        'Computer mouse': 'mouse',
72 |                        'Remote control': 'remote',
73 |                        'Computer keyboard': 'keyboard',
74 |                        'Mobile phone': 'cell phone',
75 |                        'Microwave oven': 'microwave',
76 |                        'Oven': 'oven',
77 |                        'Toaster': 'toaster',
78 |                        'Sink': 'sink',
79 |                        'Refrigerator': 'refrigerator',
80 |                        'Book': 'book',
81 |                        'Clock': 'clock',
82 |                        'Vase': 'vase',
83 |                        'Scissors': 'scissors',
84 |                        'Teddy bear': 'teddy bear',
85 |                        'Hair dryer': 'hair drier',
86 |                        'Toothbrush': 'toothbrush'}
87 | 
88 | 


--------------------------------------------------------------------------------
/figs/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeplearning-wisc/stud/21b9492c63804d7acf41fefd0d6ad40cf29975a5/figs/.DS_Store


--------------------------------------------------------------------------------
/figs/cycle_confusion_arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeplearning-wisc/stud/21b9492c63804d7acf41fefd0d6ad40cf29975a5/figs/cycle_confusion_arch.png


--------------------------------------------------------------------------------
/permutations/permutations_hamming_all_24.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeplearning-wisc/stud/21b9492c63804d7acf41fefd0d6ad40cf29975a5/permutations/permutations_hamming_all_24.npy


--------------------------------------------------------------------------------
/permutations/permutations_hamming_max_1000.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeplearning-wisc/stud/21b9492c63804d7acf41fefd0d6ad40cf29975a5/permutations/permutations_hamming_max_1000.npy


--------------------------------------------------------------------------------
/permutations/permutations_hamming_max_2.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeplearning-wisc/stud/21b9492c63804d7acf41fefd0d6ad40cf29975a5/permutations/permutations_hamming_max_2.npy


--------------------------------------------------------------------------------
/permutations/permutations_hamming_max_24.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeplearning-wisc/stud/21b9492c63804d7acf41fefd0d6ad40cf29975a5/permutations/permutations_hamming_max_24.npy


--------------------------------------------------------------------------------
/permutations/permutations_hamming_max_35.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeplearning-wisc/stud/21b9492c63804d7acf41fefd0d6ad40cf29975a5/permutations/permutations_hamming_max_35.npy


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.black]
2 | line-length = 79
3 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | termcolor
 2 | numpy
 3 | tqdm
 4 | matplotlib
 5 | termcolor
 6 | yacs
 7 | tabulate
 8 | cloudpickle
 9 | Pillow
10 | imagesize
11 | tensorboard


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [isort]
 2 | line_length=100
 3 | multi_line_output=4
 4 | known_standard_library=numpy,setuptools
 5 | known_myself=detectron2
 6 | known_third_party=fvcore,matplotlib,cv2,torch,torchvision,PIL,pycocotools,yacs,termcolor,cityscapesscripts,tabulate,tqdm,scipy,lvis,psutil
 7 | no_lines_before=STDLIB,THIRDPARTY
 8 | sections=FUTURE,STDLIB,THIRDPARTY,myself,FIRSTPARTY,LOCALFOLDER
 9 | default_section=FIRSTPARTY
10 | 
11 | [mypy]
12 | python_version=3.6
13 | ignore_missing_imports = True
14 | warn_unused_configs = True
15 | disallow_untyped_defs = True
16 | check_untyped_defs = True
17 | warn_unused_ignores = True
18 | warn_redundant_casts = True
19 | show_column_numbers = True
20 | follow_imports = silent
21 | allow_redefinition = True
22 | ; Require all functions to be annotated
23 | disallow_incomplete_defs = True
24 | 


--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeplearning-wisc/stud/21b9492c63804d7acf41fefd0d6ad40cf29975a5/src/__init__.py


--------------------------------------------------------------------------------
/src/config/__init__.py:
--------------------------------------------------------------------------------
1 | from .config import get_cfg
2 | 


--------------------------------------------------------------------------------
/src/config/config.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from detectron2.config import CfgNode
 3 | 
 4 | 
 5 | def get_cfg() -> CfgNode:
 6 |     """
 7 |     Get a copy of the default config.
 8 | 
 9 |     Returns:
10 |         a detectron2 CfgNode instance.
11 |     """
12 |     from .defaults import _C
13 | 
14 |     return _C.clone()
15 | 


--------------------------------------------------------------------------------
/src/data/__init__.py:
--------------------------------------------------------------------------------
 1 | from .builtin import (
 2 |     # register_all_waymo,
 3 | 
 4 |     register_all_bdd_tracking,
 5 |     register_all_coco,
 6 |     register_coco_ood_wrt_bdd,
 7 |     register_vis_dataset,
 8 | 
 9 | 
10 | )
11 | 
12 | from .pair_sampler import PairTrainingSampler, PairDataLoader
13 | from .pair_fix_sampler import PairFixTrainingSampler, PairFixDataLoader
14 | from .pair_all_sampler import PairAllTrainingSampler, PairAllDataLoader
15 | from .pair_sampler_multi_random import PairTrainingMultiRandomSampler, PairMultirandomDataLoader
16 | from .pair_sampler_multi_interval import PairTrainingMultiIntervalSampler, PairDataIntervalLoader
17 | 
18 | # from .common import MapDataset
19 | 
20 | from .build import build_detection_train_loader, get_detection_dataset_dicts
21 | 
22 | # Register them all under "./datasets"
23 | # register_all_bdd100k()
24 | # register_all_waymo()
25 | 
26 | #
27 | register_all_bdd_tracking()
28 | register_all_coco()
29 | register_coco_ood_wrt_bdd()
30 | register_vis_dataset()
31 | 


--------------------------------------------------------------------------------
/src/data/pair_fix_sampler.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from torch.utils.data.sampler import Sampler
  3 | 
  4 | from detectron2.utils import comm
  5 | 
  6 | import copy
  7 | import itertools
  8 | import math
  9 | import random
 10 | from collections import defaultdict
 11 | from typing import Optional
 12 | 
 13 | __all__ = ["PairFixTrainingSampler", "PairFixDataLoader"]
 14 | 
 15 | 
 16 | class PairFixTrainingSampler(Sampler):
 17 |     """
 18 |     Similar to TrainingSampler, but produces a pair of training images from the
 19 |     same video sequence.
 20 |     """
 21 | 
 22 |     def __init__(
 23 |         self, cfg, dataset_dicts, batch_size, shuffle=True, seed=None
 24 |     ):
 25 |         """
 26 |         Args:
 27 |             cfg: config parameters
 28 |             dataset_dicts (list[dict]): annotations in Detectron2 dataset format.
 29 |             batch_size (int): Size of mini-batch.
 30 |             shuffle (bool): whether to shuffle the indices or not
 31 |             seed (int): the initial seed of the shuffle. Must be the same
 32 |                 across all workers. If None, will use a random seed shared
 33 |                 among workers (require synchronization among all workers).
 34 |         """
 35 |         self._offset = cfg.DATALOADER.PAIR_OFFSET_RANGE
 36 | 
 37 |         self._shuffle = shuffle
 38 |         if seed is None:
 39 |             seed = comm.shared_random_seed()
 40 |         self._seed = int(seed)
 41 |         # only sample the previous frame during eval
 42 | 
 43 |         self._rank = comm.get_rank()
 44 |         self._world_size = comm.get_world_size()
 45 | 
 46 |         self._total_size = len(dataset_dicts)
 47 |         total_batch_size = batch_size * self._world_size
 48 |         self._size = (
 49 |             len(dataset_dicts) // total_batch_size
 50 |         ) * total_batch_size
 51 |         self._batch_size = batch_size
 52 |         self.num_per_worker = self._size // self._world_size
 53 | 
 54 |         self._dataset_dicts = dataset_dicts
 55 |         self._data_by_video = {}
 56 |         for i, data in enumerate(dataset_dicts):
 57 |             data["total_idx"] = i
 58 |             if data["video_id"] in self._data_by_video:
 59 |                 self._data_by_video[data["video_id"]][data["index"]] = data
 60 |             else:
 61 |                 self._data_by_video[data["video_id"]] = {data["index"]: data}
 62 | 
 63 |     def __iter__(self):
 64 |         while True:
 65 |             indices = self._infinite_indices()[: self._size]
 66 |             split = indices[
 67 |                 self._rank
 68 |                 * self.num_per_worker : (self._rank + 1)
 69 |                 * self.num_per_worker
 70 |             ]
 71 |             for i in range(0, len(split), self._batch_size):
 72 |                 chunk = split[i : i + self._batch_size]
 73 |                 pairs = []
 74 |                 for c in chunk:
 75 |                     pairs.append(c)
 76 |                     vid_id = self._dataset_dicts[c]["video_id"]
 77 |                     index = self._dataset_dicts[c]["index"]
 78 |                     offsets = [
 79 |                         o
 80 |                         for o in [-self._offset, self._offset]
 81 |                         if o != 0
 82 |                         and index + o in self._data_by_video[vid_id].keys()
 83 |                     ]
 84 |                     if not offsets:
 85 |                         offsets = [0]
 86 |                     # breakpoint()
 87 |                     offset = random.choice(offsets)
 88 |                     pair_idx = index + offset
 89 |                     pair = self._data_by_video[vid_id][pair_idx]
 90 |                     pairs.append(pair["total_idx"])
 91 |                 yield pairs
 92 | 
 93 |     def _infinite_indices(self):
 94 |         # pylint: disable=no-member
 95 |         g = torch.Generator()
 96 |         g.manual_seed(self._seed)
 97 |         if self._shuffle:
 98 |             return torch.randperm(self._total_size, generator=g)
 99 |         else:
100 |             return torch.arange(self._total_size)
101 | 
102 | 
103 | class PairFixDataLoader:
104 |     """
105 |     Wrapping DataLoader to add random flipping for pairs of images.
106 |     """
107 | 
108 |     def __init__(self, cfg, dataloader):
109 |         self.cfg = cfg
110 |         self.dataloader = dataloader
111 | 
112 |     def __iter__(self):
113 |         # pylint: disable=no-member
114 |         for data in iter(self.dataloader):
115 |             num_pairs = len(data) // 2
116 |             for i in range(num_pairs):
117 |                 datum = data[i * 2 : (i + 1) * 2]
118 |                 rand = random.randint(0, 1)
119 |                 if self.cfg.DATALOADER.NO_FLIP or rand == 0:
120 |                     continue
121 |                 # flip both images in pair
122 |                 for d in datum:
123 |                     w = d["instances"]._image_size[1]
124 |                     d["image"] = torch.flip(d["image"], [2])
125 |                     boxes = d["instances"].get("gt_boxes")
126 |                     boxes.tensor[:, 0] = w - boxes.tensor[:, 0]
127 |                     boxes.tensor[:, 2] = w - boxes.tensor[:, 2]
128 |                     temp = copy.deepcopy(boxes.tensor[:, 2])
129 |                     boxes.tensor[:, 2] = boxes.tensor[:, 0]
130 |                     boxes.tensor[:, 0] = temp
131 |             yield data
132 | 


--------------------------------------------------------------------------------
/src/data/pair_sampler.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from torch.utils.data.sampler import Sampler
  3 | 
  4 | from detectron2.utils import comm
  5 | 
  6 | import copy
  7 | import itertools
  8 | import math
  9 | import random
 10 | from collections import defaultdict
 11 | from typing import Optional
 12 | 
 13 | __all__ = ["PairTrainingSampler", "PairDataLoader"]
 14 | 
 15 | 
 16 | class PairTrainingSampler(Sampler):
 17 |     """
 18 |     Similar to TrainingSampler, but produces a pair of training images from the
 19 |     same video sequence.
 20 |     """
 21 | 
 22 |     def __init__(
 23 |         self, cfg, dataset_dicts, batch_size, shuffle=True, seed=None
 24 |     ):
 25 |         """
 26 |         Args:
 27 |             cfg: config parameters
 28 |             dataset_dicts (list[dict]): annotations in Detectron2 dataset format.
 29 |             batch_size (int): Size of mini-batch.
 30 |             shuffle (bool): whether to shuffle the indices or not
 31 |             seed (int): the initial seed of the shuffle. Must be the same
 32 |                 across all workers. If None, will use a random seed shared
 33 |                 among workers (require synchronization among all workers).
 34 |         """
 35 |         self._offset = cfg.DATALOADER.PAIR_OFFSET_RANGE
 36 | 
 37 |         self._shuffle = shuffle
 38 |         if seed is None:
 39 |             seed = comm.shared_random_seed()
 40 |         self._seed = int(seed)
 41 |         # only sample the previous frame during eval
 42 | 
 43 |         self._rank = comm.get_rank()
 44 |         self._world_size = comm.get_world_size()
 45 | 
 46 |         self._total_size = len(dataset_dicts)
 47 |         total_batch_size = batch_size * self._world_size
 48 |         self._size = (
 49 |             len(dataset_dicts) // total_batch_size
 50 |         ) * total_batch_size
 51 |         self._batch_size = batch_size
 52 |         self.num_per_worker = self._size // self._world_size
 53 | 
 54 |         self._dataset_dicts = dataset_dicts
 55 |         self._data_by_video = {}
 56 |         for i, data in enumerate(dataset_dicts):
 57 |             data["total_idx"] = i
 58 |             if data["video_id"] in self._data_by_video:
 59 |                 self._data_by_video[data["video_id"]][data["index"]] = data
 60 |             else:
 61 |                 self._data_by_video[data["video_id"]] = {data["index"]: data}
 62 | 
 63 |     def __iter__(self):
 64 |         while True:
 65 |             indices = self._infinite_indices()[: self._size]
 66 |             split = indices[
 67 |                 self._rank
 68 |                 * self.num_per_worker : (self._rank + 1)
 69 |                 * self.num_per_worker
 70 |             ]
 71 |             for i in range(0, len(split), self._batch_size):
 72 |                 chunk = split[i : i + self._batch_size]
 73 |                 pairs = []
 74 |                 for c in chunk:
 75 |                     pairs.append(c)
 76 |                     vid_id = self._dataset_dicts[c]["video_id"]
 77 |                     index = self._dataset_dicts[c]["index"]
 78 |                     offsets = [
 79 |                         o
 80 |                         for o in range(-self._offset, self._offset + 1)
 81 |                         if o != 0
 82 |                         and index + o in self._data_by_video[vid_id].keys()
 83 |                     ]
 84 |                     if not offsets:
 85 |                         offsets = [0]
 86 |                     offset = random.choice(offsets)
 87 |                     pair_idx = index + offset
 88 |                     pair = self._data_by_video[vid_id][pair_idx]
 89 |                     pairs.append(pair["total_idx"])
 90 |                 yield pairs
 91 | 
 92 |     def _infinite_indices(self):
 93 |         # pylint: disable=no-member
 94 |         g = torch.Generator()
 95 |         g.manual_seed(self._seed)
 96 |         if self._shuffle:
 97 |             return torch.randperm(self._total_size, generator=g)
 98 |         else:
 99 |             return torch.arange(self._total_size)
100 | 
101 | 
102 | class PairDataLoader:
103 |     """
104 |     Wrapping DataLoader to add random flipping for pairs of images.
105 |     """
106 | 
107 |     def __init__(self, cfg, dataloader):
108 |         self.cfg = cfg
109 |         self.dataloader = dataloader
110 | 
111 |     def __iter__(self):
112 |         # pylint: disable=no-member
113 |         for data in iter(self.dataloader):
114 |             num_pairs = len(data) // 2
115 |             for i in range(num_pairs):
116 |                 datum = data[i * 2 : (i + 1) * 2]
117 |                 rand = random.randint(0, 1)
118 |                 if self.cfg.DATALOADER.NO_FLIP or rand == 0:
119 |                     continue
120 |                 # flip both images in pair
121 |                 for d in datum:
122 |                     w = d["instances"]._image_size[1]
123 |                     d["image"] = torch.flip(d["image"], [2])
124 |                     boxes = d["instances"].get("gt_boxes")
125 |                     boxes.tensor[:, 0] = w - boxes.tensor[:, 0]
126 |                     boxes.tensor[:, 2] = w - boxes.tensor[:, 2]
127 |                     temp = copy.deepcopy(boxes.tensor[:, 2])
128 |                     boxes.tensor[:, 2] = boxes.tensor[:, 0]
129 |                     boxes.tensor[:, 0] = temp
130 |             yield data
131 | 


--------------------------------------------------------------------------------
/src/data/pair_sampler_multi_interval.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from torch.utils.data.sampler import Sampler
  3 | 
  4 | from detectron2.utils import comm
  5 | 
  6 | import copy
  7 | import itertools
  8 | import math
  9 | import numpy as np
 10 | import random
 11 | from collections import defaultdict
 12 | from typing import Optional
 13 | 
 14 | __all__ = ["PairTrainingMultiIntervalSampler", "PairDataIntervalLoader"]
 15 | 
 16 | 
 17 | class PairTrainingMultiIntervalSampler(Sampler):
 18 |     """
 19 |     Similar to TrainingSampler, but produces a pair of training images from the
 20 |     same video sequence.
 21 |     """
 22 | 
 23 |     def __init__(
 24 |         self, cfg, dataset_dicts, batch_size, shuffle=True, seed=None
 25 |     ):
 26 |         """
 27 |         Args:
 28 |             cfg: config parameters
 29 |             dataset_dicts (list[dict]): annotations in Detectron2 dataset format.
 30 |             batch_size (int): Size of mini-batch.
 31 |             shuffle (bool): whether to shuffle the indices or not
 32 |             seed (int): the initial seed of the shuffle. Must be the same
 33 |                 across all workers. If None, will use a random seed shared
 34 |                 among workers (require synchronization among all workers).
 35 |         """
 36 |         self._offset = cfg.DATALOADER.PAIR_OFFSET_RANGE
 37 |         self._select = cfg.DATALOADER.SELCTED_NUMBER
 38 |         self._interval = cfg.DATALOADER.INTERVAL
 39 | 
 40 |         self._shuffle = shuffle
 41 |         if seed is None:
 42 |             seed = comm.shared_random_seed()
 43 |         self._seed = int(seed)
 44 |         # only sample the previous frame during eval
 45 | 
 46 |         self._rank = comm.get_rank()
 47 |         self._world_size = comm.get_world_size()
 48 | 
 49 |         self._total_size = len(dataset_dicts)
 50 |         total_batch_size = batch_size * self._world_size
 51 |         self._size = (
 52 |             len(dataset_dicts) // total_batch_size
 53 |         ) * total_batch_size
 54 |         self._batch_size = batch_size
 55 |         self.num_per_worker = self._size // self._world_size
 56 | 
 57 |         self._dataset_dicts = dataset_dicts
 58 |         self._data_by_video = {}
 59 |         for i, data in enumerate(dataset_dicts):
 60 |             data["total_idx"] = i
 61 |             if data["video_id"] in self._data_by_video:
 62 |                 self._data_by_video[data["video_id"]][data["index"]] = data
 63 |             else:
 64 |                 self._data_by_video[data["video_id"]] = {data["index"]: data}
 65 | 
 66 |     def __iter__(self):
 67 |         while True:
 68 |             indices = self._infinite_indices()[: self._size]
 69 |             split = indices[
 70 |                 self._rank
 71 |                 * self.num_per_worker : (self._rank + 1)
 72 |                 * self.num_per_worker
 73 |             ]
 74 |             for i in range(0, len(split), self._batch_size):
 75 |                 chunk = split[i : i + self._batch_size]
 76 |                 pairs = []
 77 |                 for c in chunk:
 78 |                     pairs.append(c)
 79 |                     vid_id = self._dataset_dicts[c]["video_id"]
 80 |                     index = self._dataset_dicts[c]["index"]
 81 |                     offset = []
 82 |                     for cur_index in range(self._select):
 83 |                         offsets = [
 84 |                             o
 85 |                             for o in [-self._interval*(cur_index+1), self._interval*(cur_index+1)]
 86 |                             if o != 0
 87 |                             and index + o in self._data_by_video[vid_id].keys()
 88 |                         ]
 89 |                         if not offsets:
 90 |                             offsets = [0]
 91 |                         offset += [random.choice(offsets)]
 92 | 
 93 |                     # offset = random.sample(offsets, self._select)
 94 |                     pair_idx = index + np.asarray(offset)
 95 |                     for temp in pair_idx:
 96 |                         pair = self._data_by_video[vid_id][temp]
 97 |                         pairs.append(pair["total_idx"])
 98 |                 yield pairs
 99 | 
100 |     def _infinite_indices(self):
101 |         # pylint: disable=no-member
102 |         g = torch.Generator()
103 |         g.manual_seed(self._seed)
104 |         if self._shuffle:
105 |             return torch.randperm(self._total_size, generator=g)
106 |         else:
107 |             return torch.arange(self._total_size)
108 | 
109 | 
110 | class PairDataIntervalLoader:
111 |     """
112 |     Wrapping DataLoader to add random flipping for pairs of images.
113 |     """
114 | 
115 |     def __init__(self, cfg, dataloader):
116 |         self.cfg = cfg
117 |         self.dataloader = dataloader
118 | 
119 |     def __iter__(self):
120 |         # pylint: disable=no-member
121 |         for data in iter(self.dataloader):
122 |             # print(len(data))
123 |             num_pairs = len(data) // (self.cfg.DATALOADER.SELCTED_NUMBER + 1)
124 |             for i in range(num_pairs):
125 |                 # breakpoint()
126 |                 datum = data[i * (self.cfg.DATALOADER.SELCTED_NUMBER + 1) :
127 |                              (i + 1) * (self.cfg.DATALOADER.SELCTED_NUMBER + 1)]
128 |                 rand = random.randint(0, 1)
129 |                 if self.cfg.DATALOADER.NO_FLIP or rand == 0:
130 |                     continue
131 |                 # flip both images in pair
132 |                 for d in datum:
133 |                     w = d["instances"]._image_size[1]
134 |                     d["image"] = torch.flip(d["image"], [2])
135 |                     boxes = d["instances"].get("gt_boxes")
136 |                     boxes.tensor[:, 0] = w - boxes.tensor[:, 0]
137 |                     boxes.tensor[:, 2] = w - boxes.tensor[:, 2]
138 |                     temp = copy.deepcopy(boxes.tensor[:, 2])
139 |                     boxes.tensor[:, 2] = boxes.tensor[:, 0]
140 |                     boxes.tensor[:, 0] = temp
141 |             yield data
142 | 


--------------------------------------------------------------------------------
/src/data/pair_sampler_multi_random.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from torch.utils.data.sampler import Sampler
  3 | 
  4 | from detectron2.utils import comm
  5 | 
  6 | import copy
  7 | import itertools
  8 | import math
  9 | import numpy as np
 10 | import random
 11 | from collections import defaultdict
 12 | from typing import Optional
 13 | 
 14 | __all__ = ["PairTrainingMultiRandomSampler", "PairMultirandomDataLoader"]
 15 | 
 16 | 
 17 | class PairTrainingMultiRandomSampler(Sampler):
 18 |     """
 19 |     Similar to TrainingSampler, but produces a pair of training images from the
 20 |     same video sequence.
 21 |     """
 22 | 
 23 |     def __init__(
 24 |         self, cfg, dataset_dicts, batch_size, shuffle=True, seed=None
 25 |     ):
 26 |         """
 27 |         Args:
 28 |             cfg: config parameters
 29 |             dataset_dicts (list[dict]): annotations in Detectron2 dataset format.
 30 |             batch_size (int): Size of mini-batch.
 31 |             shuffle (bool): whether to shuffle the indices or not
 32 |             seed (int): the initial seed of the shuffle. Must be the same
 33 |                 across all workers. If None, will use a random seed shared
 34 |                 among workers (require synchronization among all workers).
 35 |         """
 36 |         self._offset = cfg.DATALOADER.PAIR_OFFSET_RANGE
 37 |         self._select = cfg.DATALOADER.SELCTED_NUMBER
 38 | 
 39 |         self._shuffle = shuffle
 40 |         if seed is None:
 41 |             seed = comm.shared_random_seed()
 42 |         self._seed = int(seed)
 43 |         # only sample the previous frame during eval
 44 | 
 45 |         self._rank = comm.get_rank()
 46 |         self.save = []
 47 |         self._world_size = comm.get_world_size()
 48 | 
 49 |         self._total_size = len(dataset_dicts)
 50 |         total_batch_size = batch_size * self._world_size
 51 |         self._size = (
 52 |             len(dataset_dicts) // total_batch_size
 53 |         ) * total_batch_size
 54 |         self._batch_size = batch_size
 55 |         self.num_per_worker = self._size // self._world_size
 56 | 
 57 |         self._dataset_dicts = dataset_dicts
 58 |         self._data_by_video = {}
 59 |         for i, data in enumerate(dataset_dicts):
 60 |             data["total_idx"] = i
 61 |             if data["video_id"] in self._data_by_video:
 62 |                 self._data_by_video[data["video_id"]][data["index"]] = data
 63 |             else:
 64 |                 self._data_by_video[data["video_id"]] = {data["index"]: data}
 65 | 
 66 |     def __iter__(self):
 67 |         while True:
 68 |             indices = self._infinite_indices()[: self._size]
 69 |             split = indices[
 70 |                 self._rank
 71 |                 * self.num_per_worker : (self._rank + 1)
 72 |                 * self.num_per_worker
 73 |             ]
 74 |             for i in range(0, len(split), self._batch_size):
 75 |                 chunk = split[i : i + self._batch_size]
 76 |                 pairs = []
 77 |                 for c in chunk:
 78 |                     pairs.append(c)
 79 |                     vid_id = self._dataset_dicts[c]["video_id"]
 80 |                     index = self._dataset_dicts[c]["index"]
 81 |                     offsets = [
 82 |                         o
 83 |                         for o in range(-self._offset, self._offset + 1)
 84 |                         if o != 0
 85 |                         and index + o in self._data_by_video[vid_id].keys()
 86 |                     ]
 87 |                     if not offsets:
 88 |                         offsets = [0]*self._select
 89 |                     if len(offsets) < self._select:
 90 |                         offsets += [random.choice(offsets) for _ in range(self._select-len(offsets))]
 91 |                     offset = random.sample(offsets, self._select)
 92 |                     # self.save += list(offset)
 93 |                     # print(self.save)
 94 |                     # if len(self.save)>=500:
 95 |                     #     # import numpy as np
 96 |                     #     np.save('./bdd_offset.npy', self.save)
 97 |                     pair_idx = index + np.asarray(offset)
 98 |                     for temp in pair_idx:
 99 |                         pair = self._data_by_video[vid_id][temp]
100 |                         pairs.append(pair["total_idx"])
101 |                 yield pairs
102 | 
103 |     def _infinite_indices(self):
104 |         # pylint: disable=no-member
105 |         g = torch.Generator()
106 |         g.manual_seed(self._seed)
107 |         if self._shuffle:
108 |             return torch.randperm(self._total_size, generator=g)
109 |         else:
110 |             return torch.arange(self._total_size)
111 | 
112 | 
113 | class PairMultirandomDataLoader:
114 |     """
115 |     Wrapping DataLoader to add random flipping for pairs of images.
116 |     """
117 | 
118 |     def __init__(self, cfg, dataloader):
119 |         self.cfg = cfg
120 |         self.dataloader = dataloader
121 | 
122 |     def __iter__(self):
123 |         # pylint: disable=no-member
124 |         for data in iter(self.dataloader):
125 |             # print(len(data))
126 |             num_pairs = len(data) // (self.cfg.DATALOADER.SELCTED_NUMBER + 1)
127 |             for i in range(num_pairs):
128 |                 # breakpoint()
129 |                 datum = data[i * (self.cfg.DATALOADER.SELCTED_NUMBER + 1) :
130 |                              (i + 1) * (self.cfg.DATALOADER.SELCTED_NUMBER + 1)]
131 |                 rand = random.randint(0, 1)
132 |                 if self.cfg.DATALOADER.NO_FLIP or rand == 0:
133 |                     continue
134 |                 # flip both images in pair
135 |                 for d in datum:
136 |                     w = d["instances"]._image_size[1]
137 |                     d["image"] = torch.flip(d["image"], [2])
138 |                     boxes = d["instances"].get("gt_boxes")
139 |                     boxes.tensor[:, 0] = w - boxes.tensor[:, 0]
140 |                     boxes.tensor[:, 2] = w - boxes.tensor[:, 2]
141 |                     temp = copy.deepcopy(boxes.tensor[:, 2])
142 |                     boxes.tensor[:, 2] = boxes.tensor[:, 0]
143 |                     boxes.tensor[:, 0] = temp
144 |             yield data
145 | 


--------------------------------------------------------------------------------
/src/engine/__init__.py:
--------------------------------------------------------------------------------
1 | from .defaults import DefaultTrainer, default_argument_parser
2 | from .train_loop import *


--------------------------------------------------------------------------------
/src/engine/evaluate.py:
--------------------------------------------------------------------------------
  1 | import copy
  2 | import datetime
  3 | import json
  4 | import logging
  5 | import os
  6 | import os.path as osp
  7 | import math
  8 | import numpy as np
  9 | import sys
 10 | import time
 11 | import torch
 12 | from collections import OrderedDict
 13 | from contextlib import contextmanager
 14 | from detectron2.evaluation import DatasetEvaluators
 15 | from detectron2.utils.comm import get_world_size
 16 | from detectron2.utils.logger import log_every_n_seconds
 17 | 
 18 | 
 19 | def inference_on_dataset(model, data_loader, evaluator):
 20 |     """
 21 |     Run model on the data_loader and evaluate the metrics with evaluator.
 22 |     Also benchmark the inference speed of `model.forward` accurately.
 23 |     The model will be used in eval mode.
 24 |     Args:
 25 |         model (nn.Module): a module which accepts an object from
 26 |             `data_loader` and returns some outputs. It will be temporarily set to `eval` mode.
 27 |             If you wish to evaluate a model in `training` mode instead, you can
 28 |             wrap the given model and override its behavior of `.eval()` and `.train()`.
 29 |         data_loader: an iterable object with a length.
 30 |             The elements it generates will be the inputs to the model.
 31 |         evaluator (DatasetEvaluator): the evaluator to run. Use `None` if you only want
 32 |             to benchmark, but don't want to do any evaluation.
 33 |     Returns:
 34 |         The return value of `evaluator.evaluate()`
 35 |     """
 36 |     num_devices = get_world_size()
 37 |     logger = logging.getLogger(__name__)
 38 |     logger.info("Start inference on {} images".format(len(data_loader)))
 39 | 
 40 |     total = len(data_loader)  # inference data loader must have a fixed length
 41 |     if evaluator is None:
 42 |         # create a no-op evaluator
 43 |         evaluator = DatasetEvaluators([])
 44 |     evaluator.reset()
 45 | 
 46 |     num_warmup = min(5, total - 1)
 47 |     start_time = time.perf_counter()
 48 |     total_compute_time = 0
 49 |     with inference_context(model), torch.no_grad():
 50 |         for idx, inputs in enumerate(data_loader):
 51 |             if idx == num_warmup:
 52 |                 start_time = time.perf_counter()
 53 |                 total_compute_time = 0
 54 | 
 55 |             start_compute_time = time.perf_counter()
 56 |             outputs = model(inputs)
 57 |             if torch.cuda.is_available():
 58 |                 torch.cuda.synchronize()
 59 |             total_compute_time += time.perf_counter() - start_compute_time
 60 |             evaluator.process(inputs, outputs)
 61 | 
 62 |             iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup)
 63 |             seconds_per_img = total_compute_time / iters_after_start
 64 |             if idx >= num_warmup * 2 or seconds_per_img > 5:
 65 |                 total_seconds_per_img = (
 66 |                     time.perf_counter() - start_time
 67 |                 ) / iters_after_start
 68 |                 eta = datetime.timedelta(
 69 |                     seconds=int(total_seconds_per_img * (total - idx - 1))
 70 |                 )
 71 |                 log_every_n_seconds(
 72 |                     logging.INFO,
 73 |                     "Inference done {}/{}. {:.4f} s / img. ETA={}".format(
 74 |                         idx + 1, total, seconds_per_img, str(eta)
 75 |                     ),
 76 |                     n=5,
 77 |                 )
 78 | 
 79 |     # Measure the time only for this worker (before the synchronization barrier)
 80 |     total_time = time.perf_counter() - start_time
 81 |     total_time_str = str(datetime.timedelta(seconds=total_time))
 82 |     # NOTE this format is parsed by grep
 83 |     logger.info(
 84 |         "Total inference time: {} ({:.6f} s / img per device, on {} devices)".format(
 85 |             total_time_str, total_time / (total - num_warmup), num_devices
 86 |         )
 87 |     )
 88 |     total_compute_time_str = str(
 89 |         datetime.timedelta(seconds=int(total_compute_time))
 90 |     )
 91 |     logger.info(
 92 |         "Total inference pure compute time: {} ({:.6f} s / img per device, on {} devices)".format(
 93 |             total_compute_time_str,
 94 |             total_compute_time / (total - num_warmup),
 95 |             num_devices,
 96 |         )
 97 |     )
 98 | 
 99 |     results = evaluator.evaluate()
100 |     # An evaluator may return None when not in main process.
101 |     # Replace it by an empty dict instead to make it easier for downstream code to handle
102 |     if results is None:
103 |         results = {}
104 |     return results
105 | 
106 | 
107 | @contextmanager
108 | def inference_context(model):
109 |     """
110 |     A context where the model is temporarily changed to eval mode,
111 |     and restored to previous mode afterwards.
112 |     Args:
113 |         model: a torch Module
114 |     """
115 |     training_mode = model.training
116 |     model.eval()
117 |     yield
118 |     model.train(training_mode)


--------------------------------------------------------------------------------
/src/modeling/__init__.py:
--------------------------------------------------------------------------------
1 | from .meta_arch import *
2 | from .self_supervised import *
3 | from .roi_heads import *
4 | # from .vit import *
5 | 


--------------------------------------------------------------------------------
/src/modeling/meta_arch/GAN.py:
--------------------------------------------------------------------------------
 1 | ## reference code is https://github.com/pytorch/examples/blob/master/dcgan/main.py
 2 | 
 3 | import torch
 4 | import torch.nn as nn
 5 | import torch.nn.functional as F
 6 | import os
 7 | 
 8 | # from models import *
 9 | 
10 | 
11 | def weights_init(m):
12 |     classname = m.__class__.__name__
13 |     if classname.find('Conv') != -1:
14 |         m.weight.data.normal_(0.0, 0.02)
15 |     elif classname.find('BatchNorm') != -1:
16 |         m.weight.data.normal_(1.0, 0.02)
17 |         m.bias.data.fill_(0)
18 | 
19 | class _netD(nn.Module):
20 |     def __init__(self, ngpu, nc, ndf):
21 |         super(_netD, self).__init__()
22 |         self.ngpu = ngpu
23 |         self.main = nn.Sequential(
24 |             # input size. (nc) x 32 x 32
25 |             nn.Conv2d(nc, ndf * 2, 4, 2, 1, bias=False),
26 |             nn.LeakyReLU(0.2, inplace=True),
27 |             # state size. (ndf*2) x 16 x 16
28 |             nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
29 |             nn.BatchNorm2d(ndf * 4),
30 |             nn.LeakyReLU(0.2, inplace=True),
31 |             # state size. (ndf*4) x 8 x 8
32 |             nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
33 |             nn.BatchNorm2d(ndf * 8),
34 |             nn.LeakyReLU(0.2, inplace=True),
35 |             # state size. (ndf*8) x 4 x 4
36 |             nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
37 |             nn.Sigmoid()
38 |         )
39 | 
40 |     def forward(self, input):
41 |         if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1:
42 |             output = nn.parallel.data_parallel(self.main, input, range(self.ngpu))
43 |         else:
44 |             output = self.main(input)
45 | 
46 |         return output.view(-1, 1)
47 | 
48 | class _netG(nn.Module):
49 |     def __init__(self, ngpu, nz, ngf, nc):
50 |         super(_netG, self).__init__()
51 |         self.ngpu = ngpu
52 |         self.main = nn.Sequential(
53 |             # input is Z, going into a convolution
54 |             nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False),
55 |             nn.BatchNorm2d(ngf * 8),
56 |             nn.ReLU(True),
57 |             # state size. (ngf*8) x 4 x 4
58 |             nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
59 |             nn.BatchNorm2d(ngf * 4),
60 |             nn.ReLU(True),
61 |             # state size. (ngf*4) x 8 x 8
62 |             nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
63 |             nn.BatchNorm2d(ngf * 2),
64 |             nn.ReLU(True),
65 |             # state size. (ngf*2) x 16 x 16
66 |             nn.ConvTranspose2d(ngf * 2, nc, 4, 2, 1, bias=False),
67 |             nn.Sigmoid()
68 |             # state size. (nc) x 32 x 32
69 |         )
70 | 
71 |     def forward(self, input):
72 |         if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1:
73 |             output = nn.parallel.data_parallel(self.main, input, range(self.ngpu))
74 |         else:
75 |             output = self.main(input)
76 |         return output
77 | 
78 | def Generator(n_gpu, nz, ngf, nc):
79 |     model = _netG(n_gpu, nz, ngf, nc)
80 |     model.apply(weights_init)
81 |     return model
82 | 
83 | def Discriminator(n_gpu, nc, ndf):
84 |     model = _netD(n_gpu, nc, ndf)
85 |     model.apply(weights_init)
86 |     return model


--------------------------------------------------------------------------------
/src/modeling/meta_arch/Imagelist.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Facebook, Inc. and its affiliates.
  2 | from __future__ import division
  3 | from typing import Any, List, Tuple
  4 | import torch
  5 | from torch import device
  6 | from torch.nn import functional as F
  7 | 
  8 | from detectron2.utils.env import TORCH_VERSION
  9 | 
 10 | 
 11 | def _as_tensor(x: Tuple[int, int]) -> torch.Tensor:
 12 |     """
 13 |     An equivalent of `torch.as_tensor`, but works under tracing if input
 14 |     is a list of tensor. `torch.as_tensor` will record a constant in tracing,
 15 |     but this function will use `torch.stack` instead.
 16 |     """
 17 |     if torch.jit.is_scripting():
 18 |         return torch.as_tensor(x)
 19 |     if isinstance(x, (list, tuple)) and all([isinstance(t, torch.Tensor) for t in x]):
 20 |         return torch.stack(x)
 21 |     return torch.as_tensor(x)
 22 | 
 23 | 
 24 | class ImageList(object):
 25 |     """
 26 |     Structure that holds a list of images (of possibly
 27 |     varying sizes) as a single tensor.
 28 |     This works by padding the images to the same size,
 29 |     and storing in a field the original sizes of each image
 30 |     Attributes:
 31 |         image_sizes (list[tuple[int, int]]): each tuple is (h, w).
 32 |             During tracing, it becomes list[Tensor] instead.
 33 |     """
 34 | 
 35 |     def __init__(self, tensor: torch.Tensor, image_sizes: List[Tuple[int, int]]):
 36 |         """
 37 |         Arguments:
 38 |             tensor (Tensor): of shape (N, H, W) or (N, C_1, ..., C_K, H, W) where K >= 1
 39 |             image_sizes (list[tuple[int, int]]): Each tuple is (h, w). It can
 40 |                 be smaller than (H, W) due to padding.
 41 |         """
 42 |         self.tensor = tensor
 43 |         self.image_sizes = image_sizes
 44 | 
 45 | 
 46 |     def __len__(self) -> int:
 47 |         return len(self.image_sizes)
 48 | 
 49 |     def __getitem__(self, idx) -> torch.Tensor:
 50 |         """
 51 |         Access the individual image in its original size.
 52 |         Args:
 53 |             idx: int or slice
 54 |         Returns:
 55 |             Tensor: an image of shape (H, W) or (C_1, ..., C_K, H, W) where K >= 1
 56 |         """
 57 |         size = self.image_sizes[idx]
 58 |         return self.tensor[idx, ..., : size[0], : size[1]]
 59 | 
 60 |     @torch.jit.unused
 61 |     def to(self, *args: Any, **kwargs: Any) -> "ImageList":
 62 |         cast_tensor = self.tensor.to(*args, **kwargs)
 63 |         return ImageList(cast_tensor, self.image_sizes)
 64 | 
 65 |     @property
 66 |     def device(self) -> device:
 67 |         return self.tensor.device
 68 | 
 69 |     @staticmethod
 70 |     def from_tensors(
 71 |         tensors: List[torch.Tensor], size_divisibility: int = 0, pad_value: float = 0.0
 72 |     ) -> "ImageList":
 73 |         """
 74 |         Args:
 75 |             tensors: a tuple or list of `torch.Tensor`, each of shape (Hi, Wi) or
 76 |                 (C_1, ..., C_K, Hi, Wi) where K >= 1. The Tensors will be padded
 77 |                 to the same shape with `pad_value`.
 78 |             size_divisibility (int): If `size_divisibility > 0`, add padding to ensure
 79 |                 the common height and width is divisible by `size_divisibility`.
 80 |                 This depends on the model and many models need a divisibility of 32.
 81 |             pad_value (float): value to pad
 82 |         Returns:
 83 |             an `ImageList`.
 84 |         """
 85 |         assert len(tensors) > 0
 86 |         assert isinstance(tensors, (tuple, list))
 87 |         for t in tensors:
 88 |             assert isinstance(t, torch.Tensor), type(t)
 89 |             assert t.shape[:-2] == tensors[0].shape[:-2], t.shape
 90 | 
 91 |         image_sizes = [(im.shape[-2], im.shape[-1]) for im in tensors]
 92 |         image_sizes_tensor = [_as_tensor(x) for x in image_sizes]
 93 |         max_size = torch.stack(image_sizes_tensor).max(0).values
 94 | 
 95 |         if size_divisibility > 1:
 96 |             stride = size_divisibility
 97 |             # the last two dims are H,W, both subject to divisibility requirement
 98 |             max_size = (max_size + (stride - 1)) // stride * stride
 99 | 
100 |         # handle weirdness of scripting and tracing ...
101 |         if torch.jit.is_scripting():
102 |             max_size: List[int] = max_size.to(dtype=torch.long).tolist()
103 |         else:
104 |             # https://github.com/pytorch/pytorch/issues/42448
105 |             if TORCH_VERSION >= (1, 7) and torch.jit.is_tracing():
106 |                 image_sizes = image_sizes_tensor
107 | 
108 |         if len(tensors) == 1:
109 |             # This seems slightly (2%) faster.
110 |             # TODO: check whether it's faster for multiple images as well
111 |             image_size = image_sizes[0]
112 |             padding_size = [0, max_size[-1] - image_size[1], 0, max_size[-2] - image_size[0]]
113 |             batched_imgs = F.pad(tensors[0], padding_size, value=pad_value).unsqueeze_(0)
114 |         else:
115 |             # max_size can be a tensor in tracing mode, therefore convert to list
116 |             batch_shape = [len(tensors)] + list(tensors[0].shape[:-2]) + list(max_size)
117 |             batched_imgs = tensors[0].new_full(batch_shape, pad_value)
118 |             for img, pad_img in zip(tensors, batched_imgs):
119 |                 try:
120 |                     # breakpoint()
121 |                     pad_img[..., : img.shape[-2], : img.shape[-1]].copy_(img)
122 |                 except:
123 |                     pad_img = img
124 | 
125 |         return ImageList(batched_imgs.contiguous(), image_sizes)


--------------------------------------------------------------------------------
/src/modeling/meta_arch/__init__.py:
--------------------------------------------------------------------------------
 1 | from .rcnn_ss import SSRCNN
 2 | from .rcnn_ss_gene import SSRCNNGene
 3 | from .rcnn_ss_remove import SSRCNNRemove
 4 | from .rcnn_ss_cheap import SSRCNNCHEAP
 5 | from .rcnn import GeneralizedRCNN1
 6 | from .rcnn_gan import GeneralizedRCNNLogisticGAN
 7 | from .rcnn_csi import GeneralizedRCNNLogisticCSI
 8 | from .rcnn_ss_mixup import SSRCNNmixup
 9 | from .rcnn_ss_add import SSRCNNAdd
10 | from .rcnn_ss_single import SSRCNNSingle
11 | from .regnet import build_regnet_fpn_backbone, build_regnetx_fpn_backbone
12 | # from .vovnet import build_vovnet_backbone, build_vovnet_fpn_backbone
13 | # from .dla import build_dla_backbone, build_dla_fpn_backbone, build_fcos_dla_fpn_backbone
14 | # from .resnest import build_resnet_backbone1
15 | # from .fpn import FPN1, build_resnet_fpn_backbone1, build_retinanet_resnet_fpn_backbone1
16 | # from .resnest1 import build_resnest_backbone, build_resnest_fpn_backbone, build_fcos_resnest_fpn_backbone
17 | # from .eff import build_efficientnet_backbone, build_efficientnet_fpn_backbone, build_fcos_efficientnet_fpn_backbone
18 | 


--------------------------------------------------------------------------------
/src/modeling/meta_arch/backbone.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | from abc import ABCMeta, abstractmethod
 3 | import torch.nn as nn
 4 | 
 5 | from detectron2.layers import ShapeSpec
 6 | 
 7 | __all__ = ["Backbone"]
 8 | 
 9 | 
10 | class Backbone(nn.Module, metaclass=ABCMeta):
11 |     """
12 |     Abstract base class for network backbones.
13 |     """
14 | 
15 |     def __init__(self):
16 |         """
17 |         The `__init__` method of any subclass can specify its own set of arguments.
18 |         """
19 |         super().__init__()
20 | 
21 |     @abstractmethod
22 |     def forward(self):
23 |         """
24 |         Subclasses must override this method, but adhere to the same return type.
25 |         Returns:
26 |             dict[str->Tensor]: mapping from feature name (e.g., "res2") to tensor
27 |         """
28 |         pass
29 | 
30 |     @property
31 |     def size_divisibility(self):
32 |         """
33 |         Some backbones require the input height and width to be divisible by a
34 |         specific integer. This is typically true for encoder / decoder type networks
35 |         with lateral connection (e.g., FPN) for which feature maps need to match
36 |         dimension in the "bottom up" and "top down" paths. Set to 0 if no specific
37 |         input size divisibility is required.
38 |         """
39 |         return 0
40 | 
41 |     def output_shape(self):
42 |         """
43 |         Returns:
44 |             dict[str->ShapeSpec]
45 |         """
46 |         # this is a backward-compatible default
47 |         return {
48 |             name: ShapeSpec(
49 |                 channels=self._out_feature_channels[name], stride=self._out_feature_strides[name]
50 |             )
51 |             for name in self._out_features
52 |         }


--------------------------------------------------------------------------------
/src/modeling/meta_arch/build.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | from detectron2.layers import ShapeSpec
 3 | from detectron2.utils.registry import Registry
 4 | 
 5 | from .backbone import Backbone
 6 | 
 7 | BACKBONE_REGISTRY = Registry("BACKBONE")
 8 | BACKBONE_REGISTRY.__doc__ = """
 9 | Registry for backbones, which extract feature maps from images
10 | The registered object must be a callable that accepts two arguments:
11 | 1. A :class:`detectron2.config.CfgNode`
12 | 2. A :class:`detectron2.layers.ShapeSpec`, which contains the input shape specification.
13 | It must returns an instance of :class:`Backbone`.
14 | """
15 | 
16 | 
17 | def build_backbone(cfg, input_shape=None):
18 |     """
19 |     Build a backbone from `cfg.MODEL.BACKBONE.NAME`.
20 |     Returns:
21 |         an instance of :class:`Backbone`
22 |     """
23 |     if input_shape is None:
24 |         input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
25 | 
26 |     backbone_name = cfg.MODEL.BACKBONE.NAME
27 |     backbone = BACKBONE_REGISTRY.get(backbone_name)(cfg, input_shape)
28 |     assert isinstance(backbone, Backbone)
29 |     return backbone


--------------------------------------------------------------------------------
/src/modeling/meta_arch/layers/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # --------------------------------------------------------
 4 | # Descripttion: https://github.com/sxhxliang/detectron2_backbone
 5 | # version: 0.0.1
 6 | # Author: Shihua Liang (sxhx.liang@gmail.com)
 7 | # FilePath: /detectron2_backbone/detectron2_backbone/layers/__init__.py
 8 | # Create: 2020-05-04 10:27:44
 9 | # LastAuthor: Shihua Liang
10 | # lastTime: 2020-05-04 10:34:23
11 | # --------------------------------------------------------
12 | from .wrappers import Conv2d ,SeparableConv2d, MaxPool2d
13 | from .activations import MemoryEfficientSwish, Swish


--------------------------------------------------------------------------------
/src/modeling/meta_arch/layers/activations.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # --------------------------------------------------------
 4 | # Descripttion: https://github.com/sxhxliang/detectron2_backbone
 5 | # version: 0.0.1
 6 | # Author: Shihua Liang (sxhx.liang@gmail.com)
 7 | # FilePath: /detectron2_backbone/detectron2_backbone/layers/activations.py
 8 | # Create: 2020-05-04 10:33:14
 9 | # LastAuthor: Shihua Liang
10 | # lastTime: 2020-05-04 10:33:52
11 | # --------------------------------------------------------
12 | 
13 | import math
14 | 
15 | import torch
16 | from torch import nn
17 | 
18 | class SwishImplementation(torch.autograd.Function):
19 |     @staticmethod
20 |     def forward(ctx, i):
21 |         result = i * torch.sigmoid(i)
22 |         ctx.save_for_backward(i)
23 |         return result
24 | 
25 |     @staticmethod
26 |     def backward(ctx, grad_output):
27 |         i = ctx.saved_variables[0]
28 |         sigmoid_i = torch.sigmoid(i)
29 |         return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i)))
30 | 
31 | class MemoryEfficientSwish(nn.Module):
32 |     def forward(self, x):
33 |         return SwishImplementation.apply(x)
34 | 
35 | class Swish(nn.Module):
36 |     def forward(self, x):
37 |         return x * torch.sigmoid(x)


--------------------------------------------------------------------------------
/src/modeling/meta_arch/regnet.py:
--------------------------------------------------------------------------------
  1 | # from ..common.optim import SGD as optimizer
  2 | # from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
  3 | # from ..common.data.coco import dataloader
  4 | # from ..common.models.mask_rcnn_fpn import model
  5 | # from ..common.train import train
  6 | 
  7 | # from detectron2.config import LazyCall as L
  8 | from .regnet_model import RegNet
  9 | from .regnet_model import SimpleStem, ResBottleneckBlock
 10 | 
 11 | from detectron2.modeling.backbone.build import BACKBONE_REGISTRY
 12 | from detectron2.modeling.backbone.fpn import FPN, LastLevelMaxPool
 13 | 
 14 | from detectron2.layers import (
 15 |     Conv2d,
 16 |     DeformConv,
 17 |     FrozenBatchNorm2d,
 18 |     ModulatedDeformConv,
 19 |     ShapeSpec,
 20 |     get_norm,
 21 | )
 22 | 
 23 | # Replace default ResNet with RegNetY-4GF from the DDS paper. Config source:
 24 | # https://github.com/facebookresearch/pycls/blob/2c152a6e5d913e898cca4f0a758f41e6b976714d/configs/dds_baselines/regnety/RegNetY-4.0GF_dds_8gpu.yaml#L4-L10  # noqa
 25 | # model.backbone.bottom_up = L(RegNet)(
 26 | #     stem_class=SimpleStem,
 27 | #     stem_width=32,
 28 | #     block_class=ResBottleneckBlock,
 29 | #     depth=22,
 30 | #     w_a=31.41,
 31 | #     w_0=96,
 32 | #     w_m=2.24,
 33 | #     group_width=64,
 34 | #     se_ratio=0.25,
 35 | #     freeze_at=2,
 36 | #     norm="FrozenBN",
 37 | #     out_features=["s1", "s2", "s3", "s4"],
 38 | # )
 39 | # model.pixel_std = [57.375, 57.120, 58.395]
 40 | #
 41 | # optimizer.weight_decay = 5e-5
 42 | # train.init_checkpoint = (
 43 | #     "https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906838/RegNetY-4.0GF_dds_8gpu.pyth"
 44 | # )
 45 | # # RegNets benefit from enabling cudnn benchmark mode
 46 | # train.cudnn_benchmark = True
 47 | 
 48 | @BACKBONE_REGISTRY.register()
 49 | def build_regnet_fpn_backbone(cfg, input_shape: ShapeSpec):
 50 |     """
 51 |     Args:
 52 |         cfg: a detectron2 CfgNode
 53 |     Returns:
 54 |         backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`.
 55 |     """
 56 |     bottom_up = RegNet(
 57 |             stem_class=SimpleStem,
 58 |             stem_width=32,
 59 |             block_class=ResBottleneckBlock,
 60 |             depth=22,
 61 |             w_a=31.41,
 62 |             w_0=96,
 63 |             w_m=2.24,
 64 |             group_width=64,
 65 |             se_ratio=0.25,
 66 |             freeze_at=2,
 67 |             norm="FrozenBN",
 68 |             out_features=["s1", "s2", "s3", "s4"],
 69 |         )
 70 |     in_features = cfg.MODEL.FPN.IN_FEATURES
 71 |     out_channels = cfg.MODEL.FPN.OUT_CHANNELS
 72 |     backbone = FPN(
 73 |         bottom_up=bottom_up,
 74 |         in_features=in_features,
 75 |         out_channels=out_channels,
 76 |         norm=cfg.MODEL.FPN.NORM,
 77 |         top_block=LastLevelMaxPool(),
 78 |         fuse_type=cfg.MODEL.FPN.FUSE_TYPE,
 79 |     )
 80 |     return backbone
 81 | 
 82 | @BACKBONE_REGISTRY.register()
 83 | def build_regnetx_fpn_backbone(cfg, input_shape: ShapeSpec):
 84 |     """
 85 |     Args:
 86 |         cfg: a detectron2 CfgNode
 87 |     Returns:
 88 |         backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`.
 89 |     """
 90 |     bottom_up = RegNet(
 91 |         stem_class=SimpleStem,
 92 |         stem_width=32,
 93 |         block_class=ResBottleneckBlock,
 94 |         depth=23,
 95 |         w_a=38.65,
 96 |         w_0=96,
 97 |         w_m=2.43,
 98 |         group_width=40,
 99 |         freeze_at=2,
100 |         norm="FrozenBN",
101 |         out_features=["s1", "s2", "s3", "s4"],
102 | )
103 |     in_features = cfg.MODEL.FPN.IN_FEATURES
104 |     out_channels = cfg.MODEL.FPN.OUT_CHANNELS
105 |     backbone = FPN(
106 |         bottom_up=bottom_up,
107 |         in_features=in_features,
108 |         out_channels=out_channels,
109 |         norm=cfg.MODEL.FPN.NORM,
110 |         top_block=LastLevelMaxPool(),
111 |         fuse_type=cfg.MODEL.FPN.FUSE_TYPE,
112 |     )
113 |     return backbone


--------------------------------------------------------------------------------
/src/modeling/roi_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .roi_heads import StandardROIHeadsSS, build_roi_heads
2 | from .roi_heads_gan import ROIHeadsLogisticGAN#, build_roi_heads
3 | from .roi_heads_godinc import ROIHeadsLogisticODINC#,build_roi_heads
4 | from .roi_heads_csi import ROIHeadsLogisticCSI#,build_roi_heads
5 | from .roi_heads_add import StandardROIHeadsSSAdd
6 | from .fast_rcnn import FastRCNNOutputs
7 | 


--------------------------------------------------------------------------------
/src/modeling/self_supervised/__init__.py:
--------------------------------------------------------------------------------
 1 | from .build import SSHEAD_REGISTRY, build_ss_head
 2 | # import all the ss head, so they will be registered
 3 | # from .cycle import CycleHead
 4 | # from .cycle_energy import CycleEnergyHead
 5 | # from .cycle_energy_1024_latter import CycleEnergy1024LatterHead
 6 | # from .cycle_energy_direct import CycleEnergyDirectHead
 7 | # from .cycle_energy_direct_add import CycleEnergyDirectAddHead
 8 | from .cycle_energy_direct_add_all import CycleEnergyDirectAddAllHead
 9 | # from .cycle_energy_direct_add_all_cache_new import CycleEnergyDirectAddAllCacheHead
10 | # from .cycle_energy_direct_add_all_max import CycleEnergyDirectAddAllMaxHead
11 | # from .cycle_energy_direct_add_all_mild_energy import CycleEnergyDirectAddAllMildHead
12 | # from .cycle_energy_direct_add_all_noise import CycleEnergyDirectAddAllNoiseHead
13 | # from .cycle_energy_direct_add_all_random import CycleEnergyDirectAddAllRandomHead
14 | # from .cycle_energy_direct_add_att import CycleEnergyDirectAddAttHead
15 | # from .cycle_energy_direct_add_att_neg import CycleEnergyDirectAddAttNegHead
16 | # from .cycle_energy_direct_random import CycleEnergyDirectRandomHead
17 | # from .cycle_energy_direct_max import CycleEnergyDirectMaxHead
18 | # from .cycle_energy_direct_no import CycleEnergyDirectAddNoHead
19 | from .rotation import RotationHead
20 | from .jigsaw import JigsawHead
21 | 
22 | 


--------------------------------------------------------------------------------
/src/modeling/self_supervised/build.py:
--------------------------------------------------------------------------------
 1 | from detectron2.layers import ShapeSpec
 2 | from detectron2.utils.registry import Registry
 3 | 
 4 | SSHEAD_REGISTRY = Registry("SSHEAD")
 5 | SSHEAD_REGISTRY.__doc__ = """
 6 | return self-supervised head 
 7 | """
 8 | 
 9 | 
10 | def build_ss_head(cfg, input_shape=None):
11 |     """
12 |     Build a backbone from `cfg.MODEL.BACKBONE.NAME`.
13 | 
14 |     Returns:
15 |         an instance of :class:`Backbone`
16 |     """
17 |     if input_shape is None:
18 |         input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
19 | 
20 |     ss_name = cfg.MODEL.SS.NAME
21 | 
22 |     ss_head = [SSHEAD_REGISTRY.get(name)(cfg, input_shape) for name in ss_name]
23 |     assert len(ss_head) != 0
24 |     return ss_head
25 | 


--------------------------------------------------------------------------------
/src/modeling/self_supervised/cycle.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | 
  6 | from detectron2.structures import ImageList
  7 | 
  8 | from .build import SSHEAD_REGISTRY
  9 | from .ss_layers import Flatten
 10 | 
 11 | 
 12 | class CycleHead(nn.Module):
 13 |     def __init__(self, cfg, cin):
 14 |         super(CycleHead, self).__init__()
 15 | 
 16 |         self.name = 'cycle'
 17 |         self.input = 'ROI'
 18 |         self.device = torch.device(cfg.MODEL.DEVICE)
 19 |         self.coef = cfg.MODEL.SS.COEF
 20 |         
 21 |         self.enc1 = nn.Sequential(
 22 |             nn.Conv2d(cin, 256, kernel_size=3, padding=0, bias=True),
 23 |             # nn.BatchNorm2d(256),
 24 |             nn.ReLU(inplace=True),
 25 |             nn.Conv2d(256, 256, kernel_size=3, padding=0, bias=True),
 26 |             # nn.BatchNorm2d(256),
 27 |             nn.ReLU(inplace=True),
 28 |             nn.AdaptiveAvgPool2d(1),
 29 |         )
 30 | 
 31 |         self.topk = 100
 32 |         self.bs = cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE
 33 |         self.scale = cfg.MODEL.SS.LOSS_SCALE
 34 | 
 35 |         for m in self.modules():
 36 |             if isinstance(m, nn.Linear):
 37 |                 nn.init.kaiming_normal_(m.weight, mode='fan_out')
 38 |                 m.bias.data.zero_()
 39 |             elif isinstance(m, nn.Conv2d):
 40 |                 nn.init.kaiming_normal_(m.weight, mode='fan_out',
 41 |                                         nonlinearity='relu')
 42 |                 if m.bias is not None:
 43 |                     nn.init.constant_(m.bias, 0)
 44 |             elif isinstance(m, nn.BatchNorm2d):
 45 |                 nn.init.constant_(m.weight, 0)
 46 | 
 47 |     def cal_pair_dist(self, feat_u, feat_v):
 48 |         # finding the similarity score of feat_v
 49 |         us = feat_u.size(0)
 50 |         vs = feat_v.size(0)
 51 |         fs = feat_u.size(1)
 52 |         assert fs == feat_v.size(1)
 53 | 
 54 |         dist = torch.cdist(feat_u, feat_v, p=2).pow(2) * self.coef
 55 |         # uu = feat_u.unsqueeze(1).repeat(1, vs, 1).view(-1, fs)
 56 |         # vv = feat_v.repeat(us, 1)
 57 |         #
 58 |         # diff = uu - vv
 59 |         # dist = (diff * diff).sum(dim=1).view(us, vs) * self.coef
 60 |         score = F.softmax(dist, dim=1)
 61 |         return dist, score
 62 | 
 63 |     def computer_corr_softmax(self, feat_u, feat_v):
 64 |         # track forward
 65 |         # calculate the L2 distance between feat_u and feat_v
 66 |         
 67 |         sim_dist, sim_score = self.cal_pair_dist(feat_u, feat_v)
 68 |         soft_v = torch.matmul(sim_score, feat_v)
 69 | 
 70 |         # track backward
 71 |         back_dist, back_score = self.cal_pair_dist(soft_v, feat_u)
 72 |         labels = torch.arange(len(feat_u)).long().to(back_dist.device)
 73 |         loss = nn.CrossEntropyLoss()(back_dist, labels)
 74 | 
 75 |         if back_dist.size(1) == 0:# there is no objects in the first frame.
 76 |             print(back_dist.size(), feat_u.size(), feat_v.size(), loss)
 77 |         correct = (back_dist.argmax(dim=1) == labels).float().sum()
 78 |         count = len(back_dist)
 79 |         return loss, correct, count
 80 | 
 81 | 
 82 |     def forward(self, features, prev_boxes=None):
 83 |         features, idxs, proposals = features
 84 |         total_loss = 0.0
 85 |         corrects = 0
 86 |         counts = 0
 87 |         prev = 0
 88 |         # since the number of proposals might be different for different pairs
 89 |         if prev_boxes is not None:
 90 |             feat_u = self.enc1(features)
 91 |             feat_v = self.enc1(prev_boxes)
 92 |             feat_u = feat_u.view(feat_u.size(0), feat_u.size(1))
 93 |             feat_v = feat_v.view(feat_v.size(0), feat_v.size(1))
 94 |             if feat_u.size(0) == 0:
 95 |                 print(feat_u, feat_v)
 96 |                 return {'loss_cycle': feat_u.sum() * self.scale}, 0.
 97 |             total_loss, correct, cnt = self.computer_corr_softmax(feat_u, feat_v)
 98 |             # print('correct: ', correct, 'cnt: ', cnt)
 99 |             total_acc = correct.item()/cnt
100 | 
101 |         else:
102 |             for i in range(0, len(idxs), 2):
103 |                 u = features[prev:idxs[i]]
104 |                 v = features[idxs[i]: idxs[i+1]]
105 |                 prev = idxs[i+1]
106 |                 feat_u = self.enc1(u)
107 |                 feat_v = self.enc1(v)
108 |                 feat_u = feat_u.view(feat_u.size(0), feat_u.size(1))
109 |                 feat_v = feat_v.view(feat_v.size(0), feat_v.size(1))
110 |                 if feat_u.size(0) == 0:
111 |                     print(feat_u.size(), feat_v.size())
112 |                     loss = feat_u.sum()
113 |                     correct = 0
114 |                     cnt = 0
115 |                 else:
116 |                     loss, correct, cnt = self.computer_corr_softmax(feat_u, feat_v)
117 |                     # print(u.view(-1, 256*49).norm(1))
118 |                 total_loss += loss*cnt
119 |                 corrects += correct
120 |                 counts += cnt
121 | 
122 |             if counts != 0:
123 |                 total_loss /= counts
124 |                 total_acc = corrects/counts
125 |             else:
126 |                 total_acc = 0.
127 | 
128 |         # print('total loss: {:.4f}\ttotal acc: {:.3f}'.format(total_loss, total_acc))
129 |         return {'loss_cycle': total_loss * self.scale}, total_acc
130 | 
131 | 
132 | @SSHEAD_REGISTRY.register()
133 | def build_cycle_head(cfg, input_shape):
134 |     in_channels = cfg.MODEL.FPN.OUT_CHANNELS
135 |     rot_head = CycleHead(cfg, in_channels)
136 |     return rot_head
137 | 


--------------------------------------------------------------------------------
/src/modeling/self_supervised/cycle_energy.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | 
  6 | from detectron2.structures import ImageList
  7 | 
  8 | from .build import SSHEAD_REGISTRY
  9 | from .ss_layers import Flatten
 10 | 
 11 | 
 12 | class CycleEnergyHead(nn.Module):
 13 |     def __init__(self, cfg, cin):
 14 |         super(CycleEnergyHead, self).__init__()
 15 | 
 16 |         self.name = 'cycle'
 17 |         self.input = 'ROI'
 18 |         self.device = torch.device(cfg.MODEL.DEVICE)
 19 |         self.coef = cfg.MODEL.SS.COEF
 20 |         
 21 |         self.enc1 = nn.Sequential(
 22 |             nn.Conv2d(cin, 256, kernel_size=3, padding=0, bias=True),
 23 |             # nn.BatchNorm2d(256),
 24 |             nn.ReLU(inplace=True),
 25 |             nn.Conv2d(256, 256, kernel_size=3, padding=0, bias=True),
 26 |             # nn.BatchNorm2d(256),
 27 |             nn.ReLU(inplace=True),
 28 |             nn.AdaptiveAvgPool2d(1)
 29 |             # nn.Flatten(start_dim=1, end_dim=-1)
 30 |         )
 31 |         self.map_back = nn.Linear(256, 256*49)
 32 | 
 33 |         self.topk = 100
 34 |         self.bs = cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE
 35 |         self.scale = cfg.MODEL.SS.LOSS_SCALE
 36 | 
 37 |         for m in self.modules():
 38 |             if isinstance(m, nn.Linear):
 39 |                 nn.init.kaiming_normal_(m.weight, mode='fan_out')
 40 |                 m.bias.data.zero_()
 41 |             elif isinstance(m, nn.Conv2d):
 42 |                 nn.init.kaiming_normal_(m.weight, mode='fan_out',
 43 |                                         nonlinearity='relu')
 44 |                 if m.bias is not None:
 45 |                     nn.init.constant_(m.bias, 0)
 46 |             elif isinstance(m, nn.BatchNorm2d):
 47 |                 nn.init.constant_(m.weight, 0)
 48 | 
 49 |     def cal_pair_dist(self, feat_u, feat_v):
 50 |         # finding the similarity score of feat_v
 51 |         us = feat_u.size(0)
 52 |         vs = feat_v.size(0)
 53 |         fs = feat_u.size(1)
 54 |         assert fs == feat_v.size(1)
 55 | 
 56 |         uu = feat_u.unsqueeze(1).repeat(1, vs, 1).view(-1, fs)
 57 |         vv = feat_v.repeat(us, 1)
 58 | 
 59 |         diff = uu - vv
 60 |         dist = (diff * diff).sum(dim=1).view(us, vs) * self.coef
 61 |         score = F.softmax(dist, dim=1)
 62 |         return dist, score
 63 | 
 64 |     def computer_corr_softmax(self, feat_u, feat_v):
 65 |         # track forward
 66 |         # calculate the L2 distance between feat_u and feat_v
 67 |         
 68 |         sim_dist, sim_score = self.cal_pair_dist(feat_u, feat_v)
 69 |         soft_v = torch.matmul(sim_score, feat_v)
 70 | 
 71 |         # track backward
 72 |         back_dist, back_score = self.cal_pair_dist(soft_v, feat_u)
 73 |         labels = torch.arange(len(feat_u)).long().to(back_dist.device)
 74 |         loss = nn.CrossEntropyLoss()(back_dist, labels)
 75 | 
 76 |         if back_dist.size(1) == 0:# there is no objects in the first frame.
 77 |             print(back_dist.size(), feat_u.size(), feat_v.size(), loss)
 78 |         correct = (back_dist.argmax(dim=1) == labels).float().sum()
 79 |         count = len(back_dist)
 80 |         return loss, correct, count, soft_v
 81 | 
 82 | 
 83 |     def forward(self, features, prev_boxes=None):
 84 |         features, idxs, proposals = features
 85 |         total_loss = 0.0
 86 |         corrects = 0
 87 |         counts = 0
 88 |         pos_fea= None
 89 |         neg_fea = None
 90 |         prev = 0
 91 |         # since the number of proposals might be different for different pairs
 92 |         if prev_boxes is not None:
 93 |             feat_u = self.enc1(features)
 94 |             feat_v = self.enc1(prev_boxes)
 95 |             feat_u = feat_u.view(feat_u.size(0), feat_u.size(1))
 96 |             feat_v = feat_v.view(feat_v.size(0), feat_v.size(1))
 97 |             if feat_u.size(0) == 0:
 98 |                 print(feat_u, feat_v)
 99 |                 return {'loss_cycle': feat_u.sum() * self.scale}, 0.
100 |             total_loss, correct, cnt, _ = self.computer_corr_softmax(feat_u, feat_v)
101 |             # print('correct: ', correct, 'cnt: ', cnt)
102 |             total_acc = correct.item()/cnt
103 | 
104 |         else:
105 |             for i in range(0, len(idxs), 2):
106 |                 u = features[prev:idxs[i]]
107 |                 v = features[idxs[i]: idxs[i+1]]
108 |                 prev = idxs[i+1]
109 |                 feat_u = self.enc1(u)
110 |                 feat_v = self.enc1(v)
111 |                 feat_u = feat_u.view(feat_u.size(0), feat_u.size(1))
112 |                 feat_v = feat_v.view(feat_v.size(0), feat_v.size(1))
113 |                 if feat_u.size(0) == 0:
114 |                     print(feat_u.size(), feat_v.size())
115 |                     loss = feat_u.sum()
116 |                     correct = 0
117 |                     cnt = 0
118 |                 else:
119 |                     loss, correct, cnt, soft_target = self.computer_corr_softmax(feat_u, feat_v)
120 |                     if pos_fea is None:
121 |                         pos_fea = self.map_back(feat_u)
122 |                         neg_fea = self.map_back(soft_target)
123 |                     else:
124 |                         pos_fea = torch.cat([pos_fea, self.map_back(feat_u)], 0)
125 |                         neg_fea = torch.cat([neg_fea, self.map_back(soft_target)], 0)
126 | 
127 |                 total_loss += loss*cnt
128 |                 corrects += correct
129 |                 counts += cnt
130 |             # breakpoint()
131 |             if counts != 0:
132 |                 total_loss /= counts
133 |                 total_acc = corrects/counts
134 |             else:
135 |                 total_acc = 0.
136 |         if pos_fea is not None:
137 |             assert len(pos_fea) == len(neg_fea)
138 |             # print('total loss: {:.4f}\ttotal acc: {:.3f}'.format(total_loss, total_acc))
139 |             return {'loss_cycle': total_loss * self.scale}, total_acc, torch.cat([pos_fea, neg_fea], 0)
140 |         else:
141 |             return {'loss_cycle': total_loss * self.scale}, total_acc, None
142 | 
143 | 
144 | @SSHEAD_REGISTRY.register()
145 | def build_cycle_energy_head(cfg, input_shape):
146 |     in_channels = cfg.MODEL.FPN.OUT_CHANNELS
147 |     rot_head = CycleEnergyHead(cfg, in_channels)
148 |     return rot_head
149 | 


--------------------------------------------------------------------------------
/src/modeling/self_supervised/cycle_energy_1024_latter.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | 
  6 | from detectron2.structures import ImageList
  7 | 
  8 | from .build import SSHEAD_REGISTRY
  9 | from .ss_layers import Flatten
 10 | 
 11 | 
 12 | class CycleEnergy1024LatterHead(nn.Module):
 13 |     def __init__(self, cfg, cin):
 14 |         super(CycleEnergy1024LatterHead, self).__init__()
 15 | 
 16 |         self.name = 'cycle'
 17 |         self.input = 'ROI'
 18 |         self.device = torch.device(cfg.MODEL.DEVICE)
 19 |         self.coef = cfg.MODEL.SS.COEF
 20 |         
 21 |         self.enc1 = nn.Sequential(
 22 |             nn.Conv2d(cin, 256, kernel_size=3, padding=0, bias=True),
 23 |             # nn.BatchNorm2d(256),
 24 |             nn.ReLU(inplace=True),
 25 |             nn.Conv2d(256, 256, kernel_size=3, padding=0, bias=True),
 26 |             # nn.BatchNorm2d(256),
 27 |             nn.ReLU(inplace=True),
 28 |             nn.AdaptiveAvgPool2d(1)
 29 |             # nn.Flatten(start_dim=1, end_dim=-1)
 30 |         )
 31 |         self.map_back = nn.Linear(256, 1024)
 32 | 
 33 |         self.topk = 100
 34 |         self.bs = cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE
 35 |         self.scale = cfg.MODEL.SS.LOSS_SCALE
 36 | 
 37 |         for m in self.modules():
 38 |             if isinstance(m, nn.Linear):
 39 |                 nn.init.kaiming_normal_(m.weight, mode='fan_out')
 40 |                 m.bias.data.zero_()
 41 |             elif isinstance(m, nn.Conv2d):
 42 |                 nn.init.kaiming_normal_(m.weight, mode='fan_out',
 43 |                                         nonlinearity='relu')
 44 |                 if m.bias is not None:
 45 |                     nn.init.constant_(m.bias, 0)
 46 |             elif isinstance(m, nn.BatchNorm2d):
 47 |                 nn.init.constant_(m.weight, 0)
 48 | 
 49 |     def cal_pair_dist(self, feat_u, feat_v):
 50 |         # finding the similarity score of feat_v
 51 |         us = feat_u.size(0)
 52 |         vs = feat_v.size(0)
 53 |         fs = feat_u.size(1)
 54 |         assert fs == feat_v.size(1)
 55 | 
 56 |         uu = feat_u.unsqueeze(1).repeat(1, vs, 1).view(-1, fs)
 57 |         vv = feat_v.repeat(us, 1)
 58 | 
 59 |         diff = uu - vv
 60 |         dist = (diff * diff).sum(dim=1).view(us, vs) * self.coef
 61 |         score = F.softmax(dist, dim=1)
 62 |         return dist, score
 63 | 
 64 |     def computer_corr_softmax(self, feat_u, feat_v):
 65 |         # track forward
 66 |         # calculate the L2 distance between feat_u and feat_v
 67 |         
 68 |         sim_dist, sim_score = self.cal_pair_dist(feat_u, feat_v)
 69 |         soft_v = torch.matmul(sim_score, feat_v)
 70 | 
 71 |         # track backward
 72 |         back_dist, back_score = self.cal_pair_dist(soft_v, feat_u)
 73 |         labels = torch.arange(len(feat_u)).long().to(back_dist.device)
 74 |         loss = nn.CrossEntropyLoss()(back_dist, labels)
 75 | 
 76 |         if back_dist.size(1) == 0:# there is no objects in the first frame.
 77 |             print(back_dist.size(), feat_u.size(), feat_v.size(), loss)
 78 |         correct = (back_dist.argmax(dim=1) == labels).float().sum()
 79 |         count = len(back_dist)
 80 |         return loss, correct, count, soft_v
 81 | 
 82 | 
 83 |     def forward(self, features, prev_boxes=None):
 84 |         features, idxs, proposals = features
 85 |         total_loss = 0.0
 86 |         corrects = 0
 87 |         counts = 0
 88 |         prev = 0
 89 |         pos_fea = None
 90 |         neg_fea = None
 91 |         # since the number of proposals might be different for different pairs
 92 |         if prev_boxes is not None:
 93 |             feat_u = self.enc1(features)
 94 |             feat_v = self.enc1(prev_boxes)
 95 |             feat_u = feat_u.view(feat_u.size(0), feat_u.size(1))
 96 |             feat_v = feat_v.view(feat_v.size(0), feat_v.size(1))
 97 |             if feat_u.size(0) == 0:
 98 |                 print(feat_u, feat_v)
 99 |                 return {'loss_cycle': feat_u.sum() * self.scale}, 0.
100 |             total_loss, correct, cnt, _ = self.computer_corr_softmax(feat_u, feat_v)
101 |             # print('correct: ', correct, 'cnt: ', cnt)
102 |             total_acc = correct.item()/cnt
103 | 
104 |         else:
105 |             for i in range(0, len(idxs), 2):
106 |                 u = features[prev:idxs[i]]
107 |                 v = features[idxs[i]: idxs[i+1]]
108 |                 prev = idxs[i+1]
109 |                 # breakpoint()
110 |                 feat_u = self.enc1(u)
111 |                 feat_v = self.enc1(v)
112 |                 feat_u = feat_u.view(feat_u.size(0), feat_u.size(1))
113 |                 feat_v = feat_v.view(feat_v.size(0), feat_v.size(1))
114 |                 if feat_u.size(0) == 0:
115 |                     print(feat_u.size(), feat_v.size())
116 |                     loss = feat_u.sum()
117 |                     correct = 0
118 |                     cnt = 0
119 |                 else:
120 |                     loss, correct, cnt, soft_target = self.computer_corr_softmax(feat_u, feat_v)
121 |                     if pos_fea is None:
122 |                         pos_fea = self.map_back(feat_u)
123 |                         neg_fea = self.map_back(soft_target)
124 |                     else:
125 |                         pos_fea = torch.cat([pos_fea, self.map_back(feat_u)], 0)
126 |                         neg_fea = torch.cat([neg_fea, self.map_back(soft_target)], 0)
127 | 
128 |                 total_loss += loss*cnt
129 |                 corrects += correct
130 |                 counts += cnt
131 |             # breakpoint()
132 |             if counts != 0:
133 |                 total_loss /= counts
134 |                 total_acc = corrects/counts
135 |             else:
136 |                 total_acc = 0.
137 |         if pos_fea is not None:
138 |             assert len(pos_fea) == len(neg_fea)
139 |             # print('total loss: {:.4f}\ttotal acc: {:.3f}'.format(total_loss, total_acc))
140 |             return {'loss_cycle': total_loss * self.scale}, total_acc, torch.cat([pos_fea, neg_fea], 0)
141 |         else:
142 |             return {'loss_cycle': total_loss * self.scale}, total_acc, None
143 | 
144 | 
145 | @SSHEAD_REGISTRY.register()
146 | def build_cycle_energy_1024_latter_head(cfg, input_shape):
147 |     in_channels = cfg.MODEL.FPN.OUT_CHANNELS
148 |     rot_head = CycleEnergy1024LatterHead(cfg, in_channels)
149 |     return rot_head
150 | 


--------------------------------------------------------------------------------
/src/modeling/self_supervised/cycle_energy_direct.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | 
  6 | from detectron2.structures import ImageList
  7 | 
  8 | from .build import SSHEAD_REGISTRY
  9 | from .ss_layers import Flatten
 10 | 
 11 | 
 12 | class CycleEnergyDirectHead(nn.Module):
 13 |     def __init__(self, cfg, cin):
 14 |         super(CycleEnergyDirectHead, self).__init__()
 15 | 
 16 |         self.name = 'cycle'
 17 |         self.input = 'ROI'
 18 |         self.device = torch.device(cfg.MODEL.DEVICE)
 19 |         self.coef = cfg.MODEL.SS.COEF
 20 |         
 21 |         self.enc1 = nn.Sequential(
 22 |             nn.Conv2d(cin, 256, kernel_size=3, padding=0, bias=True),
 23 |             # nn.BatchNorm2d(256),
 24 |             nn.ReLU(inplace=True),
 25 |             nn.Conv2d(256, 256, kernel_size=3, padding=0, bias=True),
 26 |             # nn.BatchNorm2d(256),
 27 |             nn.ReLU(inplace=True),
 28 |             nn.AdaptiveAvgPool2d(1)
 29 |             # nn.Flatten(start_dim=1, end_dim=-1)
 30 |         )
 31 |         # self.map_back = nn.Linear(256, 256*49)
 32 | 
 33 |         self.topk = 100
 34 |         self.bs = cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE
 35 |         self.scale = cfg.MODEL.SS.LOSS_SCALE
 36 | 
 37 |         for m in self.modules():
 38 |             if isinstance(m, nn.Linear):
 39 |                 nn.init.kaiming_normal_(m.weight, mode='fan_out')
 40 |                 m.bias.data.zero_()
 41 |             elif isinstance(m, nn.Conv2d):
 42 |                 nn.init.kaiming_normal_(m.weight, mode='fan_out',
 43 |                                         nonlinearity='relu')
 44 |                 if m.bias is not None:
 45 |                     nn.init.constant_(m.bias, 0)
 46 |             elif isinstance(m, nn.BatchNorm2d):
 47 |                 nn.init.constant_(m.weight, 0)
 48 | 
 49 |     def cal_pair_dist(self, feat_u, feat_v):
 50 |         # finding the similarity score of feat_v
 51 |         us = feat_u.size(0)
 52 |         vs = feat_v.size(0)
 53 |         fs = feat_u.size(1)
 54 |         assert fs == feat_v.size(1)
 55 | 
 56 |         uu = feat_u.unsqueeze(1).repeat(1, vs, 1).view(-1, fs)
 57 |         vv = feat_v.repeat(us, 1)
 58 | 
 59 |         diff = uu - vv
 60 |         dist = (diff * diff).sum(dim=1).view(us, vs) * self.coef
 61 |         score = F.softmax(dist, dim=1)
 62 |         return dist, score
 63 | 
 64 |     def computer_corr_softmax(self, feat_u, feat_v):
 65 |         # track forward
 66 |         # calculate the L2 distance between feat_u and feat_v
 67 |         
 68 |         sim_dist, sim_score = self.cal_pair_dist(feat_u, feat_v)
 69 |         soft_v = torch.matmul(sim_score, feat_v)
 70 | 
 71 |         # track backward
 72 |         back_dist, back_score = self.cal_pair_dist(soft_v, feat_u)
 73 |         labels = torch.arange(len(feat_u)).long().to(back_dist.device)
 74 |         loss = nn.CrossEntropyLoss()(back_dist, labels)
 75 | 
 76 |         if back_dist.size(1) == 0:# there is no objects in the first frame.
 77 |             print(back_dist.size(), feat_u.size(), feat_v.size(), loss)
 78 |         correct = (back_dist.argmax(dim=1) == labels).float().sum()
 79 |         count = len(back_dist)
 80 |         return loss, correct, count, sim_score
 81 | 
 82 | 
 83 |     def forward(self, features, prev_boxes=None):
 84 |         features, idxs, proposals = features
 85 |         total_loss = 0.0
 86 |         corrects = 0
 87 |         counts = 0
 88 |         pos_fea= None
 89 |         neg_fea = None
 90 |         prev = 0
 91 |         # since the number of proposals might be different for different pairs
 92 |         if prev_boxes is not None:
 93 |             feat_u = self.enc1(features)
 94 |             feat_v = self.enc1(prev_boxes)
 95 |             feat_u = feat_u.view(feat_u.size(0), feat_u.size(1))
 96 |             feat_v = feat_v.view(feat_v.size(0), feat_v.size(1))
 97 |             if feat_u.size(0) == 0:
 98 |                 print(feat_u, feat_v)
 99 |                 return {'loss_cycle': feat_u.sum() * self.scale}, 0.
100 |             total_loss, correct, cnt, _ = self.computer_corr_softmax(feat_u, feat_v)
101 |             # print('correct: ', correct, 'cnt: ', cnt)
102 |             total_acc = correct.item()/cnt
103 | 
104 |         else:
105 |             for i in range(0, len(idxs), 2):
106 |                 u = features[prev:idxs[i]]
107 |                 v = features[idxs[i]: idxs[i+1]]
108 |                 prev = idxs[i+1]
109 |                 feat_u = self.enc1(u)
110 |                 feat_v = self.enc1(v)
111 |                 feat_u = feat_u.view(feat_u.size(0), feat_u.size(1))
112 |                 feat_v = feat_v.view(feat_v.size(0), feat_v.size(1))
113 |                 if feat_u.size(0) == 0:
114 |                     print(feat_u.size(), feat_v.size())
115 |                     loss = feat_u.sum()
116 |                     correct = 0
117 |                     cnt = 0
118 |                 else:
119 |                     loss, correct, cnt, soft_target_score = self.computer_corr_softmax(feat_u, feat_v)
120 |                     # breakpoint()
121 |                     if pos_fea is None:
122 |                         pos_fea = u.view(-1, 256*49)
123 |                         neg_fea = torch.matmul(soft_target_score, v.view(-1, 256*49))
124 |                     else:
125 |                         pos_fea = torch.cat([pos_fea, u.view(-1, 256*49)], 0)
126 |                         neg_fea = torch.cat([neg_fea, torch.matmul(soft_target_score, v.view(-1, 256*49))], 0)
127 | 
128 |                 total_loss += loss*cnt
129 |                 corrects += correct
130 |                 counts += cnt
131 |             # breakpoint()
132 |             if counts != 0:
133 |                 total_loss /= counts
134 |                 total_acc = corrects/counts
135 |             else:
136 |                 total_acc = 0.
137 |         if pos_fea is not None:
138 |             assert len(pos_fea) == len(neg_fea)
139 |             # print('total loss: {:.4f}\ttotal acc: {:.3f}'.format(total_loss, total_acc))
140 |             return {'loss_cycle': total_loss * self.scale}, total_acc, torch.cat([pos_fea, neg_fea], 0)
141 |         else:
142 |             return {'loss_cycle': total_loss * self.scale}, total_acc, None
143 | 
144 | 
145 | @SSHEAD_REGISTRY.register()
146 | def build_cycle_energy_direct_head(cfg, input_shape):
147 |     in_channels = cfg.MODEL.FPN.OUT_CHANNELS
148 |     rot_head = CycleEnergyDirectHead(cfg, in_channels)
149 |     return rot_head
150 | 


--------------------------------------------------------------------------------
/src/modeling/self_supervised/cycle_energy_direct_max.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | 
  6 | from detectron2.structures import ImageList
  7 | 
  8 | from .build import SSHEAD_REGISTRY
  9 | from .ss_layers import Flatten
 10 | 
 11 | 
 12 | class CycleEnergyDirectMaxHead(nn.Module):
 13 |     def __init__(self, cfg, cin):
 14 |         super(CycleEnergyDirectMaxHead, self).__init__()
 15 | 
 16 |         self.name = 'cycle'
 17 |         self.input = 'ROI'
 18 |         self.device = torch.device(cfg.MODEL.DEVICE)
 19 |         self.coef = cfg.MODEL.SS.COEF
 20 |         
 21 |         self.enc1 = nn.Sequential(
 22 |             nn.Conv2d(cin, 256, kernel_size=3, padding=0, bias=True),
 23 |             # nn.BatchNorm2d(256),
 24 |             nn.ReLU(inplace=True),
 25 |             nn.Conv2d(256, 256, kernel_size=3, padding=0, bias=True),
 26 |             # nn.BatchNorm2d(256),
 27 |             nn.ReLU(inplace=True),
 28 |             nn.AdaptiveAvgPool2d(1)
 29 |             # nn.Flatten(start_dim=1, end_dim=-1)
 30 |         )
 31 |         # self.map_back = nn.Linear(256, 256*49)
 32 |         self.add = nn.Conv2d(256, 256, kernel_size=1)
 33 |         self.topk = 100
 34 |         self.bs = cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE
 35 |         self.scale = cfg.MODEL.SS.LOSS_SCALE
 36 | 
 37 |         for m in self.modules():
 38 |             if isinstance(m, nn.Linear):
 39 |                 nn.init.kaiming_normal_(m.weight, mode='fan_out')
 40 |                 m.bias.data.zero_()
 41 |             elif isinstance(m, nn.Conv2d):
 42 |                 nn.init.kaiming_normal_(m.weight, mode='fan_out',
 43 |                                         nonlinearity='relu')
 44 |                 if m.bias is not None:
 45 |                     nn.init.constant_(m.bias, 0)
 46 |             elif isinstance(m, nn.BatchNorm2d):
 47 |                 nn.init.constant_(m.weight, 0)
 48 | 
 49 |     def cal_pair_dist(self, feat_u, feat_v):
 50 |         # finding the similarity score of feat_v
 51 |         us = feat_u.size(0)
 52 |         vs = feat_v.size(0)
 53 |         fs = feat_u.size(1)
 54 |         assert fs == feat_v.size(1)
 55 | 
 56 |         uu = feat_u.unsqueeze(1).repeat(1, vs, 1).view(-1, fs)
 57 |         vv = feat_v.repeat(us, 1)
 58 | 
 59 |         diff = uu - vv
 60 |         dist = (diff * diff).sum(dim=1).view(us, vs) * self.coef
 61 |         score = F.softmax(dist, dim=1)
 62 |         return dist, score
 63 | 
 64 |     def computer_corr_softmax(self, feat_u, feat_v):
 65 |         # track forward
 66 |         # calculate the L2 distance between feat_u and feat_v
 67 |         
 68 |         sim_dist, sim_score = self.cal_pair_dist(feat_u, feat_v)
 69 |         soft_v = torch.matmul(sim_score, feat_v)
 70 | 
 71 |         # track backward
 72 |         back_dist, back_score = self.cal_pair_dist(soft_v, feat_u)
 73 |         labels = torch.arange(len(feat_u)).long().to(back_dist.device)
 74 |         loss = nn.CrossEntropyLoss()(back_dist, labels)
 75 | 
 76 |         if back_dist.size(1) == 0:# there is no objects in the first frame.
 77 |             print(back_dist.size(), feat_u.size(), feat_v.size(), loss)
 78 |         correct = (back_dist.argmax(dim=1) == labels).float().sum()
 79 |         count = len(back_dist)
 80 |         return loss, correct, count, sim_score
 81 | 
 82 | 
 83 |     def forward(self, roi_head, features, prev_boxes=None):
 84 |         features, idxs, proposals = features
 85 |         total_loss = 0.0
 86 |         corrects = 0
 87 |         counts = 0
 88 |         pos_fea= None
 89 |         neg_fea = None
 90 |         prev = 0
 91 |         # since the number of proposals might be different for different pairs
 92 |         if prev_boxes is not None:
 93 |             feat_u = self.enc1(features)
 94 |             feat_v = self.enc1(prev_boxes)
 95 |             feat_u = feat_u.view(feat_u.size(0), feat_u.size(1))
 96 |             feat_v = feat_v.view(feat_v.size(0), feat_v.size(1))
 97 |             if feat_u.size(0) == 0:
 98 |                 print(feat_u, feat_v)
 99 |                 return {'loss_cycle': feat_u.sum() * self.scale}, 0.
100 |             total_loss, correct, cnt, _ = self.computer_corr_softmax(feat_u, feat_v)
101 |             # print('correct: ', correct, 'cnt: ', cnt)
102 |             total_acc = correct.item()/cnt
103 | 
104 |         else:
105 |             for i in range(0, len(idxs), 2):
106 |                 u = features[prev:idxs[i]]
107 |                 v = features[idxs[i]: idxs[i+1]]
108 |                 prev = idxs[i+1]
109 |                 feat_u = self.enc1(u)
110 |                 feat_v = self.enc1(v)
111 |                 feat_u = feat_u.view(feat_u.size(0), feat_u.size(1))
112 |                 feat_v = feat_v.view(feat_v.size(0), feat_v.size(1))
113 |                 if feat_u.size(0) == 0:
114 |                     print(feat_u.size(), feat_v.size())
115 |                     loss = feat_u.sum()
116 |                     correct = 0
117 |                     cnt = 0
118 |                 else:
119 |                     loss, correct, cnt, soft_target_score = self.computer_corr_softmax(feat_u, feat_v)
120 |                     max_indices = torch.argmax(soft_target_score, 1).view(-1)
121 |                     # breakpoint()
122 |                     if pos_fea is None:
123 |                         pos_fea = self.add(u).view(-1, 256*49)
124 |                         neg_fea = self.add(v).view(-1, 256*49)[max_indices]
125 |                         #torch.matmul(soft_target_score, v.view(-1, 256*49))
126 |                     else:
127 |                         pos_fea = torch.cat([pos_fea, self.add(u).view(-1, 256*49)], 0)
128 |                         neg_fea = torch.cat([neg_fea, self.add(v).view(-1, 256*49)[max_indices]], 0)
129 | 
130 |                 total_loss += loss*cnt
131 |                 corrects += correct
132 |                 counts += cnt
133 |             # breakpoint()
134 |             if counts != 0:
135 |                 total_loss /= counts
136 |                 total_acc = corrects/counts
137 |             else:
138 |                 total_acc = 0.
139 |         if pos_fea is not None:
140 |             assert len(pos_fea) == len(neg_fea)
141 |             # print('total loss: {:.4f}\ttotal acc: {:.3f}'.format(total_loss, total_acc))
142 |             return {'loss_cycle': total_loss * self.scale}, total_acc, torch.cat([pos_fea, neg_fea], 0), None
143 |         else:
144 |             return {'loss_cycle': total_loss * self.scale}, total_acc, None
145 | 
146 | 
147 | @SSHEAD_REGISTRY.register()
148 | def build_cycle_energy_direct_add_max_head(cfg, input_shape):
149 |     in_channels = cfg.MODEL.FPN.OUT_CHANNELS
150 |     rot_head = CycleEnergyDirectMaxHead(cfg, in_channels)
151 |     return rot_head
152 | 


--------------------------------------------------------------------------------
/src/modeling/self_supervised/cycle_energy_direct_no.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | 
  6 | from detectron2.structures import ImageList
  7 | 
  8 | from .build import SSHEAD_REGISTRY
  9 | from .ss_layers import Flatten
 10 | 
 11 | 
 12 | class CycleEnergyDirectAddNoHead(nn.Module):
 13 |     def __init__(self, cfg, cin):
 14 |         super(CycleEnergyDirectAddNoHead, self).__init__()
 15 | 
 16 |         self.name = 'cycle'
 17 |         self.input = 'ROI'
 18 |         self.device = torch.device(cfg.MODEL.DEVICE)
 19 |         self.coef = cfg.MODEL.SS.COEF
 20 |         
 21 |         # self.enc1 = nn.Sequential(
 22 |         #     nn.Conv2d(cin, 256, kernel_size=3, padding=0, bias=True),
 23 |         #     # nn.BatchNorm2d(256),
 24 |         #     nn.ReLU(inplace=True),
 25 |         #     nn.Conv2d(256, 256, kernel_size=3, padding=0, bias=True),
 26 |         #     # nn.BatchNorm2d(256),
 27 |         #     nn.ReLU(inplace=True),
 28 |         #     nn.AdaptiveAvgPool2d(1)
 29 |         #     # nn.Flatten(start_dim=1, end_dim=-1)
 30 |         # )
 31 |         # self.add = nn.Conv2d(256, 256, kernel_size=1)
 32 |         # self.map_back = nn.Linear(256, 256*49)
 33 | 
 34 |         self.topk = 100
 35 |         self.bs = cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE
 36 |         self.scale = cfg.MODEL.SS.LOSS_SCALE
 37 | 
 38 |         for m in self.modules():
 39 |             if isinstance(m, nn.Linear):
 40 |                 nn.init.kaiming_normal_(m.weight, mode='fan_out')
 41 |                 m.bias.data.zero_()
 42 |             elif isinstance(m, nn.Conv2d):
 43 |                 nn.init.kaiming_normal_(m.weight, mode='fan_out',
 44 |                                         nonlinearity='relu')
 45 |                 if m.bias is not None:
 46 |                     nn.init.constant_(m.bias, 0)
 47 |             elif isinstance(m, nn.BatchNorm2d):
 48 |                 nn.init.constant_(m.weight, 0)
 49 | 
 50 |     def cal_pair_dist(self, feat_u, feat_v):
 51 |         # finding the similarity score of feat_v
 52 |         us = feat_u.size(0)
 53 |         vs = feat_v.size(0)
 54 |         fs = feat_u.size(1)
 55 |         assert fs == feat_v.size(1)
 56 | 
 57 |         uu = feat_u.unsqueeze(1).repeat(1, vs, 1).view(-1, fs)
 58 |         vv = feat_v.repeat(us, 1)
 59 | 
 60 |         diff = uu - vv
 61 |         dist = (diff * diff).sum(dim=1).view(us, vs) * self.coef
 62 |         score = F.softmax(dist, dim=1)
 63 |         return dist, score
 64 | 
 65 |     def computer_corr_softmax(self, feat_u, feat_v):
 66 |         # track forward
 67 |         # calculate the L2 distance between feat_u and feat_v
 68 |         
 69 |         sim_dist, sim_score = self.cal_pair_dist(feat_u, feat_v)
 70 |         # soft_v = torch.matmul(sim_score, feat_v)
 71 |         #
 72 |         # # track backward
 73 |         # back_dist, back_score = self.cal_pair_dist(soft_v, feat_u)
 74 |         # labels = torch.arange(len(feat_u)).long().to(back_dist.device)
 75 |         # loss = nn.CrossEntropyLoss()(back_dist, labels)
 76 |         #
 77 |         # if back_dist.size(1) == 0:# there is no objects in the first frame.
 78 |         #     print(back_dist.size(), feat_u.size(), feat_v.size(), loss)
 79 |         # correct = (back_dist.argmax(dim=1) == labels).float().sum()
 80 |         # count = len(back_dist)
 81 |         return torch.zeros(1).cuda(), 0, 0, sim_score
 82 | 
 83 | 
 84 |     def forward(self, features, prev_boxes=None):
 85 |         features, idxs, proposals = features
 86 |         total_loss = 0.0
 87 |         corrects = 0
 88 |         counts = 0
 89 |         pos_fea= None
 90 |         neg_fea = None
 91 |         prev = 0
 92 |         # since the number of proposals might be different for different pairs
 93 |         if prev_boxes is not None:
 94 |             feat_u = self.enc1(features)
 95 |             feat_v = self.enc1(prev_boxes)
 96 |             feat_u = feat_u.view(feat_u.size(0), feat_u.size(1))
 97 |             feat_v = feat_v.view(feat_v.size(0), feat_v.size(1))
 98 |             if feat_u.size(0) == 0:
 99 |                 print(feat_u, feat_v)
100 |                 return {'loss_cycle': feat_u.sum() * self.scale}, 0.
101 |             total_loss, correct, cnt, _ = self.computer_corr_softmax(feat_u, feat_v)
102 |             # print('correct: ', correct, 'cnt: ', cnt)
103 |             total_acc = correct.item()/cnt
104 | 
105 |         else:
106 |             for i in range(0, len(idxs), 2):
107 |                 u = features[prev:idxs[i]]
108 |                 v = features[idxs[i]: idxs[i+1]]
109 |                 prev = idxs[i+1]
110 |                 feat_u = u.view(-1, 256*49)#self.enc1(u)
111 |                 feat_v = v.view(-1, 256*49)#self.enc1(v)
112 |                 feat_u = feat_u.view(feat_u.size(0), feat_u.size(1))
113 |                 feat_v = feat_v.view(feat_v.size(0), feat_v.size(1))
114 |                 if feat_u.size(0) == 0:
115 |                     print(feat_u.size(), feat_v.size())
116 |                     loss = feat_u.sum()
117 |                     correct = 0
118 |                     cnt = 0
119 |                 else:
120 |                     loss, correct, cnt, soft_target_score = self.computer_corr_softmax(feat_u, feat_v)
121 |                     # breakpoint()
122 |                     if pos_fea is None:
123 |                         pos_fea = u.view(-1, 256*49)
124 |                         neg_fea = torch.matmul(soft_target_score, v.view(-1, 256*49))
125 |                         # breakpoint()
126 |                     else:
127 |                         pos_fea = torch.cat([pos_fea, u.view(-1, 256*49)], 0)
128 |                         neg_fea = torch.cat([neg_fea, torch.matmul(soft_target_score, v.view(-1, 256*49))], 0)
129 | 
130 |                 total_loss += loss*cnt
131 |                 corrects += correct
132 |                 counts += cnt
133 |             # breakpoint()
134 |             if counts != 0:
135 |                 total_loss /= counts
136 |                 total_acc = corrects/counts
137 |             else:
138 |                 total_acc = 0.
139 |         if pos_fea is not None:
140 |             assert len(pos_fea) == len(neg_fea)
141 |             # print('total loss: {:.4f}\ttotal acc: {:.3f}'.format(total_loss, total_acc))
142 |             return {'loss_cycle': total_loss * self.scale}, total_acc, torch.cat([pos_fea, neg_fea], 0)
143 |         else:
144 |             return {'loss_cycle': total_loss * self.scale}, total_acc, None
145 | 
146 | 
147 | @SSHEAD_REGISTRY.register()
148 | def build_cycle_energy_direct_no_head(cfg, input_shape):
149 |     in_channels = cfg.MODEL.FPN.OUT_CHANNELS
150 |     rot_head = CycleEnergyDirectAddNoHead(cfg, in_channels)
151 |     return rot_head
152 | 


--------------------------------------------------------------------------------
/src/modeling/self_supervised/rotation.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | 
  6 | from .build import SSHEAD_REGISTRY
  7 | from .ss_layers import Bottleneck, conv1x1, conv3x3
  8 | from ..utils.image_list import ImageList, crop_tensor
  9 | 
 10 | 
 11 | class RotationHead(nn.Module):
 12 |     def __init__(self, cfg, cin):
 13 |         super(RotationHead, self).__init__()
 14 | 
 15 |         # resnet config
 16 |         self.name = 'rot'
 17 |         self.input = 'images'
 18 |         self.device = torch.device(cfg.MODEL.DEVICE)
 19 |         norm_layer = nn.BatchNorm2d
 20 |         self._norm_layer = norm_layer
 21 |         self.dilation = 1
 22 |         self.groups = 1
 23 |         self.base_width = 64
 24 | 
 25 |         # hard code the task specific parameters in order to
 26 |         # support multi-tasking
 27 |         self.crop_size = 224
 28 |         # self.ratio = 2
 29 |         self.ratio = cfg.MODEL.SS.RATIO  # crop image ratio
 30 | 
 31 |         depth = cfg.MODEL.RESNETS.DEPTH
 32 |         stage_ids = {"res2": 0, "res3": 1, "res4": 2, "res5": 3}
 33 |         num_blocks_per_stage = {50: [3, 4, 6, 3], 101: [3, 4, 23, 3],
 34 |                                 152: [3, 8, 36, 3]}[depth]
 35 |         self.start_stage = min(stage_ids[cfg.MODEL.SS.FEAT_LEVEL]+1, 3)
 36 |         self.inplanes = cin
 37 |         self.scale = cfg.MODEL.SS.LOSS_SCALE
 38 | 
 39 |         out_channels = self.inplanes
 40 | 
 41 |         for i in range(self.start_stage, 4):
 42 |             out_channels *= 2
 43 |             setattr(self, "layer{}".format(i),
 44 |                     self._make_layer(Bottleneck, out_channels//4,
 45 |                                      num_blocks_per_stage[i], stride=2))
 46 | 
 47 |         # num_classes = cfg.MODEL.SS.NUM_CLASSES
 48 |         num_classes = 4
 49 |         self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
 50 |         self.fc = nn.Linear(out_channels, num_classes)
 51 |         self.criterion = nn.CrossEntropyLoss()
 52 | 
 53 |         assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
 54 |         num_channels = len(cfg.MODEL.PIXEL_MEAN)
 55 |         pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(num_channels, 1, 1)
 56 |         pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(num_channels, 1, 1)
 57 |         self.normalizer = lambda x: (x - pixel_mean) / pixel_std
 58 | 
 59 |         for m in self.modules():
 60 |             if isinstance(m, nn.Conv2d):
 61 |                 nn.init.kaiming_normal_(m.weight, mode='fan_out',
 62 |                                         nonlinearity='relu')
 63 |             elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
 64 |                 nn.init.constant_(m.weight, 1)
 65 |                 nn.init.constant_(m.bias, 0)
 66 | 
 67 |     def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
 68 |         norm_layer = self._norm_layer
 69 |         downsample = None
 70 |         previous_dilation = self.dilation
 71 |         if dilate:
 72 |             self.dilation *= stride
 73 |             stride = 1
 74 |         if stride != 1 or self.inplanes != planes * block.expansion:
 75 |             downsample = nn.Sequential(
 76 |                 conv1x1(self.inplanes, planes * block.expansion, stride),
 77 |                 norm_layer(planes * block.expansion),
 78 |             )
 79 | 
 80 |         layers = []
 81 |         layers.append(
 82 |             block(self.inplanes, planes, stride, downsample, self.groups,
 83 |                   self.base_width, previous_dilation, norm_layer))
 84 |         self.inplanes = planes * block.expansion
 85 |         for _ in range(1, blocks):
 86 |             layers.append(block(self.inplanes, planes, groups=self.groups,
 87 |                                 base_width=self.base_width,
 88 |                                 dilation=self.dilation,
 89 |                                 norm_layer=norm_layer))
 90 | 
 91 |         return nn.Sequential(*layers)
 92 | 
 93 |     def forward(self, batched_inputs, feat_base, feat_level):
 94 |         x, y = self.gen_ss_inputs(batched_inputs)
 95 |         x = feat_base(x)[feat_level]
 96 |         for i in range(self.start_stage, 4):
 97 |             x = getattr(self, "layer{}".format(i))(x)
 98 | 
 99 |         x = self.avgpool(x)
100 |         bs = x.size(0)
101 |         x = x.squeeze()
102 |         if bs == 1:
103 |             x = x.unsqueeze(0)
104 |         x = self.fc(x)
105 |         loss = self.criterion(x, y.long())
106 |         losses = {'loss_rot_cls': loss * self.scale}
107 |         return x, y, losses
108 | 
109 |     # add the data processing for each task
110 |     def preprocess_image_ss(self, batched_inputs):
111 |         """resize and random crop the images"""
112 |         images = [x["image"].to(self.device) for x in batched_inputs]
113 |         images = [self.normalizer(x) for x in images]
114 |         images = ImageList.from_tensors_crop(images, self.crop_size, self.ratio)
115 |         return images
116 | 
117 |     def gen_ss_inputs(self, batched_inputs):
118 |         """produce rotation targets"""
119 |         images = self.preprocess_image_ss(batched_inputs=batched_inputs)
120 |         tensors = images.tensor.clone().to(self.device)
121 |         targets = torch.zeros(len(tensors)).long().to(self.device)
122 |         for i in range(len(tensors)):
123 |             tar = np.random.choice(4)
124 |             targets[i] = tar
125 |             t = images.tensor[i]
126 |             rot = t.rot90(tar, (1, 2))
127 |             tensors[i] = rot
128 |         images.tensor = tensors
129 |         return tensors, targets
130 | 
131 | 
132 | @SSHEAD_REGISTRY.register()
133 | def build_rotation_head(cfg, input_shape):
134 |     in_channels = input_shape[cfg.MODEL.SS.FEAT_LEVEL].channels
135 |     rot_head = RotationHead(cfg, in_channels)
136 |     return rot_head


--------------------------------------------------------------------------------
/src/modeling/self_supervised/ss_layers.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | 
 5 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
 6 |     """3x3 convolution with padding"""
 7 |     return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
 8 |                      padding=dilation, groups=groups, bias=False, dilation=dilation)
 9 | 
10 | 
11 | def conv1x1(in_planes, out_planes, stride=1):
12 |     """1x1 convolution"""
13 |     return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
14 | 
15 | 
16 | class Bottleneck(nn.Module):
17 |     expansion = 4
18 | 
19 |     def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
20 |                  base_width=64, dilation=1, norm_layer=None):
21 |         super(Bottleneck, self).__init__()
22 |         if norm_layer is None:
23 |             norm_layer = nn.BatchNorm2d
24 |         width = int(planes * (base_width / 64.)) * groups
25 |         # Both self.conv2 and self.downsample layers downsample the
26 |         # input when stride != 1
27 |         self.conv1 = conv1x1(inplanes, width)
28 |         self.bn1 = norm_layer(width)
29 |         self.conv2 = conv3x3(width, width, stride, groups, dilation)
30 |         self.bn2 = norm_layer(width)
31 |         self.conv3 = conv1x1(width, planes * self.expansion)
32 |         self.bn3 = norm_layer(planes * self.expansion)
33 |         self.relu = nn.ReLU(inplace=True)
34 |         self.downsample = downsample
35 |         self.stride = stride
36 | 
37 |     def forward(self, x):
38 |         identity = x
39 | 
40 |         out = self.conv1(x)
41 |         out = self.bn1(out)
42 |         out = self.relu(out)
43 | 
44 |         out = self.conv2(out)
45 |         out = self.bn2(out)
46 |         out = self.relu(out)
47 | 
48 |         out = self.conv3(out)
49 |         out = self.bn3(out)
50 | 
51 |         if self.downsample is not None:
52 |             identity = self.downsample(x)
53 | 
54 |         out += identity
55 |         out = self.relu(out)
56 | 
57 |         return out
58 | 
59 | 
60 | class Flatten(nn.Module):
61 |     def forward(self, input):
62 |         return input.view(input.size(0), -1)
63 | 


--------------------------------------------------------------------------------
/src/modeling/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .image_list import ImageList, crop_tensor


--------------------------------------------------------------------------------
/src/modeling/utils/image_list.py:
--------------------------------------------------------------------------------
  1 | from __future__ import division
  2 | from typing import Any, List, Sequence, Tuple, Union
  3 | import torch
  4 | from torch.nn import functional as F
  5 | import numpy as np
  6 | 
  7 | 
  8 | class ImageList(object):
  9 |     """
 10 |     Structure that holds a list of images (of possibly
 11 |     varying sizes) as a single tensor.
 12 |     This works by padding the images to the same size,
 13 |     and storing in a field the original sizes of each image
 14 |     Attributes:
 15 |         image_sizes (list[tuple[int, int]]): each tuple is (h, w)
 16 |     """
 17 | 
 18 |     def __init__(self, tensor: torch.Tensor, image_sizes: List[Tuple[int, int]]):
 19 |         """
 20 |         Arguments:
 21 |             tensor (Tensor): of shape (N, H, W) or (N, C_1, ..., C_K, H, W) where K >= 1
 22 |             image_sizes (list[tuple[int, int]]): Each tuple is (h, w).
 23 |         """
 24 |         self.tensor = tensor
 25 |         self.image_sizes = image_sizes
 26 | 
 27 |     def __len__(self) -> int:
 28 |         return len(self.image_sizes)
 29 | 
 30 |     def __getitem__(self, idx: Union[int, slice]) -> torch.Tensor:
 31 |         """
 32 |         Access the individual image in its original size.
 33 |         Returns:
 34 |             Tensor: an image of shape (H, W) or (C_1, ..., C_K, H, W) where K >= 1
 35 |         """
 36 |         size = self.image_sizes[idx]
 37 |         return self.tensor[idx, ..., : size[0], : size[1]]  # type: ignore
 38 | 
 39 |     def to(self, *args: Any, **kwargs: Any) -> "ImageList":
 40 |         cast_tensor = self.tensor.to(*args, **kwargs)
 41 |         return ImageList(cast_tensor, self.image_sizes)
 42 | 
 43 |     @staticmethod
 44 |     def from_tensors(
 45 |         tensors: Sequence[torch.Tensor], size_divisibility: int = 0, pad_value: float = 0.0
 46 |     ) -> "ImageList":
 47 |         """
 48 |         Args:
 49 |             tensors: a tuple or list of `torch.Tensors`, each of shape (Hi, Wi) or
 50 |                 (C_1, ..., C_K, Hi, Wi) where K >= 1. The Tensors will be padded with `pad_value`
 51 |                 so that they will have the same shape.
 52 |             size_divisibility (int): If `size_divisibility > 0`, also adds padding to ensure
 53 |                 the common height and width is divisible by `size_divisibility`
 54 |             pad_value (float): value to pad
 55 |         Returns:
 56 |             an `ImageList`.
 57 |         """
 58 |         assert len(tensors) > 0
 59 |         assert isinstance(tensors, (tuple, list))
 60 |         for t in tensors:
 61 |             assert isinstance(t, torch.Tensor), type(t)
 62 |             assert t.shape[1:-2] == tensors[0].shape[1:-2], t.shape
 63 |         # per dimension maximum (H, W) or (C_1, ..., C_K, H, W) where K >= 1 among all tensors
 64 |         max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors]))
 65 | 
 66 |         if size_divisibility > 0:
 67 |             import math
 68 | 
 69 |             stride = size_divisibility
 70 |             max_size = list(max_size)  # type: ignore
 71 |             max_size[-2] = int(math.ceil(max_size[-2] / stride) * stride)  # type: ignore
 72 |             max_size[-1] = int(math.ceil(max_size[-1] / stride) * stride)  # type: ignore
 73 |             max_size = tuple(max_size)
 74 | 
 75 |         image_sizes = [im.shape[-2:] for im in tensors]
 76 | 
 77 |         if len(tensors) == 1:
 78 |             # This seems slightly (2%) faster.
 79 |             # TODO: check whether it's faster for multiple images as well
 80 |             image_size = image_sizes[0]
 81 |             padded = F.pad(
 82 |                 tensors[0],
 83 |                 [0, max_size[-1] - image_size[1], 0, max_size[-2] - image_size[0]],
 84 |                 value=pad_value,
 85 |             )
 86 |             batched_imgs = padded.unsqueeze_(0)
 87 |         else:
 88 |             batch_shape = (len(tensors),) + max_size
 89 |             batched_imgs = tensors[0].new_full(batch_shape, pad_value)
 90 |             for img, pad_img in zip(tensors, batched_imgs):
 91 |                 pad_img[..., : img.shape[-2], : img.shape[-1]].copy_(img)
 92 | 
 93 |         return ImageList(batched_imgs.contiguous(), image_sizes)
 94 | 
 95 |     @staticmethod
 96 |     def from_tensors_crop(
 97 |         tensors: Sequence[torch.Tensor], crop_size: int = 224, ratio: int=1
 98 |     ) -> "ImageList":
 99 |         """
100 |         Args:
101 |             tensors: a tuple or list of `torch.Tensors`, each of shape (Hi, Wi) or
102 |                 (C_1, ..., C_K, Hi, Wi) where K >= 1. The Tensors will be padded with `pad_value`
103 |                 so that they will have the same shape.
104 |             size_divisibility (int): If `size_divisibility > 0`, also adds padding to ensure
105 |                 the common height and width is divisible by `size_divisibility`
106 |             pad_value (float): value to pad
107 |         Returns:
108 |             an `ImageList`.
109 |         """
110 |         assert len(tensors) > 0
111 |         assert isinstance(tensors, (tuple, list))
112 |         for t in tensors:
113 |             assert isinstance(t, torch.Tensor), type(t)
114 |             assert t.shape[1:-2] == tensors[0].shape[1:-2], t.shape
115 |         # per dimension maximum (H, W) or (C_1, ..., C_K, H, W) where
116 |         # K >= 1 among all tensors
117 |         max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors]))
118 | 
119 |         image_sizes = [im.shape[-2:] for im in tensors]
120 | 
121 |         # resize the images to half size of the original size
122 |         croped_tensors = torch.rand(len(tensors), tensors[0].size(0),
123 |                                     crop_size, crop_size)
124 | 
125 |         new_image_sizes = []
126 |         for i, tensor in enumerate(tensors):
127 |             image_size = image_sizes[i]
128 |             tensor = tensor.unsqueeze(1) # add the channel dimension here
129 |             resized_image = F.interpolate(tensor, scale_factor=ratio).squeeze()
130 |             crop_image = crop_tensor(resized_image, (crop_size, crop_size))
131 |             croped_tensors[i] = crop_image
132 |             new_image_sizes.append(crop_image.shape[-2:])
133 | 
134 |         return ImageList(croped_tensors.contiguous(), new_image_sizes)
135 | 
136 | 
137 | def crop_tensor(image, crop_sizes):
138 |     image = image.clone()
139 |     indx = image.size(-2) - crop_sizes[0]
140 |     indy = image.size(-1) - crop_sizes[1]
141 |     if indx == 0:
142 |         startx = 0
143 |     else:
144 |         startx = np.random.choice(indx)
145 |     if indy == 0:
146 |         starty = 0
147 |     else:
148 |         starty = np.random.choice(indy)
149 |     return image[:, startx:startx+crop_sizes[0],
150 |            starty:starty+crop_sizes[1]]


--------------------------------------------------------------------------------
/src/modeling/vit/__init__.py:
--------------------------------------------------------------------------------
1 | from .config import add_vit_config
2 | from .msvit import build_msvit_backbone
3 | from .dataset_mapper import FixSizeDatasetMapper


--------------------------------------------------------------------------------
/src/modeling/vit/config.py:
--------------------------------------------------------------------------------
 1 | from detectron2.config import CfgNode as CN
 2 | 
 3 | 
 4 | def add_vit_config(cfg):
 5 |     """
 6 |     Add config for VIT.
 7 |     """
 8 |     cfg.MODEL.TRANSFORMER = CN()
 9 |     cfg.MODEL.TRANSFORMER.DROP = 0.0
10 |     cfg.MODEL.TRANSFORMER.DROP_PATH = 0.1
11 |     cfg.MODEL.TRANSFORMER.NORM_EMBED = False
12 |     cfg.MODEL.TRANSFORMER.AVG_POOL = False
13 | 
14 |     cfg.MODEL.TRANSFORMER.MSVIT = CN()
15 |     cfg.MODEL.TRANSFORMER.MSVIT.ARCH = 'l1,h3,d192,n1,s1,g1,f4,a0_l2,h6,d384,n10,s0,g1,f2,a0_l3,h6,d384,n1,s0,g1,f1,a0'
16 |     cfg.MODEL.TRANSFORMER.MSVIT.SHARE_W = True
17 |     cfg.MODEL.TRANSFORMER.MSVIT.ATTN_TYPE = 'longformerhand'
18 |     cfg.MODEL.TRANSFORMER.MSVIT.SHARE_KV = True
19 |     cfg.MODEL.TRANSFORMER.MSVIT.ONLY_GLOBAL = False
20 |     cfg.MODEL.TRANSFORMER.MSVIT.SW_EXACT = 0
21 |     cfg.MODEL.TRANSFORMER.MSVIT.LN_EPS = 1e-6
22 |     cfg.MODEL.TRANSFORMER.MSVIT.MODE = 0
23 |     cfg.MODEL.TRANSFORMER.MSVIT.REDRAW_INTERVAL = 1000
24 | 
25 |     cfg.MODEL.TRANSFORMER.OUT_FEATURES = ()
26 | 
27 |     # input size should be patch_size x pos_embedding_size
28 |     cfg.INPUT.FIX_SIZE = ()
29 | 
30 |     # Optimizer.
31 |     cfg.SOLVER.OPTIMIZER = "ADAMW"
32 |     cfg.SOLVER.BACKBONE_MULTIPLIER = 1.0
33 | 
34 |     # Add LR multiplies to specific layers:
35 |     # Use case:
36 |     ##  SOLVER:
37 |     ##     LR_MULTIPLIERS:
38 |     ##          backbone: 0.1
39 |     ##          embedding: 0.2
40 |     ### it will apply 0.1 to layers with keyword 'backbone' and 0.2 to layers with keyword 'embedding'
41 |     cfg.SOLVER.LR_MULTIPLIERS = CN(new_allowed=True)
42 | 


--------------------------------------------------------------------------------
/src/modeling/vit/dataset_mapper.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
  2 | import copy
  3 | import logging
  4 | import numpy as np
  5 | import torch
  6 | 
  7 | from detectron2.data import detection_utils as utils
  8 | from detectron2.data import transforms as T
  9 | 
 10 | __all__ = ["FixSizeDatasetMapper"]
 11 | 
 12 | 
 13 | def build_transform_gen(cfg, is_train):
 14 |     """
 15 |     Create a list of :class:`TransformGen` from config.
 16 |     Returns:
 17 |         list[TransformGen]
 18 |     """
 19 |     logger = logging.getLogger(__name__)
 20 |     tfm_gens = []
 21 |     if is_train:
 22 |         tfm_gens.append(T.RandomFlip())
 23 |     if is_train:
 24 |         logger.info("TransformGens used in training: " + str(tfm_gens))
 25 |     return tfm_gens
 26 | 
 27 | 
 28 | class FixSizeDatasetMapper:
 29 |     """
 30 |     A callable which takes a dataset dict in Detectron2 Dataset format,
 31 |     and map it into a format used by DETR.
 32 | 
 33 |     The callable currently does the following:
 34 | 
 35 |     1. Read the image from "file_name"
 36 |     2. Applies geometric transforms to the image and annotation
 37 |     3. Find and applies suitable cropping to the image and annotation
 38 |     4. Prepare image and annotation to Tensors
 39 |     """
 40 | 
 41 |     def __init__(self, cfg, is_train=True):
 42 |         if cfg.INPUT.CROP.ENABLED and is_train:
 43 |             self.crop_gen = [
 44 |                 T.ResizeShortestEdge(cfg.INPUT.MIN_SIZE_TRAIN, sample_style="choice"),
 45 |                 T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE),
 46 |                 T.Resize(cfg.INPUT.FIX_SIZE)
 47 |             ]
 48 |         else:
 49 |             self.crop_gen = [
 50 |                 T.Resize(cfg.INPUT.FIX_SIZE)
 51 |             ]
 52 | 
 53 |         self.mask_on = cfg.MODEL.MASK_ON
 54 |         self.tfm_gens = build_transform_gen(cfg, is_train)
 55 |         logging.getLogger(__name__).info(
 56 |             "Full TransformGens used in training: {}, crop: {}".format(str(self.tfm_gens), str(self.crop_gen))
 57 |         )
 58 | 
 59 |         self.img_format = cfg.INPUT.FORMAT
 60 |         self.is_train = is_train
 61 | 
 62 |     def __call__(self, dataset_dict):
 63 |         """
 64 |         Args:
 65 |             dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
 66 | 
 67 |         Returns:
 68 |             dict: a format that builtin models in detectron2 accept
 69 |         """
 70 |         dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
 71 |         image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
 72 |         utils.check_image_size(dataset_dict, image)
 73 | 
 74 |         image, transforms = T.apply_transform_gens(
 75 |             self.tfm_gens + self.crop_gen, image
 76 |         )
 77 | 
 78 |         image_shape = image.shape[:2]  # h, w
 79 | 
 80 |         # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
 81 |         # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
 82 |         # Therefore it's important to use torch.Tensor.
 83 |         dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
 84 | 
 85 |         if not self.is_train:
 86 |             # USER: Modify this if you want to keep them for some reason.
 87 |             dataset_dict.pop("annotations", None)
 88 |             return dataset_dict
 89 | 
 90 |         if "annotations" in dataset_dict:
 91 |             # USER: Modify this if you want to keep them for some reason.
 92 |             for anno in dataset_dict["annotations"]:
 93 |                 if not self.mask_on:
 94 |                     anno.pop("segmentation", None)
 95 |                 anno.pop("keypoints", None)
 96 | 
 97 |             # USER: Implement additional transformations if you have other types of data
 98 |             annos = [
 99 |                 utils.transform_instance_annotations(obj, transforms, image_shape)
100 |                 for obj in dataset_dict.pop("annotations")
101 |                 if obj.get("iscrowd", 0) == 0
102 |             ]
103 |             instances = utils.annotations_to_instances(annos, image_shape)
104 |             dataset_dict["instances"] = utils.filter_empty_instances(instances)
105 |             if len(dataset_dict["instances"])==0:
106 |                 return None
107 |         return dataset_dict
108 | 


--------------------------------------------------------------------------------
/src/modeling/vit/linformer.py:
--------------------------------------------------------------------------------
 1 | # mainly modified from
 2 | # https://github.com/lucidrains/linformer/blob/master/linformer/linformer.py
 3 | import math
 4 | import torch
 5 | from torch import nn
 6 | 
 7 | 
 8 | def init_(tensor):
 9 |     dim = tensor.shape[-1]
10 |     std = 1 / math.sqrt(dim)
11 |     tensor.uniform_(-std, std)
12 |     return tensor
13 | 
14 | 
15 | class LinformerSelfAttention(nn.Module):
16 |     def __init__(self, dim, seq_len, num_feats=256, num_heads=8, qkv_bias=False,
17 |                  qk_scale=None, attn_drop=0., proj_drop=0., share_kv=False):
18 |         super().__init__()
19 |         assert (dim % num_heads) == 0, 'dimension must be divisible by the number of heads'
20 | 
21 |         self.seq_len = seq_len
22 |         self.num_feats = num_feats
23 | 
24 |         self.num_heads = num_heads
25 |         self.head_dim = dim // num_heads
26 |         self.scale = qk_scale or self.head_dim ** -0.5
27 | 
28 |         self.query = nn.Linear(dim, dim, bias=qkv_bias)
29 |         self.kv = nn.Linear(dim, dim * 2, bias=qkv_bias)
30 |         self.proj = nn.Linear(dim, dim)
31 | 
32 |         self.proj_k = nn.Parameter(init_(torch.zeros(seq_len, num_feats)))
33 |         if share_kv:
34 |             self.proj_v = self.proj_k
35 |         else:
36 |             self.proj_v = nn.Parameter(init_(torch.zeros(seq_len, num_feats)))
37 | 
38 |         self.attn_drop = nn.Dropout(attn_drop)
39 |         self.proj_drop = nn.Dropout(proj_drop)
40 | 
41 |     def forward(self, x, nx=None, ny=None):
42 |         b, n, d = x.shape
43 |         d_h, h, k = self.head_dim, self.num_heads, self.num_feats
44 |         kv_len = n
45 |         assert kv_len == self.seq_len, f'the sequence length of the key / values must be {self.seq_len} - {kv_len} given'
46 | 
47 |         queries = self.scale * self.query(x).reshape(b, n, h, d_h).transpose(1, 2)
48 |         kv = self.kv(x).reshape(b, n, 2, d).permute(2, 0, 1, 3)
49 |         keys, values = kv[0], kv[1]  # make torchscript happy (cannot use tensor as tuple)
50 | 
51 |         # project keys and values along the sequence length dimension to k
52 |         proj_seq_len = lambda args: torch.einsum('bnd,nk->bkd', *args)
53 |         kv_projs = (self.proj_k, self.proj_v)
54 |         keys, values = map(proj_seq_len, zip((keys, values), kv_projs))
55 | 
56 |         # merge head into batch for queries and key / values
57 |         merge_key_values = lambda t: t.reshape(b, k, -1, d_h).transpose(
58 |             1, 2).expand(-1, h, -1, -1)
59 |         keys, values = map(merge_key_values, (keys, values))
60 | 
61 |         # attention
62 |         attn = torch.einsum('bhnd,bhkd->bhnk', queries, keys)
63 |         attn = (attn - torch.max(attn, dim=-1, keepdim=True)[0]).softmax(dim=-1)
64 |         attn = self.attn_drop(attn)
65 |         out = torch.einsum('bhnk,bhkd->bhnd', attn, values)
66 | 
67 |         # split heads
68 |         out = out.transpose(1, 2).reshape(b, n, -1)
69 |         out = self.proj(out)
70 |         out = self.proj_drop(out)
71 |         return out
72 | 


--------------------------------------------------------------------------------
/src/modeling/vit/srformer.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from torch import nn
  3 | 
  4 | 
  5 | class SRSelfAttention(nn.Module):
  6 |     def __init__(self, dim, rratio=2, num_heads=8, qkv_bias=False,
  7 |                  qk_scale=None, attn_drop=0., proj_drop=0.):
  8 |         super().__init__()
  9 |         assert (dim % num_heads) == 0, 'dimension must be divisible by the number of heads'
 10 | 
 11 |         self.rratio = rratio
 12 | 
 13 |         self.num_heads = num_heads
 14 |         self.head_dim = dim // num_heads
 15 |         self.scale = qk_scale or self.head_dim ** -0.5
 16 | 
 17 |         self.query = nn.Linear(dim, dim, bias=qkv_bias)
 18 |         self.kv = nn.Linear(dim, dim * 2, bias=qkv_bias)
 19 |         self.proj = nn.Linear(dim, dim)
 20 | 
 21 |         self.proj_sr = nn.Conv2d(dim, dim, kernel_size=rratio, stride=rratio,
 22 |                                  bias=False)
 23 |         self.norm = nn.InstanceNorm2d(dim)
 24 | 
 25 |         self.attn_drop = nn.Dropout(attn_drop)
 26 |         self.proj_drop = nn.Dropout(proj_drop)
 27 | 
 28 |     def forward(self, x, nx=None, ny=None):
 29 |         b, n, d = x.shape
 30 |         d_h, h = self.head_dim, self.num_heads
 31 | 
 32 |         # get queries
 33 |         queries = self.scale * self.query(x).reshape(b, n, h, d_h).transpose(1, 2)
 34 | 
 35 |         # spatial reduction for k and v
 36 |         x_local = x[:, -nx * ny:].transpose(-2, -1).reshape(b, d, nx, ny)
 37 |         x_local = self.norm(self.proj_sr(x_local)).view(b, d, -1)
 38 |         x = torch.cat([x[:, :-nx * ny], x_local.transpose(-2, -1)], dim=1)
 39 |         # compute keys and values
 40 |         kv = self.kv(x).reshape(b, -1, 2, d).permute(2, 0, 3, 1)
 41 |         keys, values = kv[0], kv[
 42 |             1]  # make torchscript happy (cannot use tensor as tuple) b x d x k
 43 | 
 44 |         # merge head into batch for queries and key / values
 45 |         merge_key_values = lambda t: t.reshape(b, h, d_h, -1).transpose(-2, -1)
 46 |         keys, values = map(merge_key_values, (keys, values))  # b x h x k x d_h
 47 | 
 48 |         # attention
 49 |         attn = torch.einsum('bhnd,bhkd->bhnk', queries, keys)
 50 |         attn = (attn - torch.max(attn, dim=-1, keepdim=True)[0]).softmax(dim=-1)
 51 |         attn = self.attn_drop(attn)
 52 |         out = torch.einsum('bhnk,bhkd->bhnd', attn, values)
 53 | 
 54 |         # split heads
 55 |         out = out.transpose(1, 2).reshape(b, n, -1)
 56 |         out = self.proj(out)
 57 |         out = self.proj_drop(out)
 58 |         return out
 59 | 
 60 |     @staticmethod
 61 |     def compute_macs(module, input, output):
 62 |         # n: num_query
 63 |         # S: num_key/value
 64 |         input, nx, ny = input
 65 |         _, n, d = input.shape
 66 |         macs = 0
 67 |         n_params = 0
 68 | 
 69 |         # queries = self.scale * self.query(x)
 70 |         query_params = sum([p.numel() for p in module.query.parameters()])
 71 |         n_params += query_params
 72 |         macs += query_params * n
 73 | 
 74 |         # x_local = self.norm(self.proj_sr(x_local)).view(b, d, -1)
 75 |         # x_local in (b, d, nx, ny)
 76 |         sr_params = sum([p.numel() for p in module.proj_sr.parameters()])
 77 |         n_params += sr_params
 78 |         output_dims = nx//module.rratio * ny//module.rratio
 79 |         kernel_dims = module.rratio ** 2
 80 |         in_channels = d
 81 |         out_channels = d
 82 | 
 83 |         filters_per_channel = out_channels
 84 |         conv_per_position_flops = int(kernel_dims) * \
 85 |                                   in_channels * filters_per_channel
 86 | 
 87 |         active_elements_count = output_dims
 88 | 
 89 |         overall_conv_flops = conv_per_position_flops * active_elements_count
 90 | 
 91 |         # bias = False
 92 |         bias_flops = 0
 93 | 
 94 |         macs += overall_conv_flops + bias_flops
 95 | 
 96 |         # kv = self.kv(x)
 97 |         num_kvs = n - nx * ny + output_dims
 98 |         kv_params = sum([p.numel() for p in module.kv.parameters()])
 99 |         n_params += kv_params
100 |         macs += kv_params * num_kvs
101 | 
102 |         # attn = torch.einsum('bhnd,bhkd->bhnk', queries, keys)
103 |         macs += n * num_kvs * d
104 |         # out = torch.einsum('bhnk,bhkd->bhnd', attn, values)
105 |         macs += n * num_kvs * d
106 | 
107 |         # out = self.proj(out)
108 |         proj_params = sum([p.numel() for p in module.proj.parameters()])
109 |         n_params += proj_params
110 |         macs += (proj_params * n)
111 |         # print('macs proj', proj_params * T / 1e8)
112 | 
113 |         module.__flops__ += macs
114 |         # return n_params, macs
115 | 


--------------------------------------------------------------------------------
/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeplearning-wisc/stud/21b9492c63804d7acf41fefd0d6ad40cf29975a5/tools/__init__.py


--------------------------------------------------------------------------------
/tools/analyze_bdd_fea.py:
--------------------------------------------------------------------------------
 1 | import umap
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | import torch
 5 | 
 6 | 
 7 | pos_fea = np.load('/afs/cs.wisc.edu/u/x/f/xfdu/workspace/video/cycle-confusion/bdd_pos_single_frame_1000.npy', allow_pickle=True)
 8 | neg_fea = np.load('/afs/cs.wisc.edu/u/x/f/xfdu/workspace/video/cycle-confusion/bdd_neg_single_frame_1000.npy', allow_pickle=True)
 9 | 
10 | index = 0
11 | for fea in pos_fea:
12 |     if index == 0:
13 |         pos_np = fea
14 |         index += 1
15 |     else:
16 |         pos_np = np.concatenate([pos_np, fea], 0)
17 | 
18 | index = 0
19 | for fea in neg_fea:
20 |     if index == 0:
21 |         neg_np = fea
22 |         index += 1
23 |     else:
24 |         neg_np = np.concatenate([neg_np, fea], 0)
25 | fea_np = np.concatenate([pos_np, neg_np], 0)
26 | print(len(fea_np))
27 | # breakpoint()
28 | reducer = umap.UMAP(random_state=42, n_neighbors=30, min_dist=0.6, n_components=2, metric='euclidean')
29 | embedding = reducer.fit_transform(fea_np)
30 | 
31 | fig, ax = plt.subplots(figsize=(6, 6))
32 | def get_cmap(n, name='hsv'):
33 |     '''Returns a function that maps each index in 0, 1, ..., n-1 to a distinct
34 |     RGB color; the keyword argument name must be a standard mpl colormap name.'''
35 |     return plt.cm.get_cmap(name, n)
36 | 
37 | classes = [str(hhh) for hhh in range(2)]
38 | # color = targets.astype(int)#[index for index in range(20)]#
39 | color = get_cmap(2)
40 | # color = plt.cm.coolwarm(np.linspace(0.1,0.9,11))
41 | 
42 | index = 0
43 | for i in range(0, 2):
44 |     if i == 0:
45 |         plt.scatter(embedding[:, 0][1000 * i:1000 * i + 1000],
46 |                     embedding[:, 1][1000 * i:1000 * i + 1000],
47 |                     c='r',
48 |                     label=index, cmap="Spectral", s=1)
49 |     else:
50 |         plt.scatter(embedding[:, 0][1000 * i:1000 * i + 1000],
51 |                     embedding[:, 1][1000 * i:1000 * i + 1000],
52 |                     c='b',
53 |                     label=index, cmap="Spectral", s=1)
54 |     index += 1
55 | 
56 | plt.legend(fontsize=20)
57 | # ax.legend(markerscale=9)
58 | ax.legend(loc='lower left',markerscale=9)#, bbox_to_anchor=(1, 0.5)
59 | # plt.legend(handles=scatter.legend_elements()[0], labels=classes)
60 | # breakpoint()
61 | plt.setp(ax, xticks=[], yticks=[])
62 | # plt.title("With virtual outliers", fontsize=20)
63 | # plt.savefig('./voc_coco_umap_visual_ours.jpg', dpi=250)
64 | # plt.title("Vanilla detector", fontsize=20)
65 | plt.savefig('./bdd_ana_single_frame_1000.jpg', dpi=250)
66 | # plt.show()


--------------------------------------------------------------------------------
/tools/analyze_energy.py:
--------------------------------------------------------------------------------
 1 | import umap
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | import torch
 5 | import pandas as pd
 6 | import seaborn as sns
 7 | import matplotlib as mpl
 8 | 
 9 | mpl.rcParams['axes.linewidth'] = 2
10 | import matplotlib
11 | # matplotlib.rcParams['mathtext.fontset'] = 'Arial'
12 | matplotlib.rcParams['mathtext.rm'] = 'Arial'
13 | matplotlib.rcParams['mathtext.it'] = 'Arial'
14 | 
15 | # matplotlib.rcParams['mathtext.fontset'] = 'stix'
16 | # matplotlib.rcParams['font.family'] = 'STIXGeneral'
17 | # matplotlib.pyplot.title(r'ABC123 vs $\mathrm{ABC123}^{123}$')
18 | neg_fea = np.load('/afs/cs.wisc.edu/u/x/f/xfdu/workspace/video/cycle-confusion/neg_energy.npy', allow_pickle=True)
19 | pos_fea = np.load('/afs/cs.wisc.edu/u/x/f/xfdu/workspace/video/cycle-confusion/pos_energy.npy', allow_pickle=True)
20 | 
21 | index = 0
22 | for fea in pos_fea:
23 |     if index == 0:
24 |         pos_np = fea.cpu().data.numpy()
25 |         index += 1
26 |     else:
27 |         pos_np = np.concatenate([pos_np, fea.cpu().data.numpy()], 0)
28 | 
29 | index = 0
30 | for fea in neg_fea:
31 |     if index == 0:
32 |         neg_np = fea.cpu().data.numpy()
33 |         index += 1
34 |     else:
35 |         neg_np = np.concatenate([neg_np, fea.cpu().data.numpy()], 0)
36 | # breakpoint()
37 | id_pd = pd.Series(pos_np)
38 | # # id_pd.rename('ID')
39 | #
40 | ood_pd = pd.Series(neg_np)
41 | # # ood_pd.rename('OOD')
42 | # # data_plot = {'Energy': np.concatenate((-id_score[0:2000], -ood_score), 0), 'label':['ID'] * len(-id_score[0:2000]) + \
43 | # #                                                                            ['OOD'] * len(-ood_score)}
44 | # # df_after = pd.DataFrame(data=data_plot)
45 | # # sns.histplot(data=df_after, x="Energy", hue="label")
46 | plt.figure(figsize=(10,8))
47 | p1 = sns.kdeplot(id_pd, shade=True, color="#168AAD", label='ID objects',linewidth=2.5)
48 | p1 = sns.kdeplot(ood_pd, shade=True, color="#B5E48C", label='Unknown objects',linewidth=2)
49 | plt.xlabel("Negative energy score", fontsize=25)
50 | plt.ylabel("Density", fontsize=25)
51 | plt.xticks(fontsize= 25)
52 | plt.yticks(fontsize= 25)
53 | plt.legend(fontsize=30, frameon=False)
54 | 
55 | plt.savefig('ddd.jpg', dpi=500)


--------------------------------------------------------------------------------
/tools/bdd_coco.py:
--------------------------------------------------------------------------------
 1 | import pickle
 2 | import torch
 3 | import torch.nn.functional as F
 4 | import numpy as np
 5 | import argparse
 6 | import pandas as pd
 7 | import seaborn as sns
 8 | import matplotlib
 9 | matplotlib.use('AGG')
10 | import matplotlib.pyplot as plt
11 | from metric_utils import *
12 | 
13 | recall_level_default = 0.95
14 | 
15 | 
16 | parser = argparse.ArgumentParser(description='Evaluates an OOD Detector',
17 |                                  formatter_class=argparse.ArgumentDefaultsHelpFormatter)
18 | parser.add_argument('--energy', type=int, default=1, help='noise for Odin')
19 | parser.add_argument('--T', default=1., type=float, help='temperature: energy|Odin')
20 | parser.add_argument('--thres', default=1., type=float)
21 | parser.add_argument('--name', default=1., type=str)
22 | parser.add_argument('--seed', default=0, type=int)
23 | parser.add_argument('--model', default='faster-rcnn', type=str)
24 | args = parser.parse_args()
25 | 
26 | 
27 | 
28 | concat = lambda x: np.concatenate(x, axis=0)
29 | to_np = lambda x: x.data.cpu().numpy()
30 | 
31 | 
32 | 
33 | # ID data
34 | ood_data = np.load('/nobackup-slow/dataset/my_xfdu/video/bdd/bdd100k/checkpoints/bdd_tracking_2k/daytime/' + str(args.model) + '/ood.npy',allow_pickle=True)
35 | id_data = np.load('/nobackup-slow/dataset/my_xfdu/video/bdd/bdd100k/checkpoints/bdd_tracking_2k/daytime/' + str(args.model) + '/id.npy',allow_pickle=True)
36 | # id_data = pickle.load(open('./data/VOC-Detection/' + args.model + '/'+args.name+'/random_seed'+'_' +str(args.seed)  +'/inference/voc_custom_val/standard_nms/corruption_level_0/probabilistic_scoring_res_odd_'+str(args.thres)+'.pkl', 'rb'))
37 | # ood_data = pickle.load(open('./data/VOC-Detection/' + args.model + '/'+args.name+'/random_seed' +'_'+str(args.seed)  +'/inference/coco_ood_val/standard_nms/corruption_level_0/probabilistic_scoring_res_odd_'+str(args.thres)+'.pkl', 'rb'))
38 | # id_score = []
39 | # energy score calculation.
40 | # import ipdb; ipdb.set_trace()
41 | index = 0
42 | for data in id_data:
43 |     if index == 0:
44 |         id_data_all = data
45 |         index += 1
46 |     else:
47 |         id_data_all = np.concatenate([id_data_all, data], 0)
48 | 
49 | id_data = torch.from_numpy(id_data_all)
50 | 
51 | index = 0
52 | for data in ood_data:
53 |     if index == 0:
54 |         ood_data_all = data
55 |         index += 1
56 |     else:
57 |         ood_data_all = np.concatenate([ood_data_all, data], 0)
58 | 
59 | ood_data = torch.from_numpy(ood_data_all)
60 | 
61 | 
62 | # id_data = id_data / 1000 - torch.max(id_data, 1, keepdim=True)[0]
63 | # ood_data = ood_data / 1000 - torch.max(ood_data, 1, keepdim=True)[0]
64 | T = 1
65 | 
66 | 
67 | 
68 | 
69 | assert len(id_data[0]) == 11
70 | if args.energy:
71 |     id_score = -args.T * torch.logsumexp(id_data[:, :-1] / args.T, dim=1).cpu().data.numpy()
72 |     ood_score = -args.T * torch.logsumexp(ood_data[:, :-1] / args.T, dim=1).cpu().data.numpy()
73 | else:
74 |     id_score = -np.max(F.softmax(id_data[:, :-1], dim=1).cpu().data.numpy(), axis=1)
75 |     ood_score = -np.max(F.softmax(ood_data[:, :-1], dim=1).cpu().data.numpy(), axis=1)
76 | # breakpoint()
77 | # id_score = id_data
78 | # ood_score = ood_data
79 | ###########
80 | ########
81 | print(len(id_score))
82 | print(len(ood_score))
83 | 
84 | measures = get_measures(-id_score, -ood_score, plot=False)
85 | 
86 | if args.energy:
87 |     print_measures(measures[0], measures[1], measures[2], 'energy')
88 | else:
89 |     print_measures(measures[0], measures[1], measures[2], 'msp')
90 | 
91 | 
92 | 
93 | 
94 | 


--------------------------------------------------------------------------------
/tools/bdd_heatmap.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import seaborn as sns; sns.set_theme()
3 | idx_data = np.asarray(np.load('./bdd_offset.npy', allow_pickle=True))
4 | score_data = np.load('./bdd_score_visual.npy', allow_pickle=True)
5 | breakpoint()
6 | ax = sns.heatmap(score_data[6].cpu().data.numpy())
7 | ax.savefig('./bdd_heatmap.jpg')


--------------------------------------------------------------------------------
/tools/ckpt_surgery.py:
--------------------------------------------------------------------------------
 1 | from os import X_OK, pardir
 2 | import torch 
 3 | import argparse
 4 | 
 5 | def process(ckpt_path, save_path):
 6 |     ckpt = torch.load(ckpt_path)
 7 |     new_order = [0, 1, 2, 4, 3, 7, 6, 5, 8, 9, 10]
 8 |     ckpt['model']['roi_heads.box_predictor.cls_score.weight'] =ckpt['model']['roi_heads.box_predictor.cls_score.weight'][new_order]
 9 |     ckpt['model']['roi_heads.box_predictor.cls_score.bias'] = ckpt['model']['roi_heads.box_predictor.cls_score.bias'][new_order]
10 | 
11 |     new_order4 = []
12 |     for x in new_order[:-1]:
13 |         for i in range(4):
14 |             new_order4.append(4*x+i)
15 | 
16 |     ckpt['model']['roi_heads.box_predictor.bbox_pred.weight'] = ckpt['model']['roi_heads.box_predictor.bbox_pred.weight'][new_order4]
17 |     ckpt['model']['roi_heads.box_predictor.bbox_pred.bias'] = ckpt['model']['roi_heads.box_predictor.bbox_pred.bias'][new_order4]
18 | 
19 |     torch.save(ckpt, save_path)
20 |     print('done!')
21 | 
22 | 
23 | def main():
24 |     parser = argparse.ArgumentParser()
25 |     parser.add_argument('--ckpt-path', '-i', type=str,  help='input ckpt path')
26 |     parser.add_argument('--save-path', '-o', type=str, help='output ckpt path')
27 |     args = parser.parse_args()
28 | 
29 |     process(args.ckpt_path, args.save_path)
30 | 
31 | if __name__ == '__main__':
32 |     main()
33 | 
34 |     
35 |     
36 | 
37 | 


--------------------------------------------------------------------------------
/tools/convert_weight.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # --------------------------------------------------------
 4 | # Descripttion: https://github.com/sxhxliang/detectron2_backbone
 5 | # version: 0.0.1
 6 | # Author: Shihua Liang (sxhx.liang@gmail.com)
 7 | # FilePath: /detectron2_backbone/detectron2_backbone/tools/convert_weight.py
 8 | # Create: 2020-05-05 07:32:08
 9 | # LastAuthor: Shihua Liang
10 | # lastTime: 2020-07-02 21:51:57
11 | # --------------------------------------------------------
12 | import torch
13 | import argparse
14 | from collections import OrderedDict
15 | 
16 | import torch
17 | 
18 | 
19 | def get_parser():
20 |     parser = argparse.ArgumentParser(description="Detectron2 Model Converter")
21 |     parser.add_argument(
22 |         "--model",
23 |         required=True,
24 |         metavar="FILE",
25 |         help="path to model weights",
26 |     )
27 |     parser.add_argument(
28 |         "--output",
29 |         required=True,
30 |         metavar="FILE",
31 |         help="path to model weights",
32 |     )
33 |     return parser
34 | 
35 | 
36 | def convert_weight():
37 |     args = get_parser().parse_args()
38 |     ckpt = torch.load(args.model, map_location="cpu")
39 |     if "model" in ckpt:
40 |         state_dict = ckpt["model"]
41 |     else:
42 |         state_dict = ckpt
43 |     model = {"model": state_dict, "__author__": "custom", "matching_heuristics": True}
44 | 
45 |     torch.save(model, args.output)
46 | 
47 | if __name__ == "__main__":
48 |     convert_weight()


--------------------------------------------------------------------------------
/tools/count.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import numpy as np
 3 | 
 4 | root_directory = '/nobackup-slow/dataset/my_xfdu/video/vis/train/JPEGImages/'#72
 5 | # root_directory = '/nobackup-slow/dataset/my_xfdu/video/bdd/bdd100k/images/track/train/'#263
 6 | 
 7 | 
 8 | numbers = []
 9 | for video in list(os.listdir(root_directory)):
10 |     path = os.path.join(root_directory, video)
11 |     cur_frame = os.listdir(path)
12 |     numbers.append(len(list(cur_frame)))
13 | 
14 | numbers = np.asarray(numbers)
15 | print(np.min(numbers))
16 | print(np.max(numbers))


--------------------------------------------------------------------------------
/tools/plot_frame_interval.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | import seaborn as sns
  4 | import matplotlib.pyplot as plt
  5 | 
  6 | 
  7 | 
  8 | 
  9 | # # Create a dataset with many short random walks
 10 | # rs = np.random.RandomState(4)
 11 | # # pos = rs.randint(-1, 2, (20, 5)).cumsum(axis=1)
 12 | # # breakpoint()
 13 | # # pos -= pos[:, 0, np.newaxis]
 14 | # pos = np.asarray([[80.88, 81.76,83.11, 83.29,82.76,81.84,80.43],
 15 | #                   [71.90, 73.47,74.04,74.34,73.03,71.03,70.10]])
 16 | # step = np.asarray([1,3,5,7,9,11,13,1,3,5,7,9,11,13])
 17 | # walk = np.repeat(['COCO','NuImages'], 7)
 18 | # walk1 =  np.repeat(range(2), 7)
 19 | # # breakpoint()
 20 | # df = pd.DataFrame(np.c_[pos.flat, step, walk, walk1],
 21 | #                   columns=["AUROC", "Frame range", "OOD", "dummy"])
 22 | #
 23 | # # Initialize a grid of plots with an Axes for each walk
 24 | # grid = sns.FacetGrid(df, col='OOD' , hue='dummy', palette="tab20c",
 25 | #                      col_wrap=4, height=2.2)
 26 | #
 27 | # # Draw a horizontal line to show the starting point
 28 | # # grid.refline(y=0, linestyle=":")
 29 | #
 30 | # # Draw a line plot to show the trajectory of each random walk
 31 | # grid.map(plt.plot, "Frame range", "AUROC", marker="o")
 32 | #
 33 | # # Adjust the tick positions and labels
 34 | # # grid.set(xticks=np.arange(16), yticks=[70, 90],
 35 | # #          xlim=(0,15), ylim=(70,90))
 36 | #
 37 | #
 38 | #
 39 | #
 40 | #
 41 | # # num_rows = 4
 42 | # # years = frames
 43 | # # data_preproc = pd.DataFrame({
 44 | # #     'Frame range': years,
 45 | # #     r'$T$=1, OOD=COCO': single_coco,
 46 | # #     r'$T$=1, OOD=NuImages': single_nu})
 47 | # #     # r'$T$=3, OOD=COCO': multi_coco,
 48 | # #     # r'$T$=3, OOD=NuImages': multi_nu})
 49 | # # fig = sns.lineplot(x='Frame range', y='value', hue='variable',
 50 | # #              data=pd.melt(data_preproc, ['Frame range']), marker="o")
 51 | #
 52 | # # Adjust the arrangement of the plots
 53 | # # fig.tight_layout(w_pad=1)
 54 | def get_cmap(n, name='hsv'):
 55 |     '''Returns a function that maps each index in 0, 1, ..., n-1 to a distinct
 56 |     RGB color; the keyword argument name must be a standard mpl colormap name.'''
 57 |     return plt.cm.get_cmap(name, n)
 58 | 
 59 | color = get_cmap(7)
 60 | frames = [3,5,7,9,11,13]
 61 | frames1 = [1,2,3,4,5,6]
 62 | single_coco = [81.76,83.11, 83.29,82.76,81.84,80.43]#[80.88, 81.76,83.11, 83.29,82.76,81.84,80.43]
 63 | single_nu = [ 73.47,74.04,74.34,73.03,71.03,70.10]#[71.90, 73.47,74.04,74.34,73.03,71.03,70.10]
 64 | 
 65 | multi_coco = [83.57,84.48,85.06,84.99,83.30,82.36]
 66 | multi_nu = [72.65,73.94,74.47,73.56,72.07,72.73]
 67 | import seaborn as sns
 68 | import numpy as np
 69 | import pandas as pd
 70 | import matplotlib.pyplot as plt
 71 | 
 72 | # Setting seaborn as default style even
 73 | # if use only matplotlib
 74 | # sns.set()
 75 | sns.set(font_scale = 1.5)
 76 | sns.set_theme(style="ticks")
 77 | # figure, axes = plt.subplots()
 78 | # figure.suptitle('Geeksforgeeks - one axes with no data')
 79 | # plt.bar(data.xcol,data.ycol,4)
 80 | figure, axes = plt.subplots(1, 2, sharex=True, figsize=(7,3.5))
 81 | # figure.suptitle('Geeksforgeeks')
 82 | # breakpoint()
 83 | 
 84 | 
 85 | 
 86 | 
 87 | # axes[0][0].set_title(r'$T$=1, OOD=COCO')
 88 | # # df=pd.DataFrame(dict(x=range(5),y=[3,15,9,12,4]))
 89 | # data_preproc = pd.DataFrame({
 90 | #     'Frame range': frames,
 91 | #     'AUROC': single_coco})
 92 | # sub1 = sns.barplot(data=data_preproc,x='Frame range',y='AUROC', ax=axes[0][0], palette=sns.color_palette('Blues_r',7))
 93 | # sub1.set(ylim=(80,84))
 94 | # # axes[0][0].set_box_aspect(10/len(axes[0][0].patches))
 95 | # # sns.linplot(data=df,x='Frame_r')
 96 | # widthbars = [1,1,1,1,1,1,1]
 97 | # for bar, newwidth in zip(axes[0][0].patches, widthbars):
 98 | #     x = bar.get_x()
 99 | #     width = bar.get_width()
100 | #     print(x)
101 | #     centre = x #+ width/2.
102 | #     bar.set_x(centre)
103 | #     bar.set_width(newwidth)
104 | #
105 | #
106 | # axes[0][1].set_title(r'$T$=1, OOD=NuImages')
107 | # data_preproc = pd.DataFrame({
108 | #     'Frame range': frames,
109 | #     'AUROC': single_nu})
110 | # sub2 = sns.barplot(data=data_preproc,x='Frame range',y='AUROC', ax=axes[0][1], palette="magma")
111 | # # sub2.set(xticks=[0, 5, 10, 15])
112 | # sub2.set(ylim=(69,75))
113 | # axes[0][1].set_ylabel("")
114 | # widthbars = [1,1,1,1,1,1,1]
115 | # for bar, newwidth in zip(axes[0][1].patches, widthbars):
116 | #     x = bar.get_x()
117 | #     width = bar.get_width()
118 | #     print(x)
119 | #     centre = x #+ width/2.
120 | #     bar.set_x(centre)
121 | #     bar.set_width(newwidth)
122 | 
123 | 
124 | 
125 | axes[0].set_title(r'$T$=3, OOD=COCO')
126 | data_preproc = pd.DataFrame({
127 |     'Frame interval': frames1,
128 |     'AUROC': multi_coco})
129 | sub3 = sns.barplot(data=data_preproc,x='Frame interval',y='AUROC', ax=axes[0], palette=sns.color_palette('Blues_r',7))
130 | # sub3.set(xticks=[0, 5, 10, 15], yticks= [83,84,85,86])
131 | sub3.set(ylim=(82,86))
132 | 
133 | widthbars = [1,1,1,1,1,1]
134 | for bar, newwidth in zip(axes[0].patches, widthbars):
135 |     x = bar.get_x()
136 |     width = bar.get_width()
137 |     print(x)
138 |     centre = x #+ width/2.
139 |     bar.set_x(centre)
140 |     bar.set_width(newwidth)
141 | 
142 | 
143 | axes[1].set_title(r'$T$=3, OOD=NuImages')
144 | data_preproc = pd.DataFrame({
145 |     'Frame interval': frames1,
146 |     'AUROC': multi_nu})
147 | sub4 = sns.barplot(data=data_preproc,x='Frame interval',y='AUROC', ax=axes[1], palette="magma")
148 | # sub4.set(xticks=[0, 5, 10, 15], yticks= [74,75])
149 | sub4.set(ylim=(71,75))
150 | axes[1].set_ylabel("")
151 | widthbars = [1,1,1,1,1,1]
152 | for bar, newwidth in zip(axes[1].patches, widthbars):
153 |     x = bar.get_x()
154 |     width = bar.get_width()
155 |     print(x)
156 |     centre = x #+ width/2.
157 |     bar.set_x(centre)
158 |     bar.set_width(newwidth)
159 | figure.tight_layout(w_pad=1)
160 | figure.savefig('ablation1.pdf')


--------------------------------------------------------------------------------
/tools/plot_frame_range.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | import seaborn as sns
  4 | import matplotlib.pyplot as plt
  5 | 
  6 | 
  7 | # def get_cmap(n, name='hsv'):
  8 | #     '''Returns a function that maps each index in 0, 1, ..., n-1 to a distinct
  9 | #     RGB color; the keyword argument name must be a standard mpl colormap name.'''
 10 | #     return plt.cm.get_cmap(name, n)
 11 | #
 12 | # color = get_cmap(7)
 13 | frames = [1,2,3,4,5]
 14 | frames1 = [3,5,7,9,11,13,'inf']
 15 | single_coco = [81.76,83.11, 83.29,82.76,81.84,80.43]#[80.88, 81.76,83.11, 83.29,82.76,81.84,80.43]
 16 | single_nu = [ 73.47,74.04,74.34,73.03,71.03,70.10]#[71.90, 73.47,74.04,74.34,73.03,71.03,70.10]
 17 | 
 18 | multi_coco = [83.34, 84.26,84.70,85.67,85.34,84.41, 80.35]
 19 | multi_nu = [73.89, 75.61,75.64,75.67,74.87,74.42, 71.80]
 20 | 
 21 | 
 22 | # Setting seaborn as default style even
 23 | # if use only matplotlib
 24 | # sns.set()
 25 | sns.set(font_scale = 1.5)
 26 | sns.set_theme(style="ticks")
 27 | # figure, axes = plt.subplots()
 28 | # figure.suptitle('Geeksforgeeks - one axes with no data')
 29 | # plt.bar(data.xcol,data.ycol,4)
 30 | figure, axes = plt.subplots(1, 2, sharex=True, figsize=(7,3.5))
 31 | # figure.suptitle('Geeksforgeeks')
 32 | # breakpoint()
 33 | 
 34 | 
 35 | 
 36 | # axes[0].set_title(r'$T$=3, OOD=COCO')
 37 | # data_preproc = pd.DataFrame({
 38 | #     'Frame range': frames1,
 39 | #     'AUROC': multi_coco})
 40 | # sub3 = sns.barplot(data=data_preproc,x='Frame range',y='AUROC', ax=axes[0], palette="dark:salmon_r")
 41 | # # sub3.set(xticks=[0, 5, 10, 15], yticks= [83,84,85,86])
 42 | # sub3.set(ylim=(80,86))
 43 | #
 44 | # widthbars = [1,1,1,1,1,1, 1]
 45 | # for bar, newwidth in zip(axes[0].patches, widthbars):
 46 | #     x = bar.get_x()
 47 | #     width = bar.get_width()
 48 | #     print(x)
 49 | #     centre = x #+ width/2.
 50 | #     bar.set_x(centre)
 51 | #     bar.set_width(newwidth)
 52 | #
 53 | #
 54 | # axes[1].set_title(r'$T$=3, OOD=NuImages')
 55 | # data_preproc = pd.DataFrame({
 56 | #     'Frame range': frames1,
 57 | #     'AUROC': multi_nu})
 58 | # sub4 = sns.barplot(data=data_preproc,x='Frame range',y='AUROC', ax=axes[1], palette="YlOrBr")
 59 | # # sub4.set(xticks=[0, 5, 10, 15], yticks= [74,75])
 60 | # sub4.set(ylim=(71,76))
 61 | # axes[1].set_ylabel("")
 62 | # widthbars = [1,1,1,1,1,1,1]
 63 | # for bar, newwidth in zip(axes[1].patches, widthbars):
 64 | #     x = bar.get_x()
 65 | #     width = bar.get_width()
 66 | #     print(x)
 67 | #     centre = x #+ width/2.
 68 | #     bar.set_x(centre)
 69 | #     bar.set_width(newwidth)
 70 | 
 71 | 
 72 | 
 73 | 
 74 | multi_coco = [80.43,82.71,85.67,81.41,80.81]
 75 | multi_nu = [70.10,75.29,75.67,73.26,72.76]
 76 | axes[0].set_title(r'$T$=3, OOD=COCO')
 77 | data_preproc = pd.DataFrame({
 78 |     'Number of Frame': frames,
 79 |     'AUROC': multi_coco})
 80 | sub1 = sns.barplot(data=data_preproc,x='Number of Frame',y='AUROC', ax=axes[0], palette=sns.color_palette('Blues_r',7))
 81 | # sub3.set(xticks=[0, 5, 10, 15], yticks= [83,84,85,86])
 82 | sub1.set(ylim=(80,86))
 83 | axes[0].set_ylabel("")
 84 | widthbars = [1,1,1,1,1]
 85 | for bar, newwidth in zip(axes[0].patches, widthbars):
 86 |     x = bar.get_x()
 87 |     width = bar.get_width()
 88 |     print(x)
 89 |     centre = x #+ width/2.
 90 |     bar.set_x(centre)
 91 |     bar.set_width(newwidth)
 92 | 
 93 | 
 94 | axes[1].set_title(r'$T$=3, OOD=NuImages')
 95 | data_preproc = pd.DataFrame({
 96 |     'Number of Frame': frames,
 97 |     'AUROC': multi_nu})
 98 | sub2 = sns.barplot(data=data_preproc,x='Number of Frame',y='AUROC', ax=axes[1], palette="magma")
 99 | # sub4.set(xticks=[0, 5, 10, 15], yticks= [74,75])
100 | sub2.set(ylim=(69,76))
101 | axes[1].set_ylabel("")
102 | widthbars = [1,1,1,1,1]
103 | for bar, newwidth in zip(axes[1].patches, widthbars):
104 |     x = bar.get_x()
105 |     width = bar.get_width()
106 |     print(x)
107 |     centre = x #+ width/2.
108 |     bar.set_x(centre)
109 |     bar.set_width(newwidth)
110 | 
111 | 
112 | #
113 | # multi_coco = [83.57,84.48,85.06,84.99,83.30,82.36]
114 | # multi_nu = [72.65,73.94,74.47,73.56,72.07,72.73]
115 | # axes[0].set_title(r'$T$=3, OOD=COCO')
116 | # data_preproc = pd.DataFrame({
117 | #     'Frame interval': frames,
118 | #     'AUROC': multi_coco})
119 | # sub1 = sns.barplot(data=data_preproc,x='Frame interval',y='AUROC', ax=axes[0], palette=sns.color_palette('Blues_r',7))
120 | # # sub3.set(xticks=[0, 5, 10, 15], yticks= [83,84,85,86])
121 | # sub1.set(ylim=(82,86))
122 | # axes[0].set_ylabel("")
123 | # widthbars = [1,1,1,1,1,1]
124 | # for bar, newwidth in zip(axes[0].patches, widthbars):
125 | #     x = bar.get_x()
126 | #     width = bar.get_width()
127 | #     print(x)
128 | #     centre = x #+ width/2.
129 | #     bar.set_x(centre)
130 | #     bar.set_width(newwidth)
131 | #
132 | #
133 | # axes[1].set_title(r'$T$=3, OOD=NuImages')
134 | # data_preproc = pd.DataFrame({
135 | #     'Frame interval': frames,
136 | #     'AUROC': multi_nu})
137 | # sub2 = sns.barplot(data=data_preproc,x='Frame interval',y='AUROC', ax=axes[1], palette="magma")
138 | # # sub4.set(xticks=[0, 5, 10, 15], yticks= [74,75])
139 | # sub2.set(ylim=(71,75))
140 | # axes[1].set_ylabel("")
141 | # widthbars = [1,1,1,1,1,1]
142 | # for bar, newwidth in zip(axes[1].patches, widthbars):
143 | #     x = bar.get_x()
144 | #     width = bar.get_width()
145 | #     print(x)
146 | #     centre = x #+ width/2.
147 | #     bar.set_x(centre)
148 | #     bar.set_width(newwidth)
149 | 
150 | 
151 | 
152 | 
153 | 
154 | 
155 | 
156 | 
157 | figure.tight_layout(w_pad=1)
158 | figure.savefig('ablation2.pdf')


--------------------------------------------------------------------------------
/tools/plot_loss.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import json
 3 | import numpy as np
 4 | import matplotlib
 5 | import matplotlib as mpl
 6 | 
 7 | mpl.rcParams['axes.linewidth'] = 2
 8 | 
 9 | # matplotlib.rcParams['mathtext.fontset'] = 'Arial'
10 | matplotlib.rcParams['mathtext.rm'] = 'Arial'
11 | matplotlib.rcParams['mathtext.it'] = 'Arial'
12 | 
13 | # matplotlib.rcParams['mathtext.fontset'] = 'stix'
14 | # matplotlib.rcParams['font.family'] = 'STIXGeneral'
15 | matplotlib.pyplot.title(r'ABC123 vs $\mathrm{ABC123}^{123}$')
16 | data  =open('/nobackup/dataset/my_xfdu/video/vis/checkpoints/VIS/energy_no_original_loss_direct_add_0_02_frame_9_revise_4to6_multi_random_seed1/metrics.json','r')
17 | tweets = []
18 | for line in data:
19 |     tweets.append(json.loads(line))
20 | data= tweets
21 | epochs = []
22 | losses = []
23 | for epoch, loss in enumerate(data):
24 |     epochs.append(epoch)
25 |     losses.append(loss['ene_reg_loss']*20)
26 | 
27 | # plt.figure(figsize=(10,5))
28 | # ax.set_title('Sine and cosine waves')
29 | 
30 | 
31 | plt.figure(figsize=(10,8))
32 | # plt.title("Training and Validation Loss")
33 | # plt.plot(val_losses,label="val")
34 | # plt.plot(train_losses,label="train")
35 | x = [i*20 for i in range(len(losses))]
36 | plt.plot(x,losses, label=r'$\mathcal{L}_{\mathrm{uncertainty}}$',color='#184E77',linewidth=3)
37 | plt.xlabel("iterations", fontsize=25)
38 | plt.ylabel("Uncertainty loss", fontsize=25)
39 | plt.xticks(fontsize= 25)
40 | plt.yticks(fontsize= 25)
41 | plt.legend(fontsize=30, frameon=False)
42 | plt.savefig('./loss.jpg', dpi=500)
43 | 


--------------------------------------------------------------------------------
/tools/select_permutation.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | Created on Thu Sep 14 15:50:28 2017
 4 | @author: bbrattol
 5 | """
 6 | import argparse
 7 | from tqdm import trange
 8 | import numpy as np
 9 | import itertools
10 | from scipy.spatial.distance import cdist
11 | import os
12 | 
13 | parser = argparse.ArgumentParser(description='Train network on Imagenet')
14 | parser.add_argument('--classes', default=1000, type=int,
15 |                     help='Number of permutations to select')
16 | parser.add_argument('--selection', default='max', type=str,
17 |                     help='Sample selected per iteration based on hamming distance: [max] highest; [mean] average')
18 | args = parser.parse_args()
19 | 
20 | if __name__ == "__main__":
21 |     outname = 'permutations/permutations_hamming_%s_%d' % (
22 |     args.selection, args.classes)
23 |     os.makedirs(os.path.dirname(outname), exist_ok=True)
24 | 
25 |     P_hat = np.array(list(itertools.permutations(list(range(9)), 9)))
26 |     n = P_hat.shape[0]
27 | 
28 |     for i in trange(args.classes):
29 |         if i == 0:
30 |             j = np.random.randint(n)
31 |             P = np.array(P_hat[j]).reshape([1, -1])
32 |         else:
33 |             P = np.concatenate([P, P_hat[j].reshape([1, -1])], axis=0)
34 | 
35 |         P_hat = np.delete(P_hat, j, axis=0)
36 |         D = cdist(P, P_hat, metric='hamming').mean(axis=0).flatten()
37 | 
38 |         if args.selection == 'max':
39 |             j = D.argmax()
40 |         else:
41 |             m = int(D.shape[0] / 2)
42 |             S = D.argsort()
43 |             j = S[np.random.randint(m - 10, m + 10)]
44 | 
45 |         if i % 100 == 0:
46 |             np.save(outname, P)
47 | 
48 |     np.save(outname, P)
49 |     print('file created --> ' + outname)


--------------------------------------------------------------------------------
/tools/simple_permutation.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import argparse
 3 | from tqdm import trange
 4 | import numpy as np
 5 | import itertools
 6 | from scipy.spatial.distance import cdist
 7 | import os
 8 | 
 9 | parser = argparse.ArgumentParser(description='Train network on Imagenet')
10 | parser.add_argument('--classes', default=24, type=int,
11 |                     help='Number of permutations to select')
12 | parser.add_argument('--selection', default='max', type=str,
13 |                     help='Sample selected per iteration based on hamming distance: [max] highest; [mean] average')
14 | args = parser.parse_args()
15 | 
16 | if __name__ == "__main__":
17 |     outname = 'permutations/permutations_hamming_%s_%d' % (
18 |     args.selection, args.classes)
19 |     os.makedirs(os.path.dirname(outname), exist_ok=True)
20 | 
21 |     P_hat = np.array(list(itertools.permutations(list(range(2)), 2)))
22 |     np.save(outname, P_hat)
23 |     print('file created --> ' + outname)
24 | 


--------------------------------------------------------------------------------
/tools/train_net.py:
--------------------------------------------------------------------------------
  1 | """Train/eval script."""
  2 | import logging
  3 | import os
  4 | import os.path as osp
  5 | import time
  6 | from collections import OrderedDict
  7 | import torch
  8 | 
  9 | import detectron2.utils.comm as comm
 10 | from detectron2.checkpoint import DetectionCheckpointer
 11 | from detectron2.data import MetadataCatalog
 12 | from detectron2.engine import default_setup, hooks, launch
 13 | from detectron2.evaluation import (
 14 |     COCOEvaluator,
 15 |     DatasetEvaluators,
 16 |     verify_results,
 17 | )
 18 | 
 19 | # updated code
 20 | from src.config import get_cfg
 21 | from src import data
 22 | from src.engine import default_argument_parser, DefaultTrainer
 23 | from src import modeling
 24 | 
 25 | 
 26 | class Trainer(DefaultTrainer):
 27 |     """
 28 |     We use the "DefaultTrainer" which contains a number pre-defined logic for
 29 |     standard training workflow. They may not work for you, especially if you
 30 |     are working on a new research project. In that case you can use the cleaner
 31 |     "SimpleTrainer", or write your own training loop.
 32 |     """
 33 | 
 34 |     @classmethod
 35 |     def build_evaluator(cls, cfg, dataset_name, output_folder=None):
 36 |         """
 37 |         Create evaluator(s) for a given dataset.
 38 |         This uses the special metadata "evaluator_type" associated with each builtin dataset.
 39 |         For your own dataset, you can simply create an evaluator manually in your
 40 |         script and do not have to worry about the hacky if-else logic here.
 41 |         """
 42 |         if output_folder is None:
 43 |             output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
 44 |         evaluator_list = []
 45 |         evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type
 46 |         if evaluator_type in ["coco", "coco_panoptic_seg"]:
 47 |             evaluator_list.append(COCOEvaluator(dataset_name, cfg, True, output_folder))
 48 |         if len(evaluator_list) == 0:
 49 |             raise NotImplementedError(
 50 |                 "no Evaluator for the dataset {} with the type {}".format(
 51 |                     dataset_name, evaluator_type
 52 |                 )
 53 |             )
 54 |         if len(evaluator_list) == 1:
 55 |             return evaluator_list[0]
 56 |         return DatasetEvaluators(evaluator_list)
 57 | 
 58 | 
 59 | def setup(args):
 60 |     """
 61 |     Create configs and perform basic setups.
 62 |     """
 63 |     cfg = get_cfg()
 64 |     cfg.merge_from_file(args.config_file)
 65 |     cfg.merge_from_list(args.opts)
 66 |     cfg.freeze()
 67 |     default_setup(cfg, args)
 68 |     return cfg
 69 | 
 70 | 
 71 | def main(args):
 72 |     cfg = setup(args)
 73 | 
 74 |     # eval_only and eval_during_train are mainly used for jointly
 75 |     # training detection and self-supervised models.
 76 |     # breakpoint()
 77 |     if args.eval_only:
 78 |         model = Trainer.build_model(cfg)
 79 |         DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
 80 |             cfg.MODEL.WEIGHTS, resume=args.resume
 81 |         )
 82 |         # breakpoint()
 83 |         position_list = [x for x, v in enumerate(cfg.MODEL.WEIGHTS) if v == '/']
 84 |         if 'ood' not in cfg.DATASETS.TEST[0]:
 85 |             res = Trainer.test(cfg, model,
 86 |                                saved_address=cfg.MODEL.WEIGHTS[:position_list[-1]] + '/id.npy',
 87 |                                visualize=args.visualize, savefigdir=args.savefigdir)
 88 |             if comm.is_main_process():
 89 |                 verify_results(cfg, res)
 90 |             if cfg.TEST.AUG.ENABLED:
 91 |                 res.update(Trainer.test_with_TTA(cfg, model))
 92 |             return res
 93 |         else:
 94 |             res = Trainer.test(cfg, model,
 95 |                                saved_address=cfg.MODEL.WEIGHTS[:position_list[-1]] + '/ood.npy',
 96 |                                visualize=args.visualize, savefigdir=args.savefigdir)
 97 |             return res
 98 | 
 99 |     elif args.eval_during_train:#False
100 |         model = Trainer.build_model(cfg)
101 |         check_pointer = DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR)
102 |         saved_checkpoint = None
103 |         best_res = {}
104 |         best_file = None
105 |         while True:
106 |             if check_pointer.has_checkpoint():
107 |                 current_ckpt = check_pointer.get_checkpoint_file()
108 |                 if (
109 |                     saved_checkpoint is None
110 |                     or current_ckpt != saved_checkpoint
111 |                 ):
112 |                     check_pointer._load_model(
113 |                         check_pointer._load_file(current_ckpt)
114 |                     )
115 |                     saved_checkpoint = current_ckpt
116 |                     print("evaluating checkpoint {}".format(current_ckpt))
117 |                     iters = int(
118 |                         osp.splitext(osp.basename(current_ckpt))[0].split("_")[
119 |                             -1
120 |                         ]
121 |                     )
122 |                     res = Trainer.test(cfg, model)
123 |                     if comm.is_main_process():
124 |                         verify_results(cfg, res)
125 |                     if cfg.TEST.AUG.ENABLED:
126 |                         res.update(Trainer.test_with_TTA(cfg, model))
127 |                     print(res)
128 |                     if (len(best_res) == 0) or (
129 |                         len(best_res) > 0
130 |                         and best_res["bbox"]["AP"] < res["bbox"]["AP"]
131 |                     ):
132 |                         best_res = res
133 |                         best_file = current_ckpt
134 |                     print("best so far is from {}".format(best_file))
135 |                     print(best_res)
136 |                     if iters + 1 >= cfg.SOLVER.MAX_ITER:
137 |                         return best_res
138 |             time.sleep(10)
139 |     """
140 |     If you'd like to do anything fancier than the standard training logic,
141 |     consider writing your own training loop or subclassing the trainer.
142 |     """
143 |     trainer = Trainer(cfg)
144 |     trainer.resume_or_load(resume=args.resume)
145 |     if cfg.TEST.AUG.ENABLED:
146 |         trainer.register_hooks(
147 |             [
148 |                 hooks.EvalHook(
149 |                     0, lambda: trainer.test_with_TTA(cfg, trainer.model)
150 |                 )
151 |             ]
152 |         )
153 |     return trainer.train()
154 | 
155 | 
156 | if __name__ == "__main__":
157 |     args = default_argument_parser().parse_args()
158 |     print("Command Line Args:", args)
159 |     launch(
160 |         main,
161 |         args.num_gpus,
162 |         num_machines=args.num_machines,
163 |         machine_rank=args.machine_rank,
164 |         dist_url=args.dist_url,
165 |         args=(args,),
166 |     )
167 | 


--------------------------------------------------------------------------------
/tools/vis_coco.py:
--------------------------------------------------------------------------------
 1 | import pickle
 2 | import torch
 3 | import torch.nn.functional as F
 4 | import numpy as np
 5 | import argparse
 6 | import pandas as pd
 7 | import seaborn as sns
 8 | import matplotlib
 9 | matplotlib.use('AGG')
10 | import matplotlib.pyplot as plt
11 | from metric_utils import *
12 | 
13 | recall_level_default = 0.95
14 | 
15 | 
16 | parser = argparse.ArgumentParser(description='Evaluates an OOD Detector',
17 |                                  formatter_class=argparse.ArgumentDefaultsHelpFormatter)
18 | parser.add_argument('--energy', type=int, default=1, help='noise for Odin')
19 | parser.add_argument('--T', default=1., type=float, help='temperature: energy|Odin')
20 | parser.add_argument('--thres', default=1., type=float)
21 | parser.add_argument('--name', default=1., type=str)
22 | parser.add_argument('--seed', default=0, type=int)
23 | parser.add_argument('--model', default='faster-rcnn', type=str)
24 | args = parser.parse_args()
25 | 
26 | 
27 | 
28 | concat = lambda x: np.concatenate(x, axis=0)
29 | to_np = lambda x: x.data.cpu().numpy()
30 | 
31 | 
32 | 
33 | # ID data
34 | ood_data = np.load('/nobackup/my_xfdu/video/vis/checkpoints/VIS/' + str(args.model) + '/ood.npy',allow_pickle=True)
35 | id_data = np.load('/nobackup/my_xfdu/video/vis/checkpoints/VIS/' + str(args.model) + '/id.npy',allow_pickle=True)
36 | # id_data = pickle.load(open('./data/VOC-Detection/' + args.model + '/'+args.name+'/random_seed'+'_' +str(args.seed)  +'/inference/voc_custom_val/standard_nms/corruption_level_0/probabilistic_scoring_res_odd_'+str(args.thres)+'.pkl', 'rb'))
37 | # ood_data = pickle.load(open('./data/VOC-Detection/' + args.model + '/'+args.name+'/random_seed' +'_'+str(args.seed)  +'/inference/coco_ood_val/standard_nms/corruption_level_0/probabilistic_scoring_res_odd_'+str(args.thres)+'.pkl', 'rb'))
38 | # id_score = []
39 | # energy score calculation.
40 | # import ipdb; ipdb.set_trace()
41 | index = 0
42 | for data in id_data:
43 |     if index == 0:
44 |         id_data_all = data
45 |         index += 1
46 |     else:
47 |         id_data_all = np.concatenate([id_data_all, data], 0)
48 | 
49 | id_data = torch.from_numpy(id_data_all)
50 | 
51 | index = 0
52 | for data in ood_data:
53 |     if index == 0:
54 |         ood_data_all = data
55 |         index += 1
56 |     else:
57 |         ood_data_all = np.concatenate([ood_data_all, data], 0)
58 | 
59 | ood_data = torch.from_numpy(ood_data_all)
60 | 
61 | # id_data = id_data / 1000 - torch.max(id_data, 1, keepdim=True)[0]
62 | # ood_data = ood_data / 1000 - torch.max(ood_data, 1, keepdim=True)[0]
63 | T = 1
64 | 
65 | # breakpoint()
66 | # id_data = id_data[F.softmax(id_data[:,:-1], 1).max(1)[0] > 0.2]
67 | # ood_data = ood_data[F.softmax(ood_data[:,:-1], 1).max(1)[0] > 0.2]
68 | 
69 | assert len(id_data[0]) == 41
70 | if args.energy:
71 |     id_score = -args.T * torch.logsumexp(id_data[:, :-1] / args.T, dim=1).cpu().data.numpy()
72 |     ood_score = -args.T * torch.logsumexp(ood_data[:, :-1] / args.T, dim=1).cpu().data.numpy()
73 | else:
74 |     id_score = -np.max(F.softmax(id_data[:, :-1], dim=1).cpu().data.numpy(), axis=1)
75 |     ood_score = -np.max(F.softmax(ood_data[:, :-1], dim=1).cpu().data.numpy(), axis=1)
76 | 
77 | ###########
78 | ########
79 | print(len(id_score))
80 | print(len(ood_score))
81 | 
82 | # measures = get_measures(-id_score[:15866], -ood_score[:5258], plot=False)
83 | measures = get_measures(-id_score, -ood_score, plot=False)
84 | if args.energy:
85 |     print_measures(measures[0], measures[1], measures[2], 'energy')
86 | else:
87 |     print_measures(measures[0], measures[1], measures[2], 'msp')
88 | 
89 | 
90 | 


--------------------------------------------------------------------------------
/tools/visualize_data.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | import argparse
 3 | import numpy as np
 4 | import os
 5 | from itertools import chain
 6 | import cv2
 7 | from PIL import Image
 8 | 
 9 | from detectron2.config import get_cfg
10 | from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_train_loader
11 | from detectron2.data import detection_utils as utils
12 | from detectron2.data.build import filter_images_with_few_keypoints
13 | from detectron2.utils.logger import setup_logger
14 | from detectron2.utils.visualizer import Visualizer
15 | 
16 | 
17 | def setup(args):
18 |     cfg = get_cfg()
19 |     if args.config_file:
20 |         cfg.merge_from_file(args.config_file)
21 |     cfg.merge_from_list(args.opts)
22 |     cfg.freeze()
23 |     return cfg
24 | 
25 | 
26 | def parse_args(in_args=None):
27 |     parser = argparse.ArgumentParser(description="Visualize ground-truth data")
28 |     parser.add_argument(
29 |         "--source",
30 |         choices=["annotation", "dataloader"],
31 |         required=True,
32 |         help="visualize the annotations or the data loader (with pre-processing)",
33 |     )
34 |     parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file")
35 |     parser.add_argument("--output-dir", default="./", help="path to output directory")
36 |     parser.add_argument("--show", action="store_true", help="show output in a window")
37 |     parser.add_argument(
38 |         "opts",
39 |         help="Modify config options using the command-line",
40 |         default=None,
41 |         nargs=argparse.REMAINDER,
42 |     )
43 |     return parser.parse_args(in_args)
44 | 
45 | 
46 | if __name__ == "__main__":
47 |     args = parse_args()
48 |     logger = setup_logger()
49 |     logger.info("Arguments: " + str(args))
50 |     cfg = setup(args)
51 | 
52 |     dirname = args.output_dir
53 |     os.makedirs(dirname, exist_ok=True)
54 |     metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])
55 | 
56 |     def output(vis, fname):
57 |         if args.show:
58 |             print(fname)
59 |             cv2.imshow("window", vis.get_image()[:, :, ::-1])
60 |             cv2.waitKey()
61 |         else:
62 |             filepath = os.path.join(dirname, fname)
63 |             print("Saving to {} ...".format(filepath))
64 |             vis.save(filepath)
65 | 
66 |     scale = 2.0 if args.show else 1.0
67 |     if args.source == "dataloader":
68 |         train_data_loader = build_detection_train_loader(cfg)
69 |         for batch in train_data_loader:
70 |             for per_image in batch:
71 |                 # Pytorch tensor is in (C, H, W) format
72 |                 img = per_image["image"].permute(1, 2, 0)
73 |                 if cfg.INPUT.FORMAT == "BGR":
74 |                     img = img[:, :, [2, 1, 0]]
75 |                 else:
76 |                     img = np.asarray(Image.fromarray(img, mode=cfg.INPUT.FORMAT).convert("RGB"))
77 | 
78 |                 visualizer = Visualizer(img, metadata=metadata, scale=scale)
79 |                 target_fields = per_image["instances"].get_fields()
80 |                 labels = [metadata.thing_classes[i] for i in target_fields["gt_classes"]]
81 |                 vis = visualizer.overlay_instances(
82 |                     labels=labels,
83 |                     boxes=target_fields.get("gt_boxes", None),
84 |                     masks=target_fields.get("gt_masks", None),
85 |                     keypoints=target_fields.get("gt_keypoints", None),
86 |                 )
87 |                 output(vis, str(per_image["image_id"]) + ".jpg")
88 |     else:
89 |         dicts = list(chain.from_iterable([DatasetCatalog.get(k) for k in cfg.DATASETS.TRAIN]))
90 |         if cfg.MODEL.KEYPOINT_ON:
91 |             dicts = filter_images_with_few_keypoints(dicts, 1)
92 |         for dic in dicts:
93 |             img = utils.read_image(dic["file_name"], "RGB")
94 |             visualizer = Visualizer(img, metadata=metadata, scale=scale)
95 |             vis = visualizer.draw_dataset_dict(dic)
96 |             output(vis, os.path.basename(dic["file_name"]))
97 | 


--------------------------------------------------------------------------------
/tools/visualize_json_results.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 3 | 
 4 | import argparse
 5 | import json
 6 | import numpy as np
 7 | import os
 8 | from collections import defaultdict
 9 | import cv2
10 | import tqdm
11 | from fvcore.common.file_io import PathManager
12 | 
13 | from detectron2.data import DatasetCatalog, MetadataCatalog
14 | from detectron2.structures import Boxes, BoxMode, Instances
15 | from detectron2.utils.logger import setup_logger
16 | from detectron2.utils.visualizer import Visualizer
17 | 
18 | 
19 | def create_instances(predictions, image_size):
20 |     ret = Instances(image_size)
21 | 
22 |     score = np.asarray([x["score"] for x in predictions])
23 |     chosen = (score > args.conf_threshold).nonzero()[0]
24 |     score = score[chosen]
25 |     bbox = np.asarray([predictions[i]["bbox"] for i in chosen])
26 |     bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
27 | 
28 |     labels = np.asarray([dataset_id_map(predictions[i]["category_id"]) for i in chosen])
29 | 
30 |     ret.scores = score
31 |     ret.pred_boxes = Boxes(bbox)
32 |     ret.pred_classes = labels
33 | 
34 |     try:
35 |         ret.pred_masks = [predictions[i]["segmentation"] for i in chosen]
36 |     except KeyError:
37 |         pass
38 |     return ret
39 | 
40 | 
41 | if __name__ == "__main__":
42 |     parser = argparse.ArgumentParser(
43 |         description="A script that visualizes the json predictions from COCO or LVIS dataset."
44 |     )
45 |     parser.add_argument("--input", '-i', required=True, help="JSON file produced by the model")
46 |     parser.add_argument("--output", '-o', required=True, help="output directory")
47 |     parser.add_argument("--dataset", '-d',  help="name of the dataset", default="coco_2017_val")
48 |     parser.add_argument("--conf-threshold", '-c', default=0.5, type=float, help="confidence threshold")
49 |     args = parser.parse_args()
50 | 
51 |     logger = setup_logger()
52 | 
53 |     with PathManager.open(args.input, "r") as f:
54 |         predictions = json.load(f)
55 | 
56 |     os.makedirs(args.output, exist_ok=True)
57 | 
58 |     pred_by_image = defaultdict(list)
59 |     for p in predictions:
60 |         pred_by_image[p["image_id"]].append(p)
61 | 
62 |     dicts = list(DatasetCatalog.get(args.dataset))
63 |     metadata = MetadataCatalog.get(args.dataset)
64 |     if hasattr(metadata, "thing_dataset_id_to_contiguous_id"):
65 | 
66 |         def dataset_id_map(ds_id):
67 |             return metadata.thing_dataset_id_to_contiguous_id[ds_id]
68 | 
69 |     elif "lvis" in args.dataset:
70 |         # LVIS results are in the same format as COCO results, but have a different
71 |         # mapping from dataset category id to contiguous category id in [0, #categories - 1]
72 |         def dataset_id_map(ds_id):
73 |             return ds_id - 1
74 | 
75 |     else:
76 |         raise ValueError("Unsupported dataset: {}".format(args.dataset))
77 | 
78 |     os.makedirs(args.output, exist_ok=True)
79 | 
80 |     for dic in tqdm.tqdm(dicts):
81 |         img = cv2.imread(dic["file_name"], cv2.IMREAD_COLOR)[:, :, ::-1]
82 |         basename = os.path.basename(dic["file_name"])
83 | 
84 |         predictions = create_instances(pred_by_image[dic["image_id"]], img.shape[:2])
85 |         vis = Visualizer(img, metadata)
86 |         vis_pred = vis.draw_instance_predictions(predictions).get_image()
87 | 
88 |         vis = Visualizer(img, metadata)
89 |         vis_gt = vis.draw_dataset_dict(dic).get_image()
90 | 
91 |         concat = np.concatenate((vis_pred, vis_gt), axis=1)
92 |         cv2.imwrite(os.path.join(args.output, basename), concat[:, :, ::-1])
93 | 


--------------------------------------------------------------------------------
/tools/waymo_bdd.py:
--------------------------------------------------------------------------------
  1 | import pickle
  2 | import torch
  3 | import torch.nn.functional as F
  4 | import numpy as np
  5 | import argparse
  6 | import pandas as pd
  7 | import seaborn as sns
  8 | import matplotlib
  9 | matplotlib.use('AGG')
 10 | import matplotlib.pyplot as plt
 11 | from metric_utils import *
 12 | 
 13 | recall_level_default = 0.95
 14 | 
 15 | 
 16 | parser = argparse.ArgumentParser(description='Evaluates an OOD Detector',
 17 |                                  formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 18 | parser.add_argument('--energy', type=int, default=1, help='noise for Odin')
 19 | parser.add_argument('--T', default=1., type=float, help='temperature: energy|Odin')
 20 | parser.add_argument('--thres', default=1., type=float)
 21 | parser.add_argument('--name', default=1., type=str)
 22 | parser.add_argument('--seed', default=0, type=int)
 23 | parser.add_argument('--model', default='faster-rcnn', type=str)
 24 | args = parser.parse_args()
 25 | 
 26 | 
 27 | 
 28 | concat = lambda x: np.concatenate(x, axis=0)
 29 | to_np = lambda x: x.data.cpu().numpy()
 30 | 
 31 | 
 32 | 
 33 | # ID data
 34 | ood_data = np.load('/nobackup-fast/dataset/my_xfdu/video/waymo/checkpoints/waymo_reported/' + str(args.model) + '/ood.npy',allow_pickle=True)
 35 | id_data = np.load('/nobackup-fast/dataset/my_xfdu/video/waymo/checkpoints/waymo_reported/' + str(args.model) + '/id.npy',allow_pickle=True)
 36 | # id_data = pickle.load(open('./data/VOC-Detection/' + args.model + '/'+args.name+'/random_seed'+'_' +str(args.seed)  +'/inference/voc_custom_val/standard_nms/corruption_level_0/probabilistic_scoring_res_odd_'+str(args.thres)+'.pkl', 'rb'))
 37 | # ood_data = pickle.load(open('./data/VOC-Detection/' + args.model + '/'+args.name+'/random_seed' +'_'+str(args.seed)  +'/inference/coco_ood_val/standard_nms/corruption_level_0/probabilistic_scoring_res_odd_'+str(args.thres)+'.pkl', 'rb'))
 38 | # id_score = []
 39 | # energy score calculation.
 40 | # import ipdb; ipdb.set_trace()
 41 | index = 0
 42 | for data in id_data:
 43 |     if index == 0:
 44 |         id_data_all = data
 45 |         index += 1
 46 |     else:
 47 |         id_data_all = np.concatenate([id_data_all, data], 0)
 48 | 
 49 | id_data = torch.from_numpy(id_data_all)
 50 | 
 51 | index = 0
 52 | for data in ood_data:
 53 |     if index == 0:
 54 |         ood_data_all = data
 55 |         index += 1
 56 |     else:
 57 |         ood_data_all = np.concatenate([ood_data_all, data], 0)
 58 | 
 59 | ood_data = torch.from_numpy(ood_data_all)
 60 | 
 61 | 
 62 | 
 63 | T = 1
 64 | # breakpoint()
 65 | 
 66 | 
 67 | 
 68 | assert len(id_data[0]) == 4
 69 | if args.energy:
 70 |     id_score = -args.T * torch.logsumexp(id_data[:, :-1] / args.T, dim=1).cpu().data.numpy()
 71 |     ood_score = -args.T * torch.logsumexp(ood_data[:, :-1] / args.T, dim=1).cpu().data.numpy()
 72 | else:
 73 |     id_score = -np.max(F.softmax(id_data[:, :-1], dim=1).cpu().data.numpy(), axis=1)
 74 |     ood_score = -np.max(F.softmax(ood_data[:, :-1], dim=1).cpu().data.numpy(), axis=1)
 75 | 
 76 | ###########
 77 | ########
 78 | print(len(id_score))
 79 | print(len(ood_score))
 80 | 
 81 | measures = get_measures(-id_score, -ood_score, plot=False)
 82 | 
 83 | if args.energy:
 84 |     print_measures(measures[0], measures[1], measures[2], 'energy')
 85 | else:
 86 |     print_measures(measures[0], measures[1], measures[2], 'msp')
 87 | 
 88 | # # import ipdb; ipdb.set_trace()
 89 | # plt.figure(figsize=(5.5,3))
 90 | # # plot of 2 variables
 91 | # id_pd = pd.Series(-id_score)
 92 | # # id_pd.rename('ID')
 93 | #
 94 | # ood_pd = pd.Series(-ood_score)
 95 | # # ood_pd.rename('OOD')
 96 | # # data_plot = {'Energy': np.concatenate((-id_score[0:2000], -ood_score), 0), 'label':['ID'] * len(-id_score[0:2000]) + \
 97 | # #                                                                            ['OOD'] * len(-ood_score)}
 98 | # # df_after = pd.DataFrame(data=data_plot)
 99 | # # sns.histplot(data=df_after, x="Energy", hue="label")
100 | # p1 = sns.kdeplot(id_pd, shade=True, color="r", label='ID')
101 | # p1 = sns.kdeplot(ood_pd, shade=True, color="b", label='OOD')
102 | # plt.legend(fontsize=12)
103 | # plt.xticks(fontsize=12)
104 | # plt.yticks(fontsize=12)
105 | # # plt.ylabel('Density', fontsize=12)
106 | # if args.energy:
107 | #     plt.savefig('voc_coco_gan.jpg', dpi=250)
108 | # else:
109 | #     plt.savefig('voc_coco_msp_probdet.jpg', dpi=250)
110 | # # sns.plt.show()
111 | 


--------------------------------------------------------------------------------