├── LICENSE
├── README.md
├── __init__.py
├── configs
│   ├── .DS_Store
│   ├── BDD100k
│   │   ├── R50_FPN_all.yaml
│   │   ├── R50_FPN_all_ood.yaml
│   │   ├── R50_FPN_all_ood_nu.yaml
│   │   ├── stud_regnet.yaml
│   │   ├── stud_regnet_ood_coco.yaml
│   │   ├── stud_regnet_ood_nu.yaml
│   │   ├── stud_resnet.yaml
│   │   ├── stud_resnet_ood_coco.yaml
│   │   └── stud_resnet_ood_nu.yaml
│   ├── Base-RCNN-FPN.yaml
│   └── VIS
│       ├── R50_FPN_all.yaml
│       ├── R50_FPN_all_ood_coco.yaml
│       ├── R50_FPN_all_ood_nu.yaml
│       ├── stud_regnet.yaml
│       ├── stud_regnet_ood_coco.yaml
│       ├── stud_regnet_ood_nu.yaml
│       ├── stud_resnet.yaml
│       ├── stud_resnet_ood_coco.yaml
│       └── stud_resnet_ood_nu.yaml
├── datasets
│   ├── __init__.py
│   ├── bdd100k2coco.py
│   ├── convert_bdd_3cls.py
│   ├── convert_bdd_ood.py
│   ├── convert_city.py
│   ├── convert_coco_vis.py
│   ├── convert_nu.py
│   ├── convert_nu_ood.py
│   ├── convert_openimages_vis.py
│   ├── convert_vg_ood.py
│   ├── convert_vis_val.py
│   ├── convert_vis_val1.py
│   ├── convert_vis_val_new.py
│   ├── domain_splits_bdd100k.py
│   ├── metadata.py
│   ├── vg_classes.py
│   └── waymo2coco.py
├── figs
│   ├── .DS_Store
│   └── cycle_confusion_arch.png
├── permutations
│   ├── permutations_hamming_all_24.npy
│   ├── permutations_hamming_max_1000.npy
│   ├── permutations_hamming_max_2.npy
│   ├── permutations_hamming_max_24.npy
│   └── permutations_hamming_max_35.npy
├── pyproject.toml
├── requirements.txt
├── setup.cfg
├── src
│   ├── __init__.py
│   ├── config
│   │   ├── __init__.py
│   │   ├── config.py
│   │   └── defaults.py
│   ├── data
│   │   ├── __init__.py
│   │   ├── build.py
│   │   ├── builtin.py
│   │   ├── coco.py
│   │   ├── pair_all_sampler.py
│   │   ├── pair_fix_sampler.py
│   │   ├── pair_sampler.py
│   │   ├── pair_sampler_multi_interval.py
│   │   └── pair_sampler_multi_random.py
│   ├── engine
│   │   ├── __init__.py
│   │   ├── defaults.py
│   │   ├── evaluate.py
│   │   ├── evaluator.py
│   │   ├── myvisualizer.py
│   │   └── train_loop.py
│   └── modeling
│       ├── __init__.py
│       ├── meta_arch
│       │   ├── GAN.py
│       │   ├── Imagelist.py
│       │   ├── __init__.py
│       │   ├── backbone.py
│       │   ├── build.py
│       │   ├── dla.py
│       │   ├── eff.py
│       │   ├── fpn.py
│       │   ├── layers
│       │   │   ├── __init__.py
│       │   │   ├── activations.py
│       │   │   └── wrappers.py
│       │   ├── rcnn.py
│       │   ├── rcnn_csi.py
│       │   ├── rcnn_gan.py
│       │   ├── rcnn_ss.py
│       │   ├── rcnn_ss_add.py
│       │   ├── rcnn_ss_cache.py
│       │   ├── rcnn_ss_cheap.py
│       │   ├── rcnn_ss_gene.py
│       │   ├── rcnn_ss_mixup.py
│       │   ├── rcnn_ss_remove.py
│       │   ├── rcnn_ss_single.py
│       │   ├── regnet.py
│       │   ├── regnet_model.py
│       │   ├── resnest.py
│       │   ├── resnest1.py
│       │   ├── splat.py
│       │   └── vovnet.py
│       ├── roi_heads
│       │   ├── __init__.py
│       │   ├── fast_rcnn.py
│       │   ├── roi_heads.py
│       │   ├── roi_heads_add.py
│       │   ├── roi_heads_csi.py
│       │   ├── roi_heads_gan.py
│       │   └── roi_heads_godinc.py
│       ├── self_supervised
│       │   ├── __init__.py
│       │   ├── build.py
│       │   ├── cycle.py
│       │   ├── cycle_energy.py
│       │   ├── cycle_energy_1024_latter.py
│       │   ├── cycle_energy_direct.py
│       │   ├── cycle_energy_direct_add.py
│       │   ├── cycle_energy_direct_add_all.py
│       │   ├── cycle_energy_direct_add_all_cache.py
│       │   ├── cycle_energy_direct_add_all_cache_new.py
│       │   ├── cycle_energy_direct_add_all_max.py
│       │   ├── cycle_energy_direct_add_all_mild_energy.py
│       │   ├── cycle_energy_direct_add_all_noise.py
│       │   ├── cycle_energy_direct_add_all_random.py
│       │   ├── cycle_energy_direct_add_att.py
│       │   ├── cycle_energy_direct_add_att_neg.py
│       │   ├── cycle_energy_direct_add_cache.py
│       │   ├── cycle_energy_direct_add_cache_new.py
│       │   ├── cycle_energy_direct_max.py
│       │   ├── cycle_energy_direct_no.py
│       │   ├── cycle_energy_direct_random.py
│       │   ├── jigsaw.py
│       │   ├── leftright.py
│       │   ├── rotation.py
│       │   └── ss_layers.py
│       ├── utils
│       │   ├── __init__.py
│       │   └── image_list.py
│       └── vit
│           ├── __init__.py
│           ├── config.py
│           ├── dataset_mapper.py
│           ├── linformer.py
│           ├── longformer2d.py
│           ├── msvit.py
│           ├── performer.py
│           ├── slidingchunk_2d.py
│           └── srformer.py
└── tools
├── __init__.py ├── agg_results.py ├── analyze_bdd_fea.py ├── analyze_energy.py ├── bdd_coco.py ├── bdd_heatmap.py ├── ckpt_surgery.py ├── convert_weight.py ├── count.py ├── metric_utils.py ├── plot_frame_interval.py ├── plot_frame_range.py ├── plot_loss.py ├── plot_vos.py ├── select_permutation.py ├── simple_permutation.py ├── train_net.py ├── vis_coco.py ├── visualize_data.py ├── visualize_json_results.py └── waymo_bdd.py /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeplearning-wisc/stud/21b9492c63804d7acf41fefd0d6ad40cf29975a5/__init__.py -------------------------------------------------------------------------------- /configs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeplearning-wisc/stud/21b9492c63804d7acf41fefd0d6ad40cf29975a5/configs/.DS_Store -------------------------------------------------------------------------------- /configs/BDD100k/R50_FPN_all.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "GeneralizedRCNN1" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | ROI_HEADS: 9 | NUM_CLASSES: 10 10 | DATASETS: 11 | # TRAIN: ("waymo_all_train", ) 12 | TRAIN: ("bdd_tracking_2k_train",) 13 | TEST: ("bdd_tracking_2k_val",) 14 | SOLVER: 15 | # the size of the BDD tracking daytime is about 2x of the BDD100k dataset 16 | # for initial experiments, this schedule will be approximately, 0.5X schedule 17 | # ~5 epochs 18 | IMS_PER_BATCH: 16 19 | BASE_LR: 0.02 20 | STEPS: (36000, 48000,) 21 | MAX_ITER: 52500 22 | INPUT: 23 | MIN_SIZE_TRAIN: (800,) 24 | CROP: 25 | ENABLED: True 26 | OUTPUT_DIR: "/nobackup-slow/dataset/video/bdd/bdd100k/checkpoints/bdd_tracking_2k/daytime/R50_FPN_all_vanilla" -------------------------------------------------------------------------------- /configs/BDD100k/R50_FPN_all_ood.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "GeneralizedRCNN1" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | ROI_HEADS: 9 | NUM_CLASSES: 10 10 | DATASETS: 11 | # TRAIN: ("waymo_all_train", ) 12 | TRAIN: ("bdd_tracking_2k_train",) 13 | TEST: ("coco_2017_val_ood_wrt_bdd",) 14 | SOLVER: 15 | # the size of the BDD tracking daytime is about 2x of the BDD100k dataset 16 | # for initial experiments, this schedule will be approximately, 0.5X schedule 17 | # ~5 epochs 18 | IMS_PER_BATCH: 16 19 | BASE_LR: 0.02 20 | STEPS: (36000, 48000,) 21 | MAX_ITER: 52500 22 | INPUT: 23 | MIN_SIZE_TRAIN: (800,) 24 | CROP: 25 | ENABLED: True 26 | OUTPUT_DIR: "/nobackup-slow/dataset/video/bdd/bdd100k/checkpoints/bdd_tracking_2k/daytime/R50_FPN_all_vanilla" -------------------------------------------------------------------------------- /configs/BDD100k/R50_FPN_all_ood_nu.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "GeneralizedRCNN1" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | ROI_HEADS: 9 | NUM_CLASSES: 10 10 | DATASETS: 11 | # TRAIN: ("waymo_all_train", ) 12 | TRAIN: ("bdd_tracking_2k_train",) 13 | TEST: ("nu_bdd_ood",) 14 | SOLVER: 15 
| # the size of the BDD tracking daytime is about 2x of the BDD100k dataset 16 | # for initial experiments, this schedule will be approximately, 0.5X schedule 17 | # ~5 epochs 18 | IMS_PER_BATCH: 16 19 | BASE_LR: 0.02 20 | STEPS: (36000, 48000,) 21 | MAX_ITER: 52500 22 | INPUT: 23 | MIN_SIZE_TRAIN: (800,) 24 | CROP: 25 | ENABLED: True 26 | OUTPUT_DIR: "/nobackup-slow/dataset/video/bdd/bdd100k/checkpoints/bdd_tracking_2k/daytime/R50_FPN_all_vanilla" -------------------------------------------------------------------------------- /configs/BDD100k/stud_regnet.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | PIXEL_STD: [57.375, 57.120, 58.395] 4 | BACKBONE: 5 | NAME: "build_regnetx_fpn_backbone" 6 | WEIGHTS: "RegNetX-4.0GF_dds_8gpu.pyth" #"regnetx_detectron2.pth" 7 | META_ARCHITECTURE: "SSRCNN" 8 | # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 9 | MASK_ON: False 10 | RESNETS: 11 | DEPTH: 50 12 | ROI_HEADS: 13 | NAME: "StandardROIHeadsSS" 14 | NUM_CLASSES: 10 15 | FPN: 16 | IN_FEATURES: [ "s1", "s2", "s3", "s4" ] 17 | SS: 18 | FEAT_LEVEL: "res4" 19 | NAME: ["build_cycle_energy_direct_add_all_head"] 20 | LOSS_SCALE: 0.0 21 | RATIO: 2.0 22 | COEF: 1.0 23 | FILTERING1: 0.4 24 | FILTERING2: 0.6 25 | ENERGY_WEIGHT: 0.05 26 | DATASETS: 27 | TRAIN: ("bdd_tracking_2k_train",) 28 | TEST: ("bdd_tracking_2k_val", ) 29 | SOLVER: 30 | IMS_PER_BATCH: 4 31 | WEIGHT_DECAY: 5e-5 32 | # IMS_PER_BATCH: 16 33 | BASE_LR: 0.02 34 | # STEPS: (31200, 41600,) 35 | # MAX_ITER: 45500 36 | STEPS: (36000, 48000,) 37 | MAX_ITER: 52500 38 | WARMUP_ITERS: 5000 39 | INPUT: 40 | MIN_SIZE_TRAIN: (800,) 41 | RANDOM_FLIP: "none" 42 | CROP: 43 | ENABLED: True 44 | SEED: 34112225 45 | DATALOADER: 46 | SAMPLER_TRAIN: "PairTrainingMultiRandomSampler" 47 | PAIR_OFFSET_RANGE: 9 48 | SELCTED_NUMBER: 3 49 | OUTPUT_DIR: "/nobackup-slow/dataset/video/bdd/bdd100k/checkpoints/bdd_tracking_2k/daytime/energy_no_original_loss_direct_add_frame_9_revise_4to6_multi_random_regnet_longer" -------------------------------------------------------------------------------- /configs/BDD100k/stud_regnet_ood_coco.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | PIXEL_STD: [57.375, 57.120, 58.395] 4 | BACKBONE: 5 | NAME: "build_regnetx_fpn_backbone" 6 | WEIGHTS: "RegNetX-4.0GF_dds_8gpu.pyth" #"regnetx_detectron2.pth" 7 | META_ARCHITECTURE: "SSRCNN" 8 | # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 9 | MASK_ON: False 10 | RESNETS: 11 | DEPTH: 50 12 | ROI_HEADS: 13 | NAME: "StandardROIHeadsSS" 14 | NUM_CLASSES: 10 15 | FPN: 16 | IN_FEATURES: [ "s1", "s2", "s3", "s4" ] 17 | SS: 18 | FEAT_LEVEL: "res4" 19 | NAME: ["build_cycle_energy_direct_add_all_head"] 20 | LOSS_SCALE: 0.0 21 | RATIO: 2.0 22 | COEF: 1.0 23 | FILTERING1: 0.4 24 | FILTERING2: 0.6 25 | ENERGY_WEIGHT: 0.05 26 | DATASETS: 27 | TRAIN: ("bdd_tracking_2k_train",) 28 | TEST: ("nu_bdd_ood", ) #coco_2017_val_ood_wrt_bdd 29 | SOLVER: 30 | IMS_PER_BATCH: 4 31 | WEIGHT_DECAY: 5e-5 32 | # IMS_PER_BATCH: 16 33 | BASE_LR: 0.02 34 | # STEPS: (31200, 41600,) 35 | # MAX_ITER: 45500 36 | STEPS: (36000, 48000,) 37 | MAX_ITER: 52500 38 | WARMUP_ITERS: 5000 39 | INPUT: 40 | MIN_SIZE_TRAIN: (800,) 41 | RANDOM_FLIP: "none" 42 | CROP: 43 | ENABLED: True 44 | SEED: 34112225 45 | DATALOADER: 46 | SAMPLER_TRAIN: "PairTrainingMultiRandomSampler" 47 | PAIR_OFFSET_RANGE: 9 48 | SELCTED_NUMBER: 3 49 | OUTPUT_DIR: 
"/nobackup-slow/dataset/video/bdd/bdd100k/checkpoints/bdd_tracking_2k/daytime/energy_no_original_loss_direct_add_frame_9_revise_4to6_multi_random_regnet_random_scaling1" -------------------------------------------------------------------------------- /configs/BDD100k/stud_regnet_ood_nu.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | PIXEL_STD: [57.375, 57.120, 58.395] 4 | BACKBONE: 5 | NAME: "build_regnetx_fpn_backbone" 6 | WEIGHTS: "RegNetX-4.0GF_dds_8gpu.pyth" 7 | META_ARCHITECTURE: "SSRCNN" 8 | # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 9 | MASK_ON: False 10 | RESNETS: 11 | DEPTH: 50 12 | ROI_HEADS: 13 | NAME: "StandardROIHeadsSS" 14 | NUM_CLASSES: 10 15 | FPN: 16 | IN_FEATURES: [ "s1", "s2", "s3", "s4" ] 17 | SS: 18 | FEAT_LEVEL: "res4" 19 | NAME: ["build_cycle_energy_direct_add_all_head"] 20 | LOSS_SCALE: 0.0 21 | RATIO: 2.0 22 | COEF: 1.0 23 | FILTERING1: 0.4 24 | FILTERING2: 0.6 25 | ENERGY_WEIGHT: 0.05 26 | DATASETS: 27 | TRAIN: ("bdd_tracking_2k_train",) 28 | TEST: ("nu_bdd_ood", ) 29 | SOLVER: 30 | IMS_PER_BATCH: 4 31 | WEIGHT_DECAY: 5e-5 32 | # IMS_PER_BATCH: 16 33 | BASE_LR: 0.02 34 | # STEPS: (31200, 41600,) 35 | # MAX_ITER: 45500 36 | STEPS: (36000, 48000,) 37 | MAX_ITER: 52500 38 | WARMUP_ITERS: 5000 39 | INPUT: 40 | MIN_SIZE_TRAIN: (800,) 41 | RANDOM_FLIP: "none" 42 | CROP: 43 | ENABLED: True 44 | SEED: 34112225 45 | DATALOADER: 46 | SAMPLER_TRAIN: "PairTrainingMultiRandomSampler" 47 | PAIR_OFFSET_RANGE: 9 48 | SELCTED_NUMBER: 3 49 | OUTPUT_DIR: "/nobackup/dataset/video/bdd/bdd100k/checkpoints/bdd_tracking_2k/daytime/energy_no_original_loss_direct_add_frame_9_revise_4to6_multi_random_regnet" -------------------------------------------------------------------------------- /configs/BDD100k/stud_resnet.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SSRCNN" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | ROI_HEADS: 9 | NAME: "StandardROIHeadsSS" 10 | NUM_CLASSES: 10 11 | SS: 12 | FEAT_LEVEL: "res4" 13 | NAME: ["build_cycle_energy_direct_add_all_head"] 14 | LOSS_SCALE: 0.0 15 | RATIO: 2.0 16 | COEF: 1.0 17 | FILTERING1: 0.4 18 | FILTERING2: 0.6 19 | ENERGY_WEIGHT: 0.05 20 | DATASETS: 21 | TRAIN: ("bdd_tracking_2k_train",) 22 | TEST: ("bdd_tracking_2k_val", ) 23 | SOLVER: 24 | IMS_PER_BATCH: 4 25 | # IMS_PER_BATCH: 16 26 | BASE_LR: 0.02 27 | # STEPS: (31200, 41600,) 28 | # MAX_ITER: 45500 29 | STEPS: (36000, 48000,) 30 | MAX_ITER: 52500 31 | WARMUP_ITERS: 5000 32 | INPUT: 33 | MIN_SIZE_TRAIN: (800,) 34 | RANDOM_FLIP: "none" 35 | CROP: 36 | ENABLED: True 37 | SEED: 34112225 38 | DATALOADER: 39 | SAMPLER_TRAIN: "PairTrainingMultiRandomSampler" 40 | PAIR_OFFSET_RANGE: 9 41 | SELCTED_NUMBER: 3 42 | OUTPUT_DIR: "/nobackup-slow/dataset/my_xfdu/video/bdd/bdd100k/checkpoints/bdd_tracking_2k/daytime/energy_no_original_loss_direct_add_frame_9_revise_4to6_multi_random_seed_refactor" -------------------------------------------------------------------------------- /configs/BDD100k/stud_resnet_ood_coco.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SSRCNN" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | ROI_HEADS: 9 | NAME: "StandardROIHeadsSS" 10 
| NUM_CLASSES: 10 11 | SS: 12 | FEAT_LEVEL: "res4" 13 | NAME: ["build_cycle_energy_direct_add_all_head"] 14 | LOSS_SCALE: 0.0 15 | RATIO: 2.0 16 | COEF: 1.0 17 | FILTERING1: 0.4 18 | FILTERING2: 0.6 19 | ENERGY_WEIGHT: 0.05 20 | DATASETS: 21 | TRAIN: ("bdd_tracking_2k_train",) 22 | TEST: ("coco_2017_val_ood_wrt_bdd", ) 23 | SOLVER: 24 | IMS_PER_BATCH: 4 25 | # IMS_PER_BATCH: 16 26 | BASE_LR: 0.02 27 | # STEPS: (31200, 41600,) 28 | # MAX_ITER: 45500 29 | STEPS: (36000, 48000,) 30 | MAX_ITER: 52500 31 | WARMUP_ITERS: 5000 32 | INPUT: 33 | MIN_SIZE_TRAIN: (800,) 34 | RANDOM_FLIP: "none" 35 | CROP: 36 | ENABLED: True 37 | SEED: 34112225 38 | DATALOADER: 39 | SAMPLER_TRAIN: "PairTrainingMultiRandomSampler" 40 | PAIR_OFFSET_RANGE: 9 41 | SELCTED_NUMBER: 3 42 | OUTPUT_DIR: "/nobackup-slow/dataset/my_xfdu/video/bdd/bdd100k/checkpoints/bdd_tracking_2k/daytime/energy_no_original_loss_direct_add_frame_9_revise_4to6_multi_random_seed_refactor" -------------------------------------------------------------------------------- /configs/BDD100k/stud_resnet_ood_nu.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SSRCNN" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | ROI_HEADS: 9 | NAME: "StandardROIHeadsSS" 10 | NUM_CLASSES: 10 11 | SS: 12 | FEAT_LEVEL: "res4" 13 | NAME: ["build_cycle_energy_direct_add_all_head"] 14 | LOSS_SCALE: 0.0 15 | RATIO: 2.0 16 | COEF: 1.0 17 | FILTERING1: 0.4 18 | FILTERING2: 0.6 19 | ENERGY_WEIGHT: 0.05 20 | DATASETS: 21 | TRAIN: ("bdd_tracking_2k_train",) 22 | TEST: ("nu_bdd_ood", ) 23 | SOLVER: 24 | IMS_PER_BATCH: 4 25 | # IMS_PER_BATCH: 16 26 | BASE_LR: 0.02 27 | # STEPS: (31200, 41600,) 28 | # MAX_ITER: 45500 29 | STEPS: (36000, 48000,) 30 | MAX_ITER: 52500 31 | WARMUP_ITERS: 5000 32 | INPUT: 33 | MIN_SIZE_TRAIN: (800,) 34 | RANDOM_FLIP: "none" 35 | CROP: 36 | ENABLED: True 37 | SEED: 34112225 38 | DATALOADER: 39 | SAMPLER_TRAIN: "PairTrainingMultiRandomSampler" 40 | PAIR_OFFSET_RANGE: 9 41 | SELCTED_NUMBER: 3 42 | OUTPUT_DIR: "/nobackup-slow/dataset/my_xfdu/video/bdd/bdd100k/checkpoints/bdd_tracking_2k/daytime/energy_no_original_loss_direct_add_frame_9_revise_4to6_multi_random_seed_refactor" -------------------------------------------------------------------------------- /configs/Base-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | NAME: "build_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 9 | ANCHOR_GENERATOR: 10 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 11 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 12 | RPN: 13 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 14 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 15 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 16 | # Detectron1 uses 2000 proposals per-batch, 17 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 18 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 
19 | POST_NMS_TOPK_TRAIN: 1000 20 | POST_NMS_TOPK_TEST: 1000 21 | ROI_HEADS: 22 | NAME: "StandardROIHeads" 23 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 24 | ROI_BOX_HEAD: 25 | NAME: "FastRCNNConvFCHead" 26 | NUM_FC: 2 27 | POOLER_RESOLUTION: 7 28 | ROI_MASK_HEAD: 29 | NAME: "MaskRCNNConvUpsampleHead" 30 | NUM_CONV: 4 31 | POOLER_RESOLUTION: 14 32 | DATASETS: 33 | TRAIN: ("coco_2017_train",) 34 | TEST: ("coco_2017_val",) 35 | SOLVER: 36 | IMS_PER_BATCH: 16 37 | BASE_LR: 0.02 38 | STEPS: (60000, 80000) 39 | MAX_ITER: 90000 40 | INPUT: 41 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 42 | -------------------------------------------------------------------------------- /configs/VIS/R50_FPN_all.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "GeneralizedRCNN1" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | ROI_HEADS: 9 | NUM_CLASSES: 40 10 | DATASETS: 11 | # TRAIN: ("waymo_all_train", ) 12 | TRAIN: ("vis21_train",) 13 | TEST: ("vis21_val",) 14 | SOLVER: 15 | # the size of the BDD tracking daytime is about 2x of the BDD100k dataset 16 | # for initial experiments, this schedule will be approximately, 0.5X schedule 17 | # ~5 epochs 18 | IMS_PER_BATCH: 16 19 | BASE_LR: 0.02 20 | STEPS: (36000, 48000,) 21 | MAX_ITER: 52500 22 | INPUT: 23 | MIN_SIZE_TRAIN: (800,) 24 | CROP: 25 | ENABLED: True 26 | OUTPUT_DIR: "/nobackup-slow/dataset/video/vis/checkpoints/VIS/R50_FPN_all_vanilla" -------------------------------------------------------------------------------- /configs/VIS/R50_FPN_all_ood_coco.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "GeneralizedRCNN1" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | ROI_HEADS: 9 | NUM_CLASSES: 40 10 | DATASETS: 11 | # TRAIN: ("waymo_all_train", ) 12 | TRAIN: ("vis21_train",) 13 | TEST: ("vis_coco_ood",) 14 | SOLVER: 15 | # the size of the BDD tracking daytime is about 2x of the BDD100k dataset 16 | # for initial experiments, this schedule will be approximately, 0.5X schedule 17 | # ~5 epochs 18 | IMS_PER_BATCH: 16 19 | BASE_LR: 0.02 20 | STEPS: (36000, 48000,) 21 | MAX_ITER: 52500 22 | INPUT: 23 | MIN_SIZE_TRAIN: (800,) 24 | CROP: 25 | ENABLED: True 26 | OUTPUT_DIR: "/nobackup-slow/dataset/video/vis/checkpoints/VIS/R50_FPN_all_vanilla" -------------------------------------------------------------------------------- /configs/VIS/R50_FPN_all_ood_nu.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "GeneralizedRCNN1" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | ROI_HEADS: 9 | NUM_CLASSES: 40 10 | DATASETS: 11 | # TRAIN: ("waymo_all_train", ) 12 | TRAIN: ("vis21_train",) 13 | TEST: ("nu_bdd_ood",) 14 | SOLVER: 15 | # the size of the BDD tracking daytime is about 2x of the BDD100k dataset 16 | # for initial experiments, this schedule will be approximately, 0.5X schedule 17 | # ~5 epochs 18 | IMS_PER_BATCH: 16 19 | BASE_LR: 0.02 20 | STEPS: (36000, 48000,) 21 | MAX_ITER: 52500 22 | INPUT: 23 | MIN_SIZE_TRAIN: (800,) 24 | CROP: 25 | ENABLED: True 26 | OUTPUT_DIR: "/nobackup-slow/dataset/video/vis/checkpoints/VIS/R50_FPN_all_vanilla" 
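
Note: the BDD100k and VIS configs above and below all extend configs/Base-RCNN-FPN.yaml and mainly override the datasets, the solver schedule, and the STUD-specific SS / DATALOADER keys. As a rough illustration (not code from this repository), one of these YAML files could be loaded and inspected with the project's own get_cfg exported from src/config, assuming the repo root is on PYTHONPATH and that src/config/defaults.py declares the extra keys used here; the override values in the snippet are arbitrary examples.

# Illustrative sketch only. Assumes src/config/defaults.py extends the
# detectron2 defaults with the SS / DATALOADER.PAIR_* keys referenced by
# the YAML files in this directory.
from src.config import get_cfg

cfg = get_cfg()                                      # clone of the extended defaults
cfg.merge_from_file("configs/VIS/R50_FPN_all.yaml")  # _BASE_ is resolved by detectron2's CfgNode
cfg.merge_from_list(["SOLVER.IMS_PER_BATCH", 8,      # example command-line style overrides
                     "OUTPUT_DIR", "./output/vis_vanilla"])
cfg.freeze()

print(cfg.MODEL.META_ARCHITECTURE)        # "GeneralizedRCNN1" for the vanilla configs
print(cfg.DATASETS.TRAIN, cfg.DATASETS.TEST)
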
-------------------------------------------------------------------------------- /configs/VIS/stud_regnet.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | PIXEL_STD: [ 57.375, 57.120, 58.395 ] 4 | BACKBONE: 5 | NAME: "build_regnetx_fpn_backbone" 6 | WEIGHTS: "RegNetX-4.0GF_dds_8gpu.pyth" #"regnetx_detectron2.pth" 7 | META_ARCHITECTURE: "SSRCNN" 8 | MASK_ON: False 9 | RESNETS: 10 | DEPTH: 50 11 | ROI_HEADS: 12 | NAME: "StandardROIHeadsSS" 13 | NUM_CLASSES: 40 14 | FPN: 15 | IN_FEATURES: [ "s1", "s2", "s3", "s4" ] 16 | SS: 17 | FEAT_LEVEL: "res4" 18 | NAME: ["build_cycle_energy_direct_add_all_head"] 19 | LOSS_SCALE: 0.0 20 | RATIO: 2.0 21 | COEF: 1.0 22 | FILTERING1: 0.4 23 | FILTERING2: 0.6 24 | ENERGY_WEIGHT: 0.02 25 | DATASETS: 26 | TRAIN: ("vis21_train",) 27 | TEST: ("vis21_val", ) 28 | SOLVER: 29 | IMS_PER_BATCH: 4 30 | WEIGHT_DECAY: 5e-5 31 | # IMS_PER_BATCH: 16 32 | BASE_LR: 0.02 33 | # STEPS: (31200, 41600,) 34 | # MAX_ITER: 45500 35 | STEPS: (36000, 48000,) 36 | MAX_ITER: 52500 37 | WARMUP_ITERS: 5000 38 | INPUT: 39 | MIN_SIZE_TRAIN: (800,) 40 | RANDOM_FLIP: "none" 41 | CROP: 42 | ENABLED: True 43 | SEED: 1998 44 | DATALOADER: 45 | SAMPLER_TRAIN: "PairTrainingMultiRandomSampler" 46 | SELCTED_NUMBER: 3 47 | PAIR_OFFSET_RANGE: 9 48 | OUTPUT_DIR: "/nobackup-slow/dataset/video/vis/checkpoints/VIS/energy_no_original_loss_direct_add_0_02_frame_9_revise_4to6_multi_random_seed_regnet_longer" -------------------------------------------------------------------------------- /configs/VIS/stud_regnet_ood_coco.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | PIXEL_STD: [ 57.375, 57.120, 58.395 ] 4 | BACKBONE: 5 | NAME: "build_regnetx_fpn_backbone" 6 | WEIGHTS: "RegNetX-4.0GF_dds_8gpu.pyth" #"regnetx_detectron2.pth" 7 | META_ARCHITECTURE: "SSRCNN" 8 | MASK_ON: False 9 | RESNETS: 10 | DEPTH: 50 11 | ROI_HEADS: 12 | NAME: "StandardROIHeadsSS" 13 | NUM_CLASSES: 40 14 | FPN: 15 | IN_FEATURES: [ "s1", "s2", "s3", "s4" ] 16 | SS: 17 | FEAT_LEVEL: "res4" 18 | NAME: ["build_cycle_energy_direct_add_all_head"] 19 | LOSS_SCALE: 0.0 20 | RATIO: 2.0 21 | COEF: 1.0 22 | FILTERING1: 0.4 23 | FILTERING2: 0.6 24 | ENERGY_WEIGHT: 0.02 25 | DATASETS: 26 | TRAIN: ("vis21_train",) 27 | TEST: ("vis_coco_ood", ) 28 | SOLVER: 29 | IMS_PER_BATCH: 4 30 | WEIGHT_DECAY: 5e-5 31 | # IMS_PER_BATCH: 16 32 | BASE_LR: 0.02 33 | # STEPS: (31200, 41600,) 34 | # MAX_ITER: 45500 35 | STEPS: (36000, 48000,) 36 | MAX_ITER: 52500 37 | WARMUP_ITERS: 5000 38 | INPUT: 39 | MIN_SIZE_TRAIN: (800,) 40 | RANDOM_FLIP: "none" 41 | CROP: 42 | ENABLED: True 43 | SEED: 1998 44 | DATALOADER: 45 | SAMPLER_TRAIN: "PairTrainingMultiRandomSampler" 46 | SELCTED_NUMBER: 3 47 | PAIR_OFFSET_RANGE: 9 48 | OUTPUT_DIR: "/nobackup-slow/dataset/video/vis/checkpoints/VIS/energy_no_original_loss_direct_add_0_02_frame_9_revise_4to6_multi_random_seed_regnet_longer" -------------------------------------------------------------------------------- /configs/VIS/stud_regnet_ood_nu.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | PIXEL_STD: [ 57.375, 57.120, 58.395 ] 4 | BACKBONE: 5 | NAME: "build_regnetx_fpn_backbone" 6 | WEIGHTS: "RegNetX-4.0GF_dds_8gpu.pyth" #"regnetx_detectron2.pth" 7 | META_ARCHITECTURE: "SSRCNN" 8 | MASK_ON: False 9 | RESNETS: 10 | DEPTH: 50 11 | ROI_HEADS: 12 | NAME: 
"StandardROIHeadsSS" 13 | NUM_CLASSES: 40 14 | FPN: 15 | IN_FEATURES: [ "s1", "s2", "s3", "s4" ] 16 | SS: 17 | FEAT_LEVEL: "res4" 18 | NAME: ["build_cycle_energy_direct_add_all_head"] 19 | LOSS_SCALE: 0.0 20 | RATIO: 2.0 21 | COEF: 1.0 22 | FILTERING1: 0.4 23 | FILTERING2: 0.6 24 | ENERGY_WEIGHT: 0.02 25 | DATASETS: 26 | TRAIN: ("vis21_train",) 27 | TEST: ("nu_bdd_ood", ) 28 | SOLVER: 29 | IMS_PER_BATCH: 4 30 | WEIGHT_DECAY: 5e-5 31 | # IMS_PER_BATCH: 16 32 | BASE_LR: 0.02 33 | # STEPS: (31200, 41600,) 34 | # MAX_ITER: 45500 35 | STEPS: (36000, 48000,) 36 | MAX_ITER: 52500 37 | WARMUP_ITERS: 5000 38 | INPUT: 39 | MIN_SIZE_TRAIN: (800,) 40 | RANDOM_FLIP: "none" 41 | CROP: 42 | ENABLED: True 43 | SEED: 1998 44 | DATALOADER: 45 | SAMPLER_TRAIN: "PairTrainingMultiRandomSampler" 46 | SELCTED_NUMBER: 3 47 | PAIR_OFFSET_RANGE: 9 48 | OUTPUT_DIR: "/nobackup-slow/dataset/video/vis/checkpoints/VIS/energy_no_original_loss_direct_add_0_02_frame_9_revise_4to6_multi_random_seed_regnet_longer" -------------------------------------------------------------------------------- /configs/VIS/stud_resnet.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SSRCNN" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | ROI_HEADS: 9 | NAME: "StandardROIHeadsSS" 10 | NUM_CLASSES: 40 11 | SS: 12 | FEAT_LEVEL: "res4" 13 | NAME: ["build_cycle_energy_direct_add_all_head"] 14 | LOSS_SCALE: 0.0 15 | RATIO: 2.0 16 | COEF: 1.0 17 | FILTERING1: 0.4 18 | FILTERING2: 0.6 19 | ENERGY_WEIGHT: 0.02 20 | DATASETS: 21 | TRAIN: ("vis21_train",) 22 | TEST: ("vis21_val", ) 23 | SOLVER: 24 | IMS_PER_BATCH: 4 25 | # IMS_PER_BATCH: 16 26 | BASE_LR: 0.02 27 | # STEPS: (31200, 41600,) 28 | # MAX_ITER: 45500 29 | STEPS: (36000, 48000,) 30 | MAX_ITER: 52500 31 | WARMUP_ITERS: 5000 32 | INPUT: 33 | MIN_SIZE_TRAIN: (800,) 34 | RANDOM_FLIP: "none" 35 | CROP: 36 | ENABLED: True 37 | SEED: 1998 38 | DATALOADER: 39 | SAMPLER_TRAIN: "PairTrainingMultiRandomSampler" 40 | SELCTED_NUMBER: 3 41 | PAIR_OFFSET_RANGE: 9 42 | OUTPUT_DIR: "/nobackup-slow/dataset/video/vis/checkpoints/VIS/energy_no_original_loss_direct_add_0_02_frame_9_revise_4to6_multi_random_seed1" -------------------------------------------------------------------------------- /configs/VIS/stud_resnet_ood_coco.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SSRCNN" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | ROI_HEADS: 9 | NAME: "StandardROIHeadsSS" 10 | NUM_CLASSES: 40 11 | SS: 12 | FEAT_LEVEL: "res4" 13 | NAME: ["build_cycle_energy_direct_add_all_head"] 14 | LOSS_SCALE: 0.0 15 | RATIO: 2.0 16 | COEF: 1.0 17 | FILTERING1: 0.4 18 | FILTERING2: 0.6 19 | ENERGY_WEIGHT: 0.02 20 | DATASETS: 21 | TRAIN: ("vis21_train",) 22 | TEST: ("vis_coco_ood", ) #vis_coco_ood 23 | SOLVER: 24 | IMS_PER_BATCH: 4 25 | # IMS_PER_BATCH: 16 26 | BASE_LR: 0.02 27 | # STEPS: (31200, 41600,) 28 | # MAX_ITER: 45500 29 | STEPS: (36000, 48000,) 30 | MAX_ITER: 52500 31 | WARMUP_ITERS: 5000 32 | INPUT: 33 | MIN_SIZE_TRAIN: (800,) 34 | RANDOM_FLIP: "none" 35 | CROP: 36 | ENABLED: True 37 | SEED: 1998 38 | DATALOADER: 39 | SAMPLER_TRAIN: "PairTrainingMultiRandomSampler" 40 | SELCTED_NUMBER: 3 41 | PAIR_OFFSET_RANGE: 9 42 | OUTPUT_DIR: 
"/nobackup-slow/dataset/video/vis/checkpoints/VIS/energy_no_original_loss_direct_add_0_02_frame_9_revise_4to6_multi_random_seed_regnet_longer" -------------------------------------------------------------------------------- /configs/VIS/stud_resnet_ood_nu.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SSRCNN" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | ROI_HEADS: 9 | NAME: "StandardROIHeadsSS" 10 | NUM_CLASSES: 40 11 | SS: 12 | FEAT_LEVEL: "res4" 13 | NAME: ["build_cycle_energy_direct_add_all_head"] 14 | LOSS_SCALE: 0.0 15 | RATIO: 2.0 16 | COEF: 1.0 17 | FILTERING1: 0.4 18 | FILTERING2: 0.6 19 | ENERGY_WEIGHT: 0.02 20 | DATASETS: 21 | TRAIN: ("vis21_train",) 22 | TEST: ("nu_bdd_ood", ) #vis_coco_ood 23 | SOLVER: 24 | IMS_PER_BATCH: 4 25 | # IMS_PER_BATCH: 16 26 | BASE_LR: 0.02 27 | # STEPS: (31200, 41600,) 28 | # MAX_ITER: 45500 29 | STEPS: (36000, 48000,) 30 | MAX_ITER: 52500 31 | WARMUP_ITERS: 5000 32 | INPUT: 33 | MIN_SIZE_TRAIN: (800,) 34 | RANDOM_FLIP: "none" 35 | CROP: 36 | ENABLED: True 37 | SEED: 1998 38 | DATALOADER: 39 | SAMPLER_TRAIN: "PairTrainingMultiRandomSampler" 40 | SELCTED_NUMBER: 3 41 | PAIR_OFFSET_RANGE: 9 42 | OUTPUT_DIR: "/nobackup-slow/dataset/video/vis/checkpoints/VIS/energy_no_original_loss_direct_add_0_02_frame_9_revise_4to6_multi_random_seed_regnet_longer" -------------------------------------------------------------------------------- /datasets/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /datasets/convert_bdd_3cls.py: -------------------------------------------------------------------------------- 1 | """BDD . 2 | - Converting BDD labels to 3 classes to match the Waymo dataset. 3 | 4 | Run `python3 -m datasets.convert_bdd_3cls` 5 | """ 6 | 7 | import json 8 | import os 9 | import os.path as osp 10 | from collections import defaultdict 11 | 12 | 13 | def load_json(filename): 14 | with open(filename, "r") as fp: 15 | reg_file = json.load(fp) 16 | return reg_file 17 | 18 | 19 | # BDD100K MOT set domain splits. 20 | _PREDEFINED_SPLITS_BDDT = { 21 | "bdd_tracking_2k": { 22 | "bdd_tracking_2k_train": ( 23 | "bdd100k/images/track/train", 24 | "bdd100k/labels/track/bdd100k_mot_train_coco.json", 25 | ), 26 | "bdd_tracking_2k_val": ( 27 | "bdd100k/images/track/val", 28 | "bdd100k/labels/track/bdd100k_mot_val_coco.json", 29 | ), 30 | }, 31 | } 32 | 33 | # Register data for different domains as well as different sequence. 34 | domain_path = "bdd100k/labels/box_track_20/domain_splits/" 35 | train_splits = load_json( 36 | osp.join("/nobackup-slow/dataset/my_xfdu/video/bdd/", domain_path, "bdd100k_mot_domain_splits_train.json") 37 | ) 38 | val_splits = load_json( 39 | osp.join("/nobackup-slow/dataset/my_xfdu/video/bdd/", domain_path, "bdd100k_mot_domain_splits_val.json") 40 | ) 41 | 42 | 43 | # per_seq_{split}_{key}_{_attr}: [dataset_names] 44 | per_seq_maps = defaultdict(list) 45 | 46 | # register the BDD100K per domain sets 47 | for split, result in [("train", train_splits), ("val", val_splits)]: 48 | for key, values in result.items(): 49 | # key is ["timeofday", "scene", "weather"] 50 | for attr, seqs in values.items(): 51 | # attr is the actual attribute under each category like 52 | # `daytime`, `night`, etc. Values are list of sequence names. 
53 | if "/" in attr or " " in attr: 54 | if "/" in attr: 55 | _attr = attr.replace("/", "-") 56 | if " " in attr: 57 | _attr = attr.replace(" ", "-") 58 | else: 59 | _attr = attr 60 | 61 | # register per domain values. 62 | _PREDEFINED_SPLITS_BDDT["bdd_tracking_2k"][ 63 | "bdd_tracking_2k_{}_{}".format(split, _attr) 64 | ] = ( 65 | "bdd100k/images/track/{}".format(split), 66 | osp.join( 67 | domain_path, 68 | "labels", 69 | split, 70 | "{}_{}_{}_coco.json".format(split, key, _attr), 71 | ), 72 | ) 73 | 74 | MAPPING = {1: 2, 2: 3, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1} 75 | os.makedirs(osp.join('/nobackup-slow/dataset/my_xfdu/video/bdd/', 'bdd100k/labels/track_3cls'), exist_ok=True) 76 | os.makedirs( 77 | osp.join('/nobackup-slow/dataset/my_xfdu/video/bdd/', 'bdd100k/labels/box_track_20_3cls'), exist_ok=True) 78 | 79 | datasets = _PREDEFINED_SPLITS_BDDT['bdd_tracking_2k'] 80 | files = [datasets[k][1] for k in datasets.keys()] 81 | 82 | for json_file in files: 83 | print(json_file) 84 | data_path = osp.join('/nobackup-slow/dataset/my_xfdu/video/bdd/', json_file) 85 | prefix = json_file.split('/')[-1] 86 | data = json.load(open(data_path)) 87 | new_cats = [ 88 | {'supercategory': 'none', 'id': 1, 'name': 'vehicle'}, 89 | {'supercategory': 'none', 'id': 2, 'name': 'pedestrian'}, 90 | {'supercategory': 'none', 'id': 3, 'name': 'cyclist'}, 91 | ] 92 | new_annos = [] 93 | for anno in data['annotations']: 94 | anno['category_id'] = MAPPING[anno['category_id']] 95 | new_annos.append(anno) 96 | new_labels = { 97 | 'categories': new_cats, 98 | 'images': data['images'], 99 | 'annotations': new_annos, 100 | 'videos': data['videos'], 101 | } 102 | if 'box_track_20' in data_path: 103 | save_path = osp.dirname(data_path).replace( 104 | 'box_track_20', 'box_track_20_3cls') 105 | elif 'track' in data_path: 106 | save_path = osp.dirname(data_path).replace( 107 | 'track', 'track_3cls') 108 | else: 109 | assert False 110 | os.makedirs(save_path, exist_ok=True) 111 | save_path = osp.join(save_path, prefix) 112 | with open(save_path, 'w') as fp: 113 | json.dump(new_labels, fp) 114 | -------------------------------------------------------------------------------- /datasets/convert_bdd_ood.py: -------------------------------------------------------------------------------- 1 | """BDD . 2 | - Converting BDD labels to 3 classes to match the Waymo dataset. 3 | 4 | Run `python3 -m datasets.convert_bdd_3cls` 5 | """ 6 | 7 | import json 8 | import os 9 | import os.path as osp 10 | from collections import defaultdict 11 | 12 | 13 | def load_json(filename): 14 | with open(filename, "r") as fp: 15 | reg_file = json.load(fp) 16 | return reg_file 17 | 18 | 19 | # BDD100K MOT set domain splits. 20 | _PREDEFINED_SPLITS_BDDT = { 21 | "bdd_tracking_2k": { 22 | "bdd_tracking_2k_train": ( 23 | "bdd100k/images/track/train", 24 | "bdd100k/labels/track/bdd100k_mot_train_coco.json", 25 | ), 26 | "bdd_tracking_2k_val": ( 27 | "bdd100k/images/track/val", 28 | "bdd100k/labels/track/bdd100k_mot_val_coco.json", 29 | ), 30 | }, 31 | } 32 | 33 | # Register data for different domains as well as different sequence. 
34 | domain_path = "bdd100k/labels/box_track_20/domain_splits/" 35 | train_splits = load_json( 36 | osp.join("/nobackup-slow/dataset/my_xfdu/video/bdd/", domain_path, "bdd100k_mot_domain_splits_train.json") 37 | ) 38 | val_splits = load_json( 39 | osp.join("/nobackup-slow/dataset/my_xfdu/video/bdd/", domain_path, "bdd100k_mot_domain_splits_val.json") 40 | ) 41 | 42 | 43 | # per_seq_{split}_{key}_{_attr}: [dataset_names] 44 | per_seq_maps = defaultdict(list) 45 | 46 | # register the BDD100K per domain sets 47 | for split, result in [("train", train_splits), ("val", val_splits)]: 48 | for key, values in result.items(): 49 | # key is ["timeofday", "scene", "weather"] 50 | for attr, seqs in values.items(): 51 | # attr is the actual attribute under each category like 52 | # `daytime`, `night`, etc. Values are list of sequence names. 53 | if "/" in attr or " " in attr: 54 | if "/" in attr: 55 | _attr = attr.replace("/", "-") 56 | if " " in attr: 57 | _attr = attr.replace(" ", "-") 58 | else: 59 | _attr = attr 60 | 61 | # register per domain values. 62 | _PREDEFINED_SPLITS_BDDT["bdd_tracking_2k"][ 63 | "bdd_tracking_2k_{}_{}".format(split, _attr) 64 | ] = ( 65 | "bdd100k/images/track/{}".format(split), 66 | osp.join( 67 | domain_path, 68 | "labels", 69 | split, 70 | "{}_{}_{}_coco.json".format(split, key, _attr), 71 | ), 72 | ) 73 | 74 | ''' 75 | {"supercategory": "human", "id": 1, "name": "pedestrian"}, 76 | {"supercategory": "human", "id": 2, "name": "rider"}, 77 | {"supercategory": "vehicle", "id": 3, "name": "car"}, 78 | {"supercategory": "vehicle", "id": 4, "name": "truck"}, 79 | {"supercategory": "vehicle", "id": 5, "name": "bus"}, 80 | {"supercategory": "vehicle", "id": 6, "name": "train"}, 81 | {"supercategory": "bike", "id": 7, "name": "motorcycle"}, 82 | {"supercategory": "bike", "id": 8, "name": "bicycle"}, 83 | ["vehicle", "pedestrian", "cyclist"] 84 | ''' 85 | 86 | MAPPING = {1: 2, 2: 3, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1} 87 | 88 | 89 | datasets = _PREDEFINED_SPLITS_BDDT['bdd_tracking_2k'] 90 | files = [datasets[k][1] for k in datasets.keys()] 91 | 92 | for json_file in [files[0]]: 93 | print(json_file) 94 | data_path = osp.join('/nobackup-slow/dataset/my_xfdu/video/bdd/', json_file) 95 | prefix = json_file.split('/')[-1] 96 | data = json.load(open(data_path)) 97 | # new_cats = [ 98 | # {'supercategory': 'none', 'id': 1, 'name': 'vehicle'}, 99 | # {'supercategory': 'none', 'id': 2, 'name': 'pedestrian'}, 100 | # {'supercategory': 'none', 'id': 3, 'name': 'cyclist'}, 101 | # ] 102 | new_cats = [{"supercategory": "human", "id": 1, "name": "pedestrian"}, 103 | {"supercategory": "human", "id": 2, "name": "rider"}, 104 | {"supercategory": "vehicle", "id": 3, "name": "car"}, 105 | {"supercategory": "vehicle", "id": 4, "name": "truck"}, 106 | {"supercategory": "vehicle", "id": 5, "name": "bus"}, 107 | {"supercategory": "vehicle", "id": 6, "name": "train"}, 108 | {"supercategory": "bike", "id": 7, "name": "motorcycle"}, 109 | {"supercategory": "bike", "id": 8, "name": "bicycle"}] 110 | 111 | new_annos = [] 112 | remove_image_id = [] 113 | # breakpoint() 114 | for anno in data['annotations']: 115 | if anno['category_id'] not in [4, 5, 6, 7, 8]: 116 | remove_image_id.append(anno['image_id']) 117 | continue 118 | else: 119 | # anno['category_id'] = MAPPING[anno['category_id']] 120 | new_annos.append(anno) 121 | # import numpy as np 122 | all_image_id = range(1, len(data['images'])+1) 123 | kept_image_id = set(all_image_id).difference(set(remove_image_id)) 124 | # kept_image_id = [item for item 
in all_image_id if item not in remove_image_id] 125 | kept_video_id = [] 126 | for index in range(len(data['images'])): 127 | if index + 1 in kept_image_id: 128 | kept_video_id.append(data['images'][index]['video_id']) 129 | kept_video_id = list(set(kept_video_id)) 130 | 131 | 132 | kept_images = [] 133 | for index in range(len(data['images'])): 134 | if index + 1 in kept_image_id: 135 | kept_images.append(data['images'][index]) 136 | kept_videos = [] 137 | for index in range(len(data['videos'])): 138 | if index + 1 in kept_video_id: 139 | kept_videos.append(data['videos'][index]) 140 | # breakpoint() 141 | # breakpoint() 142 | 143 | new_labels = { 144 | 'categories': new_cats, 145 | 'images': kept_images,#data['images'], 146 | 'annotations': new_annos, 147 | 'videos': kept_videos, 148 | } 149 | 150 | save_path = '/nobackup-slow/dataset/my_xfdu/video/bdd/bdd100k/labels/track/' 151 | prefix = 'bdd_ood.json' 152 | 153 | save_path = osp.join(save_path, prefix) 154 | with open(save_path, 'w') as fp: 155 | json.dump(new_labels, fp) 156 | -------------------------------------------------------------------------------- /datasets/convert_city.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import glob 4 | import os.path as osp 5 | 6 | import cityscapesscripts.helpers.labels as CSLabels 7 | import mmcv 8 | import numpy as np 9 | import pycocotools.mask as maskUtils 10 | 11 | 12 | def collect_files(img_dir, gt_dir): 13 | suffix = 'leftImg8bit.png' 14 | files = [] 15 | for img_file in glob.glob(osp.join(img_dir, '**/*.png')): 16 | assert img_file.endswith(suffix), img_file 17 | inst_file = gt_dir + img_file[ 18 | len(img_dir):-len(suffix)] + 'gtFine_instanceIds.png' 19 | # Note that labelIds are not converted to trainId for seg map 20 | segm_file = gt_dir + img_file[ 21 | len(img_dir):-len(suffix)] + 'gtFine_labelIds.png' 22 | files.append((img_file, inst_file, segm_file)) 23 | assert len(files), f'No images found in {img_dir}' 24 | print(f'Loaded {len(files)} images from {img_dir}') 25 | 26 | return files 27 | 28 | 29 | def collect_annotations(files, nproc=1): 30 | print('Loading annotation images') 31 | if nproc > 1: 32 | images = mmcv.track_parallel_progress( 33 | load_img_info, files, nproc=nproc) 34 | else: 35 | images = mmcv.track_progress(load_img_info, files) 36 | 37 | return images 38 | 39 | 40 | def load_img_info(files): 41 | img_file, inst_file, segm_file = files 42 | inst_img = mmcv.imread(inst_file, 'unchanged') 43 | # ids < 24 are stuff labels (filtering them first is about 5% faster) 44 | unique_inst_ids = np.unique(inst_img[inst_img >= 24]) 45 | anno_info = [] 46 | for inst_id in unique_inst_ids: 47 | # For non-crowd annotations, inst_id // 1000 is the label_id 48 | # Crowd annotations have <1000 instance ids 49 | label_id = inst_id // 1000 if inst_id >= 1000 else inst_id 50 | label = CSLabels.id2label[label_id] 51 | if not label.hasInstances or label.ignoreInEval: 52 | continue 53 | 54 | category_id = label.id 55 | iscrowd = int(inst_id < 1000) 56 | mask = np.asarray(inst_img == inst_id, dtype=np.uint8, order='F') 57 | mask_rle = maskUtils.encode(mask[:, :, None])[0] 58 | 59 | area = maskUtils.area(mask_rle) 60 | # convert to COCO style XYWH format 61 | bbox = maskUtils.toBbox(mask_rle) 62 | 63 | # for json encoding 64 | mask_rle['counts'] = mask_rle['counts'].decode() 65 | 66 | anno = dict( 67 | iscrowd=iscrowd, 68 | category_id=category_id, 69 | bbox=bbox.tolist(), 70 | 
area=area.tolist(), 71 | segmentation=mask_rle) 72 | anno_info.append(anno) 73 | video_name = osp.basename(osp.dirname(img_file)) 74 | img_info = dict( 75 | # remove img_prefix for filename 76 | file_name=osp.join(video_name, osp.basename(img_file)), 77 | height=inst_img.shape[0], 78 | width=inst_img.shape[1], 79 | anno_info=anno_info, 80 | segm_file=osp.join(video_name, osp.basename(segm_file))) 81 | 82 | return img_info 83 | 84 | 85 | def cvt_annotations(image_infos, out_json_name): 86 | out_json = dict() 87 | img_id = 0 88 | ann_id = 0 89 | out_json['images'] = [] 90 | out_json['categories'] = [] 91 | out_json['annotations'] = [] 92 | for image_info in image_infos: 93 | image_info['id'] = img_id 94 | anno_infos = image_info.pop('anno_info') 95 | out_json['images'].append(image_info) 96 | for anno_info in anno_infos: 97 | anno_info['image_id'] = img_id 98 | anno_info['id'] = ann_id 99 | out_json['annotations'].append(anno_info) 100 | ann_id += 1 101 | img_id += 1 102 | for label in CSLabels.labels: 103 | if label.hasInstances and not label.ignoreInEval: 104 | cat = dict(id=label.id, name=label.name) 105 | out_json['categories'].append(cat) 106 | 107 | if len(out_json['annotations']) == 0: 108 | out_json.pop('annotations') 109 | 110 | mmcv.dump(out_json, out_json_name) 111 | return out_json 112 | 113 | 114 | def parse_args(): 115 | parser = argparse.ArgumentParser( 116 | description='Convert Cityscapes annotations to COCO format') 117 | parser.add_argument('--cityscapes_path', default='/nobackup-slow/dataset/my_xfdu/video/city/') 118 | parser.add_argument('--img-dir', default='leftImg8bit', type=str) 119 | parser.add_argument('--gt-dir', default='gtFine', type=str) 120 | parser.add_argument('-o', '--out-dir', help='output path') 121 | parser.add_argument( 122 | '--nproc', default=1, type=int, help='number of process') 123 | args = parser.parse_args() 124 | return args 125 | 126 | 127 | def main(): 128 | args = parse_args() 129 | cityscapes_path = args.cityscapes_path 130 | out_dir = args.out_dir if args.out_dir else cityscapes_path 131 | mmcv.mkdir_or_exist(out_dir) 132 | 133 | img_dir = osp.join(cityscapes_path, args.img_dir) 134 | gt_dir = osp.join(cityscapes_path, args.gt_dir) 135 | 136 | set_name = dict( 137 | train='instancesonly_filtered_gtFine_train.json', 138 | val='instancesonly_filtered_gtFine_val.json', 139 | test='instancesonly_filtered_gtFine_test.json') 140 | 141 | for split, json_name in set_name.items(): 142 | print(f'Converting {split} into {json_name}') 143 | with mmcv.Timer( 144 | print_tmpl='It took {}s to convert Cityscapes annotation'): 145 | files = collect_files( 146 | osp.join(img_dir, split), osp.join(gt_dir, split)) 147 | image_infos = collect_annotations(files, nproc=args.nproc) 148 | cvt_annotations(image_infos, osp.join(out_dir, json_name)) 149 | 150 | 151 | if __name__ == '__main__': 152 | main() -------------------------------------------------------------------------------- /datasets/convert_coco_vis.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pycocotools.coco import COCO 3 | 4 | data = json.load(open('/nobackup-slow/dataset/my_xfdu/coco2017/annotations/instances_train2017.json')) 5 | new_dict = dict() 6 | 7 | new_dict['info'] = data['info'] 8 | new_dict['licenses'] = data['licenses'] 9 | new_dict['categories'] = data['categories'] 10 | 11 | images = [] 12 | annotations = [] 13 | keep_image_ids = [] 14 | 15 | 16 | coco = 
COCO('/nobackup-slow/dataset/my_xfdu/coco2017/annotations/instances_train2017.json') 17 | # import ipdb; ipdb.set_trace() 18 | # 19 | CLASSES = ['bicycle', 20 | 'bus', 'traffic light', 21 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 22 | 'sheep', 23 | 'backpack', 'umbrella', 'handbag', 24 | 'tie', 'suitcase', 'skis', 'sports ball', 25 | 'kite', 'baseball bat', 'baseball glove' 26 | , 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 27 | 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 28 | 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 29 | 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'remote', 30 | 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 31 | 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 32 | 'hair drier', 'toothbrush'] 33 | 34 | 35 | cat_ids = coco.get_cat_ids(cat_names=CLASSES) 36 | cat2label = {cat_id: i for i, cat_id in enumerate(cat_ids)} 37 | img_ids = coco.get_img_ids() 38 | # import ipdb; ipdb.set_trace() 39 | 40 | 41 | for i in img_ids: 42 | mark = 0 43 | info = coco.load_imgs([i])[0] 44 | # info['filename'] = info['file_name'] 45 | 46 | # added part. 47 | ann_ids = coco.get_ann_ids(img_ids=[info['id']]) 48 | ann_info = coco.load_anns(ann_ids) 49 | for object1 in ann_info: 50 | if object1['category_id'] not in cat_ids: 51 | mark = 1 52 | continue 53 | if mark == 0: 54 | keep_image_ids.append(i) 55 | # import ipdb; ipdb.set_trace() 56 | # for index in keep_image_ids: 57 | # annotations.append() 58 | for annotations1 in data['annotations']: 59 | if annotations1['image_id'] in keep_image_ids: 60 | annotations.append(annotations1) 61 | # keep_image_ids.append(annotations1['image_id']) 62 | 63 | for image_info in data['images']: 64 | if image_info['id'] in keep_image_ids: 65 | images.append(image_info) 66 | 67 | new_dict['images'] = images 68 | new_dict['annotations'] = annotations 69 | 70 | with open('/nobackup-slow/dataset/my_xfdu/coco2017/annotations/instances_val2017_ood_wrt_vis.json', 'w') as file: 71 | json.dump(new_dict, file) -------------------------------------------------------------------------------- /datasets/convert_nu_ood.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pycocotools.coco import COCO 3 | from os import path as osp 4 | 5 | data = json.load(open('/nobackup-slow/dataset/my_xfdu/video/nuscene/nuimages_v1.0-val.json')) 6 | 7 | 8 | new_annos = [] 9 | remove_image_id = [] 10 | # breakpoint() 11 | for anno in data['annotations']: 12 | if anno['category_id'] not in [8, 9, 10, 11, 12, 13, 14, 15]: 13 | remove_image_id.append(anno['image_id']) 14 | continue 15 | else: 16 | # anno['category_id'] = MAPPING[anno['category_id']] 17 | new_annos.append(anno) 18 | # import numpy as np 19 | all_image_id = range(0, len(data['images'])) 20 | # breakpoint() 21 | kept_image_id = set(all_image_id).difference(set(remove_image_id)) 22 | 23 | kept_images = [] 24 | for index in range(len(data['images'])): 25 | if index in kept_image_id: 26 | kept_images.append(data['images'][index]) 27 | 28 | 29 | new_labels = { 30 | 'categories': data['categories'], 31 | 'images': kept_images,#data['images'], 32 | 'annotations': new_annos 33 | } 34 | 35 | save_path = '/nobackup-slow/dataset/my_xfdu/video/nuscene/' 36 | prefix = 'nu_ood.json' 37 | 38 | save_path = osp.join(save_path, prefix) 39 | with open(save_path, 'w') as fp: 40 | json.dump(new_labels, fp) 41 | 42 | 43 | 
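
The conversion scripts in this directory (convert_bdd_ood.py, convert_coco_vis.py, and convert_nu_ood.py above, convert_openimages_vis.py and convert_vg_ood.py below) repeat the same COCO-JSON filtering pattern: walk the annotations, mark every image that contains a category outside the allowed set, then keep only the remaining images and annotations. The helper below is a generalized sketch of that pattern, not code from the repository; unlike the scripts above it also prunes annotations that belong to a removed image, and the paths and category-ID set are whatever the caller passes in.

import json


def filter_coco_by_category(in_json, out_json, allowed_category_ids):
    """Keep only images whose annotations all fall inside `allowed_category_ids`.

    Generalized illustration of the convert_*_ood.py scripts; nothing here is
    hard-coded to BDD, nuImages, Visual Genome, or OpenImages.
    """
    with open(in_json) as fp:
        data = json.load(fp)

    removed_image_ids = set()
    kept_annos = []
    for anno in data["annotations"]:
        if anno["category_id"] in allowed_category_ids:
            kept_annos.append(anno)
        else:
            # One out-of-set box disqualifies the whole image for the OOD split.
            removed_image_ids.add(anno["image_id"])

    # Prune annotations and images belonging to disqualified frames.
    kept_annos = [a for a in kept_annos if a["image_id"] not in removed_image_ids]
    kept_images = [im for im in data["images"] if im["id"] not in removed_image_ids]

    new_labels = {
        "categories": data["categories"],
        "images": kept_images,
        "annotations": kept_annos,
    }
    with open(out_json, "w") as fp:
        json.dump(new_labels, fp)


# For example, the nuImages OOD split produced above corresponds roughly to:
# filter_coco_by_category("nuimages_v1.0-val.json", "nu_ood.json",
#                         allowed_category_ids={8, 9, 10, 11, 12, 13, 14, 15})
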
-------------------------------------------------------------------------------- /datasets/convert_openimages_vis.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | data = json.load(open('/nobackup-slow/dataset/my_xfdu/OpenImages/coco_classes/COCO-Format/val_coco_format.json','rb')) 4 | all_classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 5 | 'bus', 'train', 'truck', 'boat', 'traffic light', 6 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 7 | 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 8 | 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 9 | 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 10 | 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 11 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 12 | 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 13 | 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 14 | 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 15 | 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 16 | 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 17 | 'hair drier', 'toothbrush'] 18 | all_dict = {} 19 | for i in range(len(all_classes)): 20 | all_dict[all_classes[i]] = i + 1 21 | 22 | not_vis_classes = ['bicycle', 23 | 'bus', 'traffic light', 24 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 25 | 'sheep', 26 | 'backpack', 'umbrella', 'handbag', 27 | 'tie', 'suitcase', 'skis', 'sports ball', 28 | 'kite', 'baseball bat', 'baseball glove' 29 | , 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 30 | 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 31 | 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 32 | 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'remote', 33 | 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 34 | 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 35 | 'hair drier', 'toothbrush'] 36 | not_vis_id = [] 37 | for item in not_vis_classes: 38 | not_vis_id.append(all_dict[item]) 39 | remove_image_id = [] 40 | # breakpoint() 41 | for annotation in data['annotations']: 42 | if annotation['category_id'] not in not_vis_id: 43 | remove_image_id.append(annotation['image_id']) 44 | remove_image_id = list(set(remove_image_id)) 45 | new_annotation = [] 46 | new_image_id = [] 47 | for annotation in data['annotations']: 48 | if annotation['image_id'] not in remove_image_id: 49 | new_annotation.append(annotation) 50 | for image in data['images']: 51 | if image['id'] not in remove_image_id: 52 | new_image_id.append(image) 53 | # breakpoint() 54 | new_annotation_all = data 55 | new_annotation_all['annotations'] = new_annotation 56 | new_annotation_all['images'] = new_image_id 57 | breakpoint() 58 | json.dump(new_annotation_all, open('/nobackup-slow/dataset/my_xfdu/OpenImages/coco_classes/COCO-Format/vis_open_ood.json','w')) 59 | 60 | 61 | -------------------------------------------------------------------------------- /datasets/convert_vg_ood.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pycocotools.coco import COCO 3 | from os import path as osp 4 | 5 | data = json.load(open('/nobackup-slow/dataset/my_xfdu/video/vg/anno/visual_genome_val.json')) 6 | 7 | 8 | new_annos = [] 9 | remove_image_id = [] 10 | # breakpoint() 11 | for anno in data['annotations']: 
12 | if anno['category_id'] in [131, 488, 110,130,146,218,343,646,180,999,58,157,233,52,625,685,954,1181,1478,53, 13 | 184,97,150,744,117,337,341,351,83,141,992,1509,444,35,37,470,42,186,1388,639,127, 14 | 9,364,19,86,297,1223,138,258,135,350,59,68,70,566,814,898,1181,1447,155,810,838, 15 | 85,87]: 16 | remove_image_id.append(anno['image_id']) 17 | continue 18 | else: 19 | # anno['category_id'] = MAPPING[anno['category_id']] 20 | new_annos.append(anno) 21 | # import numpy as np 22 | all_image_id = range(0, len(data['images'])) 23 | # breakpoint() 24 | kept_image_id = set(all_image_id).difference(set(remove_image_id)) 25 | 26 | kept_images = [] 27 | for index in range(len(data['images'])): 28 | if index in kept_image_id: 29 | kept_images.append(data['images'][index]) 30 | 31 | 32 | new_labels = { 33 | 'categories': data['categories'], 34 | 'images': kept_images,#data['images'], 35 | 'annotations': new_annos 36 | } 37 | 38 | save_path = '/nobackup-slow/dataset/my_xfdu/video/vg/anno' 39 | prefix = 'vg_ood.json' 40 | # breakpoint() 41 | save_path = osp.join(save_path, prefix) 42 | with open(save_path, 'w') as fp: 43 | json.dump(new_labels, fp) 44 | 45 | 46 | -------------------------------------------------------------------------------- /datasets/metadata.py: -------------------------------------------------------------------------------- 1 | from collections import ChainMap 2 | 3 | # Detectron imports 4 | from detectron2.data import MetadataCatalog 5 | 6 | # Useful Dicts for OpenImages Conversion 7 | OPEN_IMAGES_TO_COCO = {'Person': 'person', 8 | 'Bicycle': 'bicycle', 9 | 'Car': 'car', 10 | 'Motorcycle': 'motorcycle', 11 | 'Airplane': 'airplane', 12 | 'Bus': 'bus', 13 | 'Train': 'train', 14 | 'Truck': 'truck', 15 | 'Boat': 'boat', 16 | 'Traffic light': 'traffic light', 17 | 'Fire hydrant': 'fire hydrant', 18 | 'Stop sign': 'stop sign', 19 | 'Parking meter': 'parking meter', 20 | 'Bench': 'bench', 21 | 'Bird': 'bird', 22 | 'Cat': 'cat', 23 | 'Dog': 'dog', 24 | 'Horse': 'horse', 25 | 'Sheep': 'sheep', 26 | 'Elephant': 'cow', 27 | 'Cattle': 'elephant', 28 | 'Bear': 'bear', 29 | 'Zebra': 'zebra', 30 | 'Giraffe': 'giraffe', 31 | 'Backpack': 'backpack', 32 | 'Umbrella': 'umbrella', 33 | 'Handbag': 'handbag', 34 | 'Tie': 'tie', 35 | 'Suitcase': 'suitcase', 36 | 'Flying disc': 'frisbee', 37 | 'Ski': 'skis', 38 | 'Snowboard': 'snowboard', 39 | 'Ball': 'sports ball', 40 | 'Kite': 'kite', 41 | 'Baseball bat': 'baseball bat', 42 | 'Baseball glove': 'baseball glove', 43 | 'Skateboard': 'skateboard', 44 | 'Surfboard': 'surfboard', 45 | 'Tennis racket': 'tennis racket', 46 | 'Bottle': 'bottle', 47 | 'Wine glass': 'wine glass', 48 | 'Coffee cup': 'cup', 49 | 'Fork': 'fork', 50 | 'Knife': 'knife', 51 | 'Spoon': 'spoon', 52 | 'Bowl': 'bowl', 53 | 'Banana': 'banana', 54 | 'Apple': 'apple', 55 | 'Sandwich': 'sandwich', 56 | 'Orange': 'orange', 57 | 'Broccoli': 'broccoli', 58 | 'Carrot': 'carrot', 59 | 'Hot dog': 'hot dog', 60 | 'Pizza': 'pizza', 61 | 'Doughnut': 'donut', 62 | 'Cake': 'cake', 63 | 'Chair': 'chair', 64 | 'Couch': 'couch', 65 | 'Houseplant': 'potted plant', 66 | 'Bed': 'bed', 67 | 'Table': 'dining table', 68 | 'Toilet': 'toilet', 69 | 'Television': 'tv', 70 | 'Laptop': 'laptop', 71 | 'Computer mouse': 'mouse', 72 | 'Remote control': 'remote', 73 | 'Computer keyboard': 'keyboard', 74 | 'Mobile phone': 'cell phone', 75 | 'Microwave oven': 'microwave', 76 | 'Oven': 'oven', 77 | 'Toaster': 'toaster', 78 | 'Sink': 'sink', 79 | 'Refrigerator': 'refrigerator', 80 | 'Book': 'book', 81 | 'Clock': 'clock', 
82 | 'Vase': 'vase', 83 | 'Scissors': 'scissors', 84 | 'Teddy bear': 'teddy bear', 85 | 'Hair dryer': 'hair drier', 86 | 'Toothbrush': 'toothbrush'} 87 | 88 | -------------------------------------------------------------------------------- /figs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeplearning-wisc/stud/21b9492c63804d7acf41fefd0d6ad40cf29975a5/figs/.DS_Store -------------------------------------------------------------------------------- /figs/cycle_confusion_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeplearning-wisc/stud/21b9492c63804d7acf41fefd0d6ad40cf29975a5/figs/cycle_confusion_arch.png -------------------------------------------------------------------------------- /permutations/permutations_hamming_all_24.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeplearning-wisc/stud/21b9492c63804d7acf41fefd0d6ad40cf29975a5/permutations/permutations_hamming_all_24.npy -------------------------------------------------------------------------------- /permutations/permutations_hamming_max_1000.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeplearning-wisc/stud/21b9492c63804d7acf41fefd0d6ad40cf29975a5/permutations/permutations_hamming_max_1000.npy -------------------------------------------------------------------------------- /permutations/permutations_hamming_max_2.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeplearning-wisc/stud/21b9492c63804d7acf41fefd0d6ad40cf29975a5/permutations/permutations_hamming_max_2.npy -------------------------------------------------------------------------------- /permutations/permutations_hamming_max_24.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeplearning-wisc/stud/21b9492c63804d7acf41fefd0d6ad40cf29975a5/permutations/permutations_hamming_max_24.npy -------------------------------------------------------------------------------- /permutations/permutations_hamming_max_35.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeplearning-wisc/stud/21b9492c63804d7acf41fefd0d6ad40cf29975a5/permutations/permutations_hamming_max_35.npy -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 79 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | termcolor 2 | numpy 3 | tqdm 4 | matplotlib 5 | termcolor 6 | yacs 7 | tabulate 8 | cloudpickle 9 | Pillow 10 | imagesize 11 | tensorboard -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length=100 3 | multi_line_output=4 4 | known_standard_library=numpy,setuptools 5 | known_myself=detectron2 6 | known_third_party=fvcore,matplotlib,cv2,torch,torchvision,PIL,pycocotools,yacs,termcolor,cityscapesscripts,tabulate,tqdm,scipy,lvis,psutil 7 | 
no_lines_before=STDLIB,THIRDPARTY 8 | sections=FUTURE,STDLIB,THIRDPARTY,myself,FIRSTPARTY,LOCALFOLDER 9 | default_section=FIRSTPARTY 10 | 11 | [mypy] 12 | python_version=3.6 13 | ignore_missing_imports = True 14 | warn_unused_configs = True 15 | disallow_untyped_defs = True 16 | check_untyped_defs = True 17 | warn_unused_ignores = True 18 | warn_redundant_casts = True 19 | show_column_numbers = True 20 | follow_imports = silent 21 | allow_redefinition = True 22 | ; Require all functions to be annotated 23 | disallow_incomplete_defs = True 24 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeplearning-wisc/stud/21b9492c63804d7acf41fefd0d6ad40cf29975a5/src/__init__.py -------------------------------------------------------------------------------- /src/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import get_cfg 2 | -------------------------------------------------------------------------------- /src/config/config.py: -------------------------------------------------------------------------------- 1 | 2 | from detectron2.config import CfgNode 3 | 4 | 5 | def get_cfg() -> CfgNode: 6 | """ 7 | Get a copy of the default config. 8 | 9 | Returns: 10 | a detectron2 CfgNode instance. 11 | """ 12 | from .defaults import _C 13 | 14 | return _C.clone() 15 | -------------------------------------------------------------------------------- /src/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .builtin import ( 2 | # register_all_waymo, 3 | 4 | register_all_bdd_tracking, 5 | register_all_coco, 6 | register_coco_ood_wrt_bdd, 7 | register_vis_dataset, 8 | 9 | 10 | ) 11 | 12 | from .pair_sampler import PairTrainingSampler, PairDataLoader 13 | from .pair_fix_sampler import PairFixTrainingSampler, PairFixDataLoader 14 | from .pair_all_sampler import PairAllTrainingSampler, PairAllDataLoader 15 | from .pair_sampler_multi_random import PairTrainingMultiRandomSampler, PairMultirandomDataLoader 16 | from .pair_sampler_multi_interval import PairTrainingMultiIntervalSampler, PairDataIntervalLoader 17 | 18 | # from .common import MapDataset 19 | 20 | from .build import build_detection_train_loader, get_detection_dataset_dicts 21 | 22 | # Register them all under "./datasets" 23 | # register_all_bdd100k() 24 | # register_all_waymo() 25 | 26 | # 27 | register_all_bdd_tracking() 28 | register_all_coco() 29 | register_coco_ood_wrt_bdd() 30 | register_vis_dataset() 31 | -------------------------------------------------------------------------------- /src/data/pair_fix_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data.sampler import Sampler 3 | 4 | from detectron2.utils import comm 5 | 6 | import copy 7 | import itertools 8 | import math 9 | import random 10 | from collections import defaultdict 11 | from typing import Optional 12 | 13 | __all__ = ["PairFixTrainingSampler", "PairFixDataLoader"] 14 | 15 | 16 | class PairFixTrainingSampler(Sampler): 17 | """ 18 | Similar to TrainingSampler, but produces a pair of training images from the 19 | same video sequence. 
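    Unlike PairTrainingSampler, the companion frame is taken at a fixed
    offset of exactly +/- cfg.DATALOADER.PAIR_OFFSET_RANGE frames from the
    anchor (falling back to the anchor frame itself when neither neighboring
    frame exists in the video), rather than at a random offset within that
    range.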
20 | """ 21 | 22 | def __init__( 23 | self, cfg, dataset_dicts, batch_size, shuffle=True, seed=None 24 | ): 25 | """ 26 | Args: 27 | cfg: config parameters 28 | dataset_dicts (list[dict]): annotations in Detectron2 dataset format. 29 | batch_size (int): Size of mini-batch. 30 | shuffle (bool): whether to shuffle the indices or not 31 | seed (int): the initial seed of the shuffle. Must be the same 32 | across all workers. If None, will use a random seed shared 33 | among workers (require synchronization among all workers). 34 | """ 35 | self._offset = cfg.DATALOADER.PAIR_OFFSET_RANGE 36 | 37 | self._shuffle = shuffle 38 | if seed is None: 39 | seed = comm.shared_random_seed() 40 | self._seed = int(seed) 41 | # only sample the previous frame during eval 42 | 43 | self._rank = comm.get_rank() 44 | self._world_size = comm.get_world_size() 45 | 46 | self._total_size = len(dataset_dicts) 47 | total_batch_size = batch_size * self._world_size 48 | self._size = ( 49 | len(dataset_dicts) // total_batch_size 50 | ) * total_batch_size 51 | self._batch_size = batch_size 52 | self.num_per_worker = self._size // self._world_size 53 | 54 | self._dataset_dicts = dataset_dicts 55 | self._data_by_video = {} 56 | for i, data in enumerate(dataset_dicts): 57 | data["total_idx"] = i 58 | if data["video_id"] in self._data_by_video: 59 | self._data_by_video[data["video_id"]][data["index"]] = data 60 | else: 61 | self._data_by_video[data["video_id"]] = {data["index"]: data} 62 | 63 | def __iter__(self): 64 | while True: 65 | indices = self._infinite_indices()[: self._size] 66 | split = indices[ 67 | self._rank 68 | * self.num_per_worker : (self._rank + 1) 69 | * self.num_per_worker 70 | ] 71 | for i in range(0, len(split), self._batch_size): 72 | chunk = split[i : i + self._batch_size] 73 | pairs = [] 74 | for c in chunk: 75 | pairs.append(c) 76 | vid_id = self._dataset_dicts[c]["video_id"] 77 | index = self._dataset_dicts[c]["index"] 78 | offsets = [ 79 | o 80 | for o in [-self._offset, self._offset] 81 | if o != 0 82 | and index + o in self._data_by_video[vid_id].keys() 83 | ] 84 | if not offsets: 85 | offsets = [0] 86 | # breakpoint() 87 | offset = random.choice(offsets) 88 | pair_idx = index + offset 89 | pair = self._data_by_video[vid_id][pair_idx] 90 | pairs.append(pair["total_idx"]) 91 | yield pairs 92 | 93 | def _infinite_indices(self): 94 | # pylint: disable=no-member 95 | g = torch.Generator() 96 | g.manual_seed(self._seed) 97 | if self._shuffle: 98 | return torch.randperm(self._total_size, generator=g) 99 | else: 100 | return torch.arange(self._total_size) 101 | 102 | 103 | class PairFixDataLoader: 104 | """ 105 | Wrapping DataLoader to add random flipping for pairs of images. 
106 | """ 107 | 108 | def __init__(self, cfg, dataloader): 109 | self.cfg = cfg 110 | self.dataloader = dataloader 111 | 112 | def __iter__(self): 113 | # pylint: disable=no-member 114 | for data in iter(self.dataloader): 115 | num_pairs = len(data) // 2 116 | for i in range(num_pairs): 117 | datum = data[i * 2 : (i + 1) * 2] 118 | rand = random.randint(0, 1) 119 | if self.cfg.DATALOADER.NO_FLIP or rand == 0: 120 | continue 121 | # flip both images in pair 122 | for d in datum: 123 | w = d["instances"]._image_size[1] 124 | d["image"] = torch.flip(d["image"], [2]) 125 | boxes = d["instances"].get("gt_boxes") 126 | boxes.tensor[:, 0] = w - boxes.tensor[:, 0] 127 | boxes.tensor[:, 2] = w - boxes.tensor[:, 2] 128 | temp = copy.deepcopy(boxes.tensor[:, 2]) 129 | boxes.tensor[:, 2] = boxes.tensor[:, 0] 130 | boxes.tensor[:, 0] = temp 131 | yield data 132 | -------------------------------------------------------------------------------- /src/data/pair_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data.sampler import Sampler 3 | 4 | from detectron2.utils import comm 5 | 6 | import copy 7 | import itertools 8 | import math 9 | import random 10 | from collections import defaultdict 11 | from typing import Optional 12 | 13 | __all__ = ["PairTrainingSampler", "PairDataLoader"] 14 | 15 | 16 | class PairTrainingSampler(Sampler): 17 | """ 18 | Similar to TrainingSampler, but produces a pair of training images from the 19 | same video sequence. 20 | """ 21 | 22 | def __init__( 23 | self, cfg, dataset_dicts, batch_size, shuffle=True, seed=None 24 | ): 25 | """ 26 | Args: 27 | cfg: config parameters 28 | dataset_dicts (list[dict]): annotations in Detectron2 dataset format. 29 | batch_size (int): Size of mini-batch. 30 | shuffle (bool): whether to shuffle the indices or not 31 | seed (int): the initial seed of the shuffle. Must be the same 32 | across all workers. If None, will use a random seed shared 33 | among workers (require synchronization among all workers). 
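        Example (a minimal sketch; each entry of `dataset_dicts` is assumed
        to carry the "video_id" and "index" keys used below):

            sampler = PairTrainingSampler(cfg, dataset_dicts, batch_size=2)
            batch = next(iter(sampler))
            # `batch` is a flat index list [i0, j0, i1, j1]: each anchor
            # index ik is immediately followed by a paired index jk from the
            # same video, at most cfg.DATALOADER.PAIR_OFFSET_RANGE frames
            # away (or ik itself when no neighboring frame exists).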
34 | """ 35 | self._offset = cfg.DATALOADER.PAIR_OFFSET_RANGE 36 | 37 | self._shuffle = shuffle 38 | if seed is None: 39 | seed = comm.shared_random_seed() 40 | self._seed = int(seed) 41 | # only sample the previous frame during eval 42 | 43 | self._rank = comm.get_rank() 44 | self._world_size = comm.get_world_size() 45 | 46 | self._total_size = len(dataset_dicts) 47 | total_batch_size = batch_size * self._world_size 48 | self._size = ( 49 | len(dataset_dicts) // total_batch_size 50 | ) * total_batch_size 51 | self._batch_size = batch_size 52 | self.num_per_worker = self._size // self._world_size 53 | 54 | self._dataset_dicts = dataset_dicts 55 | self._data_by_video = {} 56 | for i, data in enumerate(dataset_dicts): 57 | data["total_idx"] = i 58 | if data["video_id"] in self._data_by_video: 59 | self._data_by_video[data["video_id"]][data["index"]] = data 60 | else: 61 | self._data_by_video[data["video_id"]] = {data["index"]: data} 62 | 63 | def __iter__(self): 64 | while True: 65 | indices = self._infinite_indices()[: self._size] 66 | split = indices[ 67 | self._rank 68 | * self.num_per_worker : (self._rank + 1) 69 | * self.num_per_worker 70 | ] 71 | for i in range(0, len(split), self._batch_size): 72 | chunk = split[i : i + self._batch_size] 73 | pairs = [] 74 | for c in chunk: 75 | pairs.append(c) 76 | vid_id = self._dataset_dicts[c]["video_id"] 77 | index = self._dataset_dicts[c]["index"] 78 | offsets = [ 79 | o 80 | for o in range(-self._offset, self._offset + 1) 81 | if o != 0 82 | and index + o in self._data_by_video[vid_id].keys() 83 | ] 84 | if not offsets: 85 | offsets = [0] 86 | offset = random.choice(offsets) 87 | pair_idx = index + offset 88 | pair = self._data_by_video[vid_id][pair_idx] 89 | pairs.append(pair["total_idx"]) 90 | yield pairs 91 | 92 | def _infinite_indices(self): 93 | # pylint: disable=no-member 94 | g = torch.Generator() 95 | g.manual_seed(self._seed) 96 | if self._shuffle: 97 | return torch.randperm(self._total_size, generator=g) 98 | else: 99 | return torch.arange(self._total_size) 100 | 101 | 102 | class PairDataLoader: 103 | """ 104 | Wrapping DataLoader to add random flipping for pairs of images. 
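    The same horizontal flip is applied to both frames of a pair (the image
    tensor and its `gt_boxes`), with probability 0.5 per pair; flipping is
    disabled entirely when cfg.DATALOADER.NO_FLIP is set.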
105 | """ 106 | 107 | def __init__(self, cfg, dataloader): 108 | self.cfg = cfg 109 | self.dataloader = dataloader 110 | 111 | def __iter__(self): 112 | # pylint: disable=no-member 113 | for data in iter(self.dataloader): 114 | num_pairs = len(data) // 2 115 | for i in range(num_pairs): 116 | datum = data[i * 2 : (i + 1) * 2] 117 | rand = random.randint(0, 1) 118 | if self.cfg.DATALOADER.NO_FLIP or rand == 0: 119 | continue 120 | # flip both images in pair 121 | for d in datum: 122 | w = d["instances"]._image_size[1] 123 | d["image"] = torch.flip(d["image"], [2]) 124 | boxes = d["instances"].get("gt_boxes") 125 | boxes.tensor[:, 0] = w - boxes.tensor[:, 0] 126 | boxes.tensor[:, 2] = w - boxes.tensor[:, 2] 127 | temp = copy.deepcopy(boxes.tensor[:, 2]) 128 | boxes.tensor[:, 2] = boxes.tensor[:, 0] 129 | boxes.tensor[:, 0] = temp 130 | yield data 131 | -------------------------------------------------------------------------------- /src/data/pair_sampler_multi_interval.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data.sampler import Sampler 3 | 4 | from detectron2.utils import comm 5 | 6 | import copy 7 | import itertools 8 | import math 9 | import numpy as np 10 | import random 11 | from collections import defaultdict 12 | from typing import Optional 13 | 14 | __all__ = ["PairTrainingMultiIntervalSampler", "PairDataIntervalLoader"] 15 | 16 | 17 | class PairTrainingMultiIntervalSampler(Sampler): 18 | """ 19 | Similar to TrainingSampler, but produces a pair of training images from the 20 | same video sequence. 21 | """ 22 | 23 | def __init__( 24 | self, cfg, dataset_dicts, batch_size, shuffle=True, seed=None 25 | ): 26 | """ 27 | Args: 28 | cfg: config parameters 29 | dataset_dicts (list[dict]): annotations in Detectron2 dataset format. 30 | batch_size (int): Size of mini-batch. 31 | shuffle (bool): whether to shuffle the indices or not 32 | seed (int): the initial seed of the shuffle. Must be the same 33 | across all workers. If None, will use a random seed shared 34 | among workers (require synchronization among all workers). 
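        Note: cfg.DATALOADER.SELCTED_NUMBER companion frames are drawn per
        anchor, the k-th at +/- k * cfg.DATALOADER.INTERVAL frames (or the
        anchor itself when neither side is available), so each anchor
        contributes SELCTED_NUMBER + 1 consecutive indices to the yielded
        batch.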
35 | """ 36 | self._offset = cfg.DATALOADER.PAIR_OFFSET_RANGE 37 | self._select = cfg.DATALOADER.SELCTED_NUMBER 38 | self._interval = cfg.DATALOADER.INTERVAL 39 | 40 | self._shuffle = shuffle 41 | if seed is None: 42 | seed = comm.shared_random_seed() 43 | self._seed = int(seed) 44 | # only sample the previous frame during eval 45 | 46 | self._rank = comm.get_rank() 47 | self._world_size = comm.get_world_size() 48 | 49 | self._total_size = len(dataset_dicts) 50 | total_batch_size = batch_size * self._world_size 51 | self._size = ( 52 | len(dataset_dicts) // total_batch_size 53 | ) * total_batch_size 54 | self._batch_size = batch_size 55 | self.num_per_worker = self._size // self._world_size 56 | 57 | self._dataset_dicts = dataset_dicts 58 | self._data_by_video = {} 59 | for i, data in enumerate(dataset_dicts): 60 | data["total_idx"] = i 61 | if data["video_id"] in self._data_by_video: 62 | self._data_by_video[data["video_id"]][data["index"]] = data 63 | else: 64 | self._data_by_video[data["video_id"]] = {data["index"]: data} 65 | 66 | def __iter__(self): 67 | while True: 68 | indices = self._infinite_indices()[: self._size] 69 | split = indices[ 70 | self._rank 71 | * self.num_per_worker : (self._rank + 1) 72 | * self.num_per_worker 73 | ] 74 | for i in range(0, len(split), self._batch_size): 75 | chunk = split[i : i + self._batch_size] 76 | pairs = [] 77 | for c in chunk: 78 | pairs.append(c) 79 | vid_id = self._dataset_dicts[c]["video_id"] 80 | index = self._dataset_dicts[c]["index"] 81 | offset = [] 82 | for cur_index in range(self._select): 83 | offsets = [ 84 | o 85 | for o in [-self._interval*(cur_index+1), self._interval*(cur_index+1)] 86 | if o != 0 87 | and index + o in self._data_by_video[vid_id].keys() 88 | ] 89 | if not offsets: 90 | offsets = [0] 91 | offset += [random.choice(offsets)] 92 | 93 | # offset = random.sample(offsets, self._select) 94 | pair_idx = index + np.asarray(offset) 95 | for temp in pair_idx: 96 | pair = self._data_by_video[vid_id][temp] 97 | pairs.append(pair["total_idx"]) 98 | yield pairs 99 | 100 | def _infinite_indices(self): 101 | # pylint: disable=no-member 102 | g = torch.Generator() 103 | g.manual_seed(self._seed) 104 | if self._shuffle: 105 | return torch.randperm(self._total_size, generator=g) 106 | else: 107 | return torch.arange(self._total_size) 108 | 109 | 110 | class PairDataIntervalLoader: 111 | """ 112 | Wrapping DataLoader to add random flipping for pairs of images. 
113 | """ 114 | 115 | def __init__(self, cfg, dataloader): 116 | self.cfg = cfg 117 | self.dataloader = dataloader 118 | 119 | def __iter__(self): 120 | # pylint: disable=no-member 121 | for data in iter(self.dataloader): 122 | # print(len(data)) 123 | num_pairs = len(data) // (self.cfg.DATALOADER.SELCTED_NUMBER + 1) 124 | for i in range(num_pairs): 125 | # breakpoint() 126 | datum = data[i * (self.cfg.DATALOADER.SELCTED_NUMBER + 1) : 127 | (i + 1) * (self.cfg.DATALOADER.SELCTED_NUMBER + 1)] 128 | rand = random.randint(0, 1) 129 | if self.cfg.DATALOADER.NO_FLIP or rand == 0: 130 | continue 131 | # flip both images in pair 132 | for d in datum: 133 | w = d["instances"]._image_size[1] 134 | d["image"] = torch.flip(d["image"], [2]) 135 | boxes = d["instances"].get("gt_boxes") 136 | boxes.tensor[:, 0] = w - boxes.tensor[:, 0] 137 | boxes.tensor[:, 2] = w - boxes.tensor[:, 2] 138 | temp = copy.deepcopy(boxes.tensor[:, 2]) 139 | boxes.tensor[:, 2] = boxes.tensor[:, 0] 140 | boxes.tensor[:, 0] = temp 141 | yield data 142 | -------------------------------------------------------------------------------- /src/data/pair_sampler_multi_random.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data.sampler import Sampler 3 | 4 | from detectron2.utils import comm 5 | 6 | import copy 7 | import itertools 8 | import math 9 | import numpy as np 10 | import random 11 | from collections import defaultdict 12 | from typing import Optional 13 | 14 | __all__ = ["PairTrainingMultiRandomSampler", "PairMultirandomDataLoader"] 15 | 16 | 17 | class PairTrainingMultiRandomSampler(Sampler): 18 | """ 19 | Similar to TrainingSampler, but produces a pair of training images from the 20 | same video sequence. 21 | """ 22 | 23 | def __init__( 24 | self, cfg, dataset_dicts, batch_size, shuffle=True, seed=None 25 | ): 26 | """ 27 | Args: 28 | cfg: config parameters 29 | dataset_dicts (list[dict]): annotations in Detectron2 dataset format. 30 | batch_size (int): Size of mini-batch. 31 | shuffle (bool): whether to shuffle the indices or not 32 | seed (int): the initial seed of the shuffle. Must be the same 33 | across all workers. If None, will use a random seed shared 34 | among workers (require synchronization among all workers). 
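        Note: cfg.DATALOADER.SELCTED_NUMBER companion offsets are drawn with
        random.sample from the valid offsets in the +/- PAIR_OFFSET_RANGE
        window around the anchor (the candidate list is padded by resampling
        when fewer valid offsets exist), so each anchor contributes
        SELCTED_NUMBER + 1 consecutive indices to the yielded batch.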
35 | """ 36 | self._offset = cfg.DATALOADER.PAIR_OFFSET_RANGE 37 | self._select = cfg.DATALOADER.SELCTED_NUMBER 38 | 39 | self._shuffle = shuffle 40 | if seed is None: 41 | seed = comm.shared_random_seed() 42 | self._seed = int(seed) 43 | # only sample the previous frame during eval 44 | 45 | self._rank = comm.get_rank() 46 | self.save = [] 47 | self._world_size = comm.get_world_size() 48 | 49 | self._total_size = len(dataset_dicts) 50 | total_batch_size = batch_size * self._world_size 51 | self._size = ( 52 | len(dataset_dicts) // total_batch_size 53 | ) * total_batch_size 54 | self._batch_size = batch_size 55 | self.num_per_worker = self._size // self._world_size 56 | 57 | self._dataset_dicts = dataset_dicts 58 | self._data_by_video = {} 59 | for i, data in enumerate(dataset_dicts): 60 | data["total_idx"] = i 61 | if data["video_id"] in self._data_by_video: 62 | self._data_by_video[data["video_id"]][data["index"]] = data 63 | else: 64 | self._data_by_video[data["video_id"]] = {data["index"]: data} 65 | 66 | def __iter__(self): 67 | while True: 68 | indices = self._infinite_indices()[: self._size] 69 | split = indices[ 70 | self._rank 71 | * self.num_per_worker : (self._rank + 1) 72 | * self.num_per_worker 73 | ] 74 | for i in range(0, len(split), self._batch_size): 75 | chunk = split[i : i + self._batch_size] 76 | pairs = [] 77 | for c in chunk: 78 | pairs.append(c) 79 | vid_id = self._dataset_dicts[c]["video_id"] 80 | index = self._dataset_dicts[c]["index"] 81 | offsets = [ 82 | o 83 | for o in range(-self._offset, self._offset + 1) 84 | if o != 0 85 | and index + o in self._data_by_video[vid_id].keys() 86 | ] 87 | if not offsets: 88 | offsets = [0]*self._select 89 | if len(offsets) < self._select: 90 | offsets += [random.choice(offsets) for _ in range(self._select-len(offsets))] 91 | offset = random.sample(offsets, self._select) 92 | # self.save += list(offset) 93 | # print(self.save) 94 | # if len(self.save)>=500: 95 | # # import numpy as np 96 | # np.save('./bdd_offset.npy', self.save) 97 | pair_idx = index + np.asarray(offset) 98 | for temp in pair_idx: 99 | pair = self._data_by_video[vid_id][temp] 100 | pairs.append(pair["total_idx"]) 101 | yield pairs 102 | 103 | def _infinite_indices(self): 104 | # pylint: disable=no-member 105 | g = torch.Generator() 106 | g.manual_seed(self._seed) 107 | if self._shuffle: 108 | return torch.randperm(self._total_size, generator=g) 109 | else: 110 | return torch.arange(self._total_size) 111 | 112 | 113 | class PairMultirandomDataLoader: 114 | """ 115 | Wrapping DataLoader to add random flipping for pairs of images. 
116 | """ 117 | 118 | def __init__(self, cfg, dataloader): 119 | self.cfg = cfg 120 | self.dataloader = dataloader 121 | 122 | def __iter__(self): 123 | # pylint: disable=no-member 124 | for data in iter(self.dataloader): 125 | # print(len(data)) 126 | num_pairs = len(data) // (self.cfg.DATALOADER.SELCTED_NUMBER + 1) 127 | for i in range(num_pairs): 128 | # breakpoint() 129 | datum = data[i * (self.cfg.DATALOADER.SELCTED_NUMBER + 1) : 130 | (i + 1) * (self.cfg.DATALOADER.SELCTED_NUMBER + 1)] 131 | rand = random.randint(0, 1) 132 | if self.cfg.DATALOADER.NO_FLIP or rand == 0: 133 | continue 134 | # flip both images in pair 135 | for d in datum: 136 | w = d["instances"]._image_size[1] 137 | d["image"] = torch.flip(d["image"], [2]) 138 | boxes = d["instances"].get("gt_boxes") 139 | boxes.tensor[:, 0] = w - boxes.tensor[:, 0] 140 | boxes.tensor[:, 2] = w - boxes.tensor[:, 2] 141 | temp = copy.deepcopy(boxes.tensor[:, 2]) 142 | boxes.tensor[:, 2] = boxes.tensor[:, 0] 143 | boxes.tensor[:, 0] = temp 144 | yield data 145 | -------------------------------------------------------------------------------- /src/engine/__init__.py: -------------------------------------------------------------------------------- 1 | from .defaults import DefaultTrainer, default_argument_parser 2 | from .train_loop import * -------------------------------------------------------------------------------- /src/engine/evaluate.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import datetime 3 | import json 4 | import logging 5 | import os 6 | import os.path as osp 7 | import math 8 | import numpy as np 9 | import sys 10 | import time 11 | import torch 12 | from collections import OrderedDict 13 | from contextlib import contextmanager 14 | from detectron2.evaluation import DatasetEvaluators 15 | from detectron2.utils.comm import get_world_size 16 | from detectron2.utils.logger import log_every_n_seconds 17 | 18 | 19 | def inference_on_dataset(model, data_loader, evaluator): 20 | """ 21 | Run model on the data_loader and evaluate the metrics with evaluator. 22 | Also benchmark the inference speed of `model.forward` accurately. 23 | The model will be used in eval mode. 24 | Args: 25 | model (nn.Module): a module which accepts an object from 26 | `data_loader` and returns some outputs. It will be temporarily set to `eval` mode. 27 | If you wish to evaluate a model in `training` mode instead, you can 28 | wrap the given model and override its behavior of `.eval()` and `.train()`. 29 | data_loader: an iterable object with a length. 30 | The elements it generates will be the inputs to the model. 31 | evaluator (DatasetEvaluator): the evaluator to run. Use `None` if you only want 32 | to benchmark, but don't want to do any evaluation. 
33 | Returns: 34 | The return value of `evaluator.evaluate()` 35 | """ 36 | num_devices = get_world_size() 37 | logger = logging.getLogger(__name__) 38 | logger.info("Start inference on {} images".format(len(data_loader))) 39 | 40 | total = len(data_loader) # inference data loader must have a fixed length 41 | if evaluator is None: 42 | # create a no-op evaluator 43 | evaluator = DatasetEvaluators([]) 44 | evaluator.reset() 45 | 46 | num_warmup = min(5, total - 1) 47 | start_time = time.perf_counter() 48 | total_compute_time = 0 49 | with inference_context(model), torch.no_grad(): 50 | for idx, inputs in enumerate(data_loader): 51 | if idx == num_warmup: 52 | start_time = time.perf_counter() 53 | total_compute_time = 0 54 | 55 | start_compute_time = time.perf_counter() 56 | outputs = model(inputs) 57 | if torch.cuda.is_available(): 58 | torch.cuda.synchronize() 59 | total_compute_time += time.perf_counter() - start_compute_time 60 | evaluator.process(inputs, outputs) 61 | 62 | iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup) 63 | seconds_per_img = total_compute_time / iters_after_start 64 | if idx >= num_warmup * 2 or seconds_per_img > 5: 65 | total_seconds_per_img = ( 66 | time.perf_counter() - start_time 67 | ) / iters_after_start 68 | eta = datetime.timedelta( 69 | seconds=int(total_seconds_per_img * (total - idx - 1)) 70 | ) 71 | log_every_n_seconds( 72 | logging.INFO, 73 | "Inference done {}/{}. {:.4f} s / img. ETA={}".format( 74 | idx + 1, total, seconds_per_img, str(eta) 75 | ), 76 | n=5, 77 | ) 78 | 79 | # Measure the time only for this worker (before the synchronization barrier) 80 | total_time = time.perf_counter() - start_time 81 | total_time_str = str(datetime.timedelta(seconds=total_time)) 82 | # NOTE this format is parsed by grep 83 | logger.info( 84 | "Total inference time: {} ({:.6f} s / img per device, on {} devices)".format( 85 | total_time_str, total_time / (total - num_warmup), num_devices 86 | ) 87 | ) 88 | total_compute_time_str = str( 89 | datetime.timedelta(seconds=int(total_compute_time)) 90 | ) 91 | logger.info( 92 | "Total inference pure compute time: {} ({:.6f} s / img per device, on {} devices)".format( 93 | total_compute_time_str, 94 | total_compute_time / (total - num_warmup), 95 | num_devices, 96 | ) 97 | ) 98 | 99 | results = evaluator.evaluate() 100 | # An evaluator may return None when not in main process. 101 | # Replace it by an empty dict instead to make it easier for downstream code to handle 102 | if results is None: 103 | results = {} 104 | return results 105 | 106 | 107 | @contextmanager 108 | def inference_context(model): 109 | """ 110 | A context where the model is temporarily changed to eval mode, 111 | and restored to previous mode afterwards. 
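    Example (as used by `inference_on_dataset` above):

        with inference_context(model), torch.no_grad():
            outputs = model(inputs)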
112 | Args: 113 | model: a torch Module 114 | """ 115 | training_mode = model.training 116 | model.eval() 117 | yield 118 | model.train(training_mode) -------------------------------------------------------------------------------- /src/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .meta_arch import * 2 | from .self_supervised import * 3 | from .roi_heads import * 4 | # from .vit import * 5 | -------------------------------------------------------------------------------- /src/modeling/meta_arch/GAN.py: -------------------------------------------------------------------------------- 1 | ## reference code is https://github.com/pytorch/examples/blob/master/dcgan/main.py 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import os 7 | 8 | # from models import * 9 | 10 | 11 | def weights_init(m): 12 | classname = m.__class__.__name__ 13 | if classname.find('Conv') != -1: 14 | m.weight.data.normal_(0.0, 0.02) 15 | elif classname.find('BatchNorm') != -1: 16 | m.weight.data.normal_(1.0, 0.02) 17 | m.bias.data.fill_(0) 18 | 19 | class _netD(nn.Module): 20 | def __init__(self, ngpu, nc, ndf): 21 | super(_netD, self).__init__() 22 | self.ngpu = ngpu 23 | self.main = nn.Sequential( 24 | # input size. (nc) x 32 x 32 25 | nn.Conv2d(nc, ndf * 2, 4, 2, 1, bias=False), 26 | nn.LeakyReLU(0.2, inplace=True), 27 | # state size. (ndf*2) x 16 x 16 28 | nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False), 29 | nn.BatchNorm2d(ndf * 4), 30 | nn.LeakyReLU(0.2, inplace=True), 31 | # state size. (ndf*4) x 8 x 8 32 | nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False), 33 | nn.BatchNorm2d(ndf * 8), 34 | nn.LeakyReLU(0.2, inplace=True), 35 | # state size. (ndf*8) x 4 x 4 36 | nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False), 37 | nn.Sigmoid() 38 | ) 39 | 40 | def forward(self, input): 41 | if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1: 42 | output = nn.parallel.data_parallel(self.main, input, range(self.ngpu)) 43 | else: 44 | output = self.main(input) 45 | 46 | return output.view(-1, 1) 47 | 48 | class _netG(nn.Module): 49 | def __init__(self, ngpu, nz, ngf, nc): 50 | super(_netG, self).__init__() 51 | self.ngpu = ngpu 52 | self.main = nn.Sequential( 53 | # input is Z, going into a convolution 54 | nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False), 55 | nn.BatchNorm2d(ngf * 8), 56 | nn.ReLU(True), 57 | # state size. (ngf*8) x 4 x 4 58 | nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False), 59 | nn.BatchNorm2d(ngf * 4), 60 | nn.ReLU(True), 61 | # state size. (ngf*4) x 8 x 8 62 | nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False), 63 | nn.BatchNorm2d(ngf * 2), 64 | nn.ReLU(True), 65 | # state size. (ngf*2) x 16 x 16 66 | nn.ConvTranspose2d(ngf * 2, nc, 4, 2, 1, bias=False), 67 | nn.Sigmoid() 68 | # state size. 
(nc) x 32 x 32 69 | ) 70 | 71 | def forward(self, input): 72 | if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1: 73 | output = nn.parallel.data_parallel(self.main, input, range(self.ngpu)) 74 | else: 75 | output = self.main(input) 76 | return output 77 | 78 | def Generator(n_gpu, nz, ngf, nc): 79 | model = _netG(n_gpu, nz, ngf, nc) 80 | model.apply(weights_init) 81 | return model 82 | 83 | def Discriminator(n_gpu, nc, ndf): 84 | model = _netD(n_gpu, nc, ndf) 85 | model.apply(weights_init) 86 | return model -------------------------------------------------------------------------------- /src/modeling/meta_arch/Imagelist.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from __future__ import division 3 | from typing import Any, List, Tuple 4 | import torch 5 | from torch import device 6 | from torch.nn import functional as F 7 | 8 | from detectron2.utils.env import TORCH_VERSION 9 | 10 | 11 | def _as_tensor(x: Tuple[int, int]) -> torch.Tensor: 12 | """ 13 | An equivalent of `torch.as_tensor`, but works under tracing if input 14 | is a list of tensor. `torch.as_tensor` will record a constant in tracing, 15 | but this function will use `torch.stack` instead. 16 | """ 17 | if torch.jit.is_scripting(): 18 | return torch.as_tensor(x) 19 | if isinstance(x, (list, tuple)) and all([isinstance(t, torch.Tensor) for t in x]): 20 | return torch.stack(x) 21 | return torch.as_tensor(x) 22 | 23 | 24 | class ImageList(object): 25 | """ 26 | Structure that holds a list of images (of possibly 27 | varying sizes) as a single tensor. 28 | This works by padding the images to the same size, 29 | and storing in a field the original sizes of each image 30 | Attributes: 31 | image_sizes (list[tuple[int, int]]): each tuple is (h, w). 32 | During tracing, it becomes list[Tensor] instead. 33 | """ 34 | 35 | def __init__(self, tensor: torch.Tensor, image_sizes: List[Tuple[int, int]]): 36 | """ 37 | Arguments: 38 | tensor (Tensor): of shape (N, H, W) or (N, C_1, ..., C_K, H, W) where K >= 1 39 | image_sizes (list[tuple[int, int]]): Each tuple is (h, w). It can 40 | be smaller than (H, W) due to padding. 41 | """ 42 | self.tensor = tensor 43 | self.image_sizes = image_sizes 44 | 45 | 46 | def __len__(self) -> int: 47 | return len(self.image_sizes) 48 | 49 | def __getitem__(self, idx) -> torch.Tensor: 50 | """ 51 | Access the individual image in its original size. 52 | Args: 53 | idx: int or slice 54 | Returns: 55 | Tensor: an image of shape (H, W) or (C_1, ..., C_K, H, W) where K >= 1 56 | """ 57 | size = self.image_sizes[idx] 58 | return self.tensor[idx, ..., : size[0], : size[1]] 59 | 60 | @torch.jit.unused 61 | def to(self, *args: Any, **kwargs: Any) -> "ImageList": 62 | cast_tensor = self.tensor.to(*args, **kwargs) 63 | return ImageList(cast_tensor, self.image_sizes) 64 | 65 | @property 66 | def device(self) -> device: 67 | return self.tensor.device 68 | 69 | @staticmethod 70 | def from_tensors( 71 | tensors: List[torch.Tensor], size_divisibility: int = 0, pad_value: float = 0.0 72 | ) -> "ImageList": 73 | """ 74 | Args: 75 | tensors: a tuple or list of `torch.Tensor`, each of shape (Hi, Wi) or 76 | (C_1, ..., C_K, Hi, Wi) where K >= 1. The Tensors will be padded 77 | to the same shape with `pad_value`. 78 | size_divisibility (int): If `size_divisibility > 0`, add padding to ensure 79 | the common height and width is divisible by `size_divisibility`. 
80 | This depends on the model and many models need a divisibility of 32. 81 | pad_value (float): value to pad 82 | Returns: 83 | an `ImageList`. 84 | """ 85 | assert len(tensors) > 0 86 | assert isinstance(tensors, (tuple, list)) 87 | for t in tensors: 88 | assert isinstance(t, torch.Tensor), type(t) 89 | assert t.shape[:-2] == tensors[0].shape[:-2], t.shape 90 | 91 | image_sizes = [(im.shape[-2], im.shape[-1]) for im in tensors] 92 | image_sizes_tensor = [_as_tensor(x) for x in image_sizes] 93 | max_size = torch.stack(image_sizes_tensor).max(0).values 94 | 95 | if size_divisibility > 1: 96 | stride = size_divisibility 97 | # the last two dims are H,W, both subject to divisibility requirement 98 | max_size = (max_size + (stride - 1)) // stride * stride 99 | 100 | # handle weirdness of scripting and tracing ... 101 | if torch.jit.is_scripting(): 102 | max_size: List[int] = max_size.to(dtype=torch.long).tolist() 103 | else: 104 | # https://github.com/pytorch/pytorch/issues/42448 105 | if TORCH_VERSION >= (1, 7) and torch.jit.is_tracing(): 106 | image_sizes = image_sizes_tensor 107 | 108 | if len(tensors) == 1: 109 | # This seems slightly (2%) faster. 110 | # TODO: check whether it's faster for multiple images as well 111 | image_size = image_sizes[0] 112 | padding_size = [0, max_size[-1] - image_size[1], 0, max_size[-2] - image_size[0]] 113 | batched_imgs = F.pad(tensors[0], padding_size, value=pad_value).unsqueeze_(0) 114 | else: 115 | # max_size can be a tensor in tracing mode, therefore convert to list 116 | batch_shape = [len(tensors)] + list(tensors[0].shape[:-2]) + list(max_size) 117 | batched_imgs = tensors[0].new_full(batch_shape, pad_value) 118 | for img, pad_img in zip(tensors, batched_imgs): 119 | try: 120 | # breakpoint() 121 | pad_img[..., : img.shape[-2], : img.shape[-1]].copy_(img) 122 | except: 123 | pad_img = img 124 | 125 | return ImageList(batched_imgs.contiguous(), image_sizes) -------------------------------------------------------------------------------- /src/modeling/meta_arch/__init__.py: -------------------------------------------------------------------------------- 1 | from .rcnn_ss import SSRCNN 2 | from .rcnn_ss_gene import SSRCNNGene 3 | from .rcnn_ss_remove import SSRCNNRemove 4 | from .rcnn_ss_cheap import SSRCNNCHEAP 5 | from .rcnn import GeneralizedRCNN1 6 | from .rcnn_gan import GeneralizedRCNNLogisticGAN 7 | from .rcnn_csi import GeneralizedRCNNLogisticCSI 8 | from .rcnn_ss_mixup import SSRCNNmixup 9 | from .rcnn_ss_add import SSRCNNAdd 10 | from .rcnn_ss_single import SSRCNNSingle 11 | from .regnet import build_regnet_fpn_backbone, build_regnetx_fpn_backbone 12 | # from .vovnet import build_vovnet_backbone, build_vovnet_fpn_backbone 13 | # from .dla import build_dla_backbone, build_dla_fpn_backbone, build_fcos_dla_fpn_backbone 14 | # from .resnest import build_resnet_backbone1 15 | # from .fpn import FPN1, build_resnet_fpn_backbone1, build_retinanet_resnet_fpn_backbone1 16 | # from .resnest1 import build_resnest_backbone, build_resnest_fpn_backbone, build_fcos_resnest_fpn_backbone 17 | # from .eff import build_efficientnet_backbone, build_efficientnet_fpn_backbone, build_fcos_efficientnet_fpn_backbone 18 | -------------------------------------------------------------------------------- /src/modeling/meta_arch/backbone.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | from abc import ABCMeta, abstractmethod 3 | import torch.nn as nn 4 | 5 | from detectron2.layers import ShapeSpec 6 | 7 | __all__ = ["Backbone"] 8 | 9 | 10 | class Backbone(nn.Module, metaclass=ABCMeta): 11 | """ 12 | Abstract base class for network backbones. 13 | """ 14 | 15 | def __init__(self): 16 | """ 17 | The `__init__` method of any subclass can specify its own set of arguments. 18 | """ 19 | super().__init__() 20 | 21 | @abstractmethod 22 | def forward(self): 23 | """ 24 | Subclasses must override this method, but adhere to the same return type. 25 | Returns: 26 | dict[str->Tensor]: mapping from feature name (e.g., "res2") to tensor 27 | """ 28 | pass 29 | 30 | @property 31 | def size_divisibility(self): 32 | """ 33 | Some backbones require the input height and width to be divisible by a 34 | specific integer. This is typically true for encoder / decoder type networks 35 | with lateral connection (e.g., FPN) for which feature maps need to match 36 | dimension in the "bottom up" and "top down" paths. Set to 0 if no specific 37 | input size divisibility is required. 38 | """ 39 | return 0 40 | 41 | def output_shape(self): 42 | """ 43 | Returns: 44 | dict[str->ShapeSpec] 45 | """ 46 | # this is a backward-compatible default 47 | return { 48 | name: ShapeSpec( 49 | channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] 50 | ) 51 | for name in self._out_features 52 | } -------------------------------------------------------------------------------- /src/modeling/meta_arch/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from detectron2.layers import ShapeSpec 3 | from detectron2.utils.registry import Registry 4 | 5 | from .backbone import Backbone 6 | 7 | BACKBONE_REGISTRY = Registry("BACKBONE") 8 | BACKBONE_REGISTRY.__doc__ = """ 9 | Registry for backbones, which extract feature maps from images 10 | The registered object must be a callable that accepts two arguments: 11 | 1. A :class:`detectron2.config.CfgNode` 12 | 2. A :class:`detectron2.layers.ShapeSpec`, which contains the input shape specification. 13 | It must returns an instance of :class:`Backbone`. 14 | """ 15 | 16 | 17 | def build_backbone(cfg, input_shape=None): 18 | """ 19 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
20 | Returns: 21 | an instance of :class:`Backbone` 22 | """ 23 | if input_shape is None: 24 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 25 | 26 | backbone_name = cfg.MODEL.BACKBONE.NAME 27 | backbone = BACKBONE_REGISTRY.get(backbone_name)(cfg, input_shape) 28 | assert isinstance(backbone, Backbone) 29 | return backbone -------------------------------------------------------------------------------- /src/modeling/meta_arch/layers/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # -------------------------------------------------------- 4 | # Descripttion: https://github.com/sxhxliang/detectron2_backbone 5 | # version: 0.0.1 6 | # Author: Shihua Liang (sxhx.liang@gmail.com) 7 | # FilePath: /detectron2_backbone/detectron2_backbone/layers/__init__.py 8 | # Create: 2020-05-04 10:27:44 9 | # LastAuthor: Shihua Liang 10 | # lastTime: 2020-05-04 10:34:23 11 | # -------------------------------------------------------- 12 | from .wrappers import Conv2d ,SeparableConv2d, MaxPool2d 13 | from .activations import MemoryEfficientSwish, Swish -------------------------------------------------------------------------------- /src/modeling/meta_arch/layers/activations.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # -------------------------------------------------------- 4 | # Descripttion: https://github.com/sxhxliang/detectron2_backbone 5 | # version: 0.0.1 6 | # Author: Shihua Liang (sxhx.liang@gmail.com) 7 | # FilePath: /detectron2_backbone/detectron2_backbone/layers/activations.py 8 | # Create: 2020-05-04 10:33:14 9 | # LastAuthor: Shihua Liang 10 | # lastTime: 2020-05-04 10:33:52 11 | # -------------------------------------------------------- 12 | 13 | import math 14 | 15 | import torch 16 | from torch import nn 17 | 18 | class SwishImplementation(torch.autograd.Function): 19 | @staticmethod 20 | def forward(ctx, i): 21 | result = i * torch.sigmoid(i) 22 | ctx.save_for_backward(i) 23 | return result 24 | 25 | @staticmethod 26 | def backward(ctx, grad_output): 27 | i = ctx.saved_variables[0] 28 | sigmoid_i = torch.sigmoid(i) 29 | return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i))) 30 | 31 | class MemoryEfficientSwish(nn.Module): 32 | def forward(self, x): 33 | return SwishImplementation.apply(x) 34 | 35 | class Swish(nn.Module): 36 | def forward(self, x): 37 | return x * torch.sigmoid(x) -------------------------------------------------------------------------------- /src/modeling/meta_arch/regnet.py: -------------------------------------------------------------------------------- 1 | # from ..common.optim import SGD as optimizer 2 | # from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier 3 | # from ..common.data.coco import dataloader 4 | # from ..common.models.mask_rcnn_fpn import model 5 | # from ..common.train import train 6 | 7 | # from detectron2.config import LazyCall as L 8 | from .regnet_model import RegNet 9 | from .regnet_model import SimpleStem, ResBottleneckBlock 10 | 11 | from detectron2.modeling.backbone.build import BACKBONE_REGISTRY 12 | from detectron2.modeling.backbone.fpn import FPN, LastLevelMaxPool 13 | 14 | from detectron2.layers import ( 15 | Conv2d, 16 | DeformConv, 17 | FrozenBatchNorm2d, 18 | ModulatedDeformConv, 19 | ShapeSpec, 20 | get_norm, 21 | ) 22 | 23 | # Replace default ResNet with RegNetY-4GF from the DDS paper. 
Config source: 24 | # https://github.com/facebookresearch/pycls/blob/2c152a6e5d913e898cca4f0a758f41e6b976714d/configs/dds_baselines/regnety/RegNetY-4.0GF_dds_8gpu.yaml#L4-L10 # noqa 25 | # model.backbone.bottom_up = L(RegNet)( 26 | # stem_class=SimpleStem, 27 | # stem_width=32, 28 | # block_class=ResBottleneckBlock, 29 | # depth=22, 30 | # w_a=31.41, 31 | # w_0=96, 32 | # w_m=2.24, 33 | # group_width=64, 34 | # se_ratio=0.25, 35 | # freeze_at=2, 36 | # norm="FrozenBN", 37 | # out_features=["s1", "s2", "s3", "s4"], 38 | # ) 39 | # model.pixel_std = [57.375, 57.120, 58.395] 40 | # 41 | # optimizer.weight_decay = 5e-5 42 | # train.init_checkpoint = ( 43 | # "https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906838/RegNetY-4.0GF_dds_8gpu.pyth" 44 | # ) 45 | # # RegNets benefit from enabling cudnn benchmark mode 46 | # train.cudnn_benchmark = True 47 | 48 | @BACKBONE_REGISTRY.register() 49 | def build_regnet_fpn_backbone(cfg, input_shape: ShapeSpec): 50 | """ 51 | Args: 52 | cfg: a detectron2 CfgNode 53 | Returns: 54 | backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. 55 | """ 56 | bottom_up = RegNet( 57 | stem_class=SimpleStem, 58 | stem_width=32, 59 | block_class=ResBottleneckBlock, 60 | depth=22, 61 | w_a=31.41, 62 | w_0=96, 63 | w_m=2.24, 64 | group_width=64, 65 | se_ratio=0.25, 66 | freeze_at=2, 67 | norm="FrozenBN", 68 | out_features=["s1", "s2", "s3", "s4"], 69 | ) 70 | in_features = cfg.MODEL.FPN.IN_FEATURES 71 | out_channels = cfg.MODEL.FPN.OUT_CHANNELS 72 | backbone = FPN( 73 | bottom_up=bottom_up, 74 | in_features=in_features, 75 | out_channels=out_channels, 76 | norm=cfg.MODEL.FPN.NORM, 77 | top_block=LastLevelMaxPool(), 78 | fuse_type=cfg.MODEL.FPN.FUSE_TYPE, 79 | ) 80 | return backbone 81 | 82 | @BACKBONE_REGISTRY.register() 83 | def build_regnetx_fpn_backbone(cfg, input_shape: ShapeSpec): 84 | """ 85 | Args: 86 | cfg: a detectron2 CfgNode 87 | Returns: 88 | backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. 
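    Example (a sketch, not taken from a shipped config): a YAML file would
    typically select this backbone with

        MODEL:
          BACKBONE:
            NAME: "build_regnetx_fpn_backbone"
          FPN:
            IN_FEATURES: ["s1", "s2", "s3", "s4"]

    The settings actually used in this project presumably live in the
    configs/*/stud_regnet*.yaml files.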
89 | """ 90 | bottom_up = RegNet( 91 | stem_class=SimpleStem, 92 | stem_width=32, 93 | block_class=ResBottleneckBlock, 94 | depth=23, 95 | w_a=38.65, 96 | w_0=96, 97 | w_m=2.43, 98 | group_width=40, 99 | freeze_at=2, 100 | norm="FrozenBN", 101 | out_features=["s1", "s2", "s3", "s4"], 102 | ) 103 | in_features = cfg.MODEL.FPN.IN_FEATURES 104 | out_channels = cfg.MODEL.FPN.OUT_CHANNELS 105 | backbone = FPN( 106 | bottom_up=bottom_up, 107 | in_features=in_features, 108 | out_channels=out_channels, 109 | norm=cfg.MODEL.FPN.NORM, 110 | top_block=LastLevelMaxPool(), 111 | fuse_type=cfg.MODEL.FPN.FUSE_TYPE, 112 | ) 113 | return backbone -------------------------------------------------------------------------------- /src/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .roi_heads import StandardROIHeadsSS, build_roi_heads 2 | from .roi_heads_gan import ROIHeadsLogisticGAN#, build_roi_heads 3 | from .roi_heads_godinc import ROIHeadsLogisticODINC#,build_roi_heads 4 | from .roi_heads_csi import ROIHeadsLogisticCSI#,build_roi_heads 5 | from .roi_heads_add import StandardROIHeadsSSAdd 6 | from .fast_rcnn import FastRCNNOutputs 7 | -------------------------------------------------------------------------------- /src/modeling/self_supervised/__init__.py: -------------------------------------------------------------------------------- 1 | from .build import SSHEAD_REGISTRY, build_ss_head 2 | # import all the ss head, so they will be registered 3 | # from .cycle import CycleHead 4 | # from .cycle_energy import CycleEnergyHead 5 | # from .cycle_energy_1024_latter import CycleEnergy1024LatterHead 6 | # from .cycle_energy_direct import CycleEnergyDirectHead 7 | # from .cycle_energy_direct_add import CycleEnergyDirectAddHead 8 | from .cycle_energy_direct_add_all import CycleEnergyDirectAddAllHead 9 | # from .cycle_energy_direct_add_all_cache_new import CycleEnergyDirectAddAllCacheHead 10 | # from .cycle_energy_direct_add_all_max import CycleEnergyDirectAddAllMaxHead 11 | # from .cycle_energy_direct_add_all_mild_energy import CycleEnergyDirectAddAllMildHead 12 | # from .cycle_energy_direct_add_all_noise import CycleEnergyDirectAddAllNoiseHead 13 | # from .cycle_energy_direct_add_all_random import CycleEnergyDirectAddAllRandomHead 14 | # from .cycle_energy_direct_add_att import CycleEnergyDirectAddAttHead 15 | # from .cycle_energy_direct_add_att_neg import CycleEnergyDirectAddAttNegHead 16 | # from .cycle_energy_direct_random import CycleEnergyDirectRandomHead 17 | # from .cycle_energy_direct_max import CycleEnergyDirectMaxHead 18 | # from .cycle_energy_direct_no import CycleEnergyDirectAddNoHead 19 | from .rotation import RotationHead 20 | from .jigsaw import JigsawHead 21 | 22 | -------------------------------------------------------------------------------- /src/modeling/self_supervised/build.py: -------------------------------------------------------------------------------- 1 | from detectron2.layers import ShapeSpec 2 | from detectron2.utils.registry import Registry 3 | 4 | SSHEAD_REGISTRY = Registry("SSHEAD") 5 | SSHEAD_REGISTRY.__doc__ = """ 6 | return self-supervised head 7 | """ 8 | 9 | 10 | def build_ss_head(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
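    More precisely, one self-supervised head is built per name listed in
    `cfg.MODEL.SS.NAME`, and the heads are returned as a list.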
13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 19 | 20 | ss_name = cfg.MODEL.SS.NAME 21 | 22 | ss_head = [SSHEAD_REGISTRY.get(name)(cfg, input_shape) for name in ss_name] 23 | assert len(ss_head) != 0 24 | return ss_head 25 | -------------------------------------------------------------------------------- /src/modeling/self_supervised/cycle.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from detectron2.structures import ImageList 7 | 8 | from .build import SSHEAD_REGISTRY 9 | from .ss_layers import Flatten 10 | 11 | 12 | class CycleHead(nn.Module): 13 | def __init__(self, cfg, cin): 14 | super(CycleHead, self).__init__() 15 | 16 | self.name = 'cycle' 17 | self.input = 'ROI' 18 | self.device = torch.device(cfg.MODEL.DEVICE) 19 | self.coef = cfg.MODEL.SS.COEF 20 | 21 | self.enc1 = nn.Sequential( 22 | nn.Conv2d(cin, 256, kernel_size=3, padding=0, bias=True), 23 | # nn.BatchNorm2d(256), 24 | nn.ReLU(inplace=True), 25 | nn.Conv2d(256, 256, kernel_size=3, padding=0, bias=True), 26 | # nn.BatchNorm2d(256), 27 | nn.ReLU(inplace=True), 28 | nn.AdaptiveAvgPool2d(1), 29 | ) 30 | 31 | self.topk = 100 32 | self.bs = cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE 33 | self.scale = cfg.MODEL.SS.LOSS_SCALE 34 | 35 | for m in self.modules(): 36 | if isinstance(m, nn.Linear): 37 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 38 | m.bias.data.zero_() 39 | elif isinstance(m, nn.Conv2d): 40 | nn.init.kaiming_normal_(m.weight, mode='fan_out', 41 | nonlinearity='relu') 42 | if m.bias is not None: 43 | nn.init.constant_(m.bias, 0) 44 | elif isinstance(m, nn.BatchNorm2d): 45 | nn.init.constant_(m.weight, 0) 46 | 47 | def cal_pair_dist(self, feat_u, feat_v): 48 | # finding the similarity score of feat_v 49 | us = feat_u.size(0) 50 | vs = feat_v.size(0) 51 | fs = feat_u.size(1) 52 | assert fs == feat_v.size(1) 53 | 54 | dist = torch.cdist(feat_u, feat_v, p=2).pow(2) * self.coef 55 | # uu = feat_u.unsqueeze(1).repeat(1, vs, 1).view(-1, fs) 56 | # vv = feat_v.repeat(us, 1) 57 | # 58 | # diff = uu - vv 59 | # dist = (diff * diff).sum(dim=1).view(us, vs) * self.coef 60 | score = F.softmax(dist, dim=1) 61 | return dist, score 62 | 63 | def computer_corr_softmax(self, feat_u, feat_v): 64 | # track forward 65 | # calculate the L2 distance between feat_u and feat_v 66 | 67 | sim_dist, sim_score = self.cal_pair_dist(feat_u, feat_v) 68 | soft_v = torch.matmul(sim_score, feat_v) 69 | 70 | # track backward 71 | back_dist, back_score = self.cal_pair_dist(soft_v, feat_u) 72 | labels = torch.arange(len(feat_u)).long().to(back_dist.device) 73 | loss = nn.CrossEntropyLoss()(back_dist, labels) 74 | 75 | if back_dist.size(1) == 0:# there is no objects in the first frame. 
76 | print(back_dist.size(), feat_u.size(), feat_v.size(), loss) 77 | correct = (back_dist.argmax(dim=1) == labels).float().sum() 78 | count = len(back_dist) 79 | return loss, correct, count 80 | 81 | 82 | def forward(self, features, prev_boxes=None): 83 | features, idxs, proposals = features 84 | total_loss = 0.0 85 | corrects = 0 86 | counts = 0 87 | prev = 0 88 | # since the number of proposals might be different for different pairs 89 | if prev_boxes is not None: 90 | feat_u = self.enc1(features) 91 | feat_v = self.enc1(prev_boxes) 92 | feat_u = feat_u.view(feat_u.size(0), feat_u.size(1)) 93 | feat_v = feat_v.view(feat_v.size(0), feat_v.size(1)) 94 | if feat_u.size(0) == 0: 95 | print(feat_u, feat_v) 96 | return {'loss_cycle': feat_u.sum() * self.scale}, 0. 97 | total_loss, correct, cnt = self.computer_corr_softmax(feat_u, feat_v) 98 | # print('correct: ', correct, 'cnt: ', cnt) 99 | total_acc = correct.item()/cnt 100 | 101 | else: 102 | for i in range(0, len(idxs), 2): 103 | u = features[prev:idxs[i]] 104 | v = features[idxs[i]: idxs[i+1]] 105 | prev = idxs[i+1] 106 | feat_u = self.enc1(u) 107 | feat_v = self.enc1(v) 108 | feat_u = feat_u.view(feat_u.size(0), feat_u.size(1)) 109 | feat_v = feat_v.view(feat_v.size(0), feat_v.size(1)) 110 | if feat_u.size(0) == 0: 111 | print(feat_u.size(), feat_v.size()) 112 | loss = feat_u.sum() 113 | correct = 0 114 | cnt = 0 115 | else: 116 | loss, correct, cnt = self.computer_corr_softmax(feat_u, feat_v) 117 | # print(u.view(-1, 256*49).norm(1)) 118 | total_loss += loss*cnt 119 | corrects += correct 120 | counts += cnt 121 | 122 | if counts != 0: 123 | total_loss /= counts 124 | total_acc = corrects/counts 125 | else: 126 | total_acc = 0. 127 | 128 | # print('total loss: {:.4f}\ttotal acc: {:.3f}'.format(total_loss, total_acc)) 129 | return {'loss_cycle': total_loss * self.scale}, total_acc 130 | 131 | 132 | @SSHEAD_REGISTRY.register() 133 | def build_cycle_head(cfg, input_shape): 134 | in_channels = cfg.MODEL.FPN.OUT_CHANNELS 135 | rot_head = CycleHead(cfg, in_channels) 136 | return rot_head 137 | -------------------------------------------------------------------------------- /src/modeling/self_supervised/cycle_energy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from detectron2.structures import ImageList 7 | 8 | from .build import SSHEAD_REGISTRY 9 | from .ss_layers import Flatten 10 | 11 | 12 | class CycleEnergyHead(nn.Module): 13 | def __init__(self, cfg, cin): 14 | super(CycleEnergyHead, self).__init__() 15 | 16 | self.name = 'cycle' 17 | self.input = 'ROI' 18 | self.device = torch.device(cfg.MODEL.DEVICE) 19 | self.coef = cfg.MODEL.SS.COEF 20 | 21 | self.enc1 = nn.Sequential( 22 | nn.Conv2d(cin, 256, kernel_size=3, padding=0, bias=True), 23 | # nn.BatchNorm2d(256), 24 | nn.ReLU(inplace=True), 25 | nn.Conv2d(256, 256, kernel_size=3, padding=0, bias=True), 26 | # nn.BatchNorm2d(256), 27 | nn.ReLU(inplace=True), 28 | nn.AdaptiveAvgPool2d(1) 29 | # nn.Flatten(start_dim=1, end_dim=-1) 30 | ) 31 | self.map_back = nn.Linear(256, 256*49) 32 | 33 | self.topk = 100 34 | self.bs = cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE 35 | self.scale = cfg.MODEL.SS.LOSS_SCALE 36 | 37 | for m in self.modules(): 38 | if isinstance(m, nn.Linear): 39 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 40 | m.bias.data.zero_() 41 | elif isinstance(m, nn.Conv2d): 42 | nn.init.kaiming_normal_(m.weight, mode='fan_out', 43 | 
nonlinearity='relu') 44 | if m.bias is not None: 45 | nn.init.constant_(m.bias, 0) 46 | elif isinstance(m, nn.BatchNorm2d): 47 | nn.init.constant_(m.weight, 0) 48 | 49 | def cal_pair_dist(self, feat_u, feat_v): 50 | # finding the similarity score of feat_v 51 | us = feat_u.size(0) 52 | vs = feat_v.size(0) 53 | fs = feat_u.size(1) 54 | assert fs == feat_v.size(1) 55 | 56 | uu = feat_u.unsqueeze(1).repeat(1, vs, 1).view(-1, fs) 57 | vv = feat_v.repeat(us, 1) 58 | 59 | diff = uu - vv 60 | dist = (diff * diff).sum(dim=1).view(us, vs) * self.coef 61 | score = F.softmax(dist, dim=1) 62 | return dist, score 63 | 64 | def computer_corr_softmax(self, feat_u, feat_v): 65 | # track forward 66 | # calculate the L2 distance between feat_u and feat_v 67 | 68 | sim_dist, sim_score = self.cal_pair_dist(feat_u, feat_v) 69 | soft_v = torch.matmul(sim_score, feat_v) 70 | 71 | # track backward 72 | back_dist, back_score = self.cal_pair_dist(soft_v, feat_u) 73 | labels = torch.arange(len(feat_u)).long().to(back_dist.device) 74 | loss = nn.CrossEntropyLoss()(back_dist, labels) 75 | 76 | if back_dist.size(1) == 0:# there is no objects in the first frame. 77 | print(back_dist.size(), feat_u.size(), feat_v.size(), loss) 78 | correct = (back_dist.argmax(dim=1) == labels).float().sum() 79 | count = len(back_dist) 80 | return loss, correct, count, soft_v 81 | 82 | 83 | def forward(self, features, prev_boxes=None): 84 | features, idxs, proposals = features 85 | total_loss = 0.0 86 | corrects = 0 87 | counts = 0 88 | pos_fea= None 89 | neg_fea = None 90 | prev = 0 91 | # since the number of proposals might be different for different pairs 92 | if prev_boxes is not None: 93 | feat_u = self.enc1(features) 94 | feat_v = self.enc1(prev_boxes) 95 | feat_u = feat_u.view(feat_u.size(0), feat_u.size(1)) 96 | feat_v = feat_v.view(feat_v.size(0), feat_v.size(1)) 97 | if feat_u.size(0) == 0: 98 | print(feat_u, feat_v) 99 | return {'loss_cycle': feat_u.sum() * self.scale}, 0. 100 | total_loss, correct, cnt, _ = self.computer_corr_softmax(feat_u, feat_v) 101 | # print('correct: ', correct, 'cnt: ', cnt) 102 | total_acc = correct.item()/cnt 103 | 104 | else: 105 | for i in range(0, len(idxs), 2): 106 | u = features[prev:idxs[i]] 107 | v = features[idxs[i]: idxs[i+1]] 108 | prev = idxs[i+1] 109 | feat_u = self.enc1(u) 110 | feat_v = self.enc1(v) 111 | feat_u = feat_u.view(feat_u.size(0), feat_u.size(1)) 112 | feat_v = feat_v.view(feat_v.size(0), feat_v.size(1)) 113 | if feat_u.size(0) == 0: 114 | print(feat_u.size(), feat_v.size()) 115 | loss = feat_u.sum() 116 | correct = 0 117 | cnt = 0 118 | else: 119 | loss, correct, cnt, soft_target = self.computer_corr_softmax(feat_u, feat_v) 120 | if pos_fea is None: 121 | pos_fea = self.map_back(feat_u) 122 | neg_fea = self.map_back(soft_target) 123 | else: 124 | pos_fea = torch.cat([pos_fea, self.map_back(feat_u)], 0) 125 | neg_fea = torch.cat([neg_fea, self.map_back(soft_target)], 0) 126 | 127 | total_loss += loss*cnt 128 | corrects += correct 129 | counts += cnt 130 | # breakpoint() 131 | if counts != 0: 132 | total_loss /= counts 133 | total_acc = corrects/counts 134 | else: 135 | total_acc = 0. 
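        # pos_fea holds the pooled ROI embeddings projected by `map_back` to
        # 256*49 dims (a flattened 7x7x256 ROI feature, assuming the default
        # pooler resolution), while neg_fea holds their soft cross-frame
        # matches (softmax-weighted combinations of the paired frame's
        # proposals). They are returned concatenated so the caller
        # (presumably the energy/OOD branch of the meta-arch) can score
        # in-distribution features against the distilled ones.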
136 | if pos_fea is not None: 137 | assert len(pos_fea) == len(neg_fea) 138 | # print('total loss: {:.4f}\ttotal acc: {:.3f}'.format(total_loss, total_acc)) 139 | return {'loss_cycle': total_loss * self.scale}, total_acc, torch.cat([pos_fea, neg_fea], 0) 140 | else: 141 | return {'loss_cycle': total_loss * self.scale}, total_acc, None 142 | 143 | 144 | @SSHEAD_REGISTRY.register() 145 | def build_cycle_energy_head(cfg, input_shape): 146 | in_channels = cfg.MODEL.FPN.OUT_CHANNELS 147 | rot_head = CycleEnergyHead(cfg, in_channels) 148 | return rot_head 149 | -------------------------------------------------------------------------------- /src/modeling/self_supervised/cycle_energy_1024_latter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from detectron2.structures import ImageList 7 | 8 | from .build import SSHEAD_REGISTRY 9 | from .ss_layers import Flatten 10 | 11 | 12 | class CycleEnergy1024LatterHead(nn.Module): 13 | def __init__(self, cfg, cin): 14 | super(CycleEnergy1024LatterHead, self).__init__() 15 | 16 | self.name = 'cycle' 17 | self.input = 'ROI' 18 | self.device = torch.device(cfg.MODEL.DEVICE) 19 | self.coef = cfg.MODEL.SS.COEF 20 | 21 | self.enc1 = nn.Sequential( 22 | nn.Conv2d(cin, 256, kernel_size=3, padding=0, bias=True), 23 | # nn.BatchNorm2d(256), 24 | nn.ReLU(inplace=True), 25 | nn.Conv2d(256, 256, kernel_size=3, padding=0, bias=True), 26 | # nn.BatchNorm2d(256), 27 | nn.ReLU(inplace=True), 28 | nn.AdaptiveAvgPool2d(1) 29 | # nn.Flatten(start_dim=1, end_dim=-1) 30 | ) 31 | self.map_back = nn.Linear(256, 1024) 32 | 33 | self.topk = 100 34 | self.bs = cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE 35 | self.scale = cfg.MODEL.SS.LOSS_SCALE 36 | 37 | for m in self.modules(): 38 | if isinstance(m, nn.Linear): 39 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 40 | m.bias.data.zero_() 41 | elif isinstance(m, nn.Conv2d): 42 | nn.init.kaiming_normal_(m.weight, mode='fan_out', 43 | nonlinearity='relu') 44 | if m.bias is not None: 45 | nn.init.constant_(m.bias, 0) 46 | elif isinstance(m, nn.BatchNorm2d): 47 | nn.init.constant_(m.weight, 0) 48 | 49 | def cal_pair_dist(self, feat_u, feat_v): 50 | # finding the similarity score of feat_v 51 | us = feat_u.size(0) 52 | vs = feat_v.size(0) 53 | fs = feat_u.size(1) 54 | assert fs == feat_v.size(1) 55 | 56 | uu = feat_u.unsqueeze(1).repeat(1, vs, 1).view(-1, fs) 57 | vv = feat_v.repeat(us, 1) 58 | 59 | diff = uu - vv 60 | dist = (diff * diff).sum(dim=1).view(us, vs) * self.coef 61 | score = F.softmax(dist, dim=1) 62 | return dist, score 63 | 64 | def computer_corr_softmax(self, feat_u, feat_v): 65 | # track forward 66 | # calculate the L2 distance between feat_u and feat_v 67 | 68 | sim_dist, sim_score = self.cal_pair_dist(feat_u, feat_v) 69 | soft_v = torch.matmul(sim_score, feat_v) 70 | 71 | # track backward 72 | back_dist, back_score = self.cal_pair_dist(soft_v, feat_u) 73 | labels = torch.arange(len(feat_u)).long().to(back_dist.device) 74 | loss = nn.CrossEntropyLoss()(back_dist, labels) 75 | 76 | if back_dist.size(1) == 0:# there is no objects in the first frame. 
77 | print(back_dist.size(), feat_u.size(), feat_v.size(), loss) 78 | correct = (back_dist.argmax(dim=1) == labels).float().sum() 79 | count = len(back_dist) 80 | return loss, correct, count, soft_v 81 | 82 | 83 | def forward(self, features, prev_boxes=None): 84 | features, idxs, proposals = features 85 | total_loss = 0.0 86 | corrects = 0 87 | counts = 0 88 | prev = 0 89 | pos_fea = None 90 | neg_fea = None 91 | # since the number of proposals might be different for different pairs 92 | if prev_boxes is not None: 93 | feat_u = self.enc1(features) 94 | feat_v = self.enc1(prev_boxes) 95 | feat_u = feat_u.view(feat_u.size(0), feat_u.size(1)) 96 | feat_v = feat_v.view(feat_v.size(0), feat_v.size(1)) 97 | if feat_u.size(0) == 0: 98 | print(feat_u, feat_v) 99 | return {'loss_cycle': feat_u.sum() * self.scale}, 0. 100 | total_loss, correct, cnt, _ = self.computer_corr_softmax(feat_u, feat_v) 101 | # print('correct: ', correct, 'cnt: ', cnt) 102 | total_acc = correct.item()/cnt 103 | 104 | else: 105 | for i in range(0, len(idxs), 2): 106 | u = features[prev:idxs[i]] 107 | v = features[idxs[i]: idxs[i+1]] 108 | prev = idxs[i+1] 109 | # breakpoint() 110 | feat_u = self.enc1(u) 111 | feat_v = self.enc1(v) 112 | feat_u = feat_u.view(feat_u.size(0), feat_u.size(1)) 113 | feat_v = feat_v.view(feat_v.size(0), feat_v.size(1)) 114 | if feat_u.size(0) == 0: 115 | print(feat_u.size(), feat_v.size()) 116 | loss = feat_u.sum() 117 | correct = 0 118 | cnt = 0 119 | else: 120 | loss, correct, cnt, soft_target = self.computer_corr_softmax(feat_u, feat_v) 121 | if pos_fea is None: 122 | pos_fea = self.map_back(feat_u) 123 | neg_fea = self.map_back(soft_target) 124 | else: 125 | pos_fea = torch.cat([pos_fea, self.map_back(feat_u)], 0) 126 | neg_fea = torch.cat([neg_fea, self.map_back(soft_target)], 0) 127 | 128 | total_loss += loss*cnt 129 | corrects += correct 130 | counts += cnt 131 | # breakpoint() 132 | if counts != 0: 133 | total_loss /= counts 134 | total_acc = corrects/counts 135 | else: 136 | total_acc = 0. 
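# A minimal shape sketch for the piece that differs from CycleEnergyHead:
# this variant keeps map_back = nn.Linear(256, 1024), so the 256-d pooled
# cycle features are projected to 1024-d before being returned as
# pos_fea / neg_fea (1024 presumably matching the downstream box-head feature
# width; that reading is an assumption). Toy sizes below are illustrative.
def _map_back_shape_sketch():
    import torch
    import torch.nn as nn

    pooled = torch.randn(8, 256)        # 8 proposals after enc1 + adaptive avg-pool
    map_back = nn.Linear(256, 1024)
    pos_fea = map_back(pooled)          # (8, 1024); the soft-matched counterpart
    assert pos_fea.shape == (8, 1024)   # goes through the same projection
    return pos_fea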
137 | if pos_fea is not None: 138 | assert len(pos_fea) == len(neg_fea) 139 | # print('total loss: {:.4f}\ttotal acc: {:.3f}'.format(total_loss, total_acc)) 140 | return {'loss_cycle': total_loss * self.scale}, total_acc, torch.cat([pos_fea, neg_fea], 0) 141 | else: 142 | return {'loss_cycle': total_loss * self.scale}, total_acc, None 143 | 144 | 145 | @SSHEAD_REGISTRY.register() 146 | def build_cycle_energy_1024_latter_head(cfg, input_shape): 147 | in_channels = cfg.MODEL.FPN.OUT_CHANNELS 148 | rot_head = CycleEnergy1024LatterHead(cfg, in_channels) 149 | return rot_head 150 | -------------------------------------------------------------------------------- /src/modeling/self_supervised/cycle_energy_direct.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from detectron2.structures import ImageList 7 | 8 | from .build import SSHEAD_REGISTRY 9 | from .ss_layers import Flatten 10 | 11 | 12 | class CycleEnergyDirectHead(nn.Module): 13 | def __init__(self, cfg, cin): 14 | super(CycleEnergyDirectHead, self).__init__() 15 | 16 | self.name = 'cycle' 17 | self.input = 'ROI' 18 | self.device = torch.device(cfg.MODEL.DEVICE) 19 | self.coef = cfg.MODEL.SS.COEF 20 | 21 | self.enc1 = nn.Sequential( 22 | nn.Conv2d(cin, 256, kernel_size=3, padding=0, bias=True), 23 | # nn.BatchNorm2d(256), 24 | nn.ReLU(inplace=True), 25 | nn.Conv2d(256, 256, kernel_size=3, padding=0, bias=True), 26 | # nn.BatchNorm2d(256), 27 | nn.ReLU(inplace=True), 28 | nn.AdaptiveAvgPool2d(1) 29 | # nn.Flatten(start_dim=1, end_dim=-1) 30 | ) 31 | # self.map_back = nn.Linear(256, 256*49) 32 | 33 | self.topk = 100 34 | self.bs = cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE 35 | self.scale = cfg.MODEL.SS.LOSS_SCALE 36 | 37 | for m in self.modules(): 38 | if isinstance(m, nn.Linear): 39 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 40 | m.bias.data.zero_() 41 | elif isinstance(m, nn.Conv2d): 42 | nn.init.kaiming_normal_(m.weight, mode='fan_out', 43 | nonlinearity='relu') 44 | if m.bias is not None: 45 | nn.init.constant_(m.bias, 0) 46 | elif isinstance(m, nn.BatchNorm2d): 47 | nn.init.constant_(m.weight, 0) 48 | 49 | def cal_pair_dist(self, feat_u, feat_v): 50 | # finding the similarity score of feat_v 51 | us = feat_u.size(0) 52 | vs = feat_v.size(0) 53 | fs = feat_u.size(1) 54 | assert fs == feat_v.size(1) 55 | 56 | uu = feat_u.unsqueeze(1).repeat(1, vs, 1).view(-1, fs) 57 | vv = feat_v.repeat(us, 1) 58 | 59 | diff = uu - vv 60 | dist = (diff * diff).sum(dim=1).view(us, vs) * self.coef 61 | score = F.softmax(dist, dim=1) 62 | return dist, score 63 | 64 | def computer_corr_softmax(self, feat_u, feat_v): 65 | # track forward 66 | # calculate the L2 distance between feat_u and feat_v 67 | 68 | sim_dist, sim_score = self.cal_pair_dist(feat_u, feat_v) 69 | soft_v = torch.matmul(sim_score, feat_v) 70 | 71 | # track backward 72 | back_dist, back_score = self.cal_pair_dist(soft_v, feat_u) 73 | labels = torch.arange(len(feat_u)).long().to(back_dist.device) 74 | loss = nn.CrossEntropyLoss()(back_dist, labels) 75 | 76 | if back_dist.size(1) == 0:# there is no objects in the first frame. 
77 | print(back_dist.size(), feat_u.size(), feat_v.size(), loss) 78 | correct = (back_dist.argmax(dim=1) == labels).float().sum() 79 | count = len(back_dist) 80 | return loss, correct, count, sim_score 81 | 82 | 83 | def forward(self, features, prev_boxes=None): 84 | features, idxs, proposals = features 85 | total_loss = 0.0 86 | corrects = 0 87 | counts = 0 88 | pos_fea= None 89 | neg_fea = None 90 | prev = 0 91 | # since the number of proposals might be different for different pairs 92 | if prev_boxes is not None: 93 | feat_u = self.enc1(features) 94 | feat_v = self.enc1(prev_boxes) 95 | feat_u = feat_u.view(feat_u.size(0), feat_u.size(1)) 96 | feat_v = feat_v.view(feat_v.size(0), feat_v.size(1)) 97 | if feat_u.size(0) == 0: 98 | print(feat_u, feat_v) 99 | return {'loss_cycle': feat_u.sum() * self.scale}, 0. 100 | total_loss, correct, cnt, _ = self.computer_corr_softmax(feat_u, feat_v) 101 | # print('correct: ', correct, 'cnt: ', cnt) 102 | total_acc = correct.item()/cnt 103 | 104 | else: 105 | for i in range(0, len(idxs), 2): 106 | u = features[prev:idxs[i]] 107 | v = features[idxs[i]: idxs[i+1]] 108 | prev = idxs[i+1] 109 | feat_u = self.enc1(u) 110 | feat_v = self.enc1(v) 111 | feat_u = feat_u.view(feat_u.size(0), feat_u.size(1)) 112 | feat_v = feat_v.view(feat_v.size(0), feat_v.size(1)) 113 | if feat_u.size(0) == 0: 114 | print(feat_u.size(), feat_v.size()) 115 | loss = feat_u.sum() 116 | correct = 0 117 | cnt = 0 118 | else: 119 | loss, correct, cnt, soft_target_score = self.computer_corr_softmax(feat_u, feat_v) 120 | # breakpoint() 121 | if pos_fea is None: 122 | pos_fea = u.view(-1, 256*49) 123 | neg_fea = torch.matmul(soft_target_score, v.view(-1, 256*49)) 124 | else: 125 | pos_fea = torch.cat([pos_fea, u.view(-1, 256*49)], 0) 126 | neg_fea = torch.cat([neg_fea, torch.matmul(soft_target_score, v.view(-1, 256*49))], 0) 127 | 128 | total_loss += loss*cnt 129 | corrects += correct 130 | counts += cnt 131 | # breakpoint() 132 | if counts != 0: 133 | total_loss /= counts 134 | total_acc = corrects/counts 135 | else: 136 | total_acc = 0. 
137 | if pos_fea is not None: 138 | assert len(pos_fea) == len(neg_fea) 139 | # print('total loss: {:.4f}\ttotal acc: {:.3f}'.format(total_loss, total_acc)) 140 | return {'loss_cycle': total_loss * self.scale}, total_acc, torch.cat([pos_fea, neg_fea], 0) 141 | else: 142 | return {'loss_cycle': total_loss * self.scale}, total_acc, None 143 | 144 | 145 | @SSHEAD_REGISTRY.register() 146 | def build_cycle_energy_direct_head(cfg, input_shape): 147 | in_channels = cfg.MODEL.FPN.OUT_CHANNELS 148 | rot_head = CycleEnergyDirectHead(cfg, in_channels) 149 | return rot_head 150 | -------------------------------------------------------------------------------- /src/modeling/self_supervised/cycle_energy_direct_max.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from detectron2.structures import ImageList 7 | 8 | from .build import SSHEAD_REGISTRY 9 | from .ss_layers import Flatten 10 | 11 | 12 | class CycleEnergyDirectMaxHead(nn.Module): 13 | def __init__(self, cfg, cin): 14 | super(CycleEnergyDirectMaxHead, self).__init__() 15 | 16 | self.name = 'cycle' 17 | self.input = 'ROI' 18 | self.device = torch.device(cfg.MODEL.DEVICE) 19 | self.coef = cfg.MODEL.SS.COEF 20 | 21 | self.enc1 = nn.Sequential( 22 | nn.Conv2d(cin, 256, kernel_size=3, padding=0, bias=True), 23 | # nn.BatchNorm2d(256), 24 | nn.ReLU(inplace=True), 25 | nn.Conv2d(256, 256, kernel_size=3, padding=0, bias=True), 26 | # nn.BatchNorm2d(256), 27 | nn.ReLU(inplace=True), 28 | nn.AdaptiveAvgPool2d(1) 29 | # nn.Flatten(start_dim=1, end_dim=-1) 30 | ) 31 | # self.map_back = nn.Linear(256, 256*49) 32 | self.add = nn.Conv2d(256, 256, kernel_size=1) 33 | self.topk = 100 34 | self.bs = cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE 35 | self.scale = cfg.MODEL.SS.LOSS_SCALE 36 | 37 | for m in self.modules(): 38 | if isinstance(m, nn.Linear): 39 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 40 | m.bias.data.zero_() 41 | elif isinstance(m, nn.Conv2d): 42 | nn.init.kaiming_normal_(m.weight, mode='fan_out', 43 | nonlinearity='relu') 44 | if m.bias is not None: 45 | nn.init.constant_(m.bias, 0) 46 | elif isinstance(m, nn.BatchNorm2d): 47 | nn.init.constant_(m.weight, 0) 48 | 49 | def cal_pair_dist(self, feat_u, feat_v): 50 | # finding the similarity score of feat_v 51 | us = feat_u.size(0) 52 | vs = feat_v.size(0) 53 | fs = feat_u.size(1) 54 | assert fs == feat_v.size(1) 55 | 56 | uu = feat_u.unsqueeze(1).repeat(1, vs, 1).view(-1, fs) 57 | vv = feat_v.repeat(us, 1) 58 | 59 | diff = uu - vv 60 | dist = (diff * diff).sum(dim=1).view(us, vs) * self.coef 61 | score = F.softmax(dist, dim=1) 62 | return dist, score 63 | 64 | def computer_corr_softmax(self, feat_u, feat_v): 65 | # track forward 66 | # calculate the L2 distance between feat_u and feat_v 67 | 68 | sim_dist, sim_score = self.cal_pair_dist(feat_u, feat_v) 69 | soft_v = torch.matmul(sim_score, feat_v) 70 | 71 | # track backward 72 | back_dist, back_score = self.cal_pair_dist(soft_v, feat_u) 73 | labels = torch.arange(len(feat_u)).long().to(back_dist.device) 74 | loss = nn.CrossEntropyLoss()(back_dist, labels) 75 | 76 | if back_dist.size(1) == 0:# there is no objects in the first frame. 
77 | print(back_dist.size(), feat_u.size(), feat_v.size(), loss) 78 | correct = (back_dist.argmax(dim=1) == labels).float().sum() 79 | count = len(back_dist) 80 | return loss, correct, count, sim_score 81 | 82 | 83 | def forward(self, roi_head, features, prev_boxes=None): 84 | features, idxs, proposals = features 85 | total_loss = 0.0 86 | corrects = 0 87 | counts = 0 88 | pos_fea= None 89 | neg_fea = None 90 | prev = 0 91 | # since the number of proposals might be different for different pairs 92 | if prev_boxes is not None: 93 | feat_u = self.enc1(features) 94 | feat_v = self.enc1(prev_boxes) 95 | feat_u = feat_u.view(feat_u.size(0), feat_u.size(1)) 96 | feat_v = feat_v.view(feat_v.size(0), feat_v.size(1)) 97 | if feat_u.size(0) == 0: 98 | print(feat_u, feat_v) 99 | return {'loss_cycle': feat_u.sum() * self.scale}, 0. 100 | total_loss, correct, cnt, _ = self.computer_corr_softmax(feat_u, feat_v) 101 | # print('correct: ', correct, 'cnt: ', cnt) 102 | total_acc = correct.item()/cnt 103 | 104 | else: 105 | for i in range(0, len(idxs), 2): 106 | u = features[prev:idxs[i]] 107 | v = features[idxs[i]: idxs[i+1]] 108 | prev = idxs[i+1] 109 | feat_u = self.enc1(u) 110 | feat_v = self.enc1(v) 111 | feat_u = feat_u.view(feat_u.size(0), feat_u.size(1)) 112 | feat_v = feat_v.view(feat_v.size(0), feat_v.size(1)) 113 | if feat_u.size(0) == 0: 114 | print(feat_u.size(), feat_v.size()) 115 | loss = feat_u.sum() 116 | correct = 0 117 | cnt = 0 118 | else: 119 | loss, correct, cnt, soft_target_score = self.computer_corr_softmax(feat_u, feat_v) 120 | max_indices = torch.argmax(soft_target_score, 1).view(-1) 121 | # breakpoint() 122 | if pos_fea is None: 123 | pos_fea = self.add(u).view(-1, 256*49) 124 | neg_fea = self.add(v).view(-1, 256*49)[max_indices] 125 | #torch.matmul(soft_target_score, v.view(-1, 256*49)) 126 | else: 127 | pos_fea = torch.cat([pos_fea, self.add(u).view(-1, 256*49)], 0) 128 | neg_fea = torch.cat([neg_fea, self.add(v).view(-1, 256*49)[max_indices]], 0) 129 | 130 | total_loss += loss*cnt 131 | corrects += correct 132 | counts += cnt 133 | # breakpoint() 134 | if counts != 0: 135 | total_loss /= counts 136 | total_acc = corrects/counts 137 | else: 138 | total_acc = 0. 
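# A minimal sketch of the difference between the "direct" and "direct max"
# heads: the direct head builds the negative feature as a softmax-weighted
# combination of frame-(t+k) ROI features, while this head picks the single
# best match with argmax. Toy sizes (7x7 ROIs, 256 channels) follow the
# 256*49 flattening used above and are otherwise assumptions.
def _soft_vs_hard_negative_sketch():
    import torch

    score = torch.softmax(torch.randn(5, 7), dim=1)    # matching weights (N_t, N_{t+k})
    v_flat = torch.randn(7, 256 * 49)                   # flattened ROI features, frame t+k

    soft_neg = score @ v_flat                           # CycleEnergyDirectHead style
    hard_neg = v_flat[score.argmax(dim=1)]              # this head: one hard match per proposal
    assert soft_neg.shape == hard_neg.shape == (5, 256 * 49)
    return soft_neg, hard_neg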
139 | if pos_fea is not None: 140 | assert len(pos_fea) == len(neg_fea) 141 | # print('total loss: {:.4f}\ttotal acc: {:.3f}'.format(total_loss, total_acc)) 142 | return {'loss_cycle': total_loss * self.scale}, total_acc, torch.cat([pos_fea, neg_fea], 0), None 143 | else: 144 | return {'loss_cycle': total_loss * self.scale}, total_acc, None 145 | 146 | 147 | @SSHEAD_REGISTRY.register() 148 | def build_cycle_energy_direct_add_max_head(cfg, input_shape): 149 | in_channels = cfg.MODEL.FPN.OUT_CHANNELS 150 | rot_head = CycleEnergyDirectMaxHead(cfg, in_channels) 151 | return rot_head 152 | -------------------------------------------------------------------------------- /src/modeling/self_supervised/cycle_energy_direct_no.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from detectron2.structures import ImageList 7 | 8 | from .build import SSHEAD_REGISTRY 9 | from .ss_layers import Flatten 10 | 11 | 12 | class CycleEnergyDirectAddNoHead(nn.Module): 13 | def __init__(self, cfg, cin): 14 | super(CycleEnergyDirectAddNoHead, self).__init__() 15 | 16 | self.name = 'cycle' 17 | self.input = 'ROI' 18 | self.device = torch.device(cfg.MODEL.DEVICE) 19 | self.coef = cfg.MODEL.SS.COEF 20 | 21 | # self.enc1 = nn.Sequential( 22 | # nn.Conv2d(cin, 256, kernel_size=3, padding=0, bias=True), 23 | # # nn.BatchNorm2d(256), 24 | # nn.ReLU(inplace=True), 25 | # nn.Conv2d(256, 256, kernel_size=3, padding=0, bias=True), 26 | # # nn.BatchNorm2d(256), 27 | # nn.ReLU(inplace=True), 28 | # nn.AdaptiveAvgPool2d(1) 29 | # # nn.Flatten(start_dim=1, end_dim=-1) 30 | # ) 31 | # self.add = nn.Conv2d(256, 256, kernel_size=1) 32 | # self.map_back = nn.Linear(256, 256*49) 33 | 34 | self.topk = 100 35 | self.bs = cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE 36 | self.scale = cfg.MODEL.SS.LOSS_SCALE 37 | 38 | for m in self.modules(): 39 | if isinstance(m, nn.Linear): 40 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 41 | m.bias.data.zero_() 42 | elif isinstance(m, nn.Conv2d): 43 | nn.init.kaiming_normal_(m.weight, mode='fan_out', 44 | nonlinearity='relu') 45 | if m.bias is not None: 46 | nn.init.constant_(m.bias, 0) 47 | elif isinstance(m, nn.BatchNorm2d): 48 | nn.init.constant_(m.weight, 0) 49 | 50 | def cal_pair_dist(self, feat_u, feat_v): 51 | # finding the similarity score of feat_v 52 | us = feat_u.size(0) 53 | vs = feat_v.size(0) 54 | fs = feat_u.size(1) 55 | assert fs == feat_v.size(1) 56 | 57 | uu = feat_u.unsqueeze(1).repeat(1, vs, 1).view(-1, fs) 58 | vv = feat_v.repeat(us, 1) 59 | 60 | diff = uu - vv 61 | dist = (diff * diff).sum(dim=1).view(us, vs) * self.coef 62 | score = F.softmax(dist, dim=1) 63 | return dist, score 64 | 65 | def computer_corr_softmax(self, feat_u, feat_v): 66 | # track forward 67 | # calculate the L2 distance between feat_u and feat_v 68 | 69 | sim_dist, sim_score = self.cal_pair_dist(feat_u, feat_v) 70 | # soft_v = torch.matmul(sim_score, feat_v) 71 | # 72 | # # track backward 73 | # back_dist, back_score = self.cal_pair_dist(soft_v, feat_u) 74 | # labels = torch.arange(len(feat_u)).long().to(back_dist.device) 75 | # loss = nn.CrossEntropyLoss()(back_dist, labels) 76 | # 77 | # if back_dist.size(1) == 0:# there is no objects in the first frame. 
78 | # print(back_dist.size(), feat_u.size(), feat_v.size(), loss) 79 | # correct = (back_dist.argmax(dim=1) == labels).float().sum() 80 | # count = len(back_dist) 81 | return torch.zeros(1).cuda(), 0, 0, sim_score 82 | 83 | 84 | def forward(self, features, prev_boxes=None): 85 | features, idxs, proposals = features 86 | total_loss = 0.0 87 | corrects = 0 88 | counts = 0 89 | pos_fea= None 90 | neg_fea = None 91 | prev = 0 92 | # since the number of proposals might be different for different pairs 93 | if prev_boxes is not None: 94 | feat_u = self.enc1(features) 95 | feat_v = self.enc1(prev_boxes) 96 | feat_u = feat_u.view(feat_u.size(0), feat_u.size(1)) 97 | feat_v = feat_v.view(feat_v.size(0), feat_v.size(1)) 98 | if feat_u.size(0) == 0: 99 | print(feat_u, feat_v) 100 | return {'loss_cycle': feat_u.sum() * self.scale}, 0. 101 | total_loss, correct, cnt, _ = self.computer_corr_softmax(feat_u, feat_v) 102 | # print('correct: ', correct, 'cnt: ', cnt) 103 | total_acc = correct.item()/cnt 104 | 105 | else: 106 | for i in range(0, len(idxs), 2): 107 | u = features[prev:idxs[i]] 108 | v = features[idxs[i]: idxs[i+1]] 109 | prev = idxs[i+1] 110 | feat_u = u.view(-1, 256*49)#self.enc1(u) 111 | feat_v = v.view(-1, 256*49)#self.enc1(v) 112 | feat_u = feat_u.view(feat_u.size(0), feat_u.size(1)) 113 | feat_v = feat_v.view(feat_v.size(0), feat_v.size(1)) 114 | if feat_u.size(0) == 0: 115 | print(feat_u.size(), feat_v.size()) 116 | loss = feat_u.sum() 117 | correct = 0 118 | cnt = 0 119 | else: 120 | loss, correct, cnt, soft_target_score = self.computer_corr_softmax(feat_u, feat_v) 121 | # breakpoint() 122 | if pos_fea is None: 123 | pos_fea = u.view(-1, 256*49) 124 | neg_fea = torch.matmul(soft_target_score, v.view(-1, 256*49)) 125 | # breakpoint() 126 | else: 127 | pos_fea = torch.cat([pos_fea, u.view(-1, 256*49)], 0) 128 | neg_fea = torch.cat([neg_fea, torch.matmul(soft_target_score, v.view(-1, 256*49))], 0) 129 | 130 | total_loss += loss*cnt 131 | corrects += correct 132 | counts += cnt 133 | # breakpoint() 134 | if counts != 0: 135 | total_loss /= counts 136 | total_acc = corrects/counts 137 | else: 138 | total_acc = 0. 
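# A minimal sketch of what this ablation keeps: enc1 and the cycle loss are
# disabled (computer_corr_softmax returns a constant zero loss), so the head
# only pairs raw flattened 256*49 ROI features with their softmax-weighted
# matches from the other frame. Toy sizes and coef are assumptions.
def _raw_feature_pairing_sketch(coef=-1.0):
    import torch
    import torch.nn.functional as F

    u = torch.randn(4, 256, 7, 7)                       # frame-t ROI features
    v = torch.randn(6, 256, 7, 7)                       # frame-(t+k) ROI features
    fu, fv = u.view(-1, 256 * 49), v.view(-1, 256 * 49)

    dist = ((fu.unsqueeze(1) - fv.unsqueeze(0)) ** 2).sum(-1) * coef
    score = F.softmax(dist, dim=1)                      # (4, 6) matching weights
    pos_fea, neg_fea = fu, score @ fv                   # pairs handed back to the caller
    return pos_fea, neg_fea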
139 | if pos_fea is not None: 140 | assert len(pos_fea) == len(neg_fea) 141 | # print('total loss: {:.4f}\ttotal acc: {:.3f}'.format(total_loss, total_acc)) 142 | return {'loss_cycle': total_loss * self.scale}, total_acc, torch.cat([pos_fea, neg_fea], 0) 143 | else: 144 | return {'loss_cycle': total_loss * self.scale}, total_acc, None 145 | 146 | 147 | @SSHEAD_REGISTRY.register() 148 | def build_cycle_energy_direct_no_head(cfg, input_shape): 149 | in_channels = cfg.MODEL.FPN.OUT_CHANNELS 150 | rot_head = CycleEnergyDirectAddNoHead(cfg, in_channels) 151 | return rot_head 152 | -------------------------------------------------------------------------------- /src/modeling/self_supervised/rotation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from .build import SSHEAD_REGISTRY 7 | from .ss_layers import Bottleneck, conv1x1, conv3x3 8 | from ..utils.image_list import ImageList, crop_tensor 9 | 10 | 11 | class RotationHead(nn.Module): 12 | def __init__(self, cfg, cin): 13 | super(RotationHead, self).__init__() 14 | 15 | # resnet config 16 | self.name = 'rot' 17 | self.input = 'images' 18 | self.device = torch.device(cfg.MODEL.DEVICE) 19 | norm_layer = nn.BatchNorm2d 20 | self._norm_layer = norm_layer 21 | self.dilation = 1 22 | self.groups = 1 23 | self.base_width = 64 24 | 25 | # hard code the task specific parameters in order to 26 | # support multi-tasking 27 | self.crop_size = 224 28 | # self.ratio = 2 29 | self.ratio = cfg.MODEL.SS.RATIO # crop image ratio 30 | 31 | depth = cfg.MODEL.RESNETS.DEPTH 32 | stage_ids = {"res2": 0, "res3": 1, "res4": 2, "res5": 3} 33 | num_blocks_per_stage = {50: [3, 4, 6, 3], 101: [3, 4, 23, 3], 34 | 152: [3, 8, 36, 3]}[depth] 35 | self.start_stage = min(stage_ids[cfg.MODEL.SS.FEAT_LEVEL]+1, 3) 36 | self.inplanes = cin 37 | self.scale = cfg.MODEL.SS.LOSS_SCALE 38 | 39 | out_channels = self.inplanes 40 | 41 | for i in range(self.start_stage, 4): 42 | out_channels *= 2 43 | setattr(self, "layer{}".format(i), 44 | self._make_layer(Bottleneck, out_channels//4, 45 | num_blocks_per_stage[i], stride=2)) 46 | 47 | # num_classes = cfg.MODEL.SS.NUM_CLASSES 48 | num_classes = 4 49 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 50 | self.fc = nn.Linear(out_channels, num_classes) 51 | self.criterion = nn.CrossEntropyLoss() 52 | 53 | assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD) 54 | num_channels = len(cfg.MODEL.PIXEL_MEAN) 55 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(num_channels, 1, 1) 56 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(num_channels, 1, 1) 57 | self.normalizer = lambda x: (x - pixel_mean) / pixel_std 58 | 59 | for m in self.modules(): 60 | if isinstance(m, nn.Conv2d): 61 | nn.init.kaiming_normal_(m.weight, mode='fan_out', 62 | nonlinearity='relu') 63 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 64 | nn.init.constant_(m.weight, 1) 65 | nn.init.constant_(m.bias, 0) 66 | 67 | def _make_layer(self, block, planes, blocks, stride=1, dilate=False): 68 | norm_layer = self._norm_layer 69 | downsample = None 70 | previous_dilation = self.dilation 71 | if dilate: 72 | self.dilation *= stride 73 | stride = 1 74 | if stride != 1 or self.inplanes != planes * block.expansion: 75 | downsample = nn.Sequential( 76 | conv1x1(self.inplanes, planes * block.expansion, stride), 77 | norm_layer(planes * block.expansion), 78 | ) 79 | 80 | layers = [] 81 | 
layers.append( 82 | block(self.inplanes, planes, stride, downsample, self.groups, 83 | self.base_width, previous_dilation, norm_layer)) 84 | self.inplanes = planes * block.expansion 85 | for _ in range(1, blocks): 86 | layers.append(block(self.inplanes, planes, groups=self.groups, 87 | base_width=self.base_width, 88 | dilation=self.dilation, 89 | norm_layer=norm_layer)) 90 | 91 | return nn.Sequential(*layers) 92 | 93 | def forward(self, batched_inputs, feat_base, feat_level): 94 | x, y = self.gen_ss_inputs(batched_inputs) 95 | x = feat_base(x)[feat_level] 96 | for i in range(self.start_stage, 4): 97 | x = getattr(self, "layer{}".format(i))(x) 98 | 99 | x = self.avgpool(x) 100 | bs = x.size(0) 101 | x = x.squeeze() 102 | if bs == 1: 103 | x = x.unsqueeze(0) 104 | x = self.fc(x) 105 | loss = self.criterion(x, y.long()) 106 | losses = {'loss_rot_cls': loss * self.scale} 107 | return x, y, losses 108 | 109 | # add the data processing for each task 110 | def preprocess_image_ss(self, batched_inputs): 111 | """resize and random crop the images""" 112 | images = [x["image"].to(self.device) for x in batched_inputs] 113 | images = [self.normalizer(x) for x in images] 114 | images = ImageList.from_tensors_crop(images, self.crop_size, self.ratio) 115 | return images 116 | 117 | def gen_ss_inputs(self, batched_inputs): 118 | """produce rotation targets""" 119 | images = self.preprocess_image_ss(batched_inputs=batched_inputs) 120 | tensors = images.tensor.clone().to(self.device) 121 | targets = torch.zeros(len(tensors)).long().to(self.device) 122 | for i in range(len(tensors)): 123 | tar = np.random.choice(4) 124 | targets[i] = tar 125 | t = images.tensor[i] 126 | rot = t.rot90(tar, (1, 2)) 127 | tensors[i] = rot 128 | images.tensor = tensors 129 | return tensors, targets 130 | 131 | 132 | @SSHEAD_REGISTRY.register() 133 | def build_rotation_head(cfg, input_shape): 134 | in_channels = input_shape[cfg.MODEL.SS.FEAT_LEVEL].channels 135 | rot_head = RotationHead(cfg, in_channels) 136 | return rot_head -------------------------------------------------------------------------------- /src/modeling/self_supervised/ss_layers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): 6 | """3x3 convolution with padding""" 7 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 8 | padding=dilation, groups=groups, bias=False, dilation=dilation) 9 | 10 | 11 | def conv1x1(in_planes, out_planes, stride=1): 12 | """1x1 convolution""" 13 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 14 | 15 | 16 | class Bottleneck(nn.Module): 17 | expansion = 4 18 | 19 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 20 | base_width=64, dilation=1, norm_layer=None): 21 | super(Bottleneck, self).__init__() 22 | if norm_layer is None: 23 | norm_layer = nn.BatchNorm2d 24 | width = int(planes * (base_width / 64.)) * groups 25 | # Both self.conv2 and self.downsample layers downsample the 26 | # input when stride != 1 27 | self.conv1 = conv1x1(inplanes, width) 28 | self.bn1 = norm_layer(width) 29 | self.conv2 = conv3x3(width, width, stride, groups, dilation) 30 | self.bn2 = norm_layer(width) 31 | self.conv3 = conv1x1(width, planes * self.expansion) 32 | self.bn3 = norm_layer(planes * self.expansion) 33 | self.relu = nn.ReLU(inplace=True) 34 | self.downsample = downsample 35 | self.stride = stride 36 | 37 | def 
forward(self, x): 38 | identity = x 39 | 40 | out = self.conv1(x) 41 | out = self.bn1(out) 42 | out = self.relu(out) 43 | 44 | out = self.conv2(out) 45 | out = self.bn2(out) 46 | out = self.relu(out) 47 | 48 | out = self.conv3(out) 49 | out = self.bn3(out) 50 | 51 | if self.downsample is not None: 52 | identity = self.downsample(x) 53 | 54 | out += identity 55 | out = self.relu(out) 56 | 57 | return out 58 | 59 | 60 | class Flatten(nn.Module): 61 | def forward(self, input): 62 | return input.view(input.size(0), -1) 63 | -------------------------------------------------------------------------------- /src/modeling/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .image_list import ImageList, crop_tensor -------------------------------------------------------------------------------- /src/modeling/utils/image_list.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from typing import Any, List, Sequence, Tuple, Union 3 | import torch 4 | from torch.nn import functional as F 5 | import numpy as np 6 | 7 | 8 | class ImageList(object): 9 | """ 10 | Structure that holds a list of images (of possibly 11 | varying sizes) as a single tensor. 12 | This works by padding the images to the same size, 13 | and storing in a field the original sizes of each image 14 | Attributes: 15 | image_sizes (list[tuple[int, int]]): each tuple is (h, w) 16 | """ 17 | 18 | def __init__(self, tensor: torch.Tensor, image_sizes: List[Tuple[int, int]]): 19 | """ 20 | Arguments: 21 | tensor (Tensor): of shape (N, H, W) or (N, C_1, ..., C_K, H, W) where K >= 1 22 | image_sizes (list[tuple[int, int]]): Each tuple is (h, w). 23 | """ 24 | self.tensor = tensor 25 | self.image_sizes = image_sizes 26 | 27 | def __len__(self) -> int: 28 | return len(self.image_sizes) 29 | 30 | def __getitem__(self, idx: Union[int, slice]) -> torch.Tensor: 31 | """ 32 | Access the individual image in its original size. 33 | Returns: 34 | Tensor: an image of shape (H, W) or (C_1, ..., C_K, H, W) where K >= 1 35 | """ 36 | size = self.image_sizes[idx] 37 | return self.tensor[idx, ..., : size[0], : size[1]] # type: ignore 38 | 39 | def to(self, *args: Any, **kwargs: Any) -> "ImageList": 40 | cast_tensor = self.tensor.to(*args, **kwargs) 41 | return ImageList(cast_tensor, self.image_sizes) 42 | 43 | @staticmethod 44 | def from_tensors( 45 | tensors: Sequence[torch.Tensor], size_divisibility: int = 0, pad_value: float = 0.0 46 | ) -> "ImageList": 47 | """ 48 | Args: 49 | tensors: a tuple or list of `torch.Tensors`, each of shape (Hi, Wi) or 50 | (C_1, ..., C_K, Hi, Wi) where K >= 1. The Tensors will be padded with `pad_value` 51 | so that they will have the same shape. 52 | size_divisibility (int): If `size_divisibility > 0`, also adds padding to ensure 53 | the common height and width is divisible by `size_divisibility` 54 | pad_value (float): value to pad 55 | Returns: 56 | an `ImageList`. 
57 | """ 58 | assert len(tensors) > 0 59 | assert isinstance(tensors, (tuple, list)) 60 | for t in tensors: 61 | assert isinstance(t, torch.Tensor), type(t) 62 | assert t.shape[1:-2] == tensors[0].shape[1:-2], t.shape 63 | # per dimension maximum (H, W) or (C_1, ..., C_K, H, W) where K >= 1 among all tensors 64 | max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors])) 65 | 66 | if size_divisibility > 0: 67 | import math 68 | 69 | stride = size_divisibility 70 | max_size = list(max_size) # type: ignore 71 | max_size[-2] = int(math.ceil(max_size[-2] / stride) * stride) # type: ignore 72 | max_size[-1] = int(math.ceil(max_size[-1] / stride) * stride) # type: ignore 73 | max_size = tuple(max_size) 74 | 75 | image_sizes = [im.shape[-2:] for im in tensors] 76 | 77 | if len(tensors) == 1: 78 | # This seems slightly (2%) faster. 79 | # TODO: check whether it's faster for multiple images as well 80 | image_size = image_sizes[0] 81 | padded = F.pad( 82 | tensors[0], 83 | [0, max_size[-1] - image_size[1], 0, max_size[-2] - image_size[0]], 84 | value=pad_value, 85 | ) 86 | batched_imgs = padded.unsqueeze_(0) 87 | else: 88 | batch_shape = (len(tensors),) + max_size 89 | batched_imgs = tensors[0].new_full(batch_shape, pad_value) 90 | for img, pad_img in zip(tensors, batched_imgs): 91 | pad_img[..., : img.shape[-2], : img.shape[-1]].copy_(img) 92 | 93 | return ImageList(batched_imgs.contiguous(), image_sizes) 94 | 95 | @staticmethod 96 | def from_tensors_crop( 97 | tensors: Sequence[torch.Tensor], crop_size: int = 224, ratio: int=1 98 | ) -> "ImageList": 99 | """ 100 | Args: 101 | tensors: a tuple or list of `torch.Tensors`, each of shape (Hi, Wi) or 102 | (C_1, ..., C_K, Hi, Wi) where K >= 1. The Tensors will be padded with `pad_value` 103 | so that they will have the same shape. 104 | size_divisibility (int): If `size_divisibility > 0`, also adds padding to ensure 105 | the common height and width is divisible by `size_divisibility` 106 | pad_value (float): value to pad 107 | Returns: 108 | an `ImageList`. 
109 | """ 110 | assert len(tensors) > 0 111 | assert isinstance(tensors, (tuple, list)) 112 | for t in tensors: 113 | assert isinstance(t, torch.Tensor), type(t) 114 | assert t.shape[1:-2] == tensors[0].shape[1:-2], t.shape 115 | # per dimension maximum (H, W) or (C_1, ..., C_K, H, W) where 116 | # K >= 1 among all tensors 117 | max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors])) 118 | 119 | image_sizes = [im.shape[-2:] for im in tensors] 120 | 121 | # resize the images to half size of the original size 122 | croped_tensors = torch.rand(len(tensors), tensors[0].size(0), 123 | crop_size, crop_size) 124 | 125 | new_image_sizes = [] 126 | for i, tensor in enumerate(tensors): 127 | image_size = image_sizes[i] 128 | tensor = tensor.unsqueeze(1) # add the channel dimension here 129 | resized_image = F.interpolate(tensor, scale_factor=ratio).squeeze() 130 | crop_image = crop_tensor(resized_image, (crop_size, crop_size)) 131 | croped_tensors[i] = crop_image 132 | new_image_sizes.append(crop_image.shape[-2:]) 133 | 134 | return ImageList(croped_tensors.contiguous(), new_image_sizes) 135 | 136 | 137 | def crop_tensor(image, crop_sizes): 138 | image = image.clone() 139 | indx = image.size(-2) - crop_sizes[0] 140 | indy = image.size(-1) - crop_sizes[1] 141 | if indx == 0: 142 | startx = 0 143 | else: 144 | startx = np.random.choice(indx) 145 | if indy == 0: 146 | starty = 0 147 | else: 148 | starty = np.random.choice(indy) 149 | return image[:, startx:startx+crop_sizes[0], 150 | starty:starty+crop_sizes[1]] -------------------------------------------------------------------------------- /src/modeling/vit/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import add_vit_config 2 | from .msvit import build_msvit_backbone 3 | from .dataset_mapper import FixSizeDatasetMapper -------------------------------------------------------------------------------- /src/modeling/vit/config.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import CfgNode as CN 2 | 3 | 4 | def add_vit_config(cfg): 5 | """ 6 | Add config for VIT. 7 | """ 8 | cfg.MODEL.TRANSFORMER = CN() 9 | cfg.MODEL.TRANSFORMER.DROP = 0.0 10 | cfg.MODEL.TRANSFORMER.DROP_PATH = 0.1 11 | cfg.MODEL.TRANSFORMER.NORM_EMBED = False 12 | cfg.MODEL.TRANSFORMER.AVG_POOL = False 13 | 14 | cfg.MODEL.TRANSFORMER.MSVIT = CN() 15 | cfg.MODEL.TRANSFORMER.MSVIT.ARCH = 'l1,h3,d192,n1,s1,g1,f4,a0_l2,h6,d384,n10,s0,g1,f2,a0_l3,h6,d384,n1,s0,g1,f1,a0' 16 | cfg.MODEL.TRANSFORMER.MSVIT.SHARE_W = True 17 | cfg.MODEL.TRANSFORMER.MSVIT.ATTN_TYPE = 'longformerhand' 18 | cfg.MODEL.TRANSFORMER.MSVIT.SHARE_KV = True 19 | cfg.MODEL.TRANSFORMER.MSVIT.ONLY_GLOBAL = False 20 | cfg.MODEL.TRANSFORMER.MSVIT.SW_EXACT = 0 21 | cfg.MODEL.TRANSFORMER.MSVIT.LN_EPS = 1e-6 22 | cfg.MODEL.TRANSFORMER.MSVIT.MODE = 0 23 | cfg.MODEL.TRANSFORMER.MSVIT.REDRAW_INTERVAL = 1000 24 | 25 | cfg.MODEL.TRANSFORMER.OUT_FEATURES = () 26 | 27 | # input size should be patch_size x pos_embedding_size 28 | cfg.INPUT.FIX_SIZE = () 29 | 30 | # Optimizer. 
31 | cfg.SOLVER.OPTIMIZER = "ADAMW" 32 | cfg.SOLVER.BACKBONE_MULTIPLIER = 1.0 33 | 34 | # Add LR multiplies to specific layers: 35 | # Use case: 36 | ## SOLVER: 37 | ## LR_MULTIPLIERS: 38 | ## backbone: 0.1 39 | ## embedding: 0.2 40 | ### it will apply 0.1 to layers with keyword 'backbone' and 0.2 to layers with keyword 'embedding' 41 | cfg.SOLVER.LR_MULTIPLIERS = CN(new_allowed=True) 42 | -------------------------------------------------------------------------------- /src/modeling/vit/dataset_mapper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import copy 3 | import logging 4 | import numpy as np 5 | import torch 6 | 7 | from detectron2.data import detection_utils as utils 8 | from detectron2.data import transforms as T 9 | 10 | __all__ = ["FixSizeDatasetMapper"] 11 | 12 | 13 | def build_transform_gen(cfg, is_train): 14 | """ 15 | Create a list of :class:`TransformGen` from config. 16 | Returns: 17 | list[TransformGen] 18 | """ 19 | logger = logging.getLogger(__name__) 20 | tfm_gens = [] 21 | if is_train: 22 | tfm_gens.append(T.RandomFlip()) 23 | if is_train: 24 | logger.info("TransformGens used in training: " + str(tfm_gens)) 25 | return tfm_gens 26 | 27 | 28 | class FixSizeDatasetMapper: 29 | """ 30 | A callable which takes a dataset dict in Detectron2 Dataset format, 31 | and map it into a format used by DETR. 32 | 33 | The callable currently does the following: 34 | 35 | 1. Read the image from "file_name" 36 | 2. Applies geometric transforms to the image and annotation 37 | 3. Find and applies suitable cropping to the image and annotation 38 | 4. Prepare image and annotation to Tensors 39 | """ 40 | 41 | def __init__(self, cfg, is_train=True): 42 | if cfg.INPUT.CROP.ENABLED and is_train: 43 | self.crop_gen = [ 44 | T.ResizeShortestEdge(cfg.INPUT.MIN_SIZE_TRAIN, sample_style="choice"), 45 | T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE), 46 | T.Resize(cfg.INPUT.FIX_SIZE) 47 | ] 48 | else: 49 | self.crop_gen = [ 50 | T.Resize(cfg.INPUT.FIX_SIZE) 51 | ] 52 | 53 | self.mask_on = cfg.MODEL.MASK_ON 54 | self.tfm_gens = build_transform_gen(cfg, is_train) 55 | logging.getLogger(__name__).info( 56 | "Full TransformGens used in training: {}, crop: {}".format(str(self.tfm_gens), str(self.crop_gen)) 57 | ) 58 | 59 | self.img_format = cfg.INPUT.FORMAT 60 | self.is_train = is_train 61 | 62 | def __call__(self, dataset_dict): 63 | """ 64 | Args: 65 | dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. 66 | 67 | Returns: 68 | dict: a format that builtin models in detectron2 accept 69 | """ 70 | dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below 71 | image = utils.read_image(dataset_dict["file_name"], format=self.img_format) 72 | utils.check_image_size(dataset_dict, image) 73 | 74 | image, transforms = T.apply_transform_gens( 75 | self.tfm_gens + self.crop_gen, image 76 | ) 77 | 78 | image_shape = image.shape[:2] # h, w 79 | 80 | # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, 81 | # but not efficient on large generic data structures due to the use of pickle & mp.Queue. 82 | # Therefore it's important to use torch.Tensor. 83 | dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) 84 | 85 | if not self.is_train: 86 | # USER: Modify this if you want to keep them for some reason. 
87 | dataset_dict.pop("annotations", None) 88 | return dataset_dict 89 | 90 | if "annotations" in dataset_dict: 91 | # USER: Modify this if you want to keep them for some reason. 92 | for anno in dataset_dict["annotations"]: 93 | if not self.mask_on: 94 | anno.pop("segmentation", None) 95 | anno.pop("keypoints", None) 96 | 97 | # USER: Implement additional transformations if you have other types of data 98 | annos = [ 99 | utils.transform_instance_annotations(obj, transforms, image_shape) 100 | for obj in dataset_dict.pop("annotations") 101 | if obj.get("iscrowd", 0) == 0 102 | ] 103 | instances = utils.annotations_to_instances(annos, image_shape) 104 | dataset_dict["instances"] = utils.filter_empty_instances(instances) 105 | if len(dataset_dict["instances"])==0: 106 | return None 107 | return dataset_dict 108 | -------------------------------------------------------------------------------- /src/modeling/vit/linformer.py: -------------------------------------------------------------------------------- 1 | # mainly modified from 2 | # https://github.com/lucidrains/linformer/blob/master/linformer/linformer.py 3 | import math 4 | import torch 5 | from torch import nn 6 | 7 | 8 | def init_(tensor): 9 | dim = tensor.shape[-1] 10 | std = 1 / math.sqrt(dim) 11 | tensor.uniform_(-std, std) 12 | return tensor 13 | 14 | 15 | class LinformerSelfAttention(nn.Module): 16 | def __init__(self, dim, seq_len, num_feats=256, num_heads=8, qkv_bias=False, 17 | qk_scale=None, attn_drop=0., proj_drop=0., share_kv=False): 18 | super().__init__() 19 | assert (dim % num_heads) == 0, 'dimension must be divisible by the number of heads' 20 | 21 | self.seq_len = seq_len 22 | self.num_feats = num_feats 23 | 24 | self.num_heads = num_heads 25 | self.head_dim = dim // num_heads 26 | self.scale = qk_scale or self.head_dim ** -0.5 27 | 28 | self.query = nn.Linear(dim, dim, bias=qkv_bias) 29 | self.kv = nn.Linear(dim, dim * 2, bias=qkv_bias) 30 | self.proj = nn.Linear(dim, dim) 31 | 32 | self.proj_k = nn.Parameter(init_(torch.zeros(seq_len, num_feats))) 33 | if share_kv: 34 | self.proj_v = self.proj_k 35 | else: 36 | self.proj_v = nn.Parameter(init_(torch.zeros(seq_len, num_feats))) 37 | 38 | self.attn_drop = nn.Dropout(attn_drop) 39 | self.proj_drop = nn.Dropout(proj_drop) 40 | 41 | def forward(self, x, nx=None, ny=None): 42 | b, n, d = x.shape 43 | d_h, h, k = self.head_dim, self.num_heads, self.num_feats 44 | kv_len = n 45 | assert kv_len == self.seq_len, f'the sequence length of the key / values must be {self.seq_len} - {kv_len} given' 46 | 47 | queries = self.scale * self.query(x).reshape(b, n, h, d_h).transpose(1, 2) 48 | kv = self.kv(x).reshape(b, n, 2, d).permute(2, 0, 1, 3) 49 | keys, values = kv[0], kv[1] # make torchscript happy (cannot use tensor as tuple) 50 | 51 | # project keys and values along the sequence length dimension to k 52 | proj_seq_len = lambda args: torch.einsum('bnd,nk->bkd', *args) 53 | kv_projs = (self.proj_k, self.proj_v) 54 | keys, values = map(proj_seq_len, zip((keys, values), kv_projs)) 55 | 56 | # merge head into batch for queries and key / values 57 | merge_key_values = lambda t: t.reshape(b, k, -1, d_h).transpose( 58 | 1, 2).expand(-1, h, -1, -1) 59 | keys, values = map(merge_key_values, (keys, values)) 60 | 61 | # attention 62 | attn = torch.einsum('bhnd,bhkd->bhnk', queries, keys) 63 | attn = (attn - torch.max(attn, dim=-1, keepdim=True)[0]).softmax(dim=-1) 64 | attn = self.attn_drop(attn) 65 | out = torch.einsum('bhnk,bhkd->bhnd', attn, values) 66 | 67 | # split heads 68 | out = 
out.transpose(1, 2).reshape(b, n, -1) 69 | out = self.proj(out) 70 | out = self.proj_drop(out) 71 | return out 72 | -------------------------------------------------------------------------------- /src/modeling/vit/srformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class SRSelfAttention(nn.Module): 6 | def __init__(self, dim, rratio=2, num_heads=8, qkv_bias=False, 7 | qk_scale=None, attn_drop=0., proj_drop=0.): 8 | super().__init__() 9 | assert (dim % num_heads) == 0, 'dimension must be divisible by the number of heads' 10 | 11 | self.rratio = rratio 12 | 13 | self.num_heads = num_heads 14 | self.head_dim = dim // num_heads 15 | self.scale = qk_scale or self.head_dim ** -0.5 16 | 17 | self.query = nn.Linear(dim, dim, bias=qkv_bias) 18 | self.kv = nn.Linear(dim, dim * 2, bias=qkv_bias) 19 | self.proj = nn.Linear(dim, dim) 20 | 21 | self.proj_sr = nn.Conv2d(dim, dim, kernel_size=rratio, stride=rratio, 22 | bias=False) 23 | self.norm = nn.InstanceNorm2d(dim) 24 | 25 | self.attn_drop = nn.Dropout(attn_drop) 26 | self.proj_drop = nn.Dropout(proj_drop) 27 | 28 | def forward(self, x, nx=None, ny=None): 29 | b, n, d = x.shape 30 | d_h, h = self.head_dim, self.num_heads 31 | 32 | # get queries 33 | queries = self.scale * self.query(x).reshape(b, n, h, d_h).transpose(1, 2) 34 | 35 | # spatial reduction for k and v 36 | x_local = x[:, -nx * ny:].transpose(-2, -1).reshape(b, d, nx, ny) 37 | x_local = self.norm(self.proj_sr(x_local)).view(b, d, -1) 38 | x = torch.cat([x[:, :-nx * ny], x_local.transpose(-2, -1)], dim=1) 39 | # compute keys and values 40 | kv = self.kv(x).reshape(b, -1, 2, d).permute(2, 0, 3, 1) 41 | keys, values = kv[0], kv[ 42 | 1] # make torchscript happy (cannot use tensor as tuple) b x d x k 43 | 44 | # merge head into batch for queries and key / values 45 | merge_key_values = lambda t: t.reshape(b, h, d_h, -1).transpose(-2, -1) 46 | keys, values = map(merge_key_values, (keys, values)) # b x h x k x d_h 47 | 48 | # attention 49 | attn = torch.einsum('bhnd,bhkd->bhnk', queries, keys) 50 | attn = (attn - torch.max(attn, dim=-1, keepdim=True)[0]).softmax(dim=-1) 51 | attn = self.attn_drop(attn) 52 | out = torch.einsum('bhnk,bhkd->bhnd', attn, values) 53 | 54 | # split heads 55 | out = out.transpose(1, 2).reshape(b, n, -1) 56 | out = self.proj(out) 57 | out = self.proj_drop(out) 58 | return out 59 | 60 | @staticmethod 61 | def compute_macs(module, input, output): 62 | # n: num_query 63 | # S: num_key/value 64 | input, nx, ny = input 65 | _, n, d = input.shape 66 | macs = 0 67 | n_params = 0 68 | 69 | # queries = self.scale * self.query(x) 70 | query_params = sum([p.numel() for p in module.query.parameters()]) 71 | n_params += query_params 72 | macs += query_params * n 73 | 74 | # x_local = self.norm(self.proj_sr(x_local)).view(b, d, -1) 75 | # x_local in (b, d, nx, ny) 76 | sr_params = sum([p.numel() for p in module.proj_sr.parameters()]) 77 | n_params += sr_params 78 | output_dims = nx//module.rratio * ny//module.rratio 79 | kernel_dims = module.rratio ** 2 80 | in_channels = d 81 | out_channels = d 82 | 83 | filters_per_channel = out_channels 84 | conv_per_position_flops = int(kernel_dims) * \ 85 | in_channels * filters_per_channel 86 | 87 | active_elements_count = output_dims 88 | 89 | overall_conv_flops = conv_per_position_flops * active_elements_count 90 | 91 | # bias = False 92 | bias_flops = 0 93 | 94 | macs += overall_conv_flops + bias_flops 95 | 96 | # kv = self.kv(x) 97 | num_kvs = 
n - nx * ny + output_dims 98 | kv_params = sum([p.numel() for p in module.kv.parameters()]) 99 | n_params += kv_params 100 | macs += kv_params * num_kvs 101 | 102 | # attn = torch.einsum('bhnd,bhkd->bhnk', queries, keys) 103 | macs += n * num_kvs * d 104 | # out = torch.einsum('bhnk,bhkd->bhnd', attn, values) 105 | macs += n * num_kvs * d 106 | 107 | # out = self.proj(out) 108 | proj_params = sum([p.numel() for p in module.proj.parameters()]) 109 | n_params += proj_params 110 | macs += (proj_params * n) 111 | # print('macs proj', proj_params * T / 1e8) 112 | 113 | module.__flops__ += macs 114 | # return n_params, macs 115 | -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeplearning-wisc/stud/21b9492c63804d7acf41fefd0d6ad40cf29975a5/tools/__init__.py -------------------------------------------------------------------------------- /tools/analyze_bdd_fea.py: -------------------------------------------------------------------------------- 1 | import umap 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import torch 5 | 6 | 7 | pos_fea = np.load('/afs/cs.wisc.edu/u/x/f/xfdu/workspace/video/cycle-confusion/bdd_pos_single_frame_1000.npy', allow_pickle=True) 8 | neg_fea = np.load('/afs/cs.wisc.edu/u/x/f/xfdu/workspace/video/cycle-confusion/bdd_neg_single_frame_1000.npy', allow_pickle=True) 9 | 10 | index = 0 11 | for fea in pos_fea: 12 | if index == 0: 13 | pos_np = fea 14 | index += 1 15 | else: 16 | pos_np = np.concatenate([pos_np, fea], 0) 17 | 18 | index = 0 19 | for fea in neg_fea: 20 | if index == 0: 21 | neg_np = fea 22 | index += 1 23 | else: 24 | neg_np = np.concatenate([neg_np, fea], 0) 25 | fea_np = np.concatenate([pos_np, neg_np], 0) 26 | print(len(fea_np)) 27 | # breakpoint() 28 | reducer = umap.UMAP(random_state=42, n_neighbors=30, min_dist=0.6, n_components=2, metric='euclidean') 29 | embedding = reducer.fit_transform(fea_np) 30 | 31 | fig, ax = plt.subplots(figsize=(6, 6)) 32 | def get_cmap(n, name='hsv'): 33 | '''Returns a function that maps each index in 0, 1, ..., n-1 to a distinct 34 | RGB color; the keyword argument name must be a standard mpl colormap name.''' 35 | return plt.cm.get_cmap(name, n) 36 | 37 | classes = [str(hhh) for hhh in range(2)] 38 | # color = targets.astype(int)#[index for index in range(20)]# 39 | color = get_cmap(2) 40 | # color = plt.cm.coolwarm(np.linspace(0.1,0.9,11)) 41 | 42 | index = 0 43 | for i in range(0, 2): 44 | if i == 0: 45 | plt.scatter(embedding[:, 0][1000 * i:1000 * i + 1000], 46 | embedding[:, 1][1000 * i:1000 * i + 1000], 47 | c='r', 48 | label=index, cmap="Spectral", s=1) 49 | else: 50 | plt.scatter(embedding[:, 0][1000 * i:1000 * i + 1000], 51 | embedding[:, 1][1000 * i:1000 * i + 1000], 52 | c='b', 53 | label=index, cmap="Spectral", s=1) 54 | index += 1 55 | 56 | plt.legend(fontsize=20) 57 | # ax.legend(markerscale=9) 58 | ax.legend(loc='lower left',markerscale=9)#, bbox_to_anchor=(1, 0.5) 59 | # plt.legend(handles=scatter.legend_elements()[0], labels=classes) 60 | # breakpoint() 61 | plt.setp(ax, xticks=[], yticks=[]) 62 | # plt.title("With virtual outliers", fontsize=20) 63 | # plt.savefig('./voc_coco_umap_visual_ours.jpg', dpi=250) 64 | # plt.title("Vanilla detector", fontsize=20) 65 | plt.savefig('./bdd_ana_single_frame_1000.jpg', dpi=250) 66 | # plt.show() -------------------------------------------------------------------------------- 
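A side note on the feature loading in tools/analyze_bdd_fea.py above: the saved .npy files hold an object array of per-batch (N_i, D) feature matrices, so the manual index/concatenate loop can be collapsed into a single np.concatenate. A minimal equivalent sketch (the file names here are placeholders, not paths from this repo):

import numpy as np
import umap

pos_batches = np.load('pos_features.npy', allow_pickle=True)   # object array of (N_i, D) blocks
neg_batches = np.load('neg_features.npy', allow_pickle=True)

pos_np = np.concatenate(list(pos_batches), axis=0)
neg_np = np.concatenate(list(neg_batches), axis=0)
fea_np = np.concatenate([pos_np, neg_np], axis=0)

# 2-D embedding with the same UMAP settings as the script above
embedding = umap.UMAP(random_state=42, n_neighbors=30, min_dist=0.6,
                      n_components=2, metric='euclidean').fit_transform(fea_np)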
/tools/analyze_energy.py: -------------------------------------------------------------------------------- 1 | import umap 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import torch 5 | import pandas as pd 6 | import seaborn as sns 7 | import matplotlib as mpl 8 | 9 | mpl.rcParams['axes.linewidth'] = 2 10 | import matplotlib 11 | # matplotlib.rcParams['mathtext.fontset'] = 'Arial' 12 | matplotlib.rcParams['mathtext.rm'] = 'Arial' 13 | matplotlib.rcParams['mathtext.it'] = 'Arial' 14 | 15 | # matplotlib.rcParams['mathtext.fontset'] = 'stix' 16 | # matplotlib.rcParams['font.family'] = 'STIXGeneral' 17 | # matplotlib.pyplot.title(r'ABC123 vs $\mathrm{ABC123}^{123}$') 18 | neg_fea = np.load('/afs/cs.wisc.edu/u/x/f/xfdu/workspace/video/cycle-confusion/neg_energy.npy', allow_pickle=True) 19 | pos_fea = np.load('/afs/cs.wisc.edu/u/x/f/xfdu/workspace/video/cycle-confusion/pos_energy.npy', allow_pickle=True) 20 | 21 | index = 0 22 | for fea in pos_fea: 23 | if index == 0: 24 | pos_np = fea.cpu().data.numpy() 25 | index += 1 26 | else: 27 | pos_np = np.concatenate([pos_np, fea.cpu().data.numpy()], 0) 28 | 29 | index = 0 30 | for fea in neg_fea: 31 | if index == 0: 32 | neg_np = fea.cpu().data.numpy() 33 | index += 1 34 | else: 35 | neg_np = np.concatenate([neg_np, fea.cpu().data.numpy()], 0) 36 | # breakpoint() 37 | id_pd = pd.Series(pos_np) 38 | # # id_pd.rename('ID') 39 | # 40 | ood_pd = pd.Series(neg_np) 41 | # # ood_pd.rename('OOD') 42 | # # data_plot = {'Energy': np.concatenate((-id_score[0:2000], -ood_score), 0), 'label':['ID'] * len(-id_score[0:2000]) + \ 43 | # # ['OOD'] * len(-ood_score)} 44 | # # df_after = pd.DataFrame(data=data_plot) 45 | # # sns.histplot(data=df_after, x="Energy", hue="label") 46 | plt.figure(figsize=(10,8)) 47 | p1 = sns.kdeplot(id_pd, shade=True, color="#168AAD", label='ID objects',linewidth=2.5) 48 | p1 = sns.kdeplot(ood_pd, shade=True, color="#B5E48C", label='Unknown objects',linewidth=2) 49 | plt.xlabel("Negative energy score", fontsize=25) 50 | plt.ylabel("Density", fontsize=25) 51 | plt.xticks(fontsize= 25) 52 | plt.yticks(fontsize= 25) 53 | plt.legend(fontsize=30, frameon=False) 54 | 55 | plt.savefig('ddd.jpg', dpi=500) -------------------------------------------------------------------------------- /tools/bdd_coco.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import torch 3 | import torch.nn.functional as F 4 | import numpy as np 5 | import argparse 6 | import pandas as pd 7 | import seaborn as sns 8 | import matplotlib 9 | matplotlib.use('AGG') 10 | import matplotlib.pyplot as plt 11 | from metric_utils import * 12 | 13 | recall_level_default = 0.95 14 | 15 | 16 | parser = argparse.ArgumentParser(description='Evaluates an OOD Detector', 17 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 18 | parser.add_argument('--energy', type=int, default=1, help='noise for Odin') 19 | parser.add_argument('--T', default=1., type=float, help='temperature: energy|Odin') 20 | parser.add_argument('--thres', default=1., type=float) 21 | parser.add_argument('--name', default=1., type=str) 22 | parser.add_argument('--seed', default=0, type=int) 23 | parser.add_argument('--model', default='faster-rcnn', type=str) 24 | args = parser.parse_args() 25 | 26 | 27 | 28 | concat = lambda x: np.concatenate(x, axis=0) 29 | to_np = lambda x: x.data.cpu().numpy() 30 | 31 | 32 | 33 | # ID data 34 | ood_data = np.load('/nobackup-slow/dataset/my_xfdu/video/bdd/bdd100k/checkpoints/bdd_tracking_2k/daytime/' + 
str(args.model) + '/ood.npy',allow_pickle=True) 35 | id_data = np.load('/nobackup-slow/dataset/my_xfdu/video/bdd/bdd100k/checkpoints/bdd_tracking_2k/daytime/' + str(args.model) + '/id.npy',allow_pickle=True) 36 | # id_data = pickle.load(open('./data/VOC-Detection/' + args.model + '/'+args.name+'/random_seed'+'_' +str(args.seed) +'/inference/voc_custom_val/standard_nms/corruption_level_0/probabilistic_scoring_res_odd_'+str(args.thres)+'.pkl', 'rb')) 37 | # ood_data = pickle.load(open('./data/VOC-Detection/' + args.model + '/'+args.name+'/random_seed' +'_'+str(args.seed) +'/inference/coco_ood_val/standard_nms/corruption_level_0/probabilistic_scoring_res_odd_'+str(args.thres)+'.pkl', 'rb')) 38 | # id_score = [] 39 | # energy score calculation. 40 | # import ipdb; ipdb.set_trace() 41 | index = 0 42 | for data in id_data: 43 | if index == 0: 44 | id_data_all = data 45 | index += 1 46 | else: 47 | id_data_all = np.concatenate([id_data_all, data], 0) 48 | 49 | id_data = torch.from_numpy(id_data_all) 50 | 51 | index = 0 52 | for data in ood_data: 53 | if index == 0: 54 | ood_data_all = data 55 | index += 1 56 | else: 57 | ood_data_all = np.concatenate([ood_data_all, data], 0) 58 | 59 | ood_data = torch.from_numpy(ood_data_all) 60 | 61 | 62 | # id_data = id_data / 1000 - torch.max(id_data, 1, keepdim=True)[0] 63 | # ood_data = ood_data / 1000 - torch.max(ood_data, 1, keepdim=True)[0] 64 | T = 1 65 | 66 | 67 | 68 | 69 | assert len(id_data[0]) == 11 70 | if args.energy: 71 | id_score = -args.T * torch.logsumexp(id_data[:, :-1] / args.T, dim=1).cpu().data.numpy() 72 | ood_score = -args.T * torch.logsumexp(ood_data[:, :-1] / args.T, dim=1).cpu().data.numpy() 73 | else: 74 | id_score = -np.max(F.softmax(id_data[:, :-1], dim=1).cpu().data.numpy(), axis=1) 75 | ood_score = -np.max(F.softmax(ood_data[:, :-1], dim=1).cpu().data.numpy(), axis=1) 76 | # breakpoint() 77 | # id_score = id_data 78 | # ood_score = ood_data 79 | ########### 80 | ######## 81 | print(len(id_score)) 82 | print(len(ood_score)) 83 | 84 | measures = get_measures(-id_score, -ood_score, plot=False) 85 | 86 | if args.energy: 87 | print_measures(measures[0], measures[1], measures[2], 'energy') 88 | else: 89 | print_measures(measures[0], measures[1], measures[2], 'msp') 90 | 91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /tools/bdd_heatmap.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import seaborn as sns; sns.set_theme() 3 | idx_data = np.asarray(np.load('./bdd_offset.npy', allow_pickle=True)) 4 | score_data = np.load('./bdd_score_visual.npy', allow_pickle=True) 5 | breakpoint() 6 | ax = sns.heatmap(score_data[6].cpu().data.numpy()) 7 | ax.savefig('./bdd_heatmap.jpg') -------------------------------------------------------------------------------- /tools/ckpt_surgery.py: -------------------------------------------------------------------------------- 1 | from os import X_OK, pardir 2 | import torch 3 | import argparse 4 | 5 | def process(ckpt_path, save_path): 6 | ckpt = torch.load(ckpt_path) 7 | new_order = [0, 1, 2, 4, 3, 7, 6, 5, 8, 9, 10] 8 | ckpt['model']['roi_heads.box_predictor.cls_score.weight'] =ckpt['model']['roi_heads.box_predictor.cls_score.weight'][new_order] 9 | ckpt['model']['roi_heads.box_predictor.cls_score.bias'] = ckpt['model']['roi_heads.box_predictor.cls_score.bias'][new_order] 10 | 11 | new_order4 = [] 12 | for x in new_order[:-1]: 13 | for i in range(4): 14 | new_order4.append(4*x+i) 15 | 16 | 
ckpt['model']['roi_heads.box_predictor.bbox_pred.weight'] = ckpt['model']['roi_heads.box_predictor.bbox_pred.weight'][new_order4] 17 | ckpt['model']['roi_heads.box_predictor.bbox_pred.bias'] = ckpt['model']['roi_heads.box_predictor.bbox_pred.bias'][new_order4] 18 | 19 | torch.save(ckpt, save_path) 20 | print('done!') 21 | 22 | 23 | def main(): 24 | parser = argparse.ArgumentParser() 25 | parser.add_argument('--ckpt-path', '-i', type=str, help='input ckpt path') 26 | parser.add_argument('--save-path', '-o', type=str, help='output ckpt path') 27 | args = parser.parse_args() 28 | 29 | process(args.ckpt_path, args.save_path) 30 | 31 | if __name__ == '__main__': 32 | main() 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /tools/convert_weight.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # -------------------------------------------------------- 4 | # Descripttion: https://github.com/sxhxliang/detectron2_backbone 5 | # version: 0.0.1 6 | # Author: Shihua Liang (sxhx.liang@gmail.com) 7 | # FilePath: /detectron2_backbone/detectron2_backbone/tools/convert_weight.py 8 | # Create: 2020-05-05 07:32:08 9 | # LastAuthor: Shihua Liang 10 | # lastTime: 2020-07-02 21:51:57 11 | # -------------------------------------------------------- 12 | import torch 13 | import argparse 14 | from collections import OrderedDict 15 | 16 | import torch 17 | 18 | 19 | def get_parser(): 20 | parser = argparse.ArgumentParser(description="Detectron2 Model Converter") 21 | parser.add_argument( 22 | "--model", 23 | required=True, 24 | metavar="FILE", 25 | help="path to model weights", 26 | ) 27 | parser.add_argument( 28 | "--output", 29 | required=True, 30 | metavar="FILE", 31 | help="path to model weights", 32 | ) 33 | return parser 34 | 35 | 36 | def convert_weight(): 37 | args = get_parser().parse_args() 38 | ckpt = torch.load(args.model, map_location="cpu") 39 | if "model" in ckpt: 40 | state_dict = ckpt["model"] 41 | else: 42 | state_dict = ckpt 43 | model = {"model": state_dict, "__author__": "custom", "matching_heuristics": True} 44 | 45 | torch.save(model, args.output) 46 | 47 | if __name__ == "__main__": 48 | convert_weight() -------------------------------------------------------------------------------- /tools/count.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | root_directory = '/nobackup-slow/dataset/my_xfdu/video/vis/train/JPEGImages/'#72 5 | # root_directory = '/nobackup-slow/dataset/my_xfdu/video/bdd/bdd100k/images/track/train/'#263 6 | 7 | 8 | numbers = [] 9 | for video in list(os.listdir(root_directory)): 10 | path = os.path.join(root_directory, video) 11 | cur_frame = os.listdir(path) 12 | numbers.append(len(list(cur_frame))) 13 | 14 | numbers = np.asarray(numbers) 15 | print(np.min(numbers)) 16 | print(np.max(numbers)) -------------------------------------------------------------------------------- /tools/plot_frame_interval.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import seaborn as sns 4 | import matplotlib.pyplot as plt 5 | 6 | 7 | 8 | 9 | # # Create a dataset with many short random walks 10 | # rs = np.random.RandomState(4) 11 | # # pos = rs.randint(-1, 2, (20, 5)).cumsum(axis=1) 12 | # # breakpoint() 13 | # # pos -= pos[:, 0, np.newaxis] 14 | # pos = np.asarray([[80.88, 
81.76,83.11, 83.29,82.76,81.84,80.43], 15 | # [71.90, 73.47,74.04,74.34,73.03,71.03,70.10]]) 16 | # step = np.asarray([1,3,5,7,9,11,13,1,3,5,7,9,11,13]) 17 | # walk = np.repeat(['COCO','NuImages'], 7) 18 | # walk1 = np.repeat(range(2), 7) 19 | # # breakpoint() 20 | # df = pd.DataFrame(np.c_[pos.flat, step, walk, walk1], 21 | # columns=["AUROC", "Frame range", "OOD", "dummy"]) 22 | # 23 | # # Initialize a grid of plots with an Axes for each walk 24 | # grid = sns.FacetGrid(df, col='OOD' , hue='dummy', palette="tab20c", 25 | # col_wrap=4, height=2.2) 26 | # 27 | # # Draw a horizontal line to show the starting point 28 | # # grid.refline(y=0, linestyle=":") 29 | # 30 | # # Draw a line plot to show the trajectory of each random walk 31 | # grid.map(plt.plot, "Frame range", "AUROC", marker="o") 32 | # 33 | # # Adjust the tick positions and labels 34 | # # grid.set(xticks=np.arange(16), yticks=[70, 90], 35 | # # xlim=(0,15), ylim=(70,90)) 36 | # 37 | # 38 | # 39 | # 40 | # 41 | # # num_rows = 4 42 | # # years = frames 43 | # # data_preproc = pd.DataFrame({ 44 | # # 'Frame range': years, 45 | # # r'$T$=1, OOD=COCO': single_coco, 46 | # # r'$T$=1, OOD=NuImages': single_nu}) 47 | # # # r'$T$=3, OOD=COCO': multi_coco, 48 | # # # r'$T$=3, OOD=NuImages': multi_nu}) 49 | # # fig = sns.lineplot(x='Frame range', y='value', hue='variable', 50 | # # data=pd.melt(data_preproc, ['Frame range']), marker="o") 51 | # 52 | # # Adjust the arrangement of the plots 53 | # # fig.tight_layout(w_pad=1) 54 | def get_cmap(n, name='hsv'): 55 | '''Returns a function that maps each index in 0, 1, ..., n-1 to a distinct 56 | RGB color; the keyword argument name must be a standard mpl colormap name.''' 57 | return plt.cm.get_cmap(name, n) 58 | 59 | color = get_cmap(7) 60 | frames = [3,5,7,9,11,13] 61 | frames1 = [1,2,3,4,5,6] 62 | single_coco = [81.76,83.11, 83.29,82.76,81.84,80.43]#[80.88, 81.76,83.11, 83.29,82.76,81.84,80.43] 63 | single_nu = [ 73.47,74.04,74.34,73.03,71.03,70.10]#[71.90, 73.47,74.04,74.34,73.03,71.03,70.10] 64 | 65 | multi_coco = [83.57,84.48,85.06,84.99,83.30,82.36] 66 | multi_nu = [72.65,73.94,74.47,73.56,72.07,72.73] 67 | import seaborn as sns 68 | import numpy as np 69 | import pandas as pd 70 | import matplotlib.pyplot as plt 71 | 72 | # Setting seaborn as default style even 73 | # if use only matplotlib 74 | # sns.set() 75 | sns.set(font_scale = 1.5) 76 | sns.set_theme(style="ticks") 77 | # figure, axes = plt.subplots() 78 | # figure.suptitle('Geeksforgeeks - one axes with no data') 79 | # plt.bar(data.xcol,data.ycol,4) 80 | figure, axes = plt.subplots(1, 2, sharex=True, figsize=(7,3.5)) 81 | # figure.suptitle('Geeksforgeeks') 82 | # breakpoint() 83 | 84 | 85 | 86 | 87 | # axes[0][0].set_title(r'$T$=1, OOD=COCO') 88 | # # df=pd.DataFrame(dict(x=range(5),y=[3,15,9,12,4])) 89 | # data_preproc = pd.DataFrame({ 90 | # 'Frame range': frames, 91 | # 'AUROC': single_coco}) 92 | # sub1 = sns.barplot(data=data_preproc,x='Frame range',y='AUROC', ax=axes[0][0], palette=sns.color_palette('Blues_r',7)) 93 | # sub1.set(ylim=(80,84)) 94 | # # axes[0][0].set_box_aspect(10/len(axes[0][0].patches)) 95 | # # sns.linplot(data=df,x='Frame_r') 96 | # widthbars = [1,1,1,1,1,1,1] 97 | # for bar, newwidth in zip(axes[0][0].patches, widthbars): 98 | # x = bar.get_x() 99 | # width = bar.get_width() 100 | # print(x) 101 | # centre = x #+ width/2. 
102 | # bar.set_x(centre) 103 | # bar.set_width(newwidth) 104 | # 105 | # 106 | # axes[0][1].set_title(r'$T$=1, OOD=NuImages') 107 | # data_preproc = pd.DataFrame({ 108 | # 'Frame range': frames, 109 | # 'AUROC': single_nu}) 110 | # sub2 = sns.barplot(data=data_preproc,x='Frame range',y='AUROC', ax=axes[0][1], palette="magma") 111 | # # sub2.set(xticks=[0, 5, 10, 15]) 112 | # sub2.set(ylim=(69,75)) 113 | # axes[0][1].set_ylabel("") 114 | # widthbars = [1,1,1,1,1,1,1] 115 | # for bar, newwidth in zip(axes[0][1].patches, widthbars): 116 | # x = bar.get_x() 117 | # width = bar.get_width() 118 | # print(x) 119 | # centre = x #+ width/2. 120 | # bar.set_x(centre) 121 | # bar.set_width(newwidth) 122 | 123 | 124 | 125 | axes[0].set_title(r'$T$=3, OOD=COCO') 126 | data_preproc = pd.DataFrame({ 127 | 'Frame interval': frames1, 128 | 'AUROC': multi_coco}) 129 | sub3 = sns.barplot(data=data_preproc,x='Frame interval',y='AUROC', ax=axes[0], palette=sns.color_palette('Blues_r',7)) 130 | # sub3.set(xticks=[0, 5, 10, 15], yticks= [83,84,85,86]) 131 | sub3.set(ylim=(82,86)) 132 | 133 | widthbars = [1,1,1,1,1,1] 134 | for bar, newwidth in zip(axes[0].patches, widthbars): 135 | x = bar.get_x() 136 | width = bar.get_width() 137 | print(x) 138 | centre = x #+ width/2. 139 | bar.set_x(centre) 140 | bar.set_width(newwidth) 141 | 142 | 143 | axes[1].set_title(r'$T$=3, OOD=NuImages') 144 | data_preproc = pd.DataFrame({ 145 | 'Frame interval': frames1, 146 | 'AUROC': multi_nu}) 147 | sub4 = sns.barplot(data=data_preproc,x='Frame interval',y='AUROC', ax=axes[1], palette="magma") 148 | # sub4.set(xticks=[0, 5, 10, 15], yticks= [74,75]) 149 | sub4.set(ylim=(71,75)) 150 | axes[1].set_ylabel("") 151 | widthbars = [1,1,1,1,1,1] 152 | for bar, newwidth in zip(axes[1].patches, widthbars): 153 | x = bar.get_x() 154 | width = bar.get_width() 155 | print(x) 156 | centre = x #+ width/2. 
157 | bar.set_x(centre) 158 | bar.set_width(newwidth) 159 | figure.tight_layout(w_pad=1) 160 | figure.savefig('ablation1.pdf') -------------------------------------------------------------------------------- /tools/plot_frame_range.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import seaborn as sns 4 | import matplotlib.pyplot as plt 5 | 6 | 7 | # def get_cmap(n, name='hsv'): 8 | # '''Returns a function that maps each index in 0, 1, ..., n-1 to a distinct 9 | # RGB color; the keyword argument name must be a standard mpl colormap name.''' 10 | # return plt.cm.get_cmap(name, n) 11 | # 12 | # color = get_cmap(7) 13 | frames = [1,2,3,4,5] 14 | frames1 = [3,5,7,9,11,13,'inf'] 15 | single_coco = [81.76,83.11, 83.29,82.76,81.84,80.43]#[80.88, 81.76,83.11, 83.29,82.76,81.84,80.43] 16 | single_nu = [ 73.47,74.04,74.34,73.03,71.03,70.10]#[71.90, 73.47,74.04,74.34,73.03,71.03,70.10] 17 | 18 | multi_coco = [83.34, 84.26,84.70,85.67,85.34,84.41, 80.35] 19 | multi_nu = [73.89, 75.61,75.64,75.67,74.87,74.42, 71.80] 20 | 21 | 22 | # Setting seaborn as default style even 23 | # if use only matplotlib 24 | # sns.set() 25 | sns.set(font_scale = 1.5) 26 | sns.set_theme(style="ticks") 27 | # figure, axes = plt.subplots() 28 | # figure.suptitle('Geeksforgeeks - one axes with no data') 29 | # plt.bar(data.xcol,data.ycol,4) 30 | figure, axes = plt.subplots(1, 2, sharex=True, figsize=(7,3.5)) 31 | # figure.suptitle('Geeksforgeeks') 32 | # breakpoint() 33 | 34 | 35 | 36 | # axes[0].set_title(r'$T$=3, OOD=COCO') 37 | # data_preproc = pd.DataFrame({ 38 | # 'Frame range': frames1, 39 | # 'AUROC': multi_coco}) 40 | # sub3 = sns.barplot(data=data_preproc,x='Frame range',y='AUROC', ax=axes[0], palette="dark:salmon_r") 41 | # # sub3.set(xticks=[0, 5, 10, 15], yticks= [83,84,85,86]) 42 | # sub3.set(ylim=(80,86)) 43 | # 44 | # widthbars = [1,1,1,1,1,1, 1] 45 | # for bar, newwidth in zip(axes[0].patches, widthbars): 46 | # x = bar.get_x() 47 | # width = bar.get_width() 48 | # print(x) 49 | # centre = x #+ width/2. 50 | # bar.set_x(centre) 51 | # bar.set_width(newwidth) 52 | # 53 | # 54 | # axes[1].set_title(r'$T$=3, OOD=NuImages') 55 | # data_preproc = pd.DataFrame({ 56 | # 'Frame range': frames1, 57 | # 'AUROC': multi_nu}) 58 | # sub4 = sns.barplot(data=data_preproc,x='Frame range',y='AUROC', ax=axes[1], palette="YlOrBr") 59 | # # sub4.set(xticks=[0, 5, 10, 15], yticks= [74,75]) 60 | # sub4.set(ylim=(71,76)) 61 | # axes[1].set_ylabel("") 62 | # widthbars = [1,1,1,1,1,1,1] 63 | # for bar, newwidth in zip(axes[1].patches, widthbars): 64 | # x = bar.get_x() 65 | # width = bar.get_width() 66 | # print(x) 67 | # centre = x #+ width/2. 68 | # bar.set_x(centre) 69 | # bar.set_width(newwidth) 70 | 71 | 72 | 73 | 74 | multi_coco = [80.43,82.71,85.67,81.41,80.81] 75 | multi_nu = [70.10,75.29,75.67,73.26,72.76] 76 | axes[0].set_title(r'$T$=3, OOD=COCO') 77 | data_preproc = pd.DataFrame({ 78 | 'Number of Frame': frames, 79 | 'AUROC': multi_coco}) 80 | sub1 = sns.barplot(data=data_preproc,x='Number of Frame',y='AUROC', ax=axes[0], palette=sns.color_palette('Blues_r',7)) 81 | # sub3.set(xticks=[0, 5, 10, 15], yticks= [83,84,85,86]) 82 | sub1.set(ylim=(80,86)) 83 | axes[0].set_ylabel("") 84 | widthbars = [1,1,1,1,1] 85 | for bar, newwidth in zip(axes[0].patches, widthbars): 86 | x = bar.get_x() 87 | width = bar.get_width() 88 | print(x) 89 | centre = x #+ width/2. 
90 | bar.set_x(centre) 91 | bar.set_width(newwidth) 92 | 93 | 94 | axes[1].set_title(r'$T$=3, OOD=NuImages') 95 | data_preproc = pd.DataFrame({ 96 | 'Number of Frame': frames, 97 | 'AUROC': multi_nu}) 98 | sub2 = sns.barplot(data=data_preproc,x='Number of Frame',y='AUROC', ax=axes[1], palette="magma") 99 | # sub4.set(xticks=[0, 5, 10, 15], yticks= [74,75]) 100 | sub2.set(ylim=(69,76)) 101 | axes[1].set_ylabel("") 102 | widthbars = [1,1,1,1,1] 103 | for bar, newwidth in zip(axes[1].patches, widthbars): 104 | x = bar.get_x() 105 | width = bar.get_width() 106 | print(x) 107 | centre = x #+ width/2. 108 | bar.set_x(centre) 109 | bar.set_width(newwidth) 110 | 111 | 112 | # 113 | # multi_coco = [83.57,84.48,85.06,84.99,83.30,82.36] 114 | # multi_nu = [72.65,73.94,74.47,73.56,72.07,72.73] 115 | # axes[0].set_title(r'$T$=3, OOD=COCO') 116 | # data_preproc = pd.DataFrame({ 117 | # 'Frame interval': frames, 118 | # 'AUROC': multi_coco}) 119 | # sub1 = sns.barplot(data=data_preproc,x='Frame interval',y='AUROC', ax=axes[0], palette=sns.color_palette('Blues_r',7)) 120 | # # sub3.set(xticks=[0, 5, 10, 15], yticks= [83,84,85,86]) 121 | # sub1.set(ylim=(82,86)) 122 | # axes[0].set_ylabel("") 123 | # widthbars = [1,1,1,1,1,1] 124 | # for bar, newwidth in zip(axes[0].patches, widthbars): 125 | # x = bar.get_x() 126 | # width = bar.get_width() 127 | # print(x) 128 | # centre = x #+ width/2. 129 | # bar.set_x(centre) 130 | # bar.set_width(newwidth) 131 | # 132 | # 133 | # axes[1].set_title(r'$T$=3, OOD=NuImages') 134 | # data_preproc = pd.DataFrame({ 135 | # 'Frame interval': frames, 136 | # 'AUROC': multi_nu}) 137 | # sub2 = sns.barplot(data=data_preproc,x='Frame interval',y='AUROC', ax=axes[1], palette="magma") 138 | # # sub4.set(xticks=[0, 5, 10, 15], yticks= [74,75]) 139 | # sub2.set(ylim=(71,75)) 140 | # axes[1].set_ylabel("") 141 | # widthbars = [1,1,1,1,1,1] 142 | # for bar, newwidth in zip(axes[1].patches, widthbars): 143 | # x = bar.get_x() 144 | # width = bar.get_width() 145 | # print(x) 146 | # centre = x #+ width/2. 
147 | # bar.set_x(centre) 148 | # bar.set_width(newwidth) 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | figure.tight_layout(w_pad=1) 158 | figure.savefig('ablation2.pdf') -------------------------------------------------------------------------------- /tools/plot_loss.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import json 3 | import numpy as np 4 | import matplotlib 5 | import matplotlib as mpl 6 | 7 | mpl.rcParams['axes.linewidth'] = 2 8 | 9 | # matplotlib.rcParams['mathtext.fontset'] = 'Arial' 10 | matplotlib.rcParams['mathtext.rm'] = 'Arial' 11 | matplotlib.rcParams['mathtext.it'] = 'Arial' 12 | 13 | # matplotlib.rcParams['mathtext.fontset'] = 'stix' 14 | # matplotlib.rcParams['font.family'] = 'STIXGeneral' 15 | matplotlib.pyplot.title(r'ABC123 vs $\mathrm{ABC123}^{123}$') 16 | data =open('/nobackup/dataset/my_xfdu/video/vis/checkpoints/VIS/energy_no_original_loss_direct_add_0_02_frame_9_revise_4to6_multi_random_seed1/metrics.json','r') 17 | tweets = [] 18 | for line in data: 19 | tweets.append(json.loads(line)) 20 | data= tweets 21 | epochs = [] 22 | losses = [] 23 | for epoch, loss in enumerate(data): 24 | epochs.append(epoch) 25 | losses.append(loss['ene_reg_loss']*20) 26 | 27 | # plt.figure(figsize=(10,5)) 28 | # ax.set_title('Sine and cosine waves') 29 | 30 | 31 | plt.figure(figsize=(10,8)) 32 | # plt.title("Training and Validation Loss") 33 | # plt.plot(val_losses,label="val") 34 | # plt.plot(train_losses,label="train") 35 | x = [i*20 for i in range(len(losses))] 36 | plt.plot(x,losses, label=r'$\mathcal{L}_{\mathrm{uncertainty}}$',color='#184E77',linewidth=3) 37 | plt.xlabel("iterations", fontsize=25) 38 | plt.ylabel("Uncertainty loss", fontsize=25) 39 | plt.xticks(fontsize= 25) 40 | plt.yticks(fontsize= 25) 41 | plt.legend(fontsize=30, frameon=False) 42 | plt.savefig('./loss.jpg', dpi=500) 43 | -------------------------------------------------------------------------------- /tools/select_permutation.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Thu Sep 14 15:50:28 2017 4 | @author: bbrattol 5 | """ 6 | import argparse 7 | from tqdm import trange 8 | import numpy as np 9 | import itertools 10 | from scipy.spatial.distance import cdist 11 | import os 12 | 13 | parser = argparse.ArgumentParser(description='Train network on Imagenet') 14 | parser.add_argument('--classes', default=1000, type=int, 15 | help='Number of permutations to select') 16 | parser.add_argument('--selection', default='max', type=str, 17 | help='Sample selected per iteration based on hamming distance: [max] highest; [mean] average') 18 | args = parser.parse_args() 19 | 20 | if __name__ == "__main__": 21 | outname = 'permutations/permutations_hamming_%s_%d' % ( 22 | args.selection, args.classes) 23 | os.makedirs(os.path.dirname(outname), exist_ok=True) 24 | 25 | P_hat = np.array(list(itertools.permutations(list(range(9)), 9))) 26 | n = P_hat.shape[0] 27 | 28 | for i in trange(args.classes): 29 | if i == 0: 30 | j = np.random.randint(n) 31 | P = np.array(P_hat[j]).reshape([1, -1]) 32 | else: 33 | P = np.concatenate([P, P_hat[j].reshape([1, -1])], axis=0) 34 | 35 | P_hat = np.delete(P_hat, j, axis=0) 36 | D = cdist(P, P_hat, metric='hamming').mean(axis=0).flatten() 37 | 38 | if args.selection == 'max': 39 | j = D.argmax() 40 | else: 41 | m = int(D.shape[0] / 2) 42 | S = D.argsort() 43 | j = S[np.random.randint(m - 10, m + 10)] 44 | 45 | if 
i % 100 == 0: 46 | np.save(outname, P) 47 | 48 | np.save(outname, P) 49 | print('file created --> ' + outname) -------------------------------------------------------------------------------- /tools/simple_permutation.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import argparse 3 | from tqdm import trange 4 | import numpy as np 5 | import itertools 6 | from scipy.spatial.distance import cdist 7 | import os 8 | 9 | parser = argparse.ArgumentParser(description='Train network on Imagenet') 10 | parser.add_argument('--classes', default=24, type=int, 11 | help='Number of permutations to select') 12 | parser.add_argument('--selection', default='max', type=str, 13 | help='Sample selected per iteration based on hamming distance: [max] highest; [mean] average') 14 | args = parser.parse_args() 15 | 16 | if __name__ == "__main__": 17 | outname = 'permutations/permutations_hamming_%s_%d' % ( 18 | args.selection, args.classes) 19 | os.makedirs(os.path.dirname(outname), exist_ok=True) 20 | 21 | P_hat = np.array(list(itertools.permutations(list(range(2)), 2))) 22 | np.save(outname, P_hat) 23 | print('file created --> ' + outname) 24 | -------------------------------------------------------------------------------- /tools/train_net.py: -------------------------------------------------------------------------------- 1 | """Train/eval script.""" 2 | import logging 3 | import os 4 | import os.path as osp 5 | import time 6 | from collections import OrderedDict 7 | import torch 8 | 9 | import detectron2.utils.comm as comm 10 | from detectron2.checkpoint import DetectionCheckpointer 11 | from detectron2.data import MetadataCatalog 12 | from detectron2.engine import default_setup, hooks, launch 13 | from detectron2.evaluation import ( 14 | COCOEvaluator, 15 | DatasetEvaluators, 16 | verify_results, 17 | ) 18 | 19 | # updated code 20 | from src.config import get_cfg 21 | from src import data 22 | from src.engine import default_argument_parser, DefaultTrainer 23 | from src import modeling 24 | 25 | 26 | class Trainer(DefaultTrainer): 27 | """ 28 | We use the "DefaultTrainer" which contains a number pre-defined logic for 29 | standard training workflow. They may not work for you, especially if you 30 | are working on a new research project. In that case you can use the cleaner 31 | "SimpleTrainer", or write your own training loop. 32 | """ 33 | 34 | @classmethod 35 | def build_evaluator(cls, cfg, dataset_name, output_folder=None): 36 | """ 37 | Create evaluator(s) for a given dataset. 38 | This uses the special metadata "evaluator_type" associated with each builtin dataset. 39 | For your own dataset, you can simply create an evaluator manually in your 40 | script and do not have to worry about the hacky if-else logic here. 41 | """ 42 | if output_folder is None: 43 | output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") 44 | evaluator_list = [] 45 | evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type 46 | if evaluator_type in ["coco", "coco_panoptic_seg"]: 47 | evaluator_list.append(COCOEvaluator(dataset_name, cfg, True, output_folder)) 48 | if len(evaluator_list) == 0: 49 | raise NotImplementedError( 50 | "no Evaluator for the dataset {} with the type {}".format( 51 | dataset_name, evaluator_type 52 | ) 53 | ) 54 | if len(evaluator_list) == 1: 55 | return evaluator_list[0] 56 | return DatasetEvaluators(evaluator_list) 57 | 58 | 59 | def setup(args): 60 | """ 61 | Create configs and perform basic setups. 
62 | """ 63 | cfg = get_cfg() 64 | cfg.merge_from_file(args.config_file) 65 | cfg.merge_from_list(args.opts) 66 | cfg.freeze() 67 | default_setup(cfg, args) 68 | return cfg 69 | 70 | 71 | def main(args): 72 | cfg = setup(args) 73 | 74 | # eval_only and eval_during_train are mainly used for jointly 75 | # training detection and self-supervised models. 76 | # breakpoint() 77 | if args.eval_only: 78 | model = Trainer.build_model(cfg) 79 | DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( 80 | cfg.MODEL.WEIGHTS, resume=args.resume 81 | ) 82 | # breakpoint() 83 | position_list = [x for x, v in enumerate(cfg.MODEL.WEIGHTS) if v == '/'] 84 | if 'ood' not in cfg.DATASETS.TEST[0]: 85 | res = Trainer.test(cfg, model, 86 | saved_address=cfg.MODEL.WEIGHTS[:position_list[-1]] + '/id.npy', 87 | visualize=args.visualize, savefigdir=args.savefigdir) 88 | if comm.is_main_process(): 89 | verify_results(cfg, res) 90 | if cfg.TEST.AUG.ENABLED: 91 | res.update(Trainer.test_with_TTA(cfg, model)) 92 | return res 93 | else: 94 | res = Trainer.test(cfg, model, 95 | saved_address=cfg.MODEL.WEIGHTS[:position_list[-1]] + '/ood.npy', 96 | visualize=args.visualize, savefigdir=args.savefigdir) 97 | return res 98 | 99 | elif args.eval_during_train:#False 100 | model = Trainer.build_model(cfg) 101 | check_pointer = DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR) 102 | saved_checkpoint = None 103 | best_res = {} 104 | best_file = None 105 | while True: 106 | if check_pointer.has_checkpoint(): 107 | current_ckpt = check_pointer.get_checkpoint_file() 108 | if ( 109 | saved_checkpoint is None 110 | or current_ckpt != saved_checkpoint 111 | ): 112 | check_pointer._load_model( 113 | check_pointer._load_file(current_ckpt) 114 | ) 115 | saved_checkpoint = current_ckpt 116 | print("evaluating checkpoint {}".format(current_ckpt)) 117 | iters = int( 118 | osp.splitext(osp.basename(current_ckpt))[0].split("_")[ 119 | -1 120 | ] 121 | ) 122 | res = Trainer.test(cfg, model) 123 | if comm.is_main_process(): 124 | verify_results(cfg, res) 125 | if cfg.TEST.AUG.ENABLED: 126 | res.update(Trainer.test_with_TTA(cfg, model)) 127 | print(res) 128 | if (len(best_res) == 0) or ( 129 | len(best_res) > 0 130 | and best_res["bbox"]["AP"] < res["bbox"]["AP"] 131 | ): 132 | best_res = res 133 | best_file = current_ckpt 134 | print("best so far is from {}".format(best_file)) 135 | print(best_res) 136 | if iters + 1 >= cfg.SOLVER.MAX_ITER: 137 | return best_res 138 | time.sleep(10) 139 | """ 140 | If you'd like to do anything fancier than the standard training logic, 141 | consider writing your own training loop or subclassing the trainer. 
142 | """ 143 | trainer = Trainer(cfg) 144 | trainer.resume_or_load(resume=args.resume) 145 | if cfg.TEST.AUG.ENABLED: 146 | trainer.register_hooks( 147 | [ 148 | hooks.EvalHook( 149 | 0, lambda: trainer.test_with_TTA(cfg, trainer.model) 150 | ) 151 | ] 152 | ) 153 | return trainer.train() 154 | 155 | 156 | if __name__ == "__main__": 157 | args = default_argument_parser().parse_args() 158 | print("Command Line Args:", args) 159 | launch( 160 | main, 161 | args.num_gpus, 162 | num_machines=args.num_machines, 163 | machine_rank=args.machine_rank, 164 | dist_url=args.dist_url, 165 | args=(args,), 166 | ) 167 | -------------------------------------------------------------------------------- /tools/vis_coco.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import torch 3 | import torch.nn.functional as F 4 | import numpy as np 5 | import argparse 6 | import pandas as pd 7 | import seaborn as sns 8 | import matplotlib 9 | matplotlib.use('AGG') 10 | import matplotlib.pyplot as plt 11 | from metric_utils import * 12 | 13 | recall_level_default = 0.95 14 | 15 | 16 | parser = argparse.ArgumentParser(description='Evaluates an OOD Detector', 17 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 18 | parser.add_argument('--energy', type=int, default=1, help='noise for Odin') 19 | parser.add_argument('--T', default=1., type=float, help='temperature: energy|Odin') 20 | parser.add_argument('--thres', default=1., type=float) 21 | parser.add_argument('--name', default=1., type=str) 22 | parser.add_argument('--seed', default=0, type=int) 23 | parser.add_argument('--model', default='faster-rcnn', type=str) 24 | args = parser.parse_args() 25 | 26 | 27 | 28 | concat = lambda x: np.concatenate(x, axis=0) 29 | to_np = lambda x: x.data.cpu().numpy() 30 | 31 | 32 | 33 | # ID data 34 | ood_data = np.load('/nobackup/my_xfdu/video/vis/checkpoints/VIS/' + str(args.model) + '/ood.npy',allow_pickle=True) 35 | id_data = np.load('/nobackup/my_xfdu/video/vis/checkpoints/VIS/' + str(args.model) + '/id.npy',allow_pickle=True) 36 | # id_data = pickle.load(open('./data/VOC-Detection/' + args.model + '/'+args.name+'/random_seed'+'_' +str(args.seed) +'/inference/voc_custom_val/standard_nms/corruption_level_0/probabilistic_scoring_res_odd_'+str(args.thres)+'.pkl', 'rb')) 37 | # ood_data = pickle.load(open('./data/VOC-Detection/' + args.model + '/'+args.name+'/random_seed' +'_'+str(args.seed) +'/inference/coco_ood_val/standard_nms/corruption_level_0/probabilistic_scoring_res_odd_'+str(args.thres)+'.pkl', 'rb')) 38 | # id_score = [] 39 | # energy score calculation. 
40 | # import ipdb; ipdb.set_trace() 41 | index = 0 42 | for data in id_data: 43 | if index == 0: 44 | id_data_all = data 45 | index += 1 46 | else: 47 | id_data_all = np.concatenate([id_data_all, data], 0) 48 | 49 | id_data = torch.from_numpy(id_data_all) 50 | 51 | index = 0 52 | for data in ood_data: 53 | if index == 0: 54 | ood_data_all = data 55 | index += 1 56 | else: 57 | ood_data_all = np.concatenate([ood_data_all, data], 0) 58 | 59 | ood_data = torch.from_numpy(ood_data_all) 60 | 61 | # id_data = id_data / 1000 - torch.max(id_data, 1, keepdim=True)[0] 62 | # ood_data = ood_data / 1000 - torch.max(ood_data, 1, keepdim=True)[0] 63 | T = 1 64 | 65 | # breakpoint() 66 | # id_data = id_data[F.softmax(id_data[:,:-1], 1).max(1)[0] > 0.2] 67 | # ood_data = ood_data[F.softmax(ood_data[:,:-1], 1).max(1)[0] > 0.2] 68 | 69 | assert len(id_data[0]) == 41 70 | if args.energy: 71 | id_score = -args.T * torch.logsumexp(id_data[:, :-1] / args.T, dim=1).cpu().data.numpy() 72 | ood_score = -args.T * torch.logsumexp(ood_data[:, :-1] / args.T, dim=1).cpu().data.numpy() 73 | else: 74 | id_score = -np.max(F.softmax(id_data[:, :-1], dim=1).cpu().data.numpy(), axis=1) 75 | ood_score = -np.max(F.softmax(ood_data[:, :-1], dim=1).cpu().data.numpy(), axis=1) 76 | 77 | ########### 78 | ######## 79 | print(len(id_score)) 80 | print(len(ood_score)) 81 | 82 | # measures = get_measures(-id_score[:15866], -ood_score[:5258], plot=False) 83 | measures = get_measures(-id_score, -ood_score, plot=False) 84 | if args.energy: 85 | print_measures(measures[0], measures[1], measures[2], 'energy') 86 | else: 87 | print_measures(measures[0], measures[1], measures[2], 'msp') 88 | 89 | 90 | -------------------------------------------------------------------------------- /tools/visualize_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | import argparse 3 | import numpy as np 4 | import os 5 | from itertools import chain 6 | import cv2 7 | from PIL import Image 8 | 9 | from detectron2.config import get_cfg 10 | from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_train_loader 11 | from detectron2.data import detection_utils as utils 12 | from detectron2.data.build import filter_images_with_few_keypoints 13 | from detectron2.utils.logger import setup_logger 14 | from detectron2.utils.visualizer import Visualizer 15 | 16 | 17 | def setup(args): 18 | cfg = get_cfg() 19 | if args.config_file: 20 | cfg.merge_from_file(args.config_file) 21 | cfg.merge_from_list(args.opts) 22 | cfg.freeze() 23 | return cfg 24 | 25 | 26 | def parse_args(in_args=None): 27 | parser = argparse.ArgumentParser(description="Visualize ground-truth data") 28 | parser.add_argument( 29 | "--source", 30 | choices=["annotation", "dataloader"], 31 | required=True, 32 | help="visualize the annotations or the data loader (with pre-processing)", 33 | ) 34 | parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file") 35 | parser.add_argument("--output-dir", default="./", help="path to output directory") 36 | parser.add_argument("--show", action="store_true", help="show output in a window") 37 | parser.add_argument( 38 | "opts", 39 | help="Modify config options using the command-line", 40 | default=None, 41 | nargs=argparse.REMAINDER, 42 | ) 43 | return parser.parse_args(in_args) 44 | 45 | 46 | if __name__ == "__main__": 47 | args = parse_args() 48 | logger = setup_logger() 49 | logger.info("Arguments: " + str(args)) 50 | cfg = setup(args) 51 | 52 | dirname = args.output_dir 53 | os.makedirs(dirname, exist_ok=True) 54 | metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0]) 55 | 56 | def output(vis, fname): 57 | if args.show: 58 | print(fname) 59 | cv2.imshow("window", vis.get_image()[:, :, ::-1]) 60 | cv2.waitKey() 61 | else: 62 | filepath = os.path.join(dirname, fname) 63 | print("Saving to {} ...".format(filepath)) 64 | vis.save(filepath) 65 | 66 | scale = 2.0 if args.show else 1.0 67 | if args.source == "dataloader": 68 | train_data_loader = build_detection_train_loader(cfg) 69 | for batch in train_data_loader: 70 | for per_image in batch: 71 | # Pytorch tensor is in (C, H, W) format 72 | img = per_image["image"].permute(1, 2, 0) 73 | if cfg.INPUT.FORMAT == "BGR": 74 | img = img[:, :, [2, 1, 0]] 75 | else: 76 | img = np.asarray(Image.fromarray(img, mode=cfg.INPUT.FORMAT).convert("RGB")) 77 | 78 | visualizer = Visualizer(img, metadata=metadata, scale=scale) 79 | target_fields = per_image["instances"].get_fields() 80 | labels = [metadata.thing_classes[i] for i in target_fields["gt_classes"]] 81 | vis = visualizer.overlay_instances( 82 | labels=labels, 83 | boxes=target_fields.get("gt_boxes", None), 84 | masks=target_fields.get("gt_masks", None), 85 | keypoints=target_fields.get("gt_keypoints", None), 86 | ) 87 | output(vis, str(per_image["image_id"]) + ".jpg") 88 | else: 89 | dicts = list(chain.from_iterable([DatasetCatalog.get(k) for k in cfg.DATASETS.TRAIN])) 90 | if cfg.MODEL.KEYPOINT_ON: 91 | dicts = filter_images_with_few_keypoints(dicts, 1) 92 | for dic in dicts: 93 | img = utils.read_image(dic["file_name"], "RGB") 94 | visualizer = Visualizer(img, metadata=metadata, scale=scale) 95 | vis = visualizer.draw_dataset_dict(dic) 96 | output(vis, os.path.basename(dic["file_name"])) 97 | -------------------------------------------------------------------------------- 
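A possible invocation of tools/visualize_data.py shown above (the config path and output directory are only examples; any training config in this repository should work, and trailing KEY VALUE pairs are forwarded to cfg.merge_from_list):

python tools/visualize_data.py --source dataloader --config-file configs/BDD100k/R50_FPN_all.yaml --output-dir ./vis_out
python tools/visualize_data.py --source annotation --config-file configs/BDD100k/R50_FPN_all.yaml --show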
/tools/visualize_json_results.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | import argparse 5 | import json 6 | import numpy as np 7 | import os 8 | from collections import defaultdict 9 | import cv2 10 | import tqdm 11 | from fvcore.common.file_io import PathManager 12 | 13 | from detectron2.data import DatasetCatalog, MetadataCatalog 14 | from detectron2.structures import Boxes, BoxMode, Instances 15 | from detectron2.utils.logger import setup_logger 16 | from detectron2.utils.visualizer import Visualizer 17 | 18 | 19 | def create_instances(predictions, image_size): 20 | ret = Instances(image_size) 21 | 22 | score = np.asarray([x["score"] for x in predictions]) 23 | chosen = (score > args.conf_threshold).nonzero()[0] 24 | score = score[chosen] 25 | bbox = np.asarray([predictions[i]["bbox"] for i in chosen]) 26 | bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) 27 | 28 | labels = np.asarray([dataset_id_map(predictions[i]["category_id"]) for i in chosen]) 29 | 30 | ret.scores = score 31 | ret.pred_boxes = Boxes(bbox) 32 | ret.pred_classes = labels 33 | 34 | try: 35 | ret.pred_masks = [predictions[i]["segmentation"] for i in chosen] 36 | except KeyError: 37 | pass 38 | return ret 39 | 40 | 41 | if __name__ == "__main__": 42 | parser = argparse.ArgumentParser( 43 | description="A script that visualizes the json predictions from COCO or LVIS dataset." 44 | ) 45 | parser.add_argument("--input", '-i', required=True, help="JSON file produced by the model") 46 | parser.add_argument("--output", '-o', required=True, help="output directory") 47 | parser.add_argument("--dataset", '-d', help="name of the dataset", default="coco_2017_val") 48 | parser.add_argument("--conf-threshold", '-c', default=0.5, type=float, help="confidence threshold") 49 | args = parser.parse_args() 50 | 51 | logger = setup_logger() 52 | 53 | with PathManager.open(args.input, "r") as f: 54 | predictions = json.load(f) 55 | 56 | os.makedirs(args.output, exist_ok=True) 57 | 58 | pred_by_image = defaultdict(list) 59 | for p in predictions: 60 | pred_by_image[p["image_id"]].append(p) 61 | 62 | dicts = list(DatasetCatalog.get(args.dataset)) 63 | metadata = MetadataCatalog.get(args.dataset) 64 | if hasattr(metadata, "thing_dataset_id_to_contiguous_id"): 65 | 66 | def dataset_id_map(ds_id): 67 | return metadata.thing_dataset_id_to_contiguous_id[ds_id] 68 | 69 | elif "lvis" in args.dataset: 70 | # LVIS results are in the same format as COCO results, but have a different 71 | # mapping from dataset category id to contiguous category id in [0, #categories - 1] 72 | def dataset_id_map(ds_id): 73 | return ds_id - 1 74 | 75 | else: 76 | raise ValueError("Unsupported dataset: {}".format(args.dataset)) 77 | 78 | os.makedirs(args.output, exist_ok=True) 79 | 80 | for dic in tqdm.tqdm(dicts): 81 | img = cv2.imread(dic["file_name"], cv2.IMREAD_COLOR)[:, :, ::-1] 82 | basename = os.path.basename(dic["file_name"]) 83 | 84 | predictions = create_instances(pred_by_image[dic["image_id"]], img.shape[:2]) 85 | vis = Visualizer(img, metadata) 86 | vis_pred = vis.draw_instance_predictions(predictions).get_image() 87 | 88 | vis = Visualizer(img, metadata) 89 | vis_gt = vis.draw_dataset_dict(dic).get_image() 90 | 91 | concat = np.concatenate((vis_pred, vis_gt), axis=1) 92 | cv2.imwrite(os.path.join(args.output, basename), concat[:, :, ::-1]) 93 | 
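A possible invocation of the script above (the input path, dataset name and threshold are placeholders; the dataset must already be registered in DatasetCatalog, and the JSON file is assumed to be the detection dump written by a previous evaluation run):

python tools/visualize_json_results.py -i output/inference/coco_instances_results.json -o ./json_vis -d bdd_tracking_2k_val -c 0.5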
-------------------------------------------------------------------------------- /tools/waymo_bdd.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import torch 3 | import torch.nn.functional as F 4 | import numpy as np 5 | import argparse 6 | import pandas as pd 7 | import seaborn as sns 8 | import matplotlib 9 | matplotlib.use('AGG') 10 | import matplotlib.pyplot as plt 11 | from metric_utils import * 12 | 13 | recall_level_default = 0.95 14 | 15 | 16 | parser = argparse.ArgumentParser(description='Evaluates an OOD Detector', 17 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 18 | parser.add_argument('--energy', type=int, default=1, help='noise for Odin') 19 | parser.add_argument('--T', default=1., type=float, help='temperature: energy|Odin') 20 | parser.add_argument('--thres', default=1., type=float) 21 | parser.add_argument('--name', default=1., type=str) 22 | parser.add_argument('--seed', default=0, type=int) 23 | parser.add_argument('--model', default='faster-rcnn', type=str) 24 | args = parser.parse_args() 25 | 26 | 27 | 28 | concat = lambda x: np.concatenate(x, axis=0) 29 | to_np = lambda x: x.data.cpu().numpy() 30 | 31 | 32 | 33 | # ID data 34 | ood_data = np.load('/nobackup-fast/dataset/my_xfdu/video/waymo/checkpoints/waymo_reported/' + str(args.model) + '/ood.npy',allow_pickle=True) 35 | id_data = np.load('/nobackup-fast/dataset/my_xfdu/video/waymo/checkpoints/waymo_reported/' + str(args.model) + '/id.npy',allow_pickle=True) 36 | # id_data = pickle.load(open('./data/VOC-Detection/' + args.model + '/'+args.name+'/random_seed'+'_' +str(args.seed) +'/inference/voc_custom_val/standard_nms/corruption_level_0/probabilistic_scoring_res_odd_'+str(args.thres)+'.pkl', 'rb')) 37 | # ood_data = pickle.load(open('./data/VOC-Detection/' + args.model + '/'+args.name+'/random_seed' +'_'+str(args.seed) +'/inference/coco_ood_val/standard_nms/corruption_level_0/probabilistic_scoring_res_odd_'+str(args.thres)+'.pkl', 'rb')) 38 | # id_score = [] 39 | # energy score calculation. 
40 | # import ipdb; ipdb.set_trace() 41 | index = 0 42 | for data in id_data: 43 | if index == 0: 44 | id_data_all = data 45 | index += 1 46 | else: 47 | id_data_all = np.concatenate([id_data_all, data], 0) 48 | 49 | id_data = torch.from_numpy(id_data_all) 50 | 51 | index = 0 52 | for data in ood_data: 53 | if index == 0: 54 | ood_data_all = data 55 | index += 1 56 | else: 57 | ood_data_all = np.concatenate([ood_data_all, data], 0) 58 | 59 | ood_data = torch.from_numpy(ood_data_all) 60 | 61 | 62 | 63 | T = 1 64 | # breakpoint() 65 | 66 | 67 | 68 | assert len(id_data[0]) == 4 69 | if args.energy: 70 | id_score = -args.T * torch.logsumexp(id_data[:, :-1] / args.T, dim=1).cpu().data.numpy() 71 | ood_score = -args.T * torch.logsumexp(ood_data[:, :-1] / args.T, dim=1).cpu().data.numpy() 72 | else: 73 | id_score = -np.max(F.softmax(id_data[:, :-1], dim=1).cpu().data.numpy(), axis=1) 74 | ood_score = -np.max(F.softmax(ood_data[:, :-1], dim=1).cpu().data.numpy(), axis=1) 75 | 76 | ########### 77 | ######## 78 | print(len(id_score)) 79 | print(len(ood_score)) 80 | 81 | measures = get_measures(-id_score, -ood_score, plot=False) 82 | 83 | if args.energy: 84 | print_measures(measures[0], measures[1], measures[2], 'energy') 85 | else: 86 | print_measures(measures[0], measures[1], measures[2], 'msp') 87 | 88 | # # import ipdb; ipdb.set_trace() 89 | # plt.figure(figsize=(5.5,3)) 90 | # # plot of 2 variables 91 | # id_pd = pd.Series(-id_score) 92 | # # id_pd.rename('ID') 93 | # 94 | # ood_pd = pd.Series(-ood_score) 95 | # # ood_pd.rename('OOD') 96 | # # data_plot = {'Energy': np.concatenate((-id_score[0:2000], -ood_score), 0), 'label':['ID'] * len(-id_score[0:2000]) + \ 97 | # # ['OOD'] * len(-ood_score)} 98 | # # df_after = pd.DataFrame(data=data_plot) 99 | # # sns.histplot(data=df_after, x="Energy", hue="label") 100 | # p1 = sns.kdeplot(id_pd, shade=True, color="r", label='ID') 101 | # p1 = sns.kdeplot(ood_pd, shade=True, color="b", label='OOD') 102 | # plt.legend(fontsize=12) 103 | # plt.xticks(fontsize=12) 104 | # plt.yticks(fontsize=12) 105 | # # plt.ylabel('Density', fontsize=12) 106 | # if args.energy: 107 | # plt.savefig('voc_coco_gan.jpg', dpi=250) 108 | # else: 109 | # plt.savefig('voc_coco_msp_probdet.jpg', dpi=250) 110 | # # sns.plt.show() 111 | --------------------------------------------------------------------------------
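The three scoring scripts in this directory (bdd_coco.py, vis_coco.py and waymo_bdd.py) differ only in the hard-coded .npy locations and in the expected logit width (11, 41 and 4 columns, i.e. the number of in-distribution classes plus one background column); the scoring step itself is identical. The sketch below factors that shared step into a single helper. It is a minimal illustration rather than part of the repository: the function name, argument names and placeholder paths are assumptions, while get_measures and print_measures refer to the helpers imported from tools/metric_utils.py.

import numpy as np
import torch
import torch.nn.functional as F


def per_box_score(logits, use_energy=True, temperature=1.0):
    """Mirror of the id_score / ood_score computation in the scripts above.

    logits: [N, K+1] array of per-box logits; the last (background) column is
    dropped, matching the `[:, :-1]` slicing used throughout these tools.
    Returns the energy score -T * logsumexp(f(x) / T) when use_energy is True,
    otherwise the negative maximum softmax probability; in both cases lower
    values indicate in-distribution boxes.
    """
    x = torch.from_numpy(np.asarray(logits, dtype=np.float32))[:, :-1]
    if use_energy:
        return (-temperature * torch.logsumexp(x / temperature, dim=1)).numpy()
    return (-F.softmax(x, dim=1).max(dim=1).values).numpy()


# Hypothetical usage mirroring the scripts above (paths are placeholders):
# id_logits = np.concatenate(list(np.load('id.npy', allow_pickle=True)), 0)
# ood_logits = np.concatenate(list(np.load('ood.npy', allow_pickle=True)), 0)
# measures = get_measures(-per_box_score(id_logits), -per_box_score(ood_logits), plot=False)
# print_measures(measures[0], measures[1], measures[2], 'energy')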