├── .gitignore ├── LICENSE ├── README.md ├── _0scripts ├── install.sh ├── test.sh └── train.sh ├── _1Prop_Cfgs ├── ade20k-150 │ ├── Base-ADE20K-150.yaml │ ├── Base-Prop.yaml │ ├── s1_seg.yaml │ ├── s1_seg_crosim.yaml │ ├── s1_seg_pseudo_label.yaml │ ├── s1_seg_retraining.yaml │ ├── s2_seg.yaml │ ├── s2_seg_crosim.yaml │ ├── s2_seg_pseudo_label.yaml │ ├── s2_seg_retraining.yaml │ ├── s3_seg.yaml │ ├── s3_seg_crosim.yaml │ ├── s3_seg_pseudo_label.yaml │ ├── s3_seg_retraining.yaml │ ├── s4_seg.yaml │ ├── s4_seg_crosim.yaml │ ├── s4_seg_pseudo_label.yaml │ └── s4_seg_retraining.yaml └── coco_sutff_10k │ ├── Base-COCO-stuff-10k-prop.yaml │ ├── Base-COCO-stuff-10k.yaml │ ├── s1_seg.yaml │ ├── s1_seg_crosim.yaml │ ├── s1_seg_crosim_resume.yaml │ ├── s1_seg_pseudo_label.yaml │ ├── s1_seg_retraining.yaml │ ├── s2_seg.yaml │ ├── s2_seg_crosim.yaml │ ├── s2_seg_pseudo_label.yaml │ ├── s2_seg_retraining.yaml │ ├── s3_seg.yaml │ ├── s3_seg_crosim.yaml │ ├── s3_seg_pseudo_label.yaml │ ├── s3_seg_retraining.yaml │ ├── s4_seg.yaml │ ├── s4_seg_crosim.yaml │ ├── s4_seg_pseudo_label.yaml │ ├── s4_seg_retraining.yaml │ ├── s5_seg.yaml │ ├── s5_seg_crosim.yaml │ ├── s5_seg_pseudo_label.yaml │ ├── s5_seg_retraining.yaml │ ├── s6_seg.yaml │ ├── s6_seg_crosim.yaml │ ├── s6_seg_pseudo_label.yaml │ ├── s6_seg_retraining.yaml │ ├── s7_seg.yaml │ ├── s7_seg_crosim.yaml │ ├── s7_seg_pseudo_label.yaml │ ├── s7_seg_retraining.yaml │ ├── s8_seg.yaml │ ├── s8_seg_crosim.yaml │ ├── s8_seg_pseudo_label.yaml │ ├── s8_seg_retraining.yaml │ ├── s9_seg.yaml │ ├── s9_seg_crosim.yaml │ ├── s9_seg_pseudo_label.yaml │ └── s9_seg_retraining.yaml ├── configs ├── ade20k-150-panoptic │ ├── maskformer_panoptic_R101_bs16_720k.yaml │ └── maskformer_panoptic_R50_bs16_720k.yaml ├── ade20k-150 │ ├── Base-ADE20K-150.yaml │ ├── maskformer_R101_bs16_160k.yaml │ ├── maskformer_R101c_bs16_160k.yaml │ ├── maskformer_R50_bs16_160k.yaml │ ├── per_pixel_baseline_R50_bs16_160k.yaml │ ├── per_pixel_baseline_plus_R50_bs16_160k.yaml │ └── swin │ │ ├── maskformer_swin_base_IN21k_384_bs16_160k_res640.yaml │ │ ├── maskformer_swin_large_IN21k_384_bs16_160k_res640.yaml │ │ ├── maskformer_swin_small_bs16_160k.yaml │ │ └── maskformer_swin_tiny_bs16_160k.yaml ├── ade20k-full-847 │ ├── Base-ADE20KFull-847.yaml │ ├── maskformer_R101_bs16_200k.yaml │ ├── maskformer_R101c_bs16_200k.yaml │ ├── maskformer_R50_bs16_200k.yaml │ ├── per_pixel_baseline_R50_bs16_200k.yaml │ └── per_pixel_baseline_plus_R50_bs16_200k.yaml ├── cityscapes-19 │ ├── Base-Cityscapes-19.yaml │ ├── maskformer_R101_bs16_90k.yaml │ └── maskformer_R101c_bs16_90k.yaml ├── coco-panoptic │ ├── Base-COCO-PanopticSegmentation.yaml │ ├── maskformer_panoptic_R101_bs64_554k.yaml │ ├── maskformer_panoptic_R50_bs64_554k.yaml │ └── swin │ │ ├── maskformer_panoptic_swin_base_IN21k_384_bs64_554k.yaml │ │ ├── maskformer_panoptic_swin_large_IN21k_384_bs64_554k.yaml │ │ ├── maskformer_panoptic_swin_small_bs64_554k.yaml │ │ └── maskformer_panoptic_swin_tiny_bs64_554k.yaml ├── coco-stuff-10k-171 │ ├── Base-COCOStuff10K-171.yaml │ ├── maskformer_R101_bs32_60k.yaml │ ├── maskformer_R101c_bs32_60k.yaml │ ├── maskformer_R50_bs32_60k.yaml │ ├── per_pixel_baseline_R50_bs32_60k.yaml │ └── per_pixel_baseline_plus_R50_bs32_60k.yaml └── mapillary-vistas-65 │ ├── Base-MapillaryVistas-65.yaml │ └── maskformer_R50_bs16_300k.yaml ├── figs ├── framework.png ├── overview.png ├── viz.png └── viz_func.py ├── init_datasets ├── README.md ├── ade20k_instance_catid_mapping.txt ├── prepare_ade20k_full_sem_seg.py ├── 
prepare_ade20k_pan_seg.py ├── prepare_ade20k_sem_seg.py ├── prepare_coco_stuff_10k_v1.0_sem_seg.py └── voc_meta │ ├── train_aug.txt │ ├── train_aug_base1.txt │ ├── trans_query.pth │ ├── val.txt │ ├── val_base1.txt │ └── word_vectors │ ├── fasttext.pkl │ └── word2vec.pkl ├── main ├── train_net_mf.py └── train_net_qt.py ├── mask_former ├── __init__.py ├── config.py ├── data │ ├── __init__.py │ ├── dataset_mappers │ │ ├── __init__.py │ │ ├── detr_panoptic_dataset_mapper.py │ │ ├── mask_former_panoptic_dataset_mapper.py │ │ ├── mask_former_semantic_dataset_mapper.py │ │ └── weakshot_semantic_dataset_mapper.py │ └── datasets │ │ ├── __init__.py │ │ ├── register_ade20k_full.py │ │ ├── register_ade20k_panoptic.py │ │ ├── register_coco_stuff_10k.py │ │ ├── register_mapillary_vistas.py │ │ ├── register_voc_splits.py │ │ └── shared.py ├── mask_former_model.py ├── modeling │ ├── __init__.py │ ├── backbone │ │ ├── __init__.py │ │ └── swin.py │ ├── criterion.py │ ├── heads │ │ ├── __init__.py │ │ ├── mask_former_head.py │ │ ├── per_pixel_baseline.py │ │ └── pixel_decoder.py │ ├── matcher.py │ └── transformer │ │ ├── __init__.py │ │ ├── position_encoding.py │ │ ├── transformer.py │ │ └── transformer_predictor.py ├── test_time_augmentation.py └── utils │ ├── __init__.py │ ├── misc.py │ └── viz.py ├── prop_former ├── __init__.py ├── config.py ├── data │ ├── __init__.py │ ├── dataset_mappers │ │ ├── __init__.py │ │ ├── weakshot_mapper_training.py │ │ └── weakshot_mapper_training_pair.py │ └── datasets │ │ ├── ADE_20k │ │ ├── info.py │ │ └── register_ADE_20k_splits.py │ │ ├── __init__.py │ │ ├── coco_stuff_10k │ │ ├── meta_files │ │ │ ├── info.py │ │ │ └── updated_rand_permute.npy │ │ ├── register_coco_stuff_10k_splits.py │ │ └── updated_images.py │ │ ├── shared.py │ │ └── voc │ │ ├── __init__.py │ │ ├── meta_files │ │ ├── __init__.py │ │ ├── info.py │ │ ├── split1_existing.txt │ │ ├── split1_updated.txt │ │ ├── train_aug.txt │ │ └── val.txt │ │ ├── register_voc_splits.py │ │ └── split_voc_to_existing_and_updated.py ├── evaluation.py ├── modeling │ ├── __init__.py │ ├── cross_img_sim │ │ ├── compute_pairs.py │ │ ├── cro_simnet.py │ │ ├── func.py │ │ └── meter.py │ ├── fc_modules.py │ ├── hungarian_matcher.py │ ├── loss_func.py │ ├── loss_manager.py │ ├── prop_criterion.py │ ├── prop_former_head.py │ └── prop_transformer_predictor.py ├── prop_former_model.py ├── pseudo_labeling.py └── shared.py ├── requirements.txt └── train_net_prop.py /.gitignore: -------------------------------------------------------------------------------- 1 | pretrained 2 | saves 3 | 4 | # output dir 5 | output 6 | instant_test_output 7 | inference_test_output 8 | 9 | 10 | *.png 11 | *.diff 12 | *.jpg 13 | !/projects/DensePose/doc/images/*.jpg 14 | !figs/*.jpg 15 | !figs/*.png 16 | 17 | # compilation and distribution 18 | __pycache__ 19 | _ext 20 | *.pyc 21 | *.pyd 22 | *.so 23 | *.dll 24 | *.egg-info/ 25 | build/ 26 | dist/ 27 | wheels/ 28 | 29 | # pytorch/python/numpy formats 30 | *.ts 31 | model_ts*.txt 32 | 33 | # ipython/jupyter notebooks 34 | *.ipynb 35 | **/.ipynb_checkpoints/ 36 | 37 | # Editor temporaries 38 | *.swn 39 | *.swo 40 | *.swp 41 | *~ 42 | 43 | # editor settings 44 | .idea 45 | .vscode 46 | _darcs 47 | 48 | # project dirs 49 | /detectron2/model_zoo/configs 50 | /datasets/* 51 | !/datasets/*.* 52 | /projects/*/datasets 53 | /models 54 | /snippet -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 
1 | # Weak-shot Semantic Segmentation via Dual Similarity Transfer 2 | 3 | This repository contains the official PyTorch implementation of the following paper: 4 | 5 | > **Weak-shot Semantic Segmentation via Dual Similarity Transfer**
6 | > 7 | > Junjie Chen, [Li Niu](http://bcmi.sjtu.edu.cn/home/niuli/), Siyuan Zhou, Jianlou Si, Chen Qian, and Liqing Zhang<br>
MoE Key Lab of Artificial Intelligence, Shanghai Jiao Tong University<br>
8 | > https://arxiv.org/abs/2210.02270<br>
Accepted by **NeurIPS 2022**. 9 | 10 | ## Abstract 11 | Semantic segmentation is a practical and active task, but severely suffers from the expensive cost of pixel-level labels when extending to more classes in wider applications. 12 | To this end, we focus on the problem named weak-shot semantic segmentation, where the novel classes are learnt from cheaper image-level labels with the support of base classes having off-the-shelf pixel-level labels. 13 | To tackle this problem, we propose SimFormer, which performs dual similarity transfer upon MaskFormer. 14 | Specifically, MaskFormer disentangles the semantic segmentation task into two sub-tasks for each proposal: single-label classification and binary segmentation. 15 | The binary segmentation allows proposal-pixel similarity transfer from base classes to novel classes, which enables the mask learning of novel classes. 16 | We also learn pixel-pixel similarity from base classes and distill such class-agnostic semantic similarity to the semantic masks of novel classes, which regularizes the segmentation model with pixel-level semantic relationships across images. 17 | In addition, we propose a complementary loss to facilitate the learning of novel classes. 18 | Comprehensive experiments on the challenging COCO-Stuff-10K and ADE20K datasets demonstrate the effectiveness of our method. 19 | 20 | ## 2. Problem and Method 21 | <div align="center">
22 | <img src='figs/overview.png' align="center" width=800> 23 | </div>
24 |
25 | We refer to our learning scenario as weak-shot semantic segmentation, which focuses on further segmenting novel classes by virtue of cheaper image-level labels with the support of base classes having pixel-level masks. 26 | Specifically, given a standard semantic segmentation dataset annotated only for base classes (the novel classes hide in the ignored regions), we assume that the image-level labels are available for novel classes in each image, as shown in the above figure (a). 27 | Our proposed solution is SimFormer, which performs dual similarity transfer upon MaskFormer, as shown in the above figure (b). 28 |
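For intuition, below is a minimal PyTorch sketch of the two transfers (this is not the released implementation; tensor names, shapes, and the pixel-pair teacher are illustrative assumptions): the binary-segmentation sub-task scores every pixel by its similarity to a proposal embedding, so novel proposals inherit mask learning from base ones, while a class-agnostic pixel-pixel similarity learned on base classes is distilled into the novel-class mask predictions.

```python
# Minimal, self-contained sketch of the two similarity transfers (illustrative only;
# tensor names/shapes and the pixel-pair sampling are assumptions, not the repo's API).
import torch
import torch.nn.functional as F


def proposal_pixel_masks(proposal_embed: torch.Tensor, pixel_embed: torch.Tensor) -> torch.Tensor:
    """Binary mask logits as proposal-pixel similarity.

    proposal_embed: (Q, C) mask embeddings, one per proposal
    pixel_embed:    (C, H, W) per-pixel embeddings from the pixel decoder
    returns:        (Q, H, W) mask logits (dot-product similarity)
    """
    return torch.einsum("qc,chw->qhw", proposal_embed, pixel_embed)


def pixel_pair_distill_loss(pair_sim: torch.Tensor, novel_scores: torch.Tensor) -> torch.Tensor:
    """Distill class-agnostic pixel-pixel similarity into novel-class masks.

    pair_sim:     (N, N) teacher probabilities that two sampled pixels share a class
    novel_scores: (N, K) per-pixel distribution over K novel classes (rows sum to 1)
    """
    # Probability that both pixels of a pair fall into the same novel class.
    same_class = (novel_scores @ novel_scores.t()).clamp(1e-6, 1.0 - 1e-6)
    # Push the segmentation model's same-class probabilities toward the teacher.
    return F.binary_cross_entropy(same_class, pair_sim)


if __name__ == "__main__":
    Q, C, H, W, N, K = 100, 256, 32, 32, 64, 5
    masks = proposal_pixel_masks(torch.randn(Q, C), torch.randn(C, H, W))
    scores = torch.softmax(torch.randn(N, K), dim=-1)
    teacher = torch.rand(N, N)
    print(masks.shape, pixel_pair_distill_loss(teacher, scores).item())
```

In this repository, the pixel-pair teacher corresponds to the similarity network under `prop_former/modeling/cross_img_sim`, and the distillation weight is exposed as `CROSS_IMG_SIM.DISTILL_LOSS` in the `*_crosim.yaml` configs.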
29 | 30 | ## 3. Experiment and Result 31 | <div align="center"> 32 | <img src='figs/viz.png' align="center" width=800> 33 | </div>
34 |
35 | Extensive experiments on the challenging COCO-Stuff-10K and ADE20K datasets have demonstrated the effectiveness of our proposed method. 36 | We provide in-depth qualitative visualization in the above figure, from which we can directly inspect the single-label classification and binary segmentation sub-tasks of each proposal embedding. 37 | Overall, the predicted classes are precise and confident, and the produced masks of proposal embeddings completely cover the corresponding semantic classes. 38 | Although Truck is actually absent from the first example, its class score and binary mask are both relatively low, and thus the fused result does not severely degrade the final segmentation performance. 39 | 40 | 41 | ## 4. Codebase 42 | 43 | ### 4.1 Data 44 | The COCO-Stuff-10K and ADE20K datasets are prepared following [MaskFormer](https://github.com/facebookresearch/MaskFormer). 45 | For convenience, we provide the data packages at [Baidu Cloud](https://pan.baidu.com/s/1brIra88FOdsaV0kLCfph2Q?pwd=BCMI) (access code: BCMI). 46 | All data files are configured as: 47 | 48 | ``` 49 | root_dir 50 | ├── datasets 51 | │   ├── coco/coco_stuff_10k 52 | │   │   ├── images_detectron2 53 | │   │   └── annotations_detectron2 54 | │   ├── ADEChallengeData2016 55 | │   │   ├── images_detectron2 56 | │   │   └── annotations_detectron2 57 | │   └── …… 58 | ``` 59 | 60 | The base/novel class split information for both datasets can be found in `prop_former/data/datasets/coco_stuff_10k/meta_files/info.py` and `prop_former/data/datasets/ADE_20k/info.py`. 61 | 62 | ### 4.2 Install 63 | The proposed approach is implemented in Python 3.7.4 and PyTorch 1.8.0. 64 | The full installation script can be found in `_0scripts/install.sh`. 65 | 66 | ### 4.3 Evaluation 67 | The trained models are released as `trained_models.zip` at [Baidu Cloud](https://pan.baidu.com/s/1brIra88FOdsaV0kLCfph2Q?pwd=BCMI) (access code: BCMI). 68 | 69 | Example commands for evaluation can be found in `_0scripts/test.sh`. 70 | 71 | ### 4.4 Training 72 | Example commands for training can be found in `_0scripts/train.sh`, and the full weak-shot pipeline is sketched below.
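Putting the configs together, each split runs through three stages: weak-shot training, pseudo-label generation on the training images, and retraining with the pseudo masks. The sketch below shows the chain for COCO-Stuff-10K split 1; the stage-1 checkpoint path is a placeholder for your own output, and the pseudo-labeling stage follows the same `--eval-only` pattern as `_0scripts/test.sh`:

```
# Stage 1: train SimFormer (optionally s1_seg_crosim.yaml for cross-image similarity)
python train_net_prop.py --config-file _1Prop_Cfgs/coco_sutff_10k/s1_seg.yaml

# Stage 2: dump pseudo labels for novel classes on the training set
python train_net_prop.py --config-file _1Prop_Cfgs/coco_sutff_10k/s1_seg_pseudo_label.yaml --eval-only MODEL.WEIGHTS path/to/stage1_model.pth

# Stage 3: retrain with the pseudo masks (see PSEUDO_LABEL_PATH in s1_seg_retraining.yaml)
python train_net_prop.py --config-file _1Prop_Cfgs/coco_sutff_10k/s1_seg_retraining.yaml
```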
73 | 74 | ## Resources 75 | 76 | We have summarized the existing papers and codes on weak-shot learning in the following repository: 77 | [https://github.com/bcmi/Awesome-Weak-Shot-Learning](https://github.com/bcmi/Awesome-Weak-Shot-Learning) 78 | 79 | ## BibTeX 80 | If you find this work useful for your research, please cite our paper using the following **BibTeX [[pdf]()] [[supp](https://arxiv.org/abs/2210.02270)] [[arxiv](https://arxiv.org/abs/2210.02270)]:** 81 | 82 | ``` 83 | @inproceedings{SimFormer2022, 84 | title={Weak-shot Semantic Segmentation via Dual Similarity Transfer}, 85 | author={Chen, Junjie and Niu, Li and Zhou, Siyuan and Si, Jianlou and Qian, Chen and Zhang, Liqing}, 86 | booktitle={NeurIPS}, 87 | year={2022}} 88 | ``` 89 | -------------------------------------------------------------------------------- /_0scripts/install.sh: -------------------------------------------------------------------------------- 1 | conda create -n ENV python=3.7.4 2 | conda activate ENV 3 | pip install torch==1.8.0+cu101 torchvision==0.9.0+cu101 -f https://mirror.sjtu.edu.cn/pytorch-wheels/torch_stable.html 4 | python -m pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.8/index.html 5 | pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple -------------------------------------------------------------------------------- /_0scripts/test.sh: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- COCO Stuff 10K ------------------------------------------------------------------------------------ 2 | python train_net_prop.py --config-file _1Prop_Cfgs/coco_sutff_10k/s1_seg.yaml --eval-only MODEL.WEIGHTS ../../pretrained/model_final_cb03eb_COCO.pkl OUTPUT_PREFIX Fully_COCO_S1 3 | python train_net_prop.py --config-file _1Prop_Cfgs/coco_sutff_10k/s2_seg.yaml --eval-only MODEL.WEIGHTS ../../pretrained/model_final_cb03eb_COCO.pkl OUTPUT_PREFIX Fully_COCO_S2 4 | python train_net_prop.py --config-file _1Prop_Cfgs/coco_sutff_10k/s3_seg.yaml --eval-only MODEL.WEIGHTS ../../pretrained/model_final_cb03eb_COCO.pkl OUTPUT_PREFIX Fully_COCO_S3 5 | python train_net_prop.py --config-file _1Prop_Cfgs/coco_sutff_10k/s4_seg.yaml --eval-only MODEL.WEIGHTS ../../pretrained/model_final_cb03eb_COCO.pkl OUTPUT_PREFIX Fully_COCO_S4 6 | 7 | python train_net_prop.py --config-file _1Prop_Cfgs/coco_sutff_10k/s1_seg.yaml --eval-only MODEL.WEIGHTS ../../pretrained/Release/COCO/SimFormer_S1.pth OUTPUT_PREFIX os_COCO_S1 8 | 9 | python train_net_prop.py --config-file _1Prop_Cfgs/coco_sutff_10k/s1_seg.yaml --eval-only MODEL.WEIGHTS ../../pretrained/Release/COCO/final_S1.pth OUTPUT_PREFIX Ours_COCO_S1 10 | python train_net_prop.py --config-file _1Prop_Cfgs/coco_sutff_10k/s2_seg.yaml --eval-only MODEL.WEIGHTS ../../pretrained/Release/COCO/final_S2.pth OUTPUT_PREFIX Ours_COCO_S2 11 | python train_net_prop.py --config-file _1Prop_Cfgs/coco_sutff_10k/s3_seg.yaml --eval-only MODEL.WEIGHTS ../../pretrained/Release/COCO/final_S3.pth OUTPUT_PREFIX Ours_COCO_S3 12 | python train_net_prop.py --config-file _1Prop_Cfgs/coco_sutff_10k/s4_seg.yaml --eval-only MODEL.WEIGHTS ../../pretrained/Release/COCO/final_S4.pth OUTPUT_PREFIX Ours_COCO_S4 13 | # -------------------------------------------------------- ADE 20K ------------------------------------------------------------------------------------ 14 | python train_net_prop.py --config-file _1Prop_Cfgs/ade20k-150/s1_seg.yaml --eval-only
MODEL.WEIGHTS ../../pretrained/model_final_d8dbeb_ADE.pkl OUTPUT_PREFIX Fully_ADE_S1 15 | python train_net_prop.py --config-file _1Prop_Cfgs/ade20k-150/s2_seg.yaml --eval-only MODEL.WEIGHTS ../../pretrained/model_final_d8dbeb_ADE.pkl OUTPUT_PREFIX Fully_ADE_S2 16 | python train_net_prop.py --config-file _1Prop_Cfgs/ade20k-150/s3_seg.yaml --eval-only MODEL.WEIGHTS ../../pretrained/model_final_d8dbeb_ADE.pkl OUTPUT_PREFIX Fully_ADE_S3 17 | python train_net_prop.py --config-file _1Prop_Cfgs/ade20k-150/s4_seg.yaml --eval-only MODEL.WEIGHTS ../../pretrained/model_final_d8dbeb_ADE.pkl OUTPUT_PREFIX Fully_ADE_S4 18 | 19 | python train_net_prop.py --config-file _1Prop_Cfgs/ade20k-150/s1_seg.yaml --eval-only MODEL.WEIGHTS ../../pretrained/Release/ADE/final_S1.pth OUTPUT_PREFIX Ours_ADE_S1 20 | python train_net_prop.py --config-file _1Prop_Cfgs/ade20k-150/s2_seg.yaml --eval-only MODEL.WEIGHTS ../../pretrained/Release/ADE/final_S2.pth OUTPUT_PREFIX Ours_ADE_S2 21 | python train_net_prop.py --config-file _1Prop_Cfgs/ade20k-150/s3_seg.yaml --eval-only MODEL.WEIGHTS ../../pretrained/Release/ADE/final_S3.pth OUTPUT_PREFIX Ours_ADE_S3 22 | python train_net_prop.py --config-file _1Prop_Cfgs/ade20k-150/s4_seg.yaml --eval-only MODEL.WEIGHTS ../../pretrained/Release/ADE/final_S4.pth OUTPUT_PREFIX Ours_ADE_S4 23 | -------------------------------------------------------------------------------- /_0scripts/train.sh: -------------------------------------------------------------------------------- 1 | python train_net_prop.py --config-file _1Prop_Cfgs/coco_sutff_10k/s1_seg.yaml 2 | python train_net_prop.py --config-file _1Prop_Cfgs/ade20k-150/s1_seg.yaml -------------------------------------------------------------------------------- /_1Prop_Cfgs/ade20k-150/Base-ADE20K-150.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | BACKBONE: 3 | FREEZE_AT: 0 4 | NAME: "build_resnet_backbone" 5 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 6 | PIXEL_MEAN: [123.675, 116.280, 103.530] 7 | PIXEL_STD: [58.395, 57.120, 57.375] 8 | RESNETS: 9 | DEPTH: 50 10 | STEM_TYPE: "basic" # not used 11 | STEM_OUT_CHANNELS: 64 12 | STRIDE_IN_1X1: False 13 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 14 | # NORM: "SyncBN" 15 | RES5_MULTI_GRID: [1, 1, 1] # not used 16 | DATASETS: 17 | TRAIN: ("ade20k_sem_seg_train",) 18 | TEST: ("ade20k_sem_seg_val",) 19 | SOLVER: 20 | IMS_PER_BATCH: 8 21 | BASE_LR: 0.0001 22 | MAX_ITER: 160000 23 | WARMUP_FACTOR: 1.0 24 | WARMUP_ITERS: 0 25 | WEIGHT_DECAY: 0.0001 26 | OPTIMIZER: "ADAMW" 27 | LR_SCHEDULER_NAME: "WarmupPolyLR" 28 | BACKBONE_MULTIPLIER: 0.1 29 | CLIP_GRADIENTS: 30 | ENABLED: True 31 | CLIP_TYPE: "full_model" 32 | CLIP_VALUE: 0.01 33 | NORM_TYPE: 2.0 34 | INPUT: 35 | MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 512) for x in range(5, 21)]"] 36 | MIN_SIZE_TRAIN_SAMPLING: "choice" 37 | MIN_SIZE_TEST: 512 38 | MAX_SIZE_TRAIN: 2048 39 | MAX_SIZE_TEST: 2048 40 | CROP: 41 | ENABLED: True 42 | TYPE: "absolute" 43 | SIZE: (512, 512) 44 | SINGLE_CATEGORY_MAX_AREA: 1.0 45 | COLOR_AUG_SSD: True 46 | SIZE_DIVISIBILITY: 512 # used in dataset mapper 47 | FORMAT: "RGB" 48 | DATASET_MAPPER_NAME: "mask_former_semantic" 49 | TEST: 50 | EVAL_PERIOD: 20000 51 | AUG: 52 | ENABLED: False 53 | MIN_SIZES: [256, 384, 512, 640, 768, 896] 54 | MAX_SIZE: 3584 55 | FLIP: True 56 | DATALOADER: 57 | FILTER_EMPTY_ANNOTATIONS: True 58 | NUM_WORKERS: 4 59 | VERSION: 2 60 | 
-------------------------------------------------------------------------------- /_1Prop_Cfgs/ade20k-150/Base-Prop.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-ADE20K-150.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "PropFormer" 4 | 5 | SEM_SEG_HEAD: 6 | NAME: "PropFormerHead" 7 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 8 | IGNORE_VALUE: 255 9 | NUM_CLASSES: 150 10 | COMMON_STRIDE: 4 # not used, hard-coded 11 | LOSS_WEIGHT: 1.0 12 | CONVS_DIM: 256 13 | MASK_DIM: 256 14 | NORM: "GN" 15 | 16 | MASK_FORMER: 17 | TRANSFORMER_IN_FEATURE: "res5" 18 | DEEP_SUPERVISION: True 19 | NO_OBJECT_WEIGHT: 0.1 20 | DICE_WEIGHT: 1.0 21 | MASK_WEIGHT: 20.0 22 | HIDDEN_DIM: 256 23 | NUM_OBJECT_QUERIES: 100 24 | NHEADS: 8 25 | DROPOUT: 0.1 26 | DIM_FEEDFORWARD: 2048 27 | ENC_LAYERS: 0 28 | DEC_LAYERS: 6 29 | PRE_NORM: False 30 | 31 | SOLVER: 32 | CHECKPOINT_PERIOD: 999999 33 | 34 | INPUT: 35 | DATASET_MAPPER_NAME: "weakshot_sem_seg_mapper" -------------------------------------------------------------------------------- /_1Prop_Cfgs/ade20k-150/s1_seg.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-Prop.yaml 2 | 3 | DATASETS: 4 | TRAIN: ("ADE_split1_train",) 5 | # TEST: ("ADE_split1_train","ADE_split1_val",) 6 | TEST: ("ADE_split1_val",) 7 | 8 | ASM: 9 | HasMaskCls: 1. 10 | NoMaskCls: 1. 11 | HasMaskMask: 1. 12 | NoMaskMask: 0. 13 | 14 | LOSS: 15 | AssignCls: 1. 16 | 17 | AssignMaskDICE: 1. 18 | AssignMaskMASK: 20. 19 | 20 | 21 | CompSupNovel: 0.2 22 | 23 | EVAL: 24 | # bg_base_novel 25 | BIAS: ( "1_1_1", ) 26 | 27 | MODEL: 28 | MASK_FORMER: 29 | CLS_LOSS_TYPE: SoftmaxBCE # SoftmaxBCE / SigmoidBCE / RIB / MSM 30 | 31 | SOLVER: 32 | CHECKPOINT_PERIOD: 999999 33 | 34 | OUTPUT_PREFIX: ADE_S1 -------------------------------------------------------------------------------- /_1Prop_Cfgs/ade20k-150/s1_seg_crosim.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: s1_seg.yaml 2 | 3 | INPUT: 4 | DATASET_MAPPER_NAME: pair_mapper 5 | 6 | CROSS_IMG_SIM: 7 | PAIR_TYPE: Deconf0.01 8 | 9 | BASE_LOSS: 1.0 10 | 11 | DISTILL_LOSS: 0.5 12 | DISTILL_TO: NovelScore # [NovelScore, FullScore, FullLogit, FullLogitC] 13 | DISTILL_FUNC: cce # [ce, cce, b0.5] 14 | 15 | SOLVER: 16 | IMS_PER_BATCH: 4 -------------------------------------------------------------------------------- /_1Prop_Cfgs/ade20k-150/s1_seg_pseudo_label.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-Prop.yaml 2 | 3 | GeneratePseudoLabel: True 4 | 5 | DATASETS: 6 | TRAIN: ("ADE_split1_train",) 7 | TEST: ("ADE_split1_train",) 8 | 9 | VIZ: 10 | EVAL_HEAD: 0 11 | 12 | TEST: 13 | AUG: 14 | ENABLED: True 15 | # MIN_SIZES: [320, 480, 640, 800, 960, 1120] 16 | MIN_SIZES: [ 320, 480, 640, 800, 960 ] 17 | MAX_SIZE: 4480 18 | FLIP: True 19 | 20 | MODEL: 21 | SEM_SEG_HEAD: 22 | NUM_CLASSES: 150 23 | 24 | MASK_FORMER: 25 | NUM_OBJECT_QUERIES: 100 26 | CLS_LOSS_TYPE: SoftmaxBCE # SoftmaxBCE / SigmoidBCE / RIB / MSM 27 | 28 | SOLVER: 29 | CHECKPOINT_PERIOD: 999999 30 | 31 | OUTPUT_PREFIX: GenerateADEPseudoLabelS1 -------------------------------------------------------------------------------- /_1Prop_Cfgs/ade20k-150/s1_seg_retraining.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-Prop.yaml 2 | 3 | DATASETS: 4 | TRAIN: ("ADE_split1_train",) 5 | TEST: ("ADE_split1_val",) 6 | # TEST: ("ADE_split1_train",) 7 | 8 | 
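# Retraining stage: NOVEL_HAS_MASK switches novel classes to pixel-level supervision,
# taken from the pseudo masks generated by s1_seg_pseudo_label.yaml and expected
# under PSEUDO_LABEL_PATH.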
NOVEL_HAS_MASK: True 9 | PSEUDO_LABEL_PATH: pseudo_ours_ADE_S1 10 | 11 | 12 | ASM: 13 | HasMaskCls: 1. 14 | NoMaskCls: 1. 15 | HasMaskMask: 1. 16 | NoMaskMask: 0. 17 | 18 | LOSS: 19 | AssignCls: 1. 20 | MILCls: 0. 21 | 22 | AssignMaskDICE: 1. 23 | AssignMaskMASK: 20. 24 | 25 | PoolMask: 0.0 26 | 27 | CompSupNovel: 0.0 28 | EntroRegNovel: 0.0 29 | 30 | PER_PROP_ENTROPY: 0. 31 | CAT_MASK_ENTROPY: 0. 32 | 33 | EVAL: 34 | # bg_base_novel 35 | BIAS: ( "1_1_1", ) 36 | 37 | MODEL: 38 | SEM_SEG_HEAD: 39 | NUM_CLASSES: 150 40 | 41 | MASK_FORMER: 42 | NUM_OBJECT_QUERIES: 100 43 | CLS_LOSS_TYPE: SoftmaxBCE # SoftmaxBCE / SigmoidBCE / RIB / MSM 44 | 45 | SOLVER: 46 | CHECKPOINT_PERIOD: 999999 47 | 48 | OUTPUT_PREFIX: ADE_S1_RETRAINING -------------------------------------------------------------------------------- /_1Prop_Cfgs/ade20k-150/s2_seg.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: s1_seg.yaml 2 | 3 | DATASETS: 4 | TRAIN: ("ADE_split2_train",) 5 | # TEST: ("ADE_split2_train","ADE_split2_val",) 6 | TEST: ("ADE_split2_val",) 7 | 8 | OUTPUT_PREFIX: ADE_S2 -------------------------------------------------------------------------------- /_1Prop_Cfgs/ade20k-150/s2_seg_crosim.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: s2_seg.yaml 2 | 3 | INPUT: 4 | DATASET_MAPPER_NAME: pair_mapper 5 | 6 | CROSS_IMG_SIM: 7 | PAIR_TYPE: Deconf0.01 8 | 9 | BASE_LOSS: 1.0 10 | 11 | DISTILL_LOSS: 0.5 12 | DISTILL_TO: NovelScore # [NovelScore, FullScore, FullLogit, FullLogitC] 13 | DISTILL_FUNC: cce # [ce, cce, b0.5] 14 | 15 | SOLVER: 16 | IMS_PER_BATCH: 4 -------------------------------------------------------------------------------- /_1Prop_Cfgs/ade20k-150/s2_seg_pseudo_label.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-Prop.yaml 2 | 3 | GeneratePseudoLabel: True 4 | 5 | DATASETS: 6 | TRAIN: ("ADE_split2_train",) 7 | TEST: ("ADE_split2_train",) 8 | 9 | VIZ: 10 | EVAL_HEAD: 0 11 | 12 | TEST: 13 | AUG: 14 | ENABLED: True 15 | # MIN_SIZES: [320, 480, 640, 800, 960, 1120] 16 | MIN_SIZES: [ 320, 480, 640, 800, 960 ] 17 | MAX_SIZE: 4480 18 | FLIP: True 19 | 20 | MODEL: 21 | SEM_SEG_HEAD: 22 | NUM_CLASSES: 150 23 | 24 | MASK_FORMER: 25 | NUM_OBJECT_QUERIES: 100 26 | CLS_LOSS_TYPE: SoftmaxBCE # SoftmaxBCE / SigmoidBCE / RIB / MSM 27 | 28 | SOLVER: 29 | CHECKPOINT_PERIOD: 999999 30 | 31 | OUTPUT_PREFIX: GenerateADEPseudoLabelS2 -------------------------------------------------------------------------------- /_1Prop_Cfgs/ade20k-150/s2_seg_retraining.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-Prop.yaml 2 | 3 | DATASETS: 4 | TRAIN: ("ADE_split2_train",) 5 | TEST: ("ADE_split2_val",) 6 | 7 | NOVEL_HAS_MASK: True 8 | PSEUDO_LABEL_PATH: pseudo_ours_ADE_S2 9 | 10 | ASM: 11 | HasMaskCls: 1. 12 | NoMaskCls: 1. 13 | HasMaskMask: 1. 14 | NoMaskMask: 0. 15 | 16 | LOSS: 17 | AssignCls: 1. 18 | MILCls: 0. 19 | 20 | AssignMaskDICE: 1. 21 | AssignMaskMASK: 20. 22 | 23 | PoolMask: 0.0 24 | 25 | CompSupNovel: 0.0 26 | EntroRegNovel: 0.0 27 | 28 | PER_PROP_ENTROPY: 0. 29 | CAT_MASK_ENTROPY: 0. 
30 | 31 | EVAL: 32 | # bg_base_novel 33 | BIAS: ( "1_1_1", ) 34 | 35 | MODEL: 36 | SEM_SEG_HEAD: 37 | NUM_CLASSES: 150 38 | 39 | MASK_FORMER: 40 | NUM_OBJECT_QUERIES: 100 41 | CLS_LOSS_TYPE: SoftmaxBCE # SoftmaxBCE / SigmoidBCE / RIB / MSM 42 | 43 | SOLVER: 44 | CHECKPOINT_PERIOD: 999999 45 | 46 | OUTPUT_PREFIX: ADE_S2_RETRAINING -------------------------------------------------------------------------------- /_1Prop_Cfgs/ade20k-150/s3_seg.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: s1_seg.yaml 2 | 3 | DATASETS: 4 | TRAIN: ("ADE_split3_train",) 5 | # TEST: ("ADE_split3_train","ADE_split3_val",) 6 | TEST: ("ADE_split3_val",) 7 | 8 | OUTPUT_PREFIX: ADE_S3 -------------------------------------------------------------------------------- /_1Prop_Cfgs/ade20k-150/s3_seg_crosim.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: s3_seg.yaml 2 | 3 | INPUT: 4 | DATASET_MAPPER_NAME: pair_mapper 5 | 6 | CROSS_IMG_SIM: 7 | PAIR_TYPE: Deconf0.01 8 | 9 | BASE_LOSS: 1.0 10 | 11 | DISTILL_LOSS: 0.5 12 | DISTILL_TO: NovelScore # [NovelScore, FullScore, FullLogit, FullLogitC] 13 | DISTILL_FUNC: cce # [ce, cce, b0.5] 14 | 15 | SOLVER: 16 | IMS_PER_BATCH: 4 -------------------------------------------------------------------------------- /_1Prop_Cfgs/ade20k-150/s3_seg_pseudo_label.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-Prop.yaml 2 | 3 | GeneratePseudoLabel: True 4 | 5 | DATASETS: 6 | TRAIN: ("ADE_split3_train",) 7 | TEST: ("ADE_split3_train",) 8 | 9 | TEST: 10 | AUG: 11 | ENABLED: True 12 | # MIN_SIZES: [320, 480, 640, 800, 960, 1120] 13 | MIN_SIZES: [ 320, 480, 640, 800, 960 ] 14 | MAX_SIZE: 4480 15 | FLIP: True 16 | 17 | MODEL: 18 | SEM_SEG_HEAD: 19 | NUM_CLASSES: 150 20 | 21 | MASK_FORMER: 22 | NUM_OBJECT_QUERIES: 100 23 | CLS_LOSS_TYPE: SoftmaxBCE # SoftmaxBCE / SigmoidBCE / RIB / MSM 24 | 25 | SOLVER: 26 | CHECKPOINT_PERIOD: 999999 27 | 28 | OUTPUT_PREFIX: GenerateADEPseudoLabelS3 -------------------------------------------------------------------------------- /_1Prop_Cfgs/ade20k-150/s3_seg_retraining.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-Prop.yaml 2 | 3 | DATASETS: 4 | TRAIN: ("ADE_split3_train",) 5 | TEST: ("ADE_split3_val",) 6 | 7 | NOVEL_HAS_MASK: True 8 | PSEUDO_LABEL_PATH: pseudo_ours_ADE_S3 9 | 10 | ASM: 11 | HasMaskCls: 1. 12 | NoMaskCls: 1. 13 | HasMaskMask: 1. 14 | NoMaskMask: 0. 15 | 16 | LOSS: 17 | AssignCls: 1. 18 | MILCls: 0. 19 | 20 | AssignMaskDICE: 1. 21 | AssignMaskMASK: 20. 
22 | 23 | EVAL: 24 | # bg_base_novel 25 | BIAS: ( "1_1_1", ) 26 | 27 | MODEL: 28 | SEM_SEG_HEAD: 29 | NUM_CLASSES: 150 30 | 31 | MASK_FORMER: 32 | NUM_OBJECT_QUERIES: 100 33 | CLS_LOSS_TYPE: SoftmaxBCE # SoftmaxBCE / SigmoidBCE / RIB / MSM 34 | 35 | SOLVER: 36 | CHECKPOINT_PERIOD: 999999 37 | 38 | OUTPUT_PREFIX: ADE_S3_RETRAINING -------------------------------------------------------------------------------- /_1Prop_Cfgs/ade20k-150/s4_seg.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: s1_seg.yaml 2 | 3 | DATASETS: 4 | TRAIN: ("ADE_split4_train",) 5 | # TEST: ("ADE_split4_train","ADE_split4_val",) 6 | TEST: ("ADE_split4_val",) 7 | 8 | OUTPUT_PREFIX: ADE_S4 -------------------------------------------------------------------------------- /_1Prop_Cfgs/ade20k-150/s4_seg_crosim.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: s4_seg.yaml 2 | 3 | INPUT: 4 | DATASET_MAPPER_NAME: pair_mapper 5 | 6 | CROSS_IMG_SIM: 7 | PAIR_TYPE: Deconf0.01 8 | 9 | BASE_LOSS: 1.0 10 | 11 | DISTILL_LOSS: 0.5 12 | DISTILL_TO: NovelScore # [NovelScore, FullScore, FullLogit, FullLogitC] 13 | DISTILL_FUNC: cce # [ce, cce, b0.5] 14 | 15 | SOLVER: 16 | IMS_PER_BATCH: 4 -------------------------------------------------------------------------------- /_1Prop_Cfgs/ade20k-150/s4_seg_pseudo_label.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-Prop.yaml 2 | 3 | GeneratePseudoLabel: True 4 | 5 | DATASETS: 6 | TRAIN: ("ADE_split4_train",) 7 | TEST: ("ADE_split4_train",) 8 | 9 | VIZ: 10 | EVAL_HEAD: 0 11 | 12 | TEST: 13 | AUG: 14 | ENABLED: True 15 | # MIN_SIZES: [320, 480, 640, 800, 960, 1120] 16 | MIN_SIZES: [ 320, 480, 640, 800, 960, 1120] 17 | MAX_SIZE: 4480 18 | FLIP: True 19 | 20 | MODEL: 21 | SEM_SEG_HEAD: 22 | NUM_CLASSES: 150 23 | 24 | MASK_FORMER: 25 | NUM_OBJECT_QUERIES: 100 26 | CLS_LOSS_TYPE: SoftmaxBCE # SoftmaxBCE / SigmoidBCE / RIB / MSM 27 | 28 | SOLVER: 29 | CHECKPOINT_PERIOD: 999999 30 | 31 | OUTPUT_PREFIX: GenerateADEPseudoLabelS4 -------------------------------------------------------------------------------- /_1Prop_Cfgs/ade20k-150/s4_seg_retraining.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-Prop.yaml 2 | 3 | DATASETS: 4 | TRAIN: ("ADE_split4_train",) 5 | TEST: ("ADE_split4_val",) 6 | 7 | NOVEL_HAS_MASK: True 8 | PSEUDO_LABEL_PATH: pseudo_ours_ADE_S4 9 | 10 | ASM: 11 | HasMaskCls: 1. 12 | NoMaskCls: 1. 13 | HasMaskMask: 1. 14 | NoMaskMask: 0. 15 | 16 | LOSS: 17 | AssignCls: 1. 18 | MILCls: 0. 19 | 20 | AssignMaskDICE: 1. 21 | AssignMaskMASK: 20. 22 | 23 | PoolMask: 0.0 24 | 25 | CompSupNovel: 0.0 26 | EntroRegNovel: 0.0 27 | 28 | PER_PROP_ENTROPY: 0. 29 | CAT_MASK_ENTROPY: 0. 
30 | 31 | EVAL: 32 | # bg_base_novel 33 | BIAS: ( "1_1_1", ) 34 | 35 | MODEL: 36 | SEM_SEG_HEAD: 37 | NUM_CLASSES: 150 38 | 39 | MASK_FORMER: 40 | NUM_OBJECT_QUERIES: 100 41 | CLS_LOSS_TYPE: SoftmaxBCE # SoftmaxBCE / SigmoidBCE / RIB / MSM 42 | 43 | SOLVER: 44 | CHECKPOINT_PERIOD: 999999 45 | 46 | OUTPUT_PREFIX: ADE_S4_RETRAINING -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/Base-COCO-stuff-10k-prop.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-COCO-stuff-10k.yaml 2 | 3 | MODEL: 4 | META_ARCHITECTURE: "PropFormer" 5 | 6 | SEM_SEG_HEAD: 7 | NAME: "PropFormerHead" 8 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 9 | IGNORE_VALUE: 255 10 | NUM_CLASSES: 171 11 | COMMON_STRIDE: 4 # not used, hard-coded 12 | LOSS_WEIGHT: 1.0 13 | CONVS_DIM: 256 14 | MASK_DIM: 256 15 | NORM: "GN" 16 | 17 | MASK_FORMER: 18 | TRANSFORMER_IN_FEATURE: "res5" 19 | DEEP_SUPERVISION: True 20 | NO_OBJECT_WEIGHT: 0.1 21 | DICE_WEIGHT: 1.0 22 | MASK_WEIGHT: 20.0 23 | HIDDEN_DIM: 256 24 | NUM_OBJECT_QUERIES: 100 25 | NHEADS: 8 26 | DROPOUT: 0.1 27 | DIM_FEEDFORWARD: 2048 28 | ENC_LAYERS: 0 29 | DEC_LAYERS: 6 30 | PRE_NORM: False 31 | -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/Base-COCO-stuff-10k.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | BACKBONE: 3 | FREEZE_AT: 0 4 | NAME: "build_resnet_backbone" 5 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 6 | PIXEL_MEAN: [123.675, 116.280, 103.530] 7 | PIXEL_STD: [58.395, 57.120, 57.375] 8 | RESNETS: 9 | DEPTH: 50 10 | STEM_TYPE: "basic" # not used 11 | STEM_OUT_CHANNELS: 64 12 | STRIDE_IN_1X1: False 13 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 14 | # NORM: "SyncBN" 15 | RES5_MULTI_GRID: [1, 1, 1] # not used 16 | DATASETS: 17 | TRAIN: ("coco_2017_train_stuff_10k_sem_seg",) 18 | TEST: ("coco_2017_test_stuff_10k_sem_seg",) 19 | 20 | SOLVER: 21 | IMS_PER_BATCH: 8 22 | BASE_LR: 0.0001 23 | MAX_ITER: 60000 24 | 25 | WARMUP_FACTOR: 1.0 26 | WARMUP_ITERS: 0 27 | WEIGHT_DECAY: 0.0001 28 | OPTIMIZER: "ADAMW" 29 | LR_SCHEDULER_NAME: "WarmupPolyLR" 30 | BACKBONE_MULTIPLIER: 0.1 31 | CLIP_GRADIENTS: 32 | ENABLED: True 33 | CLIP_TYPE: "full_model" 34 | CLIP_VALUE: 0.01 35 | NORM_TYPE: 2.0 36 | INPUT: 37 | MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 640) for x in range(5, 16)]"] 38 | MIN_SIZE_TRAIN_SAMPLING: "choice" 39 | MIN_SIZE_TEST: 640 40 | MAX_SIZE_TRAIN: 2560 41 | MAX_SIZE_TEST: 2560 42 | CROP: 43 | ENABLED: True 44 | TYPE: "absolute" 45 | SIZE: (640, 640) 46 | SINGLE_CATEGORY_MAX_AREA: 1.0 47 | COLOR_AUG_SSD: True 48 | SIZE_DIVISIBILITY: 640 # used in dataset mapper 49 | FORMAT: "RGB" 50 | DATASET_MAPPER_NAME: "weakshot_sem_seg_mapper" 51 | TEST: 52 | EVAL_PERIOD: 10000 53 | AUG: 54 | ENABLED: False 55 | MIN_SIZES: [320, 480, 640, 800, 960, 1120] 56 | MAX_SIZE: 4480 57 | FLIP: True 58 | DATALOADER: 59 | FILTER_EMPTY_ANNOTATIONS: True 60 | NUM_WORKERS: 4 61 | VERSION: 2 62 | -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s1_seg.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-COCO-stuff-10k-prop.yaml 2 | 3 | DATASETS: 4 | TRAIN: ("coco_stuff_split1_train",) 5 | # TEST: ("coco_stuff_split1_train","coco_stuff_split1_val") 6 | TEST: ("coco_stuff_split1_val",) 7 | 8 | ASM: 9 | HasMaskCls: 1. 
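# The ASM weights scale the classification (*Cls) and mask (*Mask) losses depending
# on whether the matched class carries a ground-truth mask (base) or not (novel);
# NoMaskMask stays at 0 since novel classes have no masks to learn from directly.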
10 | NoMaskCls: 1. 11 | HasMaskMask: 1. 12 | NoMaskMask: 0. 13 | 14 | LOSS: 15 | AssignCls: 1. 16 | 17 | AssignMaskDICE: 1. 18 | AssignMaskMASK: 20. 19 | 20 | CompSupNovel: 0.15 21 | 22 | EVAL: 23 | # bg_base_novel 24 | BIAS: ( "1_1_1", ) 25 | 26 | MODEL: 27 | SEM_SEG_HEAD: 28 | NUM_CLASSES: 171 29 | 30 | MASK_FORMER: 31 | NUM_OBJECT_QUERIES: 100 32 | CLS_LOSS_TYPE: SoftmaxBCE 33 | 34 | SOLVER: 35 | CHECKPOINT_PERIOD: 999999 36 | 37 | OUTPUT_PREFIX: COCO_S1 -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s1_seg_crosim.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: s1_seg.yaml 2 | 3 | INPUT: 4 | DATASET_MAPPER_NAME: pair_mapper 5 | 6 | CROSS_IMG_SIM: 7 | PAIR_TYPE: Deconf0.01 8 | 9 | BASE_LOSS: 1.0 10 | 11 | DISTILL_LOSS: 0. 12 | DISTILL_TO: NovelScore # [NovelScore, FullScore, FullLogit, FullLogitC] 13 | DISTILL_FUNC: ce # [ce, cce, b0.5] 14 | 15 | SOLVER: 16 | IMS_PER_BATCH: 4 -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s1_seg_crosim_resume.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: s1_seg.yaml 2 | 3 | INPUT: 4 | DATASET_MAPPER_NAME: pair_mapper 5 | 6 | CROSS_IMG_SIM: 7 | PAIR_TYPE: Deconf0.01 8 | 9 | BASE_LOSS: 1.0 10 | 11 | DISTILL_LOSS: 0. 12 | DISTILL_TO: NovelScore 13 | DISTILL_FUNC: ce # [ce, cce, b0.5] 14 | 15 | TEACH_DETACH: False 16 | BASE_DETACH: False 17 | LayerNum: 3 18 | 19 | SOLVER: 20 | IMS_PER_BATCH: 4 21 | MAX_ITER: 5000 22 | BASE_LR: 0.00001 23 | 24 | MODEL: 25 | WEIGHTS: datasets/SimFormer_S1.pth -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s1_seg_pseudo_label.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-COCO-stuff-10k-prop.yaml 2 | 3 | GeneratePseudoLabel: True 4 | 5 | DATASETS: 6 | TRAIN: ("coco_stuff_split1_train",) 7 | TEST: ("coco_stuff_split1_train",) 8 | # TEST: ("coco_stuff_split1_val",) 9 | 10 | MODEL: 11 | SEM_SEG_HEAD: 12 | NUM_CLASSES: 171 13 | 14 | MASK_FORMER: 15 | NUM_OBJECT_QUERIES: 100 16 | CLS_LOSS_TYPE: SoftmaxBCE # SoftmaxBCE / SigmoidBCE / RIB / MSM 17 | 18 | SOLVER: 19 | CHECKPOINT_PERIOD: 999999 20 | 21 | TEST: 22 | AUG: 23 | ENABLED: False 24 | # MIN_SIZES: [320, 480, 640, 800, 960, 1120] 25 | MIN_SIZES: [ 320, 480, 640, 800, 960] 26 | MAX_SIZE: 4480 27 | FLIP: True 28 | 29 | OUTPUT_PREFIX: GeneratePseudoLabelS1 -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s1_seg_retraining.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-COCO-stuff-10k-prop.yaml 2 | 3 | DATASETS: 4 | TRAIN: ("coco_stuff_split1_train",) 5 | # TEST: ("coco_stuff_split1_train",) 6 | # TEST: ("coco_stuff_split1_train","coco_stuff_split1_val",) 7 | TEST: ("coco_stuff_split1_val",) 8 | 9 | NOVEL_HAS_MASK: True 10 | PSEUDO_LABEL_PATH: pseudo_ours_COCO_S1 # [pseudo_ours_COCO_S1, pseudo_retab_COCO_S1] 11 | 12 | ASM: 13 | HasMaskCls: 1. 14 | NoMaskCls: 1. 15 | HasMaskMask: 1. 16 | NoMaskMask: 0. 17 | 18 | LOSS: 19 | AssignCls: 1. 20 | MILCls: 0. 21 | 22 | AssignMaskDICE: 1. 23 | AssignMaskMASK: 20. 24 | 25 | PoolMask: 0.0 26 | 27 | CompSupNovel: 0.0 28 | EntroRegNovel: 0.0 29 | 30 | PER_PROP_ENTROPY: 0. 31 | CAT_MASK_ENTROPY: 0. 
32 | 33 | EVAL: 34 | # bg_base_novel 35 | BIAS: ( "1_1_1", ) 36 | 37 | MODEL: 38 | SEM_SEG_HEAD: 39 | NUM_CLASSES: 171 40 | 41 | MASK_FORMER: 42 | NUM_OBJECT_QUERIES: 100 43 | CLS_LOSS_TYPE: SoftmaxBCE # SoftmaxBCE / SigmoidBCE / RIB / MSM 44 | 45 | SOLVER: 46 | CHECKPOINT_PERIOD: 999999 47 | 48 | OUTPUT_PREFIX: COCO_S1_RETRAINING -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s2_seg.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: s1_seg.yaml 2 | 3 | DATASETS: 4 | TRAIN: ("coco_stuff_split2_train",) 5 | # TEST: ("coco_stuff_split2_train","coco_stuff_split2_val") 6 | TEST: ("coco_stuff_split2_val",) 7 | 8 | OUTPUT_PREFIX: COCO_S2 -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s2_seg_crosim.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: s2_seg.yaml 2 | 3 | INPUT: 4 | DATASET_MAPPER_NAME: pair_mapper 5 | 6 | CROSS_IMG_SIM: 7 | PAIR_TYPE: Deconf0.01 8 | 9 | BASE_LOSS: 1.0 10 | 11 | DISTILL_LOSS: 0.1 12 | DISTILL_TO: NovelScore # [NovelScore, FullScore, FullLogit, FullLogitC] 13 | DISTILL_FUNC: cce # [ce, cce, b0.5] 14 | 15 | SOLVER: 16 | IMS_PER_BATCH: 4 -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s2_seg_pseudo_label.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-COCO-stuff-10k-prop.yaml 2 | 3 | GeneratePseudoLabel: True 4 | 5 | DATASETS: 6 | TRAIN: ("coco_stuff_split2_train",) 7 | TEST: ("coco_stuff_split2_train",) 8 | 9 | MODEL: 10 | SEM_SEG_HEAD: 11 | NUM_CLASSES: 171 12 | 13 | MASK_FORMER: 14 | NUM_OBJECT_QUERIES: 100 15 | CLS_LOSS_TYPE: SoftmaxBCE # SoftmaxBCE / SigmoidBCE / RIB / MSM 16 | 17 | SOLVER: 18 | CHECKPOINT_PERIOD: 999999 19 | 20 | OUTPUT_PREFIX: GeneratePseudoLabelS2 -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s2_seg_retraining.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-COCO-stuff-10k-prop.yaml 2 | 3 | DATASETS: 4 | TRAIN: ("coco_stuff_split2_train",) 5 | # TEST: ("coco_stuff_split2_train",) 6 | # TEST: ("coco_stuff_split2_train","coco_stuff_split2_val",) 7 | TEST: ("coco_stuff_split2_val",) 8 | 9 | NOVEL_HAS_MASK: True 10 | PSEUDO_LABEL_PATH: pseudo_ours_COCO_S2 # [pseudo_ours_COCO_S1, pseudo_retab_COCO_S1] 11 | 12 | ASM: 13 | HasMaskCls: 1. 14 | NoMaskCls: 1. 15 | HasMaskMask: 1. 16 | NoMaskMask: 0. 17 | 18 | LOSS: 19 | AssignCls: 1. 20 | MILCls: 0. 21 | 22 | AssignMaskDICE: 1. 23 | AssignMaskMASK: 20. 24 | 25 | PoolMask: 0.0 26 | 27 | CompSupNovel: 0.0 28 | EntroRegNovel: 0.0 29 | 30 | PER_PROP_ENTROPY: 0. 31 | CAT_MASK_ENTROPY: 0. 
32 | 33 | EVAL: 34 | # bg_base_novel 35 | BIAS: ( "1_1_1", ) 36 | 37 | MODEL: 38 | SEM_SEG_HEAD: 39 | NUM_CLASSES: 171 40 | 41 | MASK_FORMER: 42 | NUM_OBJECT_QUERIES: 100 43 | CLS_LOSS_TYPE: SoftmaxBCE # SoftmaxBCE / SigmoidBCE / RIB / MSM 44 | 45 | SOLVER: 46 | CHECKPOINT_PERIOD: 999999 47 | 48 | OUTPUT_PREFIX: COCO_S2_RETRAINING -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s3_seg.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: s1_seg.yaml 2 | 3 | DATASETS: 4 | TRAIN: ("coco_stuff_split3_train",) 5 | # TEST: ("coco_stuff_split3_train","coco_stuff_split3_val") 6 | TEST: ("coco_stuff_split3_val",) 7 | 8 | OUTPUT_PREFIX: COCO_S3 -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s3_seg_crosim.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: s3_seg.yaml 2 | 3 | INPUT: 4 | DATASET_MAPPER_NAME: pair_mapper 5 | 6 | CROSS_IMG_SIM: 7 | PAIR_TYPE: Deconf0.01 8 | 9 | BASE_LOSS: 1.0 10 | 11 | DISTILL_LOSS: 0.1 12 | DISTILL_TO: NovelScore # [NovelScore, FullScore, FullLogit, FullLogitC] 13 | DISTILL_FUNC: ce # [ce, cce, b0.5] 14 | 15 | SOLVER: 16 | IMS_PER_BATCH: 4 -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s3_seg_pseudo_label.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-COCO-stuff-10k-prop.yaml 2 | 3 | GeneratePseudoLabel: True 4 | 5 | DATASETS: 6 | TRAIN: ("coco_stuff_split3_train",) 7 | TEST: ("coco_stuff_split3_train",) 8 | 9 | MODEL: 10 | SEM_SEG_HEAD: 11 | NUM_CLASSES: 171 12 | 13 | MASK_FORMER: 14 | NUM_OBJECT_QUERIES: 100 15 | CLS_LOSS_TYPE: SoftmaxBCE # SoftmaxBCE / SigmoidBCE / RIB / MSM 16 | 17 | SOLVER: 18 | CHECKPOINT_PERIOD: 999999 19 | 20 | OUTPUT_PREFIX: GeneratePseudoLabelS3 -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s3_seg_retraining.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-COCO-stuff-10k-prop.yaml 2 | 3 | DATASETS: 4 | TRAIN: ("coco_stuff_split3_train",) 5 | # TEST: ("coco_stuff_split3_train",) 6 | # TEST: ("coco_stuff_split3_train","coco_stuff_split3_val",) 7 | TEST: ("coco_stuff_split3_val",) 8 | 9 | NOVEL_HAS_MASK: True 10 | PSEUDO_LABEL_PATH: pseudo_ours_COCO_S3 # [pseudo_ours_COCO_S1, pseudo_retab_COCO_S1] 11 | 12 | ASM: 13 | HasMaskCls: 1. 14 | NoMaskCls: 1. 15 | HasMaskMask: 1. 16 | NoMaskMask: 0. 17 | 18 | LOSS: 19 | AssignCls: 1. 20 | MILCls: 0. 21 | 22 | AssignMaskDICE: 1. 23 | AssignMaskMASK: 20. 24 | 25 | PoolMask: 0.0 26 | 27 | CompSupNovel: 0.0 28 | EntroRegNovel: 0.0 29 | 30 | PER_PROP_ENTROPY: 0. 31 | CAT_MASK_ENTROPY: 0. 
32 | 33 | EVAL: 34 | # bg_base_novel 35 | BIAS: ( "1_1_1", ) 36 | 37 | MODEL: 38 | SEM_SEG_HEAD: 39 | NUM_CLASSES: 171 40 | 41 | MASK_FORMER: 42 | NUM_OBJECT_QUERIES: 100 43 | CLS_LOSS_TYPE: SoftmaxBCE # SoftmaxBCE / SigmoidBCE / RIB / MSM 44 | 45 | SOLVER: 46 | CHECKPOINT_PERIOD: 999999 47 | 48 | OUTPUT_PREFIX: COCO_S3_RETRAINING -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s4_seg.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: s1_seg.yaml 2 | 3 | DATASETS: 4 | TRAIN: ("coco_stuff_split4_train",) 5 | # TEST: ("coco_stuff_split4_train","coco_stuff_split4_val") 6 | TEST: ("coco_stuff_split4_val",) 7 | 8 | OUTPUT_PREFIX: COCO_S4 -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s4_seg_crosim.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: s4_seg.yaml 2 | 3 | INPUT: 4 | DATASET_MAPPER_NAME: pair_mapper 5 | 6 | CROSS_IMG_SIM: 7 | PAIR_TYPE: Deconf0.01 8 | 9 | BASE_LOSS: 1.0 10 | 11 | DISTILL_LOSS: 0.1 12 | DISTILL_TO: NovelScore # [NovelScore, FullScore, FullLogit, FullLogitC] 13 | DISTILL_FUNC: cce # [ce, cce, b0.5] 14 | 15 | SOLVER: 16 | IMS_PER_BATCH: 4 -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s4_seg_pseudo_label.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-COCO-stuff-10k-prop.yaml 2 | 3 | GeneratePseudoLabel: True 4 | 5 | DATASETS: 6 | TRAIN: ("coco_stuff_split4_train",) 7 | TEST: ("coco_stuff_split4_train",) 8 | 9 | MODEL: 10 | SEM_SEG_HEAD: 11 | NUM_CLASSES: 171 12 | 13 | MASK_FORMER: 14 | NUM_OBJECT_QUERIES: 100 15 | CLS_LOSS_TYPE: SoftmaxBCE # SoftmaxBCE / SigmoidBCE / RIB / MSM 16 | 17 | SOLVER: 18 | CHECKPOINT_PERIOD: 999999 19 | 20 | OUTPUT_PREFIX: GeneratePseudoLabelS4 -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s4_seg_retraining.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-COCO-stuff-10k-prop.yaml 2 | 3 | DATASETS: 4 | TRAIN: ("coco_stuff_split4_train",) 5 | # TEST: ("coco_stuff_split4_train",) 6 | # TEST: ("coco_stuff_split4_train","coco_stuff_split4_val",) 7 | TEST: ("coco_stuff_split4_val",) 8 | 9 | NOVEL_HAS_MASK: True 10 | PSEUDO_LABEL_PATH: pseudo_ours_COCO_S4 # [pseudo_ours_COCO_S1, pseudo_retab_COCO_S1] 11 | 12 | ASM: 13 | HasMaskCls: 1. 14 | NoMaskCls: 1. 15 | HasMaskMask: 1. 16 | NoMaskMask: 0. 17 | 18 | LOSS: 19 | AssignCls: 1. 20 | MILCls: 0. 21 | 22 | AssignMaskDICE: 1. 23 | AssignMaskMASK: 20. 24 | 25 | PoolMask: 0.0 26 | 27 | CompSupNovel: 0.0 28 | EntroRegNovel: 0.0 29 | 30 | PER_PROP_ENTROPY: 0. 31 | CAT_MASK_ENTROPY: 0.
32 | 33 | EVAL: 34 | # bg_base_novel 35 | BIAS: ( "1_1_1", ) 36 | 37 | MODEL: 38 | SEM_SEG_HEAD: 39 | NUM_CLASSES: 171 40 | 41 | MASK_FORMER: 42 | NUM_OBJECT_QUERIES: 100 43 | CLS_LOSS_TYPE: SoftmaxBCE # SoftmaxBCE / SigmoidBCE / RIB / MSM 44 | 45 | SOLVER: 46 | CHECKPOINT_PERIOD: 999999 47 | 48 | OUTPUT_PREFIX: COCO_S4_RETRAINING -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s5_seg.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: s1_seg.yaml 2 | 3 | DATASETS: 4 | TRAIN: ("coco_stuff_split5_train",) 5 | # TEST: ("coco_stuff_split5_train","coco_stuff_split5_val") 6 | TEST: ("coco_stuff_split5_val",) 7 | 8 | OUTPUT_PREFIX: COCO_S5 -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s5_seg_crosim.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: s5_seg.yaml 2 | 3 | INPUT: 4 | DATASET_MAPPER_NAME: pair_mapper 5 | 6 | CROSS_IMG_SIM: 7 | PAIR_TYPE: Deconf0.01 8 | 9 | BASE_LOSS: 1.0 10 | 11 | DISTILL_LOSS: 0.1 12 | DISTILL_TO: NovelScore # [NovelScore, FullScore, FullLogit, FullLogitC] 13 | DISTILL_FUNC: cce # [ce, cce, b0.5] 14 | 15 | SOLVER: 16 | IMS_PER_BATCH: 4 -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s5_seg_pseudo_label.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-COCO-stuff-10k-prop.yaml 2 | 3 | GeneratePseudoLabel: True 4 | 5 | DATASETS: 6 | TRAIN: ("coco_stuff_split5_train",) 7 | TEST: ("coco_stuff_split5_train",) 8 | 9 | MODEL: 10 | SEM_SEG_HEAD: 11 | NUM_CLASSES: 171 12 | 13 | MASK_FORMER: 14 | NUM_OBJECT_QUERIES: 100 15 | CLS_LOSS_TYPE: SoftmaxBCE # SoftmaxBCE / SigmoidBCE / RIB / MSM 16 | 17 | SOLVER: 18 | CHECKPOINT_PERIOD: 999999 19 | 20 | OUTPUT_PREFIX: GeneratePseudoLabelS5 -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s5_seg_retraining.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-COCO-stuff-10k-prop.yaml 2 | 3 | DATASETS: 4 | TRAIN: ("coco_stuff_split5_train",) 5 | TEST: ("coco_stuff_split5_val",) 6 | 7 | NOVEL_HAS_MASK: True 8 | PSEUDO_LABEL_PATH: pseudo_ours_COCO_S5 # [pseudo_ours_COCO_S1, pseudo_retab_COCO_S1] 9 | 10 | ASM: 11 | HasMaskCls: 1. 12 | NoMaskCls: 1. 13 | HasMaskMask: 1. 14 | NoMaskMask: 0. 15 | 16 | LOSS: 17 | AssignCls: 1. 18 | MILCls: 0. 19 | 20 | AssignMaskDICE: 1. 21 | AssignMaskMASK: 20. 22 | 23 | PoolMask: 0.0 24 | 25 | CompSupNovel: 0.0 26 | EntroRegNovel: 0.0 27 | 28 | PER_PROP_ENTROPY: 0. 29 | CAT_MASK_ENTROPY: 0. 
30 | 31 | EVAL: 32 | # bg_base_novel 33 | BIAS: ( "1_1_1", ) 34 | 35 | MODEL: 36 | SEM_SEG_HEAD: 37 | NUM_CLASSES: 171 38 | 39 | MASK_FORMER: 40 | NUM_OBJECT_QUERIES: 100 41 | CLS_LOSS_TYPE: SoftmaxBCE # SoftmaxBCE / SigmoidBCE / RIB / MSM 42 | 43 | SOLVER: 44 | CHECKPOINT_PERIOD: 999999 45 | 46 | OUTPUT_PREFIX: COCO_S5_RETRAINING -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s6_seg.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: s1_seg.yaml 2 | 3 | DATASETS: 4 | TRAIN: ("coco_stuff_split6_train",) 5 | # TEST: ("coco_stuff_split6_train","coco_stuff_split6_val") 6 | TEST: ("coco_stuff_split6_val",) 7 | 8 | OUTPUT_PREFIX: COCO_S6 -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s6_seg_crosim.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: s6_seg.yaml 2 | 3 | INPUT: 4 | DATASET_MAPPER_NAME: pair_mapper 5 | 6 | CROSS_IMG_SIM: 7 | PAIR_TYPE: Deconf0.01 8 | 9 | BASE_LOSS: 1.0 10 | 11 | DISTILL_LOSS: 0.1 12 | DISTILL_TO: NovelScore # [NovelScore, FullScore, FullLogit, FullLogitC] 13 | DISTILL_FUNC: cce # [ce, cce, b0.5] 14 | 15 | SOLVER: 16 | IMS_PER_BATCH: 4 -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s6_seg_pseudo_label.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-COCO-stuff-10k-prop.yaml 2 | 3 | GeneratePseudoLabel: True 4 | 5 | DATASETS: 6 | TRAIN: ("coco_stuff_split6_train",) 7 | TEST: ("coco_stuff_split6_train",) 8 | 9 | MODEL: 10 | SEM_SEG_HEAD: 11 | NUM_CLASSES: 171 12 | 13 | MASK_FORMER: 14 | NUM_OBJECT_QUERIES: 100 15 | CLS_LOSS_TYPE: SoftmaxBCE # SoftmaxBCE / SigmoidBCE / RIB / MSM 16 | 17 | SOLVER: 18 | CHECKPOINT_PERIOD: 999999 19 | 20 | OUTPUT_PREFIX: GeneratePseudoLabelS6 -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s6_seg_retraining.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-COCO-stuff-10k-prop.yaml 2 | 3 | DATASETS: 4 | TRAIN: ("coco_stuff_split6_train",) 5 | TEST: ("coco_stuff_split6_val",) 6 | 7 | NOVEL_HAS_MASK: True 8 | PSEUDO_LABEL_PATH: pseudo_ours_COCO_S6 # [pseudo_ours_COCO_S1, pseudo_retab_COCO_S1] 9 | 10 | ASM: 11 | HasMaskCls: 1. 12 | NoMaskCls: 1. 13 | HasMaskMask: 1. 14 | NoMaskMask: 0. 15 | 16 | LOSS: 17 | AssignCls: 1. 18 | MILCls: 0. 19 | 20 | AssignMaskDICE: 1. 21 | AssignMaskMASK: 20. 22 | 23 | PoolMask: 0.0 24 | 25 | CompSupNovel: 0.0 26 | EntroRegNovel: 0.0 27 | 28 | PER_PROP_ENTROPY: 0. 29 | CAT_MASK_ENTROPY: 0. 
30 | 31 | EVAL: 32 | # bg_base_novel 33 | BIAS: ( "1_1_1", ) 34 | 35 | MODEL: 36 | SEM_SEG_HEAD: 37 | NUM_CLASSES: 171 38 | 39 | MASK_FORMER: 40 | NUM_OBJECT_QUERIES: 100 41 | CLS_LOSS_TYPE: SoftmaxBCE # SoftmaxBCE / SigmoidBCE / RIB / MSM 42 | 43 | SOLVER: 44 | CHECKPOINT_PERIOD: 999999 45 | 46 | OUTPUT_PREFIX: COCO_S6_RETRAINING -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s7_seg.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: s1_seg.yaml 2 | 3 | DATASETS: 4 | TRAIN: ("coco_stuff_split7_train",) 5 | # TEST: ("coco_stuff_split7_train","coco_stuff_split7_val") 6 | TEST: ("coco_stuff_split7_val",) 7 | 8 | OUTPUT_PREFIX: COCO_S7 -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s7_seg_crosim.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: s7_seg.yaml 2 | 3 | INPUT: 4 | DATASET_MAPPER_NAME: pair_mapper 5 | 6 | CROSS_IMG_SIM: 7 | PAIR_TYPE: Deconf0.01 8 | 9 | BASE_LOSS: 1.0 10 | 11 | DISTILL_LOSS: 0.1 12 | DISTILL_TO: NovelScore # [NovelScore, FullScore, FullLogit, FullLogitC] 13 | DISTILL_FUNC: cce # [ce, cce, b0.5] 14 | 15 | SOLVER: 16 | IMS_PER_BATCH: 4 -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s7_seg_pseudo_label.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-COCO-stuff-10k-prop.yaml 2 | 3 | GeneratePseudoLabel: True 4 | 5 | DATASETS: 6 | TRAIN: ("coco_stuff_split7_train",) 7 | TEST: ("coco_stuff_split7_train",) 8 | 9 | MODEL: 10 | SEM_SEG_HEAD: 11 | NUM_CLASSES: 171 12 | 13 | MASK_FORMER: 14 | NUM_OBJECT_QUERIES: 100 15 | CLS_LOSS_TYPE: SoftmaxBCE # SoftmaxBCE / SigmoidBCE / RIB / MSM 16 | 17 | SOLVER: 18 | CHECKPOINT_PERIOD: 999999 19 | 20 | OUTPUT_PREFIX: GeneratePseudoLabelS7 -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s7_seg_retraining.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-COCO-stuff-10k-prop.yaml 2 | 3 | DATASETS: 4 | TRAIN: ("coco_stuff_split7_train",) 5 | TEST: ("coco_stuff_split7_val",) 6 | 7 | NOVEL_HAS_MASK: True 8 | PSEUDO_LABEL_PATH: pseudo_ours_COCO_S7 # [pseudo_ours_COCO_S1, pseudo_retab_COCO_S1] 9 | 10 | ASM: 11 | HasMaskCls: 1. 12 | NoMaskCls: 1. 13 | HasMaskMask: 1. 14 | NoMaskMask: 0. 15 | 16 | LOSS: 17 | AssignCls: 1. 18 | MILCls: 0. 19 | 20 | AssignMaskDICE: 1. 21 | AssignMaskMASK: 20. 22 | 23 | PoolMask: 0.0 24 | 25 | CompSupNovel: 0.0 26 | EntroRegNovel: 0.0 27 | 28 | PER_PROP_ENTROPY: 0. 29 | CAT_MASK_ENTROPY: 0. 
30 | 31 | EVAL: 32 | # bg_base_novel 33 | BIAS: ( "1_1_1", ) 34 | 35 | MODEL: 36 | SEM_SEG_HEAD: 37 | NUM_CLASSES: 171 38 | 39 | MASK_FORMER: 40 | NUM_OBJECT_QUERIES: 100 41 | CLS_LOSS_TYPE: SoftmaxBCE # SoftmaxBCE / SigmoidBCE / RIB / MSM 42 | 43 | SOLVER: 44 | CHECKPOINT_PERIOD: 999999 45 | 46 | OUTPUT_PREFIX: COCO_S7_RETRAINING -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s8_seg.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: s1_seg.yaml 2 | 3 | DATASETS: 4 | TRAIN: ("coco_stuff_split8_train",) 5 | # TEST: ("coco_stuff_split8_train","coco_stuff_split8_val") 6 | TEST: ("coco_stuff_split8_val",) 7 | 8 | OUTPUT_PREFIX: COCO_S8 -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s8_seg_crosim.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: s8_seg.yaml 2 | 3 | INPUT: 4 | DATASET_MAPPER_NAME: pair_mapper 5 | 6 | CROSS_IMG_SIM: 7 | PAIR_TYPE: Deconf0.01 8 | 9 | BASE_LOSS: 1.0 10 | 11 | DISTILL_LOSS: 0.1 12 | DISTILL_TO: NovelScore # [NovelScore, FullScore, FullLogit, FullLogitC] 13 | DISTILL_FUNC: cce # [ce, cce, b0.5] 14 | 15 | SOLVER: 16 | IMS_PER_BATCH: 4 -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s8_seg_pseudo_label.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-COCO-stuff-10k-prop.yaml 2 | 3 | GeneratePseudoLabel: True 4 | 5 | DATASETS: 6 | TRAIN: ("coco_stuff_split8_train",) 7 | TEST: ("coco_stuff_split8_train",) 8 | 9 | MODEL: 10 | SEM_SEG_HEAD: 11 | NUM_CLASSES: 171 12 | 13 | MASK_FORMER: 14 | NUM_OBJECT_QUERIES: 100 15 | CLS_LOSS_TYPE: SoftmaxBCE # SoftmaxBCE / SigmoidBCE / RIB / MSM 16 | 17 | SOLVER: 18 | CHECKPOINT_PERIOD: 999999 19 | 20 | OUTPUT_PREFIX: GeneratePseudoLabelS8 -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s8_seg_retraining.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-COCO-stuff-10k-prop.yaml 2 | 3 | DATASETS: 4 | TRAIN: ("coco_stuff_split8_train",) 5 | TEST: ("coco_stuff_split8_val",) 6 | 7 | NOVEL_HAS_MASK: True 8 | PSEUDO_LABEL_PATH: pseudo_ours_COCO_S8 # [pseudo_ours_COCO_S1, pseudo_retab_COCO_S1] 9 | 10 | ASM: 11 | HasMaskCls: 1. 12 | NoMaskCls: 1. 13 | HasMaskMask: 1. 14 | NoMaskMask: 0. 15 | 16 | LOSS: 17 | AssignCls: 1. 18 | MILCls: 0. 19 | 20 | AssignMaskDICE: 1. 21 | AssignMaskMASK: 20. 22 | 23 | PoolMask: 0.0 24 | 25 | CompSupNovel: 0.0 26 | EntroRegNovel: 0.0 27 | 28 | PER_PROP_ENTROPY: 0. 29 | CAT_MASK_ENTROPY: 0. 
30 | 31 | EVAL: 32 | # bg_base_novel 33 | BIAS: ( "1_1_1", ) 34 | 35 | MODEL: 36 | SEM_SEG_HEAD: 37 | NUM_CLASSES: 171 38 | 39 | MASK_FORMER: 40 | NUM_OBJECT_QUERIES: 100 41 | CLS_LOSS_TYPE: SoftmaxBCE # SoftmaxBCE / SigmoidBCE / RIB / MSM 42 | 43 | SOLVER: 44 | CHECKPOINT_PERIOD: 999999 45 | 46 | OUTPUT_PREFIX: COCO_S8_RETRAINING -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s9_seg.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: s1_seg.yaml 2 | 3 | DATASETS: 4 | TRAIN: ("coco_stuff_split9_train",) 5 | # TEST: ("coco_stuff_split9_train","coco_stuff_split9_val") 6 | TEST: ("coco_stuff_split9_val",) 7 | 8 | OUTPUT_PREFIX: COCO_S9 -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s9_seg_crosim.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: s9_seg.yaml 2 | 3 | INPUT: 4 | DATASET_MAPPER_NAME: pair_mapper 5 | 6 | CROSS_IMG_SIM: 7 | PAIR_TYPE: Deconf0.01 8 | 9 | BASE_LOSS: 1.0 10 | 11 | DISTILL_LOSS: 0.1 12 | DISTILL_TO: NovelScore # [NovelScore, FullScore, FullLogit, FullLogitC] 13 | DISTILL_FUNC: cce # [ce, cce, b0.5] 14 | 15 | SOLVER: 16 | IMS_PER_BATCH: 4 -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s9_seg_pseudo_label.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-COCO-stuff-10k-prop.yaml 2 | 3 | GeneratePseudoLabel: True 4 | 5 | DATASETS: 6 | TRAIN: ("coco_stuff_split9_train",) 7 | TEST: ("coco_stuff_split9_train",) 8 | 9 | MODEL: 10 | SEM_SEG_HEAD: 11 | NUM_CLASSES: 171 12 | 13 | MASK_FORMER: 14 | NUM_OBJECT_QUERIES: 100 15 | CLS_LOSS_TYPE: SoftmaxBCE # SoftmaxBCE / SigmoidBCE / RIB / MSM 16 | 17 | SOLVER: 18 | CHECKPOINT_PERIOD: 999999 19 | 20 | OUTPUT_PREFIX: GeneratePseudoLabelS9 -------------------------------------------------------------------------------- /_1Prop_Cfgs/coco_sutff_10k/s9_seg_retraining.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-COCO-stuff-10k-prop.yaml 2 | 3 | DATASETS: 4 | TRAIN: ("coco_stuff_split9_train",) 5 | TEST: ("coco_stuff_split9_val",) 6 | 7 | NOVEL_HAS_MASK: True 8 | PSEUDO_LABEL_PATH: pseudo_ours_COCO_S9 # [pseudo_ours_COCO_S1, pseudo_retab_COCO_S1] 9 | 10 | ASM: 11 | HasMaskCls: 1. 12 | NoMaskCls: 1. 13 | HasMaskMask: 1. 14 | NoMaskMask: 0. 15 | 16 | LOSS: 17 | AssignCls: 1. 18 | MILCls: 0. 19 | 20 | AssignMaskDICE: 1. 21 | AssignMaskMASK: 20. 22 | 23 | PoolMask: 0.0 24 | 25 | CompSupNovel: 0.0 26 | EntroRegNovel: 0.0 27 | 28 | PER_PROP_ENTROPY: 0. 29 | CAT_MASK_ENTROPY: 0. 
30 | 31 | EVAL: 32 | # bg_base_novel 33 | BIAS: ( "1_1_1", ) 34 | 35 | MODEL: 36 | SEM_SEG_HEAD: 37 | NUM_CLASSES: 171 38 | 39 | MASK_FORMER: 40 | NUM_OBJECT_QUERIES: 100 41 | CLS_LOSS_TYPE: SoftmaxBCE # SoftmaxBCE / SigmoidBCE / RIB / MSM 42 | 43 | SOLVER: 44 | CHECKPOINT_PERIOD: 999999 45 | 46 | OUTPUT_PREFIX: COCO_S9_RETRAINING -------------------------------------------------------------------------------- /configs/ade20k-150-panoptic/maskformer_panoptic_R101_bs16_720k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: maskformer_panoptic_R50_bs16_720k.yaml 2 | MODEL: 3 | WEIGHTS: "R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | STEM_TYPE: "basic" # not used 7 | STEM_OUT_CHANNELS: 64 8 | STRIDE_IN_1X1: False 9 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 10 | # NORM: "SyncBN" 11 | RES5_MULTI_GRID: [1, 1, 1] # not used 12 | -------------------------------------------------------------------------------- /configs/ade20k-150-panoptic/maskformer_panoptic_R50_bs16_720k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../ade20k-150/maskformer_R50_bs16_160k.yaml 2 | MODEL: 3 | SEM_SEG_HEAD: 4 | PIXEL_DECODER_NAME: "TransformerEncoderPixelDecoder" 5 | TRANSFORMER_ENC_LAYERS: 6 6 | MASK_FORMER: 7 | TRANSFORMER_IN_FEATURE: "transformer_encoder" 8 | TEST: 9 | PANOPTIC_ON: True 10 | OVERLAP_THRESHOLD: 0.8 11 | OBJECT_MASK_THRESHOLD: 0.7 12 | DATASETS: 13 | TRAIN: ("ade20k_panoptic_train",) 14 | TEST: ("ade20k_panoptic_val",) 15 | SOLVER: 16 | MAX_ITER: 720000 17 | INPUT: 18 | MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 640) for x in range(5, 21)]"] 19 | MIN_SIZE_TRAIN_SAMPLING: "choice" 20 | MIN_SIZE_TEST: 640 21 | MAX_SIZE_TRAIN: 2560 22 | MAX_SIZE_TEST: 2560 23 | CROP: 24 | ENABLED: True 25 | TYPE: "absolute" 26 | SIZE: (640, 640) 27 | SINGLE_CATEGORY_MAX_AREA: 1.0 28 | COLOR_AUG_SSD: True 29 | SIZE_DIVISIBILITY: 640 # used in dataset mapper 30 | FORMAT: "RGB" 31 | DATASET_MAPPER_NAME: "mask_former_panoptic" 32 | TEST: 33 | EVAL_PERIOD: 0 34 | -------------------------------------------------------------------------------- /configs/ade20k-150/Base-ADE20K-150.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | BACKBONE: 3 | FREEZE_AT: 0 4 | NAME: "build_resnet_backbone" 5 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 6 | PIXEL_MEAN: [123.675, 116.280, 103.530] 7 | PIXEL_STD: [58.395, 57.120, 57.375] 8 | RESNETS: 9 | DEPTH: 50 10 | STEM_TYPE: "basic" # not used 11 | STEM_OUT_CHANNELS: 64 12 | STRIDE_IN_1X1: False 13 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 14 | # NORM: "SyncBN" 15 | RES5_MULTI_GRID: [1, 1, 1] # not used 16 | DATASETS: 17 | TRAIN: ("ade20k_sem_seg_train",) 18 | TEST: ("ade20k_sem_seg_val",) 19 | SOLVER: 20 | IMS_PER_BATCH: 16 21 | BASE_LR: 0.0001 22 | MAX_ITER: 160000 23 | WARMUP_FACTOR: 1.0 24 | WARMUP_ITERS: 0 25 | WEIGHT_DECAY: 0.0001 26 | OPTIMIZER: "ADAMW" 27 | LR_SCHEDULER_NAME: "WarmupPolyLR" 28 | BACKBONE_MULTIPLIER: 0.1 29 | CLIP_GRADIENTS: 30 | ENABLED: True 31 | CLIP_TYPE: "full_model" 32 | CLIP_VALUE: 0.01 33 | NORM_TYPE: 2.0 34 | INPUT: 35 | MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 512) for x in range(5, 21)]"] 36 | MIN_SIZE_TRAIN_SAMPLING: "choice" 37 | MIN_SIZE_TEST: 512 38 | MAX_SIZE_TRAIN: 2048 39 | MAX_SIZE_TEST: 2048 40 | CROP: 41 | ENABLED: True 42 | TYPE: "absolute" 43 | SIZE: (512, 512) 44 | SINGLE_CATEGORY_MAX_AREA: 1.0 45 | COLOR_AUG_SSD: True 46 |
SIZE_DIVISIBILITY: 512 # used in dataset mapper 47 | FORMAT: "RGB" 48 | DATASET_MAPPER_NAME: "mask_former_semantic" 49 | TEST: 50 | EVAL_PERIOD: 5000 51 | AUG: 52 | ENABLED: False 53 | MIN_SIZES: [256, 384, 512, 640, 768, 896] 54 | MAX_SIZE: 3584 55 | FLIP: True 56 | DATALOADER: 57 | FILTER_EMPTY_ANNOTATIONS: True 58 | NUM_WORKERS: 4 59 | VERSION: 2 60 | -------------------------------------------------------------------------------- /configs/ade20k-150/maskformer_R101_bs16_160k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: maskformer_R50_bs16_160k.yaml 2 | MODEL: 3 | WEIGHTS: "R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | STEM_TYPE: "basic" # not used 7 | STEM_OUT_CHANNELS: 64 8 | STRIDE_IN_1X1: False 9 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 10 | # NORM: "SyncBN" 11 | RES5_MULTI_GRID: [1, 1, 1] # not used 12 | -------------------------------------------------------------------------------- /configs/ade20k-150/maskformer_R101c_bs16_160k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: maskformer_R50_bs16_160k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | -------------------------------------------------------------------------------- /configs/ade20k-150/maskformer_R50_bs16_160k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-ADE20K-150.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "MaskFormer" 4 | SEM_SEG_HEAD: 5 | NAME: "MaskFormerHead" 6 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 7 | IGNORE_VALUE: 255 8 | NUM_CLASSES: 150 9 | COMMON_STRIDE: 4 # not used, hard-coded 10 | LOSS_WEIGHT: 1.0 11 | CONVS_DIM: 256 12 | MASK_DIM: 256 13 | NORM: "GN" 14 | MASK_FORMER: 15 | TRANSFORMER_IN_FEATURE: "res5" 16 | DEEP_SUPERVISION: True 17 | NO_OBJECT_WEIGHT: 0.1 18 | DICE_WEIGHT: 1.0 19 | MASK_WEIGHT: 20.0 20 | HIDDEN_DIM: 256 21 | NUM_OBJECT_QUERIES: 100 22 | NHEADS: 8 23 | DROPOUT: 0.1 24 | DIM_FEEDFORWARD: 2048 25 | ENC_LAYERS: 0 26 | DEC_LAYERS: 6 27 | PRE_NORM: False 28 | -------------------------------------------------------------------------------- /configs/ade20k-150/per_pixel_baseline_R50_bs16_160k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-ADE20K-150.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | SEM_SEG_HEAD: 5 | NAME: "PerPixelBaselineHead" 6 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 7 | IGNORE_VALUE: 255 8 | NUM_CLASSES: 150 9 | COMMON_STRIDE: 4 # not used, hard-coded 10 | LOSS_WEIGHT: 1.0 11 | CONVS_DIM: 256 12 | MASK_DIM: 256 13 | NORM: "GN" 14 | -------------------------------------------------------------------------------- /configs/ade20k-150/per_pixel_baseline_plus_R50_bs16_160k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-ADE20K-150.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | SEM_SEG_HEAD: 5 | NAME: "PerPixelBaselinePlusHead" 6 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 7 | IGNORE_VALUE: 255 8 | NUM_CLASSES: 150 9 | COMMON_STRIDE: 4 # not used, hard-coded 10 | LOSS_WEIGHT: 1.0 11 | CONVS_DIM: 256 12 | MASK_DIM: 256 13 | NORM: "GN" 14 | MASK_FORMER: 15 | TRANSFORMER_IN_FEATURE: "res5" 16 |
DEEP_SUPERVISION: True 17 | HIDDEN_DIM: 256 18 | NUM_OBJECT_QUERIES: 150 # remember to set this to NUM_CLASSES 19 | NHEADS: 8 20 | DROPOUT: 0.1 21 | DIM_FEEDFORWARD: 2048 22 | ENC_LAYERS: 0 23 | DEC_LAYERS: 6 24 | PRE_NORM: False 25 | -------------------------------------------------------------------------------- /configs/ade20k-150/swin/maskformer_swin_base_IN21k_384_bs16_160k_res640.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../maskformer_R50_bs16_160k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "D2SwinTransformer" 5 | SWIN: 6 | EMBED_DIM: 128 7 | DEPTHS: [2, 2, 18, 2] 8 | NUM_HEADS: [4, 8, 16, 32] 9 | WINDOW_SIZE: 12 10 | APE: False 11 | DROP_PATH_RATE: 0.3 12 | PATCH_NORM: True 13 | PRETRAIN_IMG_SIZE: 384 14 | WEIGHTS: "swin_base_patch4_window12_384_22k.pkl" 15 | PIXEL_MEAN: [123.675, 116.280, 103.530] 16 | PIXEL_STD: [58.395, 57.120, 57.375] 17 | SOLVER: 18 | BASE_LR: 0.00006 19 | WARMUP_FACTOR: 1e-6 20 | WARMUP_ITERS: 1500 21 | WEIGHT_DECAY: 0.01 22 | WEIGHT_DECAY_NORM: 0.0 23 | WEIGHT_DECAY_EMBED: 0.0 24 | BACKBONE_MULTIPLIER: 1.0 25 | INPUT: 26 | MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 640) for x in range(5, 21)]"] 27 | MIN_SIZE_TRAIN_SAMPLING: "choice" 28 | MIN_SIZE_TEST: 640 29 | MAX_SIZE_TRAIN: 2560 30 | MAX_SIZE_TEST: 2560 31 | CROP: 32 | ENABLED: True 33 | TYPE: "absolute" 34 | SIZE: (640, 640) 35 | SINGLE_CATEGORY_MAX_AREA: 1.0 36 | COLOR_AUG_SSD: True 37 | SIZE_DIVISIBILITY: 640 # used in dataset mapper 38 | FORMAT: "RGB" 39 | TEST: 40 | EVAL_PERIOD: 5000 41 | AUG: 42 | ENABLED: False 43 | MIN_SIZES: [320, 480, 640, 800, 960, 1120] 44 | MAX_SIZE: 4480 45 | FLIP: True 46 | -------------------------------------------------------------------------------- /configs/ade20k-150/swin/maskformer_swin_large_IN21k_384_bs16_160k_res640.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../maskformer_R50_bs16_160k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "D2SwinTransformer" 5 | SWIN: 6 | EMBED_DIM: 192 7 | DEPTHS: [2, 2, 18, 2] 8 | NUM_HEADS: [6, 12, 24, 48] 9 | WINDOW_SIZE: 12 10 | APE: False 11 | DROP_PATH_RATE: 0.3 12 | PATCH_NORM: True 13 | PRETRAIN_IMG_SIZE: 384 14 | WEIGHTS: "swin_large_patch4_window12_384_22k.pkl" 15 | PIXEL_MEAN: [123.675, 116.280, 103.530] 16 | PIXEL_STD: [58.395, 57.120, 57.375] 17 | SOLVER: 18 | BASE_LR: 0.00006 19 | WARMUP_FACTOR: 1e-6 20 | WARMUP_ITERS: 1500 21 | WEIGHT_DECAY: 0.01 22 | WEIGHT_DECAY_NORM: 0.0 23 | WEIGHT_DECAY_EMBED: 0.0 24 | BACKBONE_MULTIPLIER: 1.0 25 | INPUT: 26 | MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 640) for x in range(5, 21)]"] 27 | MIN_SIZE_TRAIN_SAMPLING: "choice" 28 | MIN_SIZE_TEST: 640 29 | MAX_SIZE_TRAIN: 2560 30 | MAX_SIZE_TEST: 2560 31 | CROP: 32 | ENABLED: True 33 | TYPE: "absolute" 34 | SIZE: (640, 640) 35 | SINGLE_CATEGORY_MAX_AREA: 1.0 36 | COLOR_AUG_SSD: True 37 | SIZE_DIVISIBILITY: 640 # used in dataset mapper 38 | FORMAT: "RGB" 39 | TEST: 40 | EVAL_PERIOD: 5000 41 | AUG: 42 | ENABLED: False 43 | MIN_SIZES: [320, 480, 640, 800, 960, 1120] 44 | MAX_SIZE: 4480 45 | FLIP: True 46 | -------------------------------------------------------------------------------- /configs/ade20k-150/swin/maskformer_swin_small_bs16_160k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../maskformer_R50_bs16_160k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "D2SwinTransformer" 5 | SWIN: 6 | EMBED_DIM: 96 7 | DEPTHS: [2, 2, 18, 2] 8 | NUM_HEADS: [3, 6, 12, 24] 9 | WINDOW_SIZE: 7 10 | APE: False 11 |
DROP_PATH_RATE: 0.3 12 | PATCH_NORM: True 13 | WEIGHTS: "swin_small_patch4_window7_224.pkl" 14 | PIXEL_MEAN: [123.675, 116.280, 103.530] 15 | PIXEL_STD: [58.395, 57.120, 57.375] 16 | SOLVER: 17 | BASE_LR: 0.00006 18 | WARMUP_FACTOR: 1e-6 19 | WARMUP_ITERS: 1500 20 | WEIGHT_DECAY: 0.01 21 | WEIGHT_DECAY_NORM: 0.0 22 | WEIGHT_DECAY_EMBED: 0.0 23 | BACKBONE_MULTIPLIER: 1.0 24 | -------------------------------------------------------------------------------- /configs/ade20k-150/swin/maskformer_swin_tiny_bs16_160k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../maskformer_R50_bs16_160k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "D2SwinTransformer" 5 | SWIN: 6 | EMBED_DIM: 96 7 | DEPTHS: [2, 2, 6, 2] 8 | NUM_HEADS: [3, 6, 12, 24] 9 | WINDOW_SIZE: 7 10 | APE: False 11 | DROP_PATH_RATE: 0.3 12 | PATCH_NORM: True 13 | WEIGHTS: "swin_tiny_patch4_window7_224.pkl" 14 | PIXEL_MEAN: [123.675, 116.280, 103.530] 15 | PIXEL_STD: [58.395, 57.120, 57.375] 16 | SOLVER: 17 | BASE_LR: 0.00006 18 | WARMUP_FACTOR: 1e-6 19 | WARMUP_ITERS: 1500 20 | WEIGHT_DECAY: 0.01 21 | WEIGHT_DECAY_NORM: 0.0 22 | WEIGHT_DECAY_EMBED: 0.0 23 | BACKBONE_MULTIPLIER: 1.0 24 | -------------------------------------------------------------------------------- /configs/ade20k-full-847/Base-ADE20KFull-847.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | BACKBONE: 3 | FREEZE_AT: 0 4 | NAME: "build_resnet_backbone" 5 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 6 | PIXEL_MEAN: [123.675, 116.280, 103.530] 7 | PIXEL_STD: [58.395, 57.120, 57.375] 8 | RESNETS: 9 | DEPTH: 50 10 | STEM_TYPE: "basic" # not used 11 | STEM_OUT_CHANNELS: 64 12 | STRIDE_IN_1X1: False 13 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 14 | # NORM: "SyncBN" 15 | RES5_MULTI_GRID: [1, 1, 1] # not used 16 | DATASETS: 17 | TRAIN: ("ade20k_full_sem_seg_train",) 18 | TEST: ("ade20k_full_sem_seg_val",) 19 | SOLVER: 20 | IMS_PER_BATCH: 16 21 | BASE_LR: 0.0001 22 | MAX_ITER: 200000 23 | WARMUP_FACTOR: 1.0 24 | WARMUP_ITERS: 0 25 | WEIGHT_DECAY: 0.0001 26 | OPTIMIZER: "ADAMW" 27 | LR_SCHEDULER_NAME: "WarmupPolyLR" 28 | BACKBONE_MULTIPLIER: 0.1 29 | CLIP_GRADIENTS: 30 | ENABLED: True 31 | CLIP_TYPE: "full_model" 32 | CLIP_VALUE: 0.01 33 | NORM_TYPE: 2.0 34 | INPUT: 35 | MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 512) for x in range(5, 21)]"] 36 | MIN_SIZE_TRAIN_SAMPLING: "choice" 37 | MIN_SIZE_TEST: 512 38 | MAX_SIZE_TRAIN: 2048 39 | MAX_SIZE_TEST: 2048 40 | CROP: 41 | ENABLED: True 42 | TYPE: "absolute" 43 | SIZE: (512, 512) 44 | SINGLE_CATEGORY_MAX_AREA: 1.0 45 | COLOR_AUG_SSD: True 46 | SIZE_DIVISIBILITY: 512 # used in dataset mapper 47 | FORMAT: "RGB" 48 | DATASET_MAPPER_NAME: "mask_former_semantic" 49 | TEST: 50 | EVAL_PERIOD: 5000 51 | DATALOADER: 52 | FILTER_EMPTY_ANNOTATIONS: True 53 | NUM_WORKERS: 4 54 | VERSION: 2 55 | -------------------------------------------------------------------------------- /configs/ade20k-full-847/maskformer_R101_bs16_200k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: maskformer_R50_bs16_200k.yaml 2 | MODEL: 3 | WEIGHTS: "R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | STEM_TYPE: "basic" # not used 7 | STEM_OUT_CHANNELS: 64 8 | STRIDE_IN_1X1: False 9 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 10 | # NORM: "SyncBN" 11 | RES5_MULTI_GRID: [1, 1, 1] # not used 12 | --------------------------------------------------------------------------------
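The `_BASE_` keys in the configs above chain files together: detectron2 merges the base file first and then applies the child's overrides on top. A minimal sketch of resolving one such chain (assuming detectron2 is installed and the working directory is the repo root; `add_mask_former_config` is the helper defined in `mask_former/config.py` later in this listing):

```python
from detectron2.config import get_cfg

from mask_former import add_mask_former_config

cfg = get_cfg()
add_mask_former_config(cfg)  # register the custom MODEL.MASK_FORMER keys first
# merge_from_file follows the _BASE_ chain recursively, then applies overrides
cfg.merge_from_file("configs/ade20k-full-847/maskformer_R101_bs16_200k.yaml")
print(cfg.MODEL.RESNETS.DEPTH)                   # 101: the child overrides the R50 base
print(cfg.MODEL.MASK_FORMER.NUM_OBJECT_QUERIES)  # 100: inherited from the base unchanged
```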
/configs/ade20k-full-847/maskformer_R101c_bs16_200k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: maskformer_R50_bs16_200k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | -------------------------------------------------------------------------------- /configs/ade20k-full-847/maskformer_R50_bs16_200k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-ADE20KFull-847.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "MaskFormer" 4 | SEM_SEG_HEAD: 5 | NAME: "MaskFormerHead" 6 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 7 | IGNORE_VALUE: 65535 8 | NUM_CLASSES: 847 9 | COMMON_STRIDE: 4 # not used, hard-coded 10 | LOSS_WEIGHT: 1.0 11 | CONVS_DIM: 256 12 | MASK_DIM: 256 13 | NORM: "GN" 14 | MASK_FORMER: 15 | TRANSFORMER_IN_FEATURE: "res5" 16 | DEEP_SUPERVISION: True 17 | NO_OBJECT_WEIGHT: 0.1 18 | DICE_WEIGHT: 1.0 19 | MASK_WEIGHT: 20.0 20 | HIDDEN_DIM: 256 21 | NUM_OBJECT_QUERIES: 100 22 | NHEADS: 8 23 | DROPOUT: 0.1 24 | DIM_FEEDFORWARD: 2048 25 | ENC_LAYERS: 0 26 | DEC_LAYERS: 6 27 | PRE_NORM: False 28 | -------------------------------------------------------------------------------- /configs/ade20k-full-847/per_pixel_baseline_R50_bs16_200k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-ADE20KFull-847.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | SEM_SEG_HEAD: 5 | NAME: "PerPixelBaselineHead" 6 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 7 | IGNORE_VALUE: 65535 8 | NUM_CLASSES: 847 9 | COMMON_STRIDE: 4 # not used, hard-coded 10 | LOSS_WEIGHT: 1.0 11 | CONVS_DIM: 256 12 | MASK_DIM: 256 13 | NORM: "GN" 14 | -------------------------------------------------------------------------------- /configs/ade20k-full-847/per_pixel_baseline_plus_R50_bs16_200k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-ADE20KFull-847.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | SEM_SEG_HEAD: 5 | NAME: "PerPixelBaselinePlusHead" 6 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 7 | IGNORE_VALUE: 65535 8 | NUM_CLASSES: 847 9 | COMMON_STRIDE: 4 # not used, hard-coded 10 | LOSS_WEIGHT: 1.0 11 | CONVS_DIM: 256 12 | MASK_DIM: 256 13 | NORM: "GN" 14 | MASK_FORMER: 15 | TRANSFORMER_IN_FEATURE: "res5" 16 | DEEP_SUPERVISION: True 17 | HIDDEN_DIM: 256 18 | NUM_OBJECT_QUERIES: 847 # remember to set this to NUM_CLASSES 19 | NHEADS: 8 20 | DROPOUT: 0.1 21 | DIM_FEEDFORWARD: 2048 22 | ENC_LAYERS: 0 23 | DEC_LAYERS: 6 24 | PRE_NORM: False 25 | -------------------------------------------------------------------------------- /configs/cityscapes-19/Base-Cityscapes-19.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | BACKBONE: 3 | FREEZE_AT: 0 4 | NAME: "build_resnet_backbone" 5 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 6 | PIXEL_MEAN: [123.675, 116.280, 103.530] 7 | PIXEL_STD: [58.395, 57.120, 57.375] 8 | RESNETS: 9 | DEPTH: 50 10 | STEM_TYPE: "basic" # not used 11 | STEM_OUT_CHANNELS: 64 12 | STRIDE_IN_1X1: False 13 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 14 | # NORM: "SyncBN" 15 | RES5_MULTI_GRID: [1, 1, 1] # not used 16 | 
DATASETS: 17 | TRAIN: ("cityscapes_fine_sem_seg_train",) 18 | TEST: ("cityscapes_fine_sem_seg_val",) 19 | SOLVER: 20 | IMS_PER_BATCH: 16 21 | BASE_LR: 0.0001 22 | MAX_ITER: 90000 23 | WARMUP_FACTOR: 1.0 24 | WARMUP_ITERS: 0 25 | WEIGHT_DECAY: 0.0001 26 | OPTIMIZER: "ADAMW" 27 | LR_SCHEDULER_NAME: "WarmupPolyLR" 28 | BACKBONE_MULTIPLIER: 0.1 29 | CLIP_GRADIENTS: 30 | ENABLED: True 31 | CLIP_TYPE: "full_model" 32 | CLIP_VALUE: 0.01 33 | NORM_TYPE: 2.0 34 | INPUT: 35 | MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 1024) for x in range(5, 21)]"] 36 | MIN_SIZE_TRAIN_SAMPLING: "choice" 37 | MIN_SIZE_TEST: 1024 38 | MAX_SIZE_TRAIN: 4096 39 | MAX_SIZE_TEST: 2048 40 | CROP: 41 | ENABLED: True 42 | TYPE: "absolute" 43 | SIZE: (512, 1024) 44 | SINGLE_CATEGORY_MAX_AREA: 1.0 45 | COLOR_AUG_SSD: True 46 | SIZE_DIVISIBILITY: -1 47 | FORMAT: "RGB" 48 | DATASET_MAPPER_NAME: "mask_former_semantic" 49 | TEST: 50 | EVAL_PERIOD: 5000 51 | AUG: 52 | ENABLED: False 53 | MIN_SIZES: [512, 768, 1024, 1280, 1536, 1792] 54 | MAX_SIZE: 4096 55 | FLIP: True 56 | DATALOADER: 57 | FILTER_EMPTY_ANNOTATIONS: True 58 | NUM_WORKERS: 4 59 | VERSION: 2 60 | -------------------------------------------------------------------------------- /configs/cityscapes-19/maskformer_R101_bs16_90k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-Cityscapes-19.yaml 2 | MODEL: 3 | WEIGHTS: "R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | STEM_TYPE: "basic" # not used 7 | STEM_OUT_CHANNELS: 64 8 | STRIDE_IN_1X1: False 9 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 10 | # NORM: "SyncBN" 11 | RES5_MULTI_GRID: [1, 1, 1] # not used 12 | META_ARCHITECTURE: "MaskFormer" 13 | SEM_SEG_HEAD: 14 | NAME: "MaskFormerHead" 15 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 16 | IGNORE_VALUE: 255 17 | NUM_CLASSES: 19 18 | COMMON_STRIDE: 4 # not used, hard-coded 19 | LOSS_WEIGHT: 1.0 20 | CONVS_DIM: 256 21 | MASK_DIM: 256 22 | NORM: "GN" 23 | MASK_FORMER: 24 | TRANSFORMER_IN_FEATURE: "res5" 25 | DEEP_SUPERVISION: True 26 | NO_OBJECT_WEIGHT: 0.1 27 | DICE_WEIGHT: 1.0 28 | MASK_WEIGHT: 20.0 29 | HIDDEN_DIM: 256 30 | NUM_OBJECT_QUERIES: 100 31 | NHEADS: 8 32 | DROPOUT: 0.1 33 | DIM_FEEDFORWARD: 2048 34 | ENC_LAYERS: 0 35 | DEC_LAYERS: 6 36 | PRE_NORM: False 37 | -------------------------------------------------------------------------------- /configs/cityscapes-19/maskformer_R101c_bs16_90k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: maskformer_R101_bs16_90k.yaml 2 | MODEL: 3 | BACKBONE: 4 | FREEZE_AT: 0 5 | NAME: "build_resnet_deeplab_backbone" 6 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 7 | PIXEL_MEAN: [123.675, 116.280, 103.530] 8 | PIXEL_STD: [58.395, 57.120, 57.375] 9 | RESNETS: 10 | DEPTH: 101 11 | STEM_TYPE: "deeplab" 12 | STEM_OUT_CHANNELS: 128 13 | STRIDE_IN_1X1: False 14 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 15 | # NORM: "SyncBN" 16 | RES5_MULTI_GRID: [1, 2, 4] 17 | -------------------------------------------------------------------------------- /configs/coco-panoptic/Base-COCO-PanopticSegmentation.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | BACKBONE: 3 | FREEZE_AT: 0 4 | NAME: "build_resnet_backbone" 5 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 6 | PIXEL_MEAN: [123.675, 116.280, 103.530] 7 | PIXEL_STD: [58.395, 57.120, 57.375] 8 | RESNETS: 9 | DEPTH: 50 10 | STEM_TYPE: "basic" # not used 11 | STEM_OUT_CHANNELS: 64 12 | STRIDE_IN_1X1: 
False 13 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 14 | # NORM: "SyncBN" 15 | RES5_MULTI_GRID: [1, 1, 1] # not used 16 | DATASETS: 17 | TRAIN: ("coco_2017_train_panoptic",) 18 | TEST: ("coco_2017_val_panoptic",) 19 | SOLVER: 20 | IMS_PER_BATCH: 64 21 | BASE_LR: 0.0001 22 | STEPS: (369600,) 23 | MAX_ITER: 554400 24 | WARMUP_FACTOR: 1.0 25 | WARMUP_ITERS: 10 26 | WEIGHT_DECAY: 0.0001 27 | OPTIMIZER: "ADAMW" 28 | BACKBONE_MULTIPLIER: 0.1 29 | CLIP_GRADIENTS: 30 | ENABLED: True 31 | CLIP_TYPE: "full_model" 32 | CLIP_VALUE: 0.01 33 | NORM_TYPE: 2.0 34 | INPUT: 35 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 36 | CROP: 37 | ENABLED: True 38 | TYPE: "absolute_range" 39 | SIZE: (384, 600) 40 | FORMAT: "RGB" 41 | DATASET_MAPPER_NAME: "detr_panoptic" 42 | TEST: 43 | EVAL_PERIOD: 0 44 | DATALOADER: 45 | FILTER_EMPTY_ANNOTATIONS: True 46 | NUM_WORKERS: 4 47 | VERSION: 2 48 | -------------------------------------------------------------------------------- /configs/coco-panoptic/maskformer_panoptic_R101_bs64_554k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: maskformer_panoptic_R50_bs64_554k.yaml 2 | MODEL: 3 | WEIGHTS: "R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | STEM_TYPE: "basic" # not used 7 | STEM_OUT_CHANNELS: 64 8 | STRIDE_IN_1X1: False 9 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 10 | # NORM: "SyncBN" 11 | RES5_MULTI_GRID: [1, 1, 1] # not used 12 | -------------------------------------------------------------------------------- /configs/coco-panoptic/maskformer_panoptic_R50_bs64_554k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-COCO-PanopticSegmentation.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "MaskFormer" 4 | SEM_SEG_HEAD: 5 | NAME: "MaskFormerHead" 6 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 7 | IGNORE_VALUE: 255 8 | NUM_CLASSES: 133 9 | COMMON_STRIDE: 4 # not used, hard-coded 10 | LOSS_WEIGHT: 1.0 11 | CONVS_DIM: 256 12 | MASK_DIM: 256 13 | NORM: "GN" 14 | # add additional 6 encoder layers 15 | PIXEL_DECODER_NAME: "TransformerEncoderPixelDecoder" 16 | TRANSFORMER_ENC_LAYERS: 6 17 | MASK_FORMER: 18 | TRANSFORMER_IN_FEATURE: "transformer_encoder" 19 | DEEP_SUPERVISION: True 20 | NO_OBJECT_WEIGHT: 0.1 21 | DICE_WEIGHT: 1.0 22 | MASK_WEIGHT: 20.0 23 | HIDDEN_DIM: 256 24 | NUM_OBJECT_QUERIES: 100 25 | NHEADS: 8 26 | DROPOUT: 0.1 27 | DIM_FEEDFORWARD: 2048 28 | ENC_LAYERS: 0 29 | DEC_LAYERS: 6 30 | PRE_NORM: False 31 | # COCO model should not pad image 32 | SIZE_DIVISIBILITY: 0 33 | TEST: 34 | PANOPTIC_ON: True 35 | OVERLAP_THRESHOLD: 0.8 36 | OBJECT_MASK_THRESHOLD: 0.8 37 | -------------------------------------------------------------------------------- /configs/coco-panoptic/swin/maskformer_panoptic_swin_base_IN21k_384_bs64_554k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../maskformer_panoptic_R50_bs64_554k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "D2SwinTransformer" 5 | SWIN: 6 | EMBED_DIM: 128 7 | DEPTHS: [2, 2, 18, 2] 8 | NUM_HEADS: [4, 8, 16, 32] 9 | WINDOW_SIZE: 12 10 | APE: False 11 | DROP_PATH_RATE: 0.3 12 | PATCH_NORM: True 13 | PRETRAIN_IMG_SIZE: 384 14 | WEIGHTS: "swin_base_patch4_window12_384_22k.pkl" 15 | PIXEL_MEAN: [123.675, 116.280, 103.530] 16 | PIXEL_STD: [58.395, 57.120, 57.375] 17 | SEM_SEG_HEAD: 18 | PIXEL_DECODER_NAME: "BasePixelDecoder" 19 | MASK_FORMER: 20 | TRANSFORMER_IN_FEATURE: "res5" 21 | ENFORCE_INPUT_PROJ: True 22 | TEST: 23 | PANOPTIC_ON: True 24 | 
OVERLAP_THRESHOLD: 0.8 25 | OBJECT_MASK_THRESHOLD: 0.8 26 | SOLVER: 27 | BASE_LR: 0.00006 28 | WARMUP_FACTOR: 1e-6 29 | WARMUP_ITERS: 1500 30 | WEIGHT_DECAY: 0.01 31 | WEIGHT_DECAY_NORM: 0.0 32 | WEIGHT_DECAY_EMBED: 0.0 33 | BACKBONE_MULTIPLIER: 1.0 -------------------------------------------------------------------------------- /configs/coco-panoptic/swin/maskformer_panoptic_swin_large_IN21k_384_bs64_554k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../maskformer_panoptic_R50_bs64_554k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "D2SwinTransformer" 5 | SWIN: 6 | EMBED_DIM: 192 7 | DEPTHS: [2, 2, 18, 2] 8 | NUM_HEADS: [6, 12, 24, 48] 9 | WINDOW_SIZE: 12 10 | APE: False 11 | DROP_PATH_RATE: 0.3 12 | PATCH_NORM: True 13 | PRETRAIN_IMG_SIZE: 384 14 | WEIGHTS: "swin_large_patch4_window12_384_22k.pkl" 15 | PIXEL_MEAN: [123.675, 116.280, 103.530] 16 | PIXEL_STD: [58.395, 57.120, 57.375] 17 | SEM_SEG_HEAD: 18 | PIXEL_DECODER_NAME: "BasePixelDecoder" 19 | MASK_FORMER: 20 | TRANSFORMER_IN_FEATURE: "res5" 21 | ENFORCE_INPUT_PROJ: True 22 | TEST: 23 | PANOPTIC_ON: True 24 | OVERLAP_THRESHOLD: 0.8 25 | OBJECT_MASK_THRESHOLD: 0.8 26 | SOLVER: 27 | BASE_LR: 0.00006 28 | WARMUP_FACTOR: 1e-6 29 | WARMUP_ITERS: 1500 30 | WEIGHT_DECAY: 0.01 31 | WEIGHT_DECAY_NORM: 0.0 32 | WEIGHT_DECAY_EMBED: 0.0 33 | BACKBONE_MULTIPLIER: 1.0 34 | INPUT: 35 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 36 | MAX_SIZE_TRAIN: 1000 37 | CROP: 38 | ENABLED: True 39 | TYPE: "absolute_range" 40 | SIZE: (384, 600) 41 | FORMAT: "RGB" 42 | -------------------------------------------------------------------------------- /configs/coco-panoptic/swin/maskformer_panoptic_swin_small_bs64_554k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../maskformer_panoptic_R50_bs64_554k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "D2SwinTransformer" 5 | SWIN: 6 | EMBED_DIM: 96 7 | DEPTHS: [2, 2, 18, 2] 8 | NUM_HEADS: [3, 6, 12, 24] 9 | WINDOW_SIZE: 7 10 | APE: False 11 | DROP_PATH_RATE: 0.3 12 | PATCH_NORM: True 13 | WEIGHTS: "swin_small_patch4_window7_224.pkl" 14 | PIXEL_MEAN: [123.675, 116.280, 103.530] 15 | PIXEL_STD: [58.395, 57.120, 57.375] 16 | SEM_SEG_HEAD: 17 | PIXEL_DECODER_NAME: "BasePixelDecoder" 18 | MASK_FORMER: 19 | TRANSFORMER_IN_FEATURE: "res5" 20 | ENFORCE_INPUT_PROJ: True 21 | TEST: 22 | PANOPTIC_ON: True 23 | OVERLAP_THRESHOLD: 0.8 24 | OBJECT_MASK_THRESHOLD: 0.8 25 | SOLVER: 26 | BASE_LR: 0.00006 27 | WARMUP_FACTOR: 1e-6 28 | WARMUP_ITERS: 1500 29 | WEIGHT_DECAY: 0.01 30 | WEIGHT_DECAY_NORM: 0.0 31 | WEIGHT_DECAY_EMBED: 0.0 32 | BACKBONE_MULTIPLIER: 1.0 33 | -------------------------------------------------------------------------------- /configs/coco-panoptic/swin/maskformer_panoptic_swin_tiny_bs64_554k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../maskformer_panoptic_R50_bs64_554k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "D2SwinTransformer" 5 | SWIN: 6 | EMBED_DIM: 96 7 | DEPTHS: [2, 2, 6, 2] 8 | NUM_HEADS: [3, 6, 12, 24] 9 | WINDOW_SIZE: 7 10 | APE: False 11 | DROP_PATH_RATE: 0.3 12 | PATCH_NORM: True 13 | WEIGHTS: "swin_tiny_patch4_window7_224.pkl" 14 | PIXEL_MEAN: [123.675, 116.280, 103.530] 15 | PIXEL_STD: [58.395, 57.120, 57.375] 16 | SEM_SEG_HEAD: 17 | PIXEL_DECODER_NAME: "BasePixelDecoder" 18 | MASK_FORMER: 19 | TRANSFORMER_IN_FEATURE: "res5" 20 | ENFORCE_INPUT_PROJ: True 21 | TEST: 22 | PANOPTIC_ON: True 23 | OVERLAP_THRESHOLD: 0.8 24 | 
OBJECT_MASK_THRESHOLD: 0.8 25 | SOLVER: 26 | BASE_LR: 0.00006 27 | WARMUP_FACTOR: 1e-6 28 | WARMUP_ITERS: 1500 29 | WEIGHT_DECAY: 0.01 30 | WEIGHT_DECAY_NORM: 0.0 31 | WEIGHT_DECAY_EMBED: 0.0 32 | BACKBONE_MULTIPLIER: 1.0 33 | -------------------------------------------------------------------------------- /configs/coco-stuff-10k-171/Base-COCOStuff10K-171.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | BACKBONE: 3 | FREEZE_AT: 0 4 | NAME: "build_resnet_backbone" 5 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 6 | PIXEL_MEAN: [123.675, 116.280, 103.530] 7 | PIXEL_STD: [58.395, 57.120, 57.375] 8 | RESNETS: 9 | DEPTH: 50 10 | STEM_TYPE: "basic" # not used 11 | STEM_OUT_CHANNELS: 64 12 | STRIDE_IN_1X1: False 13 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 14 | # NORM: "SyncBN" 15 | RES5_MULTI_GRID: [1, 1, 1] # not used 16 | DATASETS: 17 | TRAIN: ("coco_2017_train_stuff_10k_sem_seg",) 18 | TEST: ("coco_2017_test_stuff_10k_sem_seg",) 19 | SOLVER: 20 | IMS_PER_BATCH: 32 21 | BASE_LR: 0.0001 22 | MAX_ITER: 60000 23 | WARMUP_FACTOR: 1.0 24 | WARMUP_ITERS: 0 25 | WEIGHT_DECAY: 0.0001 26 | OPTIMIZER: "ADAMW" 27 | LR_SCHEDULER_NAME: "WarmupPolyLR" 28 | BACKBONE_MULTIPLIER: 0.1 29 | CLIP_GRADIENTS: 30 | ENABLED: True 31 | CLIP_TYPE: "full_model" 32 | CLIP_VALUE: 0.01 33 | NORM_TYPE: 2.0 34 | INPUT: 35 | MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 640) for x in range(5, 16)]"] 36 | MIN_SIZE_TRAIN_SAMPLING: "choice" 37 | MIN_SIZE_TEST: 640 38 | MAX_SIZE_TRAIN: 2560 39 | MAX_SIZE_TEST: 2560 40 | CROP: 41 | ENABLED: True 42 | TYPE: "absolute" 43 | SIZE: (640, 640) 44 | SINGLE_CATEGORY_MAX_AREA: 1.0 45 | COLOR_AUG_SSD: True 46 | SIZE_DIVISIBILITY: 640 # used in dataset mapper 47 | FORMAT: "RGB" 48 | DATASET_MAPPER_NAME: "mask_former_semantic" 49 | TEST: 50 | EVAL_PERIOD: 5000 51 | AUG: 52 | ENABLED: False 53 | MIN_SIZES: [320, 480, 640, 800, 960, 1120] 54 | MAX_SIZE: 4480 55 | FLIP: True 56 | DATALOADER: 57 | FILTER_EMPTY_ANNOTATIONS: True 58 | NUM_WORKERS: 4 59 | VERSION: 2 60 | -------------------------------------------------------------------------------- /configs/coco-stuff-10k-171/maskformer_R101_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: maskformer_R50_bs32_60k.yaml 2 | MODEL: 3 | WEIGHTS: "R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | STEM_TYPE: "basic" # not used 7 | STEM_OUT_CHANNELS: 64 8 | STRIDE_IN_1X1: False 9 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 10 | # NORM: "SyncBN" 11 | RES5_MULTI_GRID: [1, 1, 1] # not used 12 | -------------------------------------------------------------------------------- /configs/coco-stuff-10k-171/maskformer_R101c_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: maskformer_R50_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | -------------------------------------------------------------------------------- /configs/coco-stuff-10k-171/maskformer_R50_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-COCOStuff10K-171.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "MaskFormer" 4 | SEM_SEG_HEAD: 5 | NAME: 
"MaskFormerHead" 6 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 7 | IGNORE_VALUE: 255 8 | NUM_CLASSES: 171 9 | COMMON_STRIDE: 4 # not used, hard-coded 10 | LOSS_WEIGHT: 1.0 11 | CONVS_DIM: 256 12 | MASK_DIM: 256 13 | NORM: "GN" 14 | MASK_FORMER: 15 | TRANSFORMER_IN_FEATURE: "res5" 16 | DEEP_SUPERVISION: True 17 | NO_OBJECT_WEIGHT: 0.1 18 | DICE_WEIGHT: 1.0 19 | MASK_WEIGHT: 20.0 20 | HIDDEN_DIM: 256 21 | NUM_OBJECT_QUERIES: 100 22 | NHEADS: 8 23 | DROPOUT: 0.1 24 | DIM_FEEDFORWARD: 2048 25 | ENC_LAYERS: 0 26 | DEC_LAYERS: 6 27 | PRE_NORM: False 28 | -------------------------------------------------------------------------------- /configs/coco-stuff-10k-171/per_pixel_baseline_R50_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-COCOStuff10K-171.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | SEM_SEG_HEAD: 5 | NAME: "PerPixelBaselineHead" 6 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 7 | IGNORE_VALUE: 255 8 | NUM_CLASSES: 171 9 | COMMON_STRIDE: 4 # not used, hard-coded 10 | LOSS_WEIGHT: 1.0 11 | CONVS_DIM: 256 12 | MASK_DIM: 256 13 | NORM: "GN" 14 | -------------------------------------------------------------------------------- /configs/coco-stuff-10k-171/per_pixel_baseline_plus_R50_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-COCOStuff10K-171.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | SEM_SEG_HEAD: 5 | NAME: "PerPixelBaselinePlusHead" 6 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 7 | IGNORE_VALUE: 255 8 | NUM_CLASSES: 171 9 | COMMON_STRIDE: 4 # not used, hard-coded 10 | LOSS_WEIGHT: 1.0 11 | CONVS_DIM: 256 12 | MASK_DIM: 256 13 | NORM: "GN" 14 | MASK_FORMER: 15 | TRANSFORMER_IN_FEATURE: "res5" 16 | DEEP_SUPERVISION: True 17 | HIDDEN_DIM: 256 18 | NUM_OBJECT_QUERIES: 171 # remember to set this to NUM_CLASSES 19 | NHEADS: 8 20 | DROPOUT: 0.1 21 | DIM_FEEDFORWARD: 2048 22 | ENC_LAYERS: 0 23 | DEC_LAYERS: 6 24 | PRE_NORM: False 25 | -------------------------------------------------------------------------------- /configs/mapillary-vistas-65/Base-MapillaryVistas-65.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | BACKBONE: 3 | FREEZE_AT: 0 4 | NAME: "build_resnet_backbone" 5 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 6 | PIXEL_MEAN: [123.675, 116.280, 103.530] 7 | PIXEL_STD: [58.395, 57.120, 57.375] 8 | RESNETS: 9 | DEPTH: 50 10 | STEM_TYPE: "basic" # not used 11 | STEM_OUT_CHANNELS: 64 12 | STRIDE_IN_1X1: False 13 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 14 | # NORM: "SyncBN" 15 | RES5_MULTI_GRID: [1, 1, 1] # not used 16 | DATASETS: 17 | TRAIN: ("mapillary_vistas_sem_seg_train",) 18 | TEST: ("mapillary_vistas_sem_seg_val",) 19 | SOLVER: 20 | IMS_PER_BATCH: 16 21 | BASE_LR: 0.0001 22 | MAX_ITER: 300000 23 | WARMUP_FACTOR: 1.0 24 | WARMUP_ITERS: 0 25 | WEIGHT_DECAY: 0.0001 26 | OPTIMIZER: "ADAMW" 27 | LR_SCHEDULER_NAME: "WarmupPolyLR" 28 | BACKBONE_MULTIPLIER: 0.1 29 | CLIP_GRADIENTS: 30 | ENABLED: True 31 | CLIP_TYPE: "full_model" 32 | CLIP_VALUE: 0.01 33 | NORM_TYPE: 2.0 34 | INPUT: 35 | MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 2048) for x in range(5, 21)]"] 36 | MIN_SIZE_TRAIN_SAMPLING: "choice" 37 | MIN_SIZE_TEST: 2048 38 | MAX_SIZE_TRAIN: 8192 39 | MAX_SIZE_TEST: 2048 40 | CROP: 41 | ENABLED: True 42 | TYPE: "absolute" 43 | SIZE: (1280, 1280) 44 | SINGLE_CATEGORY_MAX_AREA: 1.0 45 | COLOR_AUG_SSD: True 46 | 
SIZE_DIVISIBILITY: 1280 # used in dataset mapper 47 | FORMAT: "RGB" 48 | DATASET_MAPPER_NAME: "mask_former_semantic" 49 | TEST: 50 | EVAL_PERIOD: 5000 51 | DATALOADER: 52 | FILTER_EMPTY_ANNOTATIONS: True 53 | NUM_WORKERS: 10 54 | VERSION: 2 55 | -------------------------------------------------------------------------------- /configs/mapillary-vistas-65/maskformer_R50_bs16_300k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-MapillaryVistas-65.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "MaskFormer" 4 | SEM_SEG_HEAD: 5 | NAME: "MaskFormerHead" 6 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 7 | IGNORE_VALUE: 65 8 | NUM_CLASSES: 65 9 | COMMON_STRIDE: 4 # not used, hard-coded 10 | LOSS_WEIGHT: 1.0 11 | CONVS_DIM: 256 12 | MASK_DIM: 256 13 | NORM: "GN" 14 | MASK_FORMER: 15 | TRANSFORMER_IN_FEATURE: "res5" 16 | DEEP_SUPERVISION: True 17 | NO_OBJECT_WEIGHT: 0.1 18 | DICE_WEIGHT: 1.0 19 | MASK_WEIGHT: 20.0 20 | HIDDEN_DIM: 256 21 | NUM_OBJECT_QUERIES: 100 22 | NHEADS: 8 23 | DROPOUT: 0.1 24 | DIM_FEEDFORWARD: 2048 25 | ENC_LAYERS: 0 26 | DEC_LAYERS: 6 27 | PRE_NORM: False 28 | -------------------------------------------------------------------------------- /figs/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/SimFormer-Weak-Shot-Semantic-Segmentation/9e32a800d9c40c1f85e7b1d8d24c412572f484f7/figs/framework.png -------------------------------------------------------------------------------- /figs/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/SimFormer-Weak-Shot-Semantic-Segmentation/9e32a800d9c40c1f85e7b1d8d24c412572f484f7/figs/overview.png -------------------------------------------------------------------------------- /figs/viz.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/SimFormer-Weak-Shot-Semantic-Segmentation/9e32a800d9c40c1f85e7b1d8d24c412572f484f7/figs/viz.png -------------------------------------------------------------------------------- /init_datasets/README.md: -------------------------------------------------------------------------------- 1 | # Prepare Datasets for MaskFormer 2 | 3 | A dataset can be used by accessing [DatasetCatalog](https://detectron2.readthedocs.io/modules/data.html#detectron2.data.DatasetCatalog) 4 | for its data, or [MetadataCatalog](https://detectron2.readthedocs.io/modules/data.html#detectron2.data.MetadataCatalog) for its metadata (class names, etc.). 5 | This document explains how to set up the builtin datasets so they can be used by the above APIs. 6 | [Use Custom Datasets](https://detectron2.readthedocs.io/tutorials/datasets.html) gives a deeper dive on how to use `DatasetCatalog` and `MetadataCatalog`, 7 | and how to add new datasets to them. 8 | 9 | MaskFormer has builtin support for a few datasets. 10 | The datasets are assumed to exist in a directory specified by the environment variable 11 | `DETECTRON2_DATASETS`. 12 | Under this directory, detectron2 will look for datasets in the structure described below, if needed. 13 | ``` 14 | $DETECTRON2_DATASETS/ 15 | ADEChallengeData2016/ 16 | ADE20K_2021_17_01/ 17 | coco/ 18 | cityscapes/ 19 | mapillary_vistas/ 20 | ``` 21 | 22 | You can set the location for builtin datasets by `export DETECTRON2_DATASETS=/path/to/datasets`. 23 | If left unset, the default is `./datasets` relative to your current working directory.
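Once the files are in place, a registration can be sanity-checked through the two catalogs mentioned at the top of this document. A minimal sketch (the dataset name follows the builtin ADE20k registration; actually loading the dicts assumes `ADEChallengeData2016` has been prepared as described below):

```python
import os

from detectron2.data import DatasetCatalog, MetadataCatalog

print(os.getenv("DETECTRON2_DATASETS", "datasets"))  # dataset root in use
meta = MetadataCatalog.get("ade20k_sem_seg_val")     # metadata only, no files needed
print(meta.stuff_classes[:3])
dicts = DatasetCatalog.get("ade20k_sem_seg_val")     # one dict per image, reads the files
print(len(dicts))
```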
24 | 25 | The [model zoo](https://github.com/facebookresearch/MaskFormer/blob/master/MODEL_ZOO.md) 26 | contains configs and models that use these builtin datasets. 27 | 28 | ## Expected dataset structure for [ADE20k Scene Parsing](http://sceneparsing.csail.mit.edu/): 29 | ``` 30 | ADEChallengeData2016/ 31 | annotations/ 32 | annotations_detectron2/ 33 | images/ 34 | objectInfo150.txt 35 | ``` 36 | The directory `annotations_detectron2` is generated by running `python datasets/prepare_ade20k_sem_seg.py`. 37 | 38 | ## Expected dataset structure for ADE20K panoptic segmentation: 39 | ``` 40 | ADEChallengeData2016/ 41 | images/ 42 | annotations/ 43 | objectInfo150.txt 44 | # download instance annotation 45 | annotations_instance/ 46 | # generated by prepare_ade20k_sem_seg.py 47 | annotations_detectron2/ 48 | # below are generated by prepare_ade20k_pan_seg.py 49 | ade20k_panoptic_train.json 50 | ade20k_panoptic_train/ 51 | ade20k_panoptic_val.json 52 | ade20k_panoptic_val/ 53 | ``` 54 | Install panopticapi by: 55 | ```bash 56 | pip install git+https://github.com/cocodataset/panopticapi.git 57 | ``` 58 | 59 | Download the instance annotation from http://sceneparsing.csail.mit.edu/: 60 | ```bash 61 | wget http://sceneparsing.csail.mit.edu/data/ChallengeData2017/annotations_instance.tar 62 | ``` 63 | 64 | Then, run `python datasets/prepare_ade20k_pan_seg.py` to combine semantic and instance annotations into panoptic annotations. 65 | 66 | ## Expected dataset structure for [ADE20k-Full](https://groups.csail.mit.edu/vision/datasets/ADE20K/): 67 | ``` 68 | ADE20K_2021_17_01/ 69 | images/ 70 | images_detectron2/ 71 | annotations_detectron2/ 72 | index_ade20k.pkl 73 | objects.txt 74 | ``` 75 | The directories `images_detectron2` and `annotations_detectron2` are generated by running `python datasets/prepare_ade20k_full_sem_seg.py`. 76 | 77 | ## Expected dataset structure for [cityscapes](https://www.cityscapes-dataset.com/downloads/): 78 | ``` 79 | cityscapes/ 80 | gtFine/ 81 | train/ 82 | aachen/ 83 | color.png, instanceIds.png, labelIds.png, polygons.json, 84 | labelTrainIds.png 85 | ... 86 | val/ 87 | test/ 88 | # below are generated Cityscapes panoptic annotations 89 | cityscapes_panoptic_train.json 90 | cityscapes_panoptic_train/ 91 | cityscapes_panoptic_val.json 92 | cityscapes_panoptic_val/ 93 | cityscapes_panoptic_test.json 94 | cityscapes_panoptic_test/ 95 | leftImg8bit/ 96 | train/ 97 | val/ 98 | test/ 99 | ``` 100 | Install cityscapes scripts by: 101 | ``` 102 | pip install git+https://github.com/mcordts/cityscapesScripts.git 103 | ``` 104 | 105 | Note: to create labelTrainIds.png, first prepare the above structure, then run cityscapesScripts with: 106 | ``` 107 | CITYSCAPES_DATASET=/path/to/abovementioned/cityscapes python cityscapesscripts/preparation/createTrainIdLabelImgs.py 108 | ``` 109 | These files are not needed for instance segmentation. 110 | 111 | Note: to generate the Cityscapes panoptic dataset, run cityscapesScripts with: 112 | ``` 113 | CITYSCAPES_DATASET=/path/to/abovementioned/cityscapes python cityscapesscripts/preparation/createPanopticImgs.py 114 | ``` 115 | These files are not needed for semantic and instance segmentation. 116 | 117 | ## Expected dataset structure for [COCO-Stuff-10K](https://github.com/nightrome/cocostuff10k): 118 | 119 | ``` 120 | coco/ 121 | coco_stuff_10k/ 122 | annotations/ 123 | COCO_train2014_000000000077.mat 124 |
125 | imageLists/ 126 | all.txt 127 | test.txt 128 | train.txt 129 | images/ 130 | COCO_train2014_000000000077.jpg 131 | ... 132 | # below are generated by prepare_coco_stuff_10k_v1.0_sem_seg.py 133 | annotations_detectron2/ 134 | train/ 135 | test/ 136 | images_detectron2/ 137 | train/ 138 | test/ 139 | ``` 140 | 141 | Get the COCO-Stuff-10k **v1.0** annotation from https://github.com/nightrome/cocostuff10k. 142 | ```bash 143 | wget http://calvin.inf.ed.ac.uk/wp-content/uploads/data/cocostuffdataset/cocostuff-10k-v1.0.zip 144 | ``` 145 | Unzip `cocostuff-10k-v1.0.zip` and put `annotations`, `imageLists` and `images` to the correct location listed above. 146 | 147 | Generate COCO-Stuff-10k annotation by `python datasets/prepare_coco_stuff_10k_v1.0_sem_seg.py` 148 | 149 | ## Expected dataset structure for [Mapillary Vistas](https://www.mapillary.com/dataset/vistas): 150 | ``` 151 | mapillary_vistas/ 152 | training/ 153 | images/ 154 | instances/ 155 | labels/ 156 | panoptic/ 157 | validation/ 158 | images/ 159 | instances/ 160 | labels/ 161 | panoptic/ 162 | ``` 163 | 164 | No preprocessing is needed for Mapillary Vistas. 165 | -------------------------------------------------------------------------------- /init_datasets/ade20k_instance_catid_mapping.txt: -------------------------------------------------------------------------------- 1 | Instacne100 SceneParse150 FullADE20K 2 | 1 8 165 3 | 2 9 3055 4 | 3 11 350 5 | 4 13 1831 6 | 5 15 774 7 | 5 15 783 8 | 6 16 2684 9 | 7 19 687 10 | 8 20 471 11 | 9 21 401 12 | 10 23 1735 13 | 11 24 2473 14 | 12 25 2329 15 | 13 28 1564 16 | 14 31 57 17 | 15 32 2272 18 | 16 33 907 19 | 17 34 724 20 | 18 36 2985 21 | 18 36 533 22 | 19 37 1395 23 | 20 38 155 24 | 21 39 2053 25 | 22 40 689 26 | 23 42 266 27 | 24 43 581 28 | 25 44 2380 29 | 26 45 491 30 | 27 46 627 31 | 28 48 2388 32 | 29 50 943 33 | 30 51 2096 34 | 31 54 2530 35 | 32 56 420 36 | 33 57 1948 37 | 34 58 1869 38 | 35 59 2251 39 | 36 63 239 40 | 37 65 571 41 | 38 66 2793 42 | 39 67 978 43 | 40 68 236 44 | 41 70 181 45 | 42 71 629 46 | 43 72 2598 47 | 44 73 1744 48 | 45 74 1374 49 | 46 75 591 50 | 47 76 2679 51 | 48 77 223 52 | 49 79 47 53 | 50 81 327 54 | 51 82 2821 55 | 52 83 1451 56 | 53 84 2880 57 | 54 86 480 58 | 55 87 77 59 | 56 88 2616 60 | 57 89 246 61 | 57 89 247 62 | 58 90 2733 63 | 59 91 14 64 | 60 93 38 65 | 61 94 1936 66 | 62 96 120 67 | 63 98 1702 68 | 64 99 249 69 | 65 103 2928 70 | 66 104 2337 71 | 67 105 1023 72 | 68 108 2989 73 | 69 109 1930 74 | 70 111 2586 75 | 71 112 131 76 | 72 113 146 77 | 73 116 95 78 | 74 117 1563 79 | 75 119 1708 80 | 76 120 103 81 | 77 121 1002 82 | 78 122 2569 83 | 79 124 2833 84 | 80 125 1551 85 | 81 126 1981 86 | 82 127 29 87 | 83 128 187 88 | 84 130 747 89 | 85 131 2254 90 | 86 133 2262 91 | 87 134 1260 92 | 88 135 2243 93 | 89 136 2932 94 | 90 137 2836 95 | 91 138 2850 96 | 92 139 64 97 | 93 140 894 98 | 94 143 1919 99 | 95 144 1583 100 | 96 145 318 101 | 97 147 2046 102 | 98 148 1098 103 | 99 149 530 104 | 100 150 954 105 | -------------------------------------------------------------------------------- /init_datasets/prepare_ade20k_sem_seg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) Facebook, Inc. and its affiliates. 
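# The `img - 1` shift below maps ADE20K's "ignore" label 0 to 255 via uint8
# wrap-around (matching the IGNORE_VALUE: 255 used by the ade20k-150 configs),
# while the 150 real classes move to the contiguous range 0-149.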
4 | import os 5 | from pathlib import Path 6 | 7 | import numpy as np 8 | import tqdm 9 | from PIL import Image 10 | 11 | 12 | def convert(input, output): 13 | img = np.asarray(Image.open(input)) 14 | assert img.dtype == np.uint8 15 | img = img - 1 # 0 (ignore) becomes 255. others are shifted by 1 16 | Image.fromarray(img).save(output) 17 | 18 | 19 | if __name__ == "__main__": 20 | dataset_dir = Path(os.getenv("DETECTRON2_DATASETS", "datasets")) / "ADEChallengeData2016" 21 | for name in ["training", "validation"]: 22 | annotation_dir = dataset_dir / "annotations" / name 23 | output_dir = dataset_dir / "annotations_detectron2" / name 24 | output_dir.mkdir(parents=True, exist_ok=True) 25 | for file in tqdm.tqdm(list(annotation_dir.iterdir())): 26 | output_file = output_dir / file.name 27 | convert(file, output_file) 28 | -------------------------------------------------------------------------------- /init_datasets/prepare_coco_stuff_10k_v1.0_sem_seg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) Facebook, Inc. and its affiliates. 4 | import os 5 | from pathlib import Path 6 | from shutil import copyfile 7 | 8 | import h5py 9 | import numpy as np 10 | import tqdm 11 | from PIL import Image 12 | 13 | if __name__ == "__main__": 14 | dataset_dir = os.path.join( 15 | os.getenv("DETECTRON2_DATASETS", "datasets"), "coco", "coco_stuff_10k" 16 | ) 17 | for s in ["test", "train"]: 18 | image_list_file = os.path.join(dataset_dir, "imageLists", f"{s}.txt") 19 | with open(image_list_file, "r") as f: 20 | image_list = f.readlines() 21 | 22 | image_list = [f.strip() for f in image_list] 23 | 24 | image_dir = os.path.join(dataset_dir, "images_detectron2", s) 25 | Path(image_dir).mkdir(parents=True, exist_ok=True) 26 | annotation_dir = os.path.join(dataset_dir, "annotations_detectron2", s) 27 | Path(annotation_dir).mkdir(parents=True, exist_ok=True) 28 | 29 | for fname in tqdm.tqdm(image_list): 30 | copyfile( 31 | os.path.join(dataset_dir, "images", fname + ".jpg"), 32 | os.path.join(image_dir, fname + ".jpg"), 33 | ) 34 | 35 | img = np.asarray(Image.open(os.path.join(image_dir, fname + ".jpg"))) 36 | 37 | matfile = h5py.File(os.path.join(dataset_dir, "annotations", fname + ".mat")) 38 | S = np.array(matfile["S"]).astype(np.uint8) 39 | S = np.transpose(S) 40 | S = S - 2 # 1 (ignore) becomes 255. 
others are shifted by 2 41 | 42 | assert S.shape == img.shape[:2], "{} vs {}".format(S.shape, img.shape) 43 | 44 | Image.fromarray(S).save(os.path.join(annotation_dir, fname + ".png")) 45 | -------------------------------------------------------------------------------- /init_datasets/voc_meta/trans_query.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/SimFormer-Weak-Shot-Semantic-Segmentation/9e32a800d9c40c1f85e7b1d8d24c412572f484f7/init_datasets/voc_meta/trans_query.pth -------------------------------------------------------------------------------- /init_datasets/voc_meta/word_vectors/fasttext.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/SimFormer-Weak-Shot-Semantic-Segmentation/9e32a800d9c40c1f85e7b1d8d24c412572f484f7/init_datasets/voc_meta/word_vectors/fasttext.pkl -------------------------------------------------------------------------------- /init_datasets/voc_meta/word_vectors/word2vec.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/SimFormer-Weak-Shot-Semantic-Segmentation/9e32a800d9c40c1f85e7b1d8d24c412572f484f7/init_datasets/voc_meta/word_vectors/word2vec.pkl -------------------------------------------------------------------------------- /mask_former/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from . import data # register all new datasets 3 | from . import modeling 4 | 5 | # config 6 | from .config import add_mask_former_config 7 | 8 | # dataset loading 9 | from .data.dataset_mappers.detr_panoptic_dataset_mapper import DETRPanopticDatasetMapper 10 | from .data.dataset_mappers.mask_former_panoptic_dataset_mapper import ( 11 | MaskFormerPanopticDatasetMapper, 12 | ) 13 | from .data.dataset_mappers.mask_former_semantic_dataset_mapper import ( 14 | MaskFormerSemanticDatasetMapper, 15 | ) 16 | 17 | # from .data.dataset_mappers.weakshot_semantic_dataset_mapper import ( 18 | # WeakShotSemSegMapper, 19 | # ) 20 | 21 | # models 22 | from .mask_former_model import MaskFormer 23 | from .test_time_augmentation import SemanticSegmentorWithTTA 24 | -------------------------------------------------------------------------------- /mask_former/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | from detectron2.config import CfgNode as CN 4 | 5 | 6 | def add_mask_former_config(cfg): 7 | """ 8 | Add config for MASK_FORMER. 9 | """ 10 | cfg.EvalPseudoLabel=False 11 | cfg.GeneratePseudoLabel = False 12 | # dir_name under datasets/ 13 | cfg.PSEUDO_LABEL_PATH = 'none' 14 | 15 | # data config 16 | # select the dataset mapper 17 | cfg.INPUT.DATASET_MAPPER_NAME = "mask_former_semantic" 18 | # Color augmentation 19 | cfg.INPUT.COLOR_AUG_SSD = False 20 | # We retry random cropping until no single category in semantic segmentation GT occupies more 21 | # than `SINGLE_CATEGORY_MAX_AREA` part of the crop. 22 | cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA = 1.0 23 | # Pad image and segmentation GT in dataset mapper. 
24 | cfg.INPUT.SIZE_DIVISIBILITY = -1 25 | 26 | # solver config 27 | # weight decay on embedding 28 | cfg.SOLVER.WEIGHT_DECAY_EMBED = 0.0 29 | # optimizer 30 | cfg.SOLVER.OPTIMIZER = "ADAMW" 31 | cfg.SOLVER.BACKBONE_MULTIPLIER = 0.1 32 | 33 | # mask_former model config 34 | cfg.MODEL.MASK_FORMER = CN() 35 | 36 | # loss 37 | cfg.MODEL.MASK_FORMER.DEEP_SUPERVISION = True 38 | cfg.MODEL.MASK_FORMER.NO_OBJECT_WEIGHT = 0.1 39 | cfg.MODEL.MASK_FORMER.DICE_WEIGHT = 1.0 40 | cfg.MODEL.MASK_FORMER.MASK_WEIGHT = 20.0 41 | 42 | # transformer config 43 | cfg.MODEL.MASK_FORMER.NHEADS = 8 44 | cfg.MODEL.MASK_FORMER.DROPOUT = 0.1 45 | cfg.MODEL.MASK_FORMER.DIM_FEEDFORWARD = 2048 46 | cfg.MODEL.MASK_FORMER.ENC_LAYERS = 0 47 | cfg.MODEL.MASK_FORMER.DEC_LAYERS = 6 48 | cfg.MODEL.MASK_FORMER.PRE_NORM = False 49 | 50 | cfg.MODEL.MASK_FORMER.HIDDEN_DIM = 256 51 | cfg.MODEL.MASK_FORMER.NUM_OBJECT_QUERIES = 100 52 | 53 | cfg.MODEL.MASK_FORMER.TRANSFORMER_IN_FEATURE = "res5" 54 | cfg.MODEL.MASK_FORMER.ENFORCE_INPUT_PROJ = False 55 | 56 | # mask_former inference config 57 | cfg.MODEL.MASK_FORMER.TEST = CN() 58 | cfg.MODEL.MASK_FORMER.TEST.PANOPTIC_ON = False 59 | cfg.MODEL.MASK_FORMER.TEST.OBJECT_MASK_THRESHOLD = 0.0 60 | cfg.MODEL.MASK_FORMER.TEST.OVERLAP_THRESHOLD = 0.0 61 | cfg.MODEL.MASK_FORMER.TEST.SEM_SEG_POSTPROCESSING_BEFORE_INFERENCE = False 62 | 63 | # Sometimes `backbone.size_divisibility` is set to 0 for some backbone (e.g. ResNet) 64 | # you can use this config to override 65 | cfg.MODEL.MASK_FORMER.SIZE_DIVISIBILITY = 32 66 | 67 | # pixel decoder config 68 | cfg.MODEL.SEM_SEG_HEAD.MASK_DIM = 256 69 | # adding transformer in pixel decoder 70 | cfg.MODEL.SEM_SEG_HEAD.TRANSFORMER_ENC_LAYERS = 0 71 | # pixel decoder 72 | cfg.MODEL.SEM_SEG_HEAD.PIXEL_DECODER_NAME = "BasePixelDecoder" 73 | 74 | # swin transformer backbone 75 | cfg.MODEL.SWIN = CN() 76 | cfg.MODEL.SWIN.PRETRAIN_IMG_SIZE = 224 77 | cfg.MODEL.SWIN.PATCH_SIZE = 4 78 | cfg.MODEL.SWIN.EMBED_DIM = 96 79 | cfg.MODEL.SWIN.DEPTHS = [2, 2, 6, 2] 80 | cfg.MODEL.SWIN.NUM_HEADS = [3, 6, 12, 24] 81 | cfg.MODEL.SWIN.WINDOW_SIZE = 7 82 | cfg.MODEL.SWIN.MLP_RATIO = 4.0 83 | cfg.MODEL.SWIN.QKV_BIAS = True 84 | cfg.MODEL.SWIN.QK_SCALE = None 85 | cfg.MODEL.SWIN.DROP_RATE = 0.0 86 | cfg.MODEL.SWIN.ATTN_DROP_RATE = 0.0 87 | cfg.MODEL.SWIN.DROP_PATH_RATE = 0.3 88 | cfg.MODEL.SWIN.APE = False 89 | cfg.MODEL.SWIN.PATCH_NORM = True 90 | cfg.MODEL.SWIN.OUT_FEATURES = ["res2", "res3", "res4", "res5"] 91 | -------------------------------------------------------------------------------- /mask_former/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from . import datasets 3 | -------------------------------------------------------------------------------- /mask_former/data/dataset_mappers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mask_former/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from . 
import ( 3 | register_ade20k_full, 4 | register_ade20k_panoptic, 5 | register_coco_stuff_10k, 6 | register_mapillary_vistas, 7 | register_voc_splits, 8 | ) 9 | -------------------------------------------------------------------------------- /mask_former/data/datasets/register_voc_splits.py: -------------------------------------------------------------------------------- 1 | # import os 2 | # import torch 3 | # from detectron2.data import DatasetCatalog, MetadataCatalog 4 | # from .shared import read_data_list_from_file, write_data_list_to_file, split_data_list_from_file 5 | # import numpy as np 6 | # import pickle 7 | # 8 | # ignored_cid = 255 9 | # ignored_dids = [255] 10 | # 11 | # CAT_LIST = ['background', 'aeroplane', 'bicycle', 'bird', 'boat', 12 | # 'bottle', 'bus', 'car', 'cat', 'chair', 13 | # 'cow', 'diningtable', 'dog', 'horse', 14 | # 'motorbike', 'person', 'pottedplant', 15 | # 'sheep', 'sofa', 'train', 16 | # 'tvmonitor'] 17 | # 18 | # CAT_COLOR = [ 19 | # [255, 255, 255], 20 | # [220, 20, 60], [119, 11, 32], [0, 0, 142], [0, 0, 230], [106, 0, 228], 21 | # [0, 60, 100], [0, 80, 100], [0, 0, 70], [0, 0, 192], [250, 170, 30], 22 | # [100, 170, 30], [220, 220, 0], [175, 116, 175], [250, 0, 30], [165, 42, 42], 23 | # [255, 77, 255], [0, 226, 252], [182, 182, 255], [0, 82, 0], [120, 166, 157], 24 | # ] 25 | # 26 | # voc_dataset_id_to_names = {k: v for k, v in enumerate(CAT_LIST)} 27 | # voc_dataset_id_to_color = {k: v for k, v in enumerate(CAT_COLOR)} 28 | # 29 | # voc_dataset_ids = list(voc_dataset_id_to_names.keys()) 30 | # dataset_id_to_query_id = {did: i for i, did in enumerate(voc_dataset_ids)} 31 | # 32 | # word2vec = pickle.load(open('init_datasets/voc_meta/word_vectors/word2vec.pkl', "rb")).astype(np.float32) 33 | # fasttext = pickle.load(open('init_datasets/voc_meta/word_vectors/fasttext.pkl', "rb")).astype(np.float32) 34 | # fcweight = torch.load('init_datasets/voc_meta/trans_query.pth', map_location='cpu').numpy() 35 | # 36 | # 37 | # # from mask_former.utils.viz_tools import viz_class_colors 38 | # # viz_class_colors(voc_dataset_id_to_names, voc_dataset_id_to_color) 39 | # 40 | # def _get_voc_full_meta(): 41 | # splited_dataset_ids = voc_dataset_ids 42 | # assert len(splited_dataset_ids) == 21, len(splited_dataset_ids) 43 | # splited_names = [voc_dataset_id_to_names[did] for did in splited_dataset_ids] 44 | # splited_did_to_cid = {k: i for i, k in enumerate(splited_dataset_ids)} 45 | # 46 | # # from 0 to 20. 
47 | # cid_to_did = {v: k for k, v in splited_did_to_cid.items() if v != ignored_cid} 48 | # 49 | # splited_contiguous_id_to_color = {v: voc_dataset_id_to_color[k] for k, v in splited_did_to_cid.items()} 50 | # 51 | # ret = { 52 | # "c_dataset_id_to_contiguous_id": splited_did_to_cid, 53 | # "c_cid_to_did": cid_to_did, 54 | # "c_class_names": splited_names, 55 | # "c_contiguous_id_to_color": splited_contiguous_id_to_color, 56 | # } 57 | # ret["word2vec"] = word2vec 58 | # ret["fasttext"] = fasttext 59 | # ret["fcweight"] = fcweight 60 | # return ret 61 | # 62 | # 63 | # def _get_voc_split1_meta(): 64 | # novel1_names = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle'] 65 | # base1_names = [name for name in CAT_LIST if name not in novel1_names] 66 | # assert len(base1_names) + len(novel1_names) == len(CAT_LIST) 67 | # 68 | # base_dataset_ids = [k for k, v in voc_dataset_id_to_names.items() if v in base1_names] 69 | # novel_dataset_ids = [k for k, v in voc_dataset_id_to_names.items() if v in novel1_names] 70 | # 71 | # did_to_cid_full = {k: i for i, k in enumerate(voc_dataset_ids)} 72 | # contiguous_all_dataset_ids = list(did_to_cid_full.keys()) 73 | # 74 | # did_to_cid_training = {k: v if k in base_dataset_ids else ignored_cid for k, v in did_to_cid_full.items()} 75 | # did_to_cid_testing = did_to_cid_full 76 | # 77 | # ret = { 78 | # "c_dataset_id_to_contiguous_id_training": did_to_cid_training, 79 | # "c_dataset_id_to_contiguous_id_testing": did_to_cid_testing, 80 | # "c_base_dataset_ids": base_dataset_ids, 81 | # "c_novel_dataset_ids": novel_dataset_ids, 82 | # "c_contiguous_all_dataset_ids": contiguous_all_dataset_ids, 83 | # "c_dataset_id_to_name": voc_dataset_id_to_names, 84 | # "c_dataset_id_to_color": voc_dataset_id_to_color, 85 | # } 86 | # ret["word2vec"] = word2vec 87 | # ret["fasttext"] = fasttext 88 | # ret["fcweight"] = fcweight 89 | # return ret 90 | # 91 | # 92 | # name_to_file = { 93 | # 'voc_full_trainaug_seg': 'init_datasets/voc_meta/train_aug.txt', 94 | # 'voc_full_val_seg': 'init_datasets/voc_meta/val.txt', 95 | # 96 | # 'voc_split1_trainaug_seg': 'init_datasets/voc_meta/train_aug_base1.txt', 97 | # 'voc_split1_val_seg': 'init_datasets/voc_meta/val.txt', 98 | # } 99 | # name_to_meta = { 100 | # 'voc_full_trainaug_seg': _get_voc_full_meta, 101 | # 'voc_full_val_seg': _get_voc_full_meta, 102 | # 103 | # 'voc_split1_trainaug_seg': _get_voc_split1_meta, 104 | # 'voc_split1_val_seg': _get_voc_split1_meta, 105 | # } 106 | # 107 | # 108 | # def register_voc_splits(root): 109 | # print(f'Register VOC QTFormer...') 110 | # 111 | # data_root = os.path.join(root, "VOC2012") 112 | # 113 | # # Read&Save Base1 Split TXT 114 | # # base1_meta = _get_voc_base1_meta() 115 | # # 116 | # # trainaug_base_list, trainaug_novel_list = split_data_list_from_file( 117 | # # data_root, name_to_file['voc_full_trainaug_seg'], base1_meta, voc_dataset_id_to_names) 118 | # # 119 | # # val_base_list, val_novel_list = split_data_list_from_file( 120 | # # data_root, name_to_file['voc_full_val_seg'], base1_meta, voc_dataset_id_to_names) 121 | # # 122 | # # write_data_list_to_file(data_root, trainaug_base_list, 'init_datasets/voc_meta/train_aug_base1.txt') 123 | # 124 | # for split_name in ['voc_full_trainaug_seg', 'voc_full_val_seg', 125 | # 'voc_split1_trainaug_seg', 'voc_split1_val_seg', ]: 126 | # split_meta = name_to_meta[split_name]() 127 | # 128 | # DatasetCatalog.register( 129 | # split_name, 130 | # lambda x=data_root, y=name_to_file[split_name]: 131 | # read_data_list_from_file(x, y) 132 | # ) 
133 | # 134 | # MetadataCatalog.get(split_name).set( 135 | # evaluator_type="weakshot_sem_seg", 136 | # ignore_label=ignored_cid, 137 | # **split_meta, 138 | # ) 139 | # 140 | # return 141 | # 142 | # 143 | # _root = os.getenv("DETECTRON2_DATASETS", "datasets") 144 | # register_voc_splits(_root) 145 | -------------------------------------------------------------------------------- /mask_former/data/datasets/shared.py: -------------------------------------------------------------------------------- 1 | import os 2 | from detectron2.data import detection_utils as utils 3 | import numpy as np 4 | from tqdm import tqdm 5 | import torch 6 | import pickle 7 | import torch.nn.functional as F 8 | 9 | def get_embedding(cfg): 10 | dataset_path = os.path.join(cfg['datadir'], cfg['dataset']) 11 | if cfg['embedding'] == 'word2vec': 12 | class_emb = pickle.load(open(dataset_path + '/word_vectors/word2vec.pkl', "rb")) 13 | elif cfg['embedding'] == 'fasttext': 14 | class_emb = pickle.load(open(dataset_path + '/word_vectors/fasttext.pkl', "rb")) 15 | elif cfg['embedding'] == 'fastnvec': 16 | class_emb = np.concatenate([pickle.load(open(dataset_path + '/word_vectors/fasttext.pkl', "rb")), 17 | pickle.load(open(dataset_path + '/word_vectors/word2vec.pkl', "rb"))], axis=1) 18 | else: 19 | raise ValueError("invalid embedding: {0}".format(cfg['embedding'])) 20 | 21 | if not cfg['emb_without_normal']: 22 | class_emb = F.normalize(torch.tensor(class_emb, dtype=torch.float32), p=2, dim=1) 23 | print("Class embedding map normalized!") 24 | else: 25 | class_emb = torch.tensor(class_emb, dtype=torch.float32) 26 | return class_emb 27 | 28 | 29 | def read_data_list_from_file(data_root, file_path): 30 | data_list = [] 31 | for line in open(file_path).read().splitlines(): 32 | data = {} 33 | img_name, ant_name = line.split(' ') 34 | abs_img_name = f'{data_root}/{img_name}' 35 | abs_ant_name = f'{data_root}/{ant_name}' 36 | 37 | assert os.path.exists(abs_img_name), f'FileNotFound: {abs_img_name}' 38 | assert os.path.exists(abs_ant_name), f'FileNotFound: {abs_ant_name}' 39 | 40 | data['file_name'] = abs_img_name 41 | data['sem_seg_file_name'] = abs_ant_name 42 | 43 | data_list.append(data) 44 | 45 | return data_list 46 | 47 | 48 | def split_data_list_from_file(data_root, file_path, split_meta, voc_dataset_id_to_names): 49 | splited_did_to_cid = split_meta['c_dataset_id_to_contiguous_id'] 50 | 51 | base_dids = [k for k, v in splited_did_to_cid.items() if v != 255] 52 | novel_dids = [k for k in voc_dataset_id_to_names.keys() if k not in base_dids] 53 | 54 | base_list, novel_list = [], [] 55 | for line in tqdm(open(file_path).read().splitlines()): 56 | data = {} 57 | img_name, ant_name = line.split(' ') 58 | abs_img_name = f'{data_root}/{img_name}' 59 | abs_ant_name = f'{data_root}/{ant_name}' 60 | 61 | assert os.path.exists(abs_img_name), f'FileNotFound: {abs_img_name}' 62 | assert os.path.exists(abs_ant_name), f'FileNotFound: {abs_ant_name}' 63 | 64 | raw_ant = utils.read_image(abs_ant_name) 65 | data['file_name'] = abs_img_name 66 | data['sem_seg_file_name'] = abs_ant_name 67 | 68 | has_novel = False 69 | for did in np.unique(raw_ant): 70 | if did in novel_dids: 71 | has_novel = True 72 | 73 | if has_novel: 74 | novel_list.append(data) 75 | else: 76 | base_list.append(data) 77 | 78 | return base_list, novel_list 79 | 80 | 81 | def write_data_list_to_file(data_root, data_list, file_path): 82 | 'images_detection2/2011_003276.jpg annotations_detection2/2011_003276.png' 83 | 84 | with open(file_path, 'w', encoding='utf-8') as f: 85 | for
data in data_list: 86 | line = f"{data['file_name'].split(data_root + '/')[1]}" \ 87 | f" {data['sem_seg_file_name'].split(data_root + '/')[1]}\n" 88 | f.write(line) 89 | -------------------------------------------------------------------------------- /mask_former/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .backbone.swin import D2SwinTransformer 3 | from .heads.mask_former_head import MaskFormerHead 4 | from .heads.per_pixel_baseline import PerPixelBaselineHead, PerPixelBaselinePlusHead 5 | from .heads.pixel_decoder import BasePixelDecoder 6 | -------------------------------------------------------------------------------- /mask_former/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mask_former/modeling/heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mask_former/modeling/heads/mask_former_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import logging 3 | from copy import deepcopy 4 | from typing import Callable, Dict, List, Optional, Tuple, Union 5 | 6 | import fvcore.nn.weight_init as weight_init 7 | from torch import nn 8 | from torch.nn import functional as F 9 | 10 | from detectron2.config import configurable 11 | from detectron2.layers import Conv2d, ShapeSpec, get_norm 12 | from detectron2.modeling import SEM_SEG_HEADS_REGISTRY 13 | 14 | from ..transformer.transformer_predictor import TransformerPredictor 15 | from .pixel_decoder import build_pixel_decoder 16 | 17 | 18 | @SEM_SEG_HEADS_REGISTRY.register() 19 | class MaskFormerHead(nn.Module): 20 | 21 | _version = 2 22 | 23 | def _load_from_state_dict( 24 | self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs 25 | ): 26 | version = local_metadata.get("version", None) 27 | if version is None or version < 2: 28 | # Do not warn if train from scratch 29 | scratch = True 30 | logger = logging.getLogger(__name__) 31 | for k in list(state_dict.keys()): 32 | newk = k 33 | if "sem_seg_head" in k and not k.startswith(prefix + "predictor"): 34 | newk = k.replace(prefix, prefix + "pixel_decoder.") 35 | # logger.debug(f"{k} ==> {newk}") 36 | if newk != k: 37 | state_dict[newk] = state_dict[k] 38 | del state_dict[k] 39 | scratch = False 40 | 41 | if not scratch: 42 | logger.warning( 43 | f"Weight format of {self.__class__.__name__} has changed! " 44 | "Please upgrade your models. Applying automatic conversion now ..." 45 | ) 46 | 47 | @configurable 48 | def __init__( 49 | self, 50 | input_shape: Dict[str, ShapeSpec], 51 | *, 52 | num_classes: int, 53 | pixel_decoder: nn.Module, 54 | loss_weight: float = 1.0, 55 | ignore_value: int = -1, 56 | # extra parameters 57 | transformer_predictor: nn.Module, 58 | transformer_in_feature: str, 59 | ): 60 | """ 61 | NOTE: this interface is experimental.
62 | Args: 63 | input_shape: shapes (channels and stride) of the input features 64 | num_classes: number of classes to predict 65 | pixel_decoder: the pixel decoder module 66 | loss_weight: loss weight 67 | ignore_value: category id to be ignored during training. 68 | transformer_predictor: the transformer decoder that makes prediction 69 | transformer_in_feature: input feature name to the transformer_predictor 70 | """ 71 | super().__init__() 72 | input_shape = sorted(input_shape.items(), key=lambda x: x[1].stride) 73 | self.in_features = [k for k, v in input_shape] 74 | feature_strides = [v.stride for k, v in input_shape] 75 | feature_channels = [v.channels for k, v in input_shape] 76 | 77 | self.ignore_value = ignore_value 78 | self.common_stride = 4 79 | self.loss_weight = loss_weight 80 | 81 | self.pixel_decoder = pixel_decoder 82 | self.predictor = transformer_predictor 83 | self.transformer_in_feature = transformer_in_feature 84 | 85 | self.num_classes = num_classes 86 | 87 | @classmethod 88 | def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]): 89 | return { 90 | "input_shape": { 91 | k: v for k, v in input_shape.items() if k in cfg.MODEL.SEM_SEG_HEAD.IN_FEATURES 92 | }, 93 | "ignore_value": cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, 94 | "num_classes": cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, 95 | "pixel_decoder": build_pixel_decoder(cfg, input_shape), 96 | "loss_weight": cfg.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT, 97 | "transformer_in_feature": cfg.MODEL.MASK_FORMER.TRANSFORMER_IN_FEATURE, 98 | "transformer_predictor": TransformerPredictor( 99 | cfg, 100 | cfg.MODEL.SEM_SEG_HEAD.CONVS_DIM 101 | if cfg.MODEL.MASK_FORMER.TRANSFORMER_IN_FEATURE == "transformer_encoder" 102 | else input_shape[cfg.MODEL.MASK_FORMER.TRANSFORMER_IN_FEATURE].channels, 103 | mask_classification=True, 104 | ), 105 | } 106 | 107 | def forward(self, features): 108 | return self.layers(features) 109 | 110 | def layers(self, features): 111 | mask_features, transformer_encoder_features = self.pixel_decoder.forward_features(features) 112 | if self.transformer_in_feature == "transformer_encoder": 113 | assert ( 114 | transformer_encoder_features is not None 115 | ), "Please use the TransformerEncoderPixelDecoder." 116 | predictions = self.predictor(transformer_encoder_features, mask_features) 117 | else: 118 | predictions = self.predictor(features[self.transformer_in_feature], mask_features) 119 | return predictions 120 | -------------------------------------------------------------------------------- /mask_former/modeling/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mask_former/modeling/transformer/position_encoding.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # # Modified by Bowen Cheng from: https://github.com/facebookresearch/detr/blob/master/models/position_encoding.py 3 | """ 4 | Various positional encodings for the transformer. 5 | """ 6 | import math 7 | 8 | import torch 9 | from torch import nn 10 | 11 | 12 | class PositionEmbeddingSine(nn.Module): 13 | """ 14 | This is a more standard version of the position embedding, very similar to the one 15 | used by the Attention is all you need paper, generalized to work on images. 
16 | """ 17 | 18 | def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None): 19 | super().__init__() 20 | self.num_pos_feats = num_pos_feats 21 | self.temperature = temperature 22 | self.normalize = normalize 23 | if scale is not None and normalize is False: 24 | raise ValueError("normalize should be True if scale is passed") 25 | if scale is None: 26 | scale = 2 * math.pi 27 | self.scale = scale 28 | 29 | def forward(self, x, mask=None): 30 | if mask is None: 31 | mask = torch.zeros((x.size(0), x.size(2), x.size(3)), device=x.device, dtype=torch.bool) 32 | not_mask = ~mask 33 | y_embed = not_mask.cumsum(1, dtype=torch.float32) 34 | x_embed = not_mask.cumsum(2, dtype=torch.float32) 35 | if self.normalize: 36 | eps = 1e-6 37 | y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale 38 | x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale 39 | 40 | dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device) 41 | dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) 42 | 43 | pos_x = x_embed[:, :, :, None] / dim_t 44 | pos_y = y_embed[:, :, :, None] / dim_t 45 | pos_x = torch.stack( 46 | (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4 47 | ).flatten(3) 48 | pos_y = torch.stack( 49 | (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4 50 | ).flatten(3) 51 | pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2) 52 | return pos 53 | -------------------------------------------------------------------------------- /mask_former/test_time_augmentation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import copy 3 | from itertools import count 4 | 5 | import numpy as np 6 | import torch 7 | from fvcore.transforms import HFlipTransform 8 | from torch import nn 9 | from torch.nn.parallel import DistributedDataParallel 10 | 11 | from detectron2.data.detection_utils import read_image 12 | from detectron2.modeling import DatasetMapperTTA 13 | 14 | __all__ = [ 15 | "SemanticSegmentorWithTTA", 16 | ] 17 | 18 | 19 | class SemanticSegmentorWithTTA(nn.Module): 20 | """ 21 | A SemanticSegmentor with test-time augmentation enabled. 22 | Its :meth:`__call__` method has the same interface as :meth:`SemanticSegmentor.forward`. 23 | """ 24 | 25 | def __init__(self, cfg, model, tta_mapper=None, batch_size=1): 26 | """ 27 | Args: 28 | cfg (CfgNode): 29 | model (SemanticSegmentor): a SemanticSegmentor to apply TTA on. 30 | tta_mapper (callable): takes a dataset dict and returns a list of 31 | augmented versions of the dataset dict. Defaults to 32 | `DatasetMapperTTA(cfg)`. 33 | batch_size (int): batch the augmented images into this batch size for inference. 34 | """ 35 | super().__init__() 36 | if isinstance(model, DistributedDataParallel): 37 | model = model.module 38 | self.cfg = cfg.clone() 39 | 40 | self.model = model 41 | 42 | if tta_mapper is None: 43 | tta_mapper = DatasetMapperTTA(cfg) 44 | self.tta_mapper = tta_mapper 45 | self.batch_size = batch_size 46 | 47 | def _batch_inference(self, batched_inputs): 48 | """ 49 | Execute inference on a list of inputs, 50 | using batch size = self.batch_size, instead of the length of the list. 
51 | Inputs & outputs have the same format as :meth:`SemanticSegmentor.forward` 52 | """ 53 | outputs = [] 54 | inputs = [] 55 | for idx, input in zip(count(), batched_inputs): 56 | inputs.append(input) 57 | if len(inputs) == self.batch_size or idx == len(batched_inputs) - 1: 58 | with torch.no_grad(): 59 | outputs.extend(self.model(inputs)) 60 | inputs = [] 61 | return outputs 62 | 63 | def __call__(self, batched_inputs): 64 | """ 65 | Same input/output format as :meth:`SemanticSegmentor.forward` 66 | """ 67 | 68 | def _maybe_read_image(dataset_dict): 69 | ret = copy.copy(dataset_dict) 70 | if "image" not in ret: 71 | image = read_image(ret.pop("file_name"), self.model.input_format) 72 | image = torch.from_numpy(np.ascontiguousarray(image.transpose(2, 0, 1))) # CHW 73 | ret["image"] = image 74 | if "height" not in ret and "width" not in ret: 75 | ret["height"] = image.shape[1] 76 | ret["width"] = image.shape[2] 77 | return ret 78 | 79 | return [self._inference_one_image(_maybe_read_image(x)) for x in batched_inputs] 80 | 81 | def _inference_one_image(self, input): 82 | """ 83 | Args: 84 | input (dict): one dataset dict with "image" field being a CHW tensor 85 | Returns: 86 | dict: one output dict 87 | """ 88 | augmented_inputs, tfms = self._get_augmented_inputs(input) 89 | # 1: forward with all augmented images 90 | outputs = self._batch_inference(augmented_inputs) 91 | # Delete now useless variables to avoid being out of memory 92 | del augmented_inputs 93 | # 2: merge the results 94 | # handle flip specially 95 | new_outputs = [] 96 | for output, tfm in zip(outputs, tfms): 97 | if any(isinstance(t, HFlipTransform) for t in tfm.transforms): 98 | new_outputs.append(output.pop("sem_seg").flip(dims=[2])) 99 | else: 100 | new_outputs.append(output.pop("sem_seg")) 101 | del outputs 102 | # to avoid OOM with torch.stack 103 | final_predictions = new_outputs[0] 104 | for i in range(1, len(new_outputs)): 105 | final_predictions += new_outputs[i] 106 | final_predictions = final_predictions / len(new_outputs) 107 | del new_outputs 108 | return {"sem_seg": final_predictions} 109 | 110 | def _get_augmented_inputs(self, input): 111 | augmented_inputs = self.tta_mapper(input) 112 | tfms = [x.pop("transforms") for x in augmented_inputs] 113 | return augmented_inputs, tfms 114 | -------------------------------------------------------------------------------- /mask_former/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /mask_former/utils/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # Modified by Bowen Cheng from https://github.com/facebookresearch/detr/blob/master/util/misc.py 3 | """ 4 | Misc functions, including distributed helpers. 5 | 6 | Mostly copy-paste from torchvision references. 
7 | """ 8 | from typing import List, Optional 9 | 10 | import torch 11 | import torch.distributed as dist 12 | import torchvision 13 | from torch import Tensor 14 | 15 | 16 | def _max_by_axis(the_list): 17 | # type: (List[List[int]]) -> List[int] 18 | maxes = the_list[0] 19 | for sublist in the_list[1:]: 20 | for index, item in enumerate(sublist): 21 | maxes[index] = max(maxes[index], item) 22 | return maxes 23 | 24 | 25 | class NestedTensor(object): 26 | def __init__(self, tensors, mask: Optional[Tensor]): 27 | self.tensors = tensors 28 | self.mask = mask 29 | 30 | def to(self, device): 31 | # type: (Device) -> NestedTensor # noqa 32 | cast_tensor = self.tensors.to(device) 33 | mask = self.mask 34 | if mask is not None: 35 | assert mask is not None 36 | cast_mask = mask.to(device) 37 | else: 38 | cast_mask = None 39 | return NestedTensor(cast_tensor, cast_mask) 40 | 41 | def decompose(self): 42 | return self.tensors, self.mask 43 | 44 | def __repr__(self): 45 | return str(self.tensors) 46 | 47 | 48 | def nested_tensor_from_tensor_list(tensor_list: List[Tensor]): 49 | # TODO make this more general 50 | if tensor_list[0].ndim == 3: 51 | if torchvision._is_tracing(): 52 | # nested_tensor_from_tensor_list() does not export well to ONNX 53 | # call _onnx_nested_tensor_from_tensor_list() instead 54 | return _onnx_nested_tensor_from_tensor_list(tensor_list) 55 | 56 | # TODO make it support different-sized images 57 | max_size = _max_by_axis([list(img.shape) for img in tensor_list]) 58 | # min_size = tuple(min(s) for s in zip(*[img.shape for img in tensor_list])) 59 | batch_shape = [len(tensor_list)] + max_size 60 | b, c, h, w = batch_shape 61 | dtype = tensor_list[0].dtype 62 | device = tensor_list[0].device 63 | tensor = torch.zeros(batch_shape, dtype=dtype, device=device) 64 | mask = torch.ones((b, h, w), dtype=torch.bool, device=device) 65 | for img, pad_img, m in zip(tensor_list, tensor, mask): 66 | pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 67 | m[: img.shape[1], : img.shape[2]] = False 68 | else: 69 | raise ValueError("not supported") 70 | return NestedTensor(tensor, mask) 71 | 72 | 73 | # _onnx_nested_tensor_from_tensor_list() is an implementation of 74 | # nested_tensor_from_tensor_list() that is supported by ONNX tracing. 
75 | @torch.jit.unused 76 | def _onnx_nested_tensor_from_tensor_list(tensor_list: List[Tensor]) -> NestedTensor: 77 | max_size = [] 78 | for i in range(tensor_list[0].dim()): 79 | max_size_i = torch.max( 80 | torch.stack([img.shape[i] for img in tensor_list]).to(torch.float32) 81 | ).to(torch.int64) 82 | max_size.append(max_size_i) 83 | max_size = tuple(max_size) 84 | 85 | # work around for 86 | # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 87 | # m[: img.shape[1], :img.shape[2]] = False 88 | # which is not yet supported in onnx 89 | padded_imgs = [] 90 | padded_masks = [] 91 | for img in tensor_list: 92 | padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))] 93 | padded_img = torch.nn.functional.pad(img, (0, padding[2], 0, padding[1], 0, padding[0])) 94 | padded_imgs.append(padded_img) 95 | 96 | m = torch.zeros_like(img[0], dtype=torch.int, device=img.device) 97 | padded_mask = torch.nn.functional.pad(m, (0, padding[2], 0, padding[1]), "constant", 1) 98 | padded_masks.append(padded_mask.to(torch.bool)) 99 | 100 | tensor = torch.stack(padded_imgs) 101 | mask = torch.stack(padded_masks) 102 | 103 | return NestedTensor(tensor, mask=mask) 104 | 105 | 106 | def is_dist_avail_and_initialized(): 107 | if not dist.is_available(): 108 | return False 109 | if not dist.is_initialized(): 110 | return False 111 | return True 112 | -------------------------------------------------------------------------------- /mask_former/utils/viz.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | from terminaltables import AsciiTable 4 | import copy 5 | 6 | 7 | def viz_data_ant(img, mask, meta, fpath='output/t.jpg'): 8 | nimg = img.permute(1, 2, 0).numpy() / 255 9 | 10 | colored_mask = np.ones_like(nimg) 11 | nmask = mask.numpy() 12 | 13 | for cid in np.unique(nmask): 14 | color = meta.voc_contiguous_id_to_color[cid] 15 | colored_mask[:, :, 0][nmask == cid] = color[0] 16 | colored_mask[:, :, 1][nmask == cid] = color[1] 17 | colored_mask[:, :, 2][nmask == cid] = color[2] 18 | 19 | size_unit = 5 20 | font_unit = 7 21 | 22 | fig, axes = plt.subplots(ncols=2, nrows=1, 23 | figsize=(2 * size_unit, 1 * size_unit)) 24 | 25 | axes[0].imshow(nimg) 26 | axes[0].axis('off') 27 | 28 | axes[1].imshow(colored_mask / 255.) 29 | axes[1].axis('off') 30 | 31 | plt.tight_layout() 32 | plt.savefig(fpath, dpi=100) 33 | plt.close() 34 | 35 | return 36 | 37 | 38 | def viz_class_colors(did_to_names, did_to_colors, fpath='output/class_colors.jpg'): 39 | import copy 40 | dict_list = [] 41 | lsize = 3 42 | 43 | row = {} 44 | for i, did in enumerate(list(did_to_names)): 45 | name = did_to_names[did] 46 | color = did_to_colors[did] 47 | 48 | patch = np.array(color)[np.newaxis, np.newaxis, :] * np.ones([100, 100, 3]) 49 | row[f'{did}: {name}'] = patch / 255. 
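# Flush the accumulated row into dict_list after every lsize patches (and after the last class), so the rendered grid holds at most lsize color swatches per row.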
50 | 51 | if ((i + 1) % lsize == 0) | (i == len(did_to_names) - 1): 52 | dict_list.append(copy.deepcopy(row)) 53 | row = {} 54 | 55 | viz_dict_list(dict_list, fpath) 56 | return 57 | 58 | 59 | def viz_dict_list(mask_dict_list, fpath, dpi=40): 60 | size_unit = 5 61 | font_unit = 7 62 | dict_num = len(mask_dict_list) 63 | mask_num = max(len(t) for t in mask_dict_list) 64 | 65 | fig, axes = plt.subplots(ncols=mask_num, nrows=dict_num, 66 | figsize=(mask_num * size_unit, dict_num * size_unit)) 67 | 68 | for row in range(dict_num): 69 | for col in range(mask_num): 70 | axes[row, col].axis('off') 71 | 72 | for row, mask_dict in enumerate(mask_dict_list): 73 | for col, kv in enumerate(mask_dict.items()): 74 | axes[row, col].set_title(kv[0], fontsize=size_unit * font_unit) 75 | img = kv[1] 76 | if len(img.shape) == 2: 77 | axes[row, col].imshow(img, 'gray', vmax=1., vmin=0.) 78 | elif len(img.shape) == 3: 79 | axes[row, col].imshow(img) 80 | else: 81 | raise NotImplementedError 82 | 83 | plt.tight_layout() 84 | plt.savefig(fpath, dpi=dpi) 85 | plt.close() 86 | return 87 | 88 | 89 | def c_print_csv_format(results, logger): 90 | col_num = 4 91 | 92 | for task, res in results.items(): 93 | imp_keys = sorted([k for k in res.keys() if "-" not in k]) 94 | summary_res = {k: res[k] for k in res.keys() if k in imp_keys} 95 | class_IoU_res = {k.split('-')[1]: res[k] for k in res.keys() if k not in imp_keys and 'IoU' in k} 96 | class_ACC_res = {k.split('-')[1]: res[k] for k in res.keys() if k not in imp_keys and 'ACC' in k} 97 | 98 | names = sorted(list(class_IoU_res.keys())) 99 | ml = max([len(name) for name in names]) 100 | 101 | table_data = [] 102 | title = [f' Name: IoU / ACC' for i in range(col_num)] 103 | table_data.append(title) 104 | 105 | row_data = [] 106 | for i, name in enumerate(names): 107 | row_data.append(f'{name.ljust(ml)}: {class_IoU_res[name]:.1f}/{class_ACC_res[name]:.1f}') 108 | if ((i + 1) % col_num == 0) | (i == len(names) - 1): 109 | table_data.append(copy.deepcopy(row_data)) 110 | row_data = [] 111 | 112 | table_ins = AsciiTable(table_data) 113 | for i in range(len(table_ins.justify_columns)): 114 | table_ins.justify_columns[i] = 'center' 115 | out_str = f'\n!! Class Result of \"{task}\":\n{table_ins.table}' 116 | logger.info(out_str) 117 | 118 | name, value = [], [] 119 | for k, v in summary_res.items(): 120 | name.append(f'{k.ljust(5)}') 121 | value.append(f'{v:.1f}') 122 | 123 | table_ins = AsciiTable([name, value]) 124 | for i in range(len(table_ins.justify_columns)): 125 | table_ins.justify_columns[i] = 'center' 126 | out_str = f'\n!! Summary of \"{task}\":\n{table_ins.table}' 127 | 128 | logger.info(out_str) 129 | 130 | return 131 | -------------------------------------------------------------------------------- /prop_former/__init__.py: -------------------------------------------------------------------------------- 1 | # config 2 | from .config import add_prop_former_config 3 | 4 | # models 5 | from .prop_former_model import PropFormer 6 | from .modeling.prop_former_head import PropFormerHead 7 | 8 | from .
import data 9 | 10 | from .data.dataset_mappers.weakshot_mapper_training import ( 11 | WeakShotMapperTraining, 12 | ) -------------------------------------------------------------------------------- /prop_former/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from detectron2.config import CfgNode as CN 4 | 5 | inf = 1e8 6 | 7 | 8 | def add_prop_former_config(cfg): 9 | print(f'adding PropFormer cfg') 10 | 11 | cfg.SEED = 6 12 | 13 | cfg.OUTPUT_PREFIX = '' 14 | cfg.MODEL.OUT_TASK = 'SEG' 15 | 16 | # For Proposal Line: 17 | cfg.MODEL.MASK_FORMER.MAKE_CLS = True 18 | cfg.MODEL.MASK_FORMER.CLS_WEIGHT = 1. 19 | 20 | cfg.MODEL.MASK_FORMER.FIXED_MATCHER = False 21 | cfg.MODEL.MASK_FORMER.FREEZE_QUERY = False 22 | cfg.MODEL.MASK_FORMER.TRANS_QUERY = 'RAND' # FCWT256 / WDVT1 / WDVT2 23 | 24 | cfg.MODEL.MASK_FORMER.CLS_LOSS_TYPE = 'SoftmaxBCE' # SoftmaxBCE / SigmoidBCE / RIB / SMS 25 | 26 | #################################### 27 | cfg.CROSS_IMG_SIM = CN() 28 | cfg.CROSS_IMG_SIM.BASE_LOSS = 0. 29 | cfg.CROSS_IMG_SIM.BASE_DETACH = True 30 | cfg.CROSS_IMG_SIM.BASE_POINT_NUM = 100 31 | cfg.CROSS_IMG_SIM.LayerNum = 3 32 | cfg.CROSS_IMG_SIM.BN = True 33 | 34 | cfg.CROSS_IMG_SIM.PAIR_TYPE = 'Deconf0.01' # [Rand, BInter, NInter, Deconf] 35 | 36 | cfg.CROSS_IMG_SIM.TEACH_DETACH = True 37 | cfg.CROSS_IMG_SIM.DISTILL_LOSS = 0. 38 | cfg.CROSS_IMG_SIM.NOVEL_POINT_NUM = 100 39 | cfg.CROSS_IMG_SIM.DISTILL_TO = 'NovelScore' # [NovelScore, FullScore, FullLogit, FullLogitC] 40 | cfg.CROSS_IMG_SIM.DISTILL_FUNC = 'ce' # [ce, ce, b0.5] 41 | cfg.CROSS_IMG_SIM.FOCUS_K = 0. 42 | cfg.CROSS_IMG_SIM.DISTILL_VALID = False 43 | 44 | ############################ 45 | cfg.ALL_EXISTING = True 46 | cfg.NOVEL_HAS_MASK = False 47 | #################################### 48 | cfg.ASM = CN() 49 | cfg.ASM.HasMaskCls = 5. 50 | cfg.ASM.NoMaskCls = 5. 51 | cfg.ASM.HasMaskMask = 1. 52 | cfg.ASM.NoMaskMask = 0. 53 | 54 | #################################### 55 | cfg.LOSS = CN() 56 | cfg.LOSS.AssignCls = 5. 57 | cfg.LOSS.MILCls = 0. 58 | 59 | cfg.LOSS.AssignMaskDICE = 1. 60 | cfg.LOSS.AssignMaskMASK = 20. 61 | cfg.LOSS.CompSupNovel = 0. 62 | 63 | cfg.LOSS.CompSupNovelType = 'EQ' # [EQ, IN] 64 | cfg.LOSS.IgnoreInit = -2.9444 # Disable by <=-50 65 | cfg.LOSS.IgnoreLearnable = False 66 | 67 | #################################### 68 | cfg.EVAL = CN() 69 | cfg.EVAL.BIAS = ('1_1_1',) 70 | return 71 | -------------------------------------------------------------------------------- /prop_former/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from . import datasets 3 | -------------------------------------------------------------------------------- /prop_former/data/dataset_mappers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | -------------------------------------------------------------------------------- /prop_former/data/datasets/ADE_20k/register_ADE_20k_splits.py: -------------------------------------------------------------------------------- 1 | import os 2 | from detectron2.data import DatasetCatalog, MetadataCatalog 3 | import prop_former.data.datasets.ADE_20k.info as INFO 4 | from detectron2.utils.file_io import PathManager 5 | from detectron2.data import detection_utils as utils 6 | import numpy as np 7 | from tqdm import tqdm 8 | 9 | 10 | def load_sem_seg(gt_root, image_root, gt_ext="png", image_ext="jpg"): 11 | # We match input images with ground truth based on their relative filepaths (without file 12 | # extensions) starting from 'image_root' and 'gt_root' respectively. 13 | def file2id(folder_path, file_path): 14 | # extract relative path starting from `folder_path` 15 | image_id = os.path.normpath(os.path.relpath(file_path, start=folder_path)) 16 | # remove file extension 17 | image_id = os.path.splitext(image_id)[0] 18 | return image_id 19 | 20 | input_files = sorted( 21 | (os.path.join(image_root, f) for f in PathManager.ls(image_root) if f.endswith(image_ext)), 22 | key=lambda file_path: file2id(image_root, file_path), 23 | ) 24 | gt_files = sorted( 25 | (os.path.join(gt_root, f) for f in PathManager.ls(gt_root) if f.endswith(gt_ext)), 26 | key=lambda file_path: file2id(gt_root, file_path), 27 | ) 28 | 29 | assert len(gt_files) > 0, "No annotations found in {}.".format(gt_root) 30 | 31 | # Use the intersection, so that val2017_100 annotations can run smoothly with val2017 images 32 | if len(input_files) != len(gt_files): 33 | input_basenames = [os.path.basename(f)[: -len(image_ext)] for f in input_files] 34 | gt_basenames = [os.path.basename(f)[: -len(gt_ext)] for f in gt_files] 35 | intersect = list(set(input_basenames) & set(gt_basenames)) 36 | # sort, otherwise each worker may obtain a list[dict] in different order 37 | intersect = sorted(intersect) 38 | input_files = [os.path.join(image_root, f + image_ext) for f in intersect] 39 | gt_files = [os.path.join(gt_root, f + gt_ext) for f in intersect] 40 | 41 | dataset_dicts = [] 42 | 43 | all_255_list = ['ADE_train_00005149', 44 | 'ADE_train_00005150', 45 | 'ADE_train_00005152', 46 | 'ADE_train_00005333', 47 | 'ADE_train_00005905', 48 | 'ADE_train_00006510', 49 | 'ADE_train_00013298', 50 | 'ADE_train_00014634', 51 | 'ADE_train_00014636', 52 | 'ADE_train_00014884', 53 | 'ADE_train_00015320', 54 | 'ADE_train_00015330', 55 | 'ADE_train_00015928', 56 | 'ADE_train_00019743', 57 | 'ADE_train_00019385', 58 | 'ADE_train_00019873'] 59 | for (img_path, gt_path) in tqdm(zip(input_files, gt_files)): 60 | if os.path.basename(img_path).split('.')[0] in all_255_list: 61 | continue 62 | record = {} 63 | record["file_name"] = img_path 64 | record["sem_seg_file_name"] = gt_path 65 | record["type"] = 'existing' 66 | 67 | # raw_segm_gt = utils.read_image(gt_path) 68 | # if raw_segm_gt.mean() == 255: 69 | # print(f'') 70 | # print(f'ALL 255 in') 71 | # print(f'{gt_path}') 72 | # print(f'{np.unique(raw_segm_gt)}') 73 | # print(f'') 74 | # all_255_list.append(gt_path) 75 | # else: 76 | # dataset_dicts.append(record) 77 | dataset_dicts.append(record) 78 | 79 | return dataset_dicts 80 | 81 | 82 | # from mask_former.utils.viz_tools import viz_class_colors 83 | # viz_class_colors(voc_dataset_id_to_names, voc_dataset_id_to_color) 84 | 85 | def _get_ADE_20k_split_meta(s_name): 86 | # Only used in Training 87 | base_names = eval(f'INFO.{s_name}_base_names') 88 |
novel_names = eval(f'INFO.{s_name}_novel_names') 89 | assert len(base_names) + len(novel_names) == 150 90 | 91 | base_dids = [k for k, v in INFO.did_to_name.items() if v in base_names] 92 | novel_dids = [k for k, v in INFO.did_to_name.items() if v in novel_names] 93 | did_to_cid = {k: i for i, k in enumerate(INFO.did_list)} 94 | cid_to_did = {v: k for k, v in did_to_cid.items()} 95 | 96 | ret = { 97 | "c_did_to_cid": did_to_cid, 98 | "c_cid_to_did": cid_to_did, 99 | "c_class_names": [INFO.did_to_name[did] for did in did_to_cid.keys()], 100 | "c_did_to_name": INFO.did_to_name, 101 | 102 | "c_base_dids": base_dids, 103 | "c_novel_dids": novel_dids, 104 | 105 | "c_did_to_color": INFO.did_to_color, 106 | "stuff_classes": [INFO.did_to_name[did] for did in did_to_cid.keys()] 107 | } 108 | return ret 109 | 110 | 111 | def register_ADE_20k_splits(root): 112 | print(f'Register ADE 20K PropFormer...') 113 | root = os.path.join(root, "ADEChallengeData2016") 114 | 115 | for s_name in ['split1', 'split2', 'split3', 'split4']: 116 | split_meta = _get_ADE_20k_split_meta(s_name) 117 | for name, image_dirname, sem_seg_dirname in [ 118 | ("train", "images_detectron2/train", "annotations_detectron2/train"), 119 | ("val", "images_detectron2/test", "annotations_detectron2/test"), 120 | ]: 121 | split_name = f'ADE_{s_name}_{name}' 122 | image_dir = os.path.join(root, image_dirname) 123 | gt_dir = os.path.join(root, sem_seg_dirname) 124 | DatasetCatalog.register( 125 | split_name, lambda x=image_dir, y=gt_dir: load_sem_seg(y, x, gt_ext="png", image_ext="jpg") 126 | ) 127 | MetadataCatalog.get(split_name).set( 128 | image_root=image_dir, 129 | sem_seg_root=gt_dir, 130 | evaluator_type="weakshot_sem_seg", 131 | ignore_label=INFO.ignored_cid, 132 | **split_meta, 133 | ) 134 | return 135 | 136 | 137 | _root = os.getenv("DETECTRON2_DATASETS", "datasets") 138 | register_ADE_20k_splits(_root) 139 | -------------------------------------------------------------------------------- /prop_former/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
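# Importing these submodules is what makes the weak-shot splits available: each register_*.py calls its register function at module import time, populating Detectron2's DatasetCatalog and MetadataCatalog as a side effect.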
2 | from .voc import register_voc_splits 3 | from .coco_stuff_10k import register_coco_stuff_10k_splits 4 | from .ADE_20k import register_ADE_20k_splits 5 | -------------------------------------------------------------------------------- /prop_former/data/datasets/coco_stuff_10k/meta_files/updated_rand_permute.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/SimFormer-Weak-Shot-Semantic-Segmentation/9e32a800d9c40c1f85e7b1d8d24c412572f484f7/prop_former/data/datasets/coco_stuff_10k/meta_files/updated_rand_permute.npy -------------------------------------------------------------------------------- /prop_former/data/datasets/coco_stuff_10k/register_coco_stuff_10k_splits.py: -------------------------------------------------------------------------------- 1 | import os 2 | from detectron2.data import DatasetCatalog, MetadataCatalog 3 | import prop_former.data.datasets.coco_stuff_10k.meta_files.info as INFO 4 | # from detectron2.data.datasets import load_sem_seg 5 | from detectron2.utils.file_io import PathManager 6 | from detectron2.data import detection_utils as utils 7 | import numpy as np 8 | from tqdm import tqdm 9 | from .updated_images import updated_func_dict 10 | 11 | 12 | def load_sem_seg(gt_root, image_root, gt_ext="png", image_ext="jpg"): 13 | # We match input images with ground truth based on their relative filepaths (without file 14 | # extensions) starting from 'image_root' and 'gt_root' respectively. 15 | def file2id(folder_path, file_path): 16 | # extract relative path starting from `folder_path` 17 | image_id = os.path.normpath(os.path.relpath(file_path, start=folder_path)) 18 | # remove file extension 19 | image_id = os.path.splitext(image_id)[0] 20 | return image_id 21 | 22 | input_files = sorted( 23 | (os.path.join(image_root, f) for f in PathManager.ls(image_root) if f.endswith(image_ext)), 24 | key=lambda file_path: file2id(image_root, file_path), 25 | ) 26 | gt_files = sorted( 27 | (os.path.join(gt_root, f) for f in PathManager.ls(gt_root) if f.endswith(gt_ext)), 28 | key=lambda file_path: file2id(gt_root, file_path), 29 | ) 30 | 31 | assert len(gt_files) > 0, "No annotations found in {}.".format(gt_root) 32 | 33 | # Use the intersection, so that val2017_100 annotations can run smoothly with val2017 images 34 | if len(input_files) != len(gt_files): 35 | input_basenames = [os.path.basename(f)[: -len(image_ext)] for f in input_files] 36 | gt_basenames = [os.path.basename(f)[: -len(gt_ext)] for f in gt_files] 37 | intersect = list(set(input_basenames) & set(gt_basenames)) 38 | # sort, otherwise each worker may obtain a list[dict] in different order 39 | intersect = sorted(intersect) 40 | input_files = [os.path.join(image_root, f + image_ext) for f in intersect] 41 | gt_files = [os.path.join(gt_root, f + gt_ext) for f in intersect] 42 | 43 | dataset_dicts = [] 44 | for (img_path, gt_path) in tqdm(zip(input_files, gt_files)): 45 | 46 | if 'COCO_train2014_000000016680' in img_path: 47 | continue 48 | if 'COCO_train2014_000000230639' in img_path: 49 | continue 50 | if 'COCO_train2014_000000382127' in img_path: 51 | continue 52 | if 'COCO_train2014_000000429995' in img_path: 53 | continue 54 | if 'COCO_train2014_000000314646' in img_path: 55 | continue 56 | 57 | if 'COCO_train2014_000000003518' in img_path: 58 | continue 59 | if 'COCO_train2014_000000058075' in img_path: 60 | continue 61 | 62 | record = {} 63 | record["file_name"] = img_path 64 | record["sem_seg_file_name"] = gt_path 65 | record["type"] = 
'existing' 66 | 67 | # raw_segm_gt = utils.read_image(gt_path) 68 | # if raw_segm_gt.mean() == 255: 69 | # print(f'') 70 | # print(f'') 71 | # print(f'ALL 255 in') 72 | # print(f'{gt_path}') 73 | # print(f'{np.unique(raw_segm_gt)}') 74 | # print(f'') 75 | # print(f'') 76 | # print(f'') 77 | # else: 78 | # dataset_dicts.append(record) 79 | dataset_dicts.append(record) 80 | 81 | return dataset_dicts 82 | 83 | 84 | # from mask_former.utils.viz_tools import viz_class_colors 85 | # viz_class_colors(voc_dataset_id_to_names, voc_dataset_id_to_color) 86 | 87 | def _get_coco_stuff_10k_split_meta(s_name): 88 | # Only used in Training 89 | base_names = eval(f'INFO.{s_name}_base_names') 90 | novel_names = eval(f'INFO.{s_name}_novel_names') 91 | assert len(base_names) + len(novel_names) == 171 92 | 93 | base_dids = [k for k, v in INFO.did_to_name.items() if v in base_names] 94 | novel_dids = [k for k, v in INFO.did_to_name.items() if v in novel_names] 95 | did_to_cid = {k: i for i, k in enumerate(INFO.did_list)} 96 | cid_to_did = {v: k for k, v in did_to_cid.items()} 97 | 98 | ret = { 99 | "c_did_to_cid": did_to_cid, 100 | "c_cid_to_did": cid_to_did, 101 | "c_class_names": [INFO.did_to_name[did] for did in did_to_cid.keys()], 102 | "c_did_to_name": INFO.did_to_name, 103 | 104 | "c_base_dids": base_dids, 105 | "c_novel_dids": novel_dids, 106 | 107 | "c_did_to_color": INFO.did_to_color, 108 | 109 | "stuff_classes": [INFO.did_to_name[did] for did in did_to_cid.keys()] 110 | } 111 | return ret 112 | 113 | 114 | def register_coco_stuff_10k_splits(root): 115 | print(f'Register COCO Stuff 10K PropFormer...') 116 | # registers dataset names such as 'coco_stuff_split1_train' 117 | # and 'coco_stuff_split1_val' 118 | 119 | root = os.path.join(root, "coco", "coco_stuff_10k") 120 | 121 | for s_name in ['split1', 'split2', 'split3', 'split4', 122 | 'split5', 'split6', 'split7', 'split8', 'split9']: 123 | split_meta = _get_coco_stuff_10k_split_meta(s_name) 124 | for name, image_dirname, sem_seg_dirname in [ 125 | ("train", "images_detectron2/train", "annotations_detectron2/train"), 126 | ("val", "images_detectron2/test", "annotations_detectron2/test"), 127 | ]: 128 | split_name = f'coco_stuff_{s_name}_{name}' 129 | image_dir = os.path.join(root, image_dirname) 130 | gt_dir = os.path.join(root, sem_seg_dirname) 131 | 132 | if s_name in ['split10', 'split11', 'split12', 'split13', 'split14', 'split15'] and name == 'train': 133 | load_updated_func = updated_func_dict[s_name] 134 | DatasetCatalog.register(split_name, load_updated_func) 135 | else: 136 | DatasetCatalog.register(split_name, 137 | lambda x=image_dir, y=gt_dir: load_sem_seg(y, x, gt_ext="png", image_ext="jpg")) 138 | 139 | MetadataCatalog.get(split_name).set( 140 | image_root=image_dir, 141 | sem_seg_root=gt_dir, 142 | evaluator_type="weakshot_sem_seg", 143 | ignore_label=INFO.ignored_cid, 144 | **split_meta, 145 | ) 146 | 147 | return 148 | 149 | 150 | _root = os.getenv("DETECTRON2_DATASETS", "datasets") 151 | register_coco_stuff_10k_splits(_root) 152 | -------------------------------------------------------------------------------- /prop_former/data/datasets/coco_stuff_10k/updated_images.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | from detectron2.data import DatasetCatalog, MetadataCatalog 5 | import prop_former.data.datasets.coco_stuff_10k.meta_files.info as INFO 6 | # from detectron2.data.datasets import load_sem_seg 7 | from detectron2.utils.file_io import PathManager 8 | from detectron2.data import
detection_utils as utils 9 | import numpy as np 10 | from tqdm import tqdm 11 | 12 | 13 | def load_sem_seg(gt_root, image_root, s_name, gt_ext="png", image_ext="jpg"): 14 | def file2id(folder_path, file_path): 15 | image_id = os.path.normpath(os.path.relpath(file_path, start=folder_path)) 16 | image_id = os.path.splitext(image_id)[0] 17 | return image_id 18 | 19 | input_files = sorted( 20 | (os.path.join(image_root, f) for f in PathManager.ls(image_root) if f.endswith(image_ext)), 21 | key=lambda file_path: file2id(image_root, file_path), 22 | ) 23 | gt_files = sorted( 24 | (os.path.join(gt_root, f) for f in PathManager.ls(gt_root) if f.endswith(gt_ext)), 25 | key=lambda file_path: file2id(gt_root, file_path), 26 | ) 27 | assert len(gt_files) > 0, "No annotations found in {}.".format(gt_root) 28 | if len(input_files) != len(gt_files): 29 | input_basenames = [os.path.basename(f)[: -len(image_ext)] for f in input_files] 30 | gt_basenames = [os.path.basename(f)[: -len(gt_ext)] for f in gt_files] 31 | intersect = list(set(input_basenames) & set(gt_basenames)) 32 | intersect = sorted(intersect) 33 | input_files = [os.path.join(image_root, f + image_ext) for f in intersect] 34 | gt_files = [os.path.join(gt_root, f + gt_ext) for f in intersect] 35 | 36 | dataset_dicts = [] 37 | for (img_path, gt_path) in tqdm(zip(input_files, gt_files)): 38 | if 'COCO_train2014_000000016680' in img_path: 39 | continue 40 | if 'COCO_train2014_000000230639' in img_path: 41 | continue 42 | if 'COCO_train2014_000000382127' in img_path: 43 | continue 44 | if 'COCO_train2014_000000429995' in img_path: 45 | continue 46 | if 'COCO_train2014_000000314646' in img_path: 47 | continue 48 | record = {} 49 | record["file_name"] = img_path 50 | record["sem_seg_file_name"] = gt_path 51 | 52 | dataset_dicts.append(record) 53 | 54 | return consider_updated_images(s_name, dataset_dicts) 55 | 56 | 57 | def consider_updated_images(s_name, dataset_dicts): 58 | updated_ratio_dict = { 59 | 'split10': 0.0, 60 | 'split11': 0.1, 61 | 'split12': 0.2, 62 | 'split13': 0.3, 63 | 'split14': 0.4, 64 | 'split15': 0.5, 65 | } 66 | 67 | existing_ratio = 0.6 68 | 69 | existing_num = int(len(dataset_dicts) * existing_ratio) 70 | updated_num = int(len(dataset_dicts) * updated_ratio_dict[s_name]) 71 | 72 | randn_permute = np.load('prop_former/data/datasets/coco_stuff_10k/meta_files/updated_rand_permute.npy') 73 | existing_idx = randn_permute[:existing_num].tolist() 74 | updated_idx = randn_permute[existing_num:(existing_num + updated_num)].tolist() 75 | 76 | updated_existing_data_list = [] 77 | 78 | for i, data in enumerate(dataset_dicts): 79 | if i in existing_idx: 80 | img_type = 'existing' 81 | elif i in updated_idx: 82 | img_type = 'updated' 83 | else: 84 | continue 85 | 86 | data['type'] = img_type 87 | updated_existing_data_list.append(data) 88 | 89 | 90 | # existing_N = len([i for i in updated_existing_data_list if i['type'] == 'existing']) 91 | # updated_N = len([i for i in updated_existing_data_list if i['type'] == 'updated']) 92 | 93 | # torch.save(updated_existing_data_list, f'output/Updated_images_split_COCO_{s_name}.pth') 94 | return updated_existing_data_list 95 | 96 | 97 | def load_sem_seg_s10(gt_root='datasets/coco/coco_stuff_10k/annotations_detectron2/train', 98 | image_root='datasets/coco/coco_stuff_10k/images_detectron2/train'): 99 | return load_sem_seg(gt_root, image_root, 'split10') 100 | 101 | 102 | def load_sem_seg_s11(gt_root='datasets/coco/coco_stuff_10k/annotations_detectron2/train', 103 |
image_root='datasets/coco/coco_stuff_10k/images_detectron2/train'): 104 | return load_sem_seg(gt_root, image_root, 'split11') 105 | 106 | 107 | def load_sem_seg_s12(gt_root='datasets/coco/coco_stuff_10k/annotations_detectron2/train', 108 | image_root='datasets/coco/coco_stuff_10k/images_detectron2/train'): 109 | return load_sem_seg(gt_root, image_root, 'split12') 110 | 111 | 112 | def load_sem_seg_s13(gt_root='datasets/coco/coco_stuff_10k/annotations_detectron2/train', 113 | image_root='datasets/coco/coco_stuff_10k/images_detectron2/train'): 114 | return load_sem_seg(gt_root, image_root, 'split13') 115 | 116 | 117 | def load_sem_seg_s14(gt_root='datasets/coco/coco_stuff_10k/annotations_detectron2/train', 118 | image_root='datasets/coco/coco_stuff_10k/images_detectron2/train'): 119 | return load_sem_seg(gt_root, image_root, 'split14') 120 | 121 | 122 | def load_sem_seg_s15(gt_root='datasets/coco/coco_stuff_10k/annotations_detectron2/train', 123 | image_root='datasets/coco/coco_stuff_10k/images_detectron2/train'): 124 | return load_sem_seg(gt_root, image_root, 'split15') 125 | 126 | 127 | updated_func_dict = { 128 | 'split10': load_sem_seg_s10, 129 | 'split11': load_sem_seg_s11, 130 | 'split12': load_sem_seg_s12, 131 | 'split13': load_sem_seg_s13, 132 | 'split14': load_sem_seg_s14, 133 | 'split15': load_sem_seg_s15, 134 | } 135 | -------------------------------------------------------------------------------- /prop_former/data/datasets/shared.py: -------------------------------------------------------------------------------- 1 | import os 2 | from detectron2.data import detection_utils as utils 3 | import numpy as np 4 | from tqdm import tqdm 5 | import torch 6 | import pickle 7 | import torch.nn.functional as F 8 | 9 | 10 | def read_split_data_list_from_file(data_root, existing_file_path, updated_file_path): 11 | existing_data_list = [] 12 | for line in open(existing_file_path).read().splitlines(): 13 | data = {} 14 | img_name, ant_name = line.split(' ') 15 | abs_img_name = f'{data_root}/{img_name}' 16 | abs_ant_name = f'{data_root}/{ant_name}' 17 | 18 | assert os.path.exists(abs_img_name), f'FileNotFound: {abs_img_name}' 19 | assert os.path.exists(abs_ant_name), f'FileNotFound: {abs_ant_name}' 20 | 21 | data['file_name'] = abs_img_name 22 | data['sem_seg_file_name'] = abs_ant_name 23 | data['type'] = 'existing' 24 | existing_data_list.append(data) 25 | 26 | updated_data_list = [] 27 | for line in open(updated_file_path).read().splitlines(): 28 | data = {} 29 | img_name, ant_name = line.split(' ') 30 | abs_img_name = f'{data_root}/{img_name}' 31 | abs_ant_name = f'{data_root}/{ant_name}' 32 | 33 | assert os.path.exists(abs_img_name), f'FileNotFound: {abs_img_name}' 34 | assert os.path.exists(abs_ant_name), f'FileNotFound: {abs_ant_name}' 35 | 36 | data['file_name'] = abs_img_name 37 | data['sem_seg_file_name'] = abs_ant_name 38 | data['type'] = 'updated' 39 | updated_data_list.append(data) 40 | 41 | return existing_data_list + updated_data_list 42 | 43 | 44 | def read_data_list_from_file(data_root, file_path): 45 | data_list = [] 46 | for line in open(file_path).read().splitlines(): 47 | data = {} 48 | img_name, ant_name = line.split(' ') 49 | abs_img_name = f'{data_root}/{img_name}' 50 | abs_ant_name = f'{data_root}/{ant_name}' 51 | 52 | assert os.path.exists(abs_img_name), f'FileNotFound: {abs_img_name}' 53 | assert os.path.exists(abs_ant_name), f'FileNotFound: {abs_ant_name}' 54 | 55 | data['file_name'] = abs_img_name 56 | data['sem_seg_file_name'] = abs_ant_name 57 | 
data_list.append(data) 58 | 59 | return data_list 60 | 61 | 62 | def write_data_list_to_file(data_root, data_list, file_path): 63 | 'images_detection2/2011_003276.jpg annotations_detection2/2011_003276.png' 64 | 65 | with open(file_path, 'w', encoding='utf-8') as f: 66 | for data in data_list: 67 | line = f"{data['file_name'].split(data_root + '/')[1]}" \ 68 | f" {data['sem_seg_file_name'].split(data_root + '/')[1]}\n" 69 | f.write(line) 70 | -------------------------------------------------------------------------------- /prop_former/data/datasets/voc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/SimFormer-Weak-Shot-Semantic-Segmentation/9e32a800d9c40c1f85e7b1d8d24c412572f484f7/prop_former/data/datasets/voc/__init__.py -------------------------------------------------------------------------------- /prop_former/data/datasets/voc/meta_files/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/SimFormer-Weak-Shot-Semantic-Segmentation/9e32a800d9c40c1f85e7b1d8d24c412572f484f7/prop_former/data/datasets/voc/meta_files/__init__.py -------------------------------------------------------------------------------- /prop_former/data/datasets/voc/meta_files/info.py: -------------------------------------------------------------------------------- 1 | USE_BACKGROUND = True 2 | # USE_BACKGROUND = False # Change NUM_CLASSES to 20!!! Change MIL Mask Loss to 1e-7 !!! 3 | 4 | ignored_cid = 255 5 | 6 | name_to_file = { 7 | 'voc_trainaug_seg': 'prop_former/data/datasets/voc/meta_files/train_aug.txt', 8 | 'voc_val_seg': 'prop_former/data/datasets/voc/meta_files/val.txt' 9 | } 10 | 11 | name_to_existing_file = {} 12 | name_to_updated_file = {} 13 | for i in range(1): 14 | name_to_existing_file[ 15 | f'voc_split{i + 1}_trainaug'] = f'prop_former/data/datasets/voc/meta_files/split{i + 1}_existing.txt' 16 | name_to_updated_file[ 17 | f'voc_split{i + 1}_trainaug'] = f'prop_former/data/datasets/voc/meta_files/split{i + 1}_updated.txt' 18 | 19 | name_to_file[f'voc_split{i + 1}_val'] = 'prop_former/data/datasets/voc/meta_files/val.txt' 20 | 21 | if USE_BACKGROUND: 22 | CAT_LIST = ['background', 23 | 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 24 | 'bus', 'car', 'cat', 'chair', 'cow', 25 | 'diningtable', 'dog', 'horse', 'motorbike', 'person', 26 | 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] 27 | 28 | CAT_COLOR = [ 29 | [255, 255, 255], 30 | [220, 20, 60], [119, 11, 32], [0, 0, 142], [0, 0, 230], [106, 0, 228], 31 | [0, 60, 100], [0, 80, 100], [0, 0, 70], [0, 0, 192], [250, 170, 30], 32 | [100, 170, 30], [220, 220, 0], [175, 116, 175], [250, 0, 30], [165, 42, 42], 33 | [255, 77, 255], [0, 226, 252], [182, 182, 255], [0, 82, 0], [120, 166, 157]] 34 | 35 | voc_did_to_names = {k: v for k, v in enumerate(CAT_LIST)} 36 | voc_did_to_color = {k: v for k, v in enumerate(CAT_COLOR)} 37 | else: 38 | CAT_LIST = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 39 | 'bus', 'car', 'cat', 'chair', 'cow', 40 | 'diningtable', 'dog', 'horse', 'motorbike', 'person', 41 | 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] 42 | 43 | CAT_COLOR = [ 44 | [220, 20, 60], [119, 11, 32], [0, 0, 142], [0, 0, 230], [106, 0, 228], 45 | [0, 60, 100], [0, 80, 100], [0, 0, 70], [0, 0, 192], [250, 170, 30], 46 | [100, 170, 30], [220, 220, 0], [175, 116, 175], [250, 0, 30], [165, 42, 42], 47 | [255, 77, 255], [0, 226, 252], [182, 182, 255], [0, 82, 0], [120, 166, 157]] 
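# In this branch the dataset-id mappings below are shifted by +1, so id 0 stays unused when there is no background class.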
48 |
49 | voc_did_to_names = {k + 1: v for k, v in enumerate(CAT_LIST)}
50 | voc_did_to_color = {k + 1: v for k, v in enumerate(CAT_COLOR)}
51 |
52 | voc_did_list = list(voc_did_to_names.keys())
53 |
54 | # SPLIT 1
55 | split1_novel_names = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle']
56 | split1_base_names = [name for name in CAT_LIST if name not in split1_novel_names]
57 |
58 | # SPLIT 2
59 | split2_novel_names = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle']
60 | split2_base_names = [name for name in CAT_LIST if name not in split2_novel_names]
61 |
62 | voc_did_to_color_ex = {k: v for k, v in voc_did_to_color.items()}
63 | voc_did_to_color_ex[0] = [255, 255, 255]
64 | voc_did_to_color_ex[255] = [0, 0, 0]
65 |
--------------------------------------------------------------------------------
/prop_former/data/datasets/voc/register_voc_splits.py:
--------------------------------------------------------------------------------
1 | import os
2 | from detectron2.data import DatasetCatalog, MetadataCatalog
3 | from prop_former.data.datasets.shared import read_data_list_from_file, read_split_data_list_from_file
4 | from prop_former.data.datasets.voc.meta_files.info import *
5 |
6 |
7 | # from mask_former.utils.viz_tools import viz_class_colors
8 | # viz_class_colors(voc_dataset_id_to_names, voc_dataset_id_to_color)
9 |
10 | def _get_voc_meta():
11 |     did_to_cid = {k: i for i, k in enumerate(voc_did_list)}
12 |     cid_to_did = {v: k for k, v in did_to_cid.items()}
13 |     ret = {
14 |         "c_did_to_cid": did_to_cid,
15 |         "c_cid_to_did": cid_to_did,
16 |         "c_class_names": [voc_did_to_names[did] for did in did_to_cid.keys()],
17 |         "c_did_to_name": voc_did_to_names,
18 |         "stuff_classes": [voc_did_to_names[did] for did in did_to_cid.keys()]
19 |     }
20 |     return ret
21 |
22 |
23 | def _get_voc_split_meta(split_name):
24 |     # Only used in Training
25 |     base_names = eval(f'{split_name}_base_names')
26 |     novel_names = eval(f'{split_name}_novel_names')
27 |     assert len(base_names) + len(novel_names) == len(CAT_LIST)
28 |
29 |     base_dids = [k for k, v in voc_did_to_names.items() if v in base_names]
30 |     novel_dids = [k for k, v in voc_did_to_names.items() if v in novel_names]
31 |     did_to_cid = {k: i for i, k in enumerate(voc_did_list)}
32 |     cid_to_did = {v: k for k, v in did_to_cid.items() if v != ignored_cid}
33 |
34 |     ret = {
35 |         "c_did_to_cid": did_to_cid,
36 |         "c_cid_to_did": cid_to_did,
37 |         "c_class_names": [voc_did_to_names[did] for did in did_to_cid.keys()],
38 |         "c_did_to_name": voc_did_to_names,
39 |
40 |         "c_base_dids": base_dids,
41 |         "c_novel_dids": novel_dids,
42 |         "stuff_classes": [voc_did_to_names[did] for did in did_to_cid.keys()]
43 |     }
44 |     return ret
45 |
46 |
47 | def register_voc_splits(root):
48 |     print(f'Register VOC PropFormer...')
49 |     data_root = os.path.join(root, "VOC2012")
50 |
51 |     for typical_split_name in ['voc_val_seg', 'voc_trainaug_seg']:
52 |         split_meta = _get_voc_meta()
53 |
54 |         DatasetCatalog.register(
55 |             typical_split_name,
56 |             lambda x=data_root, y=name_to_file[typical_split_name]:
57 |             read_data_list_from_file(x, y))
58 |
59 |         MetadataCatalog.get(typical_split_name).set(
60 |             evaluator_type="weakshot_sem_seg",
61 |             ignore_label=ignored_cid,
62 |             **split_meta,
63 |         )
64 |
65 |     for s_name in ['split1']:
66 |         split_meta = _get_voc_split_meta(s_name)
67 |         train_split_name = f'voc_{s_name}_trainaug'
68 |
69 |         DatasetCatalog.register(
70 |             train_split_name,
71 |             lambda x=data_root, y=name_to_existing_file[train_split_name], z=name_to_updated_file[train_split_name]:
72 |
read_split_data_list_from_file(x, y, z)
73 |         )
74 |
75 |         MetadataCatalog.get(train_split_name).set(
76 |             evaluator_type="weakshot_sem_seg",
77 |             ignore_label=ignored_cid,
78 |             **split_meta,
79 |         )
80 |
81 |         eval_split_name = f'voc_{s_name}_val'
82 |         DatasetCatalog.register(
83 |             eval_split_name,
84 |             lambda x=data_root, y=name_to_file[eval_split_name]:
85 |             read_data_list_from_file(x, y))
86 |
87 |         MetadataCatalog.get(eval_split_name).set(
88 |             evaluator_type="weakshot_sem_seg",
89 |             ignore_label=ignored_cid,
90 |             **split_meta,
91 |         )
92 |     return
93 |
94 |
95 | _root = os.getenv("DETECTRON2_DATASETS", "datasets")
96 | register_voc_splits(_root)
97 |
--------------------------------------------------------------------------------
/prop_former/data/datasets/voc/split_voc_to_existing_and_updated.py:
--------------------------------------------------------------------------------
1 | import os
2 | from prop_former.data.datasets.voc.meta_files.info import *
3 | from tqdm import tqdm
4 | from detectron2.data import detection_utils as utils
5 | import numpy as np
6 | from prop_former.data.datasets.shared import write_data_list_to_file
7 |
8 | voc_training_file = name_to_file['voc_trainaug_seg']
9 | existing_rate = 0.5
10 | split_name = 'split1'
11 | base_names = eval(f'{split_name}_base_names')
12 | novel_names = eval(f'{split_name}_novel_names')
13 | existing_save_file = name_to_existing_file[f'voc_{split_name}_trainaug']
14 | updated_save_file = name_to_updated_file[f'voc_{split_name}_trainaug']
15 |
16 |
17 | def split_file(root):
18 |     data_root = os.path.join(root, "VOC2012")
19 |
20 |     total_lines = open(voc_training_file).read().splitlines()
21 |     total_num = len(total_lines)
22 |
23 |     idx_perm = np.random.permutation([i for i in range(total_num)])
24 |
25 |     existing_num = int(total_num * existing_rate)
26 |
27 |     existing_idx_list = idx_perm[:existing_num].tolist()
28 |     updated_idx_list = idx_perm[existing_num:].tolist()
29 |
30 |     base_dids = [k for k, v in voc_did_to_names.items() if v in base_names]
31 |     novel_dids = [k for k, v in voc_did_to_names.items() if v in novel_names]
32 |
33 |     existing_data_list, updated_data_list = [], []
34 |
35 |     for idx in tqdm(existing_idx_list):
36 |         data = {}
37 |         img_name, ant_name = total_lines[idx].split(' ')
38 |         abs_img_path = f'{data_root}/{img_name}'
39 |         abs_ant_path = f'{data_root}/{ant_name}'
40 |
41 |         assert os.path.exists(abs_img_path), f'FileNotFound: {abs_img_path}'
42 |         assert os.path.exists(abs_ant_path), f'FileNotFound: {abs_ant_path}'
43 |
44 |         data['file_name'] = abs_img_path
45 |         data['sem_seg_file_name'] = abs_ant_path
46 |
47 |         raw_ant = utils.read_image(abs_ant_path)
48 |
49 |         has_base = False
50 |         for did in np.unique(raw_ant):
51 |             if did in base_dids:
52 |                 has_base = True
53 |
54 |         if has_base:
55 |             existing_data_list.append(data)
56 |         else:
57 |             updated_data_list.append(data)
58 |
59 |     for idx in tqdm(updated_idx_list):
60 |         data = {}
61 |         img_name, ant_name = total_lines[idx].split(' ')
62 |         abs_img_path = f'{data_root}/{img_name}'
63 |         abs_ant_path = f'{data_root}/{ant_name}'
64 |
65 |         assert os.path.exists(abs_img_path), f'FileNotFound: {abs_img_path}'
66 |         assert os.path.exists(abs_ant_path), f'FileNotFound: {abs_ant_path}'
67 |
68 |         data['file_name'] = abs_img_path
69 |         data['sem_seg_file_name'] = abs_ant_path
70 |
71 |         updated_data_list.append(data)
72 |
73 |     write_data_list_to_file(data_root, existing_data_list, existing_save_file)
74 |     write_data_list_to_file(data_root, updated_data_list, updated_save_file)
75 |
76 |     A =
open(voc_training_file).read().splitlines() 77 | B = open(existing_save_file).read().splitlines() 78 | C = open(updated_save_file).read().splitlines() 79 | assert sorted(B + C) == sorted(A) 80 | return 81 | 82 | 83 | _root = os.getenv("DETECTRON2_DATASETS", "datasets") 84 | split_file(_root) 85 | -------------------------------------------------------------------------------- /prop_former/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/SimFormer-Weak-Shot-Semantic-Segmentation/9e32a800d9c40c1f85e7b1d8d24c412572f484f7/prop_former/modeling/__init__.py -------------------------------------------------------------------------------- /prop_former/modeling/cross_img_sim/compute_pairs.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append('.') 4 | 5 | import torch 6 | import os 7 | from detectron2.data import DatasetCatalog, MetadataCatalog 8 | import prop_former.data.datasets.coco_stuff_10k.meta_files.info as INFO 9 | # from detectron2.data.datasets import load_sem_seg 10 | from detectron2.utils.file_io import PathManager 11 | from detectron2.data import detection_utils as utils 12 | import numpy as np 13 | from tqdm import tqdm 14 | from detectron2.data import DatasetCatalog, MetadataCatalog 15 | import prop_former.data.datasets.coco_stuff_10k.meta_files.info as INFO 16 | 17 | eps = 1e-5 18 | 19 | 20 | def get_imname_to_dids(split_name): 21 | meta = MetadataCatalog.get(split_name) 22 | data_list = DatasetCatalog.get(split_name) 23 | 24 | itd_path = f'datasets/imname_to_dids_{split_name}.pth' 25 | if os.path.exists(itd_path): 26 | imname_to_dids = torch.load(itd_path) 27 | else: 28 | imname_to_dids = {} 29 | for data_idx, data_item in tqdm(enumerate(data_list)): 30 | imname = os.path.basename(data_item['sem_seg_file_name']) 31 | raw_segm_gt = utils.read_image(data_item['sem_seg_file_name']) 32 | 33 | all_dids = np.unique(raw_segm_gt) 34 | novel_dids = [did for did in all_dids if did in meta.c_novel_dids] 35 | base_dids = [did for did in all_dids if did in meta.c_base_dids] 36 | imname_to_dids[imname] = {'base_dids': base_dids, 'novel_dids': novel_dids} 37 | 38 | torch.save(imname_to_dids, itd_path) 39 | 40 | return imname_to_dids 41 | 42 | 43 | def limit_set_len(anyset, maxlen): 44 | if len(anyset) <= maxlen: 45 | return anyset 46 | else: 47 | thatlist = list(anyset) 48 | thatlist.__delitem__(np.random.randint(maxlen)) 49 | return set(thatlist) 50 | 51 | 52 | def get_deconf_dict(split_name, imname_to_dids): 53 | deconf_path = f'datasets/imname_to_pair_list_dict_{split_name}.pth' 54 | 55 | if os.path.exists(deconf_path): 56 | imname_to_pair_list_dict = torch.load(deconf_path) 57 | else: 58 | max_deconf_pair_len = 500 59 | max_common_pair_len = 50 60 | imname_to_pair_list_dict = {} 61 | for focused_imname, v in tqdm(imname_to_dids.items()): 62 | base_dids = v['base_dids'] 63 | novel_dids = v['novel_dids'] 64 | 65 | deconf_pair_list = {ndid: set() for ndid in novel_dids} 66 | novel_comm_pair_list = set() 67 | base_comm_pair_list = set() 68 | 69 | for candi_imname, candi_v in imname_to_dids.items(): 70 | candi_novel_dids = candi_v['novel_dids'] 71 | candi_base_dids = candi_v['base_dids'] 72 | 73 | novel_inter = list(set(novel_dids).intersection(set(candi_novel_dids))) 74 | 75 | if len(novel_inter) == 1: 76 | deconf_pair_list[novel_inter[0]].add(candi_imname) 77 | deconf_pair_list[novel_inter[0]] = 
limit_set_len(deconf_pair_list[novel_inter[0]],
78 |                                                                                  max_deconf_pair_len)
79 |
80 |
81 |                 elif len(novel_inter) >= 2:  # candidates sharing several novel classes only form "common" pairs
82 |                     novel_comm_pair_list.add(candi_imname)
83 |                     novel_comm_pair_list = limit_set_len(novel_comm_pair_list, max_common_pair_len)
84 |
85 |                 base_inter = list(set(base_dids).intersection(set(candi_base_dids)))
86 |
87 |                 if len(base_inter) >= 1:
88 |                     base_comm_pair_list.add(candi_imname)
89 |                     base_comm_pair_list = limit_set_len(base_comm_pair_list, max_common_pair_len)
90 |
91 |             imname_to_pair_list_dict[focused_imname] = {'deconf_pair_list': deconf_pair_list,
92 |                                                         'novel_comm_pair_list': novel_comm_pair_list,
93 |                                                         'base_comm_pair_list': base_comm_pair_list}
94 |
95 |         torch.save(imname_to_pair_list_dict, deconf_path)
96 |
97 |     return imname_to_pair_list_dict
98 |
99 |
100 | def check_deconf_dict(imname_to_dids, decon_dict):
101 |     for imname, deconf in tqdm(decon_dict.items()):
102 |
103 |         for cid, dlist in deconf['deconf_pair_list'].items():
104 |             A = imname_to_dids[imname]['novel_dids']
105 |
106 |             for pairname in dlist:
107 |                 B = imname_to_dids[pairname]['novel_dids']
108 |
109 |                 assert set(A).intersection(set(B)) == {cid}, f'{A}; {B}; {imname}; {pairname}'
110 |
111 |
112 |
113 |
114 |
115 |     return
116 |
117 |
118 | def main(split_name='ADE_split1_train'):
119 |     meta = MetadataCatalog.get(split_name)
120 |     data_list = DatasetCatalog.get(split_name)
121 |
122 |     imname_to_dids = get_imname_to_dids(split_name)
123 |     decon_dict = get_deconf_dict(split_name, imname_to_dids)
124 |     check_deconf_dict(imname_to_dids, decon_dict)
125 |     return
126 |
127 |
128 |
129 |
130 | # e.g. 'coco_stuff_split3_train'
131 | # or 'ADE_split1_train'
132 |
133 | # python prop_former/modeling/cross_img_sim/compute_pairs.py coco_stuff_split3_train
134 | if __name__ == '__main__':
135 |     istr = sys.argv[1]
136 |     print(istr)
137 |     main(split_name=istr)
138 |
--------------------------------------------------------------------------------
/prop_former/modeling/cross_img_sim/cro_simnet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | from collections import deque
5 | from prop_former.modeling.fc_modules import ResidualFullyConnectedBranch
6 |
7 |
8 | class BalanceBinaryWeightManager(object):
9 |     def __init__(self):
10 |         self.neg_num_queue = deque(maxlen=25)
11 |         self.pos_num_queue = deque(maxlen=25)
12 |         self.neg_num_queue.append(1)
13 |         self.pos_num_queue.append(1)
14 |
15 |         return
16 |
17 |     def update(self, GT_map):
18 |         self.neg_num_queue.append((GT_map[:, ::5, ::5, ] == 0).sum().item())
19 |         self.pos_num_queue.append((GT_map[:, ::5, ::5, ] == 1).sum().item())
20 |         return
21 |
22 |     def get_balance_weight(self):
23 |         neg_num = sum(self.neg_num_queue)
24 |         pos_num = sum(self.pos_num_queue)
25 |
26 |         neg_w = pos_num / (pos_num + neg_num)
27 |         pos_w = neg_num / (pos_num + neg_num)
28 |
29 |         return neg_w, pos_w
30 |
31 |
32 | class CroPixelSimConvNet(nn.Module):
33 |     def __init__(self, in_feature: int, hidden_size: int,
34 |                  layer_num=3, func='sigmoid', batch_norm=True):
35 |         super(CroPixelSimConvNet, self).__init__()
36 |         self.func = func
37 |
38 |         self.layers = nn.Sequential()
39 |
40 |         dim_in = in_feature
41 |
42 |         for l in range(layer_num):
43 |             self.layers.add_module(f'Conv{l}', nn.Conv2d(dim_in, hidden_size, kernel_size=1))
44 |             if batch_norm:
45 |                 self.layers.add_module(f'BN{l}', nn.BatchNorm2d(hidden_size))
46 |
47 |             self.layers.add_module(f'RL{l}', nn.ReLU(inplace=True))
48 |             dim_in =
hidden_size 49 | 50 | if self.func == 'sigmoid': 51 | self.layers.add_module(f'Out{l}', nn.Conv2d(dim_in, 1, kernel_size=1)) 52 | self.layers.add_module(f'Sigmoid{l}', nn.Sigmoid()) 53 | elif self.func == 'softmax': 54 | self.layers.add_module(f'Out{l}', nn.Conv2d(dim_in, 2, kernel_size=1)) 55 | else: 56 | raise NotImplementedError 57 | 58 | def forward(self, x): 59 | 60 | if self.func == 'sigmoid': 61 | res = self.layers(x) 62 | elif self.func == 'softmax': 63 | feat = self.layers(x) 64 | res = torch.softmax(feat, dim=1)[:, 1][:, None] 65 | else: 66 | raise NotImplementedError 67 | 68 | return res 69 | 70 | 71 | class CroPixelResSimConvNet(nn.Module): 72 | def __init__(self, in_dim, feat_dim, layer_num=3, use_bn=True): 73 | super(CroPixelResSimConvNet, self).__init__() 74 | self.fc_branch = ResidualFullyConnectedBranch(in_dim, [feat_dim for l in range(layer_num)], use_bn=use_bn) 75 | self.out_head = nn.Conv2d(feat_dim, 2, kernel_size=1) 76 | 77 | def forward(self, x): 78 | feat = self.fc_branch(x) 79 | logit = self.out_head(feat) 80 | res = torch.softmax(logit, dim=1)[:, 1][:, None] 81 | return res 82 | 83 | 84 | def get_cro_simnet(cfg, dim_in, dim_mid): 85 | layer_num = cfg.CROSS_IMG_SIM.LayerNum 86 | batch_norm = cfg.CROSS_IMG_SIM.BN 87 | net = CroPixelResSimConvNet(dim_in, dim_mid, layer_num, use_bn=batch_norm) 88 | return net 89 | -------------------------------------------------------------------------------- /prop_former/modeling/cross_img_sim/func.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | def get_shuffle_idx(B): 6 | # shuffle_idx = torch.randperm(B) 7 | 8 | seq_idx = torch.range(0, B - 1).long() 9 | shuffle_idx = torch.range(0, B - 1).long() 10 | shuffle_idx[::2] = seq_idx[1::2] 11 | shuffle_idx[1::2] = seq_idx[::2] 12 | return shuffle_idx 13 | 14 | 15 | def get_grid_pair_from_AB(X, Y): 16 | assert X.dim() == 3 17 | assert Y.dim() == 3 18 | B, Ka, d = X.size() 19 | B, Kb, d = Y.size() 20 | 21 | pair = torch.cat([X.unsqueeze(2).expand(-1, -1, Kb, -1), 22 | Y.unsqueeze(1).expand(-1, Ka, -1, -1)], dim=-1) 23 | return pair 24 | 25 | 26 | def get_regions(pixel_labels, targets, meta): 27 | ignore_region = pixel_labels == 255 28 | 29 | novel_region_per = [] 30 | for n_did in meta.c_novel_dids: 31 | novel_region_per.append(pixel_labels == n_did) 32 | 33 | novel_region_float = torch.stack(novel_region_per).sum(0) 34 | assert novel_region_float.max() <= 1 35 | novel_region = novel_region_float.bool() 36 | 37 | pad_region = torch.stack([t['pad_region'] for t in targets]).type_as(pixel_labels) 38 | pad_region = F.interpolate(pad_region[:, None], size=pixel_labels.size()[-2:], mode="nearest").bool() 39 | 40 | base_region = ~ignore_region * ~novel_region 41 | 42 | assert (ignore_region.float() + novel_region.float() + base_region.float()).max() == 1 43 | assert (ignore_region.float() + novel_region.float() + base_region.float()).min() == 1 44 | 45 | return base_region.float(), pad_region.float(), novel_region.float(), ignore_region.float() 46 | 47 | 48 | def rand_sample_points_within_the_region(valid_region, point_num, rand_max=0.1): 49 | B, _, H, W = valid_region.size() 50 | 51 | point_positions = valid_region.new_ones(B, point_num, 2) * -10 52 | point_scores = valid_region.new_ones(B, point_num, 1) * -10 53 | 54 | # random score for random topk 55 | score_map = valid_region + torch.rand_like(valid_region) * rand_max 56 | 57 | score_map_f = score_map.reshape(B, H * W) 58 | point_probs_f, 
point_indices_f = torch.topk(score_map_f, k=point_num, dim=1) 59 | point_probs_per = point_probs_f.reshape(B, point_num) 60 | point_indices = point_indices_f.reshape(B, point_num) 61 | 62 | ws = (point_indices % W).to(torch.float) * 2 / (W - 1) - 1 63 | hs = (point_indices // W).to(torch.float) * 2 / (H - 1) - 1 64 | 65 | point_positions[:, :, 0] = ws 66 | point_positions[:, :, 1] = hs 67 | 68 | point_scores[:, :, 0] = point_probs_per 69 | 70 | assert point_positions.min() >= -1 71 | assert point_positions.max() <= 1 72 | 73 | return point_positions, point_scores 74 | 75 | 76 | def sample_on_any_map(points, any_map, mode='bilinear'): 77 | assert points.dim() == 3 78 | assert any_map.dim() == 4 79 | 80 | B, K, _ = points.size() 81 | B, C, H, W = any_map.size() 82 | 83 | points_map = points.reshape(B, K, 1, 2) 84 | 85 | sampled_feature_map = F.grid_sample(any_map, points_map, mode=mode, align_corners=True) 86 | sampled_feature = sampled_feature_map.squeeze(-1).permute(0, 2, 1) 87 | 88 | return sampled_feature 89 | 90 | # def get_regions(pixel_labels, meta): 91 | # ignore_region = pixel_labels == 255 92 | # 93 | # novel_region_per = [] 94 | # for n_did in meta.c_novel_dids: 95 | # novel_region_per.append(pixel_labels == n_did) 96 | # 97 | # novel_region_float = torch.stack(novel_region_per).sum(0) 98 | # assert novel_region_float.max() <= 1 99 | # novel_region = novel_region_float.bool() 100 | # 101 | # base_region = ~ignore_region * ~novel_region 102 | # return base_region, novel_region, ignore_region 103 | -------------------------------------------------------------------------------- /prop_former/modeling/cross_img_sim/meter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class CroBinaryMeter(): 5 | def __init__(self, meter_name='', classes=['DIS', 'SIM']): 6 | self.meter_name = meter_name 7 | self.classes = classes 8 | self.reset() 9 | return 10 | 11 | def reset(self): 12 | ''' 13 | 0: dissimilar 14 | 1: similar 15 | 16 | [i,j] the i-th class is predicted as the j-th class. 
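        For example (an illustrative call): update(pred=[1, 0, 1], label=[1, 1, 0])
        yields hit_matrix = [[0, 1], [1, 1]], so recall of SIM is
        hit_matrix[1, 1] / hit_matrix[1].sum() = 1/2 and precision of SIM is
        hit_matrix[1, 1] / hit_matrix[:, 1].sum() = 1/2.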
17 | ''' 18 | self.hit_matrix = np.zeros((len(self.classes), len(self.classes))) 19 | return 20 | 21 | def update(self, pred, label): 22 | if len(pred) == 0: 23 | return 24 | for p, l in zip(pred, label): 25 | self.hit_matrix[int(l), int(p)] += 1 26 | p, l 27 | return 28 | 29 | def get_matrix(self): 30 | return self.hit_matrix / self.hit_matrix.sum(1).reshape(-1, 1) 31 | 32 | def __str__(self): 33 | return self.report() 34 | 35 | def get_recall(self, idx): 36 | bottom = self.hit_matrix[idx].sum() 37 | top = float(self.hit_matrix[idx, idx]) 38 | return top / bottom if bottom != 0 else 0 39 | 40 | def get_precision(self, idx): 41 | bottom = self.hit_matrix[:, idx].sum() 42 | top = float(self.hit_matrix[idx, idx]) 43 | return top / bottom if bottom != 0 else 0 44 | 45 | def get_f1score(self, idx): 46 | r = self.get_recall(idx) 47 | p = self.get_precision(idx) 48 | if (p + r) == 0: 49 | return 0 50 | return 2 * p * r / (p + r) 51 | 52 | def get_str_hit(self): 53 | str = '\nHit Matrix:\n' 54 | for i in range(len(self.classes)): 55 | str += f'[ {self.classes[i]:5s}:' 56 | for j in range(len(self.classes)): 57 | str += f' {self.hit_matrix[i, j]:6.0f}' 58 | str += f'\t({self.hit_matrix[i].sum():6.0f} in all.)]\n' 59 | return str 60 | 61 | def get_str_conf(self): 62 | conf = self.get_matrix() 63 | str = '\nConfusion Matrix\n' 64 | for i in range(len(self.classes)): 65 | str += f'[ {self.classes[i]:5s}:' 66 | for j in range(len(self.classes)): 67 | str += f'\t{conf[i, j]:6.1%}' 68 | str += f'\t({self.hit_matrix[i].sum():6.0f} in all.)]\n' 69 | return str 70 | 71 | def get_str_f1score(self, idx): 72 | return f'F1-score of {self.classes[idx]}: {self.get_f1score(idx):3.1%}' 73 | 74 | def report(self, hit=True, caption=''): 75 | str = f'\n=========== {self.meter_name}: {caption} ============\n' 76 | str += f'======================== {self.get_f1score(0):2.1%} Dis F1 =======================\n' 77 | str += self.get_str_hit() if hit else '' 78 | # str += self.get_str_conf() 79 | str += '\n' 80 | for i, c in enumerate(self.classes): 81 | str += f'[ {c:5s}:\tPR: {self.get_precision(i):5.1%},\tRR: {self.get_recall(i):5.1%},\t F1: {self.get_f1score(i):5.1%}]\n' 82 | 83 | return str + '^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n' 84 | -------------------------------------------------------------------------------- /prop_former/modeling/fc_modules.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class BasicBlock(nn.Module): 7 | 8 | def __init__(self, d_in, d_out, use_bn): 9 | super(BasicBlock, self).__init__() 10 | self.layer1 = nn.Conv2d(d_in, d_out, kernel_size=1, ) 11 | self.layer2 = nn.Conv2d(d_out, d_out, kernel_size=1, ) 12 | self.use_bn = use_bn 13 | 14 | if use_bn: 15 | self.bn1 = nn.BatchNorm2d(d_out) 16 | self.bn2 = nn.BatchNorm2d(d_out) 17 | 18 | if d_in != d_out: 19 | self.sqz = nn.Conv2d(d_in, d_out, kernel_size=1, ) 20 | else: 21 | self.sqz = None 22 | 23 | def forward(self, x): 24 | if self.sqz: 25 | residual = F.relu(self.sqz(x)) 26 | else: 27 | residual = x 28 | 29 | x = self.layer1(x) 30 | if self.use_bn: 31 | x = self.bn1(x) 32 | 33 | x = F.relu(x) 34 | 35 | x = self.layer2(x) 36 | if self.use_bn: 37 | x = self.bn2(x) 38 | x = F.relu(x) 39 | 40 | x += residual 41 | return x 42 | 43 | 44 | class ResidualFullyConnectedBranch(nn.Module): 45 | def __init__(self, feat_dim_in, dim_layer_list, use_bn): 46 | super(ResidualFullyConnectedBranch, 
self).__init__(), 47 | self.layers = nn.Sequential() 48 | 49 | d_in = dim_layer = feat_dim_in 50 | for i, dim_layer in enumerate(dim_layer_list): 51 | self.layers.add_module(f'block{i}', BasicBlock(d_in, dim_layer, use_bn)) 52 | d_in = dim_layer 53 | 54 | self.feat_dim_out = dim_layer 55 | 56 | def forward(self, x): 57 | for layer in self.layers: 58 | x = layer(x) 59 | return x 60 | -------------------------------------------------------------------------------- /prop_former/modeling/hungarian_matcher.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from scipy.optimize import linear_sum_assignment 4 | from torch import nn 5 | 6 | 7 | def batch_mask_loss_novel(inputs, targets, alpha: float = 0.25, gamma: float = 2): 8 | assert targets.sum() == 0 9 | 10 | T = inputs.new_ones(targets.size(0))[None, :] 11 | 12 | return -torch.log(inputs) * T 13 | 14 | 15 | def batch_dice_loss(inputs, targets): 16 | inputs = inputs.sigmoid() 17 | inputs = inputs.flatten(1) 18 | numerator = 2 * torch.einsum("nc,mc->nm", inputs, targets) 19 | denominator = inputs.sum(-1)[:, None] + targets.sum(-1)[None, :] 20 | loss = 1 - (numerator + 1) / (denominator + 1) 21 | return loss 22 | 23 | 24 | def batch_sigmoid_focal_loss(inputs, targets, alpha: float = 0.25, gamma: float = 2): 25 | hw = inputs.shape[1] 26 | 27 | prob = inputs.sigmoid() 28 | focal_pos = ((1 - prob) ** gamma) * F.binary_cross_entropy_with_logits( 29 | inputs, torch.ones_like(inputs), reduction="none" 30 | ) 31 | focal_neg = (prob ** gamma) * F.binary_cross_entropy_with_logits( 32 | inputs, torch.zeros_like(inputs), reduction="none" 33 | ) 34 | if alpha >= 0: 35 | focal_pos = focal_pos * alpha 36 | focal_neg = focal_neg * (1 - alpha) 37 | loss = torch.einsum("nc,mc->nm", focal_pos, targets) + torch.einsum( 38 | "nc,mc->nm", focal_neg, (1 - targets) 39 | ) 40 | return loss / hw 41 | 42 | 43 | class PropHungarianMatcher(nn.Module): 44 | def __init__(self, cfg): 45 | super().__init__() 46 | self.cfg = cfg 47 | 48 | @torch.no_grad() 49 | def my_assignment(self, outputs, targets): 50 | bs, num_queries = outputs["pred_logits"].shape[:2] 51 | indices = [] 52 | for b in range(bs): 53 | out_prob = outputs["pred_logits"][b].softmax(-1) # [num_queries, num_classes] 54 | out_mask = outputs["pred_masks"][b] # [num_queries, H_pred, W_pred] 55 | 56 | tgt_ids = targets[b]["labels"] 57 | tgt_mask = targets[b]["masks"].to(out_mask) 58 | tgt_mask = F.interpolate(tgt_mask[:, None], size=out_mask.shape[-2:], mode="nearest") 59 | 60 | # assert ((tgt_mask.mean([1, 2, 3]) != 0) == targets[b]['has_masks']).min(), \ 61 | # f"{targets[b]['file_name']}" \ 62 | # f"{(tgt_mask.mean([1, 2, 3]) != 0), targets[b]['has_masks']}" 63 | # 64 | # hasmask_idx = targets[b]['has_masks'] 65 | hasmask_idx = tgt_mask.mean([1, 2, 3]) != 0 66 | nomask_idx = ~hasmask_idx 67 | 68 | out_mask_f = out_mask.flatten(1) # [num_queries, H*W] 69 | tgt_mask_f = tgt_mask[:, 0].flatten(1) # [num_total_targets, H*W] 70 | 71 | hasmask_cls_cost = -out_prob[:, tgt_ids[hasmask_idx]] 72 | nomask_cls_cost = -out_prob[:, tgt_ids[nomask_idx]] 73 | 74 | hasmask_mask_cost_mask = batch_sigmoid_focal_loss(out_mask_f, tgt_mask_f[hasmask_idx]) 75 | hasmask_mask_cost_dice = batch_dice_loss(out_mask_f, tgt_mask_f[hasmask_idx]) 76 | 77 | hasmask_mask_cost = self.cfg.LOSS.AssignMaskMASK * hasmask_mask_cost_mask \ 78 | + self.cfg.LOSS.AssignMaskDICE * hasmask_mask_cost_dice 79 | 80 | pMask = F.adaptive_max_pool2d(out_mask[:, 
None].sigmoid(), 1).squeeze(2).squeeze(2)
81 |             nomask_mask_cost = 0 * batch_mask_loss_novel(pMask, tgt_mask[nomask_idx])
82 |
83 |             # Final cost matrix
84 |             A = self.cfg.ASM.HasMaskCls * hasmask_cls_cost
85 |             B = self.cfg.ASM.HasMaskMask * hasmask_mask_cost
86 |
87 |             C = self.cfg.ASM.NoMaskCls * nomask_cls_cost
88 |             D = self.cfg.ASM.NoMaskMask * nomask_mask_cost
89 |
90 |             cost_matrix = torch.cat((A + B, C + D), dim=1)
91 |
92 |             indices.append(linear_sum_assignment(cost_matrix.cpu()))
93 |
94 |         return [(torch.as_tensor(i, dtype=torch.int64),
95 |                  torch.as_tensor(j, dtype=torch.int64))
96 |                 for i, j in indices]
97 |
98 |     @torch.no_grad()
99 |     def forward(self, outputs, targets):
100 |         return self.my_assignment(outputs, targets)
101 |
102 |     def __repr__(self):
103 |         head = "Matcher " + self.__class__.__name__
104 |         return head
105 |
--------------------------------------------------------------------------------
/prop_former/modeling/loss_func.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 |
4 | eps = 1e-5
5 |
6 |
7 | def my_sigmoid_bce(preds, targets, targets_):
8 |     gts = torch.zeros_like(preds)
9 |     for b, target in enumerate(targets):
10 |         gts[b][target] = 1
11 |
12 |     loss = -(gts * torch.log(preds.sigmoid()) + (1 - gts) * torch.log(1 - preds.sigmoid()))
13 |     return loss
14 |
15 |     # Unreachable per-sample variant, kept for reference:
16 |     # batch_res = []
17 |     # for y, t in zip(preds, targets_):
18 |     #     res = []
19 |     #     for i in range(len(y)):
20 |     #         if i in t:
21 |     #             r = -y[i].sigmoid().log()
22 |     #         else:
23 |     #             r = -(1 - y[i].sigmoid()).log()
24 |     #         res.append(r)
25 |     #
26 |     #     batch_res.append(torch.stack(res))
27 |     # batch_res = torch.stack(batch_res)
28 |
29 |
30 | def my_softmax_bce(multi_preds, targets):
31 |     '''
32 |     multi_preds: [B,N,K+1]
33 |     targets: [B,K+1]
34 |     '''
35 |
36 |     preds = torch.softmax(multi_preds, -1).max(1)[0]
37 |
38 |     gts = torch.zeros_like(preds)
39 |     for b, target in enumerate(targets):
40 |         gts[b][target] = 1
41 |
42 |     loss = -(gts * torch.log(preds) + (1 - gts) * torch.log(1 - preds))
43 |     return loss
44 |
45 |
46 | def dice_loss(inputs, targets, num_masks):
47 |     """
48 |     Compute the DICE loss, similar to generalized IOU for masks
49 |     Args:
50 |         inputs: A float tensor of arbitrary shape.
51 |                 The predictions for each example.
52 |         targets: A float tensor with the same shape as inputs. Stores the binary
53 |                  classification label for each element in inputs
54 |                  (0 for the negative class and 1 for the positive class).
55 |     """
56 |     inputs = inputs.sigmoid()
57 |     inputs = inputs.flatten(1)
58 |     numerator = 2 * (inputs * targets).sum(-1)
59 |     denominator = inputs.sum(-1) + targets.sum(-1)
60 |     loss = 1 - (numerator + 1) / (denominator + 1)
61 |     return loss.sum() / num_masks
62 |
63 |
64 | def sigmoid_focal_loss(inputs, targets, num_masks, alpha: float = 0.25, gamma: float = 2):
65 |     """
66 |     Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002.
67 |     Args:
68 |         inputs: A float tensor of arbitrary shape.
69 |                 The predictions for each example.
70 |         targets: A float tensor with the same shape as inputs. Stores the binary
71 |                  classification label for each element in inputs
72 |                  (0 for the negative class and 1 for the positive class).
73 |         alpha: (optional) Weighting factor in range (0,1) to balance
74 |                positive vs negative examples. Default = 0.25.
75 |         gamma: Exponent of the modulating factor (1 - p_t) to
76 |                balance easy vs hard examples.
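
        With p_t = prob * targets + (1 - prob) * (1 - targets), the quantity
        computed below is alpha_t * (1 - p_t) ** gamma * BCE(inputs, targets),
        averaged over pixels and normalized by num_masks, so well-classified
        elements (p_t close to 1) contribute little.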
77 |     Returns:
78 |         Loss tensor
79 |     """
80 |     prob = inputs.sigmoid()
81 |     ce_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none")
82 |     p_t = prob * targets + (1 - prob) * (1 - targets)
83 |     loss = ce_loss * ((1 - p_t) ** gamma)
84 |
85 |     if alpha >= 0:
86 |         alpha_t = alpha * targets + (1 - alpha) * (1 - targets)
87 |         loss = alpha_t * loss
88 |
89 |     return loss.mean(1).sum() / num_masks
90 |
91 |
92 | def dice_loss_without_reduction(inputs, targets):
93 |     """
94 |     Compute the DICE loss, similar to generalized IOU for masks
95 |     Args:
96 |         inputs: A float tensor of arbitrary shape.
97 |                 The predictions for each example.
98 |         targets: A float tensor with the same shape as inputs. Stores the binary
99 |                  classification label for each element in inputs
100 |                  (0 for the negative class and 1 for the positive class).
101 |     """
102 |     inputs = inputs.sigmoid()
103 |     inputs = inputs.flatten(1)
104 |     numerator = 2 * (inputs * targets).sum(-1)
105 |     denominator = inputs.sum(-1) + targets.sum(-1)
106 |     loss = 1 - (numerator + 1) / (denominator + 1)
107 |     return loss
108 |
109 |
110 | def bce_loss_without_reduction(inputs, targets):
111 |     ce_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none")
112 |     return ce_loss
--------------------------------------------------------------------------------
/prop_former/modeling/loss_manager.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 |
4 | from .loss_func import my_softmax_bce
5 | from .loss_func import dice_loss, sigmoid_focal_loss, bce_loss_without_reduction, dice_loss_without_reduction
6 |
7 |
8 | def get_cls_loss_on_assigned(pred_logits, targets, indices, idx):
9 |     '''
10 |     Args:
11 |         pred_logits: [:,N,K]
12 |         labels_full: [:,N_b]
13 |         indices: [:,N_b/N_b]
14 |
15 |     For each sample in the mini-batch:
16 |         indices holds a pair (list_S, list_T): the s-th proposal is assigned to the t-th target.
17 |     '''
18 |     target_classes_o = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)])
19 |     target_classes = torch.full(
20 |         pred_logits.shape[:2], pred_logits.size(-1) - 1, dtype=torch.int64, device=pred_logits.device
21 |     )
22 |     target_classes[idx] = target_classes_o
23 |     loss_ce = F.cross_entropy(pred_logits.transpose(1, 2), target_classes, reduction='none')
24 |
25 |     return loss_ce.mean()
26 |
27 |
28 | def get_cls_loss_on_pooling(pred_logits, labels_full, ltype='SoftmaxBCE'):
29 |     # Note that there is no ignore class in labels_full.
30 |     if ltype == 'MSM':
31 |         raise NotImplementedError
32 |     elif ltype == 'SigmoidBCE':
33 |         raise NotImplementedError
34 |     elif ltype == 'SoftmaxBCE':
35 |         loss_cls = my_softmax_bce(pred_logits, labels_full).mean()
36 |     elif ltype == 'RIB':
37 |         raise NotImplementedError  # falling through here would leave loss_cls undefined
38 |     else:
39 |         raise NotImplementedError
40 |
41 |     '''
42 |     pooled_logits = outputs["pred_logits"].max(1)[0]
43 |
44 |     mbce_targets = torch.ones_like(pooled_logits) * -1
45 |
46 |     # Note that there is no ignore class in target['labels_full'].
47 |     labels_full = [target['labels_full'] for target in targets]
48 |
49 |     for i, target in enumerate(targets):
50 |         mbce_t = target['labels_full']
51 |         mbce_targets[i][:len(mbce_t)] = mbce_t
52 |
53 |     if self.cls_loss_type == 'MSM':
54 |         loss_cls = F.multilabel_soft_margin_loss(pooled_logits, mbce_targets)
55 |     elif self.cls_loss_type == 'SigmoidBCE':
56 |         loss_cls = my_sigmoid_bce(pooled_logits, labels_full, mbce_targets).mean()
57 |     elif self.cls_loss_type == 'SoftmaxBCE':
58 |         loss_cls = my_softmax_bce(outputs["pred_logits"], labels_full).mean()
59 |     elif self.cls_loss_type == 'RIB':
60 |         pass
61 |     else:
62 |         raise NotImplementedError
63 |     '''
64 |     return loss_cls
65 |
66 |
67 | def get_mask_loss_on_assigned(inputs, targets, num_masks):
68 |     if inputs.size(0) == 0:
69 |         return inputs.new_zeros(1)[0], inputs.new_zeros(1)[0]
70 |     else:
71 |         # CHENS CHECK
72 |         # assert (targets.max(1)[0]).min() == 1, f'Should not use zero mask as GT'
73 |         return sigmoid_focal_loss(inputs, targets, num_masks), dice_loss(inputs, targets, num_masks)
74 |
75 |
76 | def get_mask_loss_on_pooling(inputs, targets, num_masks):
77 |     if inputs.size(0) == 0:
78 |         return inputs.new_zeros(1)[0]
79 |     else:
80 |         # CHENS CHECK
81 |         assert (targets.max(1)[0]).min() == 0
82 |         pooled_pred = inputs.max(1, keepdim=True)[0]
83 |         loss = F.binary_cross_entropy_with_logits(pooled_pred,
84 |                                                   torch.ones_like(pooled_pred),
85 |                                                   reduction="none")
86 |         loss = loss.sum() / inputs.size(0)
87 |         return loss
88 |
89 |
90 | def activate_top_R_loss(inputs, targets, rate=0.1):
91 |     if inputs.size(0) == 0:
92 |         return inputs.new_zeros(1)[0]
93 |     else:
94 |         # CHENS CHECK
95 |         assert (targets.max(1)[0]).min() == 0
96 |
97 |
98 |
99 |
100 |         topk_region = torch.topk(inputs, k=int(inputs.size(1) * rate), dim=1)[0]
101 |         loss = F.binary_cross_entropy_with_logits(topk_region,
102 |                                                   torch.ones_like(topk_region),
103 |                                                   reduction="none")
104 |         return loss.mean()
--------------------------------------------------------------------------------
/prop_former/modeling/prop_former_head.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
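# PropFormerHead couples a pixel decoder (producing per-pixel mask features)
# with a transformer predictor (producing per-query class logits and mask
# embeddings); see layers() below for how the two are wired together.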
2 | import logging 3 | from copy import deepcopy 4 | from typing import Callable, Dict, List, Optional, Tuple, Union 5 | 6 | import fvcore.nn.weight_init as weight_init 7 | from torch import nn 8 | from torch.nn import functional as F 9 | 10 | from detectron2.config import configurable 11 | from detectron2.layers import Conv2d, ShapeSpec, get_norm 12 | from detectron2.modeling import SEM_SEG_HEADS_REGISTRY 13 | 14 | from mask_former.modeling.transformer.transformer_predictor import TransformerPredictor 15 | from mask_former.modeling.heads.pixel_decoder import build_pixel_decoder 16 | from .prop_transformer_predictor import PropTransformerPredictor 17 | 18 | @SEM_SEG_HEADS_REGISTRY.register() 19 | class PropFormerHead(nn.Module): 20 | _version = 2 21 | 22 | def _load_from_state_dict( 23 | self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs 24 | ): 25 | version = local_metadata.get("version", None) 26 | if version is None or version < 2: 27 | # Do not warn if train from scratch 28 | scratch = True 29 | logger = logging.getLogger(__name__) 30 | for k in list(state_dict.keys()): 31 | newk = k 32 | if "sem_seg_head" in k and not k.startswith(prefix + "predictor"): 33 | newk = k.replace(prefix, prefix + "pixel_decoder.") 34 | # logger.debug(f"{k} ==> {newk}") 35 | if newk != k: 36 | state_dict[newk] = state_dict[k] 37 | del state_dict[k] 38 | scratch = False 39 | 40 | if not scratch: 41 | logger.warning( 42 | f"Weight format of {self.__class__.__name__} have changed! " 43 | "Please upgrade your models. Applying automatic conversion now ..." 44 | ) 45 | 46 | @configurable 47 | def __init__( 48 | self, 49 | input_shape: Dict[str, ShapeSpec], 50 | *, 51 | num_classes: int, 52 | pixel_decoder: nn.Module, 53 | loss_weight: float = 1.0, 54 | ignore_value: int = -1, 55 | # extra parameters 56 | transformer_predictor: nn.Module, 57 | transformer_in_feature: str, 58 | ): 59 | """ 60 | NOTE: this interface is experimental. 61 | Args: 62 | input_shape: shapes (channels and stride) of the input features 63 | num_classes: number of classes to predict 64 | pixel_decoder: the pixel decoder module 65 | loss_weight: loss weight 66 | ignore_value: category id to be ignored during training. 
67 | transformer_predictor: the transformer decoder that makes prediction 68 | transformer_in_feature: input feature name to the transformer_predictor 69 | """ 70 | super().__init__() 71 | input_shape = sorted(input_shape.items(), key=lambda x: x[1].stride) 72 | self.in_features = [k for k, v in input_shape] 73 | feature_strides = [v.stride for k, v in input_shape] 74 | feature_channels = [v.channels for k, v in input_shape] 75 | 76 | self.ignore_value = ignore_value 77 | self.common_stride = 4 78 | self.loss_weight = loss_weight 79 | 80 | self.pixel_decoder = pixel_decoder 81 | self.predictor = transformer_predictor 82 | self.transformer_in_feature = transformer_in_feature 83 | 84 | self.num_classes = num_classes 85 | 86 | @classmethod 87 | def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]): 88 | res = { 89 | "input_shape": { 90 | k: v for k, v in input_shape.items() if k in cfg.MODEL.SEM_SEG_HEAD.IN_FEATURES 91 | }, 92 | "ignore_value": cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, 93 | "num_classes": cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, 94 | "pixel_decoder": build_pixel_decoder(cfg, input_shape), 95 | "loss_weight": cfg.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT, 96 | "transformer_in_feature": cfg.MODEL.MASK_FORMER.TRANSFORMER_IN_FEATURE, 97 | } 98 | 99 | res["transformer_predictor"] = PropTransformerPredictor( 100 | cfg, 101 | cfg.MODEL.SEM_SEG_HEAD.CONVS_DIM 102 | if cfg.MODEL.MASK_FORMER.TRANSFORMER_IN_FEATURE == "transformer_encoder" 103 | else input_shape[cfg.MODEL.MASK_FORMER.TRANSFORMER_IN_FEATURE].channels, 104 | mask_classification=cfg.MODEL.MASK_FORMER.MAKE_CLS) 105 | 106 | return res 107 | 108 | def forward(self, features): 109 | return self.layers(features) 110 | 111 | def layers(self, features): 112 | mask_features, transformer_encoder_features = self.pixel_decoder.forward_features(features) 113 | if self.transformer_in_feature == "transformer_encoder": 114 | assert (transformer_encoder_features is not None), "Please use the TransformerEncoderPixelDecoder." 
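            # The predictor runs its query transformer over the encoder features;
            # mask_features only enter through the final per-pixel mask projection.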
115 | predictions = self.predictor(transformer_encoder_features, mask_features) 116 | else: 117 | predictions = self.predictor(features[self.transformer_in_feature], mask_features) 118 | return predictions 119 | -------------------------------------------------------------------------------- /prop_former/modeling/prop_transformer_predictor.py: -------------------------------------------------------------------------------- 1 | import fvcore.nn.weight_init as weight_init 2 | import torch 3 | from torch import nn 4 | from torch.nn import functional as F 5 | 6 | from detectron2.config import configurable 7 | from detectron2.layers import Conv2d 8 | 9 | from mask_former.modeling.transformer.position_encoding import PositionEmbeddingSine 10 | from mask_former.modeling.transformer.transformer import Transformer 11 | from mask_former.modeling.transformer.transformer_predictor import MLP 12 | from detectron2.data import MetadataCatalog 13 | 14 | 15 | class PropTransformerPredictor(nn.Module): 16 | @configurable 17 | def __init__(self, in_channels, mask_classification=True, cfg=None, *, num_classes: int, hidden_dim: int, 18 | num_queries: int, nheads: int, dropout: float, dim_feedforward: int, enc_layers: int, dec_layers: int, 19 | pre_norm: bool, deep_supervision: bool, mask_dim: int, enforce_input_project: bool, ): 20 | super().__init__() 21 | self.mask_classification = mask_classification 22 | 23 | N_steps = hidden_dim // 2 24 | self.pe_layer = PositionEmbeddingSine(N_steps, normalize=True) 25 | 26 | transformer = Transformer( 27 | d_model=hidden_dim, 28 | dropout=dropout, 29 | nhead=nheads, 30 | dim_feedforward=dim_feedforward, 31 | num_encoder_layers=enc_layers, 32 | num_decoder_layers=dec_layers, 33 | normalize_before=pre_norm, 34 | return_intermediate_dec=deep_supervision, 35 | ) 36 | 37 | self.num_queries = num_queries 38 | self.transformer = transformer 39 | hidden_dim = transformer.d_model 40 | 41 | if cfg.MODEL.MASK_FORMER.TRANS_QUERY == 'RAND': 42 | self.query_embed = nn.Embedding(num_queries, hidden_dim) 43 | else: 44 | if cfg.MODEL.MASK_FORMER.TRANS_QUERY == 'FCWT256': 45 | transferrable_query = MetadataCatalog.get(cfg.DATASETS.TRAIN[0]).fcweight 46 | elif cfg.MODEL.MASK_FORMER.TRANS_QUERY == 'WDVT1': 47 | transferrable_query = MetadataCatalog.get(cfg.DATASETS.TRAIN[0]).word2vec 48 | elif cfg.MODEL.MASK_FORMER.TRANS_QUERY == 'WDVT2': 49 | transferrable_query = MetadataCatalog.get(cfg.DATASETS.TRAIN[0]).fasttext 50 | else: 51 | raise NotImplementedError 52 | 53 | trans_num, trans_dim = transferrable_query.shape 54 | self.query_embed = nn.Embedding(num_queries, trans_dim) 55 | self.query_embed.weight.data = torch.tensor(transferrable_query) 56 | assert trans_num == num_queries 57 | if trans_dim != hidden_dim: 58 | self.query_sqz = nn.Linear(trans_dim, hidden_dim, bias=True) 59 | 60 | if cfg.MODEL.MASK_FORMER.FREEZE_QUERY: 61 | self.query_embed.weight.requires_grad = False 62 | 63 | if in_channels != hidden_dim or enforce_input_project: 64 | self.input_proj = Conv2d(in_channels, hidden_dim, kernel_size=1) 65 | weight_init.c2_xavier_fill(self.input_proj) 66 | else: 67 | self.input_proj = nn.Sequential() 68 | self.aux_loss = deep_supervision 69 | 70 | # output FFNs 71 | if self.mask_classification: 72 | self.class_embed = nn.Linear(hidden_dim, num_classes + 1) 73 | 74 | self.mask_embed = MLP(hidden_dim, hidden_dim, mask_dim, 3) 75 | 76 | self.cfg = cfg 77 | 78 | @classmethod 79 | def from_config(cls, cfg, in_channels, mask_classification): 80 | ret = {} 81 | ret["in_channels"] = 
in_channels 82 | ret["mask_classification"] = mask_classification 83 | 84 | ret["num_classes"] = cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES 85 | ret["hidden_dim"] = cfg.MODEL.MASK_FORMER.HIDDEN_DIM 86 | ret["num_queries"] = cfg.MODEL.MASK_FORMER.NUM_OBJECT_QUERIES 87 | # Transformer parameters: 88 | ret["nheads"] = cfg.MODEL.MASK_FORMER.NHEADS 89 | ret["dropout"] = cfg.MODEL.MASK_FORMER.DROPOUT 90 | ret["dim_feedforward"] = cfg.MODEL.MASK_FORMER.DIM_FEEDFORWARD 91 | ret["enc_layers"] = cfg.MODEL.MASK_FORMER.ENC_LAYERS 92 | ret["dec_layers"] = cfg.MODEL.MASK_FORMER.DEC_LAYERS 93 | ret["pre_norm"] = cfg.MODEL.MASK_FORMER.PRE_NORM 94 | ret["deep_supervision"] = cfg.MODEL.MASK_FORMER.DEEP_SUPERVISION 95 | ret["enforce_input_project"] = cfg.MODEL.MASK_FORMER.ENFORCE_INPUT_PROJ 96 | 97 | ret["mask_dim"] = cfg.MODEL.SEM_SEG_HEAD.MASK_DIM 98 | ret["cfg"] = cfg 99 | return ret 100 | 101 | def forward(self, x, mask_features): 102 | pos = self.pe_layer(x) 103 | 104 | src = x 105 | mask = None 106 | 107 | if hasattr(self, 'query_sqz'): 108 | query = self.query_sqz(self.query_embed.weight) 109 | else: 110 | query = self.query_embed.weight 111 | 112 | query_embed, memory = self.transformer(self.input_proj(src), mask, query, pos) 113 | 114 | out = {} 115 | 116 | if self.mask_classification: 117 | outputs_class = self.class_embed(query_embed) 118 | out["pred_logits"] = outputs_class[-1] 119 | 120 | if self.aux_loss: 121 | # [l, bs, queries, embed] 122 | mask_embed = self.mask_embed(query_embed) 123 | outputs_seg_masks = torch.einsum("lbqc,bchw->lbqhw", mask_embed, mask_features) 124 | out["pred_masks"] = outputs_seg_masks[-1] 125 | out["aux_outputs"] = self._set_aux_loss(outputs_class if self.mask_classification else None, 126 | outputs_seg_masks) 127 | else: 128 | # FIXME h_boxes takes the last one computed, keep this in mind 129 | # [bs, queries, embed] 130 | mask_embed = self.mask_embed(query_embed[-1]) 131 | outputs_seg_masks = torch.einsum("bqc,bchw->bqhw", mask_embed, mask_features) 132 | out["pred_masks"] = outputs_seg_masks 133 | 134 | #### 135 | 136 | if self.cfg.CROSS_IMG_SIM.BASE_LOSS != 0: 137 | out['pixel_features'] = mask_features 138 | return out 139 | 140 | @torch.jit.unused 141 | def _set_aux_loss(self, outputs_class, outputs_seg_masks): 142 | if self.mask_classification: 143 | return [{"pred_logits": a, "pred_masks": b} for a, b in zip(outputs_class[:-1], outputs_seg_masks[:-1])] 144 | else: 145 | return [{"pred_masks": b} for b in outputs_seg_masks[:-1]] 146 | -------------------------------------------------------------------------------- /prop_former/pseudo_labeling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import os 4 | import matplotlib.pyplot as plt 5 | from PIL import Image 6 | from detectron2.utils.file_io import PathManager 7 | from shutil import copyfile 8 | 9 | 10 | def generate_pseudo_label(pred_segm, gt_segm_raw, ant_file, output_dir, meta, ant_file_to_type=None): 11 | ''' 12 | pred_segm is cid, while gt_segm_raw is did. 
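    Here cid is the contiguous training index and did the raw dataset id
    (meta.c_did_to_cid maps did -> cid). Novel-class regions are filled in
    from the prediction, base-class regions are copied from the ground truth
    (or taken from the prediction on 'updated' images), and all remaining
    pixels stay 255 (ignore).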
13 | ''' 14 | 15 | # split_idx = int(meta.name.split('_')[2][5:]) 16 | # if split_idx >= 10: 17 | # img_type = ant_file_to_type[ant_file] 18 | # else: 19 | # img_type = 'existing' 20 | img_type = 'existing' 21 | 22 | assert img_type in ['existing', 'updated'] 23 | mixed_mask = np.ones_like(gt_segm_raw) * 255 24 | 25 | for gt_did in np.unique(gt_segm_raw): 26 | if gt_did == 255: 27 | continue 28 | if gt_did in meta.c_novel_dids: 29 | novel_cid = meta.c_did_to_cid[gt_did] 30 | mixed_mask[pred_segm == novel_cid] = gt_did 31 | 32 | if img_type == 'updated': 33 | for gt_did in np.unique(gt_segm_raw): 34 | if gt_did == 255: 35 | continue 36 | if gt_did in meta.c_base_dids: 37 | base_cid = meta.c_did_to_cid[gt_did] 38 | mixed_mask[pred_segm == base_cid] = gt_did 39 | else: 40 | for gt_did in np.unique(gt_segm_raw): 41 | if gt_did == 255: 42 | continue 43 | if gt_did in meta.c_base_dids: 44 | mixed_mask[gt_segm_raw == gt_did] = gt_did 45 | 46 | os.makedirs(output_dir, exist_ok=True) 47 | save_file = f'{output_dir}/{os.path.basename(ant_file)}' 48 | mixed_mask = mixed_mask.astype(np.uint8) 49 | 50 | mixed_mask_img = Image.fromarray(mixed_mask) 51 | mixed_mask_img.save(save_file) 52 | 53 | # with PathManager.open(save_file, "rb") as f: 54 | # mixed_mask2 = np.array(Image.open(f), dtype=np.int) 55 | # 56 | # assert (mixed_mask2 == mixed_mask).min() 57 | 58 | # copyfile(ant_file, f'{output_dir}/{os.path.basename(ant_file).split(".")[0]}_GT.png') 59 | return mixed_mask 60 | -------------------------------------------------------------------------------- /prop_former/shared.py: -------------------------------------------------------------------------------- 1 | from detectron2.data import MetadataCatalog 2 | import torch.nn as nn 3 | import torch 4 | import numpy as np 5 | import copy 6 | 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | from terminaltables import AsciiTable 10 | import copy 11 | import os 12 | import torch.nn.functional as F 13 | 14 | 15 | def c_print_csv_format(results, logger): 16 | col_num = 4 17 | 18 | for task, res in results.items(): 19 | imp_keys = sorted([k for k in res.keys() if "-" not in k]) 20 | summary_res = {k: res[k] for k in res.keys() if k in imp_keys} 21 | class_IoU_res = {k.split('-')[1]: res[k] for k in res.keys() if k not in imp_keys and 'IoU' in k} 22 | class_ACC_res = {k.split('-')[1]: res[k] for k in res.keys() if k not in imp_keys and 'ACC' in k} 23 | 24 | names = sorted(list(class_IoU_res.keys())) 25 | ml = min(max([len(name) for name in names]), 10) 26 | 27 | table_data = [] 28 | title = [f' Name: IoU / ACC' for i in range(col_num)] 29 | table_data.append(title) 30 | 31 | row_data = [] 32 | for i, name in enumerate(names): 33 | row_data.append(f'{name.ljust(ml)}: {class_IoU_res[name]:.1f}/{class_ACC_res[name]:.1f}') 34 | if ((i + 1) % col_num == 0) | (i == len(names) - 1): 35 | table_data.append(copy.deepcopy(row_data)) 36 | row_data = [] 37 | 38 | table_ins = AsciiTable(table_data) 39 | for i in range(len(table_ins.justify_columns)): 40 | table_ins.justify_columns[i] = 'center' 41 | out_str = f'\n!! Class Result of \"{task}\":\n{table_ins.table}' 42 | logger.info(out_str) 43 | 44 | name, value = [], [] 45 | for k, v in summary_res.items(): 46 | name.append(f'{k.ljust(5)}') 47 | value.append(f'{v:.1f}') 48 | 49 | table_ins = AsciiTable([name, value]) 50 | for i in range(len(table_ins.justify_columns)): 51 | table_ins.justify_columns[i] = 'center' 52 | out_str = f'\n!! 
Summary of \"{task}\":\n{table_ins.table}'
53 |
54 |         logger.info(out_str)
55 |
56 |     return
57 |
58 | def print_pc(module_dict, printf=print):
59 |     for name, module in module_dict.items():
60 |         total_params = sum(p.numel() for p in module.parameters())
61 |         total_trainable_params = sum(p.numel() for p in module.parameters() if p.requires_grad)
62 |
63 |         printf(f'{total_trainable_params / 1e6:.1f}M/{total_params / 1e6:.1f}M training/total params in {name}.')
64 |     return
65 |
66 |
67 | def crf_inference_for_segm(img, segm, t=10, pos_scale_factor=1, im_scale_factor=2):
68 |     import pydensecrf.densecrf as dcrf
69 |     from pydensecrf.utils import unary_from_softmax
70 |
71 |     score_maps = np.stack([segm == c for c in np.unique(segm)]).astype(np.float32)
72 |
73 |     localcid_to_globalcid = {i: c for i, c in enumerate(np.unique(segm))}
74 |
75 |     h, w = img.shape[:2]
76 |     n_labels = score_maps.shape[0]
77 |
78 |     d = dcrf.DenseCRF2D(w, h, n_labels)
79 |     d.setUnaryEnergy(score_maps.reshape((n_labels, -1)))
80 |
81 |     d.addPairwiseGaussian(sxy=3 / pos_scale_factor, compat=3)
82 |     d.addPairwiseBilateral(sxy=80 / im_scale_factor, srgb=13, rgbim=np.copy(img), compat=10)
83 |     Q = d.inference(t)
84 |     res = np.array(Q).reshape((n_labels, h, w)).argmax(0)
85 |
86 |     final_res = copy.deepcopy(segm)
87 |     for localcid in np.unique(res):
88 |         final_res[res == localcid] = localcid_to_globalcid[localcid]
89 |
90 |     return final_res
91 |
92 |
93 | def crf_inference_for_prob(img, probs, t=10, scale_factor=1, labels=21):
94 |     import pydensecrf.densecrf as dcrf
95 |     from pydensecrf.utils import unary_from_softmax
96 |
97 |     h, w = img.shape[:2]
98 |     n_labels = labels
99 |
100 |     d = dcrf.DenseCRF2D(w, h, n_labels)
101 |
102 |     unary = unary_from_softmax(probs)
103 |     unary = np.ascontiguousarray(unary)
104 |
105 |     d.setUnaryEnergy(unary)
106 |     d.addPairwiseGaussian(sxy=3 / scale_factor, compat=3)
107 |     d.addPairwiseBilateral(sxy=80 / scale_factor, srgb=13, rgbim=np.copy(img), compat=10)
108 |     Q = d.inference(t)
109 |
110 |     return np.array(Q).reshape((n_labels, h, w))
111 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | cython
2 | scipy
3 | shapely
4 | timm
5 | h5py
6 | opencv-python
7 | tqdm
8 | pandas
9 | terminaltables
--------------------------------------------------------------------------------