├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── NOTICE ├── README.md ├── align_uniform.py ├── clip ├── __init__.py ├── bpe_simple_vocab_16e6.txt.gz ├── clip.py ├── model.py └── simple_tokenizer.py ├── clip_words.csv ├── configs ├── datasets │ ├── caltech101.yaml │ ├── dtd.yaml │ ├── eurosat.yaml │ ├── fgvc_aircraft.yaml │ ├── food101.yaml │ ├── imagenet.yaml │ ├── imagenet_21k.yaml │ ├── imagenet_a.yaml │ ├── imagenet_r.yaml │ ├── imagenet_sketch.yaml │ ├── imagenetv2.yaml │ ├── oxford_flowers.yaml │ ├── oxford_pets.yaml │ ├── stanford_cars.yaml │ ├── sun397.yaml │ └── ucf101.yaml └── trainers │ ├── CoCoOp │ ├── vit_b16_c16_ep10_batch1.yaml │ ├── vit_b16_c4_ep10_batch1.yaml │ ├── vit_b16_c4_ep10_batch1_ctxv1.yaml │ └── vit_b16_c8_ep10_batch1.yaml │ ├── CoOp │ ├── rn101.yaml │ ├── rn101_ep50.yaml │ ├── rn50.yaml │ ├── rn50_ctxv1.yaml │ ├── rn50_ep100.yaml │ ├── rn50_ep50.yaml │ ├── rn50_ep50_ctxv1.yaml │ ├── rn50_val.yaml │ ├── vit_b16.yaml │ ├── vit_b16_ep100.yaml │ ├── vit_b16_ep50.yaml │ ├── vit_b16_val.yaml │ ├── vit_b32.yaml │ ├── vit_b32_ep50.yaml │ ├── vit_b32_val.yaml │ └── vit_l14.yaml │ ├── MLP │ ├── rn50_ep20.yaml │ ├── rn50_ep50.yaml │ ├── vit_b16_ep20.yaml │ ├── vit_b16_ep50.yaml │ ├── vit_b32_ep20.yaml │ └── vit_b32_ep50.yaml │ ├── MaPLe │ ├── vit_b16_c2_ep5_batch4_2ctx.yaml │ ├── vit_b16_c2_ep5_batch4_2ctx_cross_datasets.yaml │ └── vit_b32_c2_ep5_batch4_2ctx_cross_datasets.yaml │ ├── POMP │ ├── rn50_ep20_randaug2.yaml │ ├── rn50_ep5.yaml │ ├── vit_b16_ep20.yaml │ ├── vit_b16_ep20_randaug2.yaml │ ├── vit_b16_ep5.yaml │ ├── vit_b16_ep5_randaug2.yaml │ └── vit_b32_ep20_randaug2.yaml │ └── VPT │ ├── vit_b16_c2_ep5_batch4_4.yaml │ └── vit_b32_c2_ep5_batch4_4.yaml ├── datasets ├── __init__.py ├── caltech101.py ├── dtd.py ├── eurosat.py ├── fgvc_aircraft.py ├── food101.py ├── imagenet.py ├── imagenet_21k.py ├── imagenet_a.py ├── imagenet_r.py ├── imagenet_sketch.py ├── imagenetv2.py ├── oxford_flowers.py ├── oxford_pets.py ├── stanford_cars.py ├── sun397.py └── ucf101.py ├── docs ├── DATASETS.md ├── INSTALL.md ├── MODELS.md ├── RUN.md └── main_figure.png ├── requirements.txt ├── scripts ├── cocoop │ ├── base2new_test.sh │ ├── base2new_train.sh │ ├── xd_test.sh │ └── xd_train.sh ├── coop │ ├── eval.sh │ ├── main.sh │ └── multi_scripts.sh ├── maple │ ├── base2new_test_maple.sh │ ├── base2new_train_maple.sh │ ├── reproduce_maple.sh │ ├── reproduce_maple_xd.sh │ ├── xd_test_maple.sh │ └── xd_train_maple.sh ├── mlp │ └── main.sh ├── pomp │ ├── eval.sh │ ├── main.sh │ ├── multi_scripts.sh │ └── xd_test.sh ├── vpt │ ├── base2new_test_vpt.sh │ ├── base2new_train_vpt.sh │ ├── reproduce_vpt.sh │ ├── xd_test_vpt.sh │ └── xd_train_vpt.sh └── zsclip │ ├── xd_test.sh │ └── zeroshot.sh ├── third_party ├── Dassl.pytorch │ ├── .flake8 │ ├── .gitignore │ ├── .isort.cfg │ ├── .style.yapf │ ├── DATASETS.md │ ├── LICENSE │ ├── README.md │ ├── configs │ │ ├── README.md │ │ ├── datasets │ │ │ ├── da │ │ │ │ ├── cifar_stl.yaml │ │ │ │ ├── digit5.yaml │ │ │ │ ├── domainnet.yaml │ │ │ │ ├── mini_domainnet.yaml │ │ │ │ ├── office31.yaml │ │ │ │ ├── office_home.yaml │ │ │ │ └── visda17.yaml │ │ │ ├── dg │ │ │ │ ├── camelyon17.yaml │ │ │ │ ├── cifar100_c.yaml │ │ │ │ ├── cifar10_c.yaml │ │ │ │ ├── digit_single.yaml │ │ │ │ ├── digits_dg.yaml │ │ │ │ ├── fmow.yaml │ │ │ │ ├── iwildcam.yaml │ │ │ │ ├── office_home_dg.yaml │ │ │ │ ├── pacs.yaml │ │ │ │ └── vlcs.yaml │ │ │ └── ssl │ │ │ │ ├── cifar10.yaml │ │ │ │ ├── cifar100.yaml │ │ │ │ ├── stl10.yaml │ │ │ │ └── svhn.yaml │ │ └── trainers │ │ │ ├── da 
│ │ │ ├── cdac │ │ │ │ ├── digit5.yaml │ │ │ │ ├── domainnet.yaml │ │ │ │ └── mini_domainnet.yaml │ │ │ ├── dael │ │ │ │ ├── digit5.yaml │ │ │ │ ├── domainnet.yaml │ │ │ │ └── mini_domainnet.yaml │ │ │ ├── m3sda │ │ │ │ ├── digit5.yaml │ │ │ │ ├── domainnet.yaml │ │ │ │ └── mini_domainnet.yaml │ │ │ └── source_only │ │ │ │ ├── digit5.yaml │ │ │ │ ├── mini_domainnet.yaml │ │ │ │ ├── office31.yaml │ │ │ │ └── visda17.yaml │ │ │ ├── dg │ │ │ ├── daeldg │ │ │ │ ├── digits_dg.yaml │ │ │ │ ├── office_home_dg.yaml │ │ │ │ └── pacs.yaml │ │ │ ├── ddaig │ │ │ │ ├── digits_dg.yaml │ │ │ │ ├── office_home_dg.yaml │ │ │ │ └── pacs.yaml │ │ │ └── vanilla │ │ │ │ ├── digits_dg.yaml │ │ │ │ ├── mini_domainnet.yaml │ │ │ │ ├── office_home_dg.yaml │ │ │ │ └── pacs.yaml │ │ │ └── ssl │ │ │ └── fixmatch │ │ │ └── cifar10.yaml │ ├── dassl │ │ ├── __init__.py │ │ ├── config │ │ │ ├── __init__.py │ │ │ └── defaults.py │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── data_manager.py │ │ │ ├── datasets │ │ │ │ ├── __init__.py │ │ │ │ ├── base_dataset.py │ │ │ │ ├── build.py │ │ │ │ ├── da │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── cifarstl.py │ │ │ │ │ ├── digit5.py │ │ │ │ │ ├── domainnet.py │ │ │ │ │ ├── mini_domainnet.py │ │ │ │ │ ├── office31.py │ │ │ │ │ ├── office_home.py │ │ │ │ │ └── visda17.py │ │ │ │ ├── dg │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── cifar_c.py │ │ │ │ │ ├── digit_single.py │ │ │ │ │ ├── digits_dg.py │ │ │ │ │ ├── office_home_dg.py │ │ │ │ │ ├── pacs.py │ │ │ │ │ ├── vlcs.py │ │ │ │ │ └── wilds │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── camelyon17.py │ │ │ │ │ │ ├── fmow.py │ │ │ │ │ │ ├── iwildcam.py │ │ │ │ │ │ └── wilds_base.py │ │ │ │ └── ssl │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── cifar.py │ │ │ │ │ ├── stl10.py │ │ │ │ │ └── svhn.py │ │ │ ├── samplers.py │ │ │ └── transforms │ │ │ │ ├── __init__.py │ │ │ │ ├── autoaugment.py │ │ │ │ ├── randaugment.py │ │ │ │ └── transforms.py │ │ ├── engine │ │ │ ├── __init__.py │ │ │ ├── build.py │ │ │ ├── da │ │ │ │ ├── __init__.py │ │ │ │ ├── adabn.py │ │ │ │ ├── adda.py │ │ │ │ ├── cdac.py │ │ │ │ ├── dael.py │ │ │ │ ├── dann.py │ │ │ │ ├── m3sda.py │ │ │ │ ├── mcd.py │ │ │ │ ├── mme.py │ │ │ │ ├── se.py │ │ │ │ └── source_only.py │ │ │ ├── dg │ │ │ │ ├── __init__.py │ │ │ │ ├── crossgrad.py │ │ │ │ ├── daeldg.py │ │ │ │ ├── ddaig.py │ │ │ │ ├── domain_mix.py │ │ │ │ └── vanilla.py │ │ │ ├── ssl │ │ │ │ ├── __init__.py │ │ │ │ ├── entmin.py │ │ │ │ ├── fixmatch.py │ │ │ │ ├── mean_teacher.py │ │ │ │ ├── mixmatch.py │ │ │ │ └── sup_baseline.py │ │ │ └── trainer.py │ │ ├── evaluation │ │ │ ├── __init__.py │ │ │ ├── build.py │ │ │ └── evaluator.py │ │ ├── metrics │ │ │ ├── __init__.py │ │ │ ├── accuracy.py │ │ │ └── distance.py │ │ ├── modeling │ │ │ ├── __init__.py │ │ │ ├── backbone │ │ │ │ ├── __init__.py │ │ │ │ ├── alexnet.py │ │ │ │ ├── backbone.py │ │ │ │ ├── build.py │ │ │ │ ├── cnn_digit5_m3sda.py │ │ │ │ ├── cnn_digitsdg.py │ │ │ │ ├── cnn_digitsingle.py │ │ │ │ ├── efficientnet │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── model.py │ │ │ │ │ └── utils.py │ │ │ │ ├── preact_resnet18.py │ │ │ │ ├── resnet.py │ │ │ │ ├── resnet_dynamic.py │ │ │ │ ├── vgg.py │ │ │ │ └── wide_resnet.py │ │ │ ├── head │ │ │ │ ├── __init__.py │ │ │ │ ├── build.py │ │ │ │ └── mlp.py │ │ │ ├── network │ │ │ │ ├── __init__.py │ │ │ │ ├── build.py │ │ │ │ └── ddaig_fcn.py │ │ │ └── ops │ │ │ │ ├── __init__.py │ │ │ │ ├── attention.py │ │ │ │ ├── conv.py │ │ │ │ ├── cross_entropy.py │ │ │ │ ├── dsbn.py │ │ │ │ ├── efdmix.py │ │ │ │ ├── mixstyle.py │ │ │ │ ├── mixup.py │ │ │ │ 
├── mmd.py │ │ │ │ ├── optimal_transport.py │ │ │ │ ├── reverse_grad.py │ │ │ │ ├── sequential2.py │ │ │ │ ├── transnorm.py │ │ │ │ └── utils.py │ │ ├── optim │ │ │ ├── __init__.py │ │ │ ├── lr_scheduler.py │ │ │ ├── optimizer.py │ │ │ └── radam.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── logger.py │ │ │ ├── meters.py │ │ │ ├── registry.py │ │ │ ├── tools.py │ │ │ └── torchtools.py │ ├── datasets │ │ ├── da │ │ │ ├── cifar_stl.py │ │ │ ├── digit5.py │ │ │ └── visda17.sh │ │ ├── dg │ │ │ └── cifar_c.py │ │ └── ssl │ │ │ ├── cifar10_cifar100_svhn.py │ │ │ └── stl10.py │ ├── linter.sh │ ├── requirements.txt │ ├── setup.py │ └── tools │ │ ├── parse_test_res.py │ │ ├── replace_text.py │ │ └── train.py ├── Detic │ ├── .DS_Store │ ├── .gitignore │ ├── .gitmodules │ ├── CODE_OF_CONDUCT.md │ ├── CONTRIBUTING.md │ ├── LICENSE │ ├── README.md │ ├── cherry_pick.py │ ├── cog.yaml │ ├── configs │ │ ├── Base-C2_L_R5021k_640b64_4x.yaml │ │ ├── Base-DeformDETR_L_R50_4x.yaml │ │ ├── Base_OVCOCO_C4_1x.yaml │ │ ├── BoxSup-C2_LCOCO_CLIP_CXT21k_640b32_4x.yaml │ │ ├── BoxSup-C2_LCOCO_CLIP_R18_640b32_4x.yaml │ │ ├── BoxSup-C2_LCOCO_CLIP_R5021k_640b64_4x.yaml │ │ ├── BoxSup-C2_LCOCO_CLIP_SwinB_896b32_4x.yaml │ │ ├── BoxSup-C2_L_CLIP_R5021k_640b64_4x.yaml │ │ ├── BoxSup-C2_L_CLIP_R5021k_640b64_4x_pomp.yaml │ │ ├── BoxSup-C2_L_CLIP_SwinB_896b32_4x.yaml │ │ ├── BoxSup-C2_Lbase_CLIP_R5021k_640b64_4x.yaml │ │ ├── BoxSup-C2_Lbase_CLIP_R5021k_640b64_4x_pomp.yaml │ │ ├── BoxSup-C2_Lbase_CLIP_SwinB_896b32_4x.yaml │ │ ├── BoxSup-DeformDETR_L_R50_2x.yaml │ │ ├── BoxSup-DeformDETR_L_R50_4x.yaml │ │ ├── BoxSup_OVCOCO_CLIP_R50_1x.yaml │ │ ├── BoxSup_OVCOCO_CLIP_R50_1x_pomp.yaml │ │ ├── BoxSup_ViLD_200e.py │ │ ├── Detic_DeformDETR_LI_R50_4x_ft4x.yaml │ │ ├── Detic_LCOCOI21k_CLIP_CXT21k_640b32_4x_ft4x_max-size.yaml │ │ ├── Detic_LCOCOI21k_CLIP_R18_640b32_4x_ft4x_max-size.yaml │ │ ├── Detic_LCOCOI21k_CLIP_R5021k_640b32_4x_ft4x_max-size.yaml │ │ ├── Detic_LCOCOI21k_CLIP_SwinB_896b32_4x_ft4x_max-size.yaml │ │ ├── Detic_LI21k_CLIP_SwinB_896b32_4x_ft4x_max-size.yaml │ │ ├── Detic_LI_CLIP_R5021k_640b64_4x_ft4x_max-size.yaml │ │ ├── Detic_LI_CLIP_R5021k_640b64_4x_ft4x_max-size_pomp.yaml │ │ ├── Detic_LI_CLIP_R5021k_640b64_4x_ft4x_max-size_pomp_cross_datasets.yaml │ │ ├── Detic_LI_CLIP_SwinB_896b32_4x_ft4x_max-size.yaml │ │ ├── Detic_LbaseCCcapimg_CLIP_R5021k_640b64_4x_ft4x_max-size.yaml │ │ ├── Detic_LbaseCCimg_CLIP_R5021k_640b64_4x_ft4x_max-size.yaml │ │ ├── Detic_LbaseI_CLIP_R5021k_640b64_4x_ft4x_max-size.yaml │ │ ├── Detic_LbaseI_CLIP_R5021k_640b64_4x_ft4x_max-size_pomp.yaml │ │ ├── Detic_LbaseI_CLIP_R5021k_640b64_4x_ft4x_predicted.yaml │ │ ├── Detic_LbaseI_CLIP_SwinB_896b32_4x_ft4x_max-size.yaml │ │ ├── Detic_OVCOCO_CLIP_R50_1x_caption.yaml │ │ ├── Detic_OVCOCO_CLIP_R50_1x_max-size.yaml │ │ ├── Detic_OVCOCO_CLIP_R50_1x_max-size_caption.yaml │ │ ├── Detic_OVCOCO_CLIP_R50_1x_max-size_caption_pomp.yaml │ │ ├── Detic_ViLD_200e.py │ │ └── debug.yaml │ ├── datasets │ │ ├── README.md │ │ └── metadata │ │ │ ├── Objects365_names_fix.csv │ │ │ ├── coco_clip_a+cname.npy │ │ │ ├── coco_clip_pomp+cname.npy │ │ │ ├── imagenet_lvis_wnid.txt │ │ │ ├── lvis_v1_clip_a+cname.npy │ │ │ ├── lvis_v1_clip_pomp+cname.npy │ │ │ ├── lvis_v1_train_cat_info.json │ │ │ ├── o365_clip_a+cnamefix.npy │ │ │ ├── o365_fixname_clip_pomp+cname.npy │ │ │ └── oid_clip_a+cname.npy │ ├── demo.py │ ├── demo │ │ ├── demo.ipynb │ │ └── environment.yaml │ ├── detic │ │ ├── __init__.py │ │ ├── config.py │ │ ├── custom_solver.py │ │ ├── data │ │ │ ├── 
custom_build_augmentation.py │ │ │ ├── custom_dataset_dataloader.py │ │ │ ├── custom_dataset_mapper.py │ │ │ ├── datasets │ │ │ │ ├── cc.py │ │ │ │ ├── coco_zeroshot.py │ │ │ │ ├── imagenet.py │ │ │ │ ├── lvis_22k_categories.py │ │ │ │ ├── lvis_v1.py │ │ │ │ ├── objects365.py │ │ │ │ ├── oid.py │ │ │ │ └── register_oid.py │ │ │ ├── tar_dataset.py │ │ │ └── transforms │ │ │ │ ├── custom_augmentation_impl.py │ │ │ │ └── custom_transform.py │ │ ├── evaluation │ │ │ ├── custom_coco_eval.py │ │ │ └── oideval.py │ │ ├── modeling │ │ │ ├── backbone │ │ │ │ ├── swintransformer.py │ │ │ │ └── timm.py │ │ │ ├── debug.py │ │ │ ├── meta_arch │ │ │ │ ├── custom_rcnn.py │ │ │ │ └── d2_deformable_detr.py │ │ │ ├── roi_heads │ │ │ │ ├── detic_fast_rcnn.py │ │ │ │ ├── detic_roi_heads.py │ │ │ │ ├── res5_roi_heads.py │ │ │ │ └── zero_shot_classifier.py │ │ │ ├── text │ │ │ │ └── text_encoder.py │ │ │ └── utils.py │ │ └── predictor.py │ ├── docs │ │ ├── INSTALL.md │ │ ├── MODEL_ZOO.md │ │ ├── example_output_custom.jpeg │ │ ├── example_output_lvis.jpeg │ │ └── teaser.jpeg │ ├── extract.py │ ├── figures │ │ ├── .DS_Store │ │ ├── Detic_LI_CLIP_R5021k_640b64_4x_ft4x_max-size │ │ │ ├── .DS_Store │ │ │ └── inference_lvis_v1_val │ │ │ │ └── .DS_Store │ │ └── Detic_LI_CLIP_R5021k_640b64_4x_ft4x_max-size_gpt │ │ │ └── .DS_Store │ ├── lazy_train_net.py │ ├── predict.py │ ├── requirements.txt │ ├── third_party │ │ ├── CenterNet2 │ │ │ ├── .github │ │ │ │ ├── CODE_OF_CONDUCT.md │ │ │ │ ├── CONTRIBUTING.md │ │ │ │ ├── Detectron2-Logo-Horz.svg │ │ │ │ ├── ISSUE_TEMPLATE.md │ │ │ │ ├── ISSUE_TEMPLATE │ │ │ │ │ ├── bugs.md │ │ │ │ │ ├── config.yml │ │ │ │ │ ├── documentation.md │ │ │ │ │ ├── feature-request.md │ │ │ │ │ └── unexpected-problems-bugs.md │ │ │ │ ├── pull_request_template.md │ │ │ │ └── workflows │ │ │ │ │ ├── check-template.yml │ │ │ │ │ ├── levenshtein.js │ │ │ │ │ ├── needs-reply.yml │ │ │ │ │ ├── remove-needs-reply.yml │ │ │ │ │ └── workflow.yml │ │ │ ├── .gitignore │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── centernet │ │ │ │ ├── __init__.py │ │ │ │ ├── config.py │ │ │ │ ├── data │ │ │ │ │ ├── custom_build_augmentation.py │ │ │ │ │ ├── custom_dataset_dataloader.py │ │ │ │ │ ├── datasets │ │ │ │ │ │ ├── coco.py │ │ │ │ │ │ ├── nuimages.py │ │ │ │ │ │ └── objects365.py │ │ │ │ │ └── transforms │ │ │ │ │ │ ├── custom_augmentation_impl.py │ │ │ │ │ │ └── custom_transform.py │ │ │ │ └── modeling │ │ │ │ │ ├── backbone │ │ │ │ │ ├── bifpn.py │ │ │ │ │ ├── bifpn_fcos.py │ │ │ │ │ ├── dla.py │ │ │ │ │ ├── dlafpn.py │ │ │ │ │ ├── fpn_p5.py │ │ │ │ │ └── res2net.py │ │ │ │ │ ├── debug.py │ │ │ │ │ ├── dense_heads │ │ │ │ │ ├── centernet.py │ │ │ │ │ ├── centernet_head.py │ │ │ │ │ └── utils.py │ │ │ │ │ ├── layers │ │ │ │ │ ├── deform_conv.py │ │ │ │ │ ├── heatmap_focal_loss.py │ │ │ │ │ ├── iou_loss.py │ │ │ │ │ └── ml_nms.py │ │ │ │ │ ├── meta_arch │ │ │ │ │ └── centernet_detector.py │ │ │ │ │ └── roi_heads │ │ │ │ │ ├── custom_fast_rcnn.py │ │ │ │ │ ├── custom_roi_heads.py │ │ │ │ │ └── fed_loss.py │ │ │ ├── configs │ │ │ │ ├── Base-CenterNet-FPN.yaml │ │ │ │ ├── Base-CenterNet2.yaml │ │ │ │ ├── Base_S4_DLA.yaml │ │ │ │ ├── CenterNet-FPN_R50_1x.yaml │ │ │ │ ├── CenterNet-S4_DLA_8x.yaml │ │ │ │ ├── CenterNet2-F_R50_1x.yaml │ │ │ │ ├── CenterNet2_DLA-BiFPN-P3_24x.yaml │ │ │ │ ├── CenterNet2_DLA-BiFPN-P3_4x.yaml │ │ │ │ ├── CenterNet2_DLA-BiFPN-P5_640_16x.yaml │ │ │ │ ├── CenterNet2_DLA-BiFPN-P5_640_16x_ST.yaml │ │ │ │ ├── CenterNet2_DLA-fcosBiFPN-P5_640_16x_ST.yaml │ │ │ │ ├── 
CenterNet2_R2-101-DCN-BiFPN_1280_4x.yaml │ │ │ │ ├── CenterNet2_R2-101-DCN-BiFPN_4x+4x_1560_ST.yaml │ │ │ │ ├── CenterNet2_R2-101-DCN_896_4x.yaml │ │ │ │ ├── CenterNet2_R50_1x.yaml │ │ │ │ ├── CenterNet2_X101-DCN_2x.yaml │ │ │ │ ├── LVIS_CenterNet2_R50_1x.yaml │ │ │ │ ├── LVIS_CenterNet2_R50_Fed_1x.yaml │ │ │ │ ├── O365_CenterNet2_R50_1x.yaml │ │ │ │ └── nuImages_CenterNet2_DLA_640_8x.yaml │ │ │ ├── datasets │ │ │ │ └── README.md │ │ │ ├── demo.py │ │ │ ├── docs │ │ │ │ └── MODEL_ZOO.md │ │ │ ├── predictor.py │ │ │ ├── requirements.txt │ │ │ ├── tools │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── analyze_model.py │ │ │ │ ├── benchmark.py │ │ │ │ ├── convert-torchvision-to-d2.py │ │ │ │ ├── deploy │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ ├── README.md │ │ │ │ │ ├── export_model.py │ │ │ │ │ └── torchscript_mask_rcnn.cpp │ │ │ │ ├── lazyconfig_train_net.py │ │ │ │ ├── lightning_train_net.py │ │ │ │ ├── plain_train_net.py │ │ │ │ ├── train_net.py │ │ │ │ ├── visualize_data.py │ │ │ │ └── visualize_json_results.py │ │ │ └── train_net.py │ │ └── Deformable-DETR │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── benchmark.py │ │ │ ├── configs │ │ │ ├── r50_deformable_detr.sh │ │ │ ├── r50_deformable_detr_plus_iterative_bbox_refinement.sh │ │ │ ├── r50_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh │ │ │ ├── r50_deformable_detr_single_scale.sh │ │ │ └── r50_deformable_detr_single_scale_dc5.sh │ │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ ├── coco.py │ │ │ ├── coco_eval.py │ │ │ ├── coco_panoptic.py │ │ │ ├── data_prefetcher.py │ │ │ ├── panoptic_eval.py │ │ │ ├── samplers.py │ │ │ ├── torchvision_datasets │ │ │ │ ├── __init__.py │ │ │ │ └── coco.py │ │ │ └── transforms.py │ │ │ ├── docs │ │ │ └── changelog.md │ │ │ ├── engine.py │ │ │ ├── main.py │ │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── backbone.py │ │ │ ├── deformable_detr.py │ │ │ ├── deformable_transformer.py │ │ │ ├── matcher.py │ │ │ ├── ops │ │ │ │ ├── functions │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── ms_deform_attn_func.py │ │ │ │ ├── make.sh │ │ │ │ ├── modules │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── ms_deform_attn.py │ │ │ │ ├── setup.py │ │ │ │ ├── src │ │ │ │ │ ├── cpu │ │ │ │ │ │ ├── ms_deform_attn_cpu.cpp │ │ │ │ │ │ └── ms_deform_attn_cpu.h │ │ │ │ │ ├── cuda │ │ │ │ │ │ ├── ms_deform_attn_cuda.cu │ │ │ │ │ │ ├── ms_deform_attn_cuda.h │ │ │ │ │ │ └── ms_deform_im2col_cuda.cuh │ │ │ │ │ ├── ms_deform_attn.h │ │ │ │ │ └── vision.cpp │ │ │ │ └── test.py │ │ │ ├── position_encoding.py │ │ │ └── segmentation.py │ │ │ ├── requirements.txt │ │ │ ├── tools │ │ │ ├── launch.py │ │ │ ├── run_dist_launch.sh │ │ │ └── run_dist_slurm.sh │ │ │ └── util │ │ │ ├── __init__.py │ │ │ ├── box_ops.py │ │ │ ├── misc.py │ │ │ └── plot_utils.py │ ├── tools │ │ ├── convert-thirdparty-pretrained-model-to-d2.py │ │ ├── create_imagenetlvis_json.py │ │ ├── create_lvis_21k.py │ │ ├── download_cc.py │ │ ├── dump_clip_features.py │ │ ├── fix_o365_names.py │ │ ├── fix_o365_path.py │ │ ├── get_cc_tags.py │ │ ├── get_coco_zeroshot_oriorder.py │ │ ├── get_imagenet_21k_full_tar_json.py │ │ ├── get_lvis_cat_info.py │ │ ├── merge_lvis_coco.py │ │ ├── preprocess_imagenet22k.py │ │ ├── remove_lvis_rare.py │ │ ├── tar_dataset.py │ │ └── unzip_imagenet_lvis.py │ └── train_net.py └── zsseg.baseline │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── configs │ ├── ade20k-150 │ │ ├── cross_dataset_imagenet_prompt_test_only.yaml │ │ ├── cross_dataset_learned_prompt_test_only.yaml │ │ ├── cross_dataset_pomp_prompt_test_only.yaml │ │ ├── 
cross_dataset_single_prompt_test_only.yaml │ │ ├── cross_dataset_test_only.yaml │ │ └── cross_dataset_vild_prompt_test_only.yaml │ ├── ade20k-847 │ │ └── cross_dataset_test_only.yaml │ ├── cityscapes-19 │ │ ├── cross_dataset_maskformer_R101c_bs32_cart_prompt_test_only.yaml │ │ ├── cross_dataset_maskformer_R101c_bs32_learned_prompt_test_only.yaml │ │ └── cross_dataset_maskformer_R101c_bs32_test_only.yaml │ ├── coco-stuff-164k-156 │ │ ├── zero_shot_maskformer_R101c_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R101c_imagenet_prompt_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R101c_pomp_prompt_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R101c_single_prompt_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R101c_single_prompt_clip_rn101_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R101c_single_prompt_clip_rn50_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R101c_single_prompt_clip_rn50x16_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R101c_single_prompt_clip_rn50x4_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R101c_single_prompt_clip_vit-bx32_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R101c_vild_prompt_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R50_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R50_imagenet_prompt_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R50_single_prompt_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R50_vild_prompt_bs32_60k.yaml │ │ ├── zero_shot_perpixel_R101c_single_prompt_bs32_60k.yaml │ │ ├── zero_shot_proposal_classification_bs32_10k.yaml │ │ ├── zero_shot_proposal_classification_learn_prompt_bs32_10k.yaml │ │ └── zero_shot_proposal_classification_learn_prompt_pomp_bs32_10k.yaml │ ├── coco-stuff-164k-171 │ │ ├── Base-COCOStuff164K-171.yaml │ │ ├── maskformer_R101c_bs32_60k.yaml │ │ ├── maskformer_R50_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R101c_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R101c_imagenet_prompt_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R101c_pomp_prompt_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R101c_single_prompt_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R101c_vild_prompt_bs32_60k.yaml │ │ └── zero_shot_maskformer_R50_bs32_60k.yaml │ ├── pcontext-59 │ │ ├── cross_dataset_learned_prompt_test_only.yaml │ │ ├── cross_dataset_pomp_prompt_test_only.yaml │ │ └── cross_dataset_test_only.yaml │ ├── voc-11k-15 │ │ ├── zero_shot_maskformer_R101c_bs16_20k.yaml │ │ ├── zero_shot_maskformer_R101c_imagenet_prompt_bs16_20k.yaml │ │ ├── zero_shot_maskformer_R101c_pomp_prompt_bs16_20k.yaml │ │ ├── zero_shot_maskformer_R101c_single_prompt_bs16_20k.yaml │ │ ├── zero_shot_maskformer_R101c_vild_prompt_bs16_20k.yaml │ │ ├── zero_shot_maskformer_R50_bs16_20k.yaml │ │ ├── zero_shot_maskformer_R50_single_prompt_bs16_20k.yaml │ │ ├── zero_shot_proposal_classification_learn_prompt_bs16_10k.yaml │ │ └── zero_shot_proposal_classification_learn_prompt_pomp_bs16_10k.yaml │ └── voc-11k-20 │ │ ├── Base-VOC11K-20.yaml │ │ ├── maskformer_R101c_bs16_20k.yaml │ │ ├── maskformer_R50_bs16_20k.yaml │ │ └── zero_shot_maskformer_R101c_bs16_test_only.yaml │ ├── datasets │ ├── prepare_ade20k_sem_seg.py │ ├── prepare_coco_stuff_164k_sem_seg.py │ ├── prepare_pcontext_sem_seg.py │ └── prepare_voc_sem_seg.py │ ├── mask_former │ ├── __init__.py │ ├── ablation │ │ ├── __init__.py │ │ ├── oracle_mask_former_model.py │ │ ├── zero_shot_per_pixel_model.py │ │ └── zero_shot_proposal_based_model.py │ ├── config.py │ ├── data │ │ ├── __init__.py │ │ ├── augmentations.py │ │ ├── build.py │ │ ├── dataset_mappers │ │ │ ├── __init__.py │ │ │ ├── mask_former_binary_semantic_dataset_mapper.py │ │ │ ├── 
mask_former_semantic_dataset_mapper.py │ │ │ ├── oracle_dataset_mapper.py │ │ │ └── proposal_classification_dataset_mapper.py │ │ └── datasets │ │ │ ├── __init__.py │ │ │ ├── register_coco_stuff.py │ │ │ ├── register_pcontext.py │ │ │ ├── register_voc_seg.py │ │ │ └── utils.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── classification_evaluation.py │ │ ├── generalized_sem_seg_evaluation.py │ │ ├── my_generalized_sem_seg_evaluation.py │ │ └── pseudo_sem_seg_evaluation.py │ ├── mask_former_model.py │ ├── modeling │ │ ├── __init__.py │ │ ├── backbone │ │ │ ├── __init__.py │ │ │ ├── clip_resnet.py │ │ │ └── swin.py │ │ ├── clip_adapter │ │ │ ├── __init__.py │ │ │ ├── adapter.py │ │ │ ├── text_prompt.py │ │ │ └── utils.py │ │ ├── criterion.py │ │ ├── heads │ │ │ ├── __init__.py │ │ │ ├── mask_former_head.py │ │ │ ├── per_pixel_baseline.py │ │ │ ├── pixel_decoder.py │ │ │ └── zero_shot_mask_former_head.py │ │ ├── matcher.py │ │ └── transformer │ │ │ ├── __init__.py │ │ │ ├── position_encoding.py │ │ │ ├── transformer.py │ │ │ ├── transformer_predictor.py │ │ │ └── zero_shot_transformer_predictor.py │ ├── proposal_classification.py │ ├── test_time_augmentation.py │ ├── utils │ │ ├── __init__.py │ │ ├── events.py │ │ ├── misc.py │ │ ├── post_process_utils.py │ │ └── selective_search.py │ └── zero_shot_mask_former_model.py │ ├── requirements.txt │ ├── resources │ ├── ade20k_150_stuff.txt │ ├── ade_thing_stuff.png │ ├── coco_thing_stuff.png │ └── proposal.png │ ├── third_party │ └── CLIP │ │ ├── .gitignore │ │ ├── CLIP.png │ │ ├── LICENSE │ │ ├── MANIFEST.in │ │ ├── README.md │ │ ├── clip │ │ ├── __init__.py │ │ ├── bpe_simple_vocab_16e6.txt.gz │ │ ├── clip.py │ │ ├── model.py │ │ └── simple_tokenizer.py │ │ ├── model-card.md │ │ ├── requirements.txt │ │ ├── setup.py │ │ └── tests │ │ └── test_consistency.py │ ├── tools │ ├── convert-pretrained-clip-model-to-d2.py │ ├── convert-pretrained-swin-model-to-d2.py │ ├── convert-torchvision-to-d2.py │ ├── json2dir.py │ ├── mask_cls_collect.py │ ├── parse_name.py │ └── self_training.sh │ └── train_net.py ├── train.py ├── trainers ├── __init__.py ├── clip_mlp.py ├── cocoop.py ├── coop.py ├── imagenet_templates.py ├── maple.py ├── pomp.py ├── vpt.py └── zsclip.py ├── utils.py └── validation_test.py /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | -------------------------------------------------------------------------------- /align_uniform.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Adapted from https://github.com/lancopku/clip-openness 3 | ''' 4 | import torch 5 | 6 | 7 | def align_loss(x, y, alpha=2): 8 | return (x - y).norm(p=2, dim=1).pow(alpha).mean() 9 | 10 | 11 | def uniform_loss(x, t=2): 12 | return torch.pdist(x, p=2).pow(2).mul(-t).exp().mean().log() 13 | 14 | -------------------------------------------------------------------------------- /clip/__init__.py: -------------------------------------------------------------------------------- 1 | from .clip import * 2 | -------------------------------------------------------------------------------- /clip/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/clip/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /configs/datasets/caltech101.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "Caltech101" 3 | -------------------------------------------------------------------------------- /configs/datasets/dtd.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "DescribableTextures" 3 | -------------------------------------------------------------------------------- /configs/datasets/eurosat.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "EuroSAT" 3 | -------------------------------------------------------------------------------- /configs/datasets/fgvc_aircraft.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "FGVCAircraft" 3 | -------------------------------------------------------------------------------- /configs/datasets/food101.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "Food101" 3 | -------------------------------------------------------------------------------- /configs/datasets/imagenet.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "ImageNet" 3 | -------------------------------------------------------------------------------- /configs/datasets/imagenet_21k.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "ImageNet21K" 3 | -------------------------------------------------------------------------------- /configs/datasets/imagenet_a.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "ImageNetA" 3 | -------------------------------------------------------------------------------- /configs/datasets/imagenet_r.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "ImageNetR" 3 | -------------------------------------------------------------------------------- /configs/datasets/imagenet_sketch.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "ImageNetSketch" 3 | -------------------------------------------------------------------------------- /configs/datasets/imagenetv2.yaml: 
-------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "ImageNetV2" 3 | -------------------------------------------------------------------------------- /configs/datasets/oxford_flowers.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "OxfordFlowers" -------------------------------------------------------------------------------- /configs/datasets/oxford_pets.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "OxfordPets" -------------------------------------------------------------------------------- /configs/datasets/stanford_cars.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "StanfordCars" 3 | -------------------------------------------------------------------------------- /configs/datasets/sun397.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "SUN397" 3 | -------------------------------------------------------------------------------- /configs/datasets/ucf101.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "UCF101" 3 | -------------------------------------------------------------------------------- /configs/trainers/CoCoOp/vit_b16_c16_ep10_batch1.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 1 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 10 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 20 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "ViT-B/16" 30 | 31 | TRAINER: 32 | COCOOP: 33 | N_CTX: 16 34 | CTX_INIT: "" 35 | PREC: "fp16" -------------------------------------------------------------------------------- /configs/trainers/CoCoOp/vit_b16_c4_ep10_batch1.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 1 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 10 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 20 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "ViT-B/16" 30 | 31 | TRAINER: 32 | COCOOP: 33 | N_CTX: 4 34 | CTX_INIT: "" 35 | PREC: "fp16" -------------------------------------------------------------------------------- /configs/trainers/CoCoOp/vit_b16_c4_ep10_batch1_ctxv1.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 1 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: 
[0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 10 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 20 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "ViT-B/16" 30 | 31 | TRAINER: 32 | COCOOP: 33 | N_CTX: 4 34 | CTX_INIT: "a photo of a" 35 | PREC: "fp16" -------------------------------------------------------------------------------- /configs/trainers/CoCoOp/vit_b16_c8_ep10_batch1.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 1 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 10 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 20 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "ViT-B/16" 30 | 31 | TRAINER: 32 | COCOOP: 33 | N_CTX: 8 34 | CTX_INIT: "" 35 | PREC: "fp16" -------------------------------------------------------------------------------- /configs/trainers/CoOp/rn101.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 200 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 5 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "RN101" -------------------------------------------------------------------------------- /configs/trainers/CoOp/rn101_ep50.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 50 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 5 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "RN101" -------------------------------------------------------------------------------- /configs/trainers/CoOp/rn50.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 
200 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 5 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "RN50" -------------------------------------------------------------------------------- /configs/trainers/CoOp/rn50_ctxv1.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 200 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 5 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "RN50" 30 | 31 | TRAINER: 32 | COOP: 33 | CTX_INIT: "a photo of a" 34 | -------------------------------------------------------------------------------- /configs/trainers/CoOp/rn50_ep100.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 100 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 5 26 | 27 | TEST: 28 | PER_CLASS_RESULT: True 29 | 30 | MODEL: 31 | BACKBONE: 32 | NAME: "RN50" 33 | -------------------------------------------------------------------------------- /configs/trainers/CoOp/rn50_ep50.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 50 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 5 26 | 27 | TEST: 28 | PER_CLASS_RESULT: True 29 | 30 | MODEL: 31 | BACKBONE: 32 | NAME: "RN50" -------------------------------------------------------------------------------- /configs/trainers/CoOp/rn50_ep50_ctxv1.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 50 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 5 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "RN50" 30 | 
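# Per CoOp's PromptLearner: when CTX_INIT is non-empty, the learnable context
# vectors are initialized from the token embedding of "a photo of a" (with the
# number of context tokens inferred from the phrase) rather than sampled from
# a random Gaussian, which is what the "_ctxv1" suffix denotes.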
31 | TRAINER: 32 | COOP: 33 | CTX_INIT: "a photo of a" -------------------------------------------------------------------------------- /configs/trainers/CoOp/rn50_val.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 100 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | MODEL: 16 | BACKBONE: 17 | NAME: "RN50" -------------------------------------------------------------------------------- /configs/trainers/CoOp/vit_b16.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 200 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 5 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "ViT-B/16" -------------------------------------------------------------------------------- /configs/trainers/CoOp/vit_b16_ep100.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 100 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 5 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "ViT-B/16" -------------------------------------------------------------------------------- /configs/trainers/CoOp/vit_b16_ep50.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 50 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 5 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "ViT-B/16" -------------------------------------------------------------------------------- /configs/trainers/CoOp/vit_b16_val.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 100 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: 
["random_resized_crop", "random_flip", "normalize"] 14 | 15 | MODEL: 16 | BACKBONE: 17 | NAME: "ViT-B/16" -------------------------------------------------------------------------------- /configs/trainers/CoOp/vit_b32.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 200 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 5 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "ViT-B/32" -------------------------------------------------------------------------------- /configs/trainers/CoOp/vit_b32_ep50.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 50 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 5 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "ViT-B/32" -------------------------------------------------------------------------------- /configs/trainers/CoOp/vit_b32_val.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 100 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | MODEL: 16 | BACKBONE: 17 | NAME: "ViT-B/32" -------------------------------------------------------------------------------- /configs/trainers/CoOp/vit_l14.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 200 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 5 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "ViT-L/14" -------------------------------------------------------------------------------- /configs/trainers/MLP/rn50_ep20.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | SAMPLER: "DistributedSampler" 5 | TEST: 6 | BATCH_SIZE: 100 7 | SAMPLER: "SequentialDistributedSampler" 8 | NUM_WORKERS: 8 9 | 10 | INPUT: 11 | SIZE: (224, 224) 12 | INTERPOLATION: "bicubic" 13 | 
PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 14 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 15 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 16 | 17 | OPTIM: 18 | NAME: "sgd" 19 | LR: 0.2 20 | MAX_EPOCH: 20 21 | LR_SCHEDULER: "cosine" 22 | WARMUP_EPOCH: 1 23 | WARMUP_TYPE: "constant" 24 | WARMUP_CONS_LR: 1e-3 25 | 26 | TRAIN: 27 | PRINT_FREQ: 100 28 | 29 | MODEL: 30 | BACKBONE: 31 | NAME: "RN50" 32 | -------------------------------------------------------------------------------- /configs/trainers/MLP/rn50_ep50.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | SAMPLER: "DistributedSampler" 5 | TEST: 6 | BATCH_SIZE: 100 7 | SAMPLER: "SequentialDistributedSampler" 8 | NUM_WORKERS: 8 9 | 10 | INPUT: 11 | SIZE: (224, 224) 12 | INTERPOLATION: "bicubic" 13 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 14 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 15 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 16 | 17 | OPTIM: 18 | NAME: "sgd" 19 | LR: 0.2 20 | MAX_EPOCH: 50 21 | LR_SCHEDULER: "cosine" 22 | WARMUP_EPOCH: 1 23 | WARMUP_TYPE: "constant" 24 | WARMUP_CONS_LR: 1e-3 25 | 26 | TRAIN: 27 | PRINT_FREQ: 100 28 | 29 | MODEL: 30 | BACKBONE: 31 | NAME: "RN50" 32 | -------------------------------------------------------------------------------- /configs/trainers/MLP/vit_b16_ep20.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | SAMPLER: "DistributedSampler" 5 | TEST: 6 | BATCH_SIZE: 100 7 | SAMPLER: "SequentialDistributedSampler" 8 | NUM_WORKERS: 8 9 | 10 | INPUT: 11 | SIZE: (224, 224) 12 | INTERPOLATION: "bicubic" 13 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 14 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 15 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 16 | 17 | OPTIM: 18 | NAME: "sgd" 19 | LR: 0.2 20 | MAX_EPOCH: 20 21 | LR_SCHEDULER: "cosine" 22 | WARMUP_EPOCH: 1 23 | WARMUP_TYPE: "constant" 24 | WARMUP_CONS_LR: 1e-3 25 | 26 | TRAIN: 27 | PRINT_FREQ: 100 28 | 29 | MODEL: 30 | BACKBONE: 31 | NAME: "ViT-B/16" 32 | -------------------------------------------------------------------------------- /configs/trainers/MLP/vit_b16_ep50.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | SAMPLER: "DistributedSampler" 5 | TEST: 6 | BATCH_SIZE: 100 7 | SAMPLER: "SequentialDistributedSampler" 8 | NUM_WORKERS: 8 9 | 10 | INPUT: 11 | SIZE: (224, 224) 12 | INTERPOLATION: "bicubic" 13 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 14 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 15 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 16 | 17 | OPTIM: 18 | NAME: "sgd" 19 | LR: 0.02 20 | MAX_EPOCH: 50 21 | LR_SCHEDULER: "cosine" 22 | WARMUP_EPOCH: 1 23 | WARMUP_TYPE: "constant" 24 | WARMUP_CONS_LR: 1e-4 25 | 26 | TRAIN: 27 | PRINT_FREQ: 100 28 | 29 | MODEL: 30 | BACKBONE: 31 | NAME: "ViT-B/16" 32 | -------------------------------------------------------------------------------- /configs/trainers/MLP/vit_b32_ep20.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | SAMPLER: "DistributedSampler" 5 | TEST: 6 | BATCH_SIZE: 100 7 | SAMPLER: "SequentialDistributedSampler" 8 | NUM_WORKERS: 8 9 | 10 | INPUT: 11 | SIZE: (224, 224) 12 | INTERPOLATION: "bicubic" 13 | PIXEL_MEAN: 
[0.48145466, 0.4578275, 0.40821073] 14 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 15 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 16 | 17 | OPTIM: 18 | NAME: "sgd" 19 | LR: 0.2 20 | MAX_EPOCH: 20 21 | LR_SCHEDULER: "cosine" 22 | WARMUP_EPOCH: 1 23 | WARMUP_TYPE: "constant" 24 | WARMUP_CONS_LR: 1e-3 25 | 26 | TRAIN: 27 | PRINT_FREQ: 100 28 | 29 | MODEL: 30 | BACKBONE: 31 | NAME: "ViT-B/32" 32 | -------------------------------------------------------------------------------- /configs/trainers/MLP/vit_b32_ep50.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | SAMPLER: "DistributedSampler" 5 | TEST: 6 | BATCH_SIZE: 100 7 | SAMPLER: "SequentialDistributedSampler" 8 | NUM_WORKERS: 8 9 | 10 | INPUT: 11 | SIZE: (224, 224) 12 | INTERPOLATION: "bicubic" 13 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 14 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 15 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 16 | 17 | OPTIM: 18 | NAME: "sgd" 19 | LR: 0.002 20 | MAX_EPOCH: 50 21 | LR_SCHEDULER: "cosine" 22 | WARMUP_EPOCH: 1 23 | WARMUP_TYPE: "constant" 24 | WARMUP_CONS_LR: 1e-5 25 | 26 | TRAIN: 27 | PRINT_FREQ: 100 28 | 29 | TEST: 30 | PER_CLASS_RESULT: True 31 | 32 | MODEL: 33 | BACKBONE: 34 | NAME: "ViT-B/32" -------------------------------------------------------------------------------- /configs/trainers/MaPLe/vit_b16_c2_ep5_batch4_2ctx.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 4 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.0035 18 | MAX_EPOCH: 5 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 20 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "ViT-B/16" 30 | 31 | TRAINER: 32 | MAPLE: 33 | N_CTX: 2 34 | CTX_INIT: "a photo of a" 35 | PREC: "fp16" 36 | PROMPT_DEPTH: 9 -------------------------------------------------------------------------------- /configs/trainers/MaPLe/vit_b16_c2_ep5_batch4_2ctx_cross_datasets.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 4 4 | TEST: 5 | BATCH_SIZE: 500 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.0026 18 | MAX_EPOCH: 2 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 20 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "ViT-B/16" 30 | 31 | TRAINER: 32 | MAPLE: 33 | N_CTX: 2 34 | CTX_INIT: "a photo of a" 35 | PREC: "fp16" 36 | PROMPT_DEPTH: 3 -------------------------------------------------------------------------------- /configs/trainers/MaPLe/vit_b32_c2_ep5_batch4_2ctx_cross_datasets.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 4 4 | TEST: 5 | BATCH_SIZE: 100 6 | 
NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.0026 18 | MAX_EPOCH: 2 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 20 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "ViT-B/32" 30 | 31 | TRAINER: 32 | MAPLE: 33 | N_CTX: 2 34 | CTX_INIT: "a photo of a" 35 | PREC: "fp16" 36 | PROMPT_DEPTH: 3 -------------------------------------------------------------------------------- /configs/trainers/POMP/rn50_ep20_randaug2.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | # SAMPLER: "DistributedSampler" 5 | TEST: 6 | BATCH_SIZE: 100 7 | NUM_WORKERS: 8 8 | K_TRANSFORMS: 4 9 | 10 | INPUT: 11 | SIZE: (224, 224) 12 | INTERPOLATION: "bicubic" 13 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 14 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 15 | TRANSFORMS: ["random_resized_crop", "random_flip", "randaugment2", "normalize"] 16 | 17 | OPTIM: 18 | NAME: "sgd" 19 | LR: 0.002 20 | MAX_EPOCH: 20 21 | LR_SCHEDULER: "cosine" 22 | WARMUP_EPOCH: 1 23 | WARMUP_TYPE: "constant" 24 | WARMUP_CONS_LR: 1e-5 25 | 26 | TRAIN: 27 | CHECKPOINT_FREQ: 1 28 | PRINT_FREQ: 100 29 | 30 | TEST: 31 | NO_TEST: True 32 | PER_CLASS_RESULT: True 33 | 34 | MODEL: 35 | BACKBONE: 36 | NAME: "RN50" 37 | -------------------------------------------------------------------------------- /configs/trainers/POMP/rn50_ep5.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 5 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 100 26 | 27 | TEST: 28 | PER_CLASS_RESULT: True 29 | 30 | MODEL: 31 | BACKBONE: 32 | NAME: "RN50" 33 | -------------------------------------------------------------------------------- /configs/trainers/POMP/vit_b16_ep20.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | SAMPLER: "DistributedSampler" 5 | TEST: 6 | BATCH_SIZE: 100 7 | SAMPLER: "SequentialDistributedSampler" 8 | NUM_WORKERS: 8 9 | 10 | INPUT: 11 | SIZE: (224, 224) 12 | INTERPOLATION: "bicubic" 13 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 14 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 15 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 16 | 17 | OPTIM: 18 | NAME: "sgd" 19 | LR: 0.002 20 | MAX_EPOCH: 20 21 | LR_SCHEDULER: "cosine" 22 | WARMUP_EPOCH: 1 23 | WARMUP_TYPE: "constant" 24 | WARMUP_CONS_LR: 1e-5 25 | 26 | TRAIN: 27 | CHECKPOINT_FREQ: 1 28 | PRINT_FREQ: 100 29 | 30 | TEST: 31 | NO_TEST: True 32 | 33 | MODEL: 34 | BACKBONE: 35 | NAME: "ViT-B/16" 36 | -------------------------------------------------------------------------------- /configs/trainers/POMP/vit_b16_ep20_randaug2.yaml: 
-------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | SAMPLER: "DistributedSampler" 5 | TEST: 6 | BATCH_SIZE: 100 7 | SAMPLER: "SequentialDistributedSampler" 8 | NUM_WORKERS: 8 9 | K_TRANSFORMS: 4 10 | 11 | INPUT: 12 | SIZE: (224, 224) 13 | INTERPOLATION: "bicubic" 14 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 15 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 16 | TRANSFORMS: ["random_resized_crop", "random_flip", "randaugment2", "normalize"] 17 | 18 | OPTIM: 19 | NAME: "sgd" 20 | LR: 0.002 21 | MAX_EPOCH: 20 22 | LR_SCHEDULER: "cosine" 23 | WARMUP_EPOCH: 1 24 | WARMUP_TYPE: "constant" 25 | WARMUP_CONS_LR: 1e-5 26 | 27 | TRAIN: 28 | CHECKPOINT_FREQ: 1 29 | PRINT_FREQ: 100 30 | 31 | TEST: 32 | NO_TEST: True 33 | 34 | MODEL: 35 | BACKBONE: 36 | NAME: "ViT-B/16" 37 | -------------------------------------------------------------------------------- /configs/trainers/POMP/vit_b16_ep5.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 5 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 100 26 | 27 | TEST: 28 | PER_CLASS_RESULT: True 29 | 30 | MODEL: 31 | BACKBONE: 32 | NAME: "ViT-B/16" 33 | -------------------------------------------------------------------------------- /configs/trainers/POMP/vit_b16_ep5_randaug2.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | SAMPLER: "DistributedSampler" 5 | TEST: 6 | BATCH_SIZE: 100 7 | SAMPLER: "SequentialDistributedSampler" 8 | NUM_WORKERS: 8 9 | K_TRANSFORMS: 4 10 | 11 | INPUT: 12 | SIZE: (224, 224) 13 | INTERPOLATION: "bicubic" 14 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 15 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 16 | TRANSFORMS: ["random_resized_crop", "random_flip", "randaugment2", "normalize"] 17 | 18 | OPTIM: 19 | NAME: "sgd" 20 | LR: 0.002 21 | MAX_EPOCH: 5 22 | LR_SCHEDULER: "cosine" 23 | WARMUP_EPOCH: 1 24 | WARMUP_TYPE: "constant" 25 | WARMUP_CONS_LR: 1e-5 26 | 27 | TRAIN: 28 | CHECKPOINT_FREQ: 1 29 | PRINT_FREQ: 100 30 | 31 | TEST: 32 | NO_TEST: True 33 | 34 | MODEL: 35 | BACKBONE: 36 | NAME: "ViT-B/16" 37 | -------------------------------------------------------------------------------- /configs/trainers/POMP/vit_b32_ep20_randaug2.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | # SAMPLER: "DistributedSampler" 5 | TEST: 6 | BATCH_SIZE: 100 7 | NUM_WORKERS: 8 8 | K_TRANSFORMS: 4 9 | 10 | INPUT: 11 | SIZE: (224, 224) 12 | INTERPOLATION: "bicubic" 13 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 14 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 15 | TRANSFORMS: ["random_resized_crop", "random_flip", "randaugment2", "normalize"] 16 | 17 | OPTIM: 18 | NAME: "sgd" 19 | LR: 0.002 20 | MAX_EPOCH: 20 21 | LR_SCHEDULER: "cosine" 22 | WARMUP_EPOCH: 1 23 | WARMUP_TYPE: "constant" 24 | WARMUP_CONS_LR: 1e-5 25 | 26 | TRAIN: 27 | CHECKPOINT_FREQ: 1 28 | 
PRINT_FREQ: 100 29 | 30 | TEST: 31 | NO_TEST: True 32 | PER_CLASS_RESULT: True 33 | 34 | MODEL: 35 | BACKBONE: 36 | NAME: "ViT-B/32" 37 | -------------------------------------------------------------------------------- /configs/trainers/VPT/vit_b16_c2_ep5_batch4_4.yaml: -------------------------------------------------------------------------------- 1 | # Deep vision prompting 2 | DATALOADER: 3 | TRAIN_X: 4 | BATCH_SIZE: 4 5 | TEST: 6 | BATCH_SIZE: 100 7 | NUM_WORKERS: 8 8 | 9 | INPUT: 10 | SIZE: (224, 224) 11 | INTERPOLATION: "bicubic" 12 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 13 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 14 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 15 | 16 | OPTIM: 17 | NAME: "sgd" 18 | LR: 0.0025 19 | MAX_EPOCH: 5 20 | LR_SCHEDULER: "cosine" 21 | WARMUP_EPOCH: 1 22 | WARMUP_TYPE: "constant" 23 | WARMUP_CONS_LR: 1e-5 24 | 25 | TRAIN: 26 | PRINT_FREQ: 20 27 | 28 | MODEL: 29 | BACKBONE: 30 | NAME: "ViT-B/16" 31 | 32 | TRAINER: 33 | VPT: 34 | N_CTX_VISION: 8 35 | CTX_INIT: "a photo of a" 36 | PREC: "fp16" 37 | PROMPT_DEPTH_VISION: 12 -------------------------------------------------------------------------------- /configs/trainers/VPT/vit_b32_c2_ep5_batch4_4.yaml: -------------------------------------------------------------------------------- 1 | # Deep vision prompting 2 | DATALOADER: 3 | TRAIN_X: 4 | BATCH_SIZE: 4 5 | TEST: 6 | BATCH_SIZE: 100 7 | NUM_WORKERS: 8 8 | 9 | INPUT: 10 | SIZE: (224, 224) 11 | INTERPOLATION: "bicubic" 12 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 13 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 14 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 15 | 16 | OPTIM: 17 | NAME: "sgd" 18 | LR: 0.0025 19 | MAX_EPOCH: 5 20 | LR_SCHEDULER: "cosine" 21 | WARMUP_EPOCH: 1 22 | WARMUP_TYPE: "constant" 23 | WARMUP_CONS_LR: 1e-5 24 | 25 | TRAIN: 26 | PRINT_FREQ: 20 27 | 28 | MODEL: 29 | BACKBONE: 30 | NAME: "ViT-B/32" 31 | 32 | TRAINER: 33 | VPT: 34 | N_CTX_VISION: 8 35 | CTX_INIT: "a photo of a" 36 | PREC: "fp16" 37 | PROMPT_DEPTH_VISION: 12 -------------------------------------------------------------------------------- /datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/datasets/__init__.py -------------------------------------------------------------------------------- /docs/main_figure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/docs/main_figure.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ftfy==6.1.1 2 | tqdm==4.64.0 3 | wandb -------------------------------------------------------------------------------- /scripts/cocoop/base2new_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #cd ../.. 4 | 5 | # custom config 6 | DATA="/path/to/dataset/folder" 7 | TRAINER=CoCoOp 8 | 9 | DATASET=$1 10 | SEED=$2 11 | 12 | CFG=vit_b16_c4_ep10_batch1_ctxv1 13 | SHOTS=16 14 | 15 | 16 | DIR=output/base2new/train_base/${DATASET}/shots_${SHOTS}/${TRAINER}/${CFG}/seed${SEED} 17 | if [ -d "$DIR" ]; then 18 | echo "Results are available in ${DIR}. Resuming..." 
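# Note: no --resume flag is passed here; re-running the same train.py command relies on Dassl's trainer finding and loading the latest checkpoint in --output-dir (assumed default auto-resume behavior).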
19 | python train.py \ 20 | --root ${DATA} \ 21 | --seed ${SEED} \ 22 | --trainer ${TRAINER} \ 23 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 24 | --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ 25 | --output-dir ${DIR} \ 26 | DATASET.NUM_SHOTS ${SHOTS} \ 27 | DATASET.SUBSAMPLE_CLASSES base 28 | else 29 | echo "Run this job and save the output to ${DIR}" 30 | python train.py \ 31 | --root ${DATA} \ 32 | --seed ${SEED} \ 33 | --trainer ${TRAINER} \ 34 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 35 | --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ 36 | --output-dir ${DIR} \ 37 | DATASET.NUM_SHOTS ${SHOTS} \ 38 | DATASET.SUBSAMPLE_CLASSES base 39 | fi -------------------------------------------------------------------------------- /scripts/cocoop/xd_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #cd ../.. 4 | 5 | # custom config 6 | DATA=~/efs/data/ 7 | TRAINER=CoCoOp 8 | 9 | DATASET=imagenet 10 | SEED=$1 11 | 12 | CFG=vit_b16_c4_ep10_batch1_ctxv1 13 | SHOTS=16 14 | 15 | 16 | DIR=output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots/seed${SEED} 17 | if [ -d "$DIR" ]; then 18 | echo "Results are available in ${DIR}. Skip this job" 19 | else 20 | echo "Run this job and save the output to ${DIR}" 21 | 22 | python train.py \ 23 | --root ${DATA} \ 24 | --seed ${SEED} \ 25 | --trainer ${TRAINER} \ 26 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 27 | --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ 28 | --output-dir ${DIR} \ 29 | DATASET.NUM_SHOTS ${SHOTS} 30 | fi 31 | -------------------------------------------------------------------------------- /scripts/coop/multi_scripts.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 sh scripts/coop/main.sh imagenet rn50_ep50 end 16 16 False 2 | CUDA_VISIBLE_DEVICES=3 sh scripts/coop/main.sh food101 rn50_ep100 end 16 16 False 3 | CUDA_VISIBLE_DEVICES=4 sh scripts/coop/main.sh stanford_cars rn50_ep100 end 16 16 False 4 | CUDA_VISIBLE_DEVICES=5 sh scripts/coop/main.sh fgvc_aircraft rn50_ep100 end 16 16 False 5 | CUDA_VISIBLE_DEVICES=6 sh scripts/coop/main.sh oxford_pets rn50_ep100 end 16 16 False 6 | CUDA_VISIBLE_DEVICES=7 sh scripts/coop/main.sh caltech101 rn50_ep100 end 16 16 False 7 | CUDA_VISIBLE_DEVICES=0 sh scripts/coop/main.sh oxford_flowers rn50_ep100 end 16 16 False 8 | CUDA_VISIBLE_DEVICES=1 sh scripts/coop/main.sh eurosat rn50_ep100 end 16 16 False 9 | CUDA_VISIBLE_DEVICES=2 sh scripts/coop/main.sh dtd rn50_ep100 end 16 16 False 10 | CUDA_VISIBLE_DEVICES=3 sh scripts/coop/main.sh sun397 rn50_ep100 end 16 16 False 11 | CUDA_VISIBLE_DEVICES=4 sh scripts/coop/main.sh ucf101 rn50_ep100 end 16 16 False -------------------------------------------------------------------------------- /scripts/maple/base2new_train_maple.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #cd ../.. 4 | 5 | # custom config 6 | DATA="/path/to/dataset/folder" 7 | TRAINER=MaPLe 8 | 9 | DATASET=$1 10 | SEED=$2 11 | 12 | CFG=vit_b16_c2_ep5_batch4_2ctx 13 | SHOTS=16 14 | 15 | 16 | DIR=output/base2new/train_base/${DATASET}/shots_${SHOTS}/${TRAINER}/${CFG}/seed${SEED} 17 | if [ -d "$DIR" ]; then 18 | echo "Results are available in ${DIR}. Resuming..." 
19 | python train.py \ 20 | --root ${DATA} \ 21 | --seed ${SEED} \ 22 | --trainer ${TRAINER} \ 23 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 24 | --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ 25 | --output-dir ${DIR} \ 26 | DATASET.NUM_SHOTS ${SHOTS} \ 27 | DATASET.SUBSAMPLE_CLASSES base 28 | else 29 | echo "Run this job and save the output to ${DIR}" 30 | python train.py \ 31 | --root ${DATA} \ 32 | --seed ${SEED} \ 33 | --trainer ${TRAINER} \ 34 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 35 | --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ 36 | --output-dir ${DIR} \ 37 | DATASET.NUM_SHOTS ${SHOTS} \ 38 | DATASET.SUBSAMPLE_CLASSES base 39 | fi -------------------------------------------------------------------------------- /scripts/maple/reproduce_maple_xd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #cd ../.. 4 | 5 | # custom config 6 | DATA="/path/to/dataset/folder" 7 | TRAINER=MaPLe 8 | 9 | DATASET=$1 10 | SEED=$2 11 | WEIGHTSPATH=$3 12 | 13 | CFG=vit_b16_c2_ep5_batch4_2ctx_cross_datasets 14 | SHOTS=16 15 | LOADEP=2 16 | 17 | MODEL_DIR=${WEIGHTSPATH}/seed${SEED} 18 | 19 | DIR=output/evaluation/${TRAINER}/${CFG}_${SHOTS}shots/${DATASET}/seed${SEED} 20 | if [ -d "$DIR" ]; then 21 | echo "Results are already available in ${DIR}. Skipping..." 22 | else 23 | echo "Evaluating model" 24 | echo "Running the first phase job and save the output to ${DIR}" 25 | # Evaluate on evaluation datasets 26 | python train.py \ 27 | --root ${DATA} \ 28 | --seed ${SEED} \ 29 | --trainer ${TRAINER} \ 30 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 31 | --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ 32 | --output-dir ${DIR} \ 33 | --model-dir ${MODEL_DIR} \ 34 | --load-epoch ${LOADEP} \ 35 | --eval-only \ 36 | DATASET.NUM_SHOTS ${SHOTS} 37 | 38 | fi -------------------------------------------------------------------------------- /scripts/maple/xd_train_maple.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #cd ../.. 4 | 5 | # custom config 6 | DATA=~/efs/imagenet/ 7 | TRAINER=MaPLe 8 | 9 | DATASET=$1 10 | SEED=$2 11 | 12 | CFG=vit_b32_c2_ep5_batch4_2ctx_cross_datasets 13 | SHOTS=16 14 | 15 | 16 | DIR=output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots/seed${SEED} 17 | if [ -d "$DIR" ]; then 18 | echo "Results are available in ${DIR}." 19 | else 20 | echo "Run this job and save the output to ${DIR}" 21 | 22 | python train.py \ 23 | --root ${DATA} \ 24 | --seed ${SEED} \ 25 | --trainer ${TRAINER} \ 26 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 27 | --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ 28 | --output-dir ${DIR} \ 29 | DATASET.NUM_SHOTS ${SHOTS} 30 | fi 31 | -------------------------------------------------------------------------------- /scripts/mlp/main.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # custom config 4 | DATA=~/efs/data/ 5 | TRAINER=MLP 6 | 7 | DATASET=$1 8 | CFG=$2 # config file 9 | SHOTS=$3 # number of shots (1, 2, 4, 8, 16) 10 | 11 | 12 | for SEED in 42 13 | do 14 | DIR=output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots/seed${SEED} 15 | if [ -d "$DIR" ]; then 16 | echo "Results are available in ${DIR}.
Skip this job" 17 | else 18 | echo "Run this job and save the output to ${DIR}" 19 | python -m torch.distributed.launch --nproc_per_node 8 --master_port 12345 train.py \ 20 | --world-size 8 \ 21 | --root ${DATA} \ 22 | --seed ${SEED} \ 23 | --trainer ${TRAINER} \ 24 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 25 | --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ 26 | --output-dir ${DIR} \ 27 | DATASET.NUM_SHOTS ${SHOTS} \ 28 | TEST.FINAL_MODEL best_val 29 | fi 30 | done 31 | 32 | 33 | # sh scripts/mlp/main.sh imagenet_21k rn50_ep20 16 34 | # sh scripts/mlp/main.sh imagenet_21k vit_b32_ep20 16 35 | # sh scripts/mlp/main.sh imagenet_21k vit_b16_ep20 16 -------------------------------------------------------------------------------- /scripts/pomp/multi_scripts.sh: -------------------------------------------------------------------------------- 1 | sh scripts/pomp/main.sh imagenet_21k vit_b16_ep20 end 4 16 False 1000 2 | python validation_test.py -------------------------------------------------------------------------------- /scripts/pomp/xd_test.sh: -------------------------------------------------------------------------------- 1 | sh scripts/pomp/eval.sh oxford_pets 42 1000 2 | sh scripts/pomp/eval.sh oxford_flowers 42 1000 3 | sh scripts/pomp/eval.sh food101 42 1000 4 | sh scripts/pomp/eval.sh sun397 42 1000 5 | sh scripts/pomp/eval.sh stanford_cars 42 1000 6 | sh scripts/pomp/eval.sh ucf101 42 1000 7 | sh scripts/pomp/eval.sh eurosat 42 1000 8 | sh scripts/pomp/eval.sh fgvc_aircraft 42 1000 9 | sh scripts/pomp/eval.sh caltech101 42 1000 10 | sh scripts/pomp/eval.sh dtd 42 1000 11 | sh scripts/pomp/eval.sh imagenet_a 42 1000 12 | sh scripts/pomp/eval.sh imagenet_r 42 1000 13 | sh scripts/pomp/eval.sh imagenet_sketch 42 1000 14 | sh scripts/pomp/eval.sh imagenetv2 42 1000 15 | 16 | # sh scripts/pomp/xd_test.sh -------------------------------------------------------------------------------- /scripts/vpt/base2new_train_vpt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #cd ../.. 4 | 5 | # custom config 6 | DATA="/path/to/dataset/folder" 7 | TRAINER=VPT 8 | 9 | DATASET=$1 10 | SEED=$2 11 | 12 | CFG=vit_b16_c2_ep5_batch4_4 13 | SHOTS=16 14 | 15 | 16 | DIR=output/base2new/train_base/${DATASET}/shots_${SHOTS}/${TRAINER}/${CFG}/seed${SEED} 17 | if [ -d "$DIR" ]; then 18 | echo "Results are available in ${DIR}. Resuming..." 19 | python train.py \ 20 | --root ${DATA} \ 21 | --seed ${SEED} \ 22 | --trainer ${TRAINER} \ 23 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 24 | --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ 25 | --output-dir ${DIR} \ 26 | DATASET.NUM_SHOTS ${SHOTS} \ 27 | DATASET.SUBSAMPLE_CLASSES base 28 | else 29 | echo "Run this job and save the output to ${DIR}" 30 | python train.py \ 31 | --root ${DATA} \ 32 | --seed ${SEED} \ 33 | --trainer ${TRAINER} \ 34 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 35 | --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ 36 | --output-dir ${DIR} \ 37 | DATASET.NUM_SHOTS ${SHOTS} \ 38 | DATASET.SUBSAMPLE_CLASSES base 39 | fi -------------------------------------------------------------------------------- /scripts/vpt/xd_test_vpt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #cd ../.. 
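# Cross-dataset test: --model-dir points at the VPT prompts trained on ImageNet (see xd_train_vpt.sh) and --eval-only skips training on the target dataset.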
4 | 5 | # custom config 6 | DATA="/path/to/dataset/folder" 7 | TRAINER=VPT 8 | 9 | DATASET=$1 10 | SEED=$2 11 | 12 | CFG=vit_b16_c2_ep5_batch4_4 13 | SHOTS=16 14 | 15 | 16 | DIR=output/evaluation/${TRAINER}/${CFG}_${SHOTS}shots/${DATASET}/seed${SEED} 17 | if [ -d "$DIR" ]; then 18 | echo "Results are available in ${DIR}. Skip this job" 19 | else 20 | echo "Run this job and save the output to ${DIR}" 21 | 22 | python train.py \ 23 | --root ${DATA} \ 24 | --seed ${SEED} \ 25 | --trainer ${TRAINER} \ 26 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 27 | --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ 28 | --output-dir ${DIR} \ 29 | --model-dir output/imagenet/${TRAINER}/${CFG}_${SHOTS}shots/seed${SEED} \ 30 | --load-epoch 2 \ 31 | --eval-only 32 | fi -------------------------------------------------------------------------------- /scripts/vpt/xd_train_vpt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #cd ../.. 4 | 5 | # custom config 6 | DATA=~/efs/imagenet/ 7 | TRAINER=VPT 8 | 9 | DATASET=$1 10 | SEED=$2 11 | 12 | CFG=vit_b16_c2_ep5_batch4_4 13 | SHOTS=16 14 | 15 | 16 | DIR=output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots/seed${SEED} 17 | if [ -d "$DIR" ]; then 18 | echo "Results are available in ${DIR}." 19 | else 20 | echo "Run this job and save the output to ${DIR}" 21 | 22 | python train.py \ 23 | --root ${DATA} \ 24 | --seed ${SEED} \ 25 | --trainer ${TRAINER} \ 26 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 27 | --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ 28 | --output-dir ${DIR} \ 29 | DATASET.NUM_SHOTS ${SHOTS} 30 | fi 31 | 32 | # CUDA_VISIBLE_DEVICES=0 sh scripts/vpt/xd_train_vpt.sh imagenet 42 -------------------------------------------------------------------------------- /scripts/zsclip/xd_test.sh: -------------------------------------------------------------------------------- 1 | sh scripts/zsclip/zeroshot.sh oxford_pets vit_b16 2 | sh scripts/zsclip/zeroshot.sh oxford_flowers vit_b16 3 | sh scripts/zsclip/zeroshot.sh food101 vit_b16 4 | sh scripts/zsclip/zeroshot.sh sun397 vit_b16 5 | sh scripts/zsclip/zeroshot.sh stanford_cars vit_b16 6 | sh scripts/zsclip/zeroshot.sh ucf101 vit_b16 7 | sh scripts/zsclip/zeroshot.sh eurosat vit_b16 8 | sh scripts/zsclip/zeroshot.sh fgvc_aircraft vit_b16 9 | sh scripts/zsclip/zeroshot.sh caltech101 vit_b16 10 | sh scripts/zsclip/zeroshot.sh dtd vit_b16 11 | sh scripts/zsclip/zeroshot.sh imagenet_a vit_b16 12 | sh scripts/zsclip/zeroshot.sh imagenet_r vit_b16 13 | sh scripts/zsclip/zeroshot.sh imagenet_sketch vit_b16 14 | sh scripts/zsclip/zeroshot.sh imagenetv2 vit_b16 15 | 16 | # sh scripts/zsclip/xd_test.sh -------------------------------------------------------------------------------- /scripts/zsclip/zeroshot.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #cd ../..
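# Zero-shot CLIP baseline: nothing is trained; the CoOp config below is borrowed only for backbone and input settings, and --eval-only runs the evaluation directly.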
4 | 5 | # custom config 6 | DATA=~/efs/data/ 7 | TRAINER=ZeroshotCLIP 8 | DATASET=$1 9 | CFG=$2 # rn50, rn101, vit_b32 or vit_b16 vit_l14 10 | 11 | python train.py \ 12 | --root ${DATA} \ 13 | --trainer ${TRAINER} \ 14 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 15 | --config-file configs/trainers/CoOp/${CFG}.yaml \ 16 | --output-dir output/${TRAINER}/${CFG}/${DATASET} \ 17 | --eval-only 18 | 19 | # CUDA_VISIBLE_DEVICES=0 sh scripts/zsclip/zeroshot.sh imagenet_21k rn50 20 | # CUDA_VISIBLE_DEVICES=0 sh scripts/zsclip/zeroshot.sh oxford_pets vit_l14 -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = 3 | # At least two spaces before inline comment 4 | E261, 5 | # Line lengths are recommended to be no greater than 79 characters 6 | E501, 7 | # Missing whitespace around arithmetic operator 8 | E226, 9 | # Blank line contains whitespace 10 | W293, 11 | # Do not use bare 'except' 12 | E722, 13 | # Line break after binary operator 14 | W504, 15 | # Too many leading '#' for block comment 16 | E266, 17 | # Line break before binary operator 18 | W503, 19 | # Continuation line over-indented for hanging indent 20 | E126, 21 | # Module level import not at top of file 22 | E402 23 | max-line-length = 79 24 | exclude = __init__.py, build -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/.isort.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length=79 3 | multi_line_output=6 4 | length_sort=true 5 | known_standard_library=numpy,setuptools 6 | known_myself=dassl 7 | known_third_party=matplotlib,cv2,torch,torchvision,PIL,yacs,scipy,gdown 8 | no_lines_before=STDLIB,THIRDPARTY 9 | sections=FUTURE,STDLIB,THIRDPARTY,myself,FIRSTPARTY,LOCALFOLDER 10 | default_section=FIRSTPARTY -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | BASED_ON_STYLE = pep8 3 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true 4 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true 5 | DEDENT_CLOSING_BRACKETS = true 6 | SPACES_BEFORE_COMMENT = 2 7 | ARITHMETIC_PRECEDENCE_INDICATION = true -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Kaiyang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/README.md: -------------------------------------------------------------------------------- 1 | The `datasets/` folder contains dataset-specific config files which define the standard protocols (e.g., image size, data augmentation, network architecture) used by most papers. The `trainers/` folder contains method-specific config files which define optimization algorithms (e.g., optimizer, epoch) and hyperparameter settings. 2 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/da/cifar_stl.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (32, 32) 3 | PIXEL_MEAN: [0.5, 0.5, 0.5] 4 | PIXEL_STD: [0.5, 0.5, 0.5] 5 | 6 | DATASET: 7 | NAME: "CIFARSTL" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/da/digit5.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (32, 32) 3 | PIXEL_MEAN: [0.5, 0.5, 0.5] 4 | PIXEL_STD: [0.5, 0.5, 0.5] 5 | TRANSFORMS: ["normalize"] 6 | 7 | DATASET: 8 | NAME: "Digit5" 9 | 10 | MODEL: 11 | BACKBONE: 12 | NAME: "cnn_digit5_m3sda" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/da/domainnet.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (224, 224) 3 | TRANSFORMS: ["random_flip", "random_translation", "normalize"] 4 | 5 | DATASET: 6 | NAME: "DomainNet" 7 | 8 | MODEL: 9 | BACKBONE: 10 | NAME: "resnet101" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/da/mini_domainnet.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (96, 96) 3 | TRANSFORMS: ["random_flip", "random_translation", "normalize"] 4 | 5 | DATASET: 6 | NAME: "miniDomainNet" 7 | 8 | MODEL: 9 | BACKBONE: 10 | NAME: "resnet18" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/da/office31.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (224, 224) 3 | TRANSFORMS: ["random_flip", "random_translation", "normalize"] 4 | 5 | DATASET: 6 | NAME: "Office31" 7 | 8 | MODEL: 9 | BACKBONE: 10 | NAME: "resnet50" 11 | HEAD: 12 | NAME: "mlp" 13 | HIDDEN_LAYERS: [256] 14 | DROPOUT: 0. 
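The Dassl configs README above describes a two-layer scheme: dataset files pin the data protocol (input size, transforms, backbone) while trainer files pin the optimization recipe. A minimal sketch of composing the two with yacs-style merging, assuming the same order the repo's train.py would use (the exact CLI wiring is not shown in this dump):

```python
from dassl.config import get_cfg_default

# Later merges override earlier keys: defaults <- dataset config <- trainer config <- CLI opts.
cfg = get_cfg_default()
cfg.merge_from_file("third_party/Dassl.pytorch/configs/datasets/da/office31.yaml")
cfg.merge_from_file("third_party/Dassl.pytorch/configs/trainers/da/source_only/office31.yaml")
cfg.merge_from_list(["OPTIM.LR", 0.001])  # command-line style override wins last
cfg.freeze()
print(cfg.MODEL.BACKBONE.NAME, cfg.OPTIM.LR)  # -> resnet50 0.001
```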
-------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/da/office_home.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (224, 224) 3 | 4 | DATASET: 5 | NAME: "OfficeHome" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/da/visda17.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (224, 224) 3 | TRANSFORMS: ["random_flip", "center_crop", "normalize"] 4 | 5 | DATASET: 6 | NAME: "VisDA17" 7 | 8 | MODEL: 9 | BACKBONE: 10 | NAME: "resnet101" 11 | 12 | TEST: 13 | PER_CLASS_RESULT: True -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/dg/camelyon17.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (224, 224) 3 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 4 | 5 | DATASET: 6 | NAME: "Camelyon17" 7 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/dg/cifar100_c.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (32, 32) 3 | TRANSFORMS: ["random_flip", "random_crop", "normalize"] 4 | PIXEL_MEAN: [0.5, 0.5, 0.5] 5 | PIXEL_STD: [0.5, 0.5, 0.5] 6 | 7 | DATASET: 8 | NAME: "CIFAR100C" 9 | CIFAR_C_TYPE: "fog" 10 | CIFAR_C_LEVEL: 5 11 | 12 | MODEL: 13 | BACKBONE: 14 | NAME: "wide_resnet_16_4" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/dg/cifar10_c.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (32, 32) 3 | TRANSFORMS: ["random_flip", "random_crop", "normalize"] 4 | PIXEL_MEAN: [0.5, 0.5, 0.5] 5 | PIXEL_STD: [0.5, 0.5, 0.5] 6 | 7 | DATASET: 8 | NAME: "CIFAR10C" 9 | CIFAR_C_TYPE: "fog" 10 | CIFAR_C_LEVEL: 5 11 | 12 | MODEL: 13 | BACKBONE: 14 | NAME: "wide_resnet_16_4" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/dg/digit_single.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (32, 32) 3 | TRANSFORMS: ["normalize"] 4 | PIXEL_MEAN: [0.5, 0.5, 0.5] 5 | PIXEL_STD: [0.5, 0.5, 0.5] 6 | 7 | DATASET: 8 | NAME: "DigitSingle" 9 | 10 | MODEL: 11 | BACKBONE: 12 | NAME: "cnn_digitsingle" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/dg/digits_dg.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (32, 32) 3 | TRANSFORMS: ["normalize"] 4 | PIXEL_MEAN: [0.5, 0.5, 0.5] 5 | PIXEL_STD: [0.5, 0.5, 0.5] 6 | 7 | DATASET: 8 | NAME: "DigitsDG" 9 | 10 | MODEL: 11 | BACKBONE: 12 | NAME: "cnn_digitsdg" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/dg/fmow.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (224, 224) 3 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 4 | 5 | DATASET: 6 | NAME: "FMoW" 7 | -------------------------------------------------------------------------------- 
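Each TRANSFORMS list in the dataset configs above names an augmentation pipeline rather than spelling it out; Dassl's transform builder (exported from dassl.data.transforms later in this dump) turns it into a torchvision-style transform. A hedged sketch, assuming the conventional build_transform(cfg, is_train=...) signature:

```python
from dassl.config import get_cfg_default
from dassl.data.transforms import build_transform

cfg = get_cfg_default()
cfg.merge_from_file("third_party/Dassl.pytorch/configs/datasets/dg/fmow.yaml")

# ["random_resized_crop", "random_flip", "normalize"] -> randomized train-time pipeline
tfm_train = build_transform(cfg, is_train=True)
# Test-time transforms are deterministic: resize/center-crop plus the same normalization.
tfm_test = build_transform(cfg, is_train=False)
```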
/third_party/Dassl.pytorch/configs/datasets/dg/iwildcam.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (224, 224) 3 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 4 | 5 | DATASET: 6 | NAME: "IWildCam" 7 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/dg/office_home_dg.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (224, 224) 3 | TRANSFORMS: ["random_flip", "random_translation", "normalize"] 4 | 5 | DATASET: 6 | NAME: "OfficeHomeDG" 7 | 8 | MODEL: 9 | BACKBONE: 10 | NAME: "resnet18" 11 | PRETRAINED: True -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/dg/pacs.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (224, 224) 3 | TRANSFORMS: ["random_flip", "random_translation", "normalize"] 4 | 5 | DATASET: 6 | NAME: "PACS" 7 | 8 | MODEL: 9 | BACKBONE: 10 | NAME: "resnet18" 11 | PRETRAINED: True -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/dg/vlcs.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (224, 224) 3 | TRANSFORMS: ["random_flip", "random_translation", "normalize"] 4 | 5 | DATASET: 6 | NAME: "VLCS" 7 | 8 | MODEL: 9 | BACKBONE: 10 | NAME: "resnet18" 11 | PRETRAINED: True -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/ssl/cifar10.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (32, 32) 3 | TRANSFORMS: ["random_flip", "random_crop", "normalize"] 4 | PIXEL_MEAN: [0.5, 0.5, 0.5] 5 | PIXEL_STD: [0.5, 0.5, 0.5] 6 | 7 | DATASET: 8 | NAME: "CIFAR10" 9 | NUM_LABELED: 4000 10 | VAL_PERCENT: 0. 11 | 12 | MODEL: 13 | BACKBONE: 14 | NAME: "wide_resnet_28_2" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/ssl/cifar100.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (32, 32) 3 | TRANSFORMS: ["random_flip", "random_crop", "normalize"] 4 | PIXEL_MEAN: [0.5, 0.5, 0.5] 5 | PIXEL_STD: [0.5, 0.5, 0.5] 6 | CROP_PADDING: 4 7 | 8 | DATASET: 9 | NAME: "CIFAR100" 10 | NUM_LABELED: 10000 11 | VAL_PERCENT: 0. 
12 | 13 | MODEL: 14 | BACKBONE: 15 | NAME: "wide_resnet_28_2" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/ssl/stl10.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (96, 96) 3 | TRANSFORMS: ["random_flip", "random_crop", "normalize"] 4 | PIXEL_MEAN: [0.5, 0.5, 0.5] 5 | PIXEL_STD: [0.5, 0.5, 0.5] 6 | CROP_PADDING: 4 7 | 8 | DATASET: 9 | NAME: "STL10" 10 | STL10_FOLD: 0 11 | 12 | MODEL: 13 | BACKBONE: 14 | NAME: "wide_resnet_28_2" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/ssl/svhn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (32, 32) 3 | TRANSFORMS: ["random_crop", "normalize"] 4 | PIXEL_MEAN: [0.5, 0.5, 0.5] 5 | PIXEL_STD: [0.5, 0.5, 0.5] 6 | CROP_PADDING: 4 7 | 8 | DATASET: 9 | NAME: "SVHN" 10 | NUM_LABELED: 1000 11 | VAL_PERCENT: 0. 12 | 13 | MODEL: 14 | BACKBONE: 15 | NAME: "wide_resnet_28_2" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/da/cdac/digit5.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | SAMPLER: "RandomSampler" 4 | BATCH_SIZE: 64 5 | TRAIN_U: 6 | SAME_AS_X: False 7 | BATCH_SIZE: 192 8 | TEST: 9 | BATCH_SIZE: 256 10 | K_TRANSFORMS: 2 11 | 12 | OPTIM: 13 | NAME: "sgd" 14 | LR: 0.001 15 | MAX_EPOCH: 90 16 | RAMPUP_ITRS: 10000 17 | 18 | TRAINER: 19 | CDAC: 20 | STRONG_TRANSFORMS: ["randaugment", "normalize"] -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/da/cdac/domainnet.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | SAMPLER: "RandomDomainSampler" 4 | BATCH_SIZE: 30 5 | TRAIN_U: 6 | SAME_AS_X: False 7 | BATCH_SIZE: 6 8 | TEST: 9 | BATCH_SIZE: 30 10 | K_TRANSFORMS: 2 11 | 12 | OPTIM: 13 | NAME: "sgd" 14 | LR: 0.001 15 | MAX_EPOCH: 90 16 | RAMPUP_ITRS: 10000 17 | 18 | TRAINER: 19 | CDAC: 20 | STRONG_TRANSFORMS: ["randaugment", "normalize"] -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/da/cdac/mini_domainnet.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | SAMPLER: "RandomDomainSampler" 4 | BATCH_SIZE: 64 5 | TRAIN_U: 6 | SAME_AS_X: False 7 | BATCH_SIZE: 192 8 | TEST: 9 | BATCH_SIZE: 200 10 | K_TRANSFORMS: 2 11 | 12 | OPTIM: 13 | NAME: "sgd" 14 | LR: 0.001 15 | MAX_EPOCH: 60 16 | RAMPUP_ITRS: 10000 17 | LR_SCHEDULER: "cosine" 18 | 19 | TRAINER: 20 | CDAC: 21 | STRONG_TRANSFORMS: ["randaugment", "normalize"] -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/da/dael/digit5.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | SAMPLER: "RandomDomainSampler" 4 | BATCH_SIZE: 256 5 | TRAIN_U: 6 | SAME_AS_X: False 7 | BATCH_SIZE: 64 8 | TEST: 9 | BATCH_SIZE: 256 10 | 11 | OPTIM: 12 | NAME: "sgd" 13 | LR: 0.05 14 | STEPSIZE: [30] 15 | MAX_EPOCH: 30 16 | LR_SCHEDULER: "cosine" 17 | 18 | TRAINER: 19 | DAEL: 20 | STRONG_TRANSFORMS: ["randaugment2", "normalize"] 
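In the DA trainer configs above, TRAIN_X is the labeled source loader and TRAIN_U the unlabeled target loader; SAME_AS_X: False gives TRAIN_U its own batch size, and K_TRANSFORMS replicates each image through the augmentation pipeline. A minimal sketch of how these blocks surface through Dassl's DataManager (constructor and attribute names are assumptions based on the dassl.data export shown later):

```python
from dassl.config import get_cfg_default
from dassl.data import DataManager

cfg = get_cfg_default()
cfg.merge_from_file("third_party/Dassl.pytorch/configs/datasets/da/mini_domainnet.yaml")
cfg.merge_from_file("third_party/Dassl.pytorch/configs/trainers/da/cdac/mini_domainnet.yaml")
cfg.DATASET.ROOT = "/path/to/data"  # assumed local dataset root

dm = DataManager(cfg)  # builds loaders from the DATALOADER.TRAIN_X / TRAIN_U / TEST blocks
batch_x = next(iter(dm.train_loader_x))  # labeled batch of 64: batch_x["img"], batch_x["label"]
batch_u = next(iter(dm.train_loader_u))  # unlabeled batch of 192 (SAME_AS_X: False above)
```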
-------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/da/dael/domainnet.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | SAMPLER: "RandomDomainSampler" 4 | BATCH_SIZE: 30 5 | TRAIN_U: 6 | SAME_AS_X: False 7 | BATCH_SIZE: 6 8 | TEST: 9 | BATCH_SIZE: 30 10 | 11 | OPTIM: 12 | NAME: "sgd" 13 | LR: 0.002 14 | MAX_EPOCH: 40 15 | LR_SCHEDULER: "cosine" 16 | 17 | TRAINER: 18 | DAEL: 19 | STRONG_TRANSFORMS: ["random_flip", "cutout", "randaugment2", "normalize"] -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/da/dael/mini_domainnet.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | SAMPLER: "RandomDomainSampler" 4 | BATCH_SIZE: 192 5 | TRAIN_U: 6 | SAME_AS_X: False 7 | BATCH_SIZE: 64 8 | TEST: 9 | BATCH_SIZE: 200 10 | 11 | OPTIM: 12 | NAME: "sgd" 13 | LR: 0.005 14 | MAX_EPOCH: 60 15 | LR_SCHEDULER: "cosine" 16 | 17 | TRAINER: 18 | DAEL: 19 | STRONG_TRANSFORMS: ["random_flip", "cutout", "randaugment2", "normalize"] -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/da/m3sda/digit5.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | SAMPLER: "RandomDomainSampler" 4 | BATCH_SIZE: 256 5 | TRAIN_U: 6 | SAME_AS_X: False 7 | BATCH_SIZE: 64 8 | TEST: 9 | BATCH_SIZE: 256 10 | 11 | OPTIM: 12 | NAME: "sgd" 13 | LR: 0.05 14 | STEPSIZE: [30] 15 | MAX_EPOCH: 30 16 | LR_SCHEDULER: "cosine" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/da/m3sda/domainnet.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | SAMPLER: "RandomDomainSampler" 4 | BATCH_SIZE: 30 5 | TRAIN_U: 6 | SAME_AS_X: False 7 | BATCH_SIZE: 6 8 | TEST: 9 | BATCH_SIZE: 30 10 | 11 | OPTIM: 12 | NAME: "sgd" 13 | LR: 0.002 14 | MAX_EPOCH: 40 15 | LR_SCHEDULER: "cosine" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/da/m3sda/mini_domainnet.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | SAMPLER: "RandomDomainSampler" 4 | BATCH_SIZE: 192 5 | TRAIN_U: 6 | SAME_AS_X: False 7 | BATCH_SIZE: 64 8 | TEST: 9 | BATCH_SIZE: 200 10 | 11 | OPTIM: 12 | NAME: "sgd" 13 | LR: 0.005 14 | MAX_EPOCH: 60 15 | LR_SCHEDULER: "cosine" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/da/source_only/digit5.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 256 4 | TEST: 5 | BATCH_SIZE: 256 6 | 7 | OPTIM: 8 | NAME: "sgd" 9 | LR: 0.05 10 | STEPSIZE: [30] 11 | MAX_EPOCH: 30 12 | LR_SCHEDULER: "cosine" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/da/source_only/mini_domainnet.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 128 4 | TEST: 5 | BATCH_SIZE: 128 6 | 7 | OPTIM: 8 | NAME: "sgd" 9 | LR: 0.005 10 | MAX_EPOCH: 60 11 | LR_SCHEDULER: "cosine" 
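The OPTIM blocks throughout these configs (and in the POMP/VPT ones earlier) pair a short constant warmup with cosine decay. The toy function below reproduces that schedule's shape for illustration only; Dassl's actual scheduler construction may handle the warmup boundary differently:

```python
import math

def lr_at_epoch(epoch, base_lr=0.005, max_epoch=60, warmup_epoch=1, warmup_cons_lr=1e-5):
    """Constant warmup for the first epoch(s), then cosine decay toward zero."""
    if epoch < warmup_epoch:
        return warmup_cons_lr
    progress = (epoch - warmup_epoch) / max(1, max_epoch - warmup_epoch)
    return 0.5 * base_lr * (1.0 + math.cos(math.pi * progress))

print([round(lr_at_epoch(e), 6) for e in (0, 1, 30, 59)])  # warmup floor, peak, midway, near zero
```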
-------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/da/source_only/office31.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 32 6 | 7 | OPTIM: 8 | NAME: "sgd" 9 | LR: 0.002 10 | STEPSIZE: [20] 11 | MAX_EPOCH: 20 -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/da/source_only/visda17.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 32 6 | 7 | OPTIM: 8 | NAME: "sgd" 9 | LR: 0.0001 10 | STEPSIZE: [2] 11 | MAX_EPOCH: 2 12 | 13 | TRAIN: 14 | PRINT_FREQ: 50 15 | COUNT_ITER: "train_u" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/dg/daeldg/digits_dg.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | SAMPLER: "RandomDomainSampler" 4 | BATCH_SIZE: 120 5 | TEST: 6 | BATCH_SIZE: 100 7 | 8 | OPTIM: 9 | NAME: "sgd" 10 | LR: 0.05 11 | STEPSIZE: [20] 12 | MAX_EPOCH: 50 13 | 14 | TRAINER: 15 | DAELDG: 16 | STRONG_TRANSFORMS: ["randaugment2", "normalize"] -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/dg/daeldg/office_home_dg.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | SAMPLER: "RandomDomainSampler" 4 | BATCH_SIZE: 30 5 | TEST: 6 | BATCH_SIZE: 100 7 | 8 | OPTIM: 9 | NAME: "sgd" 10 | LR: 0.002 11 | MAX_EPOCH: 40 12 | LR_SCHEDULER: "cosine" 13 | 14 | TRAINER: 15 | DAELDG: 16 | STRONG_TRANSFORMS: ["random_flip", "cutout", "randaugment2", "normalize"] -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/dg/daeldg/pacs.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | SAMPLER: "RandomDomainSampler" 4 | BATCH_SIZE: 30 5 | TEST: 6 | BATCH_SIZE: 100 7 | 8 | OPTIM: 9 | NAME: "sgd" 10 | LR: 0.002 11 | MAX_EPOCH: 40 12 | LR_SCHEDULER: "cosine" 13 | 14 | TRAINER: 15 | DAELDG: 16 | STRONG_TRANSFORMS: ["random_flip", "cutout", "randaugment2", "normalize"] -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/dg/ddaig/digits_dg.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | PIXEL_MEAN: [0., 0., 0.] 3 | PIXEL_STD: [1., 1., 1.] 4 | 5 | DATALOADER: 6 | TRAIN_X: 7 | BATCH_SIZE: 128 8 | TEST: 9 | BATCH_SIZE: 128 10 | 11 | OPTIM: 12 | NAME: "sgd" 13 | LR: 0.05 14 | STEPSIZE: [20] 15 | MAX_EPOCH: 50 16 | 17 | TRAINER: 18 | DDAIG: 19 | G_ARCH: "fcn_3x32_gctx" 20 | LMDA: 0.3 -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/dg/ddaig/office_home_dg.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | PIXEL_MEAN: [0., 0., 0.] 3 | PIXEL_STD: [1., 1., 1.] 
4 | 5 | DATALOADER: 6 | TRAIN_X: 7 | BATCH_SIZE: 16 8 | TEST: 9 | BATCH_SIZE: 16 10 | 11 | OPTIM: 12 | NAME: "sgd" 13 | LR: 0.0005 14 | STEPSIZE: [20] 15 | MAX_EPOCH: 25 16 | 17 | TRAINER: 18 | DDAIG: 19 | G_ARCH: "fcn_3x64_gctx" 20 | WARMUP: 3 21 | LMDA: 0.3 -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/dg/ddaig/pacs.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | PIXEL_MEAN: [0., 0., 0.] 3 | PIXEL_STD: [1., 1., 1.] 4 | 5 | DATALOADER: 6 | TRAIN_X: 7 | BATCH_SIZE: 16 8 | TEST: 9 | BATCH_SIZE: 16 10 | 11 | OPTIM: 12 | NAME: "sgd" 13 | LR: 0.0005 14 | STEPSIZE: [20] 15 | MAX_EPOCH: 25 16 | 17 | TRAINER: 18 | DDAIG: 19 | G_ARCH: "fcn_3x64_gctx" 20 | WARMUP: 3 21 | LMDA: 0.3 -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/dg/vanilla/digits_dg.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 128 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | OPTIM: 9 | NAME: "sgd" 10 | LR: 0.05 11 | STEPSIZE: [20] 12 | MAX_EPOCH: 50 13 | 14 | TRAIN: 15 | PRINT_FREQ: 20 -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/dg/vanilla/mini_domainnet.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 128 4 | TEST: 5 | BATCH_SIZE: 128 6 | 7 | OPTIM: 8 | NAME: "sgd" 9 | LR: 0.005 10 | MAX_EPOCH: 60 11 | LR_SCHEDULER: "cosine" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/dg/vanilla/office_home_dg.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 64 4 | TEST: 5 | BATCH_SIZE: 100 6 | 7 | OPTIM: 8 | NAME: "sgd" 9 | LR: 0.001 10 | MAX_EPOCH: 50 11 | LR_SCHEDULER: "cosine" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/dg/vanilla/pacs.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 64 4 | TEST: 5 | BATCH_SIZE: 100 6 | 7 | OPTIM: 8 | NAME: "sgd" 9 | LR: 0.001 10 | MAX_EPOCH: 50 11 | LR_SCHEDULER: "cosine" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/ssl/fixmatch/cifar10.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 64 4 | TRAIN_U: 5 | SAME_AS_X: False 6 | BATCH_SIZE: 448 7 | TEST: 8 | BATCH_SIZE: 500 9 | 10 | OPTIM: 11 | NAME: "sgd" 12 | LR: 0.05 13 | STEPSIZE: [4000] 14 | MAX_EPOCH: 4000 15 | LR_SCHEDULER: "cosine" 16 | 17 | TRAIN: 18 | COUNT_ITER: "train_u" 19 | PRINT_FREQ: 10 20 | 21 | TRAINER: 22 | FIXMATCH: 23 | STRONG_TRANSFORMS: ["random_flip", "randaugment_fixmatch", "normalize", "cutout"] -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dassl 3 | ------ 4 | PyTorch toolbox for domain adaptation and semi-supervised learning. 
5 | 6 | URL: https://github.com/KaiyangZhou/Dassl.pytorch 7 | 8 | @article{zhou2020domain, 9 | title={Domain Adaptive Ensemble Learning}, 10 | author={Zhou, Kaiyang and Yang, Yongxin and Qiao, Yu and Xiang, Tao}, 11 | journal={arXiv preprint arXiv:2003.07325}, 12 | year={2020} 13 | } 14 | """ 15 | 16 | __version__ = "0.6.3" 17 | __author__ = "Kaiyang Zhou" 18 | __homepage__ = "https://kaiyangzhou.github.io/" 19 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .defaults import _C as cfg_default 2 | 3 | 4 | def get_cfg_default(): 5 | return cfg_default.clone() 6 | 7 | 8 | def clean_cfg(cfg, trainer): 9 | """Remove unused trainers (configs). 10 | 11 | Aim: Only show relevant information when calling print(cfg). 12 | 13 | Args: 14 | cfg (_C): cfg instance. 15 | trainer (str): trainer name. 16 | """ 17 | keys = list(cfg.TRAINER.keys()) 18 | for key in keys: 19 | if key == "NAME" or key == trainer.upper(): 20 | continue 21 | cfg.TRAINER.pop(key, None) 22 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .data_manager import DataManager, DatasetWrapper 2 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .build import DATASET_REGISTRY, build_dataset # isort:skip 2 | from .base_dataset import Datum, DatasetBase # isort:skip 3 | 4 | from .da import * 5 | from .dg import * 6 | from .ssl import * 7 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/data/datasets/build.py: -------------------------------------------------------------------------------- 1 | from dassl.utils import Registry, check_availability 2 | 3 | DATASET_REGISTRY = Registry("DATASET") 4 | 5 | 6 | def build_dataset(cfg): 7 | avai_datasets = DATASET_REGISTRY.registered_names() 8 | check_availability(cfg.DATASET.NAME, avai_datasets) 9 | if cfg.VERBOSE: 10 | print("Loading dataset: {}".format(cfg.DATASET.NAME)) 11 | return DATASET_REGISTRY.get(cfg.DATASET.NAME)(cfg) 12 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/data/datasets/da/__init__.py: -------------------------------------------------------------------------------- 1 | from .digit5 import Digit5 2 | from .visda17 import VisDA17 3 | from .cifarstl import CIFARSTL 4 | from .office31 import Office31 5 | from .domainnet import DomainNet 6 | from .office_home import OfficeHome 7 | from .mini_domainnet import miniDomainNet 8 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/data/datasets/dg/__init__.py: -------------------------------------------------------------------------------- 1 | from .pacs import PACS 2 | from .vlcs import VLCS 3 | from .wilds import * 4 | from .cifar_c import CIFAR10C, CIFAR100C 5 | from .digits_dg import DigitsDG 6 | from .digit_single import DigitSingle 7 | from .office_home_dg import OfficeHomeDG 8 | -------------------------------------------------------------------------------- 
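build_dataset above resolves cfg.DATASET.NAME through DATASET_REGISTRY, which is why the da/dg/ssl packages simply import their dataset classes: the @DATASET_REGISTRY.register() decorator does the wiring. A minimal sketch of plugging in a new dataset the same way (MyPlaces, its paths, and the Datum keyword names are illustrative assumptions):

```python
from dassl.data.datasets import DATASET_REGISTRY, DatasetBase, Datum


@DATASET_REGISTRY.register()
class MyPlaces(DatasetBase):
    """Hypothetical dataset; DATASET.NAME: "MyPlaces" would now resolve here."""

    def __init__(self, cfg):
        train = [Datum(impath="/data/arch_001.jpg", label=0, classname="arch")]
        test = [Datum(impath="/data/bridge_001.jpg", label=1, classname="bridge")]
        super().__init__(train_x=train, test=test)
```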
/third_party/Dassl.pytorch/dassl/data/datasets/dg/wilds/__init__.py: -------------------------------------------------------------------------------- 1 | from .fmow import FMoW 2 | from .iwildcam import IWildCam 3 | from .camelyon17 import Camelyon17 4 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/data/datasets/dg/wilds/camelyon17.py: -------------------------------------------------------------------------------- 1 | from dassl.data.datasets import DATASET_REGISTRY 2 | 3 | from .wilds_base import WILDSBase 4 | 5 | 6 | @DATASET_REGISTRY.register() 7 | class Camelyon17(WILDSBase): 8 | """Tumor tissue recognition. 9 | 10 | 2 classes (whether a given region of tissue contains tumor tissue). 11 | 12 | Reference: 13 | - Bandi et al. "From detection of individual metastases to classification of lymph 14 | node status at the patient level: the CAMELYON17 challenge." TMI 2021. 15 | - Koh et al. "Wilds: A benchmark of in-the-wild distribution shifts." ICML 2021. 16 | """ 17 | 18 | dataset_dir = "camelyon17_v1.0" 19 | 20 | def __init__(self, cfg): 21 | super().__init__(cfg) 22 | 23 | def load_classnames(self): 24 | return {0: "healthy tissue", 1: "tumor tissue"} 25 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/data/datasets/dg/wilds/iwildcam.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import pandas as pd 3 | 4 | from dassl.data.datasets import DATASET_REGISTRY 5 | 6 | from .wilds_base import WILDSBase 7 | 8 | 9 | @DATASET_REGISTRY.register() 10 | class IWildCam(WILDSBase): 11 | """Animal species recognition. 12 | 13 | 182 classes (species). 14 | 15 | Reference: 16 | - Beery et al. "The iwildcam 2021 competition dataset." arXiv 2021. 17 | - Koh et al. "Wilds: A benchmark of in-the-wild distribution shifts." ICML 2021. 18 | """ 19 | 20 | dataset_dir = "iwildcam_v2.0" 21 | 22 | def __init__(self, cfg): 23 | super().__init__(cfg) 24 | 25 | def get_image_path(self, dataset, idx): 26 | image_name = dataset._input_array[idx] 27 | image_path = osp.join(self.dataset_dir, "train", image_name) 28 | return image_path 29 | 30 | def load_classnames(self): 31 | df = pd.read_csv(osp.join(self.dataset_dir, "categories.csv")) 32 | return dict(df["name"]) 33 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/data/datasets/ssl/__init__.py: -------------------------------------------------------------------------------- 1 | from .svhn import SVHN 2 | from .cifar import CIFAR10, CIFAR100 3 | from .stl10 import STL10 4 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/data/datasets/ssl/svhn.py: -------------------------------------------------------------------------------- 1 | from .cifar import CIFAR10 2 | from ..build import DATASET_REGISTRY 3 | 4 | 5 | @DATASET_REGISTRY.register() 6 | class SVHN(CIFAR10): 7 | """SVHN for SSL. 8 | 9 | Reference: 10 | - Netzer et al. Reading Digits in Natural Images with 11 | Unsupervised Feature Learning. NIPS-W 2011. 
12 | """ 13 | 14 | dataset_dir = "svhn" 15 | 16 | def __init__(self, cfg): 17 | super().__init__(cfg) 18 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from .transforms import INTERPOLATION_MODES, build_transform 2 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/engine/__init__.py: -------------------------------------------------------------------------------- 1 | from .build import TRAINER_REGISTRY, build_trainer # isort:skip 2 | from .trainer import TrainerX, TrainerXU, TrainerBase, SimpleTrainer, SimpleNet # isort:skip 3 | 4 | from .da import * 5 | from .dg import * 6 | from .ssl import * 7 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/engine/build.py: -------------------------------------------------------------------------------- 1 | from dassl.utils import Registry, check_availability 2 | 3 | TRAINER_REGISTRY = Registry("TRAINER") 4 | 5 | 6 | def build_trainer(cfg): 7 | avai_trainers = TRAINER_REGISTRY.registered_names() 8 | check_availability(cfg.TRAINER.NAME, avai_trainers) 9 | if cfg.VERBOSE: 10 | print("Loading trainer: {}".format(cfg.TRAINER.NAME)) 11 | return TRAINER_REGISTRY.get(cfg.TRAINER.NAME)(cfg) 12 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/engine/da/__init__.py: -------------------------------------------------------------------------------- 1 | from .se import SE 2 | from .mcd import MCD 3 | from .mme import MME 4 | from .adda import ADDA 5 | from .cdac import CDAC 6 | from .dael import DAEL 7 | from .dann import DANN 8 | from .adabn import AdaBN 9 | from .m3sda import M3SDA 10 | from .source_only import SourceOnly 11 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/engine/da/adabn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from dassl.utils import check_isfile 4 | from dassl.engine import TRAINER_REGISTRY, TrainerXU 5 | 6 | 7 | @TRAINER_REGISTRY.register() 8 | class AdaBN(TrainerXU): 9 | """Adaptive Batch Normalization. 10 | 11 | https://arxiv.org/abs/1603.04779. 
12 | """ 13 | 14 | def __init__(self, cfg): 15 | super().__init__(cfg) 16 | self.done_reset_bn_stats = False 17 | 18 | def check_cfg(self, cfg): 19 | assert check_isfile( 20 | cfg.MODEL.INIT_WEIGHTS 21 | ), "The weights of source model must be provided" 22 | 23 | def before_epoch(self): 24 | if not self.done_reset_bn_stats: 25 | for m in self.model.modules(): 26 | classname = m.__class__.__name__ 27 | if classname.find("BatchNorm") != -1: 28 | m.reset_running_stats() 29 | 30 | self.done_reset_bn_stats = True 31 | 32 | def forward_backward(self, batch_x, batch_u): 33 | input_u = batch_u["img"].to(self.device) 34 | 35 | with torch.no_grad(): 36 | self.model(input_u) 37 | 38 | return None 39 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/engine/da/source_only.py: -------------------------------------------------------------------------------- 1 | from torch.nn import functional as F 2 | 3 | from dassl.engine import TRAINER_REGISTRY, TrainerXU 4 | from dassl.metrics import compute_accuracy 5 | 6 | 7 | @TRAINER_REGISTRY.register() 8 | class SourceOnly(TrainerXU): 9 | """Baseline model for domain adaptation, which is 10 | trained using source data only. 11 | """ 12 | 13 | def forward_backward(self, batch_x, batch_u): 14 | input, label = self.parse_batch_train(batch_x, batch_u) 15 | output = self.model(input) 16 | loss = F.cross_entropy(output, label) 17 | self.model_backward_and_update(loss) 18 | 19 | loss_summary = { 20 | "loss": loss.item(), 21 | "acc": compute_accuracy(output, label)[0].item(), 22 | } 23 | 24 | if (self.batch_idx + 1) == self.num_batches: 25 | self.update_lr() 26 | 27 | return loss_summary 28 | 29 | def parse_batch_train(self, batch_x, batch_u): 30 | input = batch_x["img"] 31 | label = batch_x["label"] 32 | input = input.to(self.device) 33 | label = label.to(self.device) 34 | return input, label 35 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/engine/dg/__init__.py: -------------------------------------------------------------------------------- 1 | from .ddaig import DDAIG 2 | from .daeldg import DAELDG 3 | from .vanilla import Vanilla 4 | from .crossgrad import CrossGrad 5 | from .domain_mix import DomainMix 6 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/engine/dg/vanilla.py: -------------------------------------------------------------------------------- 1 | from torch.nn import functional as F 2 | 3 | from dassl.engine import TRAINER_REGISTRY, TrainerX 4 | from dassl.metrics import compute_accuracy 5 | 6 | 7 | @TRAINER_REGISTRY.register() 8 | class Vanilla(TrainerX): 9 | """Vanilla model. 10 | 11 | A.k.a. Empirical Risk Minimization, or ERM. 
12 | """ 13 | 14 | def forward_backward(self, batch): 15 | input, target = self.parse_batch_train(batch) 16 | output = self.model(input) 17 | loss = F.cross_entropy(output, target) 18 | self.model_backward_and_update(loss) 19 | 20 | loss_summary = { 21 | "loss": loss.item(), 22 | "acc": compute_accuracy(output, target)[0].item(), 23 | } 24 | 25 | if (self.batch_idx + 1) == self.num_batches: 26 | self.update_lr() 27 | 28 | return loss_summary 29 | 30 | def parse_batch_train(self, batch): 31 | input = batch["img"] 32 | target = batch["label"] 33 | input = input.to(self.device) 34 | target = target.to(self.device) 35 | return input, target 36 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/engine/ssl/__init__.py: -------------------------------------------------------------------------------- 1 | from .entmin import EntMin 2 | from .fixmatch import FixMatch 3 | from .mixmatch import MixMatch 4 | from .mean_teacher import MeanTeacher 5 | from .sup_baseline import SupBaseline 6 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/engine/ssl/sup_baseline.py: -------------------------------------------------------------------------------- 1 | from torch.nn import functional as F 2 | 3 | from dassl.engine import TRAINER_REGISTRY, TrainerXU 4 | from dassl.metrics import compute_accuracy 5 | 6 | 7 | @TRAINER_REGISTRY.register() 8 | class SupBaseline(TrainerXU): 9 | """Supervised Baseline.""" 10 | 11 | def forward_backward(self, batch_x, batch_u): 12 | input, label = self.parse_batch_train(batch_x, batch_u) 13 | output = self.model(input) 14 | loss = F.cross_entropy(output, label) 15 | self.model_backward_and_update(loss) 16 | 17 | loss_summary = { 18 | "loss": loss.item(), 19 | "acc": compute_accuracy(output, label)[0].item(), 20 | } 21 | 22 | if (self.batch_idx + 1) == self.num_batches: 23 | self.update_lr() 24 | 25 | return loss_summary 26 | 27 | def parse_batch_train(self, batch_x, batch_u): 28 | input = batch_x["img"] 29 | label = batch_x["label"] 30 | input = input.to(self.device) 31 | label = label.to(self.device) 32 | return input, label 33 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .build import build_evaluator, EVALUATOR_REGISTRY # isort:skip 2 | 3 | from .evaluator import EvaluatorBase, Classification 4 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/evaluation/build.py: -------------------------------------------------------------------------------- 1 | from dassl.utils import Registry, check_availability 2 | 3 | EVALUATOR_REGISTRY = Registry("EVALUATOR") 4 | 5 | 6 | def build_evaluator(cfg, **kwargs): 7 | avai_evaluators = EVALUATOR_REGISTRY.registered_names() 8 | check_availability(cfg.TEST.EVALUATOR, avai_evaluators) 9 | if cfg.VERBOSE: 10 | print("Loading evaluator: {}".format(cfg.TEST.EVALUATOR)) 11 | return EVALUATOR_REGISTRY.get(cfg.TEST.EVALUATOR)(cfg, **kwargs) 12 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from .accuracy import compute_accuracy 2 | from .distance import ( 3 | cosine_distance, compute_distance_matrix, 
euclidean_squared_distance 4 | ) 5 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/metrics/accuracy.py: -------------------------------------------------------------------------------- 1 | def compute_accuracy(output, target, topk=(1, )): 2 | """Computes the accuracy over the k top predictions for 3 | the specified values of k. 4 | 5 | Args: 6 | output (torch.Tensor): prediction matrix with shape (batch_size, num_classes). 7 | target (torch.LongTensor): ground truth labels with shape (batch_size). 8 | topk (tuple, optional): accuracy at top-k will be computed. For example, 9 | topk=(1, 5) means accuracy at top-1 and top-5 will be computed. 10 | 11 | Returns: 12 | list: accuracy at top-k. 13 | """ 14 | maxk = max(topk) 15 | batch_size = target.size(0) 16 | 17 | if isinstance(output, (tuple, list)): 18 | output = output[0] 19 | 20 | _, pred = output.topk(maxk, 1, True, True) 21 | pred = pred.t() 22 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 23 | 24 | res = [] 25 | for k in topk: 26 | correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) # reshape, not view: correct[:k] may be non-contiguous 27 | acc = correct_k.mul_(100.0 / batch_size) 28 | res.append(acc) 29 | 30 | return res 31 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .head import HEAD_REGISTRY, build_head 2 | from .network import NETWORK_REGISTRY, build_network 3 | from .backbone import BACKBONE_REGISTRY, Backbone, build_backbone 4 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .build import build_backbone, BACKBONE_REGISTRY # isort:skip 2 | from .backbone import Backbone # isort:skip 3 | 4 | from .vgg import vgg16 5 | from .resnet import ( 6 | resnet18, resnet34, resnet50, resnet101, resnet152, resnet18_ms_l1, 7 | resnet50_ms_l1, resnet18_ms_l12, resnet50_ms_l12, resnet101_ms_l1, 8 | resnet18_ms_l123, resnet50_ms_l123, resnet101_ms_l12, resnet101_ms_l123, 9 | resnet18_efdmix_l1, resnet50_efdmix_l1, resnet18_efdmix_l12, 10 | resnet50_efdmix_l12, resnet101_efdmix_l1, resnet18_efdmix_l123, 11 | resnet50_efdmix_l123, resnet101_efdmix_l12, resnet101_efdmix_l123 12 | ) 13 | from .alexnet import alexnet 14 | from .wide_resnet import wide_resnet_16_4, wide_resnet_28_2 15 | from .cnn_digitsdg import cnn_digitsdg 16 | from .efficientnet import ( 17 | efficientnet_b0, efficientnet_b1, efficientnet_b2, efficientnet_b3, 18 | efficientnet_b4, efficientnet_b5, efficientnet_b6, efficientnet_b7 19 | ) 20 | from .resnet_dynamic import * 21 | from .cnn_digitsingle import cnn_digitsingle 22 | from .preact_resnet18 import preact_resnet18 23 | from .cnn_digit5_m3sda import cnn_digit5_m3sda 24 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class Backbone(nn.Module): 5 | 6 | def __init__(self): 7 | super().__init__() 8 | 9 | def forward(self): 10 | pass 11 | 12 | @property 13 | def out_features(self): 14 | """Output feature dimension.""" 15 | if self.__dict__.get("_out_features") is None: 16 | return None 17 | return self._out_features 18 |
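New backbones plug into the Backbone base class above through BACKBONE_REGISTRY, whose factory lives in build.py just below. A minimal sketch of the pattern, assuming only the Dassl layout shown here; the toy architecture and its 64-dim output are invented for illustration:

import torch.nn as nn

from dassl.modeling.backbone import BACKBONE_REGISTRY, Backbone


@BACKBONE_REGISTRY.register()
class tiny_cnn(Backbone):
    """Toy two-block CNN ending in global average pooling."""

    def __init__(self, **kwargs):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, 3, stride=2, padding=1), nn.ReLU(),
            nn.Conv2d(32, 64, 3, stride=2, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool2d(1),
        )
        self._out_features = 64  # surfaced through the out_features property above

    def forward(self, x):
        return self.features(x).flatten(1)

Once registered, build_backbone("tiny_cnn") (below) instantiates it by name.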
-------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/backbone/build.py: -------------------------------------------------------------------------------- 1 | from dassl.utils import Registry, check_availability 2 | 3 | BACKBONE_REGISTRY = Registry("BACKBONE") 4 | 5 | 6 | def build_backbone(name, verbose=True, **kwargs): 7 | avai_backbones = BACKBONE_REGISTRY.registered_names() 8 | check_availability(name, avai_backbones) 9 | if verbose: 10 | print("Backbone: {}".format(name)) 11 | return BACKBONE_REGISTRY.get(name)(**kwargs) 12 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/backbone/efficientnet/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Source: https://github.com/lukemelas/EfficientNet-PyTorch. 3 | """ 4 | __version__ = "0.6.4" 5 | from .model import ( 6 | EfficientNet, efficientnet_b0, efficientnet_b1, efficientnet_b2, 7 | efficientnet_b3, efficientnet_b4, efficientnet_b5, efficientnet_b6, 8 | efficientnet_b7 9 | ) 10 | from .utils import ( 11 | BlockArgs, BlockDecoder, GlobalParams, efficientnet, get_model_params 12 | ) 13 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/head/__init__.py: -------------------------------------------------------------------------------- 1 | from .build import build_head, HEAD_REGISTRY # isort:skip 2 | 3 | from .mlp import mlp 4 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/head/build.py: -------------------------------------------------------------------------------- 1 | from dassl.utils import Registry, check_availability 2 | 3 | HEAD_REGISTRY = Registry("HEAD") 4 | 5 | 6 | def build_head(name, verbose=True, **kwargs): 7 | avai_heads = HEAD_REGISTRY.registered_names() 8 | check_availability(name, avai_heads) 9 | if verbose: 10 | print("Head: {}".format(name)) 11 | return HEAD_REGISTRY.get(name)(**kwargs) 12 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/network/__init__.py: -------------------------------------------------------------------------------- 1 | from .build import build_network, NETWORK_REGISTRY # isort:skip 2 | 3 | from .ddaig_fcn import ( 4 | fcn_3x32_gctx, fcn_3x64_gctx, fcn_3x32_gctx_stn, fcn_3x64_gctx_stn 5 | ) 6 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/network/build.py: -------------------------------------------------------------------------------- 1 | from dassl.utils import Registry, check_availability 2 | 3 | NETWORK_REGISTRY = Registry("NETWORK") 4 | 5 | 6 | def build_network(name, verbose=True, **kwargs): 7 | avai_models = NETWORK_REGISTRY.registered_names() 8 | check_availability(name, avai_models) 9 | if verbose: 10 | print("Network: {}".format(name)) 11 | return NETWORK_REGISTRY.get(name)(**kwargs) 12 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .mmd import MaximumMeanDiscrepancy 2 | from .conv import * 3 | from .dsbn import DSBN1d, DSBN2d 4 | from .mixup import mixup 5 | from .efdmix import ( 6 | EFDMix, random_efdmix, 
activate_efdmix, run_with_efdmix, deactivate_efdmix, 7 | crossdomain_efdmix, run_without_efdmix 8 | ) 9 | from .mixstyle import ( 10 | MixStyle, random_mixstyle, activate_mixstyle, run_with_mixstyle, 11 | deactivate_mixstyle, crossdomain_mixstyle, run_without_mixstyle 12 | ) 13 | from .attention import * 14 | from .transnorm import TransNorm1d, TransNorm2d 15 | from .sequential2 import Sequential2 16 | from .reverse_grad import ReverseGrad 17 | from .cross_entropy import cross_entropy 18 | from .optimal_transport import SinkhornDivergence, MinibatchEnergyDistance 19 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/ops/attention.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.nn import functional as F 3 | 4 | __all__ = ["Attention"] 5 | 6 | 7 | class Attention(nn.Module): 8 | """Attention module from "Dynamic Domain Generalization". 9 | """ 10 | 11 | def __init__( 12 | self, 13 | in_channels: int, 14 | out_features: int, 15 | squeeze=None, 16 | bias: bool = True 17 | ): 18 | super(Attention, self).__init__() 19 | self.squeeze = squeeze if squeeze else in_channels // 16 20 | assert self.squeeze > 0 21 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 22 | self.fc1 = nn.Linear(in_channels, self.squeeze, bias=bias) 23 | self.fc2 = nn.Linear(self.squeeze, out_features, bias=bias) 24 | self.sf = nn.Softmax(dim=-1) 25 | 26 | def forward(self, x): 27 | x = self.avg_pool(x).view(x.shape[:-2]) 28 | x = self.fc1(x) 29 | x = F.relu(x, inplace=True) 30 | x = self.fc2(x) 31 | return self.sf(x) 32 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/ops/cross_entropy.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import functional as F 3 | 4 | 5 | def cross_entropy(input, target, label_smooth=0, reduction="mean"): 6 | """Cross entropy loss. 7 | 8 | Args: 9 | input (torch.Tensor): logit matrix with shape of (batch, num_classes). 10 | target (torch.LongTensor): ground-truth labels with shape (batch,). 11 | label_smooth (float, optional): label smoothing hyper-parameter. 12 | Default is 0. 13 | reduction (str, optional): how the losses for a mini-batch 14 | will be aggregated. Default is 'mean'. 15 | """ 16 | num_classes = input.shape[1] 17 | log_prob = F.log_softmax(input, dim=1) 18 | zeros = torch.zeros_like(log_prob) # one-hot buffer on the same device/dtype as the logits 19 | target = zeros.scatter_(1, target.unsqueeze(1), 1) 20 | target = (1-label_smooth) * target + label_smooth/num_classes 21 | loss = (-target * log_prob).sum(1) 22 | if reduction == "mean": 23 | return loss.mean() 24 | elif reduction == "sum": 25 | return loss.sum() 26 | elif reduction == "none": 27 | return loss 28 | else: 29 | raise ValueError("Invalid reduction: {}".format(reduction)) 30 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/ops/mixup.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def mixup(x1, x2, y1, y2, beta, preserve_order=False): 5 | """Mixup. 6 | 7 | Args: 8 | x1 (torch.Tensor): data with shape of (b, c, h, w). 9 | x2 (torch.Tensor): data with shape of (b, c, h, w). 10 | y1 (torch.Tensor): label with shape of (b, n). 11 | y2 (torch.Tensor): label with shape of (b, n). 12 | beta (float): hyper-parameter for Beta sampling.
13 | preserve_order (bool): apply lmda=max(lmda, 1-lmda). 14 | Default is False. 15 | """ 16 | lmda = torch.distributions.Beta(beta, beta).sample([x1.shape[0], 1, 1, 1]) 17 | if preserve_order: 18 | lmda = torch.max(lmda, 1 - lmda) 19 | lmda = lmda.to(x1.device) 20 | xmix = x1*lmda + x2 * (1-lmda) 21 | lmda = lmda[:, :, 0, 0] 22 | ymix = y1*lmda + y2 * (1-lmda) 23 | return xmix, ymix 24 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/ops/reverse_grad.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.autograd import Function 3 | 4 | 5 | class _ReverseGrad(Function): 6 | 7 | @staticmethod 8 | def forward(ctx, input, grad_scaling): 9 | ctx.grad_scaling = grad_scaling 10 | return input.view_as(input) 11 | 12 | @staticmethod 13 | def backward(ctx, grad_output): 14 | grad_scaling = ctx.grad_scaling 15 | return -grad_scaling * grad_output, None 16 | 17 | 18 | reverse_grad = _ReverseGrad.apply 19 | 20 | 21 | class ReverseGrad(nn.Module): 22 | """Gradient reversal layer. 23 | 24 | It acts as an identity layer in the forward, 25 | but reverses the sign of the gradient in 26 | the backward. 27 | """ 28 | 29 | def forward(self, x, grad_scaling=1.0): 30 | assert (grad_scaling >= 31 | 0), "grad_scaling must be non-negative, " "but got {}".format( 32 | grad_scaling 33 | ) 34 | return reverse_grad(x, grad_scaling) 35 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/ops/sequential2.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class Sequential2(nn.Sequential): 5 | """An alternative sequential container to nn.Sequential, 6 | which accepts an arbitrary number of input arguments. 7 | """ 8 | 9 | def forward(self, *inputs): 10 | for module in self._modules.values(): 11 | if isinstance(inputs, tuple): 12 | inputs = module(*inputs) 13 | else: 14 | inputs = module(inputs) 15 | return inputs 16 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/optim/__init__.py: -------------------------------------------------------------------------------- 1 | from .optimizer import build_optimizer 2 | from .lr_scheduler import build_lr_scheduler 3 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .tools import * 2 | from .logger import * 3 | from .meters import * 4 | from .registry import * 5 | from .torchtools import * 6 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/datasets/da/visda17.sh: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # ROOT is the root directory where you put your domain datasets. 
3 | # 4 | # Suppose you wanna put the dataset under $DATA, which stores all the 5 | # domain datasets, run the following command in your terminal to 6 | # download VisDa17: 7 | # 8 | # $ sh visda17.sh $DATA 9 | #------------------------------------------------------------------------ 10 | 11 | ROOT=$1 12 | mkdir $ROOT/visda17 13 | cd $ROOT/visda17 14 | 15 | wget http://csr.bu.edu/ftp/visda17/clf/train.tar 16 | tar xvf train.tar 17 | 18 | wget http://csr.bu.edu/ftp/visda17/clf/validation.tar 19 | tar xvf validation.tar 20 | 21 | wget http://csr.bu.edu/ftp/visda17/clf/test.tar 22 | tar xvf test.tar 23 | 24 | wget https://raw.githubusercontent.com/VisionLearningGroup/taskcv-2017-public/master/classification/data/image_list.txt -O test/image_list.txt -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/linter.sh: -------------------------------------------------------------------------------- 1 | echo "Running isort" 2 | isort -y -sp . 3 | echo "Done" 4 | 5 | echo "Running yapf" 6 | yapf -i -r -vv -e build . 7 | echo "Done" 8 | 9 | echo "Running flake8" 10 | flake8 . 11 | echo "Done" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/requirements.txt: -------------------------------------------------------------------------------- 1 | flake8==3.7.9 2 | yapf==0.29.0 3 | isort==4.3.21 4 | yacs 5 | gdown 6 | tb-nightly 7 | future 8 | scipy 9 | scikit-learn 10 | tqdm 11 | ftfy 12 | regex 13 | wilds==1.2.2 14 | tabulate 15 | -------------------------------------------------------------------------------- /third_party/Detic/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/.DS_Store -------------------------------------------------------------------------------- /third_party/Detic/.gitignore: -------------------------------------------------------------------------------- 1 | third_party/detectron2 2 | ./models 3 | configs-experimental 4 | experiments 5 | # output dir 6 | index.html 7 | data/* 8 | slurm/ 9 | slurm 10 | slurm-output 11 | slurm-output/ 12 | output 13 | instant_test_output 14 | inference_test_output 15 | 16 | 17 | *.png 18 | *.diff 19 | *.jpg 20 | !/projects/DensePose/doc/images/*.jpg 21 | 22 | # compilation and distribution 23 | __pycache__ 24 | _ext 25 | *.pyc 26 | *.pyd 27 | *.so 28 | *.dll 29 | *.egg-info/ 30 | build/ 31 | dist/ 32 | wheels/ 33 | 34 | # pytorch/python/numpy formats 35 | *.pth 36 | *.pkl 37 | *.ts 38 | model_ts*.txt 39 | 40 | # ipython/jupyter notebooks 41 | *.ipynb 42 | **/.ipynb_checkpoints/ 43 | 44 | # Editor temporaries 45 | *.swn 46 | *.swo 47 | *.swp 48 | *~ 49 | 50 | # editor settings 51 | .idea 52 | .vscode 53 | _darcs 54 | 55 | # project dirs 56 | /detectron2/model_zoo/configs 57 | /datasets/* 58 | !/datasets/*.* 59 | !/datasets/metadata 60 | /projects/*/datasets 61 | /models 62 | /snippet 63 | -------------------------------------------------------------------------------- /third_party/Detic/.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "third_party/Deformable-DETR"] 2 | path = third_party/Deformable-DETR 3 | url = https://github.com/fundamentalvision/Deformable-DETR.git 4 | [submodule "third_party/CenterNet2"] 5 | path = third_party/CenterNet2 6 | url = https://github.com/xingyizhou/CenterNet2.git 7 | 
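Because Detic vendors Deformable-DETR and CenterNet2 as git submodules, a fresh clone needs `git submodule update --init --recursive` before those directories are populated; a plain `git clone` leaves them empty.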
-------------------------------------------------------------------------------- /third_party/Detic/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | Facebook has adopted a Code of Conduct that we expect project participants to adhere to. 4 | Please read the [full text](https://code.fb.com/codeofconduct/) 5 | so that you can understand what actions will and will not be tolerated. 6 | -------------------------------------------------------------------------------- /third_party/Detic/cog.yaml: -------------------------------------------------------------------------------- 1 | build: 2 | gpu: true 3 | cuda: "10.1" 4 | python_version: "3.8" 5 | system_packages: 6 | - "libgl1-mesa-glx" 7 | - "libglib2.0-0" 8 | python_packages: 9 | - "ipython==7.30.1" 10 | - "numpy==1.21.4" 11 | - "torch==1.8.1" 12 | - "torchvision==0.9.1" 13 | - "dataclasses==0.6" 14 | - "opencv-python==4.5.5.62" 15 | - "imageio==2.9.0" 16 | - "ftfy==6.0.3" 17 | - "regex==2021.10.8" 18 | - "tqdm==4.62.3" 19 | - "timm==0.4.12" 20 | - "fasttext==0.9.2" 21 | - "scikit-learn==1.0.2" 22 | - "lvis==0.5.3" 23 | - "nltk==3.6.7" 24 | - "git+https://github.com/openai/CLIP.git" 25 | run: 26 | - pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.8/index.html 27 | 28 | predict: "predict.py:Predictor" 29 | -------------------------------------------------------------------------------- /third_party/Detic/configs/Base_OVCOCO_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "CustomRCNN" 3 | RPN: 4 | PRE_NMS_TOPK_TEST: 6000 5 | POST_NMS_TOPK_TEST: 1000 6 | ROI_HEADS: 7 | NAME: "CustomRes5ROIHeads" 8 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 9 | RESNETS: 10 | DEPTH: 50 11 | ROI_BOX_HEAD: 12 | CLS_AGNOSTIC_BBOX_REG: True 13 | USE_SIGMOID_CE: True 14 | USE_ZEROSHOT_CLS: True 15 | ZEROSHOT_WEIGHT_PATH: 'datasets/metadata/coco_clip_a+cname.npy' 16 | IGNORE_ZERO_CATS: True 17 | CAT_FREQ_PATH: 'datasets/coco/zero-shot/instances_train2017_seen_2_oriorder_cat_info.json' 18 | DATASETS: 19 | TRAIN: ("coco_zeroshot_train_oriorder",) 20 | TEST: ("coco_generalized_zeroshot_val",) 21 | SOLVER: 22 | IMS_PER_BATCH: 16 23 | BASE_LR: 0.02 24 | STEPS: (60000, 80000) 25 | MAX_ITER: 90000 26 | CHECKPOINT_PERIOD: 1000000000 27 | INPUT: 28 | MIN_SIZE_TRAIN: (800,) 29 | VERSION: 2 30 | OUTPUT_DIR: output/Detic-COCO/auto 31 | FP16: True -------------------------------------------------------------------------------- /third_party/Detic/configs/BoxSup-C2_LCOCO_CLIP_CXT21k_640b32_4x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | WEIGHTS: '' 6 | TIMM: 7 | BASE_NAME: convnext_tiny_21k 8 | OUT_LEVELS: [2, 3, 4] 9 | PRETRAINED: True 10 | FPN: 11 | IN_FEATURES: ["layer2", "layer3", "layer4"] 12 | SOLVER: 13 | MAX_ITER: 180000 14 | IMS_PER_BATCH: 32 15 | BASE_LR: 0.0001 16 | DATASETS: 17 | TRAIN: ("lvis_v1_train+coco",) -------------------------------------------------------------------------------- /third_party/Detic/configs/BoxSup-C2_LCOCO_CLIP_R18_640b32_4x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | WEIGHTS: '' 6 | TIMM: 7 | BASE_NAME: resnet18 8 | PRETRAINED: True 9 | 
SOLVER: 10 | MAX_ITER: 180000 11 | IMS_PER_BATCH: 32 12 | BASE_LR: 0.0001 13 | DATASETS: 14 | TRAIN: ("lvis_v1_train+coco",) -------------------------------------------------------------------------------- /third_party/Detic/configs/BoxSup-C2_LCOCO_CLIP_R5021k_640b64_4x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | DATASETS: 6 | TRAIN: ("lvis_v1_train+coco",) -------------------------------------------------------------------------------- /third_party/Detic/configs/BoxSup-C2_LCOCO_CLIP_SwinB_896b32_4x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | WEIGHTS: "models/swin_base_patch4_window7_224_22k.pkl" 6 | BACKBONE: 7 | NAME: build_swintransformer_fpn_backbone 8 | SWIN: 9 | SIZE: B-22k 10 | FPN: 11 | IN_FEATURES: ["swin1", "swin2", "swin3"] 12 | SOLVER: 13 | MAX_ITER: 180000 14 | IMS_PER_BATCH: 32 15 | BASE_LR: 0.0001 16 | INPUT: 17 | TRAIN_SIZE: 896 18 | DATASETS: 19 | TRAIN: ("lvis_v1_train+coco",) -------------------------------------------------------------------------------- /third_party/Detic/configs/BoxSup-C2_L_CLIP_R5021k_640b64_4x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True -------------------------------------------------------------------------------- /third_party/Detic/configs/BoxSup-C2_L_CLIP_R5021k_640b64_4x_pomp.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | ZEROSHOT_WEIGHT_PATH: 'datasets/metadata/lvis_v1_clip_pomp+cname.npy' -------------------------------------------------------------------------------- /third_party/Detic/configs/BoxSup-C2_L_CLIP_SwinB_896b32_4x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | WEIGHTS: "models/swin_base_patch4_window7_224_22k.pkl" 6 | BACKBONE: 7 | NAME: build_swintransformer_fpn_backbone 8 | SWIN: 9 | SIZE: B-22k 10 | FPN: 11 | IN_FEATURES: ["swin1", "swin2", "swin3"] 12 | SOLVER: 13 | MAX_ITER: 180000 14 | IMS_PER_BATCH: 32 15 | BASE_LR: 0.0001 16 | INPUT: 17 | TRAIN_SIZE: 896 -------------------------------------------------------------------------------- /third_party/Detic/configs/BoxSup-C2_Lbase_CLIP_R5021k_640b64_4x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | DATASETS: 6 | TRAIN: ("lvis_v1_train_norare",) -------------------------------------------------------------------------------- /third_party/Detic/configs/BoxSup-C2_Lbase_CLIP_R5021k_640b64_4x_pomp.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | ZEROSHOT_WEIGHT_PATH: 'datasets/metadata/lvis_v1_clip_pomp+cname.npy' 6 | DATASETS: 7 | TRAIN: ("lvis_v1_train_norare",) -------------------------------------------------------------------------------- 
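The `_pomp` variants of the BoxSup configs above differ from their baselines in a single line: ZEROSHOT_WEIGHT_PATH points the zero-shot classifier at CLIP text embeddings of the category names computed with POMP's pretrained prompt rather than hand-written templates. A minimal sketch of how such a weight file is generated (a hand-written prompt is shown for brevity; the class list, backbone, output path, and the exact array layout expected by Detic's zero-shot classifier are illustrative and should be checked against the repo's own export script):

import clip
import numpy as np
import torch

classnames = ["person", "bicycle", "car"]  # stand-in for the full LVIS/COCO vocabulary

model, _ = clip.load("RN50", device="cpu")
with torch.no_grad():
    tokens = clip.tokenize(["a photo of a {}.".format(c) for c in classnames])
    emb = model.encode_text(tokens)
    emb = emb / emb.norm(dim=-1, keepdim=True)  # L2-normalize each class embedding

np.save("datasets/metadata/toy_clip+cname.npy", emb.numpy())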
/third_party/Detic/configs/BoxSup-C2_Lbase_CLIP_SwinB_896b32_4x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | WEIGHTS: "models/swin_base_patch4_window7_224_22k.pkl" 6 | BACKBONE: 7 | NAME: build_swintransformer_fpn_backbone 8 | SWIN: 9 | SIZE: B-22k 10 | FPN: 11 | IN_FEATURES: ["swin1", "swin2", "swin3"] 12 | SOLVER: 13 | MAX_ITER: 180000 14 | IMS_PER_BATCH: 32 15 | BASE_LR: 0.0001 16 | INPUT: 17 | TRAIN_SIZE: 896 18 | DATASETS: 19 | TRAIN: ("lvis_v1_train_norare",) -------------------------------------------------------------------------------- /third_party/Detic/configs/BoxSup-DeformDETR_L_R50_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-DeformDETR_L_R50_4x.yaml" 2 | SOLVER: 3 | IMS_PER_BATCH: 16 -------------------------------------------------------------------------------- /third_party/Detic/configs/BoxSup-DeformDETR_L_R50_4x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-DeformDETR_L_R50_4x.yaml" -------------------------------------------------------------------------------- /third_party/Detic/configs/BoxSup_OVCOCO_CLIP_R50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base_OVCOCO_C4_1x.yaml" 2 | -------------------------------------------------------------------------------- /third_party/Detic/configs/BoxSup_OVCOCO_CLIP_R50_1x_pomp.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base_OVCOCO_C4_1x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | ZEROSHOT_WEIGHT_PATH: 'datasets/metadata/coco_clip_pomp+cname.npy' 6 | -------------------------------------------------------------------------------- /third_party/Detic/configs/Detic_DeformDETR_LI_R50_4x_ft4x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-DeformDETR_L_R50_4x.yaml" 2 | MODEL: 3 | WEIGHTS: "models/BoxSup-DeformDETR_L_R50_4x.pth" 4 | INPUT: 5 | CUSTOM_AUG: ResizeShortestEdge 6 | MIN_SIZE_TRAIN_SAMPLING: range 7 | MIN_SIZE_TRAIN: [480, 800] 8 | DATASETS: 9 | TRAIN: ("lvis_v1_train","imagenet_lvis_v1") 10 | TEST: ("lvis_v1_val",) 11 | DATALOADER: 12 | SAMPLER_TRAIN: "MultiDatasetSampler" 13 | DATASET_RATIO: [1, 4] 14 | USE_DIFF_BS_SIZE: True 15 | DATASET_BS: [4, 16] 16 | USE_RFS: [True, False] 17 | DATASET_MIN_SIZES: [[480, 800], [240, 400]] 18 | DATASET_MAX_SIZES: [1333, 667] 19 | FILTER_EMPTY_ANNOTATIONS: False 20 | MULTI_DATASET_GROUPING: True 21 | DATASET_ANN: ['box', 'image'] 22 | WITH_IMAGE_LABELS: True 23 | -------------------------------------------------------------------------------- /third_party/Detic/configs/Detic_LCOCOI21k_CLIP_R5021k_640b32_4x_ft4x_max-size.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | DYNAMIC_CLASSIFIER: True 4 | ROI_BOX_HEAD: 5 | USE_ZEROSHOT_CLS: True 6 | IMAGE_LABEL_LOSS: 'max_size' 7 | ZEROSHOT_WEIGHT_PATH: 'datasets/metadata/lvis-21k_clip_a+cname.npy' 8 | USE_FED_LOSS: False # Federated loss is enabled when DYNAMIC_CLASSIFIER is on 9 | ROI_HEADS: 10 | NUM_CLASSES: 22047 11 | WEIGHTS: "output/Detic/BoxSup-C2_LCOCO_CLIP_R5021k_640b64_4x/model_final.pth" 12 | SOLVER: 13 | MAX_ITER: 180000 14 | IMS_PER_BATCH: 32 15 | BASE_LR: 0.0001 16 | WARMUP_ITERS: 
1000 17 | WARMUP_FACTOR: 0.001 18 | DATASETS: 19 | TRAIN: ("lvis_v1_train+coco","imagenet_lvis-22k") 20 | DATALOADER: 21 | SAMPLER_TRAIN: "MultiDatasetSampler" 22 | DATASET_RATIO: [1, 4] 23 | USE_DIFF_BS_SIZE: True 24 | DATASET_BS: [4, 16] 25 | DATASET_INPUT_SIZE: [640, 320] 26 | USE_RFS: [True, False] 27 | DATASET_INPUT_SCALE: [[0.1, 2.0], [0.5, 1.5]] 28 | FILTER_EMPTY_ANNOTATIONS: False 29 | MULTI_DATASET_GROUPING: True 30 | DATASET_ANN: ['box', 'image'] 31 | NUM_WORKERS: 2 32 | USE_TAR_DATASET: True 33 | WITH_IMAGE_LABELS: True -------------------------------------------------------------------------------- /third_party/Detic/configs/Detic_LI_CLIP_R5021k_640b64_4x_ft4x_max-size.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | IMAGE_LABEL_LOSS: 'max_size' 6 | WEIGHTS: "models/BoxSup-C2_L_CLIP_R5021k_640b64_4x.pth" 7 | SOLVER: 8 | MAX_ITER: 90000 9 | IMS_PER_BATCH: 64 10 | BASE_LR: 0.0002 11 | WARMUP_ITERS: 1000 12 | WARMUP_FACTOR: 0.001 13 | DATASETS: 14 | TRAIN: ("lvis_v1_train","imagenet_lvis_v1") 15 | DATALOADER: 16 | SAMPLER_TRAIN: "MultiDatasetSampler" 17 | DATASET_RATIO: [1, 4] 18 | USE_DIFF_BS_SIZE: True 19 | DATASET_BS: [8, 32] 20 | DATASET_INPUT_SIZE: [640, 320] 21 | USE_RFS: [True, False] 22 | DATASET_INPUT_SCALE: [[0.1, 2.0], [0.5, 1.5]] 23 | FILTER_EMPTY_ANNOTATIONS: False 24 | MULTI_DATASET_GROUPING: True 25 | DATASET_ANN: ['box', 'image'] 26 | NUM_WORKERS: 8 27 | WITH_IMAGE_LABELS: True -------------------------------------------------------------------------------- /third_party/Detic/configs/Detic_LI_CLIP_R5021k_640b64_4x_ft4x_max-size_pomp.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | IMAGE_LABEL_LOSS: 'max_size' 6 | ZEROSHOT_WEIGHT_PATH: 'datasets/metadata/lvis_v1_clip_pomp+cname.npy' 7 | WEIGHTS: "output/Detic/BoxSup-C2_L_CLIP_R5021k_640b64_4x_pomp/model_final.pth" 8 | SOLVER: 9 | MAX_ITER: 90000 10 | IMS_PER_BATCH: 64 11 | BASE_LR: 0.0002 12 | WARMUP_ITERS: 1000 13 | WARMUP_FACTOR: 0.001 14 | DATASETS: 15 | TRAIN: ("lvis_v1_train","imagenet_lvis_v1") 16 | DATALOADER: 17 | SAMPLER_TRAIN: "MultiDatasetSampler" 18 | DATASET_RATIO: [1, 4] 19 | USE_DIFF_BS_SIZE: True 20 | DATASET_BS: [8, 32] 21 | DATASET_INPUT_SIZE: [640, 320] 22 | USE_RFS: [True, False] 23 | DATASET_INPUT_SCALE: [[0.1, 2.0], [0.5, 1.5]] 24 | FILTER_EMPTY_ANNOTATIONS: False 25 | MULTI_DATASET_GROUPING: True 26 | DATASET_ANN: ['box', 'image'] 27 | NUM_WORKERS: 8 28 | WITH_IMAGE_LABELS: True -------------------------------------------------------------------------------- /third_party/Detic/configs/Detic_LI_CLIP_SwinB_896b32_4x_ft4x_max-size.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | IMAGE_LABEL_LOSS: 'max_size' 6 | BACKBONE: 7 | NAME: build_swintransformer_fpn_backbone 8 | SWIN: 9 | SIZE: B-22k 10 | FPN: 11 | IN_FEATURES: ["swin1", "swin2", "swin3"] 12 | WEIGHTS: "models/BoxSup-C2_L_CLIP_SwinB_896b32_4x.pth" 13 | SOLVER: 14 | MAX_ITER: 180000 15 | IMS_PER_BATCH: 32 16 | BASE_LR: 0.0001 17 | WARMUP_ITERS: 1000 18 | WARMUP_FACTOR: 0.001 19 | DATASETS: 20 | TRAIN: ("lvis_v1_train","imagenet_lvis_v1") 21 | DATALOADER: 22 | SAMPLER_TRAIN: "MultiDatasetSampler" 23 | DATASET_RATIO: 
[1, 4] 24 | USE_DIFF_BS_SIZE: True 25 | DATASET_BS: [4, 16] 26 | DATASET_INPUT_SIZE: [896, 448] 27 | USE_RFS: [True, False] 28 | DATASET_INPUT_SCALE: [[0.1, 2.0], [0.5, 1.5]] 29 | FILTER_EMPTY_ANNOTATIONS: False 30 | MULTI_DATASET_GROUPING: True 31 | DATASET_ANN: ['box', 'image'] 32 | NUM_WORKERS: 8 33 | WITH_IMAGE_LABELS: True -------------------------------------------------------------------------------- /third_party/Detic/configs/Detic_LbaseCCcapimg_CLIP_R5021k_640b64_4x_ft4x_max-size.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | WITH_CAPTION: True 4 | SYNC_CAPTION_BATCH: True 5 | ROI_BOX_HEAD: 6 | ADD_IMAGE_BOX: True # caption loss is added to the image-box 7 | USE_ZEROSHOT_CLS: True 8 | IMAGE_LABEL_LOSS: 'max_size' 9 | WEIGHTS: "models/BoxSup-C2_Lbase_CLIP_R5021k_640b64_4x.pth" 10 | SOLVER: 11 | MAX_ITER: 90000 12 | IMS_PER_BATCH: 64 13 | BASE_LR: 0.0002 14 | WARMUP_ITERS: 1000 15 | WARMUP_FACTOR: 0.001 16 | DATASETS: 17 | TRAIN: ("lvis_v1_train_norare","cc3m_v1_train_tags") 18 | DATALOADER: 19 | SAMPLER_TRAIN: "MultiDatasetSampler" 20 | DATASET_RATIO: [1, 4] 21 | USE_DIFF_BS_SIZE: True 22 | DATASET_BS: [8, 32] 23 | DATASET_INPUT_SIZE: [640, 320] 24 | USE_RFS: [True, False] 25 | DATASET_INPUT_SCALE: [[0.1, 2.0], [0.5, 1.5]] 26 | FILTER_EMPTY_ANNOTATIONS: False 27 | MULTI_DATASET_GROUPING: True 28 | DATASET_ANN: ['box', 'captiontag'] 29 | NUM_WORKERS: 8 30 | WITH_IMAGE_LABELS: True -------------------------------------------------------------------------------- /third_party/Detic/configs/Detic_LbaseCCimg_CLIP_R5021k_640b64_4x_ft4x_max-size.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | IMAGE_LABEL_LOSS: 'max_size' 6 | WEIGHTS: "models/BoxSup-C2_Lbase_CLIP_R5021k_640b64_4x.pth" 7 | SOLVER: 8 | MAX_ITER: 90000 9 | IMS_PER_BATCH: 64 10 | BASE_LR: 0.0002 11 | WARMUP_ITERS: 1000 12 | WARMUP_FACTOR: 0.001 13 | DATASETS: 14 | TRAIN: ("lvis_v1_train_norare","cc3m_v1_train_tags") 15 | DATALOADER: 16 | SAMPLER_TRAIN: "MultiDatasetSampler" 17 | DATASET_RATIO: [1, 4] 18 | USE_DIFF_BS_SIZE: True 19 | DATASET_BS: [8, 32] 20 | DATASET_INPUT_SIZE: [640, 320] 21 | USE_RFS: [True, False] 22 | DATASET_INPUT_SCALE: [[0.1, 2.0], [0.5, 1.5]] 23 | FILTER_EMPTY_ANNOTATIONS: False 24 | MULTI_DATASET_GROUPING: True 25 | DATASET_ANN: ['box', 'image'] 26 | NUM_WORKERS: 8 27 | WITH_IMAGE_LABELS: True -------------------------------------------------------------------------------- /third_party/Detic/configs/Detic_LbaseI_CLIP_R5021k_640b64_4x_ft4x_max-size.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | IMAGE_LABEL_LOSS: 'max_size' 6 | WEIGHTS: "models/BoxSup-C2_Lbase_CLIP_R5021k_640b64_4x.pth" 7 | SOLVER: 8 | MAX_ITER: 90000 9 | IMS_PER_BATCH: 64 10 | BASE_LR: 0.0002 11 | WARMUP_ITERS: 1000 12 | WARMUP_FACTOR: 0.001 13 | DATASETS: 14 | TRAIN: ("lvis_v1_train_norare","imagenet_lvis_v1") 15 | DATALOADER: 16 | SAMPLER_TRAIN: "MultiDatasetSampler" 17 | DATASET_RATIO: [1, 4] 18 | USE_DIFF_BS_SIZE: True 19 | DATASET_BS: [8, 32] 20 | DATASET_INPUT_SIZE: [640, 320] 21 | USE_RFS: [True, False] 22 | DATASET_INPUT_SCALE: [[0.1, 2.0], [0.5, 1.5]] 23 | FILTER_EMPTY_ANNOTATIONS: False 24 | MULTI_DATASET_GROUPING: True 25 | DATASET_ANN: 
['box', 'image'] 26 | NUM_WORKERS: 8 27 | WITH_IMAGE_LABELS: True -------------------------------------------------------------------------------- /third_party/Detic/configs/Detic_LbaseI_CLIP_R5021k_640b64_4x_ft4x_max-size_pomp.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | IMAGE_LABEL_LOSS: 'max_size' 6 | ZEROSHOT_WEIGHT_PATH: 'datasets/metadata/lvis_v1_clip_pomp+cname.npy' 7 | WEIGHTS: "output/Detic/BoxSup-C2_Lbase_CLIP_R5021k_640b64_4x_pomp/model_final.pth" 8 | SOLVER: 9 | MAX_ITER: 90000 10 | IMS_PER_BATCH: 64 11 | BASE_LR: 0.0002 12 | WARMUP_ITERS: 1000 13 | WARMUP_FACTOR: 0.001 14 | DATASETS: 15 | TRAIN: ("lvis_v1_train_norare","imagenet_lvis_v1") 16 | DATALOADER: 17 | SAMPLER_TRAIN: "MultiDatasetSampler" 18 | DATASET_RATIO: [1, 4] 19 | USE_DIFF_BS_SIZE: True 20 | DATASET_BS: [8, 32] 21 | DATASET_INPUT_SIZE: [640, 320] 22 | USE_RFS: [True, False] 23 | DATASET_INPUT_SCALE: [[0.1, 2.0], [0.5, 1.5]] 24 | FILTER_EMPTY_ANNOTATIONS: False 25 | MULTI_DATASET_GROUPING: True 26 | DATASET_ANN: ['box', 'image'] 27 | NUM_WORKERS: 8 28 | WITH_IMAGE_LABELS: True -------------------------------------------------------------------------------- /third_party/Detic/configs/Detic_LbaseI_CLIP_R5021k_640b64_4x_ft4x_predicted.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | IMAGE_LABEL_LOSS: 'max_score' 6 | WEIGHTS: "models/BoxSup-C2_Lbase_CLIP_R5021k_640b64_4x.pth" 7 | SOLVER: 8 | MAX_ITER: 90000 9 | IMS_PER_BATCH: 64 10 | BASE_LR: 0.0002 11 | WARMUP_ITERS: 1000 12 | WARMUP_FACTOR: 0.001 13 | DATASETS: 14 | TRAIN: ("lvis_v1_train_norare","imagenet_lvis_v1") 15 | DATALOADER: 16 | SAMPLER_TRAIN: "MultiDatasetSampler" 17 | DATASET_RATIO: [1, 4] 18 | USE_DIFF_BS_SIZE: True 19 | DATASET_BS: [8, 32] 20 | DATASET_INPUT_SIZE: [640, 320] 21 | USE_RFS: [True, False] 22 | DATASET_INPUT_SCALE: [[0.1, 2.0], [0.5, 1.5]] 23 | FILTER_EMPTY_ANNOTATIONS: False 24 | MULTI_DATASET_GROUPING: True 25 | DATASET_ANN: ['box', 'image'] 26 | NUM_WORKERS: 8 27 | WITH_IMAGE_LABELS: True -------------------------------------------------------------------------------- /third_party/Detic/configs/Detic_LbaseI_CLIP_SwinB_896b32_4x_ft4x_max-size.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | IMAGE_LABEL_LOSS: 'max_size' 6 | BACKBONE: 7 | NAME: build_swintransformer_fpn_backbone 8 | SWIN: 9 | SIZE: B-22k 10 | FPN: 11 | IN_FEATURES: ["swin1", "swin2", "swin3"] 12 | WEIGHTS: "models/BoxSup-C2_Lbase_CLIP_SwinB_896b32_4x.pth" 13 | SOLVER: 14 | MAX_ITER: 180000 15 | IMS_PER_BATCH: 32 16 | BASE_LR: 0.0001 17 | WARMUP_ITERS: 1000 18 | WARMUP_FACTOR: 0.001 19 | DATASETS: 20 | TRAIN: ("lvis_v1_train_norare","imagenet_lvis_v1") 21 | DATALOADER: 22 | SAMPLER_TRAIN: "MultiDatasetSampler" 23 | DATASET_RATIO: [1, 4] 24 | USE_DIFF_BS_SIZE: True 25 | DATASET_BS: [4, 16] 26 | DATASET_INPUT_SIZE: [896, 448] 27 | USE_RFS: [True, False] 28 | DATASET_INPUT_SCALE: [[0.1, 2.0], [0.5, 1.5]] 29 | FILTER_EMPTY_ANNOTATIONS: False 30 | MULTI_DATASET_GROUPING: True 31 | DATASET_ANN: ['box', 'image'] 32 | NUM_WORKERS: 8 33 | WITH_IMAGE_LABELS: True -------------------------------------------------------------------------------- 
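These finetuning configs follow a two-stage pattern: WEIGHTS resumes from the matching BoxSup run, and only then is image-labelled data (imagenet_lvis_v1) mixed in at the 1:4 DATASET_RATIO. Following detectron2 conventions, a config like the one above is launched along the lines of `python train_net.py --num-gpus 8 --config-file configs/Detic_LbaseI_CLIP_R5021k_640b64_4x_ft4x_max-size_pomp.yaml` from the Detic root (script name and GPU count as in Detic's docs; adjust to your setup).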
/third_party/Detic/configs/Detic_OVCOCO_CLIP_R50_1x_caption.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base_OVCOCO_C4_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "models/BoxSup_OVCOCO_CLIP_R50_1x.pth" 4 | WITH_CAPTION: True 5 | SYNC_CAPTION_BATCH: True 6 | ROI_BOX_HEAD: 7 | WS_NUM_PROPS: 1 8 | ADD_IMAGE_BOX: True 9 | NEG_CAP_WEIGHT: 1.0 10 | SOLVER: 11 | IMS_PER_BATCH: 16 12 | BASE_LR: 0.02 13 | STEPS: (60000, 80000) 14 | MAX_ITER: 90000 15 | DATASETS: 16 | TRAIN: ("coco_zeroshot_train_oriorder", "coco_caption_train_tags") 17 | INPUT: 18 | CUSTOM_AUG: ResizeShortestEdge 19 | MIN_SIZE_TRAIN_SAMPLING: range 20 | MIN_SIZE_TRAIN: (800, 800) 21 | DATALOADER: 22 | SAMPLER_TRAIN: "MultiDatasetSampler" 23 | DATASET_RATIO: [1, 4] 24 | USE_DIFF_BS_SIZE: True 25 | DATASET_BS: [2, 8] 26 | USE_RFS: [False, False] 27 | DATASET_MIN_SIZES: [[800, 800], [400, 400]] 28 | DATASET_MAX_SIZES: [1333, 667] 29 | FILTER_EMPTY_ANNOTATIONS: False 30 | MULTI_DATASET_GROUPING: True 31 | DATASET_ANN: ['box', 'caption'] 32 | NUM_WORKERS: 8 33 | WITH_IMAGE_LABELS: True -------------------------------------------------------------------------------- /third_party/Detic/configs/Detic_OVCOCO_CLIP_R50_1x_max-size.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base_OVCOCO_C4_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "models/BoxSup_OVCOCO_CLIP_R50_1x.pth" 4 | ROI_BOX_HEAD: 5 | WS_NUM_PROPS: 32 6 | IMAGE_LABEL_LOSS: 'max_size' 7 | SOLVER: 8 | IMS_PER_BATCH: 16 9 | BASE_LR: 0.02 10 | STEPS: (60000, 80000) 11 | MAX_ITER: 90000 12 | DATASETS: 13 | TRAIN: ("coco_zeroshot_train_oriorder", "coco_caption_train_tags") 14 | INPUT: 15 | CUSTOM_AUG: ResizeShortestEdge 16 | MIN_SIZE_TRAIN_SAMPLING: range 17 | MIN_SIZE_TRAIN: (800, 800) 18 | DATALOADER: 19 | SAMPLER_TRAIN: "MultiDatasetSampler" 20 | DATASET_RATIO: [1, 4] 21 | USE_DIFF_BS_SIZE: True 22 | DATASET_BS: [2, 8] 23 | USE_RFS: [False, False] 24 | DATASET_MIN_SIZES: [[800, 800], [400, 400]] 25 | DATASET_MAX_SIZES: [1333, 667] 26 | FILTER_EMPTY_ANNOTATIONS: False 27 | MULTI_DATASET_GROUPING: True 28 | DATASET_ANN: ['box', 'image'] 29 | NUM_WORKERS: 8 30 | WITH_IMAGE_LABELS: True -------------------------------------------------------------------------------- /third_party/Detic/configs/Detic_OVCOCO_CLIP_R50_1x_max-size_caption.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base_OVCOCO_C4_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "models/BoxSup_OVCOCO_CLIP_R50_1x.pth" 4 | WITH_CAPTION: True 5 | SYNC_CAPTION_BATCH: True 6 | ROI_BOX_HEAD: 7 | WS_NUM_PROPS: 32 8 | ADD_IMAGE_BOX: True # caption loss is added to the image-box 9 | IMAGE_LABEL_LOSS: 'max_size' 10 | 11 | NEG_CAP_WEIGHT: 1.0 12 | SOLVER: 13 | IMS_PER_BATCH: 16 14 | BASE_LR: 0.02 15 | STEPS: (60000, 80000) 16 | MAX_ITER: 90000 17 | DATASETS: 18 | TRAIN: ("coco_zeroshot_train_oriorder", "coco_caption_train_tags") 19 | INPUT: 20 | CUSTOM_AUG: ResizeShortestEdge 21 | MIN_SIZE_TRAIN_SAMPLING: range 22 | MIN_SIZE_TRAIN: (800, 800) 23 | DATALOADER: 24 | SAMPLER_TRAIN: "MultiDatasetSampler" 25 | DATASET_RATIO: [1, 4] 26 | USE_DIFF_BS_SIZE: True 27 | DATASET_BS: [2, 8] 28 | USE_RFS: [False, False] 29 | DATASET_MIN_SIZES: [[800, 800], [400, 400]] 30 | DATASET_MAX_SIZES: [1333, 667] 31 | FILTER_EMPTY_ANNOTATIONS: False 32 | MULTI_DATASET_GROUPING: True 33 | DATASET_ANN: ['box', 'captiontag'] 34 | NUM_WORKERS: 8 35 | WITH_IMAGE_LABELS: True 
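The 'max_size' IMAGE_LABEL_LOSS recurring in these configs is Detic's central trick: when a training source provides only image-level tags (DATASET_ANN 'image' or 'captiontag'), the classification loss is applied to the largest proposal instead of trying to localize the tag, and WS_NUM_PROPS sets how many proposals per image feed this weak supervision. A conceptual sketch of the rule, not Detic's actual implementation (which lives in detic_roi_heads.py and uses its own sigmoid-based losses):

import torch
import torch.nn.functional as F


def max_size_image_label_loss(proposal_boxes, cls_logits, image_label):
    """proposal_boxes: (N, 4) in xyxy format; cls_logits: (N, C); image_label: int."""
    areas = (proposal_boxes[:, 2] - proposal_boxes[:, 0]) * \
        (proposal_boxes[:, 3] - proposal_boxes[:, 1])
    biggest = areas.argmax()  # the max-size proposal stands in for the whole image
    target = torch.zeros_like(cls_logits[biggest])
    target[image_label] = 1.0
    return F.binary_cross_entropy_with_logits(cls_logits[biggest], target)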
-------------------------------------------------------------------------------- /third_party/Detic/configs/debug.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | IMAGE_LABEL_LOSS: 'max_size' 6 | ZEROSHOT_WEIGHT_PATH: 'datasets/metadata/lvis_v1_clip_detpro+cname.npy' 7 | WEIGHTS: "output/Detic/BoxSup-C2_Lbase_CLIP_R5021k_640b64_4x_detpro/model_final.pth" 8 | SOLVER: 9 | MAX_ITER: 90000 10 | IMS_PER_BATCH: 64 11 | BASE_LR: 0.0002 12 | WARMUP_ITERS: 1000 13 | WARMUP_FACTOR: 0.001 14 | DATASETS: 15 | TRAIN: ("lvis_v1_train_norare","imagenet_lvis_v1") 16 | DATALOADER: 17 | SAMPLER_TRAIN: "MultiDatasetSampler" 18 | DATASET_RATIO: [1, 4] 19 | USE_DIFF_BS_SIZE: True 20 | DATASET_BS: [8, 32] 21 | DATASET_INPUT_SIZE: [640, 320] 22 | USE_RFS: [True, False] 23 | DATASET_INPUT_SCALE: [[0.1, 2.0], [0.5, 1.5]] 24 | FILTER_EMPTY_ANNOTATIONS: False 25 | MULTI_DATASET_GROUPING: True 26 | DATASET_ANN: ['box', 'image'] 27 | NUM_WORKERS: 8 28 | WITH_IMAGE_LABELS: True -------------------------------------------------------------------------------- /third_party/Detic/datasets/metadata/coco_clip_a+cname.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/datasets/metadata/coco_clip_a+cname.npy -------------------------------------------------------------------------------- /third_party/Detic/datasets/metadata/coco_clip_pomp+cname.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/datasets/metadata/coco_clip_pomp+cname.npy -------------------------------------------------------------------------------- /third_party/Detic/datasets/metadata/lvis_v1_clip_a+cname.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/datasets/metadata/lvis_v1_clip_a+cname.npy -------------------------------------------------------------------------------- /third_party/Detic/datasets/metadata/lvis_v1_clip_pomp+cname.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/datasets/metadata/lvis_v1_clip_pomp+cname.npy -------------------------------------------------------------------------------- /third_party/Detic/datasets/metadata/o365_clip_a+cnamefix.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/datasets/metadata/o365_clip_a+cnamefix.npy -------------------------------------------------------------------------------- /third_party/Detic/datasets/metadata/o365_fixname_clip_pomp+cname.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/datasets/metadata/o365_fixname_clip_pomp+cname.npy 
-------------------------------------------------------------------------------- /third_party/Detic/datasets/metadata/oid_clip_a+cname.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/datasets/metadata/oid_clip_a+cname.npy -------------------------------------------------------------------------------- /third_party/Detic/detic/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .modeling.meta_arch import custom_rcnn 3 | from .modeling.roi_heads import detic_roi_heads 4 | from .modeling.roi_heads import res5_roi_heads 5 | from .modeling.backbone import swintransformer 6 | from .modeling.backbone import timm 7 | 8 | 9 | from .data.datasets import lvis_v1 10 | from .data.datasets import imagenet 11 | from .data.datasets import cc 12 | from .data.datasets import objects365 13 | from .data.datasets import oid 14 | from .data.datasets import coco_zeroshot 15 | 16 | try: 17 | from .modeling.meta_arch import d2_deformable_detr 18 | except: 19 | pass -------------------------------------------------------------------------------- /third_party/Detic/detic/data/datasets/cc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import logging 3 | import os 4 | 5 | from detectron2.data.datasets.builtin_meta import _get_builtin_metadata 6 | from detectron2.data.datasets.lvis import get_lvis_instances_meta 7 | from .lvis_v1 import custom_register_lvis_instances 8 | 9 | _CUSTOM_SPLITS = { 10 | "cc3m_v1_val": ("cc3m/validation/", "cc3m/val_image_info.json"), 11 | "cc3m_v1_train": ("cc3m/training/", "cc3m/train_image_info.json"), 12 | "cc3m_v1_train_tags": ("cc3m/training/", "cc3m/train_image_info_tags.json"), 13 | 14 | } 15 | 16 | for key, (image_root, json_file) in _CUSTOM_SPLITS.items(): 17 | custom_register_lvis_instances( 18 | key, 19 | get_lvis_instances_meta('lvis_v1'), 20 | os.path.join("datasets", json_file) if "://" not in json_file else json_file, 21 | os.path.join("datasets", image_root), 22 | ) 23 | 24 | -------------------------------------------------------------------------------- /third_party/Detic/docs/example_output_custom.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/docs/example_output_custom.jpeg -------------------------------------------------------------------------------- /third_party/Detic/docs/example_output_lvis.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/docs/example_output_lvis.jpeg -------------------------------------------------------------------------------- /third_party/Detic/docs/teaser.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/docs/teaser.jpeg -------------------------------------------------------------------------------- /third_party/Detic/extract.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import imghdr 3 | from PIL import Image 4 | import multiprocessing 5 | from concurrent.futures import ThreadPoolExecutor 6 | 7 | train_path = '/home/ubuntu/efs/imagenet/ImageNet-21K/images/train' 8 | img_folder_idx = [f.replace('.tar', '') for f in os.listdir(train_path) if f.endswith('.tar')] # len=19167 9 | print('len(img_folder_idx) = ', len(img_folder_idx)) 10 | 11 | img_paths = [] 12 | for i, folder_idx in enumerate(img_folder_idx): 13 | img_folder_path = os.path.join(train_path, folder_idx) 14 | if os.path.exists(img_folder_path): 15 | img_names = os.listdir(img_folder_path) 16 | current_img_paths = [os.path.join(img_folder_path, img_name) for img_name in img_names] 17 | img_paths.extend(current_img_paths) 18 | 19 | print('len(img_paths) = ', len(img_paths)) 20 | 21 | 22 | def check(img_path): 23 | # if imghdr.what(img_path) is None: 24 | try: 25 | Image.open(img_path).verify() # verify() checks the header without decoding the whole image 26 | except (IOError, SyntaxError): 27 | print('remove', img_path) 28 | os.remove(img_path) 29 | 30 | with ThreadPoolExecutor(128) as executor: 31 | list(executor.map(check, img_paths)) # drain the iterator so worker exceptions surface 32 | 33 | 34 | -------------------------------------------------------------------------------- /third_party/Detic/figures/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/figures/.DS_Store -------------------------------------------------------------------------------- /third_party/Detic/figures/Detic_LI_CLIP_R5021k_640b64_4x_ft4x_max-size/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/figures/Detic_LI_CLIP_R5021k_640b64_4x_ft4x_max-size/.DS_Store -------------------------------------------------------------------------------- /third_party/Detic/figures/Detic_LI_CLIP_R5021k_640b64_4x_ft4x_max-size/inference_lvis_v1_val/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/figures/Detic_LI_CLIP_R5021k_640b64_4x_ft4x_max-size/inference_lvis_v1_val/.DS_Store -------------------------------------------------------------------------------- /third_party/Detic/figures/Detic_LI_CLIP_R5021k_640b64_4x_ft4x_max-size_gpt/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/figures/Detic_LI_CLIP_R5021k_640b64_4x_ft4x_max-size_gpt/.DS_Store -------------------------------------------------------------------------------- /third_party/Detic/requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | mss 3 | timm 4 | dataclasses 5 | ftfy 6 | regex 7 | fasttext 8 | scikit-learn 9 | lvis 10 | nltk 11 | git+https://github.com/openai/CLIP.git 12 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | Facebook has adopted a Code of
Conduct that we expect project participants to adhere to. 4 | Please read the [full text](https://code.fb.com/codeofconduct/) 5 | so that you can understand what actions will and will not be tolerated. 6 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | Please select an issue template from 3 | https://github.com/facebookresearch/detectron2/issues/new/choose . 4 | 5 | Otherwise your issue will be closed. 6 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | # require an issue template to be chosen 2 | blank_issues_enabled: false 3 | 4 | contact_links: 5 | - name: How-To / All Other Questions 6 | url: https://github.com/facebookresearch/detectron2/discussions 7 | about: Use "github discussions" for community support on general questions that don't belong to the above issue categories 8 | - name: Detectron2 Documentation 9 | url: https://detectron2.readthedocs.io/index.html 10 | about: Check if your question is answered in tutorials or API docs 11 | 12 | # Unexpected behaviors & bugs are split to two templates. 13 | # When they are one template, users think "it's not a bug" and don't choose the template. 14 | # 15 | # But the file name is still "unexpected-problems-bugs.md" so that old references 16 | # to this issue template still works. 17 | # It's ok since this template should be a superset of "bugs.md" (unexpected behaviors is a superset of bugs) 18 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/.github/ISSUE_TEMPLATE/documentation.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F4DA Documentation Issue" 3 | about: Report a problem about existing documentation, comments, website or tutorials. 4 | labels: documentation 5 | 6 | --- 7 | 8 | ## 📚 Documentation Issue 9 | 10 | This issue category is for problems about existing documentation, not for asking how-to questions. 11 | 12 | * Provide a link to an existing documentation/comment/tutorial: 13 | 14 | * How should the above documentation/comment/tutorial improve: 15 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | Thanks for your contribution! 2 | 3 | If you're sending a large PR (e.g., >100 lines), 4 | please open an issue first about the feature / bug, and indicate how you want to contribute. 5 | 6 | We do not always accept features. 7 | See https://detectron2.readthedocs.io/notes/contributing.html#pull-requests about how we handle PRs. 8 | 9 | Before submitting a PR, please run `dev/linter.sh` to lint the code. 
10 | 11 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/.github/workflows/remove-needs-reply.yml: -------------------------------------------------------------------------------- 1 | name: Remove needs-more-info label 2 | 3 | on: 4 | issue_comment: 5 | types: [created] 6 | issues: 7 | types: [edited] 8 | 9 | jobs: 10 | remove-needs-more-info-label: 11 | runs-on: ubuntu-latest 12 | # 1. issue_comment events could include PR comment, filter them out 13 | # 2. Only trigger action if event was produced by the original author 14 | if: ${{ !github.event.issue.pull_request && github.event.sender.login == github.event.issue.user.login }} 15 | steps: 16 | - name: Remove needs-more-info label 17 | uses: octokit/request-action@v2.x 18 | continue-on-error: true 19 | with: 20 | route: DELETE /repos/:repository/issues/:issue/labels/:label 21 | repository: ${{ github.repository }} 22 | issue: ${{ github.event.issue.number }} 23 | label: needs-more-info 24 | env: 25 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 26 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/.gitignore: -------------------------------------------------------------------------------- 1 | third_party/detectron2 2 | slurm* 3 | # output dir 4 | output 5 | instant_test_output 6 | inference_test_output 7 | 8 | 9 | *.png 10 | *.json 11 | *.diff 12 | # *.jpg 13 | !/projects/DensePose/doc/images/*.jpg 14 | 15 | # compilation and distribution 16 | __pycache__ 17 | _ext 18 | *.pyc 19 | *.pyd 20 | *.so 21 | *.dll 22 | *.egg-info/ 23 | build/ 24 | dist/ 25 | wheels/ 26 | 27 | # pytorch/python/numpy formats 28 | *.pth 29 | *.pkl 30 | *.npy 31 | *.ts 32 | model_ts*.txt 33 | 34 | # ipython/jupyter notebooks 35 | *.ipynb 36 | **/.ipynb_checkpoints/ 37 | 38 | # Editor temporaries 39 | *.swn 40 | *.swo 41 | *.swp 42 | *~ 43 | 44 | # editor settings 45 | .idea 46 | .vscode 47 | _darcs 48 | 49 | # project dirs 50 | /detectron2/model_zoo/configs 51 | /datasets/* 52 | !/datasets/*.* 53 | !/datasets/lvis/ 54 | /datasets/lvis/* 55 | !/datasets/lvis/lvis_v1_train_cat_info.json 56 | /projects/*/datasets 57 | /models 58 | /snippet 59 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/centernet/__init__.py: -------------------------------------------------------------------------------- 1 | from .modeling.meta_arch.centernet_detector import CenterNetDetector 2 | from .modeling.dense_heads.centernet import CenterNet 3 | from .modeling.roi_heads.custom_roi_heads import CustomROIHeads, CustomCascadeROIHeads 4 | 5 | from .modeling.backbone.fpn_p5 import build_p67_resnet_fpn_backbone 6 | from .modeling.backbone.dla import build_dla_backbone 7 | from .modeling.backbone.dlafpn import build_dla_fpn3_backbone 8 | from .modeling.backbone.bifpn import build_resnet_bifpn_backbone 9 | from .modeling.backbone.bifpn_fcos import build_fcos_resnet_bifpn_backbone 10 | from .modeling.backbone.res2net import build_p67_res2net_fpn_backbone 11 | 12 | from .data.datasets.objects365 import categories_v1 13 | from .data.datasets.coco import _PREDEFINED_SPLITS_COCO 14 | from .data.datasets import nuimages 15 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/centernet/modeling/layers/ml_nms.py: -------------------------------------------------------------------------------- 1 | from 
detectron2.layers import batched_nms 2 | 3 | 4 | def ml_nms(boxlist, nms_thresh, max_proposals=-1, 5 | score_field="scores", label_field="labels"): 6 | """ 7 | Performs non-maximum suppression on a boxlist, with scores specified 8 | in a boxlist field via score_field. 9 | Arguments: 10 | boxlist(BoxList) 11 | nms_thresh (float) 12 | max_proposals (int): if > 0, then only the top max_proposals are kept 13 | after non-maximum suppression 14 | score_field (str) 15 | """ 16 | if nms_thresh <= 0: 17 | return boxlist 18 | if boxlist.has('pred_boxes'): 19 | boxes = boxlist.pred_boxes.tensor 20 | labels = boxlist.pred_classes 21 | else: 22 | boxes = boxlist.proposal_boxes.tensor 23 | labels = boxlist.proposal_boxes.tensor.new_zeros( 24 | len(boxlist.proposal_boxes.tensor)) 25 | scores = boxlist.scores 26 | 27 | keep = batched_nms(boxes, scores, labels, nms_thresh) 28 | if max_proposals > 0: 29 | keep = keep[: max_proposals] 30 | boxlist = boxlist[keep] 31 | return boxlist 32 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/centernet/modeling/roi_heads/fed_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import json 3 | import numpy as np 4 | from torch.nn import functional as F 5 | 6 | def load_class_freq( 7 | path='datasets/lvis/lvis_v1_train_cat_info.json', 8 | freq_weight=0.5): 9 | cat_info = json.load(open(path, 'r')) 10 | cat_info = torch.tensor( 11 | [c['image_count'] for c in sorted(cat_info, key=lambda x: x['id'])]) 12 | freq_weight = cat_info.float() ** freq_weight 13 | return freq_weight 14 | 15 | def get_fed_loss_inds( 16 | gt_classes, num_sample_cats=50, C=1203, \ 17 | weight=None, fed_cls_inds=-1): 18 | appeared = torch.unique(gt_classes) # C' 19 | prob = appeared.new_ones(C + 1).float() 20 | prob[-1] = 0 21 | if len(appeared) < num_sample_cats: 22 | if weight is not None: 23 | prob[:C] = weight.float().clone() 24 | prob[appeared] = 0 25 | if fed_cls_inds > 0: 26 | prob[fed_cls_inds:] = 0 27 | more_appeared = torch.multinomial( 28 | prob, num_sample_cats - len(appeared), 29 | replacement=False) 30 | appeared = torch.cat([appeared, more_appeared]) 31 | return appeared -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/Base-CenterNet-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "CenterNetDetector" 3 | PROPOSAL_GENERATOR: 4 | NAME: "CenterNet" 5 | BACKBONE: 6 | NAME: "build_p67_resnet_fpn_backbone" 7 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 8 | RESNETS: 9 | DEPTH: 50 10 | OUT_FEATURES: ["res3", "res4", "res5"] 11 | FPN: 12 | IN_FEATURES: ["res3", "res4", "res5"] 13 | DATASETS: 14 | TRAIN: ("coco_2017_train",) 15 | TEST: ("coco_2017_val",) 16 | SOLVER: 17 | IMS_PER_BATCH: 16 18 | BASE_LR: 0.01 19 | STEPS: (60000, 80000) 20 | MAX_ITER: 90000 21 | CHECKPOINT_PERIOD: 1000000000 22 | WARMUP_ITERS: 4000 23 | WARMUP_FACTOR: 0.00025 24 | CLIP_GRADIENTS: 25 | ENABLED: True 26 | INPUT: 27 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 28 | OUTPUT_DIR: "./output/CenterNet2/auto" 29 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/Base_S4_DLA.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "CenterNetDetector" 3 | PROPOSAL_GENERATOR: 
4 | NAME: "CenterNet" 5 | PIXEL_STD: [57.375, 57.120, 58.395] 6 | BACKBONE: 7 | NAME: "build_dla_backbone" 8 | DLA: 9 | NORM: "BN" 10 | CENTERNET: 11 | IN_FEATURES: ["dla2"] 12 | FPN_STRIDES: [4] 13 | SOI: [[0, 1000000]] 14 | NUM_CLS_CONVS: 1 15 | NUM_BOX_CONVS: 1 16 | REG_WEIGHT: 1. 17 | MORE_POS: True 18 | HM_FOCAL_ALPHA: 0.25 19 | DATASETS: 20 | TRAIN: ("coco_2017_train",) 21 | TEST: ("coco_2017_val",) 22 | SOLVER: 23 | LR_SCHEDULER_NAME: "WarmupCosineLR" 24 | MAX_ITER: 90000 25 | BASE_LR: 0.04 26 | IMS_PER_BATCH: 64 27 | WEIGHT_DECAY: 0.0001 28 | CHECKPOINT_PERIOD: 1000000 29 | CLIP_GRADIENTS: 30 | ENABLED: True 31 | INPUT: 32 | CUSTOM_AUG: EfficientDetResizeCrop 33 | TRAIN_SIZE: 640 34 | MIN_SIZE_TEST: 608 35 | MAX_SIZE_TEST: 900 36 | TEST: 37 | EVAL_PERIOD: 7500 38 | DATALOADER: 39 | NUM_WORKERS: 8 40 | OUTPUT_DIR: "output/CenterNet2/auto" 41 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/CenterNet-FPN_R50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet-FPN.yaml" 2 | MODEL: 3 | CENTERNET: 4 | MORE_POS: True -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/CenterNet-S4_DLA_8x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base_S4_DLA.yaml" 2 | SOLVER: 3 | MAX_ITER: 90000 4 | BASE_LR: 0.08 5 | IMS_PER_BATCH: 128 -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/CenterNet2-F_R50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | ROI_HEADS: 4 | NAME: CustomROIHeads -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/CenterNet2_DLA-BiFPN-P3_24x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_p35_fcos_dla_bifpn_backbone" 5 | BIFPN: 6 | OUT_CHANNELS: 160 7 | NUM_LEVELS: 3 8 | NUM_BIFPN: 4 9 | DLA: 10 | NUM_LAYERS: 34 11 | NORM: "SyncBN" 12 | FPN: 13 | IN_FEATURES: ["dla3", "dla4", "dla5"] 14 | ROI_HEADS: 15 | IN_FEATURES: ["p3", "p4", "p5"] 16 | CENTERNET: 17 | POST_NMS_TOPK_TEST: 128 18 | FPN_STRIDES: [8, 16, 32] 19 | IN_FEATURES: ['p3', 'p4', 'p5'] 20 | SOI: [[0, 64], [48, 192], [128, 1000000]] 21 | DATASETS: 22 | TRAIN: ("coco_2017_train",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.02 27 | STEPS: (300000, 340000) 28 | MAX_ITER: 360000 29 | CHECKPOINT_PERIOD: 100000 30 | WARMUP_ITERS: 4000 31 | WARMUP_FACTOR: 0.00025 32 | INPUT: 33 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 34 | MAX_SIZE_TRAIN: 900 35 | MAX_SIZE_TEST: 736 36 | MIN_SIZE_TEST: 512 -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/CenterNet2_DLA-BiFPN-P3_4x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_p35_fcos_dla_bifpn_backbone" 5 | BIFPN: 6 | OUT_CHANNELS: 160 7 | NUM_LEVELS: 3 8 | NUM_BIFPN: 4 9 | DLA: 10 | NUM_LAYERS: 34 11 | NORM: "SyncBN" 12 | FPN: 13 | IN_FEATURES: ["dla3", "dla4", "dla5"] 14 | 
ROI_HEADS: 15 | IN_FEATURES: ["p3", "p4", "p5"] 16 | CENTERNET: 17 | POST_NMS_TOPK_TEST: 128 18 | FPN_STRIDES: [8, 16, 32] 19 | IN_FEATURES: ['p3', 'p4', 'p5'] 20 | SOI: [[0, 64], [48, 192], [128, 1000000]] 21 | DATASETS: 22 | TRAIN: ("coco_2017_train",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.02 27 | STEPS: (300000, 340000) 28 | MAX_ITER: 360000 29 | CHECKPOINT_PERIOD: 100000 30 | WARMUP_ITERS: 4000 31 | WARMUP_FACTOR: 0.00025 32 | INPUT: 33 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 34 | MAX_SIZE_TRAIN: 900 35 | MAX_SIZE_TEST: 736 36 | MIN_SIZE_TEST: 512 -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/CenterNet2_DLA-BiFPN-P5_640_16x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_p37_dla_bifpn_backbone" 5 | BIFPN: 6 | OUT_CHANNELS: 160 7 | NUM_LEVELS: 5 8 | NUM_BIFPN: 3 9 | CENTERNET: 10 | POST_NMS_TOPK_TEST: 128 11 | WEIGHTS: '' 12 | PIXEL_MEAN: [123.675, 116.280, 103.530] 13 | PIXEL_STD: [58.395, 57.12, 57.375] 14 | FPN: 15 | IN_FEATURES: ["dla3", "dla4", "dla5"] 16 | SOLVER: 17 | LR_SCHEDULER_NAME: "WarmupCosineLR" 18 | MAX_ITER: 360000 19 | BASE_LR: 0.08 20 | IMS_PER_BATCH: 64 21 | CHECKPOINT_PERIOD: 90000 22 | TEST: 23 | EVAL_PERIOD: 7500 24 | INPUT: 25 | FORMAT: RGB 26 | CUSTOM_AUG: EfficientDetResizeCrop 27 | TRAIN_SIZE: 640 28 | MIN_SIZE_TEST: 608 29 | MAX_SIZE_TEST: 900 30 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/CenterNet2_DLA-BiFPN-P5_640_16x_ST.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_p37_dla_bifpn_backbone" 5 | BIFPN: 6 | OUT_CHANNELS: 160 7 | NUM_LEVELS: 5 8 | NUM_BIFPN: 3 9 | CENTERNET: 10 | POST_NMS_TOPK_TEST: 128 11 | WEIGHTS: '' 12 | PIXEL_MEAN: [123.675, 116.280, 103.530] 13 | PIXEL_STD: [58.395, 57.12, 57.375] 14 | FPN: 15 | IN_FEATURES: ["dla3", "dla4", "dla5"] 16 | SOLVER: 17 | LR_SCHEDULER_NAME: "WarmupCosineLR" 18 | MAX_ITER: 360000 19 | BASE_LR: 0.08 20 | IMS_PER_BATCH: 64 21 | TEST: 22 | EVAL_PERIOD: 7500 23 | INPUT: 24 | FORMAT: RGB 25 | CUSTOM_AUG: EfficientDetResizeCrop 26 | TRAIN_SIZE: 640 27 | MIN_SIZE_TEST: 608 28 | MAX_SIZE_TEST: 900 29 | DATASETS: 30 | TRAIN: ("coco_2017_train","coco_un_yolov4_55_0.5",) 31 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/CenterNet2_DLA-fcosBiFPN-P5_640_16x_ST.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_p37_fcos_dla_bifpn_backbone" 5 | BIFPN: 6 | OUT_CHANNELS: 160 7 | NUM_LEVELS: 5 8 | NUM_BIFPN: 3 9 | CENTERNET: 10 | POST_NMS_TOPK_TEST: 128 11 | WEIGHTS: '' 12 | PIXEL_MEAN: [123.675, 116.280, 103.530] 13 | PIXEL_STD: [58.395, 57.12, 57.375] 14 | FPN: 15 | IN_FEATURES: ["dla3", "dla4", "dla5"] 16 | TEST: 17 | EVAL_PERIOD: 7500 18 | SOLVER: 19 | LR_SCHEDULER_NAME: "WarmupCosineLR" 20 | MAX_ITER: 360000 21 | BASE_LR: 0.08 22 | IMS_PER_BATCH: 64 23 | INPUT: 24 | FORMAT: RGB 25 | CUSTOM_AUG: EfficientDetResizeCrop 26 | TRAIN_SIZE: 640 27 | MIN_SIZE_TEST: 608 28 | MAX_SIZE_TEST: 900 29 | DATASETS: 30 | TRAIN: 
("coco_2017_train","coco_un_yolov4_55_0.5",) 31 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/CenterNet2_R2-101-DCN-BiFPN_1280_4x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_res2net_bifpn_backbone" 5 | BIFPN: 6 | NUM_BIFPN: 7 7 | OUT_CHANNELS: 288 8 | WEIGHTS: "output/r2_101.pkl" 9 | RESNETS: 10 | DEPTH: 101 11 | WIDTH_PER_GROUP: 26 12 | DEFORM_ON_PER_STAGE: [False, False, True, True] # on Res4, Res5 13 | DEFORM_MODULATED: True 14 | PIXEL_MEAN: [123.675, 116.280, 103.530] 15 | PIXEL_STD: [58.395, 57.12, 57.375] 16 | CENTERNET: 17 | USE_DEFORMABLE: True 18 | ROI_HEADS: 19 | IN_FEATURES: ["p3", "p4"] 20 | INPUT: 21 | FORMAT: RGB 22 | TEST: 23 | EVAL_PERIOD: 7500 24 | SOLVER: 25 | MAX_ITER: 180000 26 | CHECKPOINT_PERIOD: 60000 27 | LR_SCHEDULER_NAME: "WarmupCosineLR" 28 | BASE_LR: 0.04 29 | IMS_PER_BATCH: 32 30 | INPUT: 31 | CUSTOM_AUG: EfficientDetResizeCrop 32 | TRAIN_SIZE: 1280 33 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/CenterNet2_R2-101-DCN-BiFPN_4x+4x_1560_ST.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_res2net_bifpn_backbone" 5 | BIFPN: 6 | NUM_BIFPN: 7 7 | OUT_CHANNELS: 288 8 | WEIGHTS: "output/r2_101.pkl" 9 | RESNETS: 10 | DEPTH: 101 11 | WIDTH_PER_GROUP: 26 12 | DEFORM_ON_PER_STAGE: [False, False, True, True] # on Res4, Res5 13 | DEFORM_MODULATED: True 14 | PIXEL_MEAN: [123.675, 116.280, 103.530] 15 | PIXEL_STD: [58.395, 57.12, 57.375] 16 | CENTERNET: 17 | USE_DEFORMABLE: True 18 | ROI_HEADS: 19 | IN_FEATURES: ["p3", "p4"] 20 | TEST: 21 | EVAL_PERIOD: 7500 22 | SOLVER: 23 | MAX_ITER: 180000 24 | CHECKPOINT_PERIOD: 7500 25 | LR_SCHEDULER_NAME: "WarmupCosineLR" 26 | BASE_LR: 0.04 27 | IMS_PER_BATCH: 32 28 | DATASETS: 29 | TRAIN: "('coco_2017_train', 'coco_un_yolov4_55_0.5')" 30 | INPUT: 31 | FORMAT: RGB 32 | CUSTOM_AUG: EfficientDetResizeCrop 33 | TRAIN_SIZE: 1280 34 | TEST_SIZE: 1560 35 | TEST_INPUT_TYPE: 'square' 36 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/CenterNet2_R2-101-DCN_896_4x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_p67_res2net_fpn_backbone" 5 | WEIGHTS: "output/r2_101.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | WIDTH_PER_GROUP: 26 9 | DEFORM_ON_PER_STAGE: [False, False, True, True] # on Res4, Res5 10 | DEFORM_MODULATED: True 11 | PIXEL_MEAN: [123.675, 116.280, 103.530] 12 | PIXEL_STD: [58.395, 57.12, 57.375] 13 | CENTERNET: 14 | USE_DEFORMABLE: True 15 | ROI_HEADS: 16 | IN_FEATURES: ["p3", "p4"] 17 | INPUT: 18 | FORMAT: RGB 19 | TEST: 20 | EVAL_PERIOD: 7500 21 | SOLVER: 22 | MAX_ITER: 180000 23 | CHECKPOINT_PERIOD: 600000 24 | LR_SCHEDULER_NAME: "WarmupCosineLR" 25 | BASE_LR: 0.04 26 | IMS_PER_BATCH: 32 27 | INPUT: 28 | CUSTOM_AUG: EfficientDetResizeCrop 29 | TRAIN_SIZE: 896 -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/CenterNet2_R50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | 
-------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/CenterNet2_X101-DCN_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | CENTERNET: 4 | USE_DEFORMABLE: True 5 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 6 | PIXEL_STD: [57.375, 57.120, 58.395] 7 | RESNETS: 8 | STRIDE_IN_1X1: False 9 | NUM_GROUPS: 32 10 | WIDTH_PER_GROUP: 8 11 | DEPTH: 101 12 | DEFORM_ON_PER_STAGE: [False, False, True, True] # on Res4, Res5 13 | DEFORM_MODULATED: True 14 | ROI_HEADS: 15 | IN_FEATURES: ["p3", "p4"] 16 | SOLVER: 17 | STEPS: (120000, 160000) 18 | MAX_ITER: 180000 19 | CHECKPOINT_PERIOD: 40000 20 | INPUT: 21 | MIN_SIZE_TRAIN: (480, 960) 22 | MIN_SIZE_TRAIN_SAMPLING: "range" 23 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/LVIS_CenterNet2_R50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | ROI_HEADS: 4 | NUM_CLASSES: 1203 5 | SCORE_THRESH_TEST: 0.02 6 | NMS_THRESH_TEST: 0.5 7 | CENTERNET: 8 | NUM_CLASSES: 1203 9 | 10 | DATASETS: 11 | TRAIN: ("lvis_v1_train",) 12 | TEST: ("lvis_v1_val",) 13 | DATALOADER: 14 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 15 | REPEAT_THRESHOLD: 0.001 16 | TEST: 17 | DETECTIONS_PER_IMAGE: 300 18 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/LVIS_CenterNet2_R50_Fed_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | ROI_HEADS: 4 | NUM_CLASSES: 1203 5 | SCORE_THRESH_TEST: 0.02 6 | NMS_THRESH_TEST: 0.5 7 | CENTERNET: 8 | NUM_CLASSES: 1203 9 | ROI_BOX_HEAD: 10 | USE_SIGMOID_CE: True 11 | USE_FED_LOSS: True 12 | DATASETS: 13 | TRAIN: ("lvis_v1_train",) 14 | TEST: ("lvis_v1_val",) 15 | DATALOADER: 16 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 17 | REPEAT_THRESHOLD: 0.001 18 | TEST: 19 | DETECTIONS_PER_IMAGE: 300 20 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/O365_CenterNet2_R50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | ROI_HEADS: 4 | NUM_CLASSES: 365 5 | CENTERNET: 6 | NUM_CLASSES: 365 7 | DATASETS: 8 | TRAIN: ("objects365_train",) 9 | TEST: ("objects365_val",) 10 | DATALOADER: 11 | SAMPLER_TRAIN: "ClassAwareSampler" 12 | TEST: 13 | DETECTIONS_PER_IMAGE: 300 -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/nuImages_CenterNet2_DLA_640_8x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | MASK_ON: True 4 | ROI_MASK_HEAD: 5 | NAME: "MaskRCNNConvUpsampleHead" 6 | NUM_CONV: 4 7 | POOLER_RESOLUTION: 14 8 | ROI_HEADS: 9 | NUM_CLASSES: 10 10 | IN_FEATURES: ["dla2"] 11 | BACKBONE: 12 | NAME: "build_dla_backbone" 13 | DLA: 14 | NORM: "BN" 15 | CENTERNET: 16 | IN_FEATURES: ["dla2"] 17 | FPN_STRIDES: [4] 18 | SOI: [[0, 1000000]] 19 | NUM_CLS_CONVS: 1 20 | NUM_BOX_CONVS: 1 21 | REG_WEIGHT: 1. 
22 | MORE_POS: True 23 | HM_FOCAL_ALPHA: 0.25 24 | POST_NMS_TOPK_TEST: 128 25 | WEIGHTS: '' 26 | PIXEL_MEAN: [123.675, 116.280, 103.530] 27 | PIXEL_STD: [58.395, 57.12, 57.375] 28 | SOLVER: 29 | MAX_ITER: 180000 30 | STEPS: (120000, 160000) 31 | BASE_LR: 0.08 32 | IMS_PER_BATCH: 64 33 | INPUT: 34 | FORMAT: RGB 35 | CUSTOM_AUG: EfficientDetResizeCrop 36 | TRAIN_SIZE: 640 37 | MIN_SIZE_TEST: 608 38 | MAX_SIZE_TEST: 900 39 | MASK_FORMAT: bitmask 40 | DATASETS: 41 | TRAIN: ("nuimages_train",) 42 | TEST: ("nuimages_val",) 43 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/third_party/CenterNet2/tools/__init__.py -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/tools/deploy/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # See https://pytorch.org/tutorials/advanced/cpp_frontend.html 3 | cmake_minimum_required(VERSION 3.12 FATAL_ERROR) 4 | project(torchscript_mask_rcnn) 5 | 6 | find_package(Torch REQUIRED) 7 | find_package(OpenCV REQUIRED) 8 | find_package(TorchVision REQUIRED) # needed by export-method=tracing/scripting 9 | 10 | add_executable(torchscript_mask_rcnn torchscript_mask_rcnn.cpp) 11 | target_link_libraries( 12 | torchscript_mask_rcnn 13 | -Wl,--no-as-needed TorchVision::TorchVision -Wl,--as-needed 14 | "${TORCH_LIBRARIES}" ${OpenCV_LIBS}) 15 | set_property(TARGET torchscript_mask_rcnn PROPERTY CXX_STANDARD 14) 16 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/configs/r50_deformable_detr.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | EXP_DIR=exps/r50_deformable_detr 6 | PY_ARGS=${@:1} 7 | 8 | python -u main.py \ 9 | --output_dir ${EXP_DIR} \ 10 | ${PY_ARGS} 11 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/configs/r50_deformable_detr_plus_iterative_bbox_refinement.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | EXP_DIR=exps/r50_deformable_detr_plus_iterative_bbox_refinement 6 | PY_ARGS=${@:1} 7 | 8 | python -u main.py \ 9 | --output_dir ${EXP_DIR} \ 10 | --with_box_refine \ 11 | ${PY_ARGS} 12 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/configs/r50_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | EXP_DIR=exps/r50_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage 6 | PY_ARGS=${@:1} 7 | 8 | python -u main.py \ 9 | --output_dir ${EXP_DIR} \ 10 | --with_box_refine \ 11 | --two_stage \ 12 | 
${PY_ARGS} 13 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/configs/r50_deformable_detr_single_scale.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | EXP_DIR=exps/r50_deformable_detr_single_scale 6 | PY_ARGS=${@:1} 7 | 8 | python -u main.py \ 9 | --num_feature_levels 1 \ 10 | --output_dir ${EXP_DIR} \ 11 | ${PY_ARGS} 12 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/configs/r50_deformable_detr_single_scale_dc5.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | EXP_DIR=exps/r50_deformable_detr_single_scale_dc5 6 | PY_ARGS=${@:1} 7 | 8 | python -u main.py \ 9 | --num_feature_levels 1 \ 10 | --dilation \ 11 | --output_dir ${EXP_DIR} \ 12 | ${PY_ARGS} 13 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/datasets/torchvision_datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------ 6 | 7 | from .coco import CocoDetection 8 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/docs/changelog.md: -------------------------------------------------------------------------------- 1 | ## Changelog 2 | 3 | **[2020.12.07]** Fix a bug of sampling offset normalization (see [this issue](https://github.com/fundamentalvision/Deformable-DETR/issues/6)) in the MSDeformAttn module. The final accuracy on COCO is slightly improved. Code and pre-trained models have been updated. This bug only occurs in this released version but not in the original implementation used in our paper. -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/models/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------ 6 | # Modified from DETR (https://github.com/facebookresearch/detr) 7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 8 | # ------------------------------------------------------------------------ 9 | 10 | from .deformable_detr import build 11 | 12 | 13 | def build_model(args): 14 | return build(args) 15 | 16 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/models/ops/functions/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 
4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | from .ms_deform_attn_func import MSDeformAttnFunction 10 | 11 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/models/ops/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # ------------------------------------------------------------------------------------------------ 3 | # Deformable DETR 4 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | # ------------------------------------------------------------------------------------------------ 7 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | # ------------------------------------------------------------------------------------------------ 9 | 10 | python setup.py build install 11 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/models/ops/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | from .ms_deform_attn import MSDeformAttn 10 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/models/ops/src/vision.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 
5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #include "ms_deform_attn.h" 12 | 13 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 14 | m.def("ms_deform_attn_forward", &ms_deform_attn_forward, "ms_deform_attn_forward"); 15 | m.def("ms_deform_attn_backward", &ms_deform_attn_backward, "ms_deform_attn_backward"); 16 | } 17 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/requirements.txt: -------------------------------------------------------------------------------- 1 | pycocotools 2 | tqdm 3 | cython 4 | scipy 5 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/tools/run_dist_launch.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # ------------------------------------------------------------------------ 3 | # Deformable DETR 4 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | # ------------------------------------------------------------------------ 7 | 8 | set -x 9 | 10 | GPUS=$1 11 | RUN_COMMAND=${@:2} 12 | if [ $GPUS -lt 8 ]; then 13 | GPUS_PER_NODE=${GPUS_PER_NODE:-$GPUS} 14 | else 15 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 16 | fi 17 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 18 | MASTER_PORT=${MASTER_PORT:-"29500"} 19 | NODE_RANK=${NODE_RANK:-0} 20 | 21 | let "NNODES=GPUS/GPUS_PER_NODE" 22 | 23 | python ./tools/launch.py \ 24 | --nnodes ${NNODES} \ 25 | --node_rank ${NODE_RANK} \ 26 | --master_addr ${MASTER_ADDR} \ 27 | --master_port ${MASTER_PORT} \ 28 | --nproc_per_node ${GPUS_PER_NODE} \ 29 | ${RUN_COMMAND} -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/util/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------ 6 | # Modified from DETR (https://github.com/facebookresearch/detr) 7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 8 | # ------------------------------------------------------------------------ 9 | -------------------------------------------------------------------------------- /third_party/Detic/tools/fix_o365_path.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | import argparse 3 | import json 4 | import os 5 | 6 | if __name__ == '__main__': 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument("--ann", default='datasets/objects365/annotations/zhiyuan_objv2_train_fixname.json') 9 | parser.add_argument("--img_dir", default='datasets/objects365/train/') 10 | args = parser.parse_args() 11 | 12 | print('Loading', args.ann) 13 | data = json.load(open(args.ann, 'r')) 14 | images = [] 15 | count = 0 16 | for x in data['images']: 17 | path = '{}/{}'.format(args.img_dir, x['file_name']) 18 | if os.path.exists(path): 19 | images.append(x) 20 | else: 21 | print(path) 22 | count = count + 1 23 | print('Missing', count, 'images') 24 | data['images'] = images 25 | out_name = args.ann[:-5] + '_fixmiss.json' 26 | print('Saving to', out_name) 27 | json.dump(data, open(out_name, 'w')) 28 | -------------------------------------------------------------------------------- /third_party/Detic/tools/get_coco_zeroshot_oriorder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import argparse 3 | import json 4 | 5 | if __name__ == '__main__': 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument('--data_path', default='datasets/coco/annotations/instances_val2017_unseen_2.json') 8 | parser.add_argument('--cat_path', default='datasets/coco/annotations/instances_val2017.json') 9 | args = parser.parse_args() 10 | print('Loading', args.cat_path) 11 | cat = json.load(open(args.cat_path, 'r'))['categories'] 12 | 13 | print('Loading', args.data_path) 14 | data = json.load(open(args.data_path, 'r')) 15 | data['categories'] = cat 16 | out_path = args.data_path[:-5] + '_oriorder.json' 17 | print('Saving to', out_path) 18 | json.dump(data, open(out_path, 'w')) 19 | -------------------------------------------------------------------------------- /third_party/Detic/tools/remove_lvis_rare.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import argparse 3 | import json 4 | 5 | if __name__ == '__main__': 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument('--ann', default='datasets/lvis/lvis_v1_train.json') 8 | args = parser.parse_args() 9 | 10 | print('Loading', args.ann) 11 | data = json.load(open(args.ann, 'r')) 12 | catid2freq = {x['id']: x['frequency'] for x in data['categories']} 13 | print('ori #anns', len(data['annotations'])) 14 | exclude = ['r'] 15 | data['annotations'] = [x for x in data['annotations'] \ 16 | if catid2freq[x['category_id']] not in exclude] 17 | print('filtered #anns', len(data['annotations'])) 18 | out_path = args.ann[:-5] + '_norare.json' 19 | print('Saving to', out_path) 20 | json.dump(data, open(out_path, 'w')) 21 | -------------------------------------------------------------------------------- /third_party/Detic/tools/unzip_imagenet_lvis.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | import os 3 | import argparse 4 | 5 | if __name__ == '__main__': 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument('--src_path', default='datasets/imagenet/ImageNet-21K/') 8 | parser.add_argument('--dst_path', default='datasets/imagenet/ImageNet-LVIS/') 9 | parser.add_argument('--data_path', default='datasets/metadata/imagenet_lvis_wnid.txt') 10 | args = parser.parse_args() 11 | 12 | f = open(args.data_path) 13 | for i, line in enumerate(f): 14 | cmd = 'mkdir {x} && tar -xf {src}/{l}.tar -C {x}'.format( 15 | src=args.src_path, 16 | l=line.strip(), 17 | x=args.dst_path + '/' + line.strip()) 18 | print(i, cmd) 19 | os.system(cmd) 20 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/.gitignore: -------------------------------------------------------------------------------- 1 | # output dir 2 | output 3 | outputs 4 | instant_test_output 5 | inference_test_output 6 | 7 | 8 | *.json 9 | *.diff 10 | *.jpg 11 | !/projects/DensePose/doc/images/*.jpg 12 | 13 | # compilation and distribution 14 | __pycache__ 15 | _ext 16 | *.pyc 17 | *.pyd 18 | *.so 19 | *.dll 20 | *.egg-info/ 21 | build/ 22 | dist/ 23 | wheels/ 24 | 25 | # pytorch/python/numpy formats 26 | *.pth 27 | *.pkl 28 | *.npy 29 | *.ts 30 | model_ts*.txt 31 | 32 | # ipython/jupyter notebooks 33 | *.ipynb 34 | **/.ipynb_checkpoints/ 35 | 36 | # Editor temporaries 37 | *.swn 38 | *.swo 39 | *.swp 40 | *~ 41 | 42 | # editor settings 43 | .idea 44 | .vscode 45 | _darcs 46 | 47 | # project dirs 48 | /detectron2/model_zoo/configs 49 | /datasets/* 50 | !/datasets/*.* 51 | /projects/*/datasets 52 | /models 53 | /snippet 54 | 55 | # vs code 56 | .history 57 | 58 | amlt 59 | thirdparty 60 | wandb 61 | weights -------------------------------------------------------------------------------- /third_party/zsseg.baseline/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 MendelXu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
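A note on tools/unzip_imagenet_lvis.py above: it shells out to `mkdir` and `tar -xf` once per WordNet id via os.system. A sketch of the same loop using only the standard-library tarfile module (illustrative, not from the repo; it keeps the script's default paths):

import os
import tarfile

src = 'datasets/imagenet/ImageNet-21K/'
dst = 'datasets/imagenet/ImageNet-LVIS/'
with open('datasets/metadata/imagenet_lvis_wnid.txt') as f:
    for i, line in enumerate(f):
        wnid = line.strip()
        out_dir = os.path.join(dst, wnid)
        os.makedirs(out_dir, exist_ok=True)      # replaces `mkdir {x}`
        with tarfile.open(os.path.join(src, wnid + '.tar')) as tar:
            tar.extractall(out_dir)              # replaces `tar -xf ... -C {x}`
        print(i, wnid)

One behavioral difference: the shell version's `mkdir {x} &&` silently skips any class whose directory already exists, while this sketch reuses the directory and extracts into it.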
22 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_maskformer_R101c_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | OUTPUT_DIR: output/coco-stuff-164k-156/zero_shot_maskformer_R101c_bs32_60k/official_inference -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_maskformer_R101c_imagenet_prompt_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_imagenet_prompt_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | OUTPUT_DIR: output/coco-stuff-164k-156/zero_shot_maskformer_R101c_imagenet_prompt_bs32_60k/re_inference -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_maskformer_R101c_pomp_prompt_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | CLIP_ADAPTER: 15 | PROMPT_LEARNER: "pomp_tuned" 16 | # for learnable prompt 17 | PROMPT_DIM: 512 18 | PROMPT_SHAPE: (16, 0) 19 | CLIP_MODEL_NAME: "ViT-B/16" 20 | PROMPT_CHECKPOINT: output/coco-stuff-164k-156/zero_shot_proposal_classification_learn_prompt_pomp_bs32_10k/model_final.pth 21 | OUTPUT_DIR: output/coco-stuff-164k-156/zero_shot_maskformer_R101c_pomp_prompt_bs32_60k -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_maskformer_R101c_single_prompt_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_single_prompt_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | OUTPUT_DIR: output/coco-stuff-164k-156/zero_shot_maskformer_R101c_single_prompt_bs32_60k/re_inference -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_maskformer_R101c_single_prompt_clip_rn101_bs32_60k.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_single_prompt_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | CLIP_ADAPTER: 15 | CLIP_MODEL_NAME: "RN101" 16 | PROMPT_DIM: 512 17 | SEM_SEG_HEAD: 18 | EMBEDDING_DIM: 512 -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_maskformer_R101c_single_prompt_clip_rn50_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_single_prompt_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | CLIP_ADAPTER: 15 | CLIP_MODEL_NAME: "RN50" 16 | PROMPT_DIM: 1024 17 | SEM_SEG_HEAD: 18 | EMBEDDING_DIM: 1024 -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_maskformer_R101c_single_prompt_clip_rn50x16_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_single_prompt_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | CLIP_ADAPTER: 15 | CLIP_MODEL_NAME: "RN50x16" 16 | PROMPT_DIM: 768 17 | SEM_SEG_HEAD: 18 | EMBEDDING_DIM: 768 -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_maskformer_R101c_single_prompt_clip_rn50x4_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_single_prompt_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | CLIP_ADAPTER: 15 | CLIP_MODEL_NAME: "RN50x4" 16 | PROMPT_DIM: 640 17 | SEM_SEG_HEAD: 18 | EMBEDDING_DIM: 640 -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_maskformer_R101c_single_prompt_clip_vit-bx32_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_single_prompt_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", 
"res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | CLIP_ADAPTER: 15 | CLIP_MODEL_NAME: "ViT-B/32" -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_maskformer_R101c_vild_prompt_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_vild_prompt_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | OUTPUT_DIR: output/coco-stuff-164k-156/zero_shot_maskformer_R101c_vild_prompt_bs32_60k/re_inference -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_maskformer_R50_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../coco-stuff-164k-171/maskformer_R50_bs32_60k.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "ZeroShotMaskFormer" 4 | SEM_SEG_HEAD: 5 | NAME: "ZeroShotMaskFormerHead" 6 | NUM_CLASSES: 156 #only used in set criterion 7 | EMBEDDING_DIM: 512 8 | EMBED_LAYERS: 2 9 | CLIP_ADAPTER: 10 | PROMPT_LEARNER: "learnable" 11 | # for learnable prompt 12 | PROMPT_DIM: 512 13 | PROMPT_SHAPE: (16, 0) 14 | CLIP_MODEL_NAME: "ViT-B/16" 15 | MASK_FILL: "mean" 16 | MASK_EXPAND_RATIO: 1.0 17 | MASK_THR: 0.5 18 | MASK_MATTING: False 19 | REGION_RESIZED: True 20 | CLIP_ENSEMBLE: True 21 | CLIP_ENSEMBLE_WEIGHT: 0.8 22 | DATASETS: 23 | TRAIN: ("coco_2017_train_stuff_base_sem_seg",) 24 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_maskformer_R50_imagenet_prompt_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../coco-stuff-164k-171/maskformer_R50_bs32_60k.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "ZeroShotMaskFormer" 4 | SEM_SEG_HEAD: 5 | NAME: "ZeroShotMaskFormerHead" 6 | NUM_CLASSES: 156 #only used in set criterion 7 | EMBEDDING_DIM: 512 8 | EMBED_LAYERS: 2 9 | CLIP_ADAPTER: 10 | PROMPT_LEARNER: "imagenet" 11 | CLIP_MODEL_NAME: "ViT-B/16" 12 | MASK_FILL: "mean" 13 | MASK_EXPAND_RATIO: 1.0 14 | MASK_THR: 0.5 15 | MASK_MATTING: False 16 | REGION_RESIZED: True 17 | CLIP_ENSEMBLE: True 18 | DATASETS: 19 | TRAIN: ("coco_2017_train_stuff_base_sem_seg",) -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_maskformer_R50_single_prompt_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../coco-stuff-164k-171/maskformer_R50_bs32_60k.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "ZeroShotMaskFormer" 4 | SEM_SEG_HEAD: 5 | NAME: "ZeroShotMaskFormerHead" 6 | NUM_CLASSES: 156 #only used in set criterion 7 | EMBEDDING_DIM: 512 8 | EMBED_LAYERS: 2 9 | CLIP_ADAPTER: 10 | PROMPT_LEARNER: "predefined" 11 | PREDEFINED_PROMPT_TEMPLATES: ["a sculpture of a {}."] 12 | CLIP_MODEL_NAME: "ViT-B/16" 13 | MASK_FILL: "mean" 14 | MASK_EXPAND_RATIO: 1.0 15 | MASK_THR: 0.5 16 | MASK_MATTING: False 17 | REGION_RESIZED: True 18 | CLIP_ENSEMBLE: True 19 | DATASETS: 20 | TRAIN: ("coco_2017_train_stuff_base_sem_seg",) 
-------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_maskformer_R50_vild_prompt_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../coco-stuff-164k-171/maskformer_R50_bs32_60k.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "ZeroShotMaskFormer" 4 | SEM_SEG_HEAD: 5 | NAME: "ZeroShotMaskFormerHead" 6 | NUM_CLASSES: 156 #only used in set criterion 7 | EMBEDDING_DIM: 512 8 | EMBED_LAYERS: 2 9 | CLIP_ADAPTER: 10 | PROMPT_LEARNER: "vild" 11 | CLIP_MODEL_NAME: "ViT-B/16" 12 | MASK_FILL: "mean" 13 | MASK_EXPAND_RATIO: 1.0 14 | MASK_THR: 0.5 15 | MASK_MATTING: False 16 | REGION_RESIZED: True 17 | CLIP_ENSEMBLE: True 18 | DATASETS: 19 | TRAIN: ("coco_2017_train_stuff_base_sem_seg",) -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_perpixel_R101c_single_prompt_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_single_prompt_bs32_60k.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "ZeroShotPerPixelModel" 4 | BACKBONE: 5 | NAME: "build_resnet_deeplab_backbone" 6 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 7 | RESNETS: 8 | DEPTH: 101 9 | STEM_TYPE: "deeplab" 10 | STEM_OUT_CHANNELS: 128 11 | STRIDE_IN_1X1: False 12 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 13 | # NORM: "SyncBN" 14 | RES5_MULTI_GRID: [1, 2, 4] 15 | SEM_SEG_HEAD: 16 | NAME: "ZeroPerPixelBaselineHead" 17 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 18 | IGNORE_VALUE: 255 19 | NUM_CLASSES: 512 20 | COMMON_STRIDE: 4 # not used, hard-coded 21 | LOSS_WEIGHT: 1.0 22 | CONVS_DIM: 256 23 | MASK_DIM: 256 24 | NORM: "GN" 25 | CLIP_ADAPTER: 26 | CLIP_ENSEMBLE_WEIGHT: 0.7 -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_proposal_classification_bs32_10k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../coco-stuff-164k-171/maskformer_R50_bs32_60k.yaml 2 | ORACLE: True 3 | MODEL: 4 | META_ARCHITECTURE: "ProposalClipClassifier" 5 | MASK_ON: True 6 | CLIP_ADAPTER: 7 | PROMPT_LEARNER: "predefined" 8 | PREDEFINED_PROMPT_TEMPLATES: ["a sculpture of a {}."] 9 | # for learnable prompt 10 | PROMPT_DIM: 512 11 | PROMPT_SHAPE: (16, 0) 12 | CLIP_MODEL_NAME: "ViT-B/16" 13 | DATASETS: 14 | TRAIN: ("coco_2017_train_stuff_base_sem_seg_classification",) 15 | TEST: ("coco_2017_test_stuff_sem_seg_classification",) 16 | INPUT: 17 | MIN_SIZE_TRAIN: (224,244) 18 | MIN_SIZE_TEST: 224 19 | MAX_SIZE_TEST: 2560 20 | SIZE_DIVISIBILITY: -1 21 | FORMAT: "RGB" 22 | DATASET_MAPPER_NAME: "mask_former_binary_semantic" 23 | SOLVER: 24 | OPTIMIZER: "SGD" 25 | BASE_LR: 0.002 26 | WEIGHT_DECAY: 0.0005 27 | LR_SCHEDULER_NAME: "WarmupCosineLR" 28 | WARMUP_METHOD: "constant" 29 | WARMUP_FACTOR: 0.005 30 | WARMUP_ITERS: 100 31 | IMS_PER_BATCH: 32 32 | TEST_IMS_PER_BATCH: 4 33 | MAX_ITER: 10000 34 | CHECKPOINT_PERIOD: 5000 35 | TEST: 36 | EVAL_PERIOD: 5000 -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-171/maskformer_R101c_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: maskformer_R50_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: 
"detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-171/maskformer_R50_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-COCOStuff164K-171.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "MaskFormer" 4 | SEM_SEG_HEAD: 5 | NAME: "MaskFormerHead" 6 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 7 | IGNORE_VALUE: 255 8 | NUM_CLASSES: 171 9 | COMMON_STRIDE: 4 # not used, hard-coded 10 | LOSS_WEIGHT: 1.0 11 | CONVS_DIM: 256 12 | MASK_DIM: 256 13 | NORM: "GN" 14 | MASK_FORMER: 15 | TRANSFORMER_IN_FEATURE: "res5" 16 | DEEP_SUPERVISION: True 17 | NO_OBJECT_WEIGHT: 0.1 18 | DICE_WEIGHT: 1.0 19 | MASK_WEIGHT: 20.0 20 | HIDDEN_DIM: 256 21 | NUM_OBJECT_QUERIES: 100 22 | NHEADS: 8 23 | DROPOUT: 0.1 24 | DIM_FEEDFORWARD: 2048 25 | ENC_LAYERS: 0 26 | DEC_LAYERS: 6 27 | PRE_NORM: False -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-171/zero_shot_maskformer_R101c_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | OUTPUT_DIR: output/coco-stuff-164k-171/zero_shot_maskformer_R101c_bs32_60k/ -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-171/zero_shot_maskformer_R101c_imagenet_prompt_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | CLIP_ADAPTER: 7 | PROMPT_LEARNER: "imagenet" 8 | CLIP_MODEL_NAME: "ViT-B/16" 9 | MASK_FILL: "mean" 10 | MASK_EXPAND_RATIO: 1.0 11 | MASK_THR: 0.5 12 | MASK_MATTING: False 13 | REGION_RESIZED: True 14 | CLIP_ENSEMBLE: True 15 | CLIP_ENSEMBLE_WEIGHT: 0.8 16 | RESNETS: 17 | DEPTH: 101 18 | STEM_TYPE: "deeplab" 19 | STEM_OUT_CHANNELS: 128 20 | STRIDE_IN_1X1: False 21 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 22 | # NORM: "SyncBN" 23 | RES5_MULTI_GRID: [1, 2, 4] 24 | OUTPUT_DIR: output/coco-stuff-164k-171/zero_shot_maskformer_R101c_imagenet_prompt_bs32_60k/ -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-171/zero_shot_maskformer_R101c_pomp_prompt_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | 
CLIP_ADAPTER: 15 | PROMPT_LEARNER: "pomp_tuned" 16 | # for learnable prompt 17 | PROMPT_DIM: 512 18 | PROMPT_SHAPE: (16, 0) 19 | CLIP_MODEL_NAME: "ViT-B/16" 20 | PROMPT_CHECKPOINT: output/coco-stuff-164k-156/zero_shot_proposal_classification_learn_prompt_pomp_bs32_10k/model_final.pth 21 | OUTPUT_DIR: output/coco-stuff-164k-171/zero_shot_maskformer_R101c_pomp_prompt_bs32_60k -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-171/zero_shot_maskformer_R101c_single_prompt_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | CLIP_ADAPTER: 7 | PROMPT_LEARNER: "predefined" 8 | PREDEFINED_PROMPT_TEMPLATES: [ "a sculpture of a {}." ] 9 | CLIP_MODEL_NAME: "ViT-B/16" 10 | MASK_FILL: "mean" 11 | MASK_EXPAND_RATIO: 1.0 12 | MASK_THR: 0.5 13 | MASK_MATTING: False 14 | REGION_RESIZED: True 15 | CLIP_ENSEMBLE: True 16 | CLIP_ENSEMBLE_WEIGHT: 0.8 17 | RESNETS: 18 | DEPTH: 101 19 | STEM_TYPE: "deeplab" 20 | STEM_OUT_CHANNELS: 128 21 | STRIDE_IN_1X1: False 22 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 23 | # NORM: "SyncBN" 24 | RES5_MULTI_GRID: [1, 2, 4] 25 | OUTPUT_DIR: output/coco-stuff-164k-171/zero_shot_maskformer_R101c_single_prompt_bs32_60k/ -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-171/zero_shot_maskformer_R101c_vild_prompt_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | CLIP_ADAPTER: 7 | PROMPT_LEARNER: "vild" 8 | CLIP_MODEL_NAME: "ViT-B/16" 9 | MASK_FILL: "mean" 10 | MASK_EXPAND_RATIO: 1.0 11 | MASK_THR: 0.5 12 | MASK_MATTING: False 13 | REGION_RESIZED: True 14 | CLIP_ENSEMBLE: True 15 | CLIP_ENSEMBLE_WEIGHT: 0.8 16 | RESNETS: 17 | DEPTH: 101 18 | STEM_TYPE: "deeplab" 19 | STEM_OUT_CHANNELS: 128 20 | STRIDE_IN_1X1: False 21 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 22 | # NORM: "SyncBN" 23 | RES5_MULTI_GRID: [1, 2, 4] 24 | OUTPUT_DIR: output/coco-stuff-164k-171/zero_shot_maskformer_R101c_vild_prompt_bs32_60k/ -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-171/zero_shot_maskformer_R50_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: maskformer_R50_bs32_60k.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "ZeroShotMaskFormer" 4 | SEM_SEG_HEAD: 5 | NAME: "ZeroShotMaskFormerHead" 6 | NUM_CLASSES: 171 7 | EMBEDDING_DIM: 512 8 | EMBED_LAYERS: 2 9 | CLIP_ADAPTER: 10 | PROMPT_LEARNER: "learnable" 11 | # for learnable prompt 12 | PROMPT_DIM: 512 13 | PROMPT_SHAPE: (16, 0) 14 | CLIP_MODEL_NAME: "ViT-B/16" 15 | MASK_FILL: "mean" 16 | MASK_EXPAND_RATIO: 1.0 17 | MASK_THR: 0.5 18 | MASK_MATTING: False 19 | REGION_RESIZED: True 20 | CLIP_ENSEMBLE: True 21 | CLIP_ENSEMBLE_WEIGHT: 0.8 22 | DATASETS: 23 | TRAIN: ("coco_2017_train_stuff_sem_seg",) 24 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/voc-11k-15/zero_shot_maskformer_R101c_bs16_20k.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_bs16_20k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | OUTPUT_DIR: output/voc-11k-15/zero_shot_maskformer_R101c_bs16_20k -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/voc-11k-15/zero_shot_maskformer_R101c_imagenet_prompt_bs16_20k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_single_prompt_bs16_20k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | CLIP_ADAPTER: 15 | PROMPT_LEARNER: "imagenet" 16 | CLIP_MODEL_NAME: "ViT-B/16" 17 | MASK_FILL: "mean" 18 | MASK_EXPAND_RATIO: 1.0 19 | MASK_THR: 0.5 20 | MASK_MATTING: False 21 | REGION_RESIZED: True 22 | CLIP_ENSEMBLE: True 23 | CLIP_ENSEMBLE_WEIGHT: 0.7 24 | OUTPUT_DIR: output/voc-11k-15/zero_shot_maskformer_R101c_imagenet_prompt_bs16_20k -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/voc-11k-15/zero_shot_maskformer_R101c_pomp_prompt_bs16_20k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_bs16_20k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | CLIP_ADAPTER: 15 | PROMPT_LEARNER: "pomp_tuned" 16 | # for learnable prompt 17 | PROMPT_DIM: 512 18 | PROMPT_SHAPE: (16, 0) 19 | CLIP_MODEL_NAME: "ViT-B/16" 20 | PROMPT_CHECKPOINT: output/voc-11k-15/zero_shot_proposal_classification_learn_prompt_pomp_bs16_10k/model_final.pth 21 | OUTPUT_DIR: output/voc-11k-15/zero_shot_maskformer_R101c_pomp_prompt_bs16_20k -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/voc-11k-15/zero_shot_maskformer_R101c_single_prompt_bs16_20k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_single_prompt_bs16_20k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | OUTPUT_DIR: output/voc-11k-15/zero_shot_maskformer_R101c_single_prompt_bs16_20k -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/voc-11k-15/zero_shot_maskformer_R101c_vild_prompt_bs16_20k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: 
zero_shot_maskformer_R50_single_prompt_bs16_20k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | CLIP_ADAPTER: 15 | PROMPT_LEARNER: "vild" 16 | CLIP_MODEL_NAME: "ViT-B/16" 17 | MASK_FILL: "mean" 18 | MASK_EXPAND_RATIO: 1.0 19 | MASK_THR: 0.5 20 | MASK_MATTING: False 21 | REGION_RESIZED: True 22 | CLIP_ENSEMBLE: True 23 | CLIP_ENSEMBLE_WEIGHT: 0.7 24 | OUTPUT_DIR: output/voc-11k-15/zero_shot_maskformer_R101c_vild_prompt_bs16_20k -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/voc-11k-15/zero_shot_maskformer_R50_bs16_20k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../voc-11k-20/maskformer_R50_bs16_20k.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "ZeroShotMaskFormer" 4 | SEM_SEG_HEAD: 5 | NAME: "ZeroShotMaskFormerHead" 6 | NUM_CLASSES: 15 #only used in set criterion 7 | EMBEDDING_DIM: 512 8 | EMBED_LAYERS: 2 9 | CLIP_ADAPTER: 10 | PROMPT_LEARNER: "learnable" 11 | # for learnable prompt 12 | PROMPT_DIM: 512 13 | PROMPT_SHAPE: (16, 0) 14 | CLIP_MODEL_NAME: "ViT-B/16" 15 | MASK_FILL: "mean" 16 | MASK_EXPAND_RATIO: 1.0 17 | MASK_THR: 0.5 18 | MASK_MATTING: False 19 | REGION_RESIZED: True 20 | CLIP_ENSEMBLE: True 21 | CLIP_ENSEMBLE_WEIGHT: 0.7 22 | # SEPERATE_ADAPTER: True 23 | # REGION_CLIP_ADAPTER: 24 | # PROMPT_LEARNER: "learnable" 25 | 26 | DATASETS: 27 | TRAIN: ("voc_base_sem_seg_train",) 28 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/voc-11k-15/zero_shot_maskformer_R50_single_prompt_bs16_20k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../voc-11k-20/maskformer_R50_bs16_20k.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "ZeroShotMaskFormer" 4 | SEM_SEG_HEAD: 5 | NAME: "ZeroShotMaskFormerHead" 6 | NUM_CLASSES: 15 #only used in set criterion 7 | EMBEDDING_DIM: 512 8 | EMBED_LAYERS: 2 9 | CLIP_ADAPTER: 10 | PROMPT_LEARNER: "predefined" 11 | PREDEFINED_PROMPT_TEMPLATES: ["a sculpture of a {}."] 12 | CLIP_MODEL_NAME: "ViT-B/16" 13 | MASK_FILL: "mean" 14 | MASK_EXPAND_RATIO: 1.0 15 | MASK_THR: 0.5 16 | MASK_MATTING: False 17 | REGION_RESIZED: True 18 | CLIP_ENSEMBLE: True 19 | CLIP_ENSEMBLE_WEIGHT: 0.7 20 | DATASETS: 21 | TRAIN: ("voc_base_sem_seg_train",) -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/voc-11k-20/maskformer_R101c_bs16_20k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: maskformer_R50_bs16_20k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/voc-11k-20/maskformer_R50_bs16_20k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-VOC11K-20.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "MaskFormer" 4 | 
SEM_SEG_HEAD: 5 | NAME: "MaskFormerHead" 6 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 7 | IGNORE_VALUE: 255 8 | NUM_CLASSES: 20 9 | COMMON_STRIDE: 4 # not used, hard-coded 10 | LOSS_WEIGHT: 1.0 11 | CONVS_DIM: 256 12 | MASK_DIM: 256 13 | NORM: "GN" 14 | MASK_FORMER: 15 | TRANSFORMER_IN_FEATURE: "res5" 16 | DEEP_SUPERVISION: True 17 | NO_OBJECT_WEIGHT: 0.1 18 | DICE_WEIGHT: 1.0 19 | MASK_WEIGHT: 20.0 20 | HIDDEN_DIM: 256 21 | NUM_OBJECT_QUERIES: 100 22 | NHEADS: 8 23 | DROPOUT: 0.1 24 | DIM_FEEDFORWARD: 2048 25 | ENC_LAYERS: 0 26 | DEC_LAYERS: 6 27 | PRE_NORM: False -------------------------------------------------------------------------------- /third_party/zsseg.baseline/mask_former/__init__.py: -------------------------------------------------------------------------------- 1 | from . import data 2 | from . import modeling 3 | from .config import add_mask_former_config 4 | 5 | from .test_time_augmentation import SemanticSegmentorWithTTA 6 | from .mask_former_model import MaskFormer 7 | from .zero_shot_mask_former_model import ZeroShotMaskFormer 8 | from .proposal_classification import ProposalClipClassifier 9 | from .ablation.zero_shot_per_pixel_model import ZeroShotPerPixelModel 10 | from .ablation.oracle_mask_former_model import OracleMaskFormer 11 | from .ablation.zero_shot_proposal_based_model import ZeroShotProposalBasedSegmentor 12 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/mask_former/ablation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/zsseg.baseline/mask_former/ablation/__init__.py -------------------------------------------------------------------------------- /third_party/zsseg.baseline/mask_former/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset_mappers import * 2 | from . import datasets 3 | from .build import ( 4 | build_detection_train_loader, 5 | build_detection_test_loader, 6 | dataset_sample_per_class, 7 | ) 8 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/mask_former/data/dataset_mappers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .mask_former_binary_semantic_dataset_mapper import ( 3 | MaskFormerBinarySemanticDatasetMapper, 4 | ) 5 | from .mask_former_semantic_dataset_mapper import MaskFormerSemanticDatasetMapper 6 | from .oracle_dataset_mapper import OracleDatasetMapper 7 | from .proposal_classification_dataset_mapper import ProposalClasificationDatasetMapper 8 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/mask_former/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from . 
import register_coco_stuff, register_voc_seg, register_pcontext -------------------------------------------------------------------------------- /third_party/zsseg.baseline/mask_former/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .generalized_sem_seg_evaluation import GeneralizedSemSegEvaluator 2 | from .classification_evaluation import ClassificationEvaluator 3 | from .pseudo_sem_seg_evaluation import GeneralizedPseudoSemSegEvaluator 4 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/mask_former/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .backbone.swin import D2SwinTransformer 3 | from .backbone.clip_resnet import D2ModifiedResNet 4 | from .heads.mask_former_head import MaskFormerHead 5 | from .heads.zero_shot_mask_former_head import ZeroShotMaskFormerHead 6 | from .heads.per_pixel_baseline import ( 7 | PerPixelBaselineHead, 8 | PerPixelBaselinePlusHead, 9 | ZeroPerPixelBaselineHead, 10 | ) 11 | from .heads.pixel_decoder import BasePixelDecoder 12 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/mask_former/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/mask_former/modeling/heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/mask_former/modeling/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/mask_former/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | from .events import setup_wandb, WandbWriter 3 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/requirements.txt: -------------------------------------------------------------------------------- 1 | cython 2 | scipy 3 | shapely 4 | timm 5 | h5py 6 | wandb 7 | fire 8 | mmcv -------------------------------------------------------------------------------- /third_party/zsseg.baseline/resources/ade_thing_stuff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/zsseg.baseline/resources/ade_thing_stuff.png -------------------------------------------------------------------------------- /third_party/zsseg.baseline/resources/coco_thing_stuff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/zsseg.baseline/resources/coco_thing_stuff.png -------------------------------------------------------------------------------- /third_party/zsseg.baseline/resources/proposal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/zsseg.baseline/resources/proposal.png -------------------------------------------------------------------------------- /third_party/zsseg.baseline/third_party/CLIP/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.py[cod] 3 | *$py.class 4 | *.egg-info 5 | .pytest_cache 6 | .ipynb_checkpoints 7 | 8 | thumbs.db 9 | .DS_Store 10 | .idea 11 | data/ 12 | *.pkl 13 | .theia 14 | tmp 15 | */tmp 16 | wandb/ 17 | */wandb 18 | .history -------------------------------------------------------------------------------- /third_party/zsseg.baseline/third_party/CLIP/CLIP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/zsseg.baseline/third_party/CLIP/CLIP.png -------------------------------------------------------------------------------- /third_party/zsseg.baseline/third_party/CLIP/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 OpenAI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/third_party/CLIP/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include clip/bpe_simple_vocab_16e6.txt.gz 2 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/third_party/CLIP/clip/__init__.py: -------------------------------------------------------------------------------- 1 | from .clip import * 2 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/third_party/CLIP/clip/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/zsseg.baseline/third_party/CLIP/clip/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /third_party/zsseg.baseline/third_party/CLIP/requirements.txt: -------------------------------------------------------------------------------- 1 | ftfy 2 | regex 3 | tqdm 4 | torch 5 | torchvision 6 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/third_party/CLIP/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pkg_resources 4 | from setuptools import setup, find_packages 5 | 6 | setup( 7 | name="clip", 8 | py_modules=["clip"], 9 | version="1.0", 10 | description="", 11 | author="OpenAI", 12 | packages=find_packages(exclude=["tests*"]), 13 | install_requires=[ 14 | str(r) 15 | for r in pkg_resources.parse_requirements( 16 | open(os.path.join(os.path.dirname(__file__), "requirements.txt")) 17 | ) 18 | ], 19 | include_package_data=True, 20 | extras_require={"dev": ["pytest"]}, 21 | ) 22 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/third_party/CLIP/tests/test_consistency.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import torch 4 | from PIL import Image 5 | 6 | import clip 7 | 8 | 9 | @pytest.mark.parametrize("model_name", clip.available_models()) 10 | def test_consistency(model_name): 11 | device = "cpu" 12 | jit_model, transform = clip.load(model_name, device=device, jit=True) 13 | py_model, _ = clip.load(model_name, device=device, jit=False) 14 | 15 | image = transform(Image.open("CLIP.png")).unsqueeze(0).to(device) 16 | text = clip.tokenize(["a diagram", "a dog", "a cat"]).to(device) 17 | 18 | with torch.no_grad(): 19 | logits_per_image, _ = jit_model(image, text) 20 | jit_probs = logits_per_image.softmax(dim=-1).cpu().numpy() 21 | 22 | logits_per_image, _ = py_model(image, text) 23 | py_probs = logits_per_image.softmax(dim=-1).cpu().numpy() 24 | 25 | assert np.allclose(jit_probs, py_probs, atol=0.01, rtol=0.1) 26 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/tools/convert-pretrained-swin-model-to-d2.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | import pickle as pkl 5 | import sys 6 | 7 | import torch 8 | 9 | """ 10 | Usage: 11 | # download pretrained swin model: 12 | wget https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth 13 | # run the conversion 14 | ./convert-pretrained-swin-model-to-d2.py swin_tiny_patch4_window7_224.pth swin_tiny_patch4_window7_224.pkl 15 | # Then, use swin_tiny_patch4_window7_224.pkl with the following changes in config: 16 | MODEL: 17 | WEIGHTS: "/path/to/swin_tiny_patch4_window7_224.pkl" 18 | INPUT: 19 | FORMAT: "RGB" 20 | """ 21 | 22 | if __name__ == "__main__": 23 | input = sys.argv[1] 24 | 25 | obj = torch.load(input, map_location="cpu")["model"] 26 | 27 | res = {"model": obj, "__author__": "third_party", "matching_heuristics": True} 28 | 29 | with open(sys.argv[2], "wb") as f: 30 | pkl.dump(res, f) 31 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/tools/parse_name.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | cfg_path = sys.argv[1] 4 | print(cfg_path.split(".")[0].replace("/", "_")) 5 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/tools/self_training.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/zsseg.baseline/tools/self_training.sh -------------------------------------------------------------------------------- /trainers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/trainers/__init__.py -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import torch.distributed as dist 2 | 3 | 4 | def is_dist_avail_and_initialized(): 5 | if not dist.is_available(): 6 | return False 7 | if not dist.is_initialized(): 8 | return False 9 | return True 10 | 11 | 12 | def get_rank(): 13 | if not is_dist_avail_and_initialized(): 14 | return 0 15 | return dist.get_rank() 16 | 17 | 18 | def is_main_process(): 19 | return get_rank() == 0 20 | 21 | 22 | def print_args(args, cfg): 23 | print("***************") 24 | print("** Arguments **") 25 | print("***************") 26 | optkeys = list(args.__dict__.keys()) 27 | optkeys.sort() 28 | for key in optkeys: 29 | print("{}: {}".format(key, args.__dict__[key])) 30 | print("************") 31 | print("** Config **") 32 | print("************") 33 | print(cfg) 34 | --------------------------------------------------------------------------------
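
The helpers in utils.py above exist so that multi-GPU (DDP) runs do not duplicate console output: get_rank() falls back to rank 0 whenever torch.distributed is unavailable or uninitialized, so the same code path also works in single-GPU runs. Below is a minimal usage sketch of that pattern; the script name, the --backbone flag, and the plain dict standing in for a yacs CfgNode are illustrative assumptions, not code from this repository.

# run_sketch.py -- minimal sketch of how the utils.py helpers are used;
# the flag names and the stand-in cfg are assumptions, not repo code.
import argparse

from utils import is_main_process, print_args


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--backbone", default="ViT-B/16")
    args = parser.parse_args()

    # Stand-in for the yacs CfgNode that print_args() would normally
    # receive from a training entry point.
    cfg = {"OPTIM": {"LR": 0.002, "MAX_EPOCH": 5}}

    # Under torchrun/DDP every rank executes this function, but only
    # rank 0 prints; without dist.init_process_group(), get_rank()
    # returns 0 and the guard is a no-op.
    if is_main_process():
        print_args(args, cfg)


if __name__ == "__main__":
    main()

Gating all logging behind is_main_process() keeps rank-0 output identical between single-GPU and multi-GPU launches, which is why the fallback to rank 0 in get_rank() matters.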