├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── NOTICE ├── README.md ├── align_uniform.py ├── clip ├── __init__.py ├── bpe_simple_vocab_16e6.txt.gz ├── clip.py ├── model.py └── simple_tokenizer.py ├── clip_words.csv ├── configs ├── datasets │ ├── caltech101.yaml │ ├── dtd.yaml │ ├── eurosat.yaml │ ├── fgvc_aircraft.yaml │ ├── food101.yaml │ ├── imagenet.yaml │ ├── imagenet_21k.yaml │ ├── imagenet_a.yaml │ ├── imagenet_r.yaml │ ├── imagenet_sketch.yaml │ ├── imagenetv2.yaml │ ├── oxford_flowers.yaml │ ├── oxford_pets.yaml │ ├── stanford_cars.yaml │ ├── sun397.yaml │ └── ucf101.yaml └── trainers │ ├── CoCoOp │ ├── vit_b16_c16_ep10_batch1.yaml │ ├── vit_b16_c4_ep10_batch1.yaml │ ├── vit_b16_c4_ep10_batch1_ctxv1.yaml │ └── vit_b16_c8_ep10_batch1.yaml │ ├── CoOp │ ├── rn101.yaml │ ├── rn101_ep50.yaml │ ├── rn50.yaml │ ├── rn50_ctxv1.yaml │ ├── rn50_ep100.yaml │ ├── rn50_ep50.yaml │ ├── rn50_ep50_ctxv1.yaml │ ├── rn50_val.yaml │ ├── vit_b16.yaml │ ├── vit_b16_ep100.yaml │ ├── vit_b16_ep50.yaml │ ├── vit_b16_val.yaml │ ├── vit_b32.yaml │ ├── vit_b32_ep50.yaml │ ├── vit_b32_val.yaml │ └── vit_l14.yaml │ ├── MLP │ ├── rn50_ep20.yaml │ ├── rn50_ep50.yaml │ ├── vit_b16_ep20.yaml │ ├── vit_b16_ep50.yaml │ ├── vit_b32_ep20.yaml │ └── vit_b32_ep50.yaml │ ├── MaPLe │ ├── vit_b16_c2_ep5_batch4_2ctx.yaml │ ├── vit_b16_c2_ep5_batch4_2ctx_cross_datasets.yaml │ └── vit_b32_c2_ep5_batch4_2ctx_cross_datasets.yaml │ ├── POMP │ ├── rn50_ep20_randaug2.yaml │ ├── rn50_ep5.yaml │ ├── vit_b16_ep20.yaml │ ├── vit_b16_ep20_randaug2.yaml │ ├── vit_b16_ep5.yaml │ ├── vit_b16_ep5_randaug2.yaml │ └── vit_b32_ep20_randaug2.yaml │ └── VPT │ ├── vit_b16_c2_ep5_batch4_4.yaml │ └── vit_b32_c2_ep5_batch4_4.yaml ├── datasets ├── __init__.py ├── caltech101.py ├── dtd.py ├── eurosat.py ├── fgvc_aircraft.py ├── food101.py ├── imagenet.py ├── imagenet_21k.py ├── imagenet_a.py ├── imagenet_r.py ├── imagenet_sketch.py ├── imagenetv2.py ├── oxford_flowers.py ├── oxford_pets.py ├── stanford_cars.py ├── sun397.py └── ucf101.py ├── docs ├── DATASETS.md ├── INSTALL.md ├── MODELS.md ├── RUN.md └── main_figure.png ├── requirements.txt ├── scripts ├── cocoop │ ├── base2new_test.sh │ ├── base2new_train.sh │ ├── xd_test.sh │ └── xd_train.sh ├── coop │ ├── eval.sh │ ├── main.sh │ └── multi_scripts.sh ├── maple │ ├── base2new_test_maple.sh │ ├── base2new_train_maple.sh │ ├── reproduce_maple.sh │ ├── reproduce_maple_xd.sh │ ├── xd_test_maple.sh │ └── xd_train_maple.sh ├── mlp │ └── main.sh ├── pomp │ ├── eval.sh │ ├── main.sh │ ├── multi_scripts.sh │ └── xd_test.sh ├── vpt │ ├── base2new_test_vpt.sh │ ├── base2new_train_vpt.sh │ ├── reproduce_vpt.sh │ ├── xd_test_vpt.sh │ └── xd_train_vpt.sh └── zsclip │ ├── xd_test.sh │ └── zeroshot.sh ├── third_party ├── Dassl.pytorch │ ├── .flake8 │ ├── .gitignore │ ├── .isort.cfg │ ├── .style.yapf │ ├── DATASETS.md │ ├── LICENSE │ ├── README.md │ ├── configs │ │ ├── README.md │ │ ├── datasets │ │ │ ├── da │ │ │ │ ├── cifar_stl.yaml │ │ │ │ ├── digit5.yaml │ │ │ │ ├── domainnet.yaml │ │ │ │ ├── mini_domainnet.yaml │ │ │ │ ├── office31.yaml │ │ │ │ ├── office_home.yaml │ │ │ │ └── visda17.yaml │ │ │ ├── dg │ │ │ │ ├── camelyon17.yaml │ │ │ │ ├── cifar100_c.yaml │ │ │ │ ├── cifar10_c.yaml │ │ │ │ ├── digit_single.yaml │ │ │ │ ├── digits_dg.yaml │ │ │ │ ├── fmow.yaml │ │ │ │ ├── iwildcam.yaml │ │ │ │ ├── office_home_dg.yaml │ │ │ │ ├── pacs.yaml │ │ │ │ └── vlcs.yaml │ │ │ └── ssl │ │ │ │ ├── cifar10.yaml │ │ │ │ ├── cifar100.yaml │ │ │ │ ├── stl10.yaml │ │ │ │ └── svhn.yaml │ │ └── trainers │ │ │ ├── da 
│ │ │ ├── cdac │ │ │ │ ├── digit5.yaml │ │ │ │ ├── domainnet.yaml │ │ │ │ └── mini_domainnet.yaml │ │ │ ├── dael │ │ │ │ ├── digit5.yaml │ │ │ │ ├── domainnet.yaml │ │ │ │ └── mini_domainnet.yaml │ │ │ ├── m3sda │ │ │ │ ├── digit5.yaml │ │ │ │ ├── domainnet.yaml │ │ │ │ └── mini_domainnet.yaml │ │ │ └── source_only │ │ │ │ ├── digit5.yaml │ │ │ │ ├── mini_domainnet.yaml │ │ │ │ ├── office31.yaml │ │ │ │ └── visda17.yaml │ │ │ ├── dg │ │ │ ├── daeldg │ │ │ │ ├── digits_dg.yaml │ │ │ │ ├── office_home_dg.yaml │ │ │ │ └── pacs.yaml │ │ │ ├── ddaig │ │ │ │ ├── digits_dg.yaml │ │ │ │ ├── office_home_dg.yaml │ │ │ │ └── pacs.yaml │ │ │ └── vanilla │ │ │ │ ├── digits_dg.yaml │ │ │ │ ├── mini_domainnet.yaml │ │ │ │ ├── office_home_dg.yaml │ │ │ │ └── pacs.yaml │ │ │ └── ssl │ │ │ └── fixmatch │ │ │ └── cifar10.yaml │ ├── dassl │ │ ├── __init__.py │ │ ├── config │ │ │ ├── __init__.py │ │ │ └── defaults.py │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── data_manager.py │ │ │ ├── datasets │ │ │ │ ├── __init__.py │ │ │ │ ├── base_dataset.py │ │ │ │ ├── build.py │ │ │ │ ├── da │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── cifarstl.py │ │ │ │ │ ├── digit5.py │ │ │ │ │ ├── domainnet.py │ │ │ │ │ ├── mini_domainnet.py │ │ │ │ │ ├── office31.py │ │ │ │ │ ├── office_home.py │ │ │ │ │ └── visda17.py │ │ │ │ ├── dg │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── cifar_c.py │ │ │ │ │ ├── digit_single.py │ │ │ │ │ ├── digits_dg.py │ │ │ │ │ ├── office_home_dg.py │ │ │ │ │ ├── pacs.py │ │ │ │ │ ├── vlcs.py │ │ │ │ │ └── wilds │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── camelyon17.py │ │ │ │ │ │ ├── fmow.py │ │ │ │ │ │ ├── iwildcam.py │ │ │ │ │ │ └── wilds_base.py │ │ │ │ └── ssl │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── cifar.py │ │ │ │ │ ├── stl10.py │ │ │ │ │ └── svhn.py │ │ │ ├── samplers.py │ │ │ └── transforms │ │ │ │ ├── __init__.py │ │ │ │ ├── autoaugment.py │ │ │ │ ├── randaugment.py │ │ │ │ └── transforms.py │ │ ├── engine │ │ │ ├── __init__.py │ │ │ ├── build.py │ │ │ ├── da │ │ │ │ ├── __init__.py │ │ │ │ ├── adabn.py │ │ │ │ ├── adda.py │ │ │ │ ├── cdac.py │ │ │ │ ├── dael.py │ │ │ │ ├── dann.py │ │ │ │ ├── m3sda.py │ │ │ │ ├── mcd.py │ │ │ │ ├── mme.py │ │ │ │ ├── se.py │ │ │ │ └── source_only.py │ │ │ ├── dg │ │ │ │ ├── __init__.py │ │ │ │ ├── crossgrad.py │ │ │ │ ├── daeldg.py │ │ │ │ ├── ddaig.py │ │ │ │ ├── domain_mix.py │ │ │ │ └── vanilla.py │ │ │ ├── ssl │ │ │ │ ├── __init__.py │ │ │ │ ├── entmin.py │ │ │ │ ├── fixmatch.py │ │ │ │ ├── mean_teacher.py │ │ │ │ ├── mixmatch.py │ │ │ │ └── sup_baseline.py │ │ │ └── trainer.py │ │ ├── evaluation │ │ │ ├── __init__.py │ │ │ ├── build.py │ │ │ └── evaluator.py │ │ ├── metrics │ │ │ ├── __init__.py │ │ │ ├── accuracy.py │ │ │ └── distance.py │ │ ├── modeling │ │ │ ├── __init__.py │ │ │ ├── backbone │ │ │ │ ├── __init__.py │ │ │ │ ├── alexnet.py │ │ │ │ ├── backbone.py │ │ │ │ ├── build.py │ │ │ │ ├── cnn_digit5_m3sda.py │ │ │ │ ├── cnn_digitsdg.py │ │ │ │ ├── cnn_digitsingle.py │ │ │ │ ├── efficientnet │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── model.py │ │ │ │ │ └── utils.py │ │ │ │ ├── preact_resnet18.py │ │ │ │ ├── resnet.py │ │ │ │ ├── resnet_dynamic.py │ │ │ │ ├── vgg.py │ │ │ │ └── wide_resnet.py │ │ │ ├── head │ │ │ │ ├── __init__.py │ │ │ │ ├── build.py │ │ │ │ └── mlp.py │ │ │ ├── network │ │ │ │ ├── __init__.py │ │ │ │ ├── build.py │ │ │ │ └── ddaig_fcn.py │ │ │ └── ops │ │ │ │ ├── __init__.py │ │ │ │ ├── attention.py │ │ │ │ ├── conv.py │ │ │ │ ├── cross_entropy.py │ │ │ │ ├── dsbn.py │ │ │ │ ├── efdmix.py │ │ │ │ ├── mixstyle.py │ │ │ │ ├── mixup.py │ │ │ │ 
├── mmd.py │ │ │ │ ├── optimal_transport.py │ │ │ │ ├── reverse_grad.py │ │ │ │ ├── sequential2.py │ │ │ │ ├── transnorm.py │ │ │ │ └── utils.py │ │ ├── optim │ │ │ ├── __init__.py │ │ │ ├── lr_scheduler.py │ │ │ ├── optimizer.py │ │ │ └── radam.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── logger.py │ │ │ ├── meters.py │ │ │ ├── registry.py │ │ │ ├── tools.py │ │ │ └── torchtools.py │ ├── datasets │ │ ├── da │ │ │ ├── cifar_stl.py │ │ │ ├── digit5.py │ │ │ └── visda17.sh │ │ ├── dg │ │ │ └── cifar_c.py │ │ └── ssl │ │ │ ├── cifar10_cifar100_svhn.py │ │ │ └── stl10.py │ ├── linter.sh │ ├── requirements.txt │ ├── setup.py │ └── tools │ │ ├── parse_test_res.py │ │ ├── replace_text.py │ │ └── train.py ├── Detic │ ├── .DS_Store │ ├── .gitignore │ ├── .gitmodules │ ├── CODE_OF_CONDUCT.md │ ├── CONTRIBUTING.md │ ├── LICENSE │ ├── README.md │ ├── cherry_pick.py │ ├── cog.yaml │ ├── configs │ │ ├── Base-C2_L_R5021k_640b64_4x.yaml │ │ ├── Base-DeformDETR_L_R50_4x.yaml │ │ ├── Base_OVCOCO_C4_1x.yaml │ │ ├── BoxSup-C2_LCOCO_CLIP_CXT21k_640b32_4x.yaml │ │ ├── BoxSup-C2_LCOCO_CLIP_R18_640b32_4x.yaml │ │ ├── BoxSup-C2_LCOCO_CLIP_R5021k_640b64_4x.yaml │ │ ├── BoxSup-C2_LCOCO_CLIP_SwinB_896b32_4x.yaml │ │ ├── BoxSup-C2_L_CLIP_R5021k_640b64_4x.yaml │ │ ├── BoxSup-C2_L_CLIP_R5021k_640b64_4x_pomp.yaml │ │ ├── BoxSup-C2_L_CLIP_SwinB_896b32_4x.yaml │ │ ├── BoxSup-C2_Lbase_CLIP_R5021k_640b64_4x.yaml │ │ ├── BoxSup-C2_Lbase_CLIP_R5021k_640b64_4x_pomp.yaml │ │ ├── BoxSup-C2_Lbase_CLIP_SwinB_896b32_4x.yaml │ │ ├── BoxSup-DeformDETR_L_R50_2x.yaml │ │ ├── BoxSup-DeformDETR_L_R50_4x.yaml │ │ ├── BoxSup_OVCOCO_CLIP_R50_1x.yaml │ │ ├── BoxSup_OVCOCO_CLIP_R50_1x_pomp.yaml │ │ ├── BoxSup_ViLD_200e.py │ │ ├── Detic_DeformDETR_LI_R50_4x_ft4x.yaml │ │ ├── Detic_LCOCOI21k_CLIP_CXT21k_640b32_4x_ft4x_max-size.yaml │ │ ├── Detic_LCOCOI21k_CLIP_R18_640b32_4x_ft4x_max-size.yaml │ │ ├── Detic_LCOCOI21k_CLIP_R5021k_640b32_4x_ft4x_max-size.yaml │ │ ├── Detic_LCOCOI21k_CLIP_SwinB_896b32_4x_ft4x_max-size.yaml │ │ ├── Detic_LI21k_CLIP_SwinB_896b32_4x_ft4x_max-size.yaml │ │ ├── Detic_LI_CLIP_R5021k_640b64_4x_ft4x_max-size.yaml │ │ ├── Detic_LI_CLIP_R5021k_640b64_4x_ft4x_max-size_pomp.yaml │ │ ├── Detic_LI_CLIP_R5021k_640b64_4x_ft4x_max-size_pomp_cross_datasets.yaml │ │ ├── Detic_LI_CLIP_SwinB_896b32_4x_ft4x_max-size.yaml │ │ ├── Detic_LbaseCCcapimg_CLIP_R5021k_640b64_4x_ft4x_max-size.yaml │ │ ├── Detic_LbaseCCimg_CLIP_R5021k_640b64_4x_ft4x_max-size.yaml │ │ ├── Detic_LbaseI_CLIP_R5021k_640b64_4x_ft4x_max-size.yaml │ │ ├── Detic_LbaseI_CLIP_R5021k_640b64_4x_ft4x_max-size_pomp.yaml │ │ ├── Detic_LbaseI_CLIP_R5021k_640b64_4x_ft4x_predicted.yaml │ │ ├── Detic_LbaseI_CLIP_SwinB_896b32_4x_ft4x_max-size.yaml │ │ ├── Detic_OVCOCO_CLIP_R50_1x_caption.yaml │ │ ├── Detic_OVCOCO_CLIP_R50_1x_max-size.yaml │ │ ├── Detic_OVCOCO_CLIP_R50_1x_max-size_caption.yaml │ │ ├── Detic_OVCOCO_CLIP_R50_1x_max-size_caption_pomp.yaml │ │ ├── Detic_ViLD_200e.py │ │ └── debug.yaml │ ├── datasets │ │ ├── README.md │ │ └── metadata │ │ │ ├── Objects365_names_fix.csv │ │ │ ├── coco_clip_a+cname.npy │ │ │ ├── coco_clip_pomp+cname.npy │ │ │ ├── imagenet_lvis_wnid.txt │ │ │ ├── lvis_v1_clip_a+cname.npy │ │ │ ├── lvis_v1_clip_pomp+cname.npy │ │ │ ├── lvis_v1_train_cat_info.json │ │ │ ├── o365_clip_a+cnamefix.npy │ │ │ ├── o365_fixname_clip_pomp+cname.npy │ │ │ └── oid_clip_a+cname.npy │ ├── demo.py │ ├── demo │ │ ├── demo.ipynb │ │ └── environment.yaml │ ├── detic │ │ ├── __init__.py │ │ ├── config.py │ │ ├── custom_solver.py │ │ ├── data │ │ │ ├── 
custom_build_augmentation.py │ │ │ ├── custom_dataset_dataloader.py │ │ │ ├── custom_dataset_mapper.py │ │ │ ├── datasets │ │ │ │ ├── cc.py │ │ │ │ ├── coco_zeroshot.py │ │ │ │ ├── imagenet.py │ │ │ │ ├── lvis_22k_categories.py │ │ │ │ ├── lvis_v1.py │ │ │ │ ├── objects365.py │ │ │ │ ├── oid.py │ │ │ │ └── register_oid.py │ │ │ ├── tar_dataset.py │ │ │ └── transforms │ │ │ │ ├── custom_augmentation_impl.py │ │ │ │ └── custom_transform.py │ │ ├── evaluation │ │ │ ├── custom_coco_eval.py │ │ │ └── oideval.py │ │ ├── modeling │ │ │ ├── backbone │ │ │ │ ├── swintransformer.py │ │ │ │ └── timm.py │ │ │ ├── debug.py │ │ │ ├── meta_arch │ │ │ │ ├── custom_rcnn.py │ │ │ │ └── d2_deformable_detr.py │ │ │ ├── roi_heads │ │ │ │ ├── detic_fast_rcnn.py │ │ │ │ ├── detic_roi_heads.py │ │ │ │ ├── res5_roi_heads.py │ │ │ │ └── zero_shot_classifier.py │ │ │ ├── text │ │ │ │ └── text_encoder.py │ │ │ └── utils.py │ │ └── predictor.py │ ├── docs │ │ ├── INSTALL.md │ │ ├── MODEL_ZOO.md │ │ ├── example_output_custom.jpeg │ │ ├── example_output_lvis.jpeg │ │ └── teaser.jpeg │ ├── extract.py │ ├── figures │ │ ├── .DS_Store │ │ ├── Detic_LI_CLIP_R5021k_640b64_4x_ft4x_max-size │ │ │ ├── .DS_Store │ │ │ └── inference_lvis_v1_val │ │ │ │ └── .DS_Store │ │ └── Detic_LI_CLIP_R5021k_640b64_4x_ft4x_max-size_gpt │ │ │ └── .DS_Store │ ├── lazy_train_net.py │ ├── predict.py │ ├── requirements.txt │ ├── third_party │ │ ├── CenterNet2 │ │ │ ├── .github │ │ │ │ ├── CODE_OF_CONDUCT.md │ │ │ │ ├── CONTRIBUTING.md │ │ │ │ ├── Detectron2-Logo-Horz.svg │ │ │ │ ├── ISSUE_TEMPLATE.md │ │ │ │ ├── ISSUE_TEMPLATE │ │ │ │ │ ├── bugs.md │ │ │ │ │ ├── config.yml │ │ │ │ │ ├── documentation.md │ │ │ │ │ ├── feature-request.md │ │ │ │ │ └── unexpected-problems-bugs.md │ │ │ │ ├── pull_request_template.md │ │ │ │ └── workflows │ │ │ │ │ ├── check-template.yml │ │ │ │ │ ├── levenshtein.js │ │ │ │ │ ├── needs-reply.yml │ │ │ │ │ ├── remove-needs-reply.yml │ │ │ │ │ └── workflow.yml │ │ │ ├── .gitignore │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── centernet │ │ │ │ ├── __init__.py │ │ │ │ ├── config.py │ │ │ │ ├── data │ │ │ │ │ ├── custom_build_augmentation.py │ │ │ │ │ ├── custom_dataset_dataloader.py │ │ │ │ │ ├── datasets │ │ │ │ │ │ ├── coco.py │ │ │ │ │ │ ├── nuimages.py │ │ │ │ │ │ └── objects365.py │ │ │ │ │ └── transforms │ │ │ │ │ │ ├── custom_augmentation_impl.py │ │ │ │ │ │ └── custom_transform.py │ │ │ │ └── modeling │ │ │ │ │ ├── backbone │ │ │ │ │ ├── bifpn.py │ │ │ │ │ ├── bifpn_fcos.py │ │ │ │ │ ├── dla.py │ │ │ │ │ ├── dlafpn.py │ │ │ │ │ ├── fpn_p5.py │ │ │ │ │ └── res2net.py │ │ │ │ │ ├── debug.py │ │ │ │ │ ├── dense_heads │ │ │ │ │ ├── centernet.py │ │ │ │ │ ├── centernet_head.py │ │ │ │ │ └── utils.py │ │ │ │ │ ├── layers │ │ │ │ │ ├── deform_conv.py │ │ │ │ │ ├── heatmap_focal_loss.py │ │ │ │ │ ├── iou_loss.py │ │ │ │ │ └── ml_nms.py │ │ │ │ │ ├── meta_arch │ │ │ │ │ └── centernet_detector.py │ │ │ │ │ └── roi_heads │ │ │ │ │ ├── custom_fast_rcnn.py │ │ │ │ │ ├── custom_roi_heads.py │ │ │ │ │ └── fed_loss.py │ │ │ ├── configs │ │ │ │ ├── Base-CenterNet-FPN.yaml │ │ │ │ ├── Base-CenterNet2.yaml │ │ │ │ ├── Base_S4_DLA.yaml │ │ │ │ ├── CenterNet-FPN_R50_1x.yaml │ │ │ │ ├── CenterNet-S4_DLA_8x.yaml │ │ │ │ ├── CenterNet2-F_R50_1x.yaml │ │ │ │ ├── CenterNet2_DLA-BiFPN-P3_24x.yaml │ │ │ │ ├── CenterNet2_DLA-BiFPN-P3_4x.yaml │ │ │ │ ├── CenterNet2_DLA-BiFPN-P5_640_16x.yaml │ │ │ │ ├── CenterNet2_DLA-BiFPN-P5_640_16x_ST.yaml │ │ │ │ ├── CenterNet2_DLA-fcosBiFPN-P5_640_16x_ST.yaml │ │ │ │ ├── 
CenterNet2_R2-101-DCN-BiFPN_1280_4x.yaml │ │ │ │ ├── CenterNet2_R2-101-DCN-BiFPN_4x+4x_1560_ST.yaml │ │ │ │ ├── CenterNet2_R2-101-DCN_896_4x.yaml │ │ │ │ ├── CenterNet2_R50_1x.yaml │ │ │ │ ├── CenterNet2_X101-DCN_2x.yaml │ │ │ │ ├── LVIS_CenterNet2_R50_1x.yaml │ │ │ │ ├── LVIS_CenterNet2_R50_Fed_1x.yaml │ │ │ │ ├── O365_CenterNet2_R50_1x.yaml │ │ │ │ └── nuImages_CenterNet2_DLA_640_8x.yaml │ │ │ ├── datasets │ │ │ │ └── README.md │ │ │ ├── demo.py │ │ │ ├── docs │ │ │ │ └── MODEL_ZOO.md │ │ │ ├── predictor.py │ │ │ ├── requirements.txt │ │ │ ├── tools │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── analyze_model.py │ │ │ │ ├── benchmark.py │ │ │ │ ├── convert-torchvision-to-d2.py │ │ │ │ ├── deploy │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ ├── README.md │ │ │ │ │ ├── export_model.py │ │ │ │ │ └── torchscript_mask_rcnn.cpp │ │ │ │ ├── lazyconfig_train_net.py │ │ │ │ ├── lightning_train_net.py │ │ │ │ ├── plain_train_net.py │ │ │ │ ├── train_net.py │ │ │ │ ├── visualize_data.py │ │ │ │ └── visualize_json_results.py │ │ │ └── train_net.py │ │ └── Deformable-DETR │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── benchmark.py │ │ │ ├── configs │ │ │ ├── r50_deformable_detr.sh │ │ │ ├── r50_deformable_detr_plus_iterative_bbox_refinement.sh │ │ │ ├── r50_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh │ │ │ ├── r50_deformable_detr_single_scale.sh │ │ │ └── r50_deformable_detr_single_scale_dc5.sh │ │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ ├── coco.py │ │ │ ├── coco_eval.py │ │ │ ├── coco_panoptic.py │ │ │ ├── data_prefetcher.py │ │ │ ├── panoptic_eval.py │ │ │ ├── samplers.py │ │ │ ├── torchvision_datasets │ │ │ │ ├── __init__.py │ │ │ │ └── coco.py │ │ │ └── transforms.py │ │ │ ├── docs │ │ │ └── changelog.md │ │ │ ├── engine.py │ │ │ ├── main.py │ │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── backbone.py │ │ │ ├── deformable_detr.py │ │ │ ├── deformable_transformer.py │ │ │ ├── matcher.py │ │ │ ├── ops │ │ │ │ ├── functions │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── ms_deform_attn_func.py │ │ │ │ ├── make.sh │ │ │ │ ├── modules │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── ms_deform_attn.py │ │ │ │ ├── setup.py │ │ │ │ ├── src │ │ │ │ │ ├── cpu │ │ │ │ │ │ ├── ms_deform_attn_cpu.cpp │ │ │ │ │ │ └── ms_deform_attn_cpu.h │ │ │ │ │ ├── cuda │ │ │ │ │ │ ├── ms_deform_attn_cuda.cu │ │ │ │ │ │ ├── ms_deform_attn_cuda.h │ │ │ │ │ │ └── ms_deform_im2col_cuda.cuh │ │ │ │ │ ├── ms_deform_attn.h │ │ │ │ │ └── vision.cpp │ │ │ │ └── test.py │ │ │ ├── position_encoding.py │ │ │ └── segmentation.py │ │ │ ├── requirements.txt │ │ │ ├── tools │ │ │ ├── launch.py │ │ │ ├── run_dist_launch.sh │ │ │ └── run_dist_slurm.sh │ │ │ └── util │ │ │ ├── __init__.py │ │ │ ├── box_ops.py │ │ │ ├── misc.py │ │ │ └── plot_utils.py │ ├── tools │ │ ├── convert-thirdparty-pretrained-model-to-d2.py │ │ ├── create_imagenetlvis_json.py │ │ ├── create_lvis_21k.py │ │ ├── download_cc.py │ │ ├── dump_clip_features.py │ │ ├── fix_o365_names.py │ │ ├── fix_o365_path.py │ │ ├── get_cc_tags.py │ │ ├── get_coco_zeroshot_oriorder.py │ │ ├── get_imagenet_21k_full_tar_json.py │ │ ├── get_lvis_cat_info.py │ │ ├── merge_lvis_coco.py │ │ ├── preprocess_imagenet22k.py │ │ ├── remove_lvis_rare.py │ │ ├── tar_dataset.py │ │ └── unzip_imagenet_lvis.py │ └── train_net.py └── zsseg.baseline │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── configs │ ├── ade20k-150 │ │ ├── cross_dataset_imagenet_prompt_test_only.yaml │ │ ├── cross_dataset_learned_prompt_test_only.yaml │ │ ├── cross_dataset_pomp_prompt_test_only.yaml │ │ ├── 
cross_dataset_single_prompt_test_only.yaml │ │ ├── cross_dataset_test_only.yaml │ │ └── cross_dataset_vild_prompt_test_only.yaml │ ├── ade20k-847 │ │ └── cross_dataset_test_only.yaml │ ├── cityscapes-19 │ │ ├── cross_dataset_maskformer_R101c_bs32_cart_prompt_test_only.yaml │ │ ├── cross_dataset_maskformer_R101c_bs32_learned_prompt_test_only.yaml │ │ └── cross_dataset_maskformer_R101c_bs32_test_only.yaml │ ├── coco-stuff-164k-156 │ │ ├── zero_shot_maskformer_R101c_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R101c_imagenet_prompt_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R101c_pomp_prompt_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R101c_single_prompt_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R101c_single_prompt_clip_rn101_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R101c_single_prompt_clip_rn50_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R101c_single_prompt_clip_rn50x16_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R101c_single_prompt_clip_rn50x4_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R101c_single_prompt_clip_vit-bx32_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R101c_vild_prompt_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R50_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R50_imagenet_prompt_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R50_single_prompt_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R50_vild_prompt_bs32_60k.yaml │ │ ├── zero_shot_perpixel_R101c_single_prompt_bs32_60k.yaml │ │ ├── zero_shot_proposal_classification_bs32_10k.yaml │ │ ├── zero_shot_proposal_classification_learn_prompt_bs32_10k.yaml │ │ └── zero_shot_proposal_classification_learn_prompt_pomp_bs32_10k.yaml │ ├── coco-stuff-164k-171 │ │ ├── Base-COCOStuff164K-171.yaml │ │ ├── maskformer_R101c_bs32_60k.yaml │ │ ├── maskformer_R50_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R101c_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R101c_imagenet_prompt_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R101c_pomp_prompt_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R101c_single_prompt_bs32_60k.yaml │ │ ├── zero_shot_maskformer_R101c_vild_prompt_bs32_60k.yaml │ │ └── zero_shot_maskformer_R50_bs32_60k.yaml │ ├── pcontext-59 │ │ ├── cross_dataset_learned_prompt_test_only.yaml │ │ ├── cross_dataset_pomp_prompt_test_only.yaml │ │ └── cross_dataset_test_only.yaml │ ├── voc-11k-15 │ │ ├── zero_shot_maskformer_R101c_bs16_20k.yaml │ │ ├── zero_shot_maskformer_R101c_imagenet_prompt_bs16_20k.yaml │ │ ├── zero_shot_maskformer_R101c_pomp_prompt_bs16_20k.yaml │ │ ├── zero_shot_maskformer_R101c_single_prompt_bs16_20k.yaml │ │ ├── zero_shot_maskformer_R101c_vild_prompt_bs16_20k.yaml │ │ ├── zero_shot_maskformer_R50_bs16_20k.yaml │ │ ├── zero_shot_maskformer_R50_single_prompt_bs16_20k.yaml │ │ ├── zero_shot_proposal_classification_learn_prompt_bs16_10k.yaml │ │ └── zero_shot_proposal_classification_learn_prompt_pomp_bs16_10k.yaml │ └── voc-11k-20 │ │ ├── Base-VOC11K-20.yaml │ │ ├── maskformer_R101c_bs16_20k.yaml │ │ ├── maskformer_R50_bs16_20k.yaml │ │ └── zero_shot_maskformer_R101c_bs16_test_only.yaml │ ├── datasets │ ├── prepare_ade20k_sem_seg.py │ ├── prepare_coco_stuff_164k_sem_seg.py │ ├── prepare_pcontext_sem_seg.py │ └── prepare_voc_sem_seg.py │ ├── mask_former │ ├── __init__.py │ ├── ablation │ │ ├── __init__.py │ │ ├── oracle_mask_former_model.py │ │ ├── zero_shot_per_pixel_model.py │ │ └── zero_shot_proposal_based_model.py │ ├── config.py │ ├── data │ │ ├── __init__.py │ │ ├── augmentations.py │ │ ├── build.py │ │ ├── dataset_mappers │ │ │ ├── __init__.py │ │ │ ├── mask_former_binary_semantic_dataset_mapper.py │ │ │ ├── 
mask_former_semantic_dataset_mapper.py │ │ │ ├── oracle_dataset_mapper.py │ │ │ └── proposal_classification_dataset_mapper.py │ │ └── datasets │ │ │ ├── __init__.py │ │ │ ├── register_coco_stuff.py │ │ │ ├── register_pcontext.py │ │ │ ├── register_voc_seg.py │ │ │ └── utils.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── classification_evaluation.py │ │ ├── generalized_sem_seg_evaluation.py │ │ ├── my_generalized_sem_seg_evaluation.py │ │ └── pseudo_sem_seg_evaluation.py │ ├── mask_former_model.py │ ├── modeling │ │ ├── __init__.py │ │ ├── backbone │ │ │ ├── __init__.py │ │ │ ├── clip_resnet.py │ │ │ └── swin.py │ │ ├── clip_adapter │ │ │ ├── __init__.py │ │ │ ├── adapter.py │ │ │ ├── text_prompt.py │ │ │ └── utils.py │ │ ├── criterion.py │ │ ├── heads │ │ │ ├── __init__.py │ │ │ ├── mask_former_head.py │ │ │ ├── per_pixel_baseline.py │ │ │ ├── pixel_decoder.py │ │ │ └── zero_shot_mask_former_head.py │ │ ├── matcher.py │ │ └── transformer │ │ │ ├── __init__.py │ │ │ ├── position_encoding.py │ │ │ ├── transformer.py │ │ │ ├── transformer_predictor.py │ │ │ └── zero_shot_transformer_predictor.py │ ├── proposal_classification.py │ ├── test_time_augmentation.py │ ├── utils │ │ ├── __init__.py │ │ ├── events.py │ │ ├── misc.py │ │ ├── post_process_utils.py │ │ └── selective_search.py │ └── zero_shot_mask_former_model.py │ ├── requirements.txt │ ├── resources │ ├── ade20k_150_stuff.txt │ ├── ade_thing_stuff.png │ ├── coco_thing_stuff.png │ └── proposal.png │ ├── third_party │ └── CLIP │ │ ├── .gitignore │ │ ├── CLIP.png │ │ ├── LICENSE │ │ ├── MANIFEST.in │ │ ├── README.md │ │ ├── clip │ │ ├── __init__.py │ │ ├── bpe_simple_vocab_16e6.txt.gz │ │ ├── clip.py │ │ ├── model.py │ │ └── simple_tokenizer.py │ │ ├── model-card.md │ │ ├── requirements.txt │ │ ├── setup.py │ │ └── tests │ │ └── test_consistency.py │ ├── tools │ ├── convert-pretrained-clip-model-to-d2.py │ ├── convert-pretrained-swin-model-to-d2.py │ ├── convert-torchvision-to-d2.py │ ├── json2dir.py │ ├── mask_cls_collect.py │ ├── parse_name.py │ └── self_training.sh │ └── train_net.py ├── train.py ├── trainers ├── __init__.py ├── clip_mlp.py ├── cocoop.py ├── coop.py ├── imagenet_templates.py ├── maple.py ├── pomp.py ├── vpt.py └── zsclip.py ├── utils.py └── validation_test.py /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | -------------------------------------------------------------------------------- /align_uniform.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Adapted from https://github.com/lancopku/clip-openness 3 | ''' 4 | import torch 5 | 6 | 7 | def align_loss(x, y, alpha=2): 8 | return (x - y).norm(p=2, dim=1).pow(alpha).mean() 9 | 10 | 11 | def uniform_loss(x, t=2): 12 | return torch.pdist(x, p=2).pow(2).mul(-t).exp().mean().log() 13 | 14 | -------------------------------------------------------------------------------- /clip/__init__.py: -------------------------------------------------------------------------------- 1 | from .clip import * 2 | -------------------------------------------------------------------------------- /clip/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/clip/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /configs/datasets/caltech101.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "Caltech101" 3 | -------------------------------------------------------------------------------- /configs/datasets/dtd.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "DescribableTextures" 3 | -------------------------------------------------------------------------------- /configs/datasets/eurosat.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "EuroSAT" 3 | -------------------------------------------------------------------------------- /configs/datasets/fgvc_aircraft.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "FGVCAircraft" 3 | -------------------------------------------------------------------------------- /configs/datasets/food101.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "Food101" 3 | -------------------------------------------------------------------------------- /configs/datasets/imagenet.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "ImageNet" 3 | -------------------------------------------------------------------------------- /configs/datasets/imagenet_21k.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "ImageNet21K" 3 | -------------------------------------------------------------------------------- /configs/datasets/imagenet_a.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "ImageNetA" 3 | -------------------------------------------------------------------------------- /configs/datasets/imagenet_r.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "ImageNetR" 3 | -------------------------------------------------------------------------------- /configs/datasets/imagenet_sketch.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "ImageNetSketch" 3 | -------------------------------------------------------------------------------- /configs/datasets/imagenetv2.yaml: 
-------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "ImageNetV2" 3 | -------------------------------------------------------------------------------- /configs/datasets/oxford_flowers.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "OxfordFlowers" -------------------------------------------------------------------------------- /configs/datasets/oxford_pets.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "OxfordPets" -------------------------------------------------------------------------------- /configs/datasets/stanford_cars.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "StanfordCars" 3 | -------------------------------------------------------------------------------- /configs/datasets/sun397.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "SUN397" 3 | -------------------------------------------------------------------------------- /configs/datasets/ucf101.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "UCF101" 3 | -------------------------------------------------------------------------------- /configs/trainers/CoCoOp/vit_b16_c16_ep10_batch1.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 1 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 10 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 20 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "ViT-B/16" 30 | 31 | TRAINER: 32 | COCOOP: 33 | N_CTX: 16 34 | CTX_INIT: "" 35 | PREC: "fp16" -------------------------------------------------------------------------------- /configs/trainers/CoCoOp/vit_b16_c4_ep10_batch1.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 1 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 10 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 20 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "ViT-B/16" 30 | 31 | TRAINER: 32 | COCOOP: 33 | N_CTX: 4 34 | CTX_INIT: "" 35 | PREC: "fp16" -------------------------------------------------------------------------------- /configs/trainers/CoCoOp/vit_b16_c4_ep10_batch1_ctxv1.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 1 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: 
[0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 10 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 20 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "ViT-B/16" 30 | 31 | TRAINER: 32 | COCOOP: 33 | N_CTX: 4 34 | CTX_INIT: "a photo of a" 35 | PREC: "fp16" -------------------------------------------------------------------------------- /configs/trainers/CoCoOp/vit_b16_c8_ep10_batch1.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 1 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 10 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 20 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "ViT-B/16" 30 | 31 | TRAINER: 32 | COCOOP: 33 | N_CTX: 8 34 | CTX_INIT: "" 35 | PREC: "fp16" -------------------------------------------------------------------------------- /configs/trainers/CoOp/rn101.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 200 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 5 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "RN101" -------------------------------------------------------------------------------- /configs/trainers/CoOp/rn101_ep50.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 50 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 5 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "RN101" -------------------------------------------------------------------------------- /configs/trainers/CoOp/rn50.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 
200 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 5 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "RN50" -------------------------------------------------------------------------------- /configs/trainers/CoOp/rn50_ctxv1.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 200 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 5 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "RN50" 30 | 31 | TRAINER: 32 | COOP: 33 | CTX_INIT: "a photo of a" 34 | -------------------------------------------------------------------------------- /configs/trainers/CoOp/rn50_ep100.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 100 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 5 26 | 27 | TEST: 28 | PER_CLASS_RESULT: True 29 | 30 | MODEL: 31 | BACKBONE: 32 | NAME: "RN50" 33 | -------------------------------------------------------------------------------- /configs/trainers/CoOp/rn50_ep50.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 50 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 5 26 | 27 | TEST: 28 | PER_CLASS_RESULT: True 29 | 30 | MODEL: 31 | BACKBONE: 32 | NAME: "RN50" -------------------------------------------------------------------------------- /configs/trainers/CoOp/rn50_ep50_ctxv1.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 50 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 5 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "RN50" 30 | 
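# Per CoOp's PromptLearner: when CTX_INIT is non-empty, the learnable context
# vectors are initialized from the token embedding of "a photo of a" (with the
# number of context tokens inferred from the phrase) rather than sampled from
# a random Gaussian, which is what the "_ctxv1" suffix denotes.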
31 | TRAINER: 32 | COOP: 33 | CTX_INIT: "a photo of a" -------------------------------------------------------------------------------- /configs/trainers/CoOp/rn50_val.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 100 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | MODEL: 16 | BACKBONE: 17 | NAME: "RN50" -------------------------------------------------------------------------------- /configs/trainers/CoOp/vit_b16.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 200 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 5 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "ViT-B/16" -------------------------------------------------------------------------------- /configs/trainers/CoOp/vit_b16_ep100.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 100 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 5 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "ViT-B/16" -------------------------------------------------------------------------------- /configs/trainers/CoOp/vit_b16_ep50.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 50 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 5 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "ViT-B/16" -------------------------------------------------------------------------------- /configs/trainers/CoOp/vit_b16_val.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 100 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: 
["random_resized_crop", "random_flip", "normalize"] 14 | 15 | MODEL: 16 | BACKBONE: 17 | NAME: "ViT-B/16" -------------------------------------------------------------------------------- /configs/trainers/CoOp/vit_b32.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 200 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 5 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "ViT-B/32" -------------------------------------------------------------------------------- /configs/trainers/CoOp/vit_b32_ep50.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 50 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 5 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "ViT-B/32" -------------------------------------------------------------------------------- /configs/trainers/CoOp/vit_b32_val.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 100 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | MODEL: 16 | BACKBONE: 17 | NAME: "ViT-B/32" -------------------------------------------------------------------------------- /configs/trainers/CoOp/vit_l14.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 200 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 5 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "ViT-L/14" -------------------------------------------------------------------------------- /configs/trainers/MLP/rn50_ep20.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | SAMPLER: "DistributedSampler" 5 | TEST: 6 | BATCH_SIZE: 100 7 | SAMPLER: "SequentialDistributedSampler" 8 | NUM_WORKERS: 8 9 | 10 | INPUT: 11 | SIZE: (224, 224) 12 | INTERPOLATION: "bicubic" 13 | 
PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 14 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 15 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 16 | 17 | OPTIM: 18 | NAME: "sgd" 19 | LR: 0.2 20 | MAX_EPOCH: 20 21 | LR_SCHEDULER: "cosine" 22 | WARMUP_EPOCH: 1 23 | WARMUP_TYPE: "constant" 24 | WARMUP_CONS_LR: 1e-3 25 | 26 | TRAIN: 27 | PRINT_FREQ: 100 28 | 29 | MODEL: 30 | BACKBONE: 31 | NAME: "RN50" 32 | -------------------------------------------------------------------------------- /configs/trainers/MLP/rn50_ep50.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | SAMPLER: "DistributedSampler" 5 | TEST: 6 | BATCH_SIZE: 100 7 | SAMPLER: "SequentialDistributedSampler" 8 | NUM_WORKERS: 8 9 | 10 | INPUT: 11 | SIZE: (224, 224) 12 | INTERPOLATION: "bicubic" 13 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 14 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 15 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 16 | 17 | OPTIM: 18 | NAME: "sgd" 19 | LR: 0.2 20 | MAX_EPOCH: 50 21 | LR_SCHEDULER: "cosine" 22 | WARMUP_EPOCH: 1 23 | WARMUP_TYPE: "constant" 24 | WARMUP_CONS_LR: 1e-3 25 | 26 | TRAIN: 27 | PRINT_FREQ: 100 28 | 29 | MODEL: 30 | BACKBONE: 31 | NAME: "RN50" 32 | -------------------------------------------------------------------------------- /configs/trainers/MLP/vit_b16_ep20.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | SAMPLER: "DistributedSampler" 5 | TEST: 6 | BATCH_SIZE: 100 7 | SAMPLER: "SequentialDistributedSampler" 8 | NUM_WORKERS: 8 9 | 10 | INPUT: 11 | SIZE: (224, 224) 12 | INTERPOLATION: "bicubic" 13 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 14 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 15 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 16 | 17 | OPTIM: 18 | NAME: "sgd" 19 | LR: 0.2 20 | MAX_EPOCH: 20 21 | LR_SCHEDULER: "cosine" 22 | WARMUP_EPOCH: 1 23 | WARMUP_TYPE: "constant" 24 | WARMUP_CONS_LR: 1e-3 25 | 26 | TRAIN: 27 | PRINT_FREQ: 100 28 | 29 | MODEL: 30 | BACKBONE: 31 | NAME: "ViT-B/16" 32 | -------------------------------------------------------------------------------- /configs/trainers/MLP/vit_b16_ep50.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | SAMPLER: "DistributedSampler" 5 | TEST: 6 | BATCH_SIZE: 100 7 | SAMPLER: "SequentialDistributedSampler" 8 | NUM_WORKERS: 8 9 | 10 | INPUT: 11 | SIZE: (224, 224) 12 | INTERPOLATION: "bicubic" 13 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 14 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 15 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 16 | 17 | OPTIM: 18 | NAME: "sgd" 19 | LR: 0.02 20 | MAX_EPOCH: 50 21 | LR_SCHEDULER: "cosine" 22 | WARMUP_EPOCH: 1 23 | WARMUP_TYPE: "constant" 24 | WARMUP_CONS_LR: 1e-4 25 | 26 | TRAIN: 27 | PRINT_FREQ: 100 28 | 29 | MODEL: 30 | BACKBONE: 31 | NAME: "ViT-B/16" 32 | -------------------------------------------------------------------------------- /configs/trainers/MLP/vit_b32_ep20.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | SAMPLER: "DistributedSampler" 5 | TEST: 6 | BATCH_SIZE: 100 7 | SAMPLER: "SequentialDistributedSampler" 8 | NUM_WORKERS: 8 9 | 10 | INPUT: 11 | SIZE: (224, 224) 12 | INTERPOLATION: "bicubic" 13 | PIXEL_MEAN: 
[0.48145466, 0.4578275, 0.40821073] 14 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 15 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 16 | 17 | OPTIM: 18 | NAME: "sgd" 19 | LR: 0.2 20 | MAX_EPOCH: 20 21 | LR_SCHEDULER: "cosine" 22 | WARMUP_EPOCH: 1 23 | WARMUP_TYPE: "constant" 24 | WARMUP_CONS_LR: 1e-3 25 | 26 | TRAIN: 27 | PRINT_FREQ: 100 28 | 29 | MODEL: 30 | BACKBONE: 31 | NAME: "ViT-B/32" 32 | -------------------------------------------------------------------------------- /configs/trainers/MLP/vit_b32_ep50.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | SAMPLER: "DistributedSampler" 5 | TEST: 6 | BATCH_SIZE: 100 7 | SAMPLER: "SequentialDistributedSampler" 8 | NUM_WORKERS: 8 9 | 10 | INPUT: 11 | SIZE: (224, 224) 12 | INTERPOLATION: "bicubic" 13 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 14 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 15 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 16 | 17 | OPTIM: 18 | NAME: "sgd" 19 | LR: 0.002 20 | MAX_EPOCH: 50 21 | LR_SCHEDULER: "cosine" 22 | WARMUP_EPOCH: 1 23 | WARMUP_TYPE: "constant" 24 | WARMUP_CONS_LR: 1e-5 25 | 26 | TRAIN: 27 | PRINT_FREQ: 100 28 | 29 | TEST: 30 | PER_CLASS_RESULT: True 31 | 32 | MODEL: 33 | BACKBONE: 34 | NAME: "ViT-B/32" -------------------------------------------------------------------------------- /configs/trainers/MaPLe/vit_b16_c2_ep5_batch4_2ctx.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 4 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.0035 18 | MAX_EPOCH: 5 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 20 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "ViT-B/16" 30 | 31 | TRAINER: 32 | MAPLE: 33 | N_CTX: 2 34 | CTX_INIT: "a photo of a" 35 | PREC: "fp16" 36 | PROMPT_DEPTH: 9 -------------------------------------------------------------------------------- /configs/trainers/MaPLe/vit_b16_c2_ep5_batch4_2ctx_cross_datasets.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 4 4 | TEST: 5 | BATCH_SIZE: 500 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.0026 18 | MAX_EPOCH: 2 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 20 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "ViT-B/16" 30 | 31 | TRAINER: 32 | MAPLE: 33 | N_CTX: 2 34 | CTX_INIT: "a photo of a" 35 | PREC: "fp16" 36 | PROMPT_DEPTH: 3 -------------------------------------------------------------------------------- /configs/trainers/MaPLe/vit_b32_c2_ep5_batch4_2ctx_cross_datasets.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 4 4 | TEST: 5 | BATCH_SIZE: 100 6 | 
NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.0026 18 | MAX_EPOCH: 2 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 20 26 | 27 | MODEL: 28 | BACKBONE: 29 | NAME: "ViT-B/32" 30 | 31 | TRAINER: 32 | MAPLE: 33 | N_CTX: 2 34 | CTX_INIT: "a photo of a" 35 | PREC: "fp16" 36 | PROMPT_DEPTH: 3 -------------------------------------------------------------------------------- /configs/trainers/POMP/rn50_ep20_randaug2.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | # SAMPLER: "DistributedSampler" 5 | TEST: 6 | BATCH_SIZE: 100 7 | NUM_WORKERS: 8 8 | K_TRANSFORMS: 4 9 | 10 | INPUT: 11 | SIZE: (224, 224) 12 | INTERPOLATION: "bicubic" 13 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 14 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 15 | TRANSFORMS: ["random_resized_crop", "random_flip", "randaugment2", "normalize"] 16 | 17 | OPTIM: 18 | NAME: "sgd" 19 | LR: 0.002 20 | MAX_EPOCH: 20 21 | LR_SCHEDULER: "cosine" 22 | WARMUP_EPOCH: 1 23 | WARMUP_TYPE: "constant" 24 | WARMUP_CONS_LR: 1e-5 25 | 26 | TRAIN: 27 | CHECKPOINT_FREQ: 1 28 | PRINT_FREQ: 100 29 | 30 | TEST: 31 | NO_TEST: True 32 | PER_CLASS_RESULT: True 33 | 34 | MODEL: 35 | BACKBONE: 36 | NAME: "RN50" 37 | -------------------------------------------------------------------------------- /configs/trainers/POMP/rn50_ep5.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 5 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 100 26 | 27 | TEST: 28 | PER_CLASS_RESULT: True 29 | 30 | MODEL: 31 | BACKBONE: 32 | NAME: "RN50" 33 | -------------------------------------------------------------------------------- /configs/trainers/POMP/vit_b16_ep20.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | SAMPLER: "DistributedSampler" 5 | TEST: 6 | BATCH_SIZE: 100 7 | SAMPLER: "SequentialDistributedSampler" 8 | NUM_WORKERS: 8 9 | 10 | INPUT: 11 | SIZE: (224, 224) 12 | INTERPOLATION: "bicubic" 13 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 14 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 15 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 16 | 17 | OPTIM: 18 | NAME: "sgd" 19 | LR: 0.002 20 | MAX_EPOCH: 20 21 | LR_SCHEDULER: "cosine" 22 | WARMUP_EPOCH: 1 23 | WARMUP_TYPE: "constant" 24 | WARMUP_CONS_LR: 1e-5 25 | 26 | TRAIN: 27 | CHECKPOINT_FREQ: 1 28 | PRINT_FREQ: 100 29 | 30 | TEST: 31 | NO_TEST: True 32 | 33 | MODEL: 34 | BACKBONE: 35 | NAME: "ViT-B/16" 36 | -------------------------------------------------------------------------------- /configs/trainers/POMP/vit_b16_ep20_randaug2.yaml: 
-------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | SAMPLER: "DistributedSampler" 5 | TEST: 6 | BATCH_SIZE: 100 7 | SAMPLER: "SequentialDistributedSampler" 8 | NUM_WORKERS: 8 9 | K_TRANSFORMS: 4 10 | 11 | INPUT: 12 | SIZE: (224, 224) 13 | INTERPOLATION: "bicubic" 14 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 15 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 16 | TRANSFORMS: ["random_resized_crop", "random_flip", "randaugment2", "normalize"] 17 | 18 | OPTIM: 19 | NAME: "sgd" 20 | LR: 0.002 21 | MAX_EPOCH: 20 22 | LR_SCHEDULER: "cosine" 23 | WARMUP_EPOCH: 1 24 | WARMUP_TYPE: "constant" 25 | WARMUP_CONS_LR: 1e-5 26 | 27 | TRAIN: 28 | CHECKPOINT_FREQ: 1 29 | PRINT_FREQ: 100 30 | 31 | TEST: 32 | NO_TEST: True 33 | 34 | MODEL: 35 | BACKBONE: 36 | NAME: "ViT-B/16" 37 | -------------------------------------------------------------------------------- /configs/trainers/POMP/vit_b16_ep5.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 18 | MAX_EPOCH: 5 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | 24 | TRAIN: 25 | PRINT_FREQ: 100 26 | 27 | TEST: 28 | PER_CLASS_RESULT: True 29 | 30 | MODEL: 31 | BACKBONE: 32 | NAME: "ViT-B/16" 33 | -------------------------------------------------------------------------------- /configs/trainers/POMP/vit_b16_ep5_randaug2.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | SAMPLER: "DistributedSampler" 5 | TEST: 6 | BATCH_SIZE: 100 7 | SAMPLER: "SequentialDistributedSampler" 8 | NUM_WORKERS: 8 9 | K_TRANSFORMS: 4 10 | 11 | INPUT: 12 | SIZE: (224, 224) 13 | INTERPOLATION: "bicubic" 14 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 15 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 16 | TRANSFORMS: ["random_resized_crop", "random_flip", "randaugment2", "normalize"] 17 | 18 | OPTIM: 19 | NAME: "sgd" 20 | LR: 0.002 21 | MAX_EPOCH: 5 22 | LR_SCHEDULER: "cosine" 23 | WARMUP_EPOCH: 1 24 | WARMUP_TYPE: "constant" 25 | WARMUP_CONS_LR: 1e-5 26 | 27 | TRAIN: 28 | CHECKPOINT_FREQ: 1 29 | PRINT_FREQ: 100 30 | 31 | TEST: 32 | NO_TEST: True 33 | 34 | MODEL: 35 | BACKBONE: 36 | NAME: "ViT-B/16" 37 | -------------------------------------------------------------------------------- /configs/trainers/POMP/vit_b32_ep20_randaug2.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | # SAMPLER: "DistributedSampler" 5 | TEST: 6 | BATCH_SIZE: 100 7 | NUM_WORKERS: 8 8 | K_TRANSFORMS: 4 9 | 10 | INPUT: 11 | SIZE: (224, 224) 12 | INTERPOLATION: "bicubic" 13 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 14 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 15 | TRANSFORMS: ["random_resized_crop", "random_flip", "randaugment2", "normalize"] 16 | 17 | OPTIM: 18 | NAME: "sgd" 19 | LR: 0.002 20 | MAX_EPOCH: 20 21 | LR_SCHEDULER: "cosine" 22 | WARMUP_EPOCH: 1 23 | WARMUP_TYPE: "constant" 24 | WARMUP_CONS_LR: 1e-5 25 | 26 | TRAIN: 27 | CHECKPOINT_FREQ: 1 28 | 
PRINT_FREQ: 100 29 | 30 | TEST: 31 | NO_TEST: True 32 | PER_CLASS_RESULT: True 33 | 34 | MODEL: 35 | BACKBONE: 36 | NAME: "ViT-B/32" 37 | -------------------------------------------------------------------------------- /configs/trainers/VPT/vit_b16_c2_ep5_batch4_4.yaml: -------------------------------------------------------------------------------- 1 | # Deep vision prompting 2 | DATALOADER: 3 | TRAIN_X: 4 | BATCH_SIZE: 4 5 | TEST: 6 | BATCH_SIZE: 100 7 | NUM_WORKERS: 8 8 | 9 | INPUT: 10 | SIZE: (224, 224) 11 | INTERPOLATION: "bicubic" 12 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 13 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 14 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 15 | 16 | OPTIM: 17 | NAME: "sgd" 18 | LR: 0.0025 19 | MAX_EPOCH: 5 20 | LR_SCHEDULER: "cosine" 21 | WARMUP_EPOCH: 1 22 | WARMUP_TYPE: "constant" 23 | WARMUP_CONS_LR: 1e-5 24 | 25 | TRAIN: 26 | PRINT_FREQ: 20 27 | 28 | MODEL: 29 | BACKBONE: 30 | NAME: "ViT-B/16" 31 | 32 | TRAINER: 33 | VPT: 34 | N_CTX_VISION: 8 35 | CTX_INIT: "a photo of a" 36 | PREC: "fp16" 37 | PROMPT_DEPTH_VISION: 12 -------------------------------------------------------------------------------- /configs/trainers/VPT/vit_b32_c2_ep5_batch4_4.yaml: -------------------------------------------------------------------------------- 1 | # Deep vision prompting 2 | DATALOADER: 3 | TRAIN_X: 4 | BATCH_SIZE: 4 5 | TEST: 6 | BATCH_SIZE: 100 7 | NUM_WORKERS: 8 8 | 9 | INPUT: 10 | SIZE: (224, 224) 11 | INTERPOLATION: "bicubic" 12 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 13 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 14 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 15 | 16 | OPTIM: 17 | NAME: "sgd" 18 | LR: 0.0025 19 | MAX_EPOCH: 5 20 | LR_SCHEDULER: "cosine" 21 | WARMUP_EPOCH: 1 22 | WARMUP_TYPE: "constant" 23 | WARMUP_CONS_LR: 1e-5 24 | 25 | TRAIN: 26 | PRINT_FREQ: 20 27 | 28 | MODEL: 29 | BACKBONE: 30 | NAME: "ViT-B/32" 31 | 32 | TRAINER: 33 | VPT: 34 | N_CTX_VISION: 8 35 | CTX_INIT: "a photo of a" 36 | PREC: "fp16" 37 | PROMPT_DEPTH_VISION: 12 -------------------------------------------------------------------------------- /datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/datasets/__init__.py -------------------------------------------------------------------------------- /docs/main_figure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/docs/main_figure.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ftfy==6.1.1 2 | tqdm==4.64.0 3 | wandb -------------------------------------------------------------------------------- /scripts/cocoop/base2new_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #cd ../.. 4 | 5 | # custom config 6 | DATA="/path/to/dataset/folder" 7 | TRAINER=CoCoOp 8 | 9 | DATASET=$1 10 | SEED=$2 11 | 12 | CFG=vit_b16_c4_ep10_batch1_ctxv1 13 | SHOTS=16 14 | 15 | 16 | DIR=output/base2new/train_base/${DATASET}/shots_${SHOTS}/${TRAINER}/${CFG}/seed${SEED} 17 | if [ -d "$DIR" ]; then 18 | echo "Results are available in ${DIR}. Resuming..." 
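# Note: no --resume flag is passed here; re-running the same train.py command relies on Dassl's trainer finding and loading the latest checkpoint in --output-dir (assumed default auto-resume behavior).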
19 | python train.py \ 20 | --root ${DATA} \ 21 | --seed ${SEED} \ 22 | --trainer ${TRAINER} \ 23 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 24 | --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ 25 | --output-dir ${DIR} \ 26 | DATASET.NUM_SHOTS ${SHOTS} \ 27 | DATASET.SUBSAMPLE_CLASSES base 28 | else 29 | echo "Run this job and save the output to ${DIR}" 30 | python train.py \ 31 | --root ${DATA} \ 32 | --seed ${SEED} \ 33 | --trainer ${TRAINER} \ 34 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 35 | --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ 36 | --output-dir ${DIR} \ 37 | DATASET.NUM_SHOTS ${SHOTS} \ 38 | DATASET.SUBSAMPLE_CLASSES base 39 | fi -------------------------------------------------------------------------------- /scripts/cocoop/xd_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #cd ../.. 4 | 5 | # custom config 6 | DATA=~/efs/data/ 7 | TRAINER=CoCoOp 8 | 9 | DATASET=imagenet 10 | SEED=$1 11 | 12 | CFG=vit_b16_c4_ep10_batch1_ctxv1 13 | SHOTS=16 14 | 15 | 16 | DIR=output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots/seed${SEED} 17 | if [ -d "$DIR" ]; then 18 | echo "Results are available in ${DIR}. Skip this job" 19 | else 20 | echo "Run this job and save the output to ${DIR}" 21 | 22 | python train.py \ 23 | --root ${DATA} \ 24 | --seed ${SEED} \ 25 | --trainer ${TRAINER} \ 26 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 27 | --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ 28 | --output-dir ${DIR} \ 29 | DATASET.NUM_SHOTS ${SHOTS} 30 | fi 31 | -------------------------------------------------------------------------------- /scripts/coop/multi_scripts.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 sh scripts/coop/main.sh imagenet rn50_ep50 end 16 16 False 2 | CUDA_VISIBLE_DEVICES=3 sh scripts/coop/main.sh food101 rn50_ep100 end 16 16 False 3 | CUDA_VISIBLE_DEVICES=4 sh scripts/coop/main.sh stanford_cars rn50_ep100 end 16 16 False 4 | CUDA_VISIBLE_DEVICES=5 sh scripts/coop/main.sh fgvc_aircraft rn50_ep100 end 16 16 False 5 | CUDA_VISIBLE_DEVICES=6 sh scripts/coop/main.sh oxford_pets rn50_ep100 end 16 16 False 6 | CUDA_VISIBLE_DEVICES=7 sh scripts/coop/main.sh caltech101 rn50_ep100 end 16 16 False 7 | CUDA_VISIBLE_DEVICES=0 sh scripts/coop/main.sh oxford_flowers rn50_ep100 end 16 16 False 8 | CUDA_VISIBLE_DEVICES=1 sh scripts/coop/main.sh eurosat rn50_ep100 end 16 16 False 9 | CUDA_VISIBLE_DEVICES=2 sh scripts/coop/main.sh dtd rn50_ep100 end 16 16 False 10 | CUDA_VISIBLE_DEVICES=3 sh scripts/coop/main.sh sun397 rn50_ep100 end 16 16 False 11 | CUDA_VISIBLE_DEVICES=4 sh scripts/coop/main.sh ucf101 rn50_ep100 end 16 16 False -------------------------------------------------------------------------------- /scripts/maple/base2new_train_maple.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #cd ../.. 4 | 5 | # custom config 6 | DATA="/path/to/dataset/folder" 7 | TRAINER=MaPLe 8 | 9 | DATASET=$1 10 | SEED=$2 11 | 12 | CFG=vit_b16_c2_ep5_batch4_2ctx 13 | SHOTS=16 14 | 15 | 16 | DIR=output/base2new/train_base/${DATASET}/shots_${SHOTS}/${TRAINER}/${CFG}/seed${SEED} 17 | if [ -d "$DIR" ]; then 18 | echo "Results are available in ${DIR}. Resuming..." 
19 | python train.py \ 20 | --root ${DATA} \ 21 | --seed ${SEED} \ 22 | --trainer ${TRAINER} \ 23 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 24 | --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ 25 | --output-dir ${DIR} \ 26 | DATASET.NUM_SHOTS ${SHOTS} \ 27 | DATASET.SUBSAMPLE_CLASSES base 28 | else 29 | echo "Run this job and save the output to ${DIR}" 30 | python train.py \ 31 | --root ${DATA} \ 32 | --seed ${SEED} \ 33 | --trainer ${TRAINER} \ 34 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 35 | --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ 36 | --output-dir ${DIR} \ 37 | DATASET.NUM_SHOTS ${SHOTS} \ 38 | DATASET.SUBSAMPLE_CLASSES base 39 | fi -------------------------------------------------------------------------------- /scripts/maple/reproduce_maple_xd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #cd ../.. 4 | 5 | # custom config 6 | DATA="/path/to/dataset/folder" 7 | TRAINER=MaPLe 8 | 9 | DATASET=$1 10 | SEED=$2 11 | WEIGHTSPATH=$3 12 | 13 | CFG=vit_b16_c2_ep5_batch4_2ctx_cross_datasets 14 | SHOTS=16 15 | LOADEP=2 16 | 17 | MODEL_DIR=${WEIGHTSPATH}/seed${SEED} 18 | 19 | DIR=output/evaluation/${TRAINER}/${CFG}_${SHOTS}shots/${DATASET}/seed${SEED} 20 | if [ -d "$DIR" ]; then 21 | echo "Results are already available in ${DIR}. Skipping..." 22 | else 23 | echo "Evaluating model" 24 | echo "Running the first phase job and save the output to ${DIR}" 25 | # Evaluate on evaluation datasets 26 | python train.py \ 27 | --root ${DATA} \ 28 | --seed ${SEED} \ 29 | --trainer ${TRAINER} \ 30 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 31 | --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ 32 | --output-dir ${DIR} \ 33 | --model-dir ${MODEL_DIR} \ 34 | --load-epoch ${LOADEP} \ 35 | --eval-only \ 36 | DATASET.NUM_SHOTS ${SHOTS} 37 | 38 | fi -------------------------------------------------------------------------------- /scripts/maple/xd_train_maple.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #cd ../.. 4 | 5 | # custom config 6 | DATA=~/efs/imagenet/ 7 | TRAINER=MaPLe 8 | 9 | DATASET=$1 10 | SEED=$2 11 | 12 | CFG=vit_b32_c2_ep5_batch4_2ctx_cross_datasets 13 | SHOTS=16 14 | 15 | 16 | DIR=output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots/seed${SEED} 17 | if [ -d "$DIR" ]; then 18 | echo "Results are available in ${DIR}." 19 | else 20 | echo "Run this job and save the output to ${DIR}" 21 | 22 | python train.py \ 23 | --root ${DATA} \ 24 | --seed ${SEED} \ 25 | --trainer ${TRAINER} \ 26 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 27 | --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ 28 | --output-dir ${DIR} \ 29 | DATASET.NUM_SHOTS ${SHOTS} 30 | fi 31 | -------------------------------------------------------------------------------- /scripts/mlp/main.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # custom config 4 | DATA=~/efs/data/ 5 | TRAINER=MLP 6 | 7 | DATASET=$1 8 | CFG=$2 # config file 9 | SHOTS=$3 # number of shots (1, 2, 4, 8, 16) 10 | 11 | 12 | for SEED in 42 13 | do 14 | DIR=output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots/seed${SEED} 15 | if [ -d "$DIR" ]; then 16 | echo "Results are available in ${DIR}.
Skip this job" 17 | else 18 | echo "Run this job and save the output to ${DIR}" 19 | python -m torch.distributed.launch --nproc_per_node 8 --master_port 12345 train.py \ 20 | --world-size 8 \ 21 | --root ${DATA} \ 22 | --seed ${SEED} \ 23 | --trainer ${TRAINER} \ 24 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 25 | --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ 26 | --output-dir ${DIR} \ 27 | DATASET.NUM_SHOTS ${SHOTS} \ 28 | TEST.FINAL_MODEL best_val 29 | fi 30 | done 31 | 32 | 33 | # sh scripts/mlp/main.sh imagenet_21k rn50_ep20 16 34 | # sh scripts/mlp/main.sh imagenet_21k vit_b32_ep20 16 35 | # sh scripts/mlp/main.sh imagenet_21k vit_b16_ep20 16 -------------------------------------------------------------------------------- /scripts/pomp/multi_scripts.sh: -------------------------------------------------------------------------------- 1 | sh scripts/pomp/main.sh imagenet_21k vit_b16_ep20 end 4 16 False 1000 2 | python validation_test.py -------------------------------------------------------------------------------- /scripts/pomp/xd_test.sh: -------------------------------------------------------------------------------- 1 | sh scripts/pomp/eval.sh oxford_pets 42 1000 2 | sh scripts/pomp/eval.sh oxford_flowers 42 1000 3 | sh scripts/pomp/eval.sh food101 42 1000 4 | sh scripts/pomp/eval.sh sun397 42 1000 5 | sh scripts/pomp/eval.sh stanford_cars 42 1000 6 | sh scripts/pomp/eval.sh ucf101 42 1000 7 | sh scripts/pomp/eval.sh eurosat 42 1000 8 | sh scripts/pomp/eval.sh fgvc_aircraft 42 1000 9 | sh scripts/pomp/eval.sh caltech101 42 1000 10 | sh scripts/pomp/eval.sh dtd 42 1000 11 | sh scripts/pomp/eval.sh imagenet_a 42 1000 12 | sh scripts/pomp/eval.sh imagenet_r 42 1000 13 | sh scripts/pomp/eval.sh imagenet_sketch 42 1000 14 | sh scripts/pomp/eval.sh imagenetv2 42 1000 15 | 16 | # sh scripts/pomp/xd_test.sh -------------------------------------------------------------------------------- /scripts/vpt/base2new_train_vpt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #cd ../.. 4 | 5 | # custom config 6 | DATA="/path/to/dataset/folder" 7 | TRAINER=VPT 8 | 9 | DATASET=$1 10 | SEED=$2 11 | 12 | CFG=vit_b16_c2_ep5_batch4_4 13 | SHOTS=16 14 | 15 | 16 | DIR=output/base2new/train_base/${DATASET}/shots_${SHOTS}/${TRAINER}/${CFG}/seed${SEED} 17 | if [ -d "$DIR" ]; then 18 | echo "Results are available in ${DIR}. Resuming..." 19 | python train.py \ 20 | --root ${DATA} \ 21 | --seed ${SEED} \ 22 | --trainer ${TRAINER} \ 23 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 24 | --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ 25 | --output-dir ${DIR} \ 26 | DATASET.NUM_SHOTS ${SHOTS} \ 27 | DATASET.SUBSAMPLE_CLASSES base 28 | else 29 | echo "Run this job and save the output to ${DIR}" 30 | python train.py \ 31 | --root ${DATA} \ 32 | --seed ${SEED} \ 33 | --trainer ${TRAINER} \ 34 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 35 | --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ 36 | --output-dir ${DIR} \ 37 | DATASET.NUM_SHOTS ${SHOTS} \ 38 | DATASET.SUBSAMPLE_CLASSES base 39 | fi -------------------------------------------------------------------------------- /scripts/vpt/xd_test_vpt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #cd ../.. 
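# Cross-dataset test: --model-dir points at the VPT prompts trained on ImageNet (see xd_train_vpt.sh) and --eval-only skips training on the target dataset.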
4 | 5 | # custom config 6 | DATA="/path/to/dataset/folder" 7 | TRAINER=VPT 8 | 9 | DATASET=$1 10 | SEED=$2 11 | 12 | CFG=vit_b16_c2_ep5_batch4_4 13 | SHOTS=16 14 | 15 | 16 | DIR=output/evaluation/${TRAINER}/${CFG}_${SHOTS}shots/${DATASET}/seed${SEED} 17 | if [ -d "$DIR" ]; then 18 | echo "Results are available in ${DIR}. Skip this job" 19 | else 20 | echo "Run this job and save the output to ${DIR}" 21 | 22 | python train.py \ 23 | --root ${DATA} \ 24 | --seed ${SEED} \ 25 | --trainer ${TRAINER} \ 26 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 27 | --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ 28 | --output-dir ${DIR} \ 29 | --model-dir output/imagenet/${TRAINER}/${CFG}_${SHOTS}shots/seed${SEED} \ 30 | --load-epoch 2 \ 31 | --eval-only 32 | fi -------------------------------------------------------------------------------- /scripts/vpt/xd_train_vpt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #cd ../.. 4 | 5 | # custom config 6 | DATA=~/efs/imagenet/ 7 | TRAINER=VPT 8 | 9 | DATASET=$1 10 | SEED=$2 11 | 12 | CFG=vit_b16_c2_ep5_batch4_4 13 | SHOTS=16 14 | 15 | 16 | DIR=output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots/seed${SEED} 17 | if [ -d "$DIR" ]; then 18 | echo "Results are available in ${DIR}." 19 | else 20 | echo "Run this job and save the output to ${DIR}" 21 | 22 | python train.py \ 23 | --root ${DATA} \ 24 | --seed ${SEED} \ 25 | --trainer ${TRAINER} \ 26 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 27 | --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ 28 | --output-dir ${DIR} \ 29 | DATASET.NUM_SHOTS ${SHOTS} 30 | fi 31 | 32 | # CUDA_VISIBLE_DEVICES=0 sh scripts/vpt/xd_train_vpt.sh imagenet 42 -------------------------------------------------------------------------------- /scripts/zsclip/xd_test.sh: -------------------------------------------------------------------------------- 1 | sh scripts/zsclip/zeroshot.sh oxford_pets vit_b16 2 | sh scripts/zsclip/zeroshot.sh oxford_flowers vit_b16 3 | sh scripts/zsclip/zeroshot.sh food101 vit_b16 4 | sh scripts/zsclip/zeroshot.sh sun397 vit_b16 5 | sh scripts/zsclip/zeroshot.sh stanford_cars vit_b16 6 | sh scripts/zsclip/zeroshot.sh ucf101 vit_b16 7 | sh scripts/zsclip/zeroshot.sh eurosat vit_b16 8 | sh scripts/zsclip/zeroshot.sh fgvc_aircraft vit_b16 9 | sh scripts/zsclip/zeroshot.sh caltech101 vit_b16 10 | sh scripts/zsclip/zeroshot.sh dtd vit_b16 11 | sh scripts/zsclip/zeroshot.sh imagenet_a vit_b16 12 | sh scripts/zsclip/zeroshot.sh imagenet_r vit_b16 13 | sh scripts/zsclip/zeroshot.sh imagenet_sketch vit_b16 14 | sh scripts/zsclip/zeroshot.sh imagenetv2 vit_b16 15 | 16 | # sh scripts/zsclip/xd_test.sh -------------------------------------------------------------------------------- /scripts/zsclip/zeroshot.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #cd ../..
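# Zero-shot CLIP baseline: nothing is trained; the CoOp config below is borrowed only for backbone and input settings, and --eval-only runs the evaluation directly.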
4 | 5 | # custom config 6 | DATA=~/efs/data/ 7 | TRAINER=ZeroshotCLIP 8 | DATASET=$1 9 | CFG=$2 # rn50, rn101, vit_b32 or vit_b16 vit_l14 10 | 11 | python train.py \ 12 | --root ${DATA} \ 13 | --trainer ${TRAINER} \ 14 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 15 | --config-file configs/trainers/CoOp/${CFG}.yaml \ 16 | --output-dir output/${TRAINER}/${CFG}/${DATASET} \ 17 | --eval-only 18 | 19 | # CUDA_VISIBLE_DEVICES=0 sh scripts/zsclip/zeroshot.sh imagenet_21k rn50 20 | # CUDA_VISIBLE_DEVICES=0 sh scripts/zsclip/zeroshot.sh oxford_pets vit_l14 -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = 3 | # At least two spaces before inline comment 4 | E261, 5 | # Line lengths are recommended to be no greater than 79 characters 6 | E501, 7 | # Missing whitespace around arithmetic operator 8 | E226, 9 | # Blank line contains whitespace 10 | W293, 11 | # Do not use bare 'except' 12 | E722, 13 | # Line break after binary operator 14 | W504, 15 | # Too many leading '#' for block comment 16 | E266, 17 | # Line break before binary operator 18 | W503, 19 | # Continuation line over-indented for hanging indent 20 | E126, 21 | # Module level import not at top of file 22 | E402 23 | max-line-length = 79 24 | exclude = __init__.py, build -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/.isort.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length=79 3 | multi_line_output=6 4 | length_sort=true 5 | known_standard_library=numpy,setuptools 6 | known_myself=dassl 7 | known_third_party=matplotlib,cv2,torch,torchvision,PIL,yacs,scipy,gdown 8 | no_lines_before=STDLIB,THIRDPARTY 9 | sections=FUTURE,STDLIB,THIRDPARTY,myself,FIRSTPARTY,LOCALFOLDER 10 | default_section=FIRSTPARTY -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | BASED_ON_STYLE = pep8 3 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true 4 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true 5 | DEDENT_CLOSING_BRACKETS = true 6 | SPACES_BEFORE_COMMENT = 2 7 | ARITHMETIC_PRECEDENCE_INDICATION = true -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Kaiyang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/README.md: -------------------------------------------------------------------------------- 1 | The `datasets/` folder contains dataset-specific config files which define the standard protocols (e.g., image size, data augmentation, network architecture) used by most papers. The `trainers/` folder contains method-specific config files which define optimization algorithms (e.g., optimizer, epoch) and hyperparameter settings. 2 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/da/cifar_stl.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (32, 32) 3 | PIXEL_MEAN: [0.5, 0.5, 0.5] 4 | PIXEL_STD: [0.5, 0.5, 0.5] 5 | 6 | DATASET: 7 | NAME: "CIFARSTL" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/da/digit5.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (32, 32) 3 | PIXEL_MEAN: [0.5, 0.5, 0.5] 4 | PIXEL_STD: [0.5, 0.5, 0.5] 5 | TRANSFORMS: ["normalize"] 6 | 7 | DATASET: 8 | NAME: "Digit5" 9 | 10 | MODEL: 11 | BACKBONE: 12 | NAME: "cnn_digit5_m3sda" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/da/domainnet.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (224, 224) 3 | TRANSFORMS: ["random_flip", "random_translation", "normalize"] 4 | 5 | DATASET: 6 | NAME: "DomainNet" 7 | 8 | MODEL: 9 | BACKBONE: 10 | NAME: "resnet101" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/da/mini_domainnet.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (96, 96) 3 | TRANSFORMS: ["random_flip", "random_translation", "normalize"] 4 | 5 | DATASET: 6 | NAME: "miniDomainNet" 7 | 8 | MODEL: 9 | BACKBONE: 10 | NAME: "resnet18" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/da/office31.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (224, 224) 3 | TRANSFORMS: ["random_flip", "random_translation", "normalize"] 4 | 5 | DATASET: 6 | NAME: "Office31" 7 | 8 | MODEL: 9 | BACKBONE: 10 | NAME: "resnet50" 11 | HEAD: 12 | NAME: "mlp" 13 | HIDDEN_LAYERS: [256] 14 | DROPOUT: 0. 
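The Dassl configs README above describes a two-layer scheme: dataset files pin the data protocol (input size, transforms, backbone) while trainer files pin the optimization recipe. A minimal sketch of composing the two with yacs-style merging, assuming the same order the repo's train.py would use (the exact CLI wiring is not shown in this dump):

```python
from dassl.config import get_cfg_default

# Later merges override earlier keys: defaults <- dataset config <- trainer config <- CLI opts.
cfg = get_cfg_default()
cfg.merge_from_file("third_party/Dassl.pytorch/configs/datasets/da/office31.yaml")
cfg.merge_from_file("third_party/Dassl.pytorch/configs/trainers/da/source_only/office31.yaml")
cfg.merge_from_list(["OPTIM.LR", 0.001])  # command-line style override wins last
cfg.freeze()
print(cfg.MODEL.BACKBONE.NAME, cfg.OPTIM.LR)  # -> resnet50 0.001
```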
-------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/da/office_home.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (224, 224) 3 | 4 | DATASET: 5 | NAME: "OfficeHome" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/da/visda17.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (224, 224) 3 | TRANSFORMS: ["random_flip", "center_crop", "normalize"] 4 | 5 | DATASET: 6 | NAME: "VisDA17" 7 | 8 | MODEL: 9 | BACKBONE: 10 | NAME: "resnet101" 11 | 12 | TEST: 13 | PER_CLASS_RESULT: True -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/dg/camelyon17.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (224, 224) 3 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 4 | 5 | DATASET: 6 | NAME: "Camelyon17" 7 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/dg/cifar100_c.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (32, 32) 3 | TRANSFORMS: ["random_flip", "random_crop", "normalize"] 4 | PIXEL_MEAN: [0.5, 0.5, 0.5] 5 | PIXEL_STD: [0.5, 0.5, 0.5] 6 | 7 | DATASET: 8 | NAME: "CIFAR100C" 9 | CIFAR_C_TYPE: "fog" 10 | CIFAR_C_LEVEL: 5 11 | 12 | MODEL: 13 | BACKBONE: 14 | NAME: "wide_resnet_16_4" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/dg/cifar10_c.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (32, 32) 3 | TRANSFORMS: ["random_flip", "random_crop", "normalize"] 4 | PIXEL_MEAN: [0.5, 0.5, 0.5] 5 | PIXEL_STD: [0.5, 0.5, 0.5] 6 | 7 | DATASET: 8 | NAME: "CIFAR10C" 9 | CIFAR_C_TYPE: "fog" 10 | CIFAR_C_LEVEL: 5 11 | 12 | MODEL: 13 | BACKBONE: 14 | NAME: "wide_resnet_16_4" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/dg/digit_single.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (32, 32) 3 | TRANSFORMS: ["normalize"] 4 | PIXEL_MEAN: [0.5, 0.5, 0.5] 5 | PIXEL_STD: [0.5, 0.5, 0.5] 6 | 7 | DATASET: 8 | NAME: "DigitSingle" 9 | 10 | MODEL: 11 | BACKBONE: 12 | NAME: "cnn_digitsingle" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/dg/digits_dg.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (32, 32) 3 | TRANSFORMS: ["normalize"] 4 | PIXEL_MEAN: [0.5, 0.5, 0.5] 5 | PIXEL_STD: [0.5, 0.5, 0.5] 6 | 7 | DATASET: 8 | NAME: "DigitsDG" 9 | 10 | MODEL: 11 | BACKBONE: 12 | NAME: "cnn_digitsdg" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/dg/fmow.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (224, 224) 3 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 4 | 5 | DATASET: 6 | NAME: "FMoW" 7 | -------------------------------------------------------------------------------- 
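Each TRANSFORMS list in the dataset configs above names an augmentation pipeline rather than spelling it out; Dassl's transform builder (exported from dassl.data.transforms later in this dump) turns it into a torchvision-style transform. A hedged sketch, assuming the conventional build_transform(cfg, is_train=...) signature:

```python
from dassl.config import get_cfg_default
from dassl.data.transforms import build_transform

cfg = get_cfg_default()
cfg.merge_from_file("third_party/Dassl.pytorch/configs/datasets/dg/fmow.yaml")

# ["random_resized_crop", "random_flip", "normalize"] -> randomized train-time pipeline
tfm_train = build_transform(cfg, is_train=True)
# Test-time transforms are deterministic: resize/center-crop plus the same normalization.
tfm_test = build_transform(cfg, is_train=False)
```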
/third_party/Dassl.pytorch/configs/datasets/dg/iwildcam.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (224, 224) 3 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 4 | 5 | DATASET: 6 | NAME: "IWildCam" 7 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/dg/office_home_dg.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (224, 224) 3 | TRANSFORMS: ["random_flip", "random_translation", "normalize"] 4 | 5 | DATASET: 6 | NAME: "OfficeHomeDG" 7 | 8 | MODEL: 9 | BACKBONE: 10 | NAME: "resnet18" 11 | PRETRAINED: True -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/dg/pacs.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (224, 224) 3 | TRANSFORMS: ["random_flip", "random_translation", "normalize"] 4 | 5 | DATASET: 6 | NAME: "PACS" 7 | 8 | MODEL: 9 | BACKBONE: 10 | NAME: "resnet18" 11 | PRETRAINED: True -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/dg/vlcs.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (224, 224) 3 | TRANSFORMS: ["random_flip", "random_translation", "normalize"] 4 | 5 | DATASET: 6 | NAME: "VLCS" 7 | 8 | MODEL: 9 | BACKBONE: 10 | NAME: "resnet18" 11 | PRETRAINED: True -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/ssl/cifar10.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (32, 32) 3 | TRANSFORMS: ["random_flip", "random_crop", "normalize"] 4 | PIXEL_MEAN: [0.5, 0.5, 0.5] 5 | PIXEL_STD: [0.5, 0.5, 0.5] 6 | 7 | DATASET: 8 | NAME: "CIFAR10" 9 | NUM_LABELED: 4000 10 | VAL_PERCENT: 0. 11 | 12 | MODEL: 13 | BACKBONE: 14 | NAME: "wide_resnet_28_2" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/ssl/cifar100.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (32, 32) 3 | TRANSFORMS: ["random_flip", "random_crop", "normalize"] 4 | PIXEL_MEAN: [0.5, 0.5, 0.5] 5 | PIXEL_STD: [0.5, 0.5, 0.5] 6 | CROP_PADDING: 4 7 | 8 | DATASET: 9 | NAME: "CIFAR100" 10 | NUM_LABELED: 10000 11 | VAL_PERCENT: 0. 
12 | 13 | MODEL: 14 | BACKBONE: 15 | NAME: "wide_resnet_28_2" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/ssl/stl10.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (96, 96) 3 | TRANSFORMS: ["random_flip", "random_crop", "normalize"] 4 | PIXEL_MEAN: [0.5, 0.5, 0.5] 5 | PIXEL_STD: [0.5, 0.5, 0.5] 6 | CROP_PADDING: 4 7 | 8 | DATASET: 9 | NAME: "STL10" 10 | STL10_FOLD: 0 11 | 12 | MODEL: 13 | BACKBONE: 14 | NAME: "wide_resnet_28_2" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/datasets/ssl/svhn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | SIZE: (32, 32) 3 | TRANSFORMS: ["random_crop", "normalize"] 4 | PIXEL_MEAN: [0.5, 0.5, 0.5] 5 | PIXEL_STD: [0.5, 0.5, 0.5] 6 | CROP_PADDING: 4 7 | 8 | DATASET: 9 | NAME: "SVHN" 10 | NUM_LABELED: 1000 11 | VAL_PERCENT: 0. 12 | 13 | MODEL: 14 | BACKBONE: 15 | NAME: "wide_resnet_28_2" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/da/cdac/digit5.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | SAMPLER: "RandomSampler" 4 | BATCH_SIZE: 64 5 | TRAIN_U: 6 | SAME_AS_X: False 7 | BATCH_SIZE: 192 8 | TEST: 9 | BATCH_SIZE: 256 10 | K_TRANSFORMS: 2 11 | 12 | OPTIM: 13 | NAME: "sgd" 14 | LR: 0.001 15 | MAX_EPOCH: 90 16 | RAMPUP_ITRS: 10000 17 | 18 | TRAINER: 19 | CDAC: 20 | STRONG_TRANSFORMS: ["randaugment", "normalize"] -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/da/cdac/domainnet.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | SAMPLER: "RandomDomainSampler" 4 | BATCH_SIZE: 30 5 | TRAIN_U: 6 | SAME_AS_X: False 7 | BATCH_SIZE: 6 8 | TEST: 9 | BATCH_SIZE: 30 10 | K_TRANSFORMS: 2 11 | 12 | OPTIM: 13 | NAME: "sgd" 14 | LR: 0.001 15 | MAX_EPOCH: 90 16 | RAMPUP_ITRS: 10000 17 | 18 | TRAINER: 19 | CDAC: 20 | STRONG_TRANSFORMS: ["randaugment", "normalize"] -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/da/cdac/mini_domainnet.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | SAMPLER: "RandomDomainSampler" 4 | BATCH_SIZE: 64 5 | TRAIN_U: 6 | SAME_AS_X: False 7 | BATCH_SIZE: 192 8 | TEST: 9 | BATCH_SIZE: 200 10 | K_TRANSFORMS: 2 11 | 12 | OPTIM: 13 | NAME: "sgd" 14 | LR: 0.001 15 | MAX_EPOCH: 60 16 | RAMPUP_ITRS: 10000 17 | LR_SCHEDULER: "cosine" 18 | 19 | TRAINER: 20 | CDAC: 21 | STRONG_TRANSFORMS: ["randaugment", "normalize"] -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/da/dael/digit5.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | SAMPLER: "RandomDomainSampler" 4 | BATCH_SIZE: 256 5 | TRAIN_U: 6 | SAME_AS_X: False 7 | BATCH_SIZE: 64 8 | TEST: 9 | BATCH_SIZE: 256 10 | 11 | OPTIM: 12 | NAME: "sgd" 13 | LR: 0.05 14 | STEPSIZE: [30] 15 | MAX_EPOCH: 30 16 | LR_SCHEDULER: "cosine" 17 | 18 | TRAINER: 19 | DAEL: 20 | STRONG_TRANSFORMS: ["randaugment2", "normalize"] 
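In the DA trainer configs above, TRAIN_X is the labeled source loader and TRAIN_U the unlabeled target loader; SAME_AS_X: False gives TRAIN_U its own batch size, and K_TRANSFORMS replicates each image through the augmentation pipeline. A minimal sketch of how these blocks surface through Dassl's DataManager (constructor and attribute names are assumptions based on the dassl.data export shown later):

```python
from dassl.config import get_cfg_default
from dassl.data import DataManager

cfg = get_cfg_default()
cfg.merge_from_file("third_party/Dassl.pytorch/configs/datasets/da/mini_domainnet.yaml")
cfg.merge_from_file("third_party/Dassl.pytorch/configs/trainers/da/cdac/mini_domainnet.yaml")
cfg.DATASET.ROOT = "/path/to/data"  # assumed local dataset root

dm = DataManager(cfg)  # builds loaders from the DATALOADER.TRAIN_X / TRAIN_U / TEST blocks
batch_x = next(iter(dm.train_loader_x))  # labeled batch of 64: batch_x["img"], batch_x["label"]
batch_u = next(iter(dm.train_loader_u))  # unlabeled batch of 192 (SAME_AS_X: False above)
```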
-------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/da/dael/domainnet.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | SAMPLER: "RandomDomainSampler" 4 | BATCH_SIZE: 30 5 | TRAIN_U: 6 | SAME_AS_X: False 7 | BATCH_SIZE: 6 8 | TEST: 9 | BATCH_SIZE: 30 10 | 11 | OPTIM: 12 | NAME: "sgd" 13 | LR: 0.002 14 | MAX_EPOCH: 40 15 | LR_SCHEDULER: "cosine" 16 | 17 | TRAINER: 18 | DAEL: 19 | STRONG_TRANSFORMS: ["random_flip", "cutout", "randaugment2", "normalize"] -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/da/dael/mini_domainnet.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | SAMPLER: "RandomDomainSampler" 4 | BATCH_SIZE: 192 5 | TRAIN_U: 6 | SAME_AS_X: False 7 | BATCH_SIZE: 64 8 | TEST: 9 | BATCH_SIZE: 200 10 | 11 | OPTIM: 12 | NAME: "sgd" 13 | LR: 0.005 14 | MAX_EPOCH: 60 15 | LR_SCHEDULER: "cosine" 16 | 17 | TRAINER: 18 | DAEL: 19 | STRONG_TRANSFORMS: ["random_flip", "cutout", "randaugment2", "normalize"] -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/da/m3sda/digit5.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | SAMPLER: "RandomDomainSampler" 4 | BATCH_SIZE: 256 5 | TRAIN_U: 6 | SAME_AS_X: False 7 | BATCH_SIZE: 64 8 | TEST: 9 | BATCH_SIZE: 256 10 | 11 | OPTIM: 12 | NAME: "sgd" 13 | LR: 0.05 14 | STEPSIZE: [30] 15 | MAX_EPOCH: 30 16 | LR_SCHEDULER: "cosine" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/da/m3sda/domainnet.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | SAMPLER: "RandomDomainSampler" 4 | BATCH_SIZE: 30 5 | TRAIN_U: 6 | SAME_AS_X: False 7 | BATCH_SIZE: 6 8 | TEST: 9 | BATCH_SIZE: 30 10 | 11 | OPTIM: 12 | NAME: "sgd" 13 | LR: 0.002 14 | MAX_EPOCH: 40 15 | LR_SCHEDULER: "cosine" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/da/m3sda/mini_domainnet.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | SAMPLER: "RandomDomainSampler" 4 | BATCH_SIZE: 192 5 | TRAIN_U: 6 | SAME_AS_X: False 7 | BATCH_SIZE: 64 8 | TEST: 9 | BATCH_SIZE: 200 10 | 11 | OPTIM: 12 | NAME: "sgd" 13 | LR: 0.005 14 | MAX_EPOCH: 60 15 | LR_SCHEDULER: "cosine" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/da/source_only/digit5.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 256 4 | TEST: 5 | BATCH_SIZE: 256 6 | 7 | OPTIM: 8 | NAME: "sgd" 9 | LR: 0.05 10 | STEPSIZE: [30] 11 | MAX_EPOCH: 30 12 | LR_SCHEDULER: "cosine" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/da/source_only/mini_domainnet.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 128 4 | TEST: 5 | BATCH_SIZE: 128 6 | 7 | OPTIM: 8 | NAME: "sgd" 9 | LR: 0.005 10 | MAX_EPOCH: 60 11 | LR_SCHEDULER: "cosine" 
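The OPTIM blocks throughout these configs (and in the POMP/VPT ones earlier) pair a short constant warmup with cosine decay. The toy function below reproduces that schedule's shape for illustration only; Dassl's actual scheduler construction may handle the warmup boundary differently:

```python
import math

def lr_at_epoch(epoch, base_lr=0.005, max_epoch=60, warmup_epoch=1, warmup_cons_lr=1e-5):
    """Constant warmup for the first epoch(s), then cosine decay toward zero."""
    if epoch < warmup_epoch:
        return warmup_cons_lr
    progress = (epoch - warmup_epoch) / max(1, max_epoch - warmup_epoch)
    return 0.5 * base_lr * (1.0 + math.cos(math.pi * progress))

print([round(lr_at_epoch(e), 6) for e in (0, 1, 30, 59)])  # warmup floor, peak, midway, near zero
```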
-------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/da/source_only/office31.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 32 6 | 7 | OPTIM: 8 | NAME: "sgd" 9 | LR: 0.002 10 | STEPSIZE: [20] 11 | MAX_EPOCH: 20 -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/da/source_only/visda17.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 32 6 | 7 | OPTIM: 8 | NAME: "sgd" 9 | LR: 0.0001 10 | STEPSIZE: [2] 11 | MAX_EPOCH: 2 12 | 13 | TRAIN: 14 | PRINT_FREQ: 50 15 | COUNT_ITER: "train_u" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/dg/daeldg/digits_dg.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | SAMPLER: "RandomDomainSampler" 4 | BATCH_SIZE: 120 5 | TEST: 6 | BATCH_SIZE: 100 7 | 8 | OPTIM: 9 | NAME: "sgd" 10 | LR: 0.05 11 | STEPSIZE: [20] 12 | MAX_EPOCH: 50 13 | 14 | TRAINER: 15 | DAELDG: 16 | STRONG_TRANSFORMS: ["randaugment2", "normalize"] -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/dg/daeldg/office_home_dg.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | SAMPLER: "RandomDomainSampler" 4 | BATCH_SIZE: 30 5 | TEST: 6 | BATCH_SIZE: 100 7 | 8 | OPTIM: 9 | NAME: "sgd" 10 | LR: 0.002 11 | MAX_EPOCH: 40 12 | LR_SCHEDULER: "cosine" 13 | 14 | TRAINER: 15 | DAELDG: 16 | STRONG_TRANSFORMS: ["random_flip", "cutout", "randaugment2", "normalize"] -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/dg/daeldg/pacs.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | SAMPLER: "RandomDomainSampler" 4 | BATCH_SIZE: 30 5 | TEST: 6 | BATCH_SIZE: 100 7 | 8 | OPTIM: 9 | NAME: "sgd" 10 | LR: 0.002 11 | MAX_EPOCH: 40 12 | LR_SCHEDULER: "cosine" 13 | 14 | TRAINER: 15 | DAELDG: 16 | STRONG_TRANSFORMS: ["random_flip", "cutout", "randaugment2", "normalize"] -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/dg/ddaig/digits_dg.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | PIXEL_MEAN: [0., 0., 0.] 3 | PIXEL_STD: [1., 1., 1.] 4 | 5 | DATALOADER: 6 | TRAIN_X: 7 | BATCH_SIZE: 128 8 | TEST: 9 | BATCH_SIZE: 128 10 | 11 | OPTIM: 12 | NAME: "sgd" 13 | LR: 0.05 14 | STEPSIZE: [20] 15 | MAX_EPOCH: 50 16 | 17 | TRAINER: 18 | DDAIG: 19 | G_ARCH: "fcn_3x32_gctx" 20 | LMDA: 0.3 -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/dg/ddaig/office_home_dg.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | PIXEL_MEAN: [0., 0., 0.] 3 | PIXEL_STD: [1., 1., 1.] 
4 | 5 | DATALOADER: 6 | TRAIN_X: 7 | BATCH_SIZE: 16 8 | TEST: 9 | BATCH_SIZE: 16 10 | 11 | OPTIM: 12 | NAME: "sgd" 13 | LR: 0.0005 14 | STEPSIZE: [20] 15 | MAX_EPOCH: 25 16 | 17 | TRAINER: 18 | DDAIG: 19 | G_ARCH: "fcn_3x64_gctx" 20 | WARMUP: 3 21 | LMDA: 0.3 -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/dg/ddaig/pacs.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | PIXEL_MEAN: [0., 0., 0.] 3 | PIXEL_STD: [1., 1., 1.] 4 | 5 | DATALOADER: 6 | TRAIN_X: 7 | BATCH_SIZE: 16 8 | TEST: 9 | BATCH_SIZE: 16 10 | 11 | OPTIM: 12 | NAME: "sgd" 13 | LR: 0.0005 14 | STEPSIZE: [20] 15 | MAX_EPOCH: 25 16 | 17 | TRAINER: 18 | DDAIG: 19 | G_ARCH: "fcn_3x64_gctx" 20 | WARMUP: 3 21 | LMDA: 0.3 -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/dg/vanilla/digits_dg.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 128 4 | TEST: 5 | BATCH_SIZE: 100 6 | NUM_WORKERS: 8 7 | 8 | OPTIM: 9 | NAME: "sgd" 10 | LR: 0.05 11 | STEPSIZE: [20] 12 | MAX_EPOCH: 50 13 | 14 | TRAIN: 15 | PRINT_FREQ: 20 -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/dg/vanilla/mini_domainnet.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 128 4 | TEST: 5 | BATCH_SIZE: 128 6 | 7 | OPTIM: 8 | NAME: "sgd" 9 | LR: 0.005 10 | MAX_EPOCH: 60 11 | LR_SCHEDULER: "cosine" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/dg/vanilla/office_home_dg.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 64 4 | TEST: 5 | BATCH_SIZE: 100 6 | 7 | OPTIM: 8 | NAME: "sgd" 9 | LR: 0.001 10 | MAX_EPOCH: 50 11 | LR_SCHEDULER: "cosine" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/dg/vanilla/pacs.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 64 4 | TEST: 5 | BATCH_SIZE: 100 6 | 7 | OPTIM: 8 | NAME: "sgd" 9 | LR: 0.001 10 | MAX_EPOCH: 50 11 | LR_SCHEDULER: "cosine" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/configs/trainers/ssl/fixmatch/cifar10.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 64 4 | TRAIN_U: 5 | SAME_AS_X: False 6 | BATCH_SIZE: 448 7 | TEST: 8 | BATCH_SIZE: 500 9 | 10 | OPTIM: 11 | NAME: "sgd" 12 | LR: 0.05 13 | STEPSIZE: [4000] 14 | MAX_EPOCH: 4000 15 | LR_SCHEDULER: "cosine" 16 | 17 | TRAIN: 18 | COUNT_ITER: "train_u" 19 | PRINT_FREQ: 10 20 | 21 | TRAINER: 22 | FIXMATCH: 23 | STRONG_TRANSFORMS: ["random_flip", "randaugment_fixmatch", "normalize", "cutout"] -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dassl 3 | ------ 4 | PyTorch toolbox for domain adaptation and semi-supervised learning. 
5 | 6 | URL: https://github.com/KaiyangZhou/Dassl.pytorch 7 | 8 | @article{zhou2020domain, 9 | title={Domain Adaptive Ensemble Learning}, 10 | author={Zhou, Kaiyang and Yang, Yongxin and Qiao, Yu and Xiang, Tao}, 11 | journal={arXiv preprint arXiv:2003.07325}, 12 | year={2020} 13 | } 14 | """ 15 | 16 | __version__ = "0.6.3" 17 | __author__ = "Kaiyang Zhou" 18 | __homepage__ = "https://kaiyangzhou.github.io/" 19 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .defaults import _C as cfg_default 2 | 3 | 4 | def get_cfg_default(): 5 | return cfg_default.clone() 6 | 7 | 8 | def clean_cfg(cfg, trainer): 9 | """Remove unused trainers (configs). 10 | 11 | Aim: Only show relevant information when calling print(cfg). 12 | 13 | Args: 14 | cfg (_C): cfg instance. 15 | trainer (str): trainer name. 16 | """ 17 | keys = list(cfg.TRAINER.keys()) 18 | for key in keys: 19 | if key == "NAME" or key == trainer.upper(): 20 | continue 21 | cfg.TRAINER.pop(key, None) 22 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .data_manager import DataManager, DatasetWrapper 2 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .build import DATASET_REGISTRY, build_dataset # isort:skip 2 | from .base_dataset import Datum, DatasetBase # isort:skip 3 | 4 | from .da import * 5 | from .dg import * 6 | from .ssl import * 7 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/data/datasets/build.py: -------------------------------------------------------------------------------- 1 | from dassl.utils import Registry, check_availability 2 | 3 | DATASET_REGISTRY = Registry("DATASET") 4 | 5 | 6 | def build_dataset(cfg): 7 | avai_datasets = DATASET_REGISTRY.registered_names() 8 | check_availability(cfg.DATASET.NAME, avai_datasets) 9 | if cfg.VERBOSE: 10 | print("Loading dataset: {}".format(cfg.DATASET.NAME)) 11 | return DATASET_REGISTRY.get(cfg.DATASET.NAME)(cfg) 12 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/data/datasets/da/__init__.py: -------------------------------------------------------------------------------- 1 | from .digit5 import Digit5 2 | from .visda17 import VisDA17 3 | from .cifarstl import CIFARSTL 4 | from .office31 import Office31 5 | from .domainnet import DomainNet 6 | from .office_home import OfficeHome 7 | from .mini_domainnet import miniDomainNet 8 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/data/datasets/dg/__init__.py: -------------------------------------------------------------------------------- 1 | from .pacs import PACS 2 | from .vlcs import VLCS 3 | from .wilds import * 4 | from .cifar_c import CIFAR10C, CIFAR100C 5 | from .digits_dg import DigitsDG 6 | from .digit_single import DigitSingle 7 | from .office_home_dg import OfficeHomeDG 8 | -------------------------------------------------------------------------------- 
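build_dataset above resolves cfg.DATASET.NAME through DATASET_REGISTRY, which is why the da/dg/ssl packages simply import their dataset classes: the @DATASET_REGISTRY.register() decorator does the wiring. A minimal sketch of plugging in a new dataset the same way (MyPlaces, its paths, and the Datum keyword names are illustrative assumptions):

```python
from dassl.data.datasets import DATASET_REGISTRY, DatasetBase, Datum


@DATASET_REGISTRY.register()
class MyPlaces(DatasetBase):
    """Hypothetical dataset; DATASET.NAME: "MyPlaces" would now resolve here."""

    def __init__(self, cfg):
        train = [Datum(impath="/data/arch_001.jpg", label=0, classname="arch")]
        test = [Datum(impath="/data/bridge_001.jpg", label=1, classname="bridge")]
        super().__init__(train_x=train, test=test)
```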
/third_party/Dassl.pytorch/dassl/data/datasets/dg/wilds/__init__.py: -------------------------------------------------------------------------------- 1 | from .fmow import FMoW 2 | from .iwildcam import IWildCam 3 | from .camelyon17 import Camelyon17 4 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/data/datasets/dg/wilds/camelyon17.py: -------------------------------------------------------------------------------- 1 | from dassl.data.datasets import DATASET_REGISTRY 2 | 3 | from .wilds_base import WILDSBase 4 | 5 | 6 | @DATASET_REGISTRY.register() 7 | class Camelyon17(WILDSBase): 8 | """Tumor tissue recognition. 9 | 10 | 2 classes (whether a given region of tissue contains tumor tissue). 11 | 12 | Reference: 13 | - Bandi et al. "From detection of individual metastases to classification of lymph 14 | node status at the patient level: the CAMELYON17 challenge." TMI 2021. 15 | - Koh et al. "Wilds: A benchmark of in-the-wild distribution shifts." ICML 2021. 16 | """ 17 | 18 | dataset_dir = "camelyon17_v1.0" 19 | 20 | def __init__(self, cfg): 21 | super().__init__(cfg) 22 | 23 | def load_classnames(self): 24 | return {0: "healthy tissue", 1: "tumor tissue"} 25 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/data/datasets/dg/wilds/iwildcam.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import pandas as pd 3 | 4 | from dassl.data.datasets import DATASET_REGISTRY 5 | 6 | from .wilds_base import WILDSBase 7 | 8 | 9 | @DATASET_REGISTRY.register() 10 | class IWildCam(WILDSBase): 11 | """Animal species recognition. 12 | 13 | 182 classes (species). 14 | 15 | Reference: 16 | - Beery et al. "The iwildcam 2021 competition dataset." arXiv 2021. 17 | - Koh et al. "Wilds: A benchmark of in-the-wild distribution shifts." ICML 2021. 18 | """ 19 | 20 | dataset_dir = "iwildcam_v2.0" 21 | 22 | def __init__(self, cfg): 23 | super().__init__(cfg) 24 | 25 | def get_image_path(self, dataset, idx): 26 | image_name = dataset._input_array[idx] 27 | image_path = osp.join(self.dataset_dir, "train", image_name) 28 | return image_path 29 | 30 | def load_classnames(self): 31 | df = pd.read_csv(osp.join(self.dataset_dir, "categories.csv")) 32 | return dict(df["name"]) 33 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/data/datasets/ssl/__init__.py: -------------------------------------------------------------------------------- 1 | from .svhn import SVHN 2 | from .cifar import CIFAR10, CIFAR100 3 | from .stl10 import STL10 4 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/data/datasets/ssl/svhn.py: -------------------------------------------------------------------------------- 1 | from .cifar import CIFAR10 2 | from ..build import DATASET_REGISTRY 3 | 4 | 5 | @DATASET_REGISTRY.register() 6 | class SVHN(CIFAR10): 7 | """SVHN for SSL. 8 | 9 | Reference: 10 | - Netzer et al. Reading Digits in Natural Images with 11 | Unsupervised Feature Learning. NIPS-W 2011. 
12 | """ 13 | 14 | dataset_dir = "svhn" 15 | 16 | def __init__(self, cfg): 17 | super().__init__(cfg) 18 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from .transforms import INTERPOLATION_MODES, build_transform 2 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/engine/__init__.py: -------------------------------------------------------------------------------- 1 | from .build import TRAINER_REGISTRY, build_trainer # isort:skip 2 | from .trainer import TrainerX, TrainerXU, TrainerBase, SimpleTrainer, SimpleNet # isort:skip 3 | 4 | from .da import * 5 | from .dg import * 6 | from .ssl import * 7 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/engine/build.py: -------------------------------------------------------------------------------- 1 | from dassl.utils import Registry, check_availability 2 | 3 | TRAINER_REGISTRY = Registry("TRAINER") 4 | 5 | 6 | def build_trainer(cfg): 7 | avai_trainers = TRAINER_REGISTRY.registered_names() 8 | check_availability(cfg.TRAINER.NAME, avai_trainers) 9 | if cfg.VERBOSE: 10 | print("Loading trainer: {}".format(cfg.TRAINER.NAME)) 11 | return TRAINER_REGISTRY.get(cfg.TRAINER.NAME)(cfg) 12 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/engine/da/__init__.py: -------------------------------------------------------------------------------- 1 | from .se import SE 2 | from .mcd import MCD 3 | from .mme import MME 4 | from .adda import ADDA 5 | from .cdac import CDAC 6 | from .dael import DAEL 7 | from .dann import DANN 8 | from .adabn import AdaBN 9 | from .m3sda import M3SDA 10 | from .source_only import SourceOnly 11 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/engine/da/adabn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from dassl.utils import check_isfile 4 | from dassl.engine import TRAINER_REGISTRY, TrainerXU 5 | 6 | 7 | @TRAINER_REGISTRY.register() 8 | class AdaBN(TrainerXU): 9 | """Adaptive Batch Normalization. 10 | 11 | https://arxiv.org/abs/1603.04779. 
12 | """ 13 | 14 | def __init__(self, cfg): 15 | super().__init__(cfg) 16 | self.done_reset_bn_stats = False 17 | 18 | def check_cfg(self, cfg): 19 | assert check_isfile( 20 | cfg.MODEL.INIT_WEIGHTS 21 | ), "The weights of source model must be provided" 22 | 23 | def before_epoch(self): 24 | if not self.done_reset_bn_stats: 25 | for m in self.model.modules(): 26 | classname = m.__class__.__name__ 27 | if classname.find("BatchNorm") != -1: 28 | m.reset_running_stats() 29 | 30 | self.done_reset_bn_stats = True 31 | 32 | def forward_backward(self, batch_x, batch_u): 33 | input_u = batch_u["img"].to(self.device) 34 | 35 | with torch.no_grad(): 36 | self.model(input_u) 37 | 38 | return None 39 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/engine/da/source_only.py: -------------------------------------------------------------------------------- 1 | from torch.nn import functional as F 2 | 3 | from dassl.engine import TRAINER_REGISTRY, TrainerXU 4 | from dassl.metrics import compute_accuracy 5 | 6 | 7 | @TRAINER_REGISTRY.register() 8 | class SourceOnly(TrainerXU): 9 | """Baseline model for domain adaptation, which is 10 | trained using source data only. 11 | """ 12 | 13 | def forward_backward(self, batch_x, batch_u): 14 | input, label = self.parse_batch_train(batch_x, batch_u) 15 | output = self.model(input) 16 | loss = F.cross_entropy(output, label) 17 | self.model_backward_and_update(loss) 18 | 19 | loss_summary = { 20 | "loss": loss.item(), 21 | "acc": compute_accuracy(output, label)[0].item(), 22 | } 23 | 24 | if (self.batch_idx + 1) == self.num_batches: 25 | self.update_lr() 26 | 27 | return loss_summary 28 | 29 | def parse_batch_train(self, batch_x, batch_u): 30 | input = batch_x["img"] 31 | label = batch_x["label"] 32 | input = input.to(self.device) 33 | label = label.to(self.device) 34 | return input, label 35 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/engine/dg/__init__.py: -------------------------------------------------------------------------------- 1 | from .ddaig import DDAIG 2 | from .daeldg import DAELDG 3 | from .vanilla import Vanilla 4 | from .crossgrad import CrossGrad 5 | from .domain_mix import DomainMix 6 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/engine/dg/vanilla.py: -------------------------------------------------------------------------------- 1 | from torch.nn import functional as F 2 | 3 | from dassl.engine import TRAINER_REGISTRY, TrainerX 4 | from dassl.metrics import compute_accuracy 5 | 6 | 7 | @TRAINER_REGISTRY.register() 8 | class Vanilla(TrainerX): 9 | """Vanilla model. 10 | 11 | A.k.a. Empirical Risk Minimization, or ERM. 
12 | """ 13 | 14 | def forward_backward(self, batch): 15 | input, target = self.parse_batch_train(batch) 16 | output = self.model(input) 17 | loss = F.cross_entropy(output, target) 18 | self.model_backward_and_update(loss) 19 | 20 | loss_summary = { 21 | "loss": loss.item(), 22 | "acc": compute_accuracy(output, target)[0].item(), 23 | } 24 | 25 | if (self.batch_idx + 1) == self.num_batches: 26 | self.update_lr() 27 | 28 | return loss_summary 29 | 30 | def parse_batch_train(self, batch): 31 | input = batch["img"] 32 | target = batch["label"] 33 | input = input.to(self.device) 34 | target = target.to(self.device) 35 | return input, target 36 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/engine/ssl/__init__.py: -------------------------------------------------------------------------------- 1 | from .entmin import EntMin 2 | from .fixmatch import FixMatch 3 | from .mixmatch import MixMatch 4 | from .mean_teacher import MeanTeacher 5 | from .sup_baseline import SupBaseline 6 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/engine/ssl/sup_baseline.py: -------------------------------------------------------------------------------- 1 | from torch.nn import functional as F 2 | 3 | from dassl.engine import TRAINER_REGISTRY, TrainerXU 4 | from dassl.metrics import compute_accuracy 5 | 6 | 7 | @TRAINER_REGISTRY.register() 8 | class SupBaseline(TrainerXU): 9 | """Supervised Baseline.""" 10 | 11 | def forward_backward(self, batch_x, batch_u): 12 | input, label = self.parse_batch_train(batch_x, batch_u) 13 | output = self.model(input) 14 | loss = F.cross_entropy(output, label) 15 | self.model_backward_and_update(loss) 16 | 17 | loss_summary = { 18 | "loss": loss.item(), 19 | "acc": compute_accuracy(output, label)[0].item(), 20 | } 21 | 22 | if (self.batch_idx + 1) == self.num_batches: 23 | self.update_lr() 24 | 25 | return loss_summary 26 | 27 | def parse_batch_train(self, batch_x, batch_u): 28 | input = batch_x["img"] 29 | label = batch_x["label"] 30 | input = input.to(self.device) 31 | label = label.to(self.device) 32 | return input, label 33 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .build import build_evaluator, EVALUATOR_REGISTRY # isort:skip 2 | 3 | from .evaluator import EvaluatorBase, Classification 4 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/evaluation/build.py: -------------------------------------------------------------------------------- 1 | from dassl.utils import Registry, check_availability 2 | 3 | EVALUATOR_REGISTRY = Registry("EVALUATOR") 4 | 5 | 6 | def build_evaluator(cfg, **kwargs): 7 | avai_evaluators = EVALUATOR_REGISTRY.registered_names() 8 | check_availability(cfg.TEST.EVALUATOR, avai_evaluators) 9 | if cfg.VERBOSE: 10 | print("Loading evaluator: {}".format(cfg.TEST.EVALUATOR)) 11 | return EVALUATOR_REGISTRY.get(cfg.TEST.EVALUATOR)(cfg, **kwargs) 12 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from .accuracy import compute_accuracy 2 | from .distance import ( 3 | cosine_distance, compute_distance_matrix, 
euclidean_squared_distance 4 | ) 5 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/metrics/accuracy.py: -------------------------------------------------------------------------------- 1 | def compute_accuracy(output, target, topk=(1, )): 2 | """Computes the accuracy over the k top predictions for 3 | the specified values of k. 4 | 5 | Args: 6 | output (torch.Tensor): prediction matrix with shape (batch_size, num_classes). 7 | target (torch.LongTensor): ground truth labels with shape (batch_size). 8 | topk (tuple, optional): accuracy at top-k will be computed. For example, 9 | topk=(1, 5) means accuracy at top-1 and top-5 will be computed. 10 | 11 | Returns: 12 | list: accuracy at top-k. 13 | """ 14 | maxk = max(topk) 15 | batch_size = target.size(0) 16 | 17 | if isinstance(output, (tuple, list)): 18 | output = output[0] 19 | 20 | _, pred = output.topk(maxk, 1, True, True) 21 | pred = pred.t() 22 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 23 | 24 | res = [] 25 | for k in topk: 26 | correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) # reshape, not view: correct[:k] may be non-contiguous 27 | acc = correct_k.mul_(100.0 / batch_size) 28 | res.append(acc) 29 | 30 | return res 31 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .head import HEAD_REGISTRY, build_head 2 | from .network import NETWORK_REGISTRY, build_network 3 | from .backbone import BACKBONE_REGISTRY, Backbone, build_backbone 4 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .build import build_backbone, BACKBONE_REGISTRY # isort:skip 2 | from .backbone import Backbone # isort:skip 3 | 4 | from .vgg import vgg16 5 | from .resnet import ( 6 | resnet18, resnet34, resnet50, resnet101, resnet152, resnet18_ms_l1, 7 | resnet50_ms_l1, resnet18_ms_l12, resnet50_ms_l12, resnet101_ms_l1, 8 | resnet18_ms_l123, resnet50_ms_l123, resnet101_ms_l12, resnet101_ms_l123, 9 | resnet18_efdmix_l1, resnet50_efdmix_l1, resnet18_efdmix_l12, 10 | resnet50_efdmix_l12, resnet101_efdmix_l1, resnet18_efdmix_l123, 11 | resnet50_efdmix_l123, resnet101_efdmix_l12, resnet101_efdmix_l123 12 | ) 13 | from .alexnet import alexnet 14 | from .wide_resnet import wide_resnet_16_4, wide_resnet_28_2 15 | from .cnn_digitsdg import cnn_digitsdg 16 | from .efficientnet import ( 17 | efficientnet_b0, efficientnet_b1, efficientnet_b2, efficientnet_b3, 18 | efficientnet_b4, efficientnet_b5, efficientnet_b6, efficientnet_b7 19 | ) 20 | from .resnet_dynamic import * 21 | from .cnn_digitsingle import cnn_digitsingle 22 | from .preact_resnet18 import preact_resnet18 23 | from .cnn_digit5_m3sda import cnn_digit5_m3sda 24 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class Backbone(nn.Module): 5 | 6 | def __init__(self): 7 | super().__init__() 8 | 9 | def forward(self): 10 | pass 11 | 12 | @property 13 | def out_features(self): 14 | """Output feature dimension.""" 15 | if self.__dict__.get("_out_features") is None: 16 | return None 17 | return self._out_features 18 |
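New backbones plug into the Backbone base class above through BACKBONE_REGISTRY, whose factory lives in build.py just below. A minimal sketch of the pattern, assuming only the Dassl layout shown here; the toy architecture and its 64-dim output are invented for illustration:

import torch.nn as nn

from dassl.modeling.backbone import BACKBONE_REGISTRY, Backbone


@BACKBONE_REGISTRY.register()
class tiny_cnn(Backbone):
    """Toy two-block CNN ending in global average pooling."""

    def __init__(self, **kwargs):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, 3, stride=2, padding=1), nn.ReLU(),
            nn.Conv2d(32, 64, 3, stride=2, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool2d(1),
        )
        self._out_features = 64  # surfaced through the out_features property above

    def forward(self, x):
        return self.features(x).flatten(1)

Once registered, build_backbone("tiny_cnn") (below) instantiates it by name.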
-------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/backbone/build.py: -------------------------------------------------------------------------------- 1 | from dassl.utils import Registry, check_availability 2 | 3 | BACKBONE_REGISTRY = Registry("BACKBONE") 4 | 5 | 6 | def build_backbone(name, verbose=True, **kwargs): 7 | avai_backbones = BACKBONE_REGISTRY.registered_names() 8 | check_availability(name, avai_backbones) 9 | if verbose: 10 | print("Backbone: {}".format(name)) 11 | return BACKBONE_REGISTRY.get(name)(**kwargs) 12 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/backbone/efficientnet/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Source: https://github.com/lukemelas/EfficientNet-PyTorch. 3 | """ 4 | __version__ = "0.6.4" 5 | from .model import ( 6 | EfficientNet, efficientnet_b0, efficientnet_b1, efficientnet_b2, 7 | efficientnet_b3, efficientnet_b4, efficientnet_b5, efficientnet_b6, 8 | efficientnet_b7 9 | ) 10 | from .utils import ( 11 | BlockArgs, BlockDecoder, GlobalParams, efficientnet, get_model_params 12 | ) 13 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/head/__init__.py: -------------------------------------------------------------------------------- 1 | from .build import build_head, HEAD_REGISTRY # isort:skip 2 | 3 | from .mlp import mlp 4 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/head/build.py: -------------------------------------------------------------------------------- 1 | from dassl.utils import Registry, check_availability 2 | 3 | HEAD_REGISTRY = Registry("HEAD") 4 | 5 | 6 | def build_head(name, verbose=True, **kwargs): 7 | avai_heads = HEAD_REGISTRY.registered_names() 8 | check_availability(name, avai_heads) 9 | if verbose: 10 | print("Head: {}".format(name)) 11 | return HEAD_REGISTRY.get(name)(**kwargs) 12 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/network/__init__.py: -------------------------------------------------------------------------------- 1 | from .build import build_network, NETWORK_REGISTRY # isort:skip 2 | 3 | from .ddaig_fcn import ( 4 | fcn_3x32_gctx, fcn_3x64_gctx, fcn_3x32_gctx_stn, fcn_3x64_gctx_stn 5 | ) 6 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/network/build.py: -------------------------------------------------------------------------------- 1 | from dassl.utils import Registry, check_availability 2 | 3 | NETWORK_REGISTRY = Registry("NETWORK") 4 | 5 | 6 | def build_network(name, verbose=True, **kwargs): 7 | avai_models = NETWORK_REGISTRY.registered_names() 8 | check_availability(name, avai_models) 9 | if verbose: 10 | print("Network: {}".format(name)) 11 | return NETWORK_REGISTRY.get(name)(**kwargs) 12 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .mmd import MaximumMeanDiscrepancy 2 | from .conv import * 3 | from .dsbn import DSBN1d, DSBN2d 4 | from .mixup import mixup 5 | from .efdmix import ( 6 | EFDMix, random_efdmix, 
activate_efdmix, run_with_efdmix, deactivate_efdmix, 7 | crossdomain_efdmix, run_without_efdmix 8 | ) 9 | from .mixstyle import ( 10 | MixStyle, random_mixstyle, activate_mixstyle, run_with_mixstyle, 11 | deactivate_mixstyle, crossdomain_mixstyle, run_without_mixstyle 12 | ) 13 | from .attention import * 14 | from .transnorm import TransNorm1d, TransNorm2d 15 | from .sequential2 import Sequential2 16 | from .reverse_grad import ReverseGrad 17 | from .cross_entropy import cross_entropy 18 | from .optimal_transport import SinkhornDivergence, MinibatchEnergyDistance 19 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/ops/attention.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.nn import functional as F 3 | 4 | __all__ = ["Attention"] 5 | 6 | 7 | class Attention(nn.Module): 8 | """Attention module from "Dynamic Domain Generalization". 9 | """ 10 | 11 | def __init__( 12 | self, 13 | in_channels: int, 14 | out_features: int, 15 | squeeze=None, 16 | bias: bool = True 17 | ): 18 | super(Attention, self).__init__() 19 | self.squeeze = squeeze if squeeze else in_channels // 16 20 | assert self.squeeze > 0 21 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 22 | self.fc1 = nn.Linear(in_channels, self.squeeze, bias=bias) 23 | self.fc2 = nn.Linear(self.squeeze, out_features, bias=bias) 24 | self.sf = nn.Softmax(dim=-1) 25 | 26 | def forward(self, x): 27 | x = self.avg_pool(x).view(x.shape[:-2]) 28 | x = self.fc1(x) 29 | x = F.relu(x, inplace=True) 30 | x = self.fc2(x) 31 | return self.sf(x) 32 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/ops/cross_entropy.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import functional as F 3 | 4 | 5 | def cross_entropy(input, target, label_smooth=0, reduction="mean"): 6 | """Cross entropy loss. 7 | 8 | Args: 9 | input (torch.Tensor): logit matrix with shape of (batch, num_classes). 10 | target (torch.LongTensor): ground-truth labels with shape (batch,). 11 | label_smooth (float, optional): label smoothing hyper-parameter. 12 | Default is 0. 13 | reduction (str, optional): how the losses for a mini-batch 14 | will be aggregated. Default is 'mean'. 15 | """ 16 | num_classes = input.shape[1] 17 | log_prob = F.log_softmax(input, dim=1) 18 | zeros = torch.zeros_like(log_prob) # one-hot buffer on the same device/dtype as the logits 19 | target = zeros.scatter_(1, target.unsqueeze(1), 1) 20 | target = (1-label_smooth) * target + label_smooth/num_classes 21 | loss = (-target * log_prob).sum(1) 22 | if reduction == "mean": 23 | return loss.mean() 24 | elif reduction == "sum": 25 | return loss.sum() 26 | elif reduction == "none": 27 | return loss 28 | else: 29 | raise ValueError("Invalid reduction: {}".format(reduction)) 30 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/ops/mixup.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def mixup(x1, x2, y1, y2, beta, preserve_order=False): 5 | """Mixup. 6 | 7 | Args: 8 | x1 (torch.Tensor): data with shape of (b, c, h, w). 9 | x2 (torch.Tensor): data with shape of (b, c, h, w). 10 | y1 (torch.Tensor): label with shape of (b, n). 11 | y2 (torch.Tensor): label with shape of (b, n). 12 | beta (float): hyper-parameter for Beta sampling.
13 | preserve_order (bool): apply lmda=max(lmda, 1-lmda). 14 | Default is False. 15 | """ 16 | lmda = torch.distributions.Beta(beta, beta).sample([x1.shape[0], 1, 1, 1]) 17 | if preserve_order: 18 | lmda = torch.max(lmda, 1 - lmda) 19 | lmda = lmda.to(x1.device) 20 | xmix = x1*lmda + x2 * (1-lmda) 21 | lmda = lmda[:, :, 0, 0] 22 | ymix = y1*lmda + y2 * (1-lmda) 23 | return xmix, ymix 24 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/ops/reverse_grad.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.autograd import Function 3 | 4 | 5 | class _ReverseGrad(Function): 6 | 7 | @staticmethod 8 | def forward(ctx, input, grad_scaling): 9 | ctx.grad_scaling = grad_scaling 10 | return input.view_as(input) 11 | 12 | @staticmethod 13 | def backward(ctx, grad_output): 14 | grad_scaling = ctx.grad_scaling 15 | return -grad_scaling * grad_output, None 16 | 17 | 18 | reverse_grad = _ReverseGrad.apply 19 | 20 | 21 | class ReverseGrad(nn.Module): 22 | """Gradient reversal layer. 23 | 24 | It acts as an identity layer in the forward, 25 | but reverses the sign of the gradient in 26 | the backward. 27 | """ 28 | 29 | def forward(self, x, grad_scaling=1.0): 30 | assert (grad_scaling >= 31 | 0), "grad_scaling must be non-negative, " "but got {}".format( 32 | grad_scaling 33 | ) 34 | return reverse_grad(x, grad_scaling) 35 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/modeling/ops/sequential2.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class Sequential2(nn.Sequential): 5 | """An alternative sequential container to nn.Sequential, 6 | which accepts an arbitrary number of input arguments. 7 | """ 8 | 9 | def forward(self, *inputs): 10 | for module in self._modules.values(): 11 | if isinstance(inputs, tuple): 12 | inputs = module(*inputs) 13 | else: 14 | inputs = module(inputs) 15 | return inputs 16 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/optim/__init__.py: -------------------------------------------------------------------------------- 1 | from .optimizer import build_optimizer 2 | from .lr_scheduler import build_lr_scheduler 3 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/dassl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .tools import * 2 | from .logger import * 3 | from .meters import * 4 | from .registry import * 5 | from .torchtools import * 6 | -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/datasets/da/visda17.sh: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # ROOT is the root directory where you put your domain datasets. 
3 | # 4 | # Suppose you wanna put the dataset under $DATA, which stores all the 5 | # domain datasets, run the following command in your terminal to 6 | # download VisDa17: 7 | # 8 | # $ sh visda17.sh $DATA 9 | #------------------------------------------------------------------------ 10 | 11 | ROOT=$1 12 | mkdir $ROOT/visda17 13 | cd $ROOT/visda17 14 | 15 | wget http://csr.bu.edu/ftp/visda17/clf/train.tar 16 | tar xvf train.tar 17 | 18 | wget http://csr.bu.edu/ftp/visda17/clf/validation.tar 19 | tar xvf validation.tar 20 | 21 | wget http://csr.bu.edu/ftp/visda17/clf/test.tar 22 | tar xvf test.tar 23 | 24 | wget https://raw.githubusercontent.com/VisionLearningGroup/taskcv-2017-public/master/classification/data/image_list.txt -O test/image_list.txt -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/linter.sh: -------------------------------------------------------------------------------- 1 | echo "Running isort" 2 | isort -y -sp . 3 | echo "Done" 4 | 5 | echo "Running yapf" 6 | yapf -i -r -vv -e build . 7 | echo "Done" 8 | 9 | echo "Running flake8" 10 | flake8 . 11 | echo "Done" -------------------------------------------------------------------------------- /third_party/Dassl.pytorch/requirements.txt: -------------------------------------------------------------------------------- 1 | flake8==3.7.9 2 | yapf==0.29.0 3 | isort==4.3.21 4 | yacs 5 | gdown 6 | tb-nightly 7 | future 8 | scipy 9 | scikit-learn 10 | tqdm 11 | ftfy 12 | regex 13 | wilds==1.2.2 14 | tabulate 15 | -------------------------------------------------------------------------------- /third_party/Detic/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/.DS_Store -------------------------------------------------------------------------------- /third_party/Detic/.gitignore: -------------------------------------------------------------------------------- 1 | third_party/detectron2 2 | ./models 3 | configs-experimental 4 | experiments 5 | # output dir 6 | index.html 7 | data/* 8 | slurm/ 9 | slurm 10 | slurm-output 11 | slurm-output/ 12 | output 13 | instant_test_output 14 | inference_test_output 15 | 16 | 17 | *.png 18 | *.diff 19 | *.jpg 20 | !/projects/DensePose/doc/images/*.jpg 21 | 22 | # compilation and distribution 23 | __pycache__ 24 | _ext 25 | *.pyc 26 | *.pyd 27 | *.so 28 | *.dll 29 | *.egg-info/ 30 | build/ 31 | dist/ 32 | wheels/ 33 | 34 | # pytorch/python/numpy formats 35 | *.pth 36 | *.pkl 37 | *.ts 38 | model_ts*.txt 39 | 40 | # ipython/jupyter notebooks 41 | *.ipynb 42 | **/.ipynb_checkpoints/ 43 | 44 | # Editor temporaries 45 | *.swn 46 | *.swo 47 | *.swp 48 | *~ 49 | 50 | # editor settings 51 | .idea 52 | .vscode 53 | _darcs 54 | 55 | # project dirs 56 | /detectron2/model_zoo/configs 57 | /datasets/* 58 | !/datasets/*.* 59 | !/datasets/metadata 60 | /projects/*/datasets 61 | /models 62 | /snippet 63 | -------------------------------------------------------------------------------- /third_party/Detic/.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "third_party/Deformable-DETR"] 2 | path = third_party/Deformable-DETR 3 | url = https://github.com/fundamentalvision/Deformable-DETR.git 4 | [submodule "third_party/CenterNet2"] 5 | path = third_party/CenterNet2 6 | url = https://github.com/xingyizhou/CenterNet2.git 7 | 
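Because Detic vendors Deformable-DETR and CenterNet2 as git submodules, a fresh clone needs `git submodule update --init --recursive` before those directories are populated; a plain `git clone` leaves them empty.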
-------------------------------------------------------------------------------- /third_party/Detic/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | Facebook has adopted a Code of Conduct that we expect project participants to adhere to. 4 | Please read the [full text](https://code.fb.com/codeofconduct/) 5 | so that you can understand what actions will and will not be tolerated. 6 | -------------------------------------------------------------------------------- /third_party/Detic/cog.yaml: -------------------------------------------------------------------------------- 1 | build: 2 | gpu: true 3 | cuda: "10.1" 4 | python_version: "3.8" 5 | system_packages: 6 | - "libgl1-mesa-glx" 7 | - "libglib2.0-0" 8 | python_packages: 9 | - "ipython==7.30.1" 10 | - "numpy==1.21.4" 11 | - "torch==1.8.1" 12 | - "torchvision==0.9.1" 13 | - "dataclasses==0.6" 14 | - "opencv-python==4.5.5.62" 15 | - "imageio==2.9.0" 16 | - "ftfy==6.0.3" 17 | - "regex==2021.10.8" 18 | - "tqdm==4.62.3" 19 | - "timm==0.4.12" 20 | - "fasttext==0.9.2" 21 | - "scikit-learn==1.0.2" 22 | - "lvis==0.5.3" 23 | - "nltk==3.6.7" 24 | - "git+https://github.com/openai/CLIP.git" 25 | run: 26 | - pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.8/index.html 27 | 28 | predict: "predict.py:Predictor" 29 | -------------------------------------------------------------------------------- /third_party/Detic/configs/Base_OVCOCO_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "CustomRCNN" 3 | RPN: 4 | PRE_NMS_TOPK_TEST: 6000 5 | POST_NMS_TOPK_TEST: 1000 6 | ROI_HEADS: 7 | NAME: "CustomRes5ROIHeads" 8 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 9 | RESNETS: 10 | DEPTH: 50 11 | ROI_BOX_HEAD: 12 | CLS_AGNOSTIC_BBOX_REG: True 13 | USE_SIGMOID_CE: True 14 | USE_ZEROSHOT_CLS: True 15 | ZEROSHOT_WEIGHT_PATH: 'datasets/metadata/coco_clip_a+cname.npy' 16 | IGNORE_ZERO_CATS: True 17 | CAT_FREQ_PATH: 'datasets/coco/zero-shot/instances_train2017_seen_2_oriorder_cat_info.json' 18 | DATASETS: 19 | TRAIN: ("coco_zeroshot_train_oriorder",) 20 | TEST: ("coco_generalized_zeroshot_val",) 21 | SOLVER: 22 | IMS_PER_BATCH: 16 23 | BASE_LR: 0.02 24 | STEPS: (60000, 80000) 25 | MAX_ITER: 90000 26 | CHECKPOINT_PERIOD: 1000000000 27 | INPUT: 28 | MIN_SIZE_TRAIN: (800,) 29 | VERSION: 2 30 | OUTPUT_DIR: output/Detic-COCO/auto 31 | FP16: True -------------------------------------------------------------------------------- /third_party/Detic/configs/BoxSup-C2_LCOCO_CLIP_CXT21k_640b32_4x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | WEIGHTS: '' 6 | TIMM: 7 | BASE_NAME: convnext_tiny_21k 8 | OUT_LEVELS: [2, 3, 4] 9 | PRETRAINED: True 10 | FPN: 11 | IN_FEATURES: ["layer2", "layer3", "layer4"] 12 | SOLVER: 13 | MAX_ITER: 180000 14 | IMS_PER_BATCH: 32 15 | BASE_LR: 0.0001 16 | DATASETS: 17 | TRAIN: ("lvis_v1_train+coco",) -------------------------------------------------------------------------------- /third_party/Detic/configs/BoxSup-C2_LCOCO_CLIP_R18_640b32_4x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | WEIGHTS: '' 6 | TIMM: 7 | BASE_NAME: resnet18 8 | PRETRAINED: True 9 | 
SOLVER: 10 | MAX_ITER: 180000 11 | IMS_PER_BATCH: 32 12 | BASE_LR: 0.0001 13 | DATASETS: 14 | TRAIN: ("lvis_v1_train+coco",) -------------------------------------------------------------------------------- /third_party/Detic/configs/BoxSup-C2_LCOCO_CLIP_R5021k_640b64_4x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | DATASETS: 6 | TRAIN: ("lvis_v1_train+coco",) -------------------------------------------------------------------------------- /third_party/Detic/configs/BoxSup-C2_LCOCO_CLIP_SwinB_896b32_4x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | WEIGHTS: "models/swin_base_patch4_window7_224_22k.pkl" 6 | BACKBONE: 7 | NAME: build_swintransformer_fpn_backbone 8 | SWIN: 9 | SIZE: B-22k 10 | FPN: 11 | IN_FEATURES: ["swin1", "swin2", "swin3"] 12 | SOLVER: 13 | MAX_ITER: 180000 14 | IMS_PER_BATCH: 32 15 | BASE_LR: 0.0001 16 | INPUT: 17 | TRAIN_SIZE: 896 18 | DATASETS: 19 | TRAIN: ("lvis_v1_train+coco",) -------------------------------------------------------------------------------- /third_party/Detic/configs/BoxSup-C2_L_CLIP_R5021k_640b64_4x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True -------------------------------------------------------------------------------- /third_party/Detic/configs/BoxSup-C2_L_CLIP_R5021k_640b64_4x_pomp.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | ZEROSHOT_WEIGHT_PATH: 'datasets/metadata/lvis_v1_clip_pomp+cname.npy' -------------------------------------------------------------------------------- /third_party/Detic/configs/BoxSup-C2_L_CLIP_SwinB_896b32_4x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | WEIGHTS: "models/swin_base_patch4_window7_224_22k.pkl" 6 | BACKBONE: 7 | NAME: build_swintransformer_fpn_backbone 8 | SWIN: 9 | SIZE: B-22k 10 | FPN: 11 | IN_FEATURES: ["swin1", "swin2", "swin3"] 12 | SOLVER: 13 | MAX_ITER: 180000 14 | IMS_PER_BATCH: 32 15 | BASE_LR: 0.0001 16 | INPUT: 17 | TRAIN_SIZE: 896 -------------------------------------------------------------------------------- /third_party/Detic/configs/BoxSup-C2_Lbase_CLIP_R5021k_640b64_4x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | DATASETS: 6 | TRAIN: ("lvis_v1_train_norare",) -------------------------------------------------------------------------------- /third_party/Detic/configs/BoxSup-C2_Lbase_CLIP_R5021k_640b64_4x_pomp.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | ZEROSHOT_WEIGHT_PATH: 'datasets/metadata/lvis_v1_clip_pomp+cname.npy' 6 | DATASETS: 7 | TRAIN: ("lvis_v1_train_norare",) -------------------------------------------------------------------------------- 
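The `_pomp` variants of the BoxSup configs above differ from their baselines in a single line: ZEROSHOT_WEIGHT_PATH points the zero-shot classifier at CLIP text embeddings of the category names computed with POMP's pretrained prompt rather than hand-written templates. A minimal sketch of how such a weight file is generated (a hand-written prompt is shown for brevity; the class list, backbone, output path, and the exact array layout expected by Detic's zero-shot classifier are illustrative and should be checked against the repo's own export script):

import clip
import numpy as np
import torch

classnames = ["person", "bicycle", "car"]  # stand-in for the full LVIS/COCO vocabulary

model, _ = clip.load("RN50", device="cpu")
with torch.no_grad():
    tokens = clip.tokenize(["a photo of a {}.".format(c) for c in classnames])
    emb = model.encode_text(tokens)
    emb = emb / emb.norm(dim=-1, keepdim=True)  # L2-normalize each class embedding

np.save("datasets/metadata/toy_clip+cname.npy", emb.numpy())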
/third_party/Detic/configs/BoxSup-C2_Lbase_CLIP_SwinB_896b32_4x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | WEIGHTS: "models/swin_base_patch4_window7_224_22k.pkl" 6 | BACKBONE: 7 | NAME: build_swintransformer_fpn_backbone 8 | SWIN: 9 | SIZE: B-22k 10 | FPN: 11 | IN_FEATURES: ["swin1", "swin2", "swin3"] 12 | SOLVER: 13 | MAX_ITER: 180000 14 | IMS_PER_BATCH: 32 15 | BASE_LR: 0.0001 16 | INPUT: 17 | TRAIN_SIZE: 896 18 | DATASETS: 19 | TRAIN: ("lvis_v1_train_norare",) -------------------------------------------------------------------------------- /third_party/Detic/configs/BoxSup-DeformDETR_L_R50_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-DeformDETR_L_R50_4x.yaml" 2 | SOLVER: 3 | IMS_PER_BATCH: 16 -------------------------------------------------------------------------------- /third_party/Detic/configs/BoxSup-DeformDETR_L_R50_4x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-DeformDETR_L_R50_4x.yaml" -------------------------------------------------------------------------------- /third_party/Detic/configs/BoxSup_OVCOCO_CLIP_R50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base_OVCOCO_C4_1x.yaml" 2 | -------------------------------------------------------------------------------- /third_party/Detic/configs/BoxSup_OVCOCO_CLIP_R50_1x_pomp.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base_OVCOCO_C4_1x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | ZEROSHOT_WEIGHT_PATH: 'datasets/metadata/coco_clip_pomp+cname.npy' 6 | -------------------------------------------------------------------------------- /third_party/Detic/configs/Detic_DeformDETR_LI_R50_4x_ft4x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-DeformDETR_L_R50_4x.yaml" 2 | MODEL: 3 | WEIGHTS: "models/BoxSup-DeformDETR_L_R50_4x.pth" 4 | INPUT: 5 | CUSTOM_AUG: ResizeShortestEdge 6 | MIN_SIZE_TRAIN_SAMPLING: range 7 | MIN_SIZE_TRAIN: [480, 800] 8 | DATASETS: 9 | TRAIN: ("lvis_v1_train","imagenet_lvis_v1") 10 | TEST: ("lvis_v1_val",) 11 | DATALOADER: 12 | SAMPLER_TRAIN: "MultiDatasetSampler" 13 | DATASET_RATIO: [1, 4] 14 | USE_DIFF_BS_SIZE: True 15 | DATASET_BS: [4, 16] 16 | USE_RFS: [True, False] 17 | DATASET_MIN_SIZES: [[480, 800], [240, 400]] 18 | DATASET_MAX_SIZES: [1333, 667] 19 | FILTER_EMPTY_ANNOTATIONS: False 20 | MULTI_DATASET_GROUPING: True 21 | DATASET_ANN: ['box', 'image'] 22 | WITH_IMAGE_LABELS: True 23 | -------------------------------------------------------------------------------- /third_party/Detic/configs/Detic_LCOCOI21k_CLIP_R5021k_640b32_4x_ft4x_max-size.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | DYNAMIC_CLASSIFIER: True 4 | ROI_BOX_HEAD: 5 | USE_ZEROSHOT_CLS: True 6 | IMAGE_LABEL_LOSS: 'max_size' 7 | ZEROSHOT_WEIGHT_PATH: 'datasets/metadata/lvis-21k_clip_a+cname.npy' 8 | USE_FED_LOSS: False # Federated loss is enabled when DYNAMIC_CLASSIFIER is on 9 | ROI_HEADS: 10 | NUM_CLASSES: 22047 11 | WEIGHTS: "output/Detic/BoxSup-C2_LCOCO_CLIP_R5021k_640b64_4x/model_final.pth" 12 | SOLVER: 13 | MAX_ITER: 180000 14 | IMS_PER_BATCH: 32 15 | BASE_LR: 0.0001 16 | WARMUP_ITERS: 
1000 17 | WARMUP_FACTOR: 0.001 18 | DATASETS: 19 | TRAIN: ("lvis_v1_train+coco","imagenet_lvis-22k") 20 | DATALOADER: 21 | SAMPLER_TRAIN: "MultiDatasetSampler" 22 | DATASET_RATIO: [1, 4] 23 | USE_DIFF_BS_SIZE: True 24 | DATASET_BS: [4, 16] 25 | DATASET_INPUT_SIZE: [640, 320] 26 | USE_RFS: [True, False] 27 | DATASET_INPUT_SCALE: [[0.1, 2.0], [0.5, 1.5]] 28 | FILTER_EMPTY_ANNOTATIONS: False 29 | MULTI_DATASET_GROUPING: True 30 | DATASET_ANN: ['box', 'image'] 31 | NUM_WORKERS: 2 32 | USE_TAR_DATASET: True 33 | WITH_IMAGE_LABELS: True -------------------------------------------------------------------------------- /third_party/Detic/configs/Detic_LI_CLIP_R5021k_640b64_4x_ft4x_max-size.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | IMAGE_LABEL_LOSS: 'max_size' 6 | WEIGHTS: "models/BoxSup-C2_L_CLIP_R5021k_640b64_4x.pth" 7 | SOLVER: 8 | MAX_ITER: 90000 9 | IMS_PER_BATCH: 64 10 | BASE_LR: 0.0002 11 | WARMUP_ITERS: 1000 12 | WARMUP_FACTOR: 0.001 13 | DATASETS: 14 | TRAIN: ("lvis_v1_train","imagenet_lvis_v1") 15 | DATALOADER: 16 | SAMPLER_TRAIN: "MultiDatasetSampler" 17 | DATASET_RATIO: [1, 4] 18 | USE_DIFF_BS_SIZE: True 19 | DATASET_BS: [8, 32] 20 | DATASET_INPUT_SIZE: [640, 320] 21 | USE_RFS: [True, False] 22 | DATASET_INPUT_SCALE: [[0.1, 2.0], [0.5, 1.5]] 23 | FILTER_EMPTY_ANNOTATIONS: False 24 | MULTI_DATASET_GROUPING: True 25 | DATASET_ANN: ['box', 'image'] 26 | NUM_WORKERS: 8 27 | WITH_IMAGE_LABELS: True -------------------------------------------------------------------------------- /third_party/Detic/configs/Detic_LI_CLIP_R5021k_640b64_4x_ft4x_max-size_pomp.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | IMAGE_LABEL_LOSS: 'max_size' 6 | ZEROSHOT_WEIGHT_PATH: 'datasets/metadata/lvis_v1_clip_pomp+cname.npy' 7 | WEIGHTS: "output/Detic/BoxSup-C2_L_CLIP_R5021k_640b64_4x_pomp/model_final.pth" 8 | SOLVER: 9 | MAX_ITER: 90000 10 | IMS_PER_BATCH: 64 11 | BASE_LR: 0.0002 12 | WARMUP_ITERS: 1000 13 | WARMUP_FACTOR: 0.001 14 | DATASETS: 15 | TRAIN: ("lvis_v1_train","imagenet_lvis_v1") 16 | DATALOADER: 17 | SAMPLER_TRAIN: "MultiDatasetSampler" 18 | DATASET_RATIO: [1, 4] 19 | USE_DIFF_BS_SIZE: True 20 | DATASET_BS: [8, 32] 21 | DATASET_INPUT_SIZE: [640, 320] 22 | USE_RFS: [True, False] 23 | DATASET_INPUT_SCALE: [[0.1, 2.0], [0.5, 1.5]] 24 | FILTER_EMPTY_ANNOTATIONS: False 25 | MULTI_DATASET_GROUPING: True 26 | DATASET_ANN: ['box', 'image'] 27 | NUM_WORKERS: 8 28 | WITH_IMAGE_LABELS: True -------------------------------------------------------------------------------- /third_party/Detic/configs/Detic_LI_CLIP_SwinB_896b32_4x_ft4x_max-size.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | IMAGE_LABEL_LOSS: 'max_size' 6 | BACKBONE: 7 | NAME: build_swintransformer_fpn_backbone 8 | SWIN: 9 | SIZE: B-22k 10 | FPN: 11 | IN_FEATURES: ["swin1", "swin2", "swin3"] 12 | WEIGHTS: "models/BoxSup-C2_L_CLIP_SwinB_896b32_4x.pth" 13 | SOLVER: 14 | MAX_ITER: 180000 15 | IMS_PER_BATCH: 32 16 | BASE_LR: 0.0001 17 | WARMUP_ITERS: 1000 18 | WARMUP_FACTOR: 0.001 19 | DATASETS: 20 | TRAIN: ("lvis_v1_train","imagenet_lvis_v1") 21 | DATALOADER: 22 | SAMPLER_TRAIN: "MultiDatasetSampler" 23 | DATASET_RATIO: 
[1, 4] 24 | USE_DIFF_BS_SIZE: True 25 | DATASET_BS: [4, 16] 26 | DATASET_INPUT_SIZE: [896, 448] 27 | USE_RFS: [True, False] 28 | DATASET_INPUT_SCALE: [[0.1, 2.0], [0.5, 1.5]] 29 | FILTER_EMPTY_ANNOTATIONS: False 30 | MULTI_DATASET_GROUPING: True 31 | DATASET_ANN: ['box', 'image'] 32 | NUM_WORKERS: 8 33 | WITH_IMAGE_LABELS: True -------------------------------------------------------------------------------- /third_party/Detic/configs/Detic_LbaseCCcapimg_CLIP_R5021k_640b64_4x_ft4x_max-size.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | WITH_CAPTION: True 4 | SYNC_CAPTION_BATCH: True 5 | ROI_BOX_HEAD: 6 | ADD_IMAGE_BOX: True # caption loss is added to the image-box 7 | USE_ZEROSHOT_CLS: True 8 | IMAGE_LABEL_LOSS: 'max_size' 9 | WEIGHTS: "models/BoxSup-C2_Lbase_CLIP_R5021k_640b64_4x.pth" 10 | SOLVER: 11 | MAX_ITER: 90000 12 | IMS_PER_BATCH: 64 13 | BASE_LR: 0.0002 14 | WARMUP_ITERS: 1000 15 | WARMUP_FACTOR: 0.001 16 | DATASETS: 17 | TRAIN: ("lvis_v1_train_norare","cc3m_v1_train_tags") 18 | DATALOADER: 19 | SAMPLER_TRAIN: "MultiDatasetSampler" 20 | DATASET_RATIO: [1, 4] 21 | USE_DIFF_BS_SIZE: True 22 | DATASET_BS: [8, 32] 23 | DATASET_INPUT_SIZE: [640, 320] 24 | USE_RFS: [True, False] 25 | DATASET_INPUT_SCALE: [[0.1, 2.0], [0.5, 1.5]] 26 | FILTER_EMPTY_ANNOTATIONS: False 27 | MULTI_DATASET_GROUPING: True 28 | DATASET_ANN: ['box', 'captiontag'] 29 | NUM_WORKERS: 8 30 | WITH_IMAGE_LABELS: True -------------------------------------------------------------------------------- /third_party/Detic/configs/Detic_LbaseCCimg_CLIP_R5021k_640b64_4x_ft4x_max-size.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | IMAGE_LABEL_LOSS: 'max_size' 6 | WEIGHTS: "models/BoxSup-C2_Lbase_CLIP_R5021k_640b64_4x.pth" 7 | SOLVER: 8 | MAX_ITER: 90000 9 | IMS_PER_BATCH: 64 10 | BASE_LR: 0.0002 11 | WARMUP_ITERS: 1000 12 | WARMUP_FACTOR: 0.001 13 | DATASETS: 14 | TRAIN: ("lvis_v1_train_norare","cc3m_v1_train_tags") 15 | DATALOADER: 16 | SAMPLER_TRAIN: "MultiDatasetSampler" 17 | DATASET_RATIO: [1, 4] 18 | USE_DIFF_BS_SIZE: True 19 | DATASET_BS: [8, 32] 20 | DATASET_INPUT_SIZE: [640, 320] 21 | USE_RFS: [True, False] 22 | DATASET_INPUT_SCALE: [[0.1, 2.0], [0.5, 1.5]] 23 | FILTER_EMPTY_ANNOTATIONS: False 24 | MULTI_DATASET_GROUPING: True 25 | DATASET_ANN: ['box', 'image'] 26 | NUM_WORKERS: 8 27 | WITH_IMAGE_LABELS: True -------------------------------------------------------------------------------- /third_party/Detic/configs/Detic_LbaseI_CLIP_R5021k_640b64_4x_ft4x_max-size.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | IMAGE_LABEL_LOSS: 'max_size' 6 | WEIGHTS: "models/BoxSup-C2_Lbase_CLIP_R5021k_640b64_4x.pth" 7 | SOLVER: 8 | MAX_ITER: 90000 9 | IMS_PER_BATCH: 64 10 | BASE_LR: 0.0002 11 | WARMUP_ITERS: 1000 12 | WARMUP_FACTOR: 0.001 13 | DATASETS: 14 | TRAIN: ("lvis_v1_train_norare","imagenet_lvis_v1") 15 | DATALOADER: 16 | SAMPLER_TRAIN: "MultiDatasetSampler" 17 | DATASET_RATIO: [1, 4] 18 | USE_DIFF_BS_SIZE: True 19 | DATASET_BS: [8, 32] 20 | DATASET_INPUT_SIZE: [640, 320] 21 | USE_RFS: [True, False] 22 | DATASET_INPUT_SCALE: [[0.1, 2.0], [0.5, 1.5]] 23 | FILTER_EMPTY_ANNOTATIONS: False 24 | MULTI_DATASET_GROUPING: True 25 | DATASET_ANN: 
['box', 'image'] 26 | NUM_WORKERS: 8 27 | WITH_IMAGE_LABELS: True -------------------------------------------------------------------------------- /third_party/Detic/configs/Detic_LbaseI_CLIP_R5021k_640b64_4x_ft4x_max-size_pomp.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | IMAGE_LABEL_LOSS: 'max_size' 6 | ZEROSHOT_WEIGHT_PATH: 'datasets/metadata/lvis_v1_clip_pomp+cname.npy' 7 | WEIGHTS: "output/Detic/BoxSup-C2_Lbase_CLIP_R5021k_640b64_4x_pomp/model_final.pth" 8 | SOLVER: 9 | MAX_ITER: 90000 10 | IMS_PER_BATCH: 64 11 | BASE_LR: 0.0002 12 | WARMUP_ITERS: 1000 13 | WARMUP_FACTOR: 0.001 14 | DATASETS: 15 | TRAIN: ("lvis_v1_train_norare","imagenet_lvis_v1") 16 | DATALOADER: 17 | SAMPLER_TRAIN: "MultiDatasetSampler" 18 | DATASET_RATIO: [1, 4] 19 | USE_DIFF_BS_SIZE: True 20 | DATASET_BS: [8, 32] 21 | DATASET_INPUT_SIZE: [640, 320] 22 | USE_RFS: [True, False] 23 | DATASET_INPUT_SCALE: [[0.1, 2.0], [0.5, 1.5]] 24 | FILTER_EMPTY_ANNOTATIONS: False 25 | MULTI_DATASET_GROUPING: True 26 | DATASET_ANN: ['box', 'image'] 27 | NUM_WORKERS: 8 28 | WITH_IMAGE_LABELS: True -------------------------------------------------------------------------------- /third_party/Detic/configs/Detic_LbaseI_CLIP_R5021k_640b64_4x_ft4x_predicted.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | IMAGE_LABEL_LOSS: 'max_score' 6 | WEIGHTS: "models/BoxSup-C2_Lbase_CLIP_R5021k_640b64_4x.pth" 7 | SOLVER: 8 | MAX_ITER: 90000 9 | IMS_PER_BATCH: 64 10 | BASE_LR: 0.0002 11 | WARMUP_ITERS: 1000 12 | WARMUP_FACTOR: 0.001 13 | DATASETS: 14 | TRAIN: ("lvis_v1_train_norare","imagenet_lvis_v1") 15 | DATALOADER: 16 | SAMPLER_TRAIN: "MultiDatasetSampler" 17 | DATASET_RATIO: [1, 4] 18 | USE_DIFF_BS_SIZE: True 19 | DATASET_BS: [8, 32] 20 | DATASET_INPUT_SIZE: [640, 320] 21 | USE_RFS: [True, False] 22 | DATASET_INPUT_SCALE: [[0.1, 2.0], [0.5, 1.5]] 23 | FILTER_EMPTY_ANNOTATIONS: False 24 | MULTI_DATASET_GROUPING: True 25 | DATASET_ANN: ['box', 'image'] 26 | NUM_WORKERS: 8 27 | WITH_IMAGE_LABELS: True -------------------------------------------------------------------------------- /third_party/Detic/configs/Detic_LbaseI_CLIP_SwinB_896b32_4x_ft4x_max-size.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | IMAGE_LABEL_LOSS: 'max_size' 6 | BACKBONE: 7 | NAME: build_swintransformer_fpn_backbone 8 | SWIN: 9 | SIZE: B-22k 10 | FPN: 11 | IN_FEATURES: ["swin1", "swin2", "swin3"] 12 | WEIGHTS: "models/BoxSup-C2_Lbase_CLIP_SwinB_896b32_4x.pth" 13 | SOLVER: 14 | MAX_ITER: 180000 15 | IMS_PER_BATCH: 32 16 | BASE_LR: 0.0001 17 | WARMUP_ITERS: 1000 18 | WARMUP_FACTOR: 0.001 19 | DATASETS: 20 | TRAIN: ("lvis_v1_train_norare","imagenet_lvis_v1") 21 | DATALOADER: 22 | SAMPLER_TRAIN: "MultiDatasetSampler" 23 | DATASET_RATIO: [1, 4] 24 | USE_DIFF_BS_SIZE: True 25 | DATASET_BS: [4, 16] 26 | DATASET_INPUT_SIZE: [896, 448] 27 | USE_RFS: [True, False] 28 | DATASET_INPUT_SCALE: [[0.1, 2.0], [0.5, 1.5]] 29 | FILTER_EMPTY_ANNOTATIONS: False 30 | MULTI_DATASET_GROUPING: True 31 | DATASET_ANN: ['box', 'image'] 32 | NUM_WORKERS: 8 33 | WITH_IMAGE_LABELS: True -------------------------------------------------------------------------------- 
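These finetuning configs follow a two-stage pattern: WEIGHTS resumes from the matching BoxSup run, and only then is image-labelled data (imagenet_lvis_v1) mixed in at the 1:4 DATASET_RATIO. Following detectron2 conventions, a config like the one above is launched along the lines of `python train_net.py --num-gpus 8 --config-file configs/Detic_LbaseI_CLIP_R5021k_640b64_4x_ft4x_max-size_pomp.yaml` from the Detic root (script name and GPU count as in Detic's docs; adjust to your setup).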
/third_party/Detic/configs/Detic_OVCOCO_CLIP_R50_1x_caption.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base_OVCOCO_C4_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "models/BoxSup_OVCOCO_CLIP_R50_1x.pth" 4 | WITH_CAPTION: True 5 | SYNC_CAPTION_BATCH: True 6 | ROI_BOX_HEAD: 7 | WS_NUM_PROPS: 1 8 | ADD_IMAGE_BOX: True 9 | NEG_CAP_WEIGHT: 1.0 10 | SOLVER: 11 | IMS_PER_BATCH: 16 12 | BASE_LR: 0.02 13 | STEPS: (60000, 80000) 14 | MAX_ITER: 90000 15 | DATASETS: 16 | TRAIN: ("coco_zeroshot_train_oriorder", "coco_caption_train_tags") 17 | INPUT: 18 | CUSTOM_AUG: ResizeShortestEdge 19 | MIN_SIZE_TRAIN_SAMPLING: range 20 | MIN_SIZE_TRAIN: (800, 800) 21 | DATALOADER: 22 | SAMPLER_TRAIN: "MultiDatasetSampler" 23 | DATASET_RATIO: [1, 4] 24 | USE_DIFF_BS_SIZE: True 25 | DATASET_BS: [2, 8] 26 | USE_RFS: [False, False] 27 | DATASET_MIN_SIZES: [[800, 800], [400, 400]] 28 | DATASET_MAX_SIZES: [1333, 667] 29 | FILTER_EMPTY_ANNOTATIONS: False 30 | MULTI_DATASET_GROUPING: True 31 | DATASET_ANN: ['box', 'caption'] 32 | NUM_WORKERS: 8 33 | WITH_IMAGE_LABELS: True -------------------------------------------------------------------------------- /third_party/Detic/configs/Detic_OVCOCO_CLIP_R50_1x_max-size.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base_OVCOCO_C4_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "models/BoxSup_OVCOCO_CLIP_R50_1x.pth" 4 | ROI_BOX_HEAD: 5 | WS_NUM_PROPS: 32 6 | IMAGE_LABEL_LOSS: 'max_size' 7 | SOLVER: 8 | IMS_PER_BATCH: 16 9 | BASE_LR: 0.02 10 | STEPS: (60000, 80000) 11 | MAX_ITER: 90000 12 | DATASETS: 13 | TRAIN: ("coco_zeroshot_train_oriorder", "coco_caption_train_tags") 14 | INPUT: 15 | CUSTOM_AUG: ResizeShortestEdge 16 | MIN_SIZE_TRAIN_SAMPLING: range 17 | MIN_SIZE_TRAIN: (800, 800) 18 | DATALOADER: 19 | SAMPLER_TRAIN: "MultiDatasetSampler" 20 | DATASET_RATIO: [1, 4] 21 | USE_DIFF_BS_SIZE: True 22 | DATASET_BS: [2, 8] 23 | USE_RFS: [False, False] 24 | DATASET_MIN_SIZES: [[800, 800], [400, 400]] 25 | DATASET_MAX_SIZES: [1333, 667] 26 | FILTER_EMPTY_ANNOTATIONS: False 27 | MULTI_DATASET_GROUPING: True 28 | DATASET_ANN: ['box', 'image'] 29 | NUM_WORKERS: 8 30 | WITH_IMAGE_LABELS: True -------------------------------------------------------------------------------- /third_party/Detic/configs/Detic_OVCOCO_CLIP_R50_1x_max-size_caption.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base_OVCOCO_C4_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "models/BoxSup_OVCOCO_CLIP_R50_1x.pth" 4 | WITH_CAPTION: True 5 | SYNC_CAPTION_BATCH: True 6 | ROI_BOX_HEAD: 7 | WS_NUM_PROPS: 32 8 | ADD_IMAGE_BOX: True # caption loss is added to the image-box 9 | IMAGE_LABEL_LOSS: 'max_size' 10 | 11 | NEG_CAP_WEIGHT: 1.0 12 | SOLVER: 13 | IMS_PER_BATCH: 16 14 | BASE_LR: 0.02 15 | STEPS: (60000, 80000) 16 | MAX_ITER: 90000 17 | DATASETS: 18 | TRAIN: ("coco_zeroshot_train_oriorder", "coco_caption_train_tags") 19 | INPUT: 20 | CUSTOM_AUG: ResizeShortestEdge 21 | MIN_SIZE_TRAIN_SAMPLING: range 22 | MIN_SIZE_TRAIN: (800, 800) 23 | DATALOADER: 24 | SAMPLER_TRAIN: "MultiDatasetSampler" 25 | DATASET_RATIO: [1, 4] 26 | USE_DIFF_BS_SIZE: True 27 | DATASET_BS: [2, 8] 28 | USE_RFS: [False, False] 29 | DATASET_MIN_SIZES: [[800, 800], [400, 400]] 30 | DATASET_MAX_SIZES: [1333, 667] 31 | FILTER_EMPTY_ANNOTATIONS: False 32 | MULTI_DATASET_GROUPING: True 33 | DATASET_ANN: ['box', 'captiontag'] 34 | NUM_WORKERS: 8 35 | WITH_IMAGE_LABELS: True 
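The 'max_size' IMAGE_LABEL_LOSS recurring in these configs is Detic's central trick: when a training source provides only image-level tags (DATASET_ANN 'image' or 'captiontag'), the classification loss is applied to the largest proposal instead of trying to localize the tag, and WS_NUM_PROPS sets how many proposals per image feed this weak supervision. A conceptual sketch of the rule, not Detic's actual implementation (which lives in detic_roi_heads.py and uses its own sigmoid-based losses):

import torch
import torch.nn.functional as F


def max_size_image_label_loss(proposal_boxes, cls_logits, image_label):
    """proposal_boxes: (N, 4) in xyxy format; cls_logits: (N, C); image_label: int."""
    areas = (proposal_boxes[:, 2] - proposal_boxes[:, 0]) * \
        (proposal_boxes[:, 3] - proposal_boxes[:, 1])
    biggest = areas.argmax()  # the max-size proposal stands in for the whole image
    target = torch.zeros_like(cls_logits[biggest])
    target[image_label] = 1.0
    return F.binary_cross_entropy_with_logits(cls_logits[biggest], target)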
-------------------------------------------------------------------------------- /third_party/Detic/configs/debug.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-C2_L_R5021k_640b64_4x.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | USE_ZEROSHOT_CLS: True 5 | IMAGE_LABEL_LOSS: 'max_size' 6 | ZEROSHOT_WEIGHT_PATH: 'datasets/metadata/lvis_v1_clip_detpro+cname.npy' 7 | WEIGHTS: "output/Detic/BoxSup-C2_Lbase_CLIP_R5021k_640b64_4x_detpro/model_final.pth" 8 | SOLVER: 9 | MAX_ITER: 90000 10 | IMS_PER_BATCH: 64 11 | BASE_LR: 0.0002 12 | WARMUP_ITERS: 1000 13 | WARMUP_FACTOR: 0.001 14 | DATASETS: 15 | TRAIN: ("lvis_v1_train_norare","imagenet_lvis_v1") 16 | DATALOADER: 17 | SAMPLER_TRAIN: "MultiDatasetSampler" 18 | DATASET_RATIO: [1, 4] 19 | USE_DIFF_BS_SIZE: True 20 | DATASET_BS: [8, 32] 21 | DATASET_INPUT_SIZE: [640, 320] 22 | USE_RFS: [True, False] 23 | DATASET_INPUT_SCALE: [[0.1, 2.0], [0.5, 1.5]] 24 | FILTER_EMPTY_ANNOTATIONS: False 25 | MULTI_DATASET_GROUPING: True 26 | DATASET_ANN: ['box', 'image'] 27 | NUM_WORKERS: 8 28 | WITH_IMAGE_LABELS: True -------------------------------------------------------------------------------- /third_party/Detic/datasets/metadata/coco_clip_a+cname.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/datasets/metadata/coco_clip_a+cname.npy -------------------------------------------------------------------------------- /third_party/Detic/datasets/metadata/coco_clip_pomp+cname.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/datasets/metadata/coco_clip_pomp+cname.npy -------------------------------------------------------------------------------- /third_party/Detic/datasets/metadata/lvis_v1_clip_a+cname.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/datasets/metadata/lvis_v1_clip_a+cname.npy -------------------------------------------------------------------------------- /third_party/Detic/datasets/metadata/lvis_v1_clip_pomp+cname.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/datasets/metadata/lvis_v1_clip_pomp+cname.npy -------------------------------------------------------------------------------- /third_party/Detic/datasets/metadata/o365_clip_a+cnamefix.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/datasets/metadata/o365_clip_a+cnamefix.npy -------------------------------------------------------------------------------- /third_party/Detic/datasets/metadata/o365_fixname_clip_pomp+cname.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/datasets/metadata/o365_fixname_clip_pomp+cname.npy 
-------------------------------------------------------------------------------- /third_party/Detic/datasets/metadata/oid_clip_a+cname.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/datasets/metadata/oid_clip_a+cname.npy -------------------------------------------------------------------------------- /third_party/Detic/detic/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .modeling.meta_arch import custom_rcnn 3 | from .modeling.roi_heads import detic_roi_heads 4 | from .modeling.roi_heads import res5_roi_heads 5 | from .modeling.backbone import swintransformer 6 | from .modeling.backbone import timm 7 | 8 | 9 | from .data.datasets import lvis_v1 10 | from .data.datasets import imagenet 11 | from .data.datasets import cc 12 | from .data.datasets import objects365 13 | from .data.datasets import oid 14 | from .data.datasets import coco_zeroshot 15 | 16 | try: 17 | from .modeling.meta_arch import d2_deformable_detr 18 | except: 19 | pass -------------------------------------------------------------------------------- /third_party/Detic/detic/data/datasets/cc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import logging 3 | import os 4 | 5 | from detectron2.data.datasets.builtin_meta import _get_builtin_metadata 6 | from detectron2.data.datasets.lvis import get_lvis_instances_meta 7 | from .lvis_v1 import custom_register_lvis_instances 8 | 9 | _CUSTOM_SPLITS = { 10 | "cc3m_v1_val": ("cc3m/validation/", "cc3m/val_image_info.json"), 11 | "cc3m_v1_train": ("cc3m/training/", "cc3m/train_image_info.json"), 12 | "cc3m_v1_train_tags": ("cc3m/training/", "cc3m/train_image_info_tags.json"), 13 | 14 | } 15 | 16 | for key, (image_root, json_file) in _CUSTOM_SPLITS.items(): 17 | custom_register_lvis_instances( 18 | key, 19 | get_lvis_instances_meta('lvis_v1'), 20 | os.path.join("datasets", json_file) if "://" not in json_file else json_file, 21 | os.path.join("datasets", image_root), 22 | ) 23 | 24 | -------------------------------------------------------------------------------- /third_party/Detic/docs/example_output_custom.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/docs/example_output_custom.jpeg -------------------------------------------------------------------------------- /third_party/Detic/docs/example_output_lvis.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/docs/example_output_lvis.jpeg -------------------------------------------------------------------------------- /third_party/Detic/docs/teaser.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/docs/teaser.jpeg -------------------------------------------------------------------------------- /third_party/Detic/extract.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import imghdr 3 | from PIL import Image 4 | import multiprocessing 5 | from concurrent.futures import ThreadPoolExecutor 6 | 7 | train_path = '/home/ubuntu/efs/imagenet/ImageNet-21K/images/train' 8 | img_folder_idx = [f.replace('.tar', '') for f in os.listdir(train_path) if f.endswith('.tar')] # len=19167 9 | print('len(img_folder_idx) = ', len(img_folder_idx)) 10 | 11 | img_paths = [] 12 | for i, folder_idx in enumerate(img_folder_idx): 13 | img_folder_path = os.path.join(train_path, folder_idx) 14 | if os.path.exists(img_folder_path): 15 | img_names = os.listdir(img_folder_path) 16 | current_img_paths = [os.path.join(img_folder_path, img_name) for img_name in img_names] 17 | img_paths.extend(current_img_paths) 18 | 19 | print('len(img_paths) = ', len(img_paths)) 20 | 21 | 22 | def check(img_path): 23 | # if imghdr.what(img_path) is None: 24 | try: 25 | Image.open(img_path).verify() # verify() checks the header without decoding the whole image 26 | except (IOError, SyntaxError): 27 | print('remove', img_path) 28 | os.remove(img_path) 29 | 30 | with ThreadPoolExecutor(128) as executor: 31 | list(executor.map(check, img_paths)) # drain the iterator so worker exceptions surface 32 | 33 | 34 | -------------------------------------------------------------------------------- /third_party/Detic/figures/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/figures/.DS_Store -------------------------------------------------------------------------------- /third_party/Detic/figures/Detic_LI_CLIP_R5021k_640b64_4x_ft4x_max-size/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/figures/Detic_LI_CLIP_R5021k_640b64_4x_ft4x_max-size/.DS_Store -------------------------------------------------------------------------------- /third_party/Detic/figures/Detic_LI_CLIP_R5021k_640b64_4x_ft4x_max-size/inference_lvis_v1_val/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/figures/Detic_LI_CLIP_R5021k_640b64_4x_ft4x_max-size/inference_lvis_v1_val/.DS_Store -------------------------------------------------------------------------------- /third_party/Detic/figures/Detic_LI_CLIP_R5021k_640b64_4x_ft4x_max-size_gpt/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/figures/Detic_LI_CLIP_R5021k_640b64_4x_ft4x_max-size_gpt/.DS_Store -------------------------------------------------------------------------------- /third_party/Detic/requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | mss 3 | timm 4 | dataclasses 5 | ftfy 6 | regex 7 | fasttext 8 | scikit-learn 9 | lvis 10 | nltk 11 | git+https://github.com/openai/CLIP.git 12 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | Facebook has adopted a Code of
Conduct that we expect project participants to adhere to. 4 | Please read the [full text](https://code.fb.com/codeofconduct/) 5 | so that you can understand what actions will and will not be tolerated. 6 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | Please select an issue template from 3 | https://github.com/facebookresearch/detectron2/issues/new/choose . 4 | 5 | Otherwise your issue will be closed. 6 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | # require an issue template to be chosen 2 | blank_issues_enabled: false 3 | 4 | contact_links: 5 | - name: How-To / All Other Questions 6 | url: https://github.com/facebookresearch/detectron2/discussions 7 | about: Use "github discussions" for community support on general questions that don't belong to the above issue categories 8 | - name: Detectron2 Documentation 9 | url: https://detectron2.readthedocs.io/index.html 10 | about: Check if your question is answered in tutorials or API docs 11 | 12 | # Unexpected behaviors & bugs are split to two templates. 13 | # When they are one template, users think "it's not a bug" and don't choose the template. 14 | # 15 | # But the file name is still "unexpected-problems-bugs.md" so that old references 16 | # to this issue template still works. 17 | # It's ok since this template should be a superset of "bugs.md" (unexpected behaviors is a superset of bugs) 18 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/.github/ISSUE_TEMPLATE/documentation.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F4DA Documentation Issue" 3 | about: Report a problem about existing documentation, comments, website or tutorials. 4 | labels: documentation 5 | 6 | --- 7 | 8 | ## 📚 Documentation Issue 9 | 10 | This issue category is for problems about existing documentation, not for asking how-to questions. 11 | 12 | * Provide a link to an existing documentation/comment/tutorial: 13 | 14 | * How should the above documentation/comment/tutorial improve: 15 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | Thanks for your contribution! 2 | 3 | If you're sending a large PR (e.g., >100 lines), 4 | please open an issue first about the feature / bug, and indicate how you want to contribute. 5 | 6 | We do not always accept features. 7 | See https://detectron2.readthedocs.io/notes/contributing.html#pull-requests about how we handle PRs. 8 | 9 | Before submitting a PR, please run `dev/linter.sh` to lint the code. 
10 | 11 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/.github/workflows/remove-needs-reply.yml: -------------------------------------------------------------------------------- 1 | name: Remove needs-more-info label 2 | 3 | on: 4 | issue_comment: 5 | types: [created] 6 | issues: 7 | types: [edited] 8 | 9 | jobs: 10 | remove-needs-more-info-label: 11 | runs-on: ubuntu-latest 12 | # 1. issue_comment events could include PR comment, filter them out 13 | # 2. Only trigger action if event was produced by the original author 14 | if: ${{ !github.event.issue.pull_request && github.event.sender.login == github.event.issue.user.login }} 15 | steps: 16 | - name: Remove needs-more-info label 17 | uses: octokit/request-action@v2.x 18 | continue-on-error: true 19 | with: 20 | route: DELETE /repos/:repository/issues/:issue/labels/:label 21 | repository: ${{ github.repository }} 22 | issue: ${{ github.event.issue.number }} 23 | label: needs-more-info 24 | env: 25 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 26 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/.gitignore: -------------------------------------------------------------------------------- 1 | third_party/detectron2 2 | slurm* 3 | # output dir 4 | output 5 | instant_test_output 6 | inference_test_output 7 | 8 | 9 | *.png 10 | *.json 11 | *.diff 12 | # *.jpg 13 | !/projects/DensePose/doc/images/*.jpg 14 | 15 | # compilation and distribution 16 | __pycache__ 17 | _ext 18 | *.pyc 19 | *.pyd 20 | *.so 21 | *.dll 22 | *.egg-info/ 23 | build/ 24 | dist/ 25 | wheels/ 26 | 27 | # pytorch/python/numpy formats 28 | *.pth 29 | *.pkl 30 | *.npy 31 | *.ts 32 | model_ts*.txt 33 | 34 | # ipython/jupyter notebooks 35 | *.ipynb 36 | **/.ipynb_checkpoints/ 37 | 38 | # Editor temporaries 39 | *.swn 40 | *.swo 41 | *.swp 42 | *~ 43 | 44 | # editor settings 45 | .idea 46 | .vscode 47 | _darcs 48 | 49 | # project dirs 50 | /detectron2/model_zoo/configs 51 | /datasets/* 52 | !/datasets/*.* 53 | !/datasets/lvis/ 54 | /datasets/lvis/* 55 | !/datasets/lvis/lvis_v1_train_cat_info.json 56 | /projects/*/datasets 57 | /models 58 | /snippet 59 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/centernet/__init__.py: -------------------------------------------------------------------------------- 1 | from .modeling.meta_arch.centernet_detector import CenterNetDetector 2 | from .modeling.dense_heads.centernet import CenterNet 3 | from .modeling.roi_heads.custom_roi_heads import CustomROIHeads, CustomCascadeROIHeads 4 | 5 | from .modeling.backbone.fpn_p5 import build_p67_resnet_fpn_backbone 6 | from .modeling.backbone.dla import build_dla_backbone 7 | from .modeling.backbone.dlafpn import build_dla_fpn3_backbone 8 | from .modeling.backbone.bifpn import build_resnet_bifpn_backbone 9 | from .modeling.backbone.bifpn_fcos import build_fcos_resnet_bifpn_backbone 10 | from .modeling.backbone.res2net import build_p67_res2net_fpn_backbone 11 | 12 | from .data.datasets.objects365 import categories_v1 13 | from .data.datasets.coco import _PREDEFINED_SPLITS_COCO 14 | from .data.datasets import nuimages 15 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/centernet/modeling/layers/ml_nms.py: -------------------------------------------------------------------------------- 1 | from 
detectron2.layers import batched_nms 2 | 3 | 4 | def ml_nms(boxlist, nms_thresh, max_proposals=-1, 5 | score_field="scores", label_field="labels"): 6 | """ 7 | Performs non-maximum suppression on a boxlist, with scores specified 8 | in a boxlist field via score_field. 9 | Arguments: 10 | boxlist(BoxList) 11 | nms_thresh (float) 12 | max_proposals (int): if > 0, then only the top max_proposals are kept 13 | after non-maximum suppression 14 | score_field (str) 15 | """ 16 | if nms_thresh <= 0: 17 | return boxlist 18 | if boxlist.has('pred_boxes'): 19 | boxes = boxlist.pred_boxes.tensor 20 | labels = boxlist.pred_classes 21 | else: 22 | boxes = boxlist.proposal_boxes.tensor 23 | labels = boxlist.proposal_boxes.tensor.new_zeros( 24 | len(boxlist.proposal_boxes.tensor)) 25 | scores = boxlist.scores 26 | 27 | keep = batched_nms(boxes, scores, labels, nms_thresh) 28 | if max_proposals > 0: 29 | keep = keep[: max_proposals] 30 | boxlist = boxlist[keep] 31 | return boxlist 32 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/centernet/modeling/roi_heads/fed_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import json 3 | import numpy as np 4 | from torch.nn import functional as F 5 | 6 | def load_class_freq( 7 | path='datasets/lvis/lvis_v1_train_cat_info.json', 8 | freq_weight=0.5): 9 | cat_info = json.load(open(path, 'r')) 10 | cat_info = torch.tensor( 11 | [c['image_count'] for c in sorted(cat_info, key=lambda x: x['id'])]) 12 | freq_weight = cat_info.float() ** freq_weight 13 | return freq_weight 14 | 15 | def get_fed_loss_inds( 16 | gt_classes, num_sample_cats=50, C=1203, \ 17 | weight=None, fed_cls_inds=-1): 18 | appeared = torch.unique(gt_classes) # C' 19 | prob = appeared.new_ones(C + 1).float() 20 | prob[-1] = 0 21 | if len(appeared) < num_sample_cats: 22 | if weight is not None: 23 | prob[:C] = weight.float().clone() 24 | prob[appeared] = 0 25 | if fed_cls_inds > 0: 26 | prob[fed_cls_inds:] = 0 27 | more_appeared = torch.multinomial( 28 | prob, num_sample_cats - len(appeared), 29 | replacement=False) 30 | appeared = torch.cat([appeared, more_appeared]) 31 | return appeared -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/Base-CenterNet-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "CenterNetDetector" 3 | PROPOSAL_GENERATOR: 4 | NAME: "CenterNet" 5 | BACKBONE: 6 | NAME: "build_p67_resnet_fpn_backbone" 7 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 8 | RESNETS: 9 | DEPTH: 50 10 | OUT_FEATURES: ["res3", "res4", "res5"] 11 | FPN: 12 | IN_FEATURES: ["res3", "res4", "res5"] 13 | DATASETS: 14 | TRAIN: ("coco_2017_train",) 15 | TEST: ("coco_2017_val",) 16 | SOLVER: 17 | IMS_PER_BATCH: 16 18 | BASE_LR: 0.01 19 | STEPS: (60000, 80000) 20 | MAX_ITER: 90000 21 | CHECKPOINT_PERIOD: 1000000000 22 | WARMUP_ITERS: 4000 23 | WARMUP_FACTOR: 0.00025 24 | CLIP_GRADIENTS: 25 | ENABLED: True 26 | INPUT: 27 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 28 | OUTPUT_DIR: "./output/CenterNet2/auto" 29 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/Base_S4_DLA.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "CenterNetDetector" 3 | PROPOSAL_GENERATOR: 
4 | NAME: "CenterNet" 5 | PIXEL_STD: [57.375, 57.120, 58.395] 6 | BACKBONE: 7 | NAME: "build_dla_backbone" 8 | DLA: 9 | NORM: "BN" 10 | CENTERNET: 11 | IN_FEATURES: ["dla2"] 12 | FPN_STRIDES: [4] 13 | SOI: [[0, 1000000]] 14 | NUM_CLS_CONVS: 1 15 | NUM_BOX_CONVS: 1 16 | REG_WEIGHT: 1. 17 | MORE_POS: True 18 | HM_FOCAL_ALPHA: 0.25 19 | DATASETS: 20 | TRAIN: ("coco_2017_train",) 21 | TEST: ("coco_2017_val",) 22 | SOLVER: 23 | LR_SCHEDULER_NAME: "WarmupCosineLR" 24 | MAX_ITER: 90000 25 | BASE_LR: 0.04 26 | IMS_PER_BATCH: 64 27 | WEIGHT_DECAY: 0.0001 28 | CHECKPOINT_PERIOD: 1000000 29 | CLIP_GRADIENTS: 30 | ENABLED: True 31 | INPUT: 32 | CUSTOM_AUG: EfficientDetResizeCrop 33 | TRAIN_SIZE: 640 34 | MIN_SIZE_TEST: 608 35 | MAX_SIZE_TEST: 900 36 | TEST: 37 | EVAL_PERIOD: 7500 38 | DATALOADER: 39 | NUM_WORKERS: 8 40 | OUTPUT_DIR: "output/CenterNet2/auto" 41 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/CenterNet-FPN_R50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet-FPN.yaml" 2 | MODEL: 3 | CENTERNET: 4 | MORE_POS: True -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/CenterNet-S4_DLA_8x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base_S4_DLA.yaml" 2 | SOLVER: 3 | MAX_ITER: 90000 4 | BASE_LR: 0.08 5 | IMS_PER_BATCH: 128 -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/CenterNet2-F_R50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | ROI_HEADS: 4 | NAME: CustomROIHeads -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/CenterNet2_DLA-BiFPN-P3_24x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_p35_fcos_dla_bifpn_backbone" 5 | BIFPN: 6 | OUT_CHANNELS: 160 7 | NUM_LEVELS: 3 8 | NUM_BIFPN: 4 9 | DLA: 10 | NUM_LAYERS: 34 11 | NORM: "SyncBN" 12 | FPN: 13 | IN_FEATURES: ["dla3", "dla4", "dla5"] 14 | ROI_HEADS: 15 | IN_FEATURES: ["p3", "p4", "p5"] 16 | CENTERNET: 17 | POST_NMS_TOPK_TEST: 128 18 | FPN_STRIDES: [8, 16, 32] 19 | IN_FEATURES: ['p3', 'p4', 'p5'] 20 | SOI: [[0, 64], [48, 192], [128, 1000000]] 21 | DATASETS: 22 | TRAIN: ("coco_2017_train",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.02 27 | STEPS: (300000, 340000) 28 | MAX_ITER: 360000 29 | CHECKPOINT_PERIOD: 100000 30 | WARMUP_ITERS: 4000 31 | WARMUP_FACTOR: 0.00025 32 | INPUT: 33 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 34 | MAX_SIZE_TRAIN: 900 35 | MAX_SIZE_TEST: 736 36 | MIN_SIZE_TEST: 512 -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/CenterNet2_DLA-BiFPN-P3_4x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_p35_fcos_dla_bifpn_backbone" 5 | BIFPN: 6 | OUT_CHANNELS: 160 7 | NUM_LEVELS: 3 8 | NUM_BIFPN: 4 9 | DLA: 10 | NUM_LAYERS: 34 11 | NORM: "SyncBN" 12 | FPN: 13 | IN_FEATURES: ["dla3", "dla4", "dla5"] 14 | 
ROI_HEADS: 15 | IN_FEATURES: ["p3", "p4", "p5"] 16 | CENTERNET: 17 | POST_NMS_TOPK_TEST: 128 18 | FPN_STRIDES: [8, 16, 32] 19 | IN_FEATURES: ['p3', 'p4', 'p5'] 20 | SOI: [[0, 64], [48, 192], [128, 1000000]] 21 | DATASETS: 22 | TRAIN: ("coco_2017_train",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.02 27 | STEPS: (300000, 340000) 28 | MAX_ITER: 360000 29 | CHECKPOINT_PERIOD: 100000 30 | WARMUP_ITERS: 4000 31 | WARMUP_FACTOR: 0.00025 32 | INPUT: 33 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 34 | MAX_SIZE_TRAIN: 900 35 | MAX_SIZE_TEST: 736 36 | MIN_SIZE_TEST: 512 -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/CenterNet2_DLA-BiFPN-P5_640_16x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_p37_dla_bifpn_backbone" 5 | BIFPN: 6 | OUT_CHANNELS: 160 7 | NUM_LEVELS: 5 8 | NUM_BIFPN: 3 9 | CENTERNET: 10 | POST_NMS_TOPK_TEST: 128 11 | WEIGHTS: '' 12 | PIXEL_MEAN: [123.675, 116.280, 103.530] 13 | PIXEL_STD: [58.395, 57.12, 57.375] 14 | FPN: 15 | IN_FEATURES: ["dla3", "dla4", "dla5"] 16 | SOLVER: 17 | LR_SCHEDULER_NAME: "WarmupCosineLR" 18 | MAX_ITER: 360000 19 | BASE_LR: 0.08 20 | IMS_PER_BATCH: 64 21 | CHECKPOINT_PERIOD: 90000 22 | TEST: 23 | EVAL_PERIOD: 7500 24 | INPUT: 25 | FORMAT: RGB 26 | CUSTOM_AUG: EfficientDetResizeCrop 27 | TRAIN_SIZE: 640 28 | MIN_SIZE_TEST: 608 29 | MAX_SIZE_TEST: 900 30 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/CenterNet2_DLA-BiFPN-P5_640_16x_ST.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_p37_dla_bifpn_backbone" 5 | BIFPN: 6 | OUT_CHANNELS: 160 7 | NUM_LEVELS: 5 8 | NUM_BIFPN: 3 9 | CENTERNET: 10 | POST_NMS_TOPK_TEST: 128 11 | WEIGHTS: '' 12 | PIXEL_MEAN: [123.675, 116.280, 103.530] 13 | PIXEL_STD: [58.395, 57.12, 57.375] 14 | FPN: 15 | IN_FEATURES: ["dla3", "dla4", "dla5"] 16 | SOLVER: 17 | LR_SCHEDULER_NAME: "WarmupCosineLR" 18 | MAX_ITER: 360000 19 | BASE_LR: 0.08 20 | IMS_PER_BATCH: 64 21 | TEST: 22 | EVAL_PERIOD: 7500 23 | INPUT: 24 | FORMAT: RGB 25 | CUSTOM_AUG: EfficientDetResizeCrop 26 | TRAIN_SIZE: 640 27 | MIN_SIZE_TEST: 608 28 | MAX_SIZE_TEST: 900 29 | DATASETS: 30 | TRAIN: ("coco_2017_train","coco_un_yolov4_55_0.5",) 31 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/CenterNet2_DLA-fcosBiFPN-P5_640_16x_ST.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_p37_fcos_dla_bifpn_backbone" 5 | BIFPN: 6 | OUT_CHANNELS: 160 7 | NUM_LEVELS: 5 8 | NUM_BIFPN: 3 9 | CENTERNET: 10 | POST_NMS_TOPK_TEST: 128 11 | WEIGHTS: '' 12 | PIXEL_MEAN: [123.675, 116.280, 103.530] 13 | PIXEL_STD: [58.395, 57.12, 57.375] 14 | FPN: 15 | IN_FEATURES: ["dla3", "dla4", "dla5"] 16 | TEST: 17 | EVAL_PERIOD: 7500 18 | SOLVER: 19 | LR_SCHEDULER_NAME: "WarmupCosineLR" 20 | MAX_ITER: 360000 21 | BASE_LR: 0.08 22 | IMS_PER_BATCH: 64 23 | INPUT: 24 | FORMAT: RGB 25 | CUSTOM_AUG: EfficientDetResizeCrop 26 | TRAIN_SIZE: 640 27 | MIN_SIZE_TEST: 608 28 | MAX_SIZE_TEST: 900 29 | DATASETS: 30 | TRAIN: 
("coco_2017_train","coco_un_yolov4_55_0.5",) 31 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/CenterNet2_R2-101-DCN-BiFPN_1280_4x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_res2net_bifpn_backbone" 5 | BIFPN: 6 | NUM_BIFPN: 7 7 | OUT_CHANNELS: 288 8 | WEIGHTS: "output/r2_101.pkl" 9 | RESNETS: 10 | DEPTH: 101 11 | WIDTH_PER_GROUP: 26 12 | DEFORM_ON_PER_STAGE: [False, False, True, True] # on Res4, Res5 13 | DEFORM_MODULATED: True 14 | PIXEL_MEAN: [123.675, 116.280, 103.530] 15 | PIXEL_STD: [58.395, 57.12, 57.375] 16 | CENTERNET: 17 | USE_DEFORMABLE: True 18 | ROI_HEADS: 19 | IN_FEATURES: ["p3", "p4"] 20 | INPUT: 21 | FORMAT: RGB 22 | TEST: 23 | EVAL_PERIOD: 7500 24 | SOLVER: 25 | MAX_ITER: 180000 26 | CHECKPOINT_PERIOD: 60000 27 | LR_SCHEDULER_NAME: "WarmupCosineLR" 28 | BASE_LR: 0.04 29 | IMS_PER_BATCH: 32 30 | INPUT: 31 | CUSTOM_AUG: EfficientDetResizeCrop 32 | TRAIN_SIZE: 1280 33 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/CenterNet2_R2-101-DCN-BiFPN_4x+4x_1560_ST.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_res2net_bifpn_backbone" 5 | BIFPN: 6 | NUM_BIFPN: 7 7 | OUT_CHANNELS: 288 8 | WEIGHTS: "output/r2_101.pkl" 9 | RESNETS: 10 | DEPTH: 101 11 | WIDTH_PER_GROUP: 26 12 | DEFORM_ON_PER_STAGE: [False, False, True, True] # on Res4, Res5 13 | DEFORM_MODULATED: True 14 | PIXEL_MEAN: [123.675, 116.280, 103.530] 15 | PIXEL_STD: [58.395, 57.12, 57.375] 16 | CENTERNET: 17 | USE_DEFORMABLE: True 18 | ROI_HEADS: 19 | IN_FEATURES: ["p3", "p4"] 20 | TEST: 21 | EVAL_PERIOD: 7500 22 | SOLVER: 23 | MAX_ITER: 180000 24 | CHECKPOINT_PERIOD: 7500 25 | LR_SCHEDULER_NAME: "WarmupCosineLR" 26 | BASE_LR: 0.04 27 | IMS_PER_BATCH: 32 28 | DATASETS: 29 | TRAIN: "('coco_2017_train', 'coco_un_yolov4_55_0.5')" 30 | INPUT: 31 | FORMAT: RGB 32 | CUSTOM_AUG: EfficientDetResizeCrop 33 | TRAIN_SIZE: 1280 34 | TEST_SIZE: 1560 35 | TEST_INPUT_TYPE: 'square' 36 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/CenterNet2_R2-101-DCN_896_4x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_p67_res2net_fpn_backbone" 5 | WEIGHTS: "output/r2_101.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | WIDTH_PER_GROUP: 26 9 | DEFORM_ON_PER_STAGE: [False, False, True, True] # on Res4, Res5 10 | DEFORM_MODULATED: True 11 | PIXEL_MEAN: [123.675, 116.280, 103.530] 12 | PIXEL_STD: [58.395, 57.12, 57.375] 13 | CENTERNET: 14 | USE_DEFORMABLE: True 15 | ROI_HEADS: 16 | IN_FEATURES: ["p3", "p4"] 17 | INPUT: 18 | FORMAT: RGB 19 | TEST: 20 | EVAL_PERIOD: 7500 21 | SOLVER: 22 | MAX_ITER: 180000 23 | CHECKPOINT_PERIOD: 600000 24 | LR_SCHEDULER_NAME: "WarmupCosineLR" 25 | BASE_LR: 0.04 26 | IMS_PER_BATCH: 32 27 | INPUT: 28 | CUSTOM_AUG: EfficientDetResizeCrop 29 | TRAIN_SIZE: 896 -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/CenterNet2_R50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | 
-------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/CenterNet2_X101-DCN_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | CENTERNET: 4 | USE_DEFORMABLE: True 5 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 6 | PIXEL_STD: [57.375, 57.120, 58.395] 7 | RESNETS: 8 | STRIDE_IN_1X1: False 9 | NUM_GROUPS: 32 10 | WIDTH_PER_GROUP: 8 11 | DEPTH: 101 12 | DEFORM_ON_PER_STAGE: [False, False, True, True] # on Res4, Res5 13 | DEFORM_MODULATED: True 14 | ROI_HEADS: 15 | IN_FEATURES: ["p3", "p4"] 16 | SOLVER: 17 | STEPS: (120000, 160000) 18 | MAX_ITER: 180000 19 | CHECKPOINT_PERIOD: 40000 20 | INPUT: 21 | MIN_SIZE_TRAIN: (480, 960) 22 | MIN_SIZE_TRAIN_SAMPLING: "range" 23 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/LVIS_CenterNet2_R50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | ROI_HEADS: 4 | NUM_CLASSES: 1203 5 | SCORE_THRESH_TEST: 0.02 6 | NMS_THRESH_TEST: 0.5 7 | CENTERNET: 8 | NUM_CLASSES: 1203 9 | 10 | DATASETS: 11 | TRAIN: ("lvis_v1_train",) 12 | TEST: ("lvis_v1_val",) 13 | DATALOADER: 14 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 15 | REPEAT_THRESHOLD: 0.001 16 | TEST: 17 | DETECTIONS_PER_IMAGE: 300 18 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/LVIS_CenterNet2_R50_Fed_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | ROI_HEADS: 4 | NUM_CLASSES: 1203 5 | SCORE_THRESH_TEST: 0.02 6 | NMS_THRESH_TEST: 0.5 7 | CENTERNET: 8 | NUM_CLASSES: 1203 9 | ROI_BOX_HEAD: 10 | USE_SIGMOID_CE: True 11 | USE_FED_LOSS: True 12 | DATASETS: 13 | TRAIN: ("lvis_v1_train",) 14 | TEST: ("lvis_v1_val",) 15 | DATALOADER: 16 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 17 | REPEAT_THRESHOLD: 0.001 18 | TEST: 19 | DETECTIONS_PER_IMAGE: 300 20 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/O365_CenterNet2_R50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | ROI_HEADS: 4 | NUM_CLASSES: 365 5 | CENTERNET: 6 | NUM_CLASSES: 365 7 | DATASETS: 8 | TRAIN: ("objects365_train",) 9 | TEST: ("objects365_val",) 10 | DATALOADER: 11 | SAMPLER_TRAIN: "ClassAwareSampler" 12 | TEST: 13 | DETECTIONS_PER_IMAGE: 300 -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/configs/nuImages_CenterNet2_DLA_640_8x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CenterNet2.yaml" 2 | MODEL: 3 | MASK_ON: True 4 | ROI_MASK_HEAD: 5 | NAME: "MaskRCNNConvUpsampleHead" 6 | NUM_CONV: 4 7 | POOLER_RESOLUTION: 14 8 | ROI_HEADS: 9 | NUM_CLASSES: 10 10 | IN_FEATURES: ["dla2"] 11 | BACKBONE: 12 | NAME: "build_dla_backbone" 13 | DLA: 14 | NORM: "BN" 15 | CENTERNET: 16 | IN_FEATURES: ["dla2"] 17 | FPN_STRIDES: [4] 18 | SOI: [[0, 1000000]] 19 | NUM_CLS_CONVS: 1 20 | NUM_BOX_CONVS: 1 21 | REG_WEIGHT: 1. 
22 | MORE_POS: True 23 | HM_FOCAL_ALPHA: 0.25 24 | POST_NMS_TOPK_TEST: 128 25 | WEIGHTS: '' 26 | PIXEL_MEAN: [123.675, 116.280, 103.530] 27 | PIXEL_STD: [58.395, 57.12, 57.375] 28 | SOLVER: 29 | MAX_ITER: 180000 30 | STEPS: (120000, 160000) 31 | BASE_LR: 0.08 32 | IMS_PER_BATCH: 64 33 | INPUT: 34 | FORMAT: RGB 35 | CUSTOM_AUG: EfficientDetResizeCrop 36 | TRAIN_SIZE: 640 37 | MIN_SIZE_TEST: 608 38 | MAX_SIZE_TEST: 900 39 | MASK_FORMAT: bitmask 40 | DATASETS: 41 | TRAIN: ("nuimages_train",) 42 | TEST: ("nuimages_val",) 43 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/Detic/third_party/CenterNet2/tools/__init__.py -------------------------------------------------------------------------------- /third_party/Detic/third_party/CenterNet2/tools/deploy/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # See https://pytorch.org/tutorials/advanced/cpp_frontend.html 3 | cmake_minimum_required(VERSION 3.12 FATAL_ERROR) 4 | project(torchscript_mask_rcnn) 5 | 6 | find_package(Torch REQUIRED) 7 | find_package(OpenCV REQUIRED) 8 | find_package(TorchVision REQUIRED) # needed by export-method=tracing/scripting 9 | 10 | add_executable(torchscript_mask_rcnn torchscript_mask_rcnn.cpp) 11 | target_link_libraries( 12 | torchscript_mask_rcnn 13 | -Wl,--no-as-needed TorchVision::TorchVision -Wl,--as-needed 14 | "${TORCH_LIBRARIES}" ${OpenCV_LIBS}) 15 | set_property(TARGET torchscript_mask_rcnn PROPERTY CXX_STANDARD 14) 16 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/configs/r50_deformable_detr.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | EXP_DIR=exps/r50_deformable_detr 6 | PY_ARGS=${@:1} 7 | 8 | python -u main.py \ 9 | --output_dir ${EXP_DIR} \ 10 | ${PY_ARGS} 11 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/configs/r50_deformable_detr_plus_iterative_bbox_refinement.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | EXP_DIR=exps/r50_deformable_detr_plus_iterative_bbox_refinement 6 | PY_ARGS=${@:1} 7 | 8 | python -u main.py \ 9 | --output_dir ${EXP_DIR} \ 10 | --with_box_refine \ 11 | ${PY_ARGS} 12 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/configs/r50_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | EXP_DIR=exps/r50_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage 6 | PY_ARGS=${@:1} 7 | 8 | python -u main.py \ 9 | --output_dir ${EXP_DIR} \ 10 | --with_box_refine \ 11 | --two_stage \ 12 | 
${PY_ARGS} 13 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/configs/r50_deformable_detr_single_scale.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | EXP_DIR=exps/r50_deformable_detr_single_scale 6 | PY_ARGS=${@:1} 7 | 8 | python -u main.py \ 9 | --num_feature_levels 1 \ 10 | --output_dir ${EXP_DIR} \ 11 | ${PY_ARGS} 12 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/configs/r50_deformable_detr_single_scale_dc5.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | EXP_DIR=exps/r50_deformable_detr_single_scale_dc5 6 | PY_ARGS=${@:1} 7 | 8 | python -u main.py \ 9 | --num_feature_levels 1 \ 10 | --dilation \ 11 | --output_dir ${EXP_DIR} \ 12 | ${PY_ARGS} 13 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/datasets/torchvision_datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------ 6 | 7 | from .coco import CocoDetection 8 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/docs/changelog.md: -------------------------------------------------------------------------------- 1 | ## Changelog 2 | 3 | **[2020.12.07]** Fix a bug of sampling offset normalization (see [this issue](https://github.com/fundamentalvision/Deformable-DETR/issues/6)) in the MSDeformAttn module. The final accuracy on COCO is slightly improved. Code and pre-trained models have been updated. This bug only occurs in this released version but not in the original implementation used in our paper. -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/models/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------ 6 | # Modified from DETR (https://github.com/facebookresearch/detr) 7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 8 | # ------------------------------------------------------------------------ 9 | 10 | from .deformable_detr import build 11 | 12 | 13 | def build_model(args): 14 | return build(args) 15 | 16 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/models/ops/functions/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 
4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | from .ms_deform_attn_func import MSDeformAttnFunction 10 | 11 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/models/ops/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # ------------------------------------------------------------------------------------------------ 3 | # Deformable DETR 4 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | # ------------------------------------------------------------------------------------------------ 7 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | # ------------------------------------------------------------------------------------------------ 9 | 10 | python setup.py build install 11 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/models/ops/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | from .ms_deform_attn import MSDeformAttn 10 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/models/ops/src/vision.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 
5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #include "ms_deform_attn.h" 12 | 13 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 14 | m.def("ms_deform_attn_forward", &ms_deform_attn_forward, "ms_deform_attn_forward"); 15 | m.def("ms_deform_attn_backward", &ms_deform_attn_backward, "ms_deform_attn_backward"); 16 | } 17 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/requirements.txt: -------------------------------------------------------------------------------- 1 | pycocotools 2 | tqdm 3 | cython 4 | scipy 5 | -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/tools/run_dist_launch.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # ------------------------------------------------------------------------ 3 | # Deformable DETR 4 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | # ------------------------------------------------------------------------ 7 | 8 | set -x 9 | 10 | GPUS=$1 11 | RUN_COMMAND=${@:2} 12 | if [ $GPUS -lt 8 ]; then 13 | GPUS_PER_NODE=${GPUS_PER_NODE:-$GPUS} 14 | else 15 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 16 | fi 17 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 18 | MASTER_PORT=${MASTER_PORT:-"29500"} 19 | NODE_RANK=${NODE_RANK:-0} 20 | 21 | let "NNODES=GPUS/GPUS_PER_NODE" 22 | 23 | python ./tools/launch.py \ 24 | --nnodes ${NNODES} \ 25 | --node_rank ${NODE_RANK} \ 26 | --master_addr ${MASTER_ADDR} \ 27 | --master_port ${MASTER_PORT} \ 28 | --nproc_per_node ${GPUS_PER_NODE} \ 29 | ${RUN_COMMAND} -------------------------------------------------------------------------------- /third_party/Detic/third_party/Deformable-DETR/util/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------ 6 | # Modified from DETR (https://github.com/facebookresearch/detr) 7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 8 | # ------------------------------------------------------------------------ 9 | -------------------------------------------------------------------------------- /third_party/Detic/tools/fix_o365_path.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | import argparse 3 | import json 4 | import os 5 | 6 | if __name__ == '__main__': 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument("--ann", default='datasets/objects365/annotations/zhiyuan_objv2_train_fixname.json') 9 | parser.add_argument("--img_dir", default='datasets/objects365/train/') 10 | args = parser.parse_args() 11 | 12 | print('Loading', args.ann) 13 | data = json.load(open(args.ann, 'r')) 14 | images = [] 15 | count = 0 16 | for x in data['images']: 17 | path = '{}/{}'.format(args.img_dir, x['file_name']) 18 | if os.path.exists(path): 19 | images.append(x) 20 | else: 21 | print(path) 22 | count = count + 1 23 | print('Missing', count, 'images') 24 | data['images'] = images 25 | out_name = args.ann[:-5] + '_fixmiss.json' 26 | print('Saving to', out_name) 27 | json.dump(data, open(out_name, 'w')) 28 | -------------------------------------------------------------------------------- /third_party/Detic/tools/get_coco_zeroshot_oriorder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import argparse 3 | import json 4 | 5 | if __name__ == '__main__': 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument('--data_path', default='datasets/coco/annotations/instances_val2017_unseen_2.json') 8 | parser.add_argument('--cat_path', default='datasets/coco/annotations/instances_val2017.json') 9 | args = parser.parse_args() 10 | print('Loading', args.cat_path) 11 | cat = json.load(open(args.cat_path, 'r'))['categories'] 12 | 13 | print('Loading', args.data_path) 14 | data = json.load(open(args.data_path, 'r')) 15 | data['categories'] = cat 16 | out_path = args.data_path[:-5] + '_oriorder.json' 17 | print('Saving to', out_path) 18 | json.dump(data, open(out_path, 'w')) 19 | -------------------------------------------------------------------------------- /third_party/Detic/tools/remove_lvis_rare.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import argparse 3 | import json 4 | 5 | if __name__ == '__main__': 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument('--ann', default='datasets/lvis/lvis_v1_train.json') 8 | args = parser.parse_args() 9 | 10 | print('Loading', args.ann) 11 | data = json.load(open(args.ann, 'r')) 12 | catid2freq = {x['id']: x['frequency'] for x in data['categories']} 13 | print('ori #anns', len(data['annotations'])) 14 | exclude = ['r'] 15 | data['annotations'] = [x for x in data['annotations'] \ 16 | if catid2freq[x['category_id']] not in exclude] 17 | print('filtered #anns', len(data['annotations'])) 18 | out_path = args.ann[:-5] + '_norare.json' 19 | print('Saving to', out_path) 20 | json.dump(data, open(out_path, 'w')) 21 | -------------------------------------------------------------------------------- /third_party/Detic/tools/unzip_imagenet_lvis.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | import os 3 | import argparse 4 | 5 | if __name__ == '__main__': 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument('--src_path', default='datasets/imagenet/ImageNet-21K/') 8 | parser.add_argument('--dst_path', default='datasets/imagenet/ImageNet-LVIS/') 9 | parser.add_argument('--data_path', default='datasets/metadata/imagenet_lvis_wnid.txt') 10 | args = parser.parse_args() 11 | 12 | f = open(args.data_path) 13 | for i, line in enumerate(f): 14 | cmd = 'mkdir {x} && tar -xf {src}/{l}.tar -C {x}'.format( 15 | src=args.src_path, 16 | l=line.strip(), 17 | x=args.dst_path + '/' + line.strip()) 18 | print(i, cmd) 19 | os.system(cmd) 20 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/.gitignore: -------------------------------------------------------------------------------- 1 | # output dir 2 | output 3 | outputs 4 | instant_test_output 5 | inference_test_output 6 | 7 | 8 | *.json 9 | *.diff 10 | *.jpg 11 | !/projects/DensePose/doc/images/*.jpg 12 | 13 | # compilation and distribution 14 | __pycache__ 15 | _ext 16 | *.pyc 17 | *.pyd 18 | *.so 19 | *.dll 20 | *.egg-info/ 21 | build/ 22 | dist/ 23 | wheels/ 24 | 25 | # pytorch/python/numpy formats 26 | *.pth 27 | *.pkl 28 | *.npy 29 | *.ts 30 | model_ts*.txt 31 | 32 | # ipython/jupyter notebooks 33 | *.ipynb 34 | **/.ipynb_checkpoints/ 35 | 36 | # Editor temporaries 37 | *.swn 38 | *.swo 39 | *.swp 40 | *~ 41 | 42 | # editor settings 43 | .idea 44 | .vscode 45 | _darcs 46 | 47 | # project dirs 48 | /detectron2/model_zoo/configs 49 | /datasets/* 50 | !/datasets/*.* 51 | /projects/*/datasets 52 | /models 53 | /snippet 54 | 55 | # vs code 56 | .history 57 | 58 | amlt 59 | thirdparty 60 | wandb 61 | weights -------------------------------------------------------------------------------- /third_party/zsseg.baseline/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 MendelXu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
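A note on tools/unzip_imagenet_lvis.py above: it shells out to `mkdir` and `tar -xf` once per WordNet id via os.system. A sketch of the same loop using only the standard-library tarfile module (illustrative, not from the repo; it keeps the script's default paths):

import os
import tarfile

src = 'datasets/imagenet/ImageNet-21K/'
dst = 'datasets/imagenet/ImageNet-LVIS/'
with open('datasets/metadata/imagenet_lvis_wnid.txt') as f:
    for i, line in enumerate(f):
        wnid = line.strip()
        out_dir = os.path.join(dst, wnid)
        os.makedirs(out_dir, exist_ok=True)      # replaces `mkdir {x}`
        with tarfile.open(os.path.join(src, wnid + '.tar')) as tar:
            tar.extractall(out_dir)              # replaces `tar -xf ... -C {x}`
        print(i, wnid)

One behavioral difference: the shell version's `mkdir {x} &&` silently skips any class whose directory already exists, while this sketch reuses the directory and extracts into it.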
22 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_maskformer_R101c_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | OUTPUT_DIR: output/coco-stuff-164k-156/zero_shot_maskformer_R101c_bs32_60k/official_inference -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_maskformer_R101c_imagenet_prompt_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_imagenet_prompt_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | OUTPUT_DIR: output/coco-stuff-164k-156/zero_shot_maskformer_R101c_imagenet_prompt_bs32_60k/re_inference -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_maskformer_R101c_pomp_prompt_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | CLIP_ADAPTER: 15 | PROMPT_LEARNER: "pomp_tuned" 16 | # for learnable prompt 17 | PROMPT_DIM: 512 18 | PROMPT_SHAPE: (16, 0) 19 | CLIP_MODEL_NAME: "ViT-B/16" 20 | PROMPT_CHECKPOINT: output/coco-stuff-164k-156/zero_shot_proposal_classification_learn_prompt_pomp_bs32_10k/model_final.pth 21 | OUTPUT_DIR: output/coco-stuff-164k-156/zero_shot_maskformer_R101c_pomp_prompt_bs32_60k -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_maskformer_R101c_single_prompt_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_single_prompt_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | OUTPUT_DIR: output/coco-stuff-164k-156/zero_shot_maskformer_R101c_single_prompt_bs32_60k/re_inference -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_maskformer_R101c_single_prompt_clip_rn101_bs32_60k.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_single_prompt_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | CLIP_ADAPTER: 15 | CLIP_MODEL_NAME: "RN101" 16 | PROMPT_DIM: 512 17 | SEM_SEG_HEAD: 18 | EMBEDDING_DIM: 512 -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_maskformer_R101c_single_prompt_clip_rn50_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_single_prompt_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | CLIP_ADAPTER: 15 | CLIP_MODEL_NAME: "RN50" 16 | PROMPT_DIM: 1024 17 | SEM_SEG_HEAD: 18 | EMBEDDING_DIM: 1024 -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_maskformer_R101c_single_prompt_clip_rn50x16_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_single_prompt_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | CLIP_ADAPTER: 15 | CLIP_MODEL_NAME: "RN50x16" 16 | PROMPT_DIM: 768 17 | SEM_SEG_HEAD: 18 | EMBEDDING_DIM: 768 -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_maskformer_R101c_single_prompt_clip_rn50x4_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_single_prompt_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | CLIP_ADAPTER: 15 | CLIP_MODEL_NAME: "RN50x4" 16 | PROMPT_DIM: 640 17 | SEM_SEG_HEAD: 18 | EMBEDDING_DIM: 640 -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_maskformer_R101c_single_prompt_clip_vit-bx32_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_single_prompt_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", 
"res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | CLIP_ADAPTER: 15 | CLIP_MODEL_NAME: "ViT-B/32" -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_maskformer_R101c_vild_prompt_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_vild_prompt_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | OUTPUT_DIR: output/coco-stuff-164k-156/zero_shot_maskformer_R101c_vild_prompt_bs32_60k/re_inference -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_maskformer_R50_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../coco-stuff-164k-171/maskformer_R50_bs32_60k.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "ZeroShotMaskFormer" 4 | SEM_SEG_HEAD: 5 | NAME: "ZeroShotMaskFormerHead" 6 | NUM_CLASSES: 156 #only used in set criterion 7 | EMBEDDING_DIM: 512 8 | EMBED_LAYERS: 2 9 | CLIP_ADAPTER: 10 | PROMPT_LEARNER: "learnable" 11 | # for learnable prompt 12 | PROMPT_DIM: 512 13 | PROMPT_SHAPE: (16, 0) 14 | CLIP_MODEL_NAME: "ViT-B/16" 15 | MASK_FILL: "mean" 16 | MASK_EXPAND_RATIO: 1.0 17 | MASK_THR: 0.5 18 | MASK_MATTING: False 19 | REGION_RESIZED: True 20 | CLIP_ENSEMBLE: True 21 | CLIP_ENSEMBLE_WEIGHT: 0.8 22 | DATASETS: 23 | TRAIN: ("coco_2017_train_stuff_base_sem_seg",) 24 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_maskformer_R50_imagenet_prompt_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../coco-stuff-164k-171/maskformer_R50_bs32_60k.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "ZeroShotMaskFormer" 4 | SEM_SEG_HEAD: 5 | NAME: "ZeroShotMaskFormerHead" 6 | NUM_CLASSES: 156 #only used in set criterion 7 | EMBEDDING_DIM: 512 8 | EMBED_LAYERS: 2 9 | CLIP_ADAPTER: 10 | PROMPT_LEARNER: "imagenet" 11 | CLIP_MODEL_NAME: "ViT-B/16" 12 | MASK_FILL: "mean" 13 | MASK_EXPAND_RATIO: 1.0 14 | MASK_THR: 0.5 15 | MASK_MATTING: False 16 | REGION_RESIZED: True 17 | CLIP_ENSEMBLE: True 18 | DATASETS: 19 | TRAIN: ("coco_2017_train_stuff_base_sem_seg",) -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_maskformer_R50_single_prompt_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../coco-stuff-164k-171/maskformer_R50_bs32_60k.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "ZeroShotMaskFormer" 4 | SEM_SEG_HEAD: 5 | NAME: "ZeroShotMaskFormerHead" 6 | NUM_CLASSES: 156 #only used in set criterion 7 | EMBEDDING_DIM: 512 8 | EMBED_LAYERS: 2 9 | CLIP_ADAPTER: 10 | PROMPT_LEARNER: "predefined" 11 | PREDEFINED_PROMPT_TEMPLATES: ["a sculpture of a {}."] 12 | CLIP_MODEL_NAME: "ViT-B/16" 13 | MASK_FILL: "mean" 14 | MASK_EXPAND_RATIO: 1.0 15 | MASK_THR: 0.5 16 | MASK_MATTING: False 17 | REGION_RESIZED: True 18 | CLIP_ENSEMBLE: True 19 | DATASETS: 20 | TRAIN: ("coco_2017_train_stuff_base_sem_seg",) 
-------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_maskformer_R50_vild_prompt_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../coco-stuff-164k-171/maskformer_R50_bs32_60k.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "ZeroShotMaskFormer" 4 | SEM_SEG_HEAD: 5 | NAME: "ZeroShotMaskFormerHead" 6 | NUM_CLASSES: 156 #only used in set criterion 7 | EMBEDDING_DIM: 512 8 | EMBED_LAYERS: 2 9 | CLIP_ADAPTER: 10 | PROMPT_LEARNER: "vild" 11 | CLIP_MODEL_NAME: "ViT-B/16" 12 | MASK_FILL: "mean" 13 | MASK_EXPAND_RATIO: 1.0 14 | MASK_THR: 0.5 15 | MASK_MATTING: False 16 | REGION_RESIZED: True 17 | CLIP_ENSEMBLE: True 18 | DATASETS: 19 | TRAIN: ("coco_2017_train_stuff_base_sem_seg",) -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_perpixel_R101c_single_prompt_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_single_prompt_bs32_60k.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "ZeroShotPerPixelModel" 4 | BACKBONE: 5 | NAME: "build_resnet_deeplab_backbone" 6 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 7 | RESNETS: 8 | DEPTH: 101 9 | STEM_TYPE: "deeplab" 10 | STEM_OUT_CHANNELS: 128 11 | STRIDE_IN_1X1: False 12 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 13 | # NORM: "SyncBN" 14 | RES5_MULTI_GRID: [1, 2, 4] 15 | SEM_SEG_HEAD: 16 | NAME: "ZeroPerPixelBaselineHead" 17 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 18 | IGNORE_VALUE: 255 19 | NUM_CLASSES: 512 20 | COMMON_STRIDE: 4 # not used, hard-coded 21 | LOSS_WEIGHT: 1.0 22 | CONVS_DIM: 256 23 | MASK_DIM: 256 24 | NORM: "GN" 25 | CLIP_ADAPTER: 26 | CLIP_ENSEMBLE_WEIGHT: 0.7 -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-156/zero_shot_proposal_classification_bs32_10k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../coco-stuff-164k-171/maskformer_R50_bs32_60k.yaml 2 | ORACLE: True 3 | MODEL: 4 | META_ARCHITECTURE: "ProposalClipClassifier" 5 | MASK_ON: True 6 | CLIP_ADAPTER: 7 | PROMPT_LEARNER: "predefined" 8 | PREDEFINED_PROMPT_TEMPLATES: ["a sculpture of a {}."] 9 | # for learnable prompt 10 | PROMPT_DIM: 512 11 | PROMPT_SHAPE: (16, 0) 12 | CLIP_MODEL_NAME: "ViT-B/16" 13 | DATASETS: 14 | TRAIN: ("coco_2017_train_stuff_base_sem_seg_classification",) 15 | TEST: ("coco_2017_test_stuff_sem_seg_classification",) 16 | INPUT: 17 | MIN_SIZE_TRAIN: (224,244) 18 | MIN_SIZE_TEST: 224 19 | MAX_SIZE_TEST: 2560 20 | SIZE_DIVISIBILITY: -1 21 | FORMAT: "RGB" 22 | DATASET_MAPPER_NAME: "mask_former_binary_semantic" 23 | SOLVER: 24 | OPTIMIZER: "SGD" 25 | BASE_LR: 0.002 26 | WEIGHT_DECAY: 0.0005 27 | LR_SCHEDULER_NAME: "WarmupCosineLR" 28 | WARMUP_METHOD: "constant" 29 | WARMUP_FACTOR: 0.005 30 | WARMUP_ITERS: 100 31 | IMS_PER_BATCH: 32 32 | TEST_IMS_PER_BATCH: 4 33 | MAX_ITER: 10000 34 | CHECKPOINT_PERIOD: 5000 35 | TEST: 36 | EVAL_PERIOD: 5000 -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-171/maskformer_R101c_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: maskformer_R50_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: 
"detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-171/maskformer_R50_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-COCOStuff164K-171.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "MaskFormer" 4 | SEM_SEG_HEAD: 5 | NAME: "MaskFormerHead" 6 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 7 | IGNORE_VALUE: 255 8 | NUM_CLASSES: 171 9 | COMMON_STRIDE: 4 # not used, hard-coded 10 | LOSS_WEIGHT: 1.0 11 | CONVS_DIM: 256 12 | MASK_DIM: 256 13 | NORM: "GN" 14 | MASK_FORMER: 15 | TRANSFORMER_IN_FEATURE: "res5" 16 | DEEP_SUPERVISION: True 17 | NO_OBJECT_WEIGHT: 0.1 18 | DICE_WEIGHT: 1.0 19 | MASK_WEIGHT: 20.0 20 | HIDDEN_DIM: 256 21 | NUM_OBJECT_QUERIES: 100 22 | NHEADS: 8 23 | DROPOUT: 0.1 24 | DIM_FEEDFORWARD: 2048 25 | ENC_LAYERS: 0 26 | DEC_LAYERS: 6 27 | PRE_NORM: False -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-171/zero_shot_maskformer_R101c_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | OUTPUT_DIR: output/coco-stuff-164k-171/zero_shot_maskformer_R101c_bs32_60k/ -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-171/zero_shot_maskformer_R101c_imagenet_prompt_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | CLIP_ADAPTER: 7 | PROMPT_LEARNER: "imagenet" 8 | CLIP_MODEL_NAME: "ViT-B/16" 9 | MASK_FILL: "mean" 10 | MASK_EXPAND_RATIO: 1.0 11 | MASK_THR: 0.5 12 | MASK_MATTING: False 13 | REGION_RESIZED: True 14 | CLIP_ENSEMBLE: True 15 | CLIP_ENSEMBLE_WEIGHT: 0.8 16 | RESNETS: 17 | DEPTH: 101 18 | STEM_TYPE: "deeplab" 19 | STEM_OUT_CHANNELS: 128 20 | STRIDE_IN_1X1: False 21 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 22 | # NORM: "SyncBN" 23 | RES5_MULTI_GRID: [1, 2, 4] 24 | OUTPUT_DIR: output/coco-stuff-164k-171/zero_shot_maskformer_R101c_imagenet_prompt_bs32_60k/ -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-171/zero_shot_maskformer_R101c_pomp_prompt_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | 
CLIP_ADAPTER: 15 | PROMPT_LEARNER: "pomp_tuned" 16 | # for learnable prompt 17 | PROMPT_DIM: 512 18 | PROMPT_SHAPE: (16, 0) 19 | CLIP_MODEL_NAME: "ViT-B/16" 20 | PROMPT_CHECKPOINT: output/coco-stuff-164k-156/zero_shot_proposal_classification_learn_prompt_pomp_bs32_10k/model_final.pth 21 | OUTPUT_DIR: output/coco-stuff-164k-171/zero_shot_maskformer_R101c_pomp_prompt_bs32_60k -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-171/zero_shot_maskformer_R101c_single_prompt_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | CLIP_ADAPTER: 7 | PROMPT_LEARNER: "predefined" 8 | PREDEFINED_PROMPT_TEMPLATES: [ "a sculpture of a {}." ] 9 | CLIP_MODEL_NAME: "ViT-B/16" 10 | MASK_FILL: "mean" 11 | MASK_EXPAND_RATIO: 1.0 12 | MASK_THR: 0.5 13 | MASK_MATTING: False 14 | REGION_RESIZED: True 15 | CLIP_ENSEMBLE: True 16 | CLIP_ENSEMBLE_WEIGHT: 0.8 17 | RESNETS: 18 | DEPTH: 101 19 | STEM_TYPE: "deeplab" 20 | STEM_OUT_CHANNELS: 128 21 | STRIDE_IN_1X1: False 22 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 23 | # NORM: "SyncBN" 24 | RES5_MULTI_GRID: [1, 2, 4] 25 | OUTPUT_DIR: output/coco-stuff-164k-171/zero_shot_maskformer_R101c_single_prompt_bs32_60k/ -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-171/zero_shot_maskformer_R101c_vild_prompt_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_bs32_60k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | CLIP_ADAPTER: 7 | PROMPT_LEARNER: "vild" 8 | CLIP_MODEL_NAME: "ViT-B/16" 9 | MASK_FILL: "mean" 10 | MASK_EXPAND_RATIO: 1.0 11 | MASK_THR: 0.5 12 | MASK_MATTING: False 13 | REGION_RESIZED: True 14 | CLIP_ENSEMBLE: True 15 | CLIP_ENSEMBLE_WEIGHT: 0.8 16 | RESNETS: 17 | DEPTH: 101 18 | STEM_TYPE: "deeplab" 19 | STEM_OUT_CHANNELS: 128 20 | STRIDE_IN_1X1: False 21 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 22 | # NORM: "SyncBN" 23 | RES5_MULTI_GRID: [1, 2, 4] 24 | OUTPUT_DIR: output/coco-stuff-164k-171/zero_shot_maskformer_R101c_vild_prompt_bs32_60k/ -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/coco-stuff-164k-171/zero_shot_maskformer_R50_bs32_60k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: maskformer_R50_bs32_60k.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "ZeroShotMaskFormer" 4 | SEM_SEG_HEAD: 5 | NAME: "ZeroShotMaskFormerHead" 6 | NUM_CLASSES: 171 7 | EMBEDDING_DIM: 512 8 | EMBED_LAYERS: 2 9 | CLIP_ADAPTER: 10 | PROMPT_LEARNER: "learnable" 11 | # for learnable prompt 12 | PROMPT_DIM: 512 13 | PROMPT_SHAPE: (16, 0) 14 | CLIP_MODEL_NAME: "ViT-B/16" 15 | MASK_FILL: "mean" 16 | MASK_EXPAND_RATIO: 1.0 17 | MASK_THR: 0.5 18 | MASK_MATTING: False 19 | REGION_RESIZED: True 20 | CLIP_ENSEMBLE: True 21 | CLIP_ENSEMBLE_WEIGHT: 0.8 22 | DATASETS: 23 | TRAIN: ("coco_2017_train_stuff_sem_seg",) 24 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/voc-11k-15/zero_shot_maskformer_R101c_bs16_20k.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_bs16_20k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | OUTPUT_DIR: output/voc-11k-15/zero_shot_maskformer_R101c_bs16_20k -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/voc-11k-15/zero_shot_maskformer_R101c_imagenet_prompt_bs16_20k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_single_prompt_bs16_20k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | CLIP_ADAPTER: 15 | PROMPT_LEARNER: "imagenet" 16 | CLIP_MODEL_NAME: "ViT-B/16" 17 | MASK_FILL: "mean" 18 | MASK_EXPAND_RATIO: 1.0 19 | MASK_THR: 0.5 20 | MASK_MATTING: False 21 | REGION_RESIZED: True 22 | CLIP_ENSEMBLE: True 23 | CLIP_ENSEMBLE_WEIGHT: 0.7 24 | OUTPUT_DIR: output/voc-11k-15/zero_shot_maskformer_R101c_imagenet_prompt_bs16_20k -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/voc-11k-15/zero_shot_maskformer_R101c_pomp_prompt_bs16_20k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_bs16_20k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | CLIP_ADAPTER: 15 | PROMPT_LEARNER: "pomp_tuned" 16 | # for learnable prompt 17 | PROMPT_DIM: 512 18 | PROMPT_SHAPE: (16, 0) 19 | CLIP_MODEL_NAME: "ViT-B/16" 20 | PROMPT_CHECKPOINT: output/voc-11k-15/zero_shot_proposal_classification_learn_prompt_pomp_bs16_10k/model_final.pth 21 | OUTPUT_DIR: output/voc-11k-15/zero_shot_maskformer_R101c_pomp_prompt_bs16_20k -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/voc-11k-15/zero_shot_maskformer_R101c_single_prompt_bs16_20k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: zero_shot_maskformer_R50_single_prompt_bs16_20k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | OUTPUT_DIR: output/voc-11k-15/zero_shot_maskformer_R101c_single_prompt_bs16_20k -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/voc-11k-15/zero_shot_maskformer_R101c_vild_prompt_bs16_20k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: 
zero_shot_maskformer_R50_single_prompt_bs16_20k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | CLIP_ADAPTER: 15 | PROMPT_LEARNER: "vild" 16 | CLIP_MODEL_NAME: "ViT-B/16" 17 | MASK_FILL: "mean" 18 | MASK_EXPAND_RATIO: 1.0 19 | MASK_THR: 0.5 20 | MASK_MATTING: False 21 | REGION_RESIZED: True 22 | CLIP_ENSEMBLE: True 23 | CLIP_ENSEMBLE_WEIGHT: 0.7 24 | OUTPUT_DIR: output/voc-11k-15/zero_shot_maskformer_R101c_vild_prompt_bs16_20k -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/voc-11k-15/zero_shot_maskformer_R50_bs16_20k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../voc-11k-20/maskformer_R50_bs16_20k.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "ZeroShotMaskFormer" 4 | SEM_SEG_HEAD: 5 | NAME: "ZeroShotMaskFormerHead" 6 | NUM_CLASSES: 15 #only used in set criterion 7 | EMBEDDING_DIM: 512 8 | EMBED_LAYERS: 2 9 | CLIP_ADAPTER: 10 | PROMPT_LEARNER: "learnable" 11 | # for learnable prompt 12 | PROMPT_DIM: 512 13 | PROMPT_SHAPE: (16, 0) 14 | CLIP_MODEL_NAME: "ViT-B/16" 15 | MASK_FILL: "mean" 16 | MASK_EXPAND_RATIO: 1.0 17 | MASK_THR: 0.5 18 | MASK_MATTING: False 19 | REGION_RESIZED: True 20 | CLIP_ENSEMBLE: True 21 | CLIP_ENSEMBLE_WEIGHT: 0.7 22 | # SEPERATE_ADAPTER: True 23 | # REGION_CLIP_ADAPTER: 24 | # PROMPT_LEARNER: "learnable" 25 | 26 | DATASETS: 27 | TRAIN: ("voc_base_sem_seg_train",) 28 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/voc-11k-15/zero_shot_maskformer_R50_single_prompt_bs16_20k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../voc-11k-20/maskformer_R50_bs16_20k.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "ZeroShotMaskFormer" 4 | SEM_SEG_HEAD: 5 | NAME: "ZeroShotMaskFormerHead" 6 | NUM_CLASSES: 15 #only used in set criterion 7 | EMBEDDING_DIM: 512 8 | EMBED_LAYERS: 2 9 | CLIP_ADAPTER: 10 | PROMPT_LEARNER: "predefined" 11 | PREDEFINED_PROMPT_TEMPLATES: ["a sculpture of a {}."] 12 | CLIP_MODEL_NAME: "ViT-B/16" 13 | MASK_FILL: "mean" 14 | MASK_EXPAND_RATIO: 1.0 15 | MASK_THR: 0.5 16 | MASK_MATTING: False 17 | REGION_RESIZED: True 18 | CLIP_ENSEMBLE: True 19 | CLIP_ENSEMBLE_WEIGHT: 0.7 20 | DATASETS: 21 | TRAIN: ("voc_base_sem_seg_train",) -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/voc-11k-20/maskformer_R101c_bs16_20k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: maskformer_R50_bs16_20k.yaml 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_resnet_deeplab_backbone" 5 | WEIGHTS: "detectron2://DeepLab/R-103.pkl" 6 | RESNETS: 7 | DEPTH: 101 8 | STEM_TYPE: "deeplab" 9 | STEM_OUT_CHANNELS: 128 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | # NORM: "SyncBN" 13 | RES5_MULTI_GRID: [1, 2, 4] 14 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/configs/voc-11k-20/maskformer_R50_bs16_20k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-VOC11K-20.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "MaskFormer" 4 | 
SEM_SEG_HEAD: 5 | NAME: "MaskFormerHead" 6 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 7 | IGNORE_VALUE: 255 8 | NUM_CLASSES: 20 9 | COMMON_STRIDE: 4 # not used, hard-coded 10 | LOSS_WEIGHT: 1.0 11 | CONVS_DIM: 256 12 | MASK_DIM: 256 13 | NORM: "GN" 14 | MASK_FORMER: 15 | TRANSFORMER_IN_FEATURE: "res5" 16 | DEEP_SUPERVISION: True 17 | NO_OBJECT_WEIGHT: 0.1 18 | DICE_WEIGHT: 1.0 19 | MASK_WEIGHT: 20.0 20 | HIDDEN_DIM: 256 21 | NUM_OBJECT_QUERIES: 100 22 | NHEADS: 8 23 | DROPOUT: 0.1 24 | DIM_FEEDFORWARD: 2048 25 | ENC_LAYERS: 0 26 | DEC_LAYERS: 6 27 | PRE_NORM: False -------------------------------------------------------------------------------- /third_party/zsseg.baseline/mask_former/__init__.py: -------------------------------------------------------------------------------- 1 | from . import data 2 | from . import modeling 3 | from .config import add_mask_former_config 4 | 5 | from .test_time_augmentation import SemanticSegmentorWithTTA 6 | from .mask_former_model import MaskFormer 7 | from .zero_shot_mask_former_model import ZeroShotMaskFormer 8 | from .proposal_classification import ProposalClipClassifier 9 | from .ablation.zero_shot_per_pixel_model import ZeroShotPerPixelModel 10 | from .ablation.oracle_mask_former_model import OracleMaskFormer 11 | from .ablation.zero_shot_proposal_based_model import ZeroShotProposalBasedSegmentor 12 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/mask_former/ablation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/zsseg.baseline/mask_former/ablation/__init__.py -------------------------------------------------------------------------------- /third_party/zsseg.baseline/mask_former/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset_mappers import * 2 | from . import datasets 3 | from .build import ( 4 | build_detection_train_loader, 5 | build_detection_test_loader, 6 | dataset_sample_per_class, 7 | ) 8 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/mask_former/data/dataset_mappers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .mask_former_binary_semantic_dataset_mapper import ( 3 | MaskFormerBinarySemanticDatasetMapper, 4 | ) 5 | from .mask_former_semantic_dataset_mapper import MaskFormerSemanticDatasetMapper 6 | from .oracle_dataset_mapper import OracleDatasetMapper 7 | from .proposal_classification_dataset_mapper import ProposalClasificationDatasetMapper 8 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/mask_former/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from . 
import register_coco_stuff, register_voc_seg, register_pcontext -------------------------------------------------------------------------------- /third_party/zsseg.baseline/mask_former/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .generalized_sem_seg_evaluation import GeneralizedSemSegEvaluator 2 | from .classification_evaluation import ClassificationEvaluator 3 | from .pseudo_sem_seg_evaluation import GeneralizedPseudoSemSegEvaluator 4 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/mask_former/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .backbone.swin import D2SwinTransformer 3 | from .backbone.clip_resnet import D2ModifiedResNet 4 | from .heads.mask_former_head import MaskFormerHead 5 | from .heads.zero_shot_mask_former_head import ZeroShotMaskFormerHead 6 | from .heads.per_pixel_baseline import ( 7 | PerPixelBaselineHead, 8 | PerPixelBaselinePlusHead, 9 | ZeroPerPixelBaselineHead, 10 | ) 11 | from .heads.pixel_decoder import BasePixelDecoder 12 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/mask_former/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/mask_former/modeling/heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/mask_former/modeling/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/mask_former/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | from .events import setup_wandb, WandbWriter 3 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/requirements.txt: -------------------------------------------------------------------------------- 1 | cython 2 | scipy 3 | shapely 4 | timm 5 | h5py 6 | wandb 7 | fire 8 | mmcv -------------------------------------------------------------------------------- /third_party/zsseg.baseline/resources/ade_thing_stuff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/zsseg.baseline/resources/ade_thing_stuff.png -------------------------------------------------------------------------------- /third_party/zsseg.baseline/resources/coco_thing_stuff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/zsseg.baseline/resources/coco_thing_stuff.png -------------------------------------------------------------------------------- /third_party/zsseg.baseline/resources/proposal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/zsseg.baseline/resources/proposal.png -------------------------------------------------------------------------------- /third_party/zsseg.baseline/third_party/CLIP/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.py[cod] 3 | *$py.class 4 | *.egg-info 5 | .pytest_cache 6 | .ipynb_checkpoints 7 | 8 | thumbs.db 9 | .DS_Store 10 | .idea 11 | data/ 12 | *.pkl 13 | .theia 14 | tmp 15 | */tmp 16 | wandb/ 17 | */wandb 18 | .history -------------------------------------------------------------------------------- /third_party/zsseg.baseline/third_party/CLIP/CLIP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/zsseg.baseline/third_party/CLIP/CLIP.png -------------------------------------------------------------------------------- /third_party/zsseg.baseline/third_party/CLIP/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 OpenAI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/third_party/CLIP/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include clip/bpe_simple_vocab_16e6.txt.gz 2 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/third_party/CLIP/clip/__init__.py: -------------------------------------------------------------------------------- 1 | from .clip import * 2 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/third_party/CLIP/clip/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/zsseg.baseline/third_party/CLIP/clip/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /third_party/zsseg.baseline/third_party/CLIP/requirements.txt: -------------------------------------------------------------------------------- 1 | ftfy 2 | regex 3 | tqdm 4 | torch 5 | torchvision 6 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/third_party/CLIP/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pkg_resources 4 | from setuptools import setup, find_packages 5 | 6 | setup( 7 | name="clip", 8 | py_modules=["clip"], 9 | version="1.0", 10 | description="", 11 | author="OpenAI", 12 | packages=find_packages(exclude=["tests*"]), 13 | install_requires=[ 14 | str(r) 15 | for r in pkg_resources.parse_requirements( 16 | open(os.path.join(os.path.dirname(__file__), "requirements.txt")) 17 | ) 18 | ], 19 | include_package_data=True, 20 | extras_require={"dev": ["pytest"]}, 21 | ) 22 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/third_party/CLIP/tests/test_consistency.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import torch 4 | from PIL import Image 5 | 6 | import clip 7 | 8 | 9 | @pytest.mark.parametrize("model_name", clip.available_models()) 10 | def test_consistency(model_name): 11 | device = "cpu" 12 | jit_model, transform = clip.load(model_name, device=device, jit=True) 13 | py_model, _ = clip.load(model_name, device=device, jit=False) 14 | 15 | image = transform(Image.open("CLIP.png")).unsqueeze(0).to(device) 16 | text = clip.tokenize(["a diagram", "a dog", "a cat"]).to(device) 17 | 18 | with torch.no_grad(): 19 | logits_per_image, _ = jit_model(image, text) 20 | jit_probs = logits_per_image.softmax(dim=-1).cpu().numpy() 21 | 22 | logits_per_image, _ = py_model(image, text) 23 | py_probs = logits_per_image.softmax(dim=-1).cpu().numpy() 24 | 25 | assert np.allclose(jit_probs, py_probs, atol=0.01, rtol=0.1) 26 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/tools/convert-pretrained-swin-model-to-d2.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | import pickle as pkl 5 | import sys 6 | 7 | import torch 8 | 9 | """ 10 | Usage: 11 | # download pretrained swin model: 12 | wget https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth 13 | # run the conversion 14 | ./convert-pretrained-swin-model-to-d2.py swin_tiny_patch4_window7_224.pth swin_tiny_patch4_window7_224.pkl 15 | # Then, use swin_tiny_patch4_window7_224.pkl with the following changes in config: 16 | MODEL: 17 | WEIGHTS: "/path/to/swin_tiny_patch4_window7_224.pkl" 18 | INPUT: 19 | FORMAT: "RGB" 20 | """ 21 | 22 | if __name__ == "__main__": 23 | input = sys.argv[1] 24 | 25 | obj = torch.load(input, map_location="cpu")["model"] 26 | 27 | res = {"model": obj, "__author__": "third_party", "matching_heuristics": True} 28 | 29 | with open(sys.argv[2], "wb") as f: 30 | pkl.dump(res, f) 31 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/tools/parse_name.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | cfg_path = sys.argv[1] 4 | print(cfg_path.split(".")[0].replace("/", "_")) 5 | -------------------------------------------------------------------------------- /third_party/zsseg.baseline/tools/self_training.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/third_party/zsseg.baseline/tools/self_training.sh -------------------------------------------------------------------------------- /trainers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-science/prompt-pretraining/24bca56b21b4fab1d493c8758c31fd6d1c40bb96/trainers/__init__.py -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import torch.distributed as dist 2 | 3 | 4 | def is_dist_avail_and_initialized(): 5 | if not dist.is_available(): 6 | return False 7 | if not dist.is_initialized(): 8 | return False 9 | return True 10 | 11 | 12 | def get_rank(): 13 | if not is_dist_avail_and_initialized(): 14 | return 0 15 | return dist.get_rank() 16 | 17 | 18 | def is_main_process(): 19 | return get_rank() == 0 20 | 21 | 22 | def print_args(args, cfg): 23 | print("***************") 24 | print("** Arguments **") 25 | print("***************") 26 | optkeys = list(args.__dict__.keys()) 27 | optkeys.sort() 28 | for key in optkeys: 29 | print("{}: {}".format(key, args.__dict__[key])) 30 | print("************") 31 | print("** Config **") 32 | print("************") 33 | print(cfg) 34 | --------------------------------------------------------------------------------
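
The helpers in utils.py above exist so that multi-GPU (DDP) runs do not duplicate console output: get_rank() falls back to rank 0 whenever torch.distributed is unavailable or uninitialized, so the same code path also works in single-GPU runs. Below is a minimal usage sketch of that pattern; the script name, the --backbone flag, and the plain dict standing in for a yacs CfgNode are illustrative assumptions, not code from this repository.

# run_sketch.py -- minimal sketch of how the utils.py helpers are used;
# the flag names and the stand-in cfg are assumptions, not repo code.
import argparse

from utils import is_main_process, print_args


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--backbone", default="ViT-B/16")
    args = parser.parse_args()

    # Stand-in for the yacs CfgNode that print_args() would normally
    # receive from a training entry point.
    cfg = {"OPTIM": {"LR": 0.002, "MAX_EPOCH": 5}}

    # Under torchrun/DDP every rank executes this function, but only
    # rank 0 prints; without dist.init_process_group(), get_rank()
    # returns 0 and the guard is a no-op.
    if is_main_process():
        print_args(args, cfg)


if __name__ == "__main__":
    main()

Gating all logging behind is_main_process() keeps rank-0 output identical between single-GPU and multi-GPU launches, which is why the fallback to rank 0 in get_rank() matters.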