├── README.md ├── open_clip ├── .DS_Store ├── __init__.py ├── bpe_simple_vocab_16e6.txt.gz ├── coca_model.py ├── constants.py ├── factory.py ├── generation_utils.py ├── hf_configs.py ├── hf_model.py ├── loss.py ├── model.py ├── model_configs │ ├── RN101-quickgelu.json │ ├── RN101.json │ ├── RN50-quickgelu.json │ ├── RN50.json │ ├── RN50x16.json │ ├── RN50x4.json │ ├── RN50x64.json │ ├── ViT-B-16-plus-240.json │ ├── ViT-B-16-plus.json │ ├── ViT-B-16.json │ ├── ViT-B-32-plus-256.json │ ├── ViT-B-32-quickgelu.json │ ├── ViT-B-32.json │ ├── ViT-H-14.json │ ├── ViT-H-16.json │ ├── ViT-L-14-280.json │ ├── ViT-L-14-336.json │ ├── ViT-L-14.json │ ├── ViT-L-16-320.json │ ├── ViT-L-16.json │ ├── ViT-M-16-alt.json │ ├── ViT-M-16.json │ ├── ViT-M-32-alt.json │ ├── ViT-M-32.json │ ├── ViT-S-16-alt.json │ ├── ViT-S-16.json │ ├── ViT-S-32-alt.json │ ├── ViT-S-32.json │ ├── ViT-bigG-14.json │ ├── ViT-e-14.json │ ├── ViT-g-14.json │ ├── ViT-test.json │ ├── coca_ViT-B-32.json │ ├── coca_ViT-L-14.json │ ├── coca_base.json │ ├── coca_roberta-ViT-B-32.json │ ├── convnext_base.json │ ├── convnext_base_w.json │ ├── convnext_base_w_320.json │ ├── convnext_large.json │ ├── convnext_large_d.json │ ├── convnext_large_d_320.json │ ├── convnext_small.json │ ├── convnext_tiny.json │ ├── convnext_xlarge.json │ ├── convnext_xxlarge.json │ ├── convnext_xxlarge_320.json │ ├── mt5-base-ViT-B-32.json │ ├── mt5-xl-ViT-H-14.json │ ├── roberta-ViT-B-32.json │ ├── swin_base_patch4_window7_224.json │ ├── vit_medium_patch16_gap_256.json │ ├── vit_relpos_medium_patch16_cls_224.json │ ├── xlm-roberta-base-ViT-B-32.json │ └── xlm-roberta-large-ViT-H-14.json ├── modified_resnet.py ├── open_clip │ ├── __init__.py │ ├── bpe_simple_vocab_16e6.txt.gz │ ├── coca_model.py │ ├── constants.py │ ├── factory.py │ ├── generation_utils.py │ ├── hf_configs.py │ ├── hf_model.py │ ├── loss.py │ ├── model.py │ ├── model_configs │ │ ├── EVA01-g-14-plus.json │ │ ├── EVA01-g-14.json │ │ ├── EVA02-B-16.json │ │ ├── EVA02-E-14-plus.json │ │ ├── EVA02-E-14.json │ │ ├── EVA02-L-14-336.json │ │ ├── EVA02-L-14.json │ │ ├── RN101-quickgelu.json │ │ ├── RN101.json │ │ ├── RN50-quickgelu.json │ │ ├── RN50.json │ │ ├── RN50x16.json │ │ ├── RN50x4.json │ │ ├── RN50x64.json │ │ ├── ViT-B-16-plus-240.json │ │ ├── ViT-B-16-plus.json │ │ ├── ViT-B-16.json │ │ ├── ViT-B-32-plus-256.json │ │ ├── ViT-B-32-quickgelu.json │ │ ├── ViT-B-32.json │ │ ├── ViT-H-14.json │ │ ├── ViT-H-16.json │ │ ├── ViT-L-14-280.json │ │ ├── ViT-L-14-336.json │ │ ├── ViT-L-14.json │ │ ├── ViT-L-16-320.json │ │ ├── ViT-L-16.json │ │ ├── ViT-M-16-alt.json │ │ ├── ViT-M-16.json │ │ ├── ViT-M-32-alt.json │ │ ├── ViT-M-32.json │ │ ├── ViT-S-16-alt.json │ │ ├── ViT-S-16.json │ │ ├── ViT-S-32-alt.json │ │ ├── ViT-S-32.json │ │ ├── ViT-bigG-14.json │ │ ├── ViT-e-14.json │ │ ├── ViT-g-14.json │ │ ├── coca_ViT-B-32.json │ │ ├── coca_ViT-L-14.json │ │ ├── coca_base.json │ │ ├── coca_roberta-ViT-B-32.json │ │ ├── convnext_base.json │ │ ├── convnext_base_w.json │ │ ├── convnext_base_w_320.json │ │ ├── convnext_large.json │ │ ├── convnext_large_d.json │ │ ├── convnext_large_d_320.json │ │ ├── convnext_small.json │ │ ├── convnext_tiny.json │ │ ├── convnext_xlarge.json │ │ ├── convnext_xxlarge.json │ │ ├── convnext_xxlarge_320.json │ │ ├── mt5-base-ViT-B-32.json │ │ ├── mt5-xl-ViT-H-14.json │ │ ├── roberta-ViT-B-32.json │ │ ├── swin_base_patch4_window7_224.json │ │ ├── vit_medium_patch16_gap_256.json │ │ ├── vit_relpos_medium_patch16_cls_224.json │ │ ├── xlm-roberta-base-ViT-B-32.json │ │ └── xlm-roberta-large-ViT-H-14.json │ ├── modified_resnet.py │ ├── open_clip │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── coca_model.cpython-37.pyc │ │ │ ├── constants.cpython-37.pyc │ │ │ ├── factory.cpython-37.pyc │ │ │ ├── hf_configs.cpython-37.pyc │ │ │ ├── hf_model.cpython-37.pyc │ │ │ ├── loss.cpython-37.pyc │ │ │ ├── model.cpython-37.pyc │ │ │ ├── modified_resnet.cpython-37.pyc │ │ │ ├── openai.cpython-37.pyc │ │ │ ├── pretrained.cpython-37.pyc │ │ │ ├── push_to_hf_hub.cpython-37.pyc │ │ │ ├── timm_model.cpython-37.pyc │ │ │ ├── tokenizer.cpython-37.pyc │ │ │ ├── transform.cpython-37.pyc │ │ │ ├── transformer.cpython-37.pyc │ │ │ ├── utils.cpython-37.pyc │ │ │ ├── version.cpython-37.pyc │ │ │ ├── zero_shot_classifier.cpython-37.pyc │ │ │ └── zero_shot_metadata.cpython-37.pyc │ │ ├── bpe_simple_vocab_16e6.txt.gz │ │ ├── coca_model.py │ │ ├── model.py │ │ ├── model_configs │ │ │ ├── RN101-quickgelu.json │ │ │ ├── RN101.json │ │ │ ├── RN50-quickgelu.json │ │ │ ├── RN50.json │ │ │ ├── RN50x16.json │ │ │ ├── RN50x4.json │ │ │ ├── RN50x64.json │ │ │ ├── ViT-B-16-plus-240.json │ │ │ ├── ViT-B-16-plus.json │ │ │ ├── ViT-B-16.json │ │ │ ├── ViT-B-32-plus-256.json │ │ │ ├── ViT-B-32-quickgelu.json │ │ │ ├── ViT-B-32.json │ │ │ ├── ViT-H-14.json │ │ │ ├── ViT-H-16.json │ │ │ ├── ViT-L-14-280.json │ │ │ ├── ViT-L-14-336.json │ │ │ ├── ViT-L-14.json │ │ │ ├── ViT-L-16-320.json │ │ │ ├── ViT-L-16.json │ │ │ ├── ViT-M-16-alt.json │ │ │ ├── ViT-M-16.json │ │ │ ├── ViT-M-32-alt.json │ │ │ ├── ViT-M-32.json │ │ │ ├── ViT-S-16-alt.json │ │ │ ├── ViT-S-16.json │ │ │ ├── ViT-S-32-alt.json │ │ │ ├── ViT-S-32.json │ │ │ ├── ViT-bigG-14.json │ │ │ ├── ViT-e-14.json │ │ │ ├── ViT-g-14.json │ │ │ ├── ViT-test.json │ │ │ ├── coca_ViT-B-32.json │ │ │ ├── coca_ViT-L-14.json │ │ │ ├── coca_base.json │ │ │ ├── coca_roberta-ViT-B-32.json │ │ │ ├── convnext_base.json │ │ │ ├── convnext_base_w.json │ │ │ ├── convnext_base_w_320.json │ │ │ ├── convnext_large.json │ │ │ ├── convnext_large_d.json │ │ │ ├── convnext_large_d_320.json │ │ │ ├── convnext_small.json │ │ │ ├── convnext_tiny.json │ │ │ ├── convnext_xlarge.json │ │ │ ├── convnext_xxlarge.json │ │ │ ├── convnext_xxlarge_320.json │ │ │ ├── mt5-base-ViT-B-32.json │ │ │ ├── mt5-xl-ViT-H-14.json │ │ │ ├── roberta-ViT-B-32.json │ │ │ ├── swin_base_patch4_window7_224.json │ │ │ ├── vit_medium_patch16_gap_256.json │ │ │ ├── vit_relpos_medium_patch16_cls_224.json │ │ │ ├── xlm-roberta-base-ViT-B-32.json │ │ │ └── xlm-roberta-large-ViT-H-14.json │ │ ├── transformer.py │ │ ├── utils.py │ │ └── zero_shot_classifier.py │ ├── openai.py │ ├── pretrained.py │ ├── push_to_hf_hub.py │ ├── timm_model.py │ ├── tokenizer.py │ ├── transform.py │ ├── transformer.py │ ├── utils.py │ ├── version.py │ ├── zero_shot_classifier.py │ └── zero_shot_metadata.py ├── openai.py ├── pretrained.py ├── push_to_hf_hub.py ├── search.py ├── timm_model.py ├── tokenizer.py ├── transform.py ├── transformer.py ├── utils.py ├── version.py ├── zero_shot_classifier.py └── zero_shot_metadata.py ├── requirement.txt └── training ├── __init__.py ├── data.py ├── distributed.py ├── file_utils.py ├── logger.py ├── main.py ├── params.py ├── precision.py ├── profile.py ├── scheduler.py ├── train.py └── zero_shot.py /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/README.md -------------------------------------------------------------------------------- /open_clip/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/.DS_Store -------------------------------------------------------------------------------- /open_clip/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/__init__.py -------------------------------------------------------------------------------- /open_clip/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /open_clip/coca_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/coca_model.py -------------------------------------------------------------------------------- /open_clip/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/constants.py -------------------------------------------------------------------------------- /open_clip/factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/factory.py -------------------------------------------------------------------------------- /open_clip/generation_utils.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /open_clip/hf_configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/hf_configs.py -------------------------------------------------------------------------------- /open_clip/hf_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/hf_model.py -------------------------------------------------------------------------------- /open_clip/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/loss.py -------------------------------------------------------------------------------- /open_clip/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model.py -------------------------------------------------------------------------------- /open_clip/model_configs/RN101-quickgelu.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/RN101-quickgelu.json -------------------------------------------------------------------------------- /open_clip/model_configs/RN101.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/RN101.json -------------------------------------------------------------------------------- /open_clip/model_configs/RN50-quickgelu.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/RN50-quickgelu.json -------------------------------------------------------------------------------- /open_clip/model_configs/RN50.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/RN50.json -------------------------------------------------------------------------------- /open_clip/model_configs/RN50x16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/RN50x16.json -------------------------------------------------------------------------------- /open_clip/model_configs/RN50x4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/RN50x4.json -------------------------------------------------------------------------------- /open_clip/model_configs/RN50x64.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/RN50x64.json -------------------------------------------------------------------------------- /open_clip/model_configs/ViT-B-16-plus-240.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/ViT-B-16-plus-240.json -------------------------------------------------------------------------------- /open_clip/model_configs/ViT-B-16-plus.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/ViT-B-16-plus.json -------------------------------------------------------------------------------- /open_clip/model_configs/ViT-B-16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/ViT-B-16.json -------------------------------------------------------------------------------- /open_clip/model_configs/ViT-B-32-plus-256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/ViT-B-32-plus-256.json -------------------------------------------------------------------------------- /open_clip/model_configs/ViT-B-32-quickgelu.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/ViT-B-32-quickgelu.json -------------------------------------------------------------------------------- /open_clip/model_configs/ViT-B-32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/ViT-B-32.json -------------------------------------------------------------------------------- /open_clip/model_configs/ViT-H-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/ViT-H-14.json -------------------------------------------------------------------------------- /open_clip/model_configs/ViT-H-16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/ViT-H-16.json -------------------------------------------------------------------------------- /open_clip/model_configs/ViT-L-14-280.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/ViT-L-14-280.json -------------------------------------------------------------------------------- /open_clip/model_configs/ViT-L-14-336.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/ViT-L-14-336.json -------------------------------------------------------------------------------- /open_clip/model_configs/ViT-L-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/ViT-L-14.json -------------------------------------------------------------------------------- /open_clip/model_configs/ViT-L-16-320.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/ViT-L-16-320.json -------------------------------------------------------------------------------- /open_clip/model_configs/ViT-L-16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/ViT-L-16.json -------------------------------------------------------------------------------- /open_clip/model_configs/ViT-M-16-alt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/ViT-M-16-alt.json -------------------------------------------------------------------------------- /open_clip/model_configs/ViT-M-16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/ViT-M-16.json -------------------------------------------------------------------------------- /open_clip/model_configs/ViT-M-32-alt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/ViT-M-32-alt.json -------------------------------------------------------------------------------- /open_clip/model_configs/ViT-M-32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/ViT-M-32.json -------------------------------------------------------------------------------- /open_clip/model_configs/ViT-S-16-alt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/ViT-S-16-alt.json -------------------------------------------------------------------------------- /open_clip/model_configs/ViT-S-16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/ViT-S-16.json -------------------------------------------------------------------------------- /open_clip/model_configs/ViT-S-32-alt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/ViT-S-32-alt.json -------------------------------------------------------------------------------- /open_clip/model_configs/ViT-S-32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/ViT-S-32.json -------------------------------------------------------------------------------- /open_clip/model_configs/ViT-bigG-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/ViT-bigG-14.json -------------------------------------------------------------------------------- /open_clip/model_configs/ViT-e-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/ViT-e-14.json -------------------------------------------------------------------------------- /open_clip/model_configs/ViT-g-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/ViT-g-14.json -------------------------------------------------------------------------------- /open_clip/model_configs/ViT-test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/ViT-test.json -------------------------------------------------------------------------------- /open_clip/model_configs/coca_ViT-B-32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/coca_ViT-B-32.json -------------------------------------------------------------------------------- /open_clip/model_configs/coca_ViT-L-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/coca_ViT-L-14.json -------------------------------------------------------------------------------- /open_clip/model_configs/coca_base.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/coca_base.json -------------------------------------------------------------------------------- /open_clip/model_configs/coca_roberta-ViT-B-32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/coca_roberta-ViT-B-32.json -------------------------------------------------------------------------------- /open_clip/model_configs/convnext_base.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/convnext_base.json -------------------------------------------------------------------------------- /open_clip/model_configs/convnext_base_w.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/convnext_base_w.json -------------------------------------------------------------------------------- /open_clip/model_configs/convnext_base_w_320.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/convnext_base_w_320.json -------------------------------------------------------------------------------- /open_clip/model_configs/convnext_large.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/convnext_large.json -------------------------------------------------------------------------------- /open_clip/model_configs/convnext_large_d.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/convnext_large_d.json -------------------------------------------------------------------------------- /open_clip/model_configs/convnext_large_d_320.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/convnext_large_d_320.json -------------------------------------------------------------------------------- /open_clip/model_configs/convnext_small.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/convnext_small.json -------------------------------------------------------------------------------- /open_clip/model_configs/convnext_tiny.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/convnext_tiny.json -------------------------------------------------------------------------------- /open_clip/model_configs/convnext_xlarge.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/convnext_xlarge.json -------------------------------------------------------------------------------- /open_clip/model_configs/convnext_xxlarge.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/convnext_xxlarge.json -------------------------------------------------------------------------------- /open_clip/model_configs/convnext_xxlarge_320.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/convnext_xxlarge_320.json -------------------------------------------------------------------------------- /open_clip/model_configs/mt5-base-ViT-B-32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/mt5-base-ViT-B-32.json -------------------------------------------------------------------------------- /open_clip/model_configs/mt5-xl-ViT-H-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/mt5-xl-ViT-H-14.json -------------------------------------------------------------------------------- /open_clip/model_configs/roberta-ViT-B-32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/roberta-ViT-B-32.json -------------------------------------------------------------------------------- /open_clip/model_configs/swin_base_patch4_window7_224.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/swin_base_patch4_window7_224.json -------------------------------------------------------------------------------- /open_clip/model_configs/vit_medium_patch16_gap_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/vit_medium_patch16_gap_256.json -------------------------------------------------------------------------------- /open_clip/model_configs/vit_relpos_medium_patch16_cls_224.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/vit_relpos_medium_patch16_cls_224.json -------------------------------------------------------------------------------- /open_clip/model_configs/xlm-roberta-base-ViT-B-32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/xlm-roberta-base-ViT-B-32.json -------------------------------------------------------------------------------- /open_clip/model_configs/xlm-roberta-large-ViT-H-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/model_configs/xlm-roberta-large-ViT-H-14.json -------------------------------------------------------------------------------- /open_clip/modified_resnet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/modified_resnet.py -------------------------------------------------------------------------------- /open_clip/open_clip/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/__init__.py -------------------------------------------------------------------------------- /open_clip/open_clip/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /open_clip/open_clip/coca_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/coca_model.py -------------------------------------------------------------------------------- /open_clip/open_clip/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/constants.py -------------------------------------------------------------------------------- /open_clip/open_clip/factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/factory.py -------------------------------------------------------------------------------- /open_clip/open_clip/generation_utils.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /open_clip/open_clip/hf_configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/hf_configs.py -------------------------------------------------------------------------------- /open_clip/open_clip/hf_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/hf_model.py -------------------------------------------------------------------------------- /open_clip/open_clip/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/loss.py -------------------------------------------------------------------------------- /open_clip/open_clip/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model.py -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/EVA01-g-14-plus.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/EVA01-g-14-plus.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/EVA01-g-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/EVA01-g-14.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/EVA02-B-16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/EVA02-B-16.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/EVA02-E-14-plus.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/EVA02-E-14-plus.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/EVA02-E-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/EVA02-E-14.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/EVA02-L-14-336.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/EVA02-L-14-336.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/EVA02-L-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/EVA02-L-14.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/RN101-quickgelu.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/RN101-quickgelu.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/RN101.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/RN101.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/RN50-quickgelu.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/RN50-quickgelu.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/RN50.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/RN50.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/RN50x16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/RN50x16.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/RN50x4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/RN50x4.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/RN50x64.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/RN50x64.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/ViT-B-16-plus-240.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/ViT-B-16-plus-240.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/ViT-B-16-plus.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/ViT-B-16-plus.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/ViT-B-16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/ViT-B-16.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/ViT-B-32-plus-256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/ViT-B-32-plus-256.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/ViT-B-32-quickgelu.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/ViT-B-32-quickgelu.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/ViT-B-32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/ViT-B-32.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/ViT-H-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/ViT-H-14.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/ViT-H-16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/ViT-H-16.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/ViT-L-14-280.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/ViT-L-14-280.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/ViT-L-14-336.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/ViT-L-14-336.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/ViT-L-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/ViT-L-14.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/ViT-L-16-320.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/ViT-L-16-320.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/ViT-L-16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/ViT-L-16.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/ViT-M-16-alt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/ViT-M-16-alt.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/ViT-M-16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/ViT-M-16.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/ViT-M-32-alt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/ViT-M-32-alt.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/ViT-M-32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/ViT-M-32.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/ViT-S-16-alt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/ViT-S-16-alt.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/ViT-S-16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/ViT-S-16.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/ViT-S-32-alt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/ViT-S-32-alt.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/ViT-S-32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/ViT-S-32.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/ViT-bigG-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/ViT-bigG-14.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/ViT-e-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/ViT-e-14.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/ViT-g-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/ViT-g-14.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/coca_ViT-B-32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/coca_ViT-B-32.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/coca_ViT-L-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/coca_ViT-L-14.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/coca_base.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/coca_base.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/coca_roberta-ViT-B-32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/coca_roberta-ViT-B-32.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/convnext_base.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/convnext_base.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/convnext_base_w.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/convnext_base_w.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/convnext_base_w_320.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/convnext_base_w_320.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/convnext_large.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/convnext_large.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/convnext_large_d.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/convnext_large_d.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/convnext_large_d_320.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/convnext_large_d_320.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/convnext_small.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/convnext_small.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/convnext_tiny.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/convnext_tiny.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/convnext_xlarge.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/convnext_xlarge.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/convnext_xxlarge.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/convnext_xxlarge.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/convnext_xxlarge_320.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/convnext_xxlarge_320.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/mt5-base-ViT-B-32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/mt5-base-ViT-B-32.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/mt5-xl-ViT-H-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/mt5-xl-ViT-H-14.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/roberta-ViT-B-32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/roberta-ViT-B-32.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/swin_base_patch4_window7_224.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/swin_base_patch4_window7_224.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/vit_medium_patch16_gap_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/vit_medium_patch16_gap_256.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/vit_relpos_medium_patch16_cls_224.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/vit_relpos_medium_patch16_cls_224.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/xlm-roberta-base-ViT-B-32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/xlm-roberta-base-ViT-B-32.json -------------------------------------------------------------------------------- /open_clip/open_clip/model_configs/xlm-roberta-large-ViT-H-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/model_configs/xlm-roberta-large-ViT-H-14.json -------------------------------------------------------------------------------- /open_clip/open_clip/modified_resnet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/modified_resnet.py -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/__init__.py -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/__pycache__/coca_model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/__pycache__/coca_model.cpython-37.pyc -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/__pycache__/constants.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/__pycache__/constants.cpython-37.pyc -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/__pycache__/factory.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/__pycache__/factory.cpython-37.pyc -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/__pycache__/hf_configs.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/__pycache__/hf_configs.cpython-37.pyc -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/__pycache__/hf_model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/__pycache__/hf_model.cpython-37.pyc -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/__pycache__/loss.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/__pycache__/loss.cpython-37.pyc -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/__pycache__/model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/__pycache__/model.cpython-37.pyc -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/__pycache__/modified_resnet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/__pycache__/modified_resnet.cpython-37.pyc -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/__pycache__/openai.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/__pycache__/openai.cpython-37.pyc -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/__pycache__/pretrained.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/__pycache__/pretrained.cpython-37.pyc -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/__pycache__/push_to_hf_hub.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/__pycache__/push_to_hf_hub.cpython-37.pyc -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/__pycache__/timm_model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/__pycache__/timm_model.cpython-37.pyc -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/__pycache__/tokenizer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/__pycache__/tokenizer.cpython-37.pyc -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/__pycache__/transform.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/__pycache__/transform.cpython-37.pyc -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/__pycache__/transformer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/__pycache__/transformer.cpython-37.pyc -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/__pycache__/version.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/__pycache__/version.cpython-37.pyc -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/__pycache__/zero_shot_classifier.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/__pycache__/zero_shot_classifier.cpython-37.pyc -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/__pycache__/zero_shot_metadata.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/__pycache__/zero_shot_metadata.cpython-37.pyc -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/coca_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/coca_model.py -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model.py -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/RN101-quickgelu.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/RN101-quickgelu.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/RN101.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/RN101.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/RN50-quickgelu.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/RN50-quickgelu.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/RN50.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/RN50.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/RN50x16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/RN50x16.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/RN50x4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/RN50x4.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/RN50x64.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/RN50x64.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/ViT-B-16-plus-240.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/ViT-B-16-plus-240.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/ViT-B-16-plus.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/ViT-B-16-plus.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/ViT-B-16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/ViT-B-16.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/ViT-B-32-plus-256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/ViT-B-32-plus-256.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/ViT-B-32-quickgelu.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/ViT-B-32-quickgelu.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/ViT-B-32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/ViT-B-32.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/ViT-H-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/ViT-H-14.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/ViT-H-16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/ViT-H-16.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/ViT-L-14-280.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/ViT-L-14-280.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/ViT-L-14-336.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/ViT-L-14-336.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/ViT-L-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/ViT-L-14.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/ViT-L-16-320.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/ViT-L-16-320.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/ViT-L-16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/ViT-L-16.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/ViT-M-16-alt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/ViT-M-16-alt.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/ViT-M-16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/ViT-M-16.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/ViT-M-32-alt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/ViT-M-32-alt.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/ViT-M-32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/ViT-M-32.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/ViT-S-16-alt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/ViT-S-16-alt.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/ViT-S-16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/ViT-S-16.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/ViT-S-32-alt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/ViT-S-32-alt.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/ViT-S-32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/ViT-S-32.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/ViT-bigG-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/ViT-bigG-14.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/ViT-e-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/ViT-e-14.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/ViT-g-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/ViT-g-14.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/ViT-test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/ViT-test.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/coca_ViT-B-32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/coca_ViT-B-32.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/coca_ViT-L-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/coca_ViT-L-14.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/coca_base.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/coca_base.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/coca_roberta-ViT-B-32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/coca_roberta-ViT-B-32.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/convnext_base.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/convnext_base.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/convnext_base_w.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/convnext_base_w.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/convnext_base_w_320.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/convnext_base_w_320.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/convnext_large.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/convnext_large.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/convnext_large_d.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/convnext_large_d.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/convnext_large_d_320.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/convnext_large_d_320.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/convnext_small.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/convnext_small.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/convnext_tiny.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/convnext_tiny.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/convnext_xlarge.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/convnext_xlarge.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/convnext_xxlarge.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/convnext_xxlarge.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/convnext_xxlarge_320.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/convnext_xxlarge_320.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/mt5-base-ViT-B-32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/mt5-base-ViT-B-32.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/mt5-xl-ViT-H-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/mt5-xl-ViT-H-14.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/roberta-ViT-B-32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/roberta-ViT-B-32.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/swin_base_patch4_window7_224.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/swin_base_patch4_window7_224.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/vit_medium_patch16_gap_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/vit_medium_patch16_gap_256.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/vit_relpos_medium_patch16_cls_224.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/vit_relpos_medium_patch16_cls_224.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/xlm-roberta-base-ViT-B-32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/xlm-roberta-base-ViT-B-32.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/model_configs/xlm-roberta-large-ViT-H-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/model_configs/xlm-roberta-large-ViT-H-14.json -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/transformer.py -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/utils.py -------------------------------------------------------------------------------- /open_clip/open_clip/open_clip/zero_shot_classifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/open_clip/zero_shot_classifier.py -------------------------------------------------------------------------------- /open_clip/open_clip/openai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/openai.py -------------------------------------------------------------------------------- /open_clip/open_clip/pretrained.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/pretrained.py -------------------------------------------------------------------------------- /open_clip/open_clip/push_to_hf_hub.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/push_to_hf_hub.py -------------------------------------------------------------------------------- /open_clip/open_clip/timm_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/timm_model.py -------------------------------------------------------------------------------- /open_clip/open_clip/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/tokenizer.py -------------------------------------------------------------------------------- /open_clip/open_clip/transform.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/transform.py -------------------------------------------------------------------------------- /open_clip/open_clip/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/transformer.py -------------------------------------------------------------------------------- /open_clip/open_clip/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/utils.py -------------------------------------------------------------------------------- /open_clip/open_clip/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '2.20.0' 2 | -------------------------------------------------------------------------------- /open_clip/open_clip/zero_shot_classifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/zero_shot_classifier.py -------------------------------------------------------------------------------- /open_clip/open_clip/zero_shot_metadata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/open_clip/zero_shot_metadata.py -------------------------------------------------------------------------------- /open_clip/openai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/openai.py -------------------------------------------------------------------------------- /open_clip/pretrained.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/pretrained.py -------------------------------------------------------------------------------- /open_clip/push_to_hf_hub.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/push_to_hf_hub.py -------------------------------------------------------------------------------- /open_clip/search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/search.py -------------------------------------------------------------------------------- /open_clip/timm_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/timm_model.py -------------------------------------------------------------------------------- /open_clip/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/tokenizer.py -------------------------------------------------------------------------------- /open_clip/transform.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/transform.py -------------------------------------------------------------------------------- /open_clip/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/transformer.py -------------------------------------------------------------------------------- /open_clip/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/utils.py -------------------------------------------------------------------------------- /open_clip/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '2.20.0' 2 | -------------------------------------------------------------------------------- /open_clip/zero_shot_classifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/zero_shot_classifier.py -------------------------------------------------------------------------------- /open_clip/zero_shot_metadata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/open_clip/zero_shot_metadata.py -------------------------------------------------------------------------------- /requirement.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/requirement.txt -------------------------------------------------------------------------------- /training/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /training/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/training/data.py -------------------------------------------------------------------------------- /training/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/training/distributed.py -------------------------------------------------------------------------------- /training/file_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/training/file_utils.py -------------------------------------------------------------------------------- /training/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/training/logger.py -------------------------------------------------------------------------------- /training/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/training/main.py -------------------------------------------------------------------------------- /training/params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/training/params.py -------------------------------------------------------------------------------- /training/precision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/training/precision.py -------------------------------------------------------------------------------- /training/profile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/training/profile.py -------------------------------------------------------------------------------- /training/scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/training/scheduler.py -------------------------------------------------------------------------------- /training/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/training/train.py -------------------------------------------------------------------------------- /training/zero_shot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zi-hao-Wei/Efficient-Vision-Language-Pre-training-by-Cluster-Masking/HEAD/training/zero_shot.py --------------------------------------------------------------------------------