├── datasets ├── __init__.py ├── imagenet_sketch.py ├── imagenetv2.py ├── imagenet_r.py ├── imagenet_a.py ├── food101.py ├── caltech101.py ├── fgvc_aircraft.py ├── eurosat.py ├── stanford_cars.py ├── sun397.py ├── ucf101.py ├── imagenet.py ├── oxford_flowers.py ├── dtd.py ├── oxford_pets.py └── utils │ └── download_datasets.py ├── prompting ├── __init__.py └── losses.py ├── clip ├── __init__.py ├── bpe_simple_vocab_16e6.txt.gz ├── simple_tokenizer.py └── clip.py ├── configs ├── datasets │ ├── sun397.yaml │ ├── ucf101.yaml │ ├── eurosat.yaml │ ├── food101.yaml │ ├── imagenet.yaml │ ├── oxford_pets.yaml │ ├── caltech101.yaml │ ├── dtd.yaml │ ├── imagenet_a.yaml │ ├── imagenet_r.yaml │ ├── imagenetv2.yaml │ ├── oxford_flowers.yaml │ ├── fgvc_aircraft.yaml │ ├── stanford_cars.yaml │ └── imagenet_sketch.yaml └── LASP │ ├── vit_b16_c4_ep10_batch1_cls_t2t_5_wcl_25_g1_b.yaml │ ├── vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32.yaml │ ├── vit_b16_c4_ep300_batch32_cls_t2t_5_wcl_25_g1_b_lr32.yaml │ └── vit_b16_c4_ep50_batch32_cls_t2t_10_wcl_25_g1_b_lr32.yaml ├── requirements.txt ├── logs ├── eurosat │ └── vit_b16_c4_ep300_batch32_cls_t2t_5_wcl_25_g1_b_lr32 │ │ ├── seed3 │ │ └── results.json │ │ ├── seed1 │ │ └── results.json │ │ └── seed2 │ │ └── results.json ├── food101 │ └── vit_b16_c4_ep10_batch1_cls_t2t_5_wcl_25_g1_b │ │ ├── seed1 │ │ └── results.json │ │ ├── seed2 │ │ └── results.json │ │ └── seed3 │ │ └── results.json ├── sun397 │ └── vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32 │ │ ├── seed2 │ │ └── results.json │ │ ├── seed1 │ │ └── results.json │ │ └── seed3 │ │ └── results.json ├── dtd │ └── vit_b16_c4_ep50_batch32_cls_t2t_10_wcl_25_g1_b_lr32 │ │ ├── seed1 │ │ └── results.json │ │ ├── seed2 │ │ └── results.json │ │ └── seed3 │ │ └── results.json ├── ucf101 │ └── vit_b16_c4_ep50_batch32_cls_t2t_10_wcl_25_g1_b_lr32 │ │ ├── seed1 │ │ └── results.json │ │ ├── seed2 │ │ └── results.json │ │ └── seed3 │ │ └── results.json ├── README.md ├── caltech101 │ └── vit_b16_c4_ep50_batch32_cls_t2t_10_wcl_25_g1_b_lr32 │ │ ├── seed1 │ │ ├── results.json │ │ └── log.txt-2023-03-17-20-00-54 │ │ ├── seed2 │ │ └── results.json │ │ └── seed3 │ │ └── results.json ├── fgvc_aircraft │ └── vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32 │ │ ├── seed1 │ │ └── results.json │ │ ├── seed2 │ │ └── results.json │ │ └── seed3 │ │ └── results.json ├── oxford_flowers │ └── vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32 │ │ ├── seed1 │ │ ├── results.json │ │ └── log.txt │ │ ├── seed3 │ │ └── results.json │ │ └── seed2 │ │ ├── results.json │ │ └── log.txt ├── oxford_pets │ └── vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32 │ │ ├── seed1 │ │ └── results.json │ │ ├── seed2 │ │ └── results.json │ │ └── seed3 │ │ └── results.json └── stanford_cars │ └── vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32 │ ├── seed2 │ ├── results.json │ └── log.txt-2023-03-17-19-29-19 │ ├── seed1 │ ├── results.json │ └── log.txt-2023-03-17-19-28-06 │ └── seed3 │ ├── results.json │ └── log.txt-2023-03-17-19-30-31 ├── scripts ├── base2new_train.sh ├── base2new_train_all.sh ├── base2new_test.sh └── base2new_test_all.sh ├── LICENSE ├── .gitignore ├── README.md └── train.py /datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /prompting/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /clip/__init__.py: -------------------------------------------------------------------------------- 1 | from .clip import * 2 | -------------------------------------------------------------------------------- /configs/datasets/sun397.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "SUN397" 3 | -------------------------------------------------------------------------------- /configs/datasets/ucf101.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "UCF101" 3 | -------------------------------------------------------------------------------- /configs/datasets/eurosat.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "EuroSAT" 3 | -------------------------------------------------------------------------------- /configs/datasets/food101.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "Food101" 3 | -------------------------------------------------------------------------------- /configs/datasets/imagenet.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "ImageNet" 3 | -------------------------------------------------------------------------------- /configs/datasets/oxford_pets.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "OxfordPets" -------------------------------------------------------------------------------- /configs/datasets/caltech101.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "Caltech101" 3 | -------------------------------------------------------------------------------- /configs/datasets/dtd.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "DescribableTextures" 3 | -------------------------------------------------------------------------------- /configs/datasets/imagenet_a.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "ImageNetA" 3 | -------------------------------------------------------------------------------- /configs/datasets/imagenet_r.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "ImageNetR" 3 | -------------------------------------------------------------------------------- /configs/datasets/imagenetv2.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "ImageNetV2" 3 | -------------------------------------------------------------------------------- /configs/datasets/oxford_flowers.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "OxfordFlowers" -------------------------------------------------------------------------------- /configs/datasets/fgvc_aircraft.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "FGVCAircraft" 3 | -------------------------------------------------------------------------------- /configs/datasets/stanford_cars.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "StanfordCars" 3 | 
-------------------------------------------------------------------------------- /configs/datasets/imagenet_sketch.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | NAME: "ImageNetSketch" 3 | -------------------------------------------------------------------------------- /clip/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/1adrianb/lasp/HEAD/clip/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ftfy 2 | regex 3 | tqdm 4 | torch 5 | torchvision 6 | gdown 7 | git+https://github.com/KaiyangZhou/Dassl.pytorch.git 8 | -------------------------------------------------------------------------------- /logs/eurosat/vit_b16_c4_ep300_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed3/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 94.0, "error_rate": 6.0, "macro_f1": 94.02871331131544} -------------------------------------------------------------------------------- /logs/food101/vit_b16_c4_ep10_batch1_cls_t2t_5_wcl_25_g1_b/seed1/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 90.84967320261438, "error_rate": 9.150326797385617, "macro_f1": 90.82239612103685} -------------------------------------------------------------------------------- /logs/food101/vit_b16_c4_ep10_batch1_cls_t2t_5_wcl_25_g1_b/seed2/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 90.94771241830065, "error_rate": 9.052287581699346, "macro_f1": 90.92411827918939} -------------------------------------------------------------------------------- /logs/food101/vit_b16_c4_ep10_batch1_cls_t2t_5_wcl_25_g1_b/seed3/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 90.74509803921569, "error_rate": 9.25490196078431, "macro_f1": 90.72868117815293} -------------------------------------------------------------------------------- /logs/sun397/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed2/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 81.678391959799, "error_rate": 18.321608040201, "macro_f1": 81.42798025813353} -------------------------------------------------------------------------------- /logs/dtd/vit_b16_c4_ep50_batch32_cls_t2t_10_wcl_25_g1_b_lr32/seed1/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 80.78703703703704, "error_rate": 19.212962962962962, "macro_f1": 80.62820333816828} -------------------------------------------------------------------------------- /logs/dtd/vit_b16_c4_ep50_batch32_cls_t2t_10_wcl_25_g1_b_lr32/seed2/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 81.48148148148148, "error_rate": 18.51851851851852, "macro_f1": 81.33503933951012} -------------------------------------------------------------------------------- /logs/dtd/vit_b16_c4_ep50_batch32_cls_t2t_10_wcl_25_g1_b_lr32/seed3/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 79.62962962962963, "error_rate": 20.370370370370367, "macro_f1": 
79.27492877057101} -------------------------------------------------------------------------------- /logs/eurosat/vit_b16_c4_ep300_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed1/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 94.54761904761905, "error_rate": 5.452380952380949, "macro_f1": 94.53073286226926} -------------------------------------------------------------------------------- /logs/eurosat/vit_b16_c4_ep300_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed2/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 95.16666666666667, "error_rate": 4.833333333333329, "macro_f1": 95.21552666062357} -------------------------------------------------------------------------------- /logs/sun397/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed1/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 81.1356783919598, "error_rate": 18.8643216080402, "macro_f1": 80.93270741155321} -------------------------------------------------------------------------------- /logs/sun397/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed3/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 81.21608040201005, "error_rate": 18.78391959798995, "macro_f1": 81.0267134648152} -------------------------------------------------------------------------------- /logs/ucf101/vit_b16_c4_ep50_batch32_cls_t2t_10_wcl_25_g1_b_lr32/seed1/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 84.69493278179938, "error_rate": 15.30506721820062, "macro_f1": 84.01638558750325} -------------------------------------------------------------------------------- /logs/ucf101/vit_b16_c4_ep50_batch32_cls_t2t_10_wcl_25_g1_b_lr32/seed2/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 85.57394002068253, "error_rate": 14.426059979317472, "macro_f1": 84.6892082594593} -------------------------------------------------------------------------------- /logs/ucf101/vit_b16_c4_ep50_batch32_cls_t2t_10_wcl_25_g1_b_lr32/seed3/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 86.1427094105481, "error_rate": 13.857290589451907, "macro_f1": 85.43861941962261} -------------------------------------------------------------------------------- /logs/README.md: -------------------------------------------------------------------------------- 1 | ### Finetuning logs 2 | 3 | To facilitate monitoring the experiments, we provide within each subdirectory the corresponding logs obtained by running the provided code. 
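Each `results.json` stores the final test metrics of one run (`accuracy`, `error_rate` and `macro_f1`, all in percent). Below is a minimal sketch for averaging a metric across the seeds of a run, assuming the `logs/<dataset>/<config>/seed*/results.json` layout used here; the chosen run directory is just an example:

```python
import json
import statistics
from pathlib import Path

# Example run directory; point this at any <dataset>/<config> folder under logs/.
run_dir = Path("logs/dtd/vit_b16_c4_ep50_batch32_cls_t2t_10_wcl_25_g1_b_lr32")

# Collect the per-seed accuracies and report their mean.
accuracies = [json.loads(p.read_text())["accuracy"]
              for p in sorted(run_dir.glob("seed*/results.json"))]
print(f"{run_dir.name}: mean accuracy {statistics.mean(accuracies):.2f} "
      f"over {len(accuracies)} seeds")
```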
-------------------------------------------------------------------------------- /logs/caltech101/vit_b16_c4_ep50_batch32_cls_t2t_10_wcl_25_g1_b_lr32/seed1/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 98.38605551969012, "error_rate": 1.6139444803098826, "macro_f1": 96.97253277690771} -------------------------------------------------------------------------------- /logs/caltech101/vit_b16_c4_ep50_batch32_cls_t2t_10_wcl_25_g1_b_lr32/seed2/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 98.7088444157521, "error_rate": 1.2911555842479032, "macro_f1": 97.52722272985687} -------------------------------------------------------------------------------- /logs/caltech101/vit_b16_c4_ep50_batch32_cls_t2t_10_wcl_25_g1_b_lr32/seed3/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 98.25693996126533, "error_rate": 1.7430600387346686, "macro_f1": 96.55988977865061} -------------------------------------------------------------------------------- /logs/fgvc_aircraft/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed1/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 34.93397358943577, "error_rate": 65.06602641056423, "macro_f1": 33.41707337864457} -------------------------------------------------------------------------------- /logs/fgvc_aircraft/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed2/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 35.114045618247296, "error_rate": 64.8859543817527, "macro_f1": 33.37054228829116} -------------------------------------------------------------------------------- /logs/oxford_flowers/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed1/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 97.340930674264, "error_rate": 2.6590693257359987, "macro_f1": 97.3536056251572} -------------------------------------------------------------------------------- /logs/oxford_flowers/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed3/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 97.62583095916429, "error_rate": 2.37416904083571, "macro_f1": 97.5987450721239} -------------------------------------------------------------------------------- /logs/oxford_pets/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed1/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 95.48112706007443, "error_rate": 4.518872939925572, "macro_f1": 95.455293335961} -------------------------------------------------------------------------------- /logs/oxford_pets/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed2/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 95.42796384901648, "error_rate": 4.5720361509835215, "macro_f1": 95.42403747952409} -------------------------------------------------------------------------------- /logs/oxford_pets/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed3/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 96.01275917065391, "error_rate": 3.987240829346092, "macro_f1": 96.02081867121782} 
-------------------------------------------------------------------------------- /logs/stanford_cars/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed2/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 76.0119940029985, "error_rate": 23.988005997001494, "macro_f1": 75.63769154168176}
-------------------------------------------------------------------------------- /logs/fgvc_aircraft/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed3/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 36.49459783913566, "error_rate": 63.50540216086434, "macro_f1": 35.078044863308506}
-------------------------------------------------------------------------------- /logs/oxford_flowers/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed2/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 97.91073124406458, "error_rate": 2.0892687559354215, "macro_f1": 97.79751354052844}
-------------------------------------------------------------------------------- /logs/stanford_cars/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed1/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 76.36181909045477, "error_rate": 23.638180909545227, "macro_f1": 75.86999447245361}
-------------------------------------------------------------------------------- /logs/stanford_cars/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed3/results.json: -------------------------------------------------------------------------------- 1 | {"accuracy": 76.38680659670165, "error_rate": 23.613193403298354, "macro_f1": 75.84036046823879}
-------------------------------------------------------------------------------- /prompting/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | def transpose(x): 5 | # transpose the last two dimensions so prototypes can be matched against features 6 | return x.t() if x.dim() == 2 else x.permute(0, 2, 1) 7 | 8 | def contrastive_loss(visual_features, class_prototypes, labels=None, t=0.07): 9 | # t may be CLIP's learnable log logit scale (a tensor, exponentiated here) or a plain float temperature 10 | logit_scale = t.exp() if torch.is_tensor(t) else 1.0 / t 11 | logits = logit_scale * visual_features @ transpose(class_prototypes) 12 | if labels is not None: 13 | return F.cross_entropy(logits, labels), logits 14 | else: 15 | return None, logits
-------------------------------------------------------------------------------- /scripts/base2new_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # custom config 4 | DATA=~/shared-fi-datasets-01/users/adrian.bulat/data/fs_datasets/ 5 | TRAINER=LASP 6 | 7 | DATASET=$1 8 | SEED=$2 9 | 10 | CFG=$3 11 | SHOTS=16 12 | 13 | 14 | DIR=output/base2new/train_base/${DATASET}/shots_${SHOTS}/${TRAINER}/${CFG}/seed${SEED} 15 | python train.py \ 16 | --root ${DATA} \ 17 | --seed ${SEED} \ 18 | --trainer ${TRAINER} \ 19 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 20 | --config-file configs/${TRAINER}/${CFG}.yaml \ 21 | --output-dir ${DIR} \ 22 | DATASET.NUM_SHOTS ${SHOTS} \ 23 | DATASET.SUBSAMPLE_CLASSES base 24 | 25 |
-------------------------------------------------------------------------------- /scripts/base2new_train_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd ../..
4 | 5 | # custom config 6 | DATA=~/Datasets/coop_datasets/ 7 | TRAINER=LASP 8 | 9 | DATASET=$1 10 | SEED=$2 11 | 12 | CFG=$3 13 | SHOTS=16 14 | 15 | 16 | DIR=output/base2new/train_base_all/${DATASET}/shots_${SHOTS}/${TRAINER}/${CFG}/seed${SEED} 17 | python train.py \ 18 | --root ${DATA} \ 19 | --seed ${SEED} \ 20 | --trainer ${TRAINER} \ 21 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 22 | --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ 23 | --output-dir ${DIR} \ 24 | DATASET.NUM_SHOTS ${SHOTS} \ 25 | DATASET.SUBSAMPLE_CLASSES all 26 | 27 | -------------------------------------------------------------------------------- /scripts/base2new_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # custom config 4 | DATA=~/shared-fi-datasets-01/users/adrian.bulat/data/fs_datasets/ 5 | TRAINER=LASP 6 | 7 | DATASET=$1 8 | SEED=$2 9 | 10 | CFG=$3 11 | SHOTS=16 12 | LOADEP=50 13 | SUB=new 14 | 15 | 16 | COMMON_DIR=${DATASET}/shots_${SHOTS}/${TRAINER}/${CFG}/seed${SEED} 17 | MODEL_DIR=output/base2new/train_base/${COMMON_DIR} 18 | DIR=output/base2new/test_${SUB}/${COMMON_DIR} 19 | python train.py \ 20 | --root ${DATA} \ 21 | --seed ${SEED} \ 22 | --trainer ${TRAINER} \ 23 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 24 | --config-file configs/${TRAINER}/${CFG}.yaml \ 25 | --output-dir ${DIR} \ 26 | --model-dir ${MODEL_DIR} \ 27 | --load-epoch ${LOADEP} \ 28 | --eval-only \ 29 | DATASET.NUM_SHOTS ${SHOTS} \ 30 | DATASET.SUBSAMPLE_CLASSES ${SUB} 31 | -------------------------------------------------------------------------------- /scripts/base2new_test_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd ../.. 4 | 5 | # custom config 6 | DATA=~/Datasets/coop_datasets/ 7 | TRAINER=LASP 8 | 9 | DATASET=$1 10 | SEED=$2 11 | 12 | CFG=$3 13 | SHOTS=16 14 | LOADEP=10 15 | SUB=new 16 | 17 | 18 | COMMON_DIR=${DATASET}/shots_${SHOTS}/${TRAINER}/${CFG}/seed${SEED} 19 | MODEL_DIR=output/base2new/train_base/${COMMON_DIR} 20 | DIR=output/base2new/test_${SUB}/${COMMON_DIR} 21 | python train.py \ 22 | --root ${DATA} \ 23 | --seed ${SEED} \ 24 | --trainer ${TRAINER} \ 25 | --dataset-config-file configs/datasets/${DATASET}.yaml \ 26 | --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ 27 | --output-dir ${DIR} \ 28 | --model-dir ${MODEL_DIR} \ 29 | --load-epoch ${LOADEP} \ 30 | --eval-only \ 31 | DATASET.NUM_SHOTS ${SHOTS} \ 32 | DATASET.SUBSAMPLE_CLASSES ${SUB} \ 33 | DATASET.INCLUDE_ALL_CLASSES True 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /datasets/imagenet_sketch.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase 4 | from dassl.utils import listdir_nohidden 5 | 6 | from .imagenet import ImageNet 7 | 8 | 9 | @DATASET_REGISTRY.register() 10 | class ImageNetSketch(DatasetBase): 11 | """ImageNet-Sketch. 12 | 13 | This dataset is used for testing only. 14 | """ 15 | 16 | dataset_dir = "imagenet-sketch" 17 | 18 | def __init__(self, cfg): 19 | root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) 20 | self.dataset_dir = os.path.join(root, self.dataset_dir) 21 | self.image_dir = os.path.join(self.dataset_dir, "images") 22 | 23 | text_file = os.path.join(self.dataset_dir, "classnames.txt") 24 | classnames = ImageNet.read_classnames(text_file) 25 | 26 | data = self.read_data(classnames) 27 | 28 | super().__init__(train_x=data, test=data) 29 | 30 | def read_data(self, classnames): 31 | image_dir = self.image_dir 32 | folders = listdir_nohidden(image_dir, sort=True) 33 | items = [] 34 | 35 | for label, folder in enumerate(folders): 36 | imnames = listdir_nohidden(os.path.join(image_dir, folder)) 37 | classname = classnames[folder] 38 | for imname in imnames: 39 | impath = os.path.join(image_dir, folder, imname) 40 | item = Datum(impath=impath, label=label, classname=classname) 41 | items.append(item) 42 | 43 | return items 44 | -------------------------------------------------------------------------------- /datasets/imagenetv2.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase 4 | from dassl.utils import listdir_nohidden 5 | 6 | from .imagenet import ImageNet 7 | 8 | 9 | @DATASET_REGISTRY.register() 10 | class ImageNetV2(DatasetBase): 11 | """ImageNetV2. 12 | 13 | This dataset is used for testing only. 
14 | """ 15 | 16 | dataset_dir = "imagenetv2" 17 | 18 | def __init__(self, cfg): 19 | root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) 20 | self.dataset_dir = os.path.join(root, self.dataset_dir) 21 | image_dir = "imagenetv2-matched-frequency-format-val" 22 | self.image_dir = os.path.join(self.dataset_dir, image_dir) 23 | 24 | text_file = os.path.join(self.dataset_dir, "classnames.txt") 25 | classnames = ImageNet.read_classnames(text_file) 26 | 27 | data = self.read_data(classnames) 28 | 29 | super().__init__(train_x=data, test=data) 30 | 31 | def read_data(self, classnames): 32 | image_dir = self.image_dir 33 | folders = list(classnames.keys()) 34 | items = [] 35 | 36 | for label in range(1000): 37 | class_dir = os.path.join(image_dir, str(label)) 38 | imnames = listdir_nohidden(class_dir) 39 | folder = folders[label] 40 | classname = classnames[folder] 41 | for imname in imnames: 42 | impath = os.path.join(class_dir, imname) 43 | item = Datum(impath=impath, label=label, classname=classname) 44 | items.append(item) 45 | 46 | return items 47 | -------------------------------------------------------------------------------- /datasets/imagenet_r.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase 4 | from dassl.utils import listdir_nohidden 5 | 6 | from .imagenet import ImageNet 7 | 8 | TO_BE_IGNORED = ["README.txt"] 9 | 10 | 11 | @DATASET_REGISTRY.register() 12 | class ImageNetR(DatasetBase): 13 | """ImageNet-R(endition). 14 | 15 | This dataset is used for testing only. 16 | """ 17 | 18 | dataset_dir = "imagenet-rendition" 19 | 20 | def __init__(self, cfg): 21 | root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) 22 | self.dataset_dir = os.path.join(root, self.dataset_dir) 23 | self.image_dir = os.path.join(self.dataset_dir, "imagenet-r") 24 | 25 | text_file = os.path.join(self.dataset_dir, "classnames.txt") 26 | classnames = ImageNet.read_classnames(text_file) 27 | 28 | data = self.read_data(classnames) 29 | 30 | super().__init__(train_x=data, test=data) 31 | 32 | def read_data(self, classnames): 33 | image_dir = self.image_dir 34 | folders = listdir_nohidden(image_dir, sort=True) 35 | folders = [f for f in folders if f not in TO_BE_IGNORED] 36 | items = [] 37 | 38 | for label, folder in enumerate(folders): 39 | imnames = listdir_nohidden(os.path.join(image_dir, folder)) 40 | classname = classnames[folder] 41 | for imname in imnames: 42 | impath = os.path.join(image_dir, folder, imname) 43 | item = Datum(impath=impath, label=label, classname=classname) 44 | items.append(item) 45 | 46 | return items 47 | -------------------------------------------------------------------------------- /datasets/imagenet_a.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase 4 | from dassl.utils import listdir_nohidden 5 | 6 | from .imagenet import ImageNet 7 | 8 | TO_BE_IGNORED = ["README.txt"] 9 | 10 | 11 | @DATASET_REGISTRY.register() 12 | class ImageNetA(DatasetBase): 13 | """ImageNet-A(dversarial). 14 | 15 | This dataset is used for testing only. 
16 | """ 17 | 18 | dataset_dir = "imagenet-adversarial" 19 | 20 | def __init__(self, cfg): 21 | root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) 22 | self.dataset_dir = os.path.join(root, self.dataset_dir) 23 | self.image_dir = os.path.join(self.dataset_dir, "imagenet-a") 24 | 25 | text_file = os.path.join(self.dataset_dir, "classnames.txt") 26 | classnames = ImageNet.read_classnames(text_file) 27 | 28 | data = self.read_data(classnames) 29 | 30 | super().__init__(train_x=data, test=data) 31 | 32 | def read_data(self, classnames): 33 | image_dir = self.image_dir 34 | folders = listdir_nohidden(image_dir, sort=True) 35 | folders = [f for f in folders if f not in TO_BE_IGNORED] 36 | items = [] 37 | 38 | for label, folder in enumerate(folders): 39 | imnames = listdir_nohidden(os.path.join(image_dir, folder)) 40 | classname = classnames[folder] 41 | for imname in imnames: 42 | impath = os.path.join(image_dir, folder, imname) 43 | item = Datum(impath=impath, label=label, classname=classname) 44 | items.append(item) 45 | 46 | return items 47 | -------------------------------------------------------------------------------- /configs/LASP/vit_b16_c4_ep10_batch1_cls_t2t_5_wcl_25_g1_b.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 1 4 | TEST: 5 | BATCH_SIZE: 32 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.002 # 0.002 18 | MAX_EPOCH: 10 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 1 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | #WEIGHT_DECAY: 0.01 24 | 25 | TRAIN: 26 | PRINT_FREQ: 20 27 | 28 | MODEL: 29 | BACKBONE: 30 | NAME: "ViT-B/16" 31 | 32 | TRAINER: 33 | LASP: 34 | ENABLE: True 35 | ENABLE_CORRECTION: True 36 | FINETUNE_VIT_LN: True 37 | N_CTX: 4 38 | CTX_INIT: "a photo of a" 39 | LASP_PROMPTS: [ 40 | "a photo of a {}, a type of flower.", 41 | "a photo of a person doing {}.", 42 | "a centered satellite photo of {}.", 43 | "a photo of a {}, a type of aircraft.", 44 | "{} texture.", 45 | "itap of a {}.", 46 | "a bad photo of the {}.", 47 | "a origami {}.", 48 | "a photo of the large {}.", 49 | "a {} in a video game.", 50 | "art of the {}.", 51 | "a photo of the small {}.", 52 | "a photo of a {}.", 53 | "a photo of many {}.", 54 | "a photo of the hard to see {}.", 55 | "a low resolution photo of the {}.", 56 | "a rendering of a {}.", 57 | "a bad photo of the {}.", 58 | "a cropped photo of the {}.", 59 | "a pixelated photo of the {}.", 60 | "a bright photo of the {}.", 61 | "a cropped photo of a {}.", 62 | "a photo of the {}.", 63 | "a good photo of the {}.", 64 | "a rendering of the {}.", 65 | "a close-up photo of the {}.", 66 | "a low resolution photo of a {}.", 67 | "a rendition of the {}.", 68 | "a photo of the clean {}.", 69 | "a photo of a large {}.", 70 | "a blurry photo of a {}.", 71 | "a pixelated photo of a {}.", 72 | "itap of the {}.", 73 | "a jpeg corrupted photo of the {}.", 74 | "a good photo of a {}.", 75 | ] 76 | PREC: "amp" 77 | LASP_LOSS_WEIGHT: 5.0 78 | -------------------------------------------------------------------------------- /configs/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | 
BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 32 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.032 # 0.002 18 | MAX_EPOCH: 25 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 3 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | #WEIGHT_DECAY: 0.01 24 | 25 | TRAIN: 26 | PRINT_FREQ: 20 27 | 28 | MODEL: 29 | BACKBONE: 30 | NAME: "ViT-B/16" 31 | 32 | TRAINER: 33 | LASP: 34 | ENABLE: True 35 | ENABLE_CORRECTION: True 36 | FINETUNE_VIT_LN: True 37 | N_CTX: 4 38 | CTX_INIT: "a photo of a" 39 | LASP_PROMPTS: [ 40 | "a photo of a {}, a type of flower.", 41 | "a photo of a person doing {}.", 42 | "a centered satellite photo of {}.", 43 | "a photo of a {}, a type of aircraft.", 44 | "{} texture.", 45 | "itap of a {}.", 46 | "a bad photo of the {}.", 47 | "a origami {}.", 48 | "a photo of the large {}.", 49 | "a {} in a video game.", 50 | "art of the {}.", 51 | "a photo of the small {}.", 52 | "a photo of a {}.", 53 | "a photo of many {}.", 54 | "a photo of the hard to see {}.", 55 | "a low resolution photo of the {}.", 56 | "a rendering of a {}.", 57 | "a bad photo of the {}.", 58 | "a cropped photo of the {}.", 59 | "a pixelated photo of the {}.", 60 | "a bright photo of the {}.", 61 | "a cropped photo of a {}.", 62 | "a photo of the {}.", 63 | "a good photo of the {}.", 64 | "a rendering of the {}.", 65 | "a close-up photo of the {}.", 66 | "a low resolution photo of a {}.", 67 | "a rendition of the {}.", 68 | "a photo of the clean {}.", 69 | "a photo of a large {}.", 70 | "a blurry photo of a {}.", 71 | "a pixelated photo of a {}.", 72 | "itap of the {}.", 73 | "a jpeg corrupted photo of the {}.", 74 | "a good photo of a {}.", 75 | ] 76 | PREC: "amp" 77 | LASP_LOSS_WEIGHT: 5.0 78 | -------------------------------------------------------------------------------- /configs/LASP/vit_b16_c4_ep300_batch32_cls_t2t_5_wcl_25_g1_b_lr32.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 32 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.032 # 0.002 18 | MAX_EPOCH: 300 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 7 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | #WEIGHT_DECAY: 0.01 24 | 25 | TRAIN: 26 | PRINT_FREQ: 20 27 | 28 | MODEL: 29 | BACKBONE: 30 | NAME: "ViT-B/16" 31 | 32 | TRAINER: 33 | LASP: 34 | ENABLE: True 35 | ENABLE_CORRECTION: True 36 | FINETUNE_VIT_LN: True 37 | N_CTX: 4 38 | CTX_INIT: "a photo of a" 39 | LASP_PROMPTS: [ 40 | "a photo of a {}, a type of flower.", 41 | "a photo of a person doing {}.", 42 | "a centered satellite photo of {}.", 43 | "a photo of a {}, a type of aircraft.", 44 | "{} texture.", 45 | "itap of a {}.", 46 | "a bad photo of the {}.", 47 | "a origami {}.", 48 | "a photo of the large {}.", 49 | "a {} in a video game.", 50 | "art of the {}.", 51 | "a photo of the small {}.", 52 | "a photo of a {}.", 53 | "a photo of many {}.", 54 | "a photo of the hard to see {}.", 55 | "a low resolution photo of the {}.", 56 | "a rendering of a {}.", 57 | "a 
bad photo of the {}.", 58 | "a cropped photo of the {}.", 59 | "a pixelated photo of the {}.", 60 | "a bright photo of the {}.", 61 | "a cropped photo of a {}.", 62 | "a photo of the {}.", 63 | "a good photo of the {}.", 64 | "a rendering of the {}.", 65 | "a close-up photo of the {}.", 66 | "a low resolution photo of a {}.", 67 | "a rendition of the {}.", 68 | "a photo of the clean {}.", 69 | "a photo of a large {}.", 70 | "a blurry photo of a {}.", 71 | "a pixelated photo of a {}.", 72 | "itap of the {}.", 73 | "a jpeg corrupted photo of the {}.", 74 | "a good photo of a {}.", 75 | ] 76 | PREC: "amp" 77 | LASP_LOSS_WEIGHT: 5.0 78 | -------------------------------------------------------------------------------- /configs/LASP/vit_b16_c4_ep50_batch32_cls_t2t_10_wcl_25_g1_b_lr32.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | TRAIN_X: 3 | BATCH_SIZE: 32 4 | TEST: 5 | BATCH_SIZE: 32 6 | NUM_WORKERS: 8 7 | 8 | INPUT: 9 | SIZE: (224, 224) 10 | INTERPOLATION: "bicubic" 11 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 12 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 13 | TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] 14 | 15 | OPTIM: 16 | NAME: "sgd" 17 | LR: 0.032 # 0.002 18 | MAX_EPOCH: 50 19 | LR_SCHEDULER: "cosine" 20 | WARMUP_EPOCH: 5 21 | WARMUP_TYPE: "constant" 22 | WARMUP_CONS_LR: 1e-5 23 | #WEIGHT_DECAY: 0.01 24 | 25 | TRAIN: 26 | PRINT_FREQ: 20 27 | 28 | MODEL: 29 | BACKBONE: 30 | NAME: "ViT-B/16" 31 | 32 | TRAINER: 33 | LASP: 34 | ENABLE: True 35 | ENABLE_CORRECTION: True 36 | FINETUNE_VIT_LN: True 37 | N_CTX: 4 38 | CTX_INIT: "a photo of a" 39 | LASP_PROMPTS: [ 40 | "a photo of a {}, a type of flower.", 41 | "a photo of a person doing {}.", 42 | "a centered satellite photo of {}.", 43 | "a photo of a {}, a type of aircraft.", 44 | "{} texture.", 45 | "itap of a {}.", 46 | "a bad photo of the {}.", 47 | "a origami {}.", 48 | "a photo of the large {}.", 49 | "a {} in a video game.", 50 | "art of the {}.", 51 | "a photo of the small {}.", 52 | "a photo of a {}.", 53 | "a photo of many {}.", 54 | "a photo of the hard to see {}.", 55 | "a low resolution photo of the {}.", 56 | "a rendering of a {}.", 57 | "a bad photo of the {}.", 58 | "a cropped photo of the {}.", 59 | "a pixelated photo of the {}.", 60 | "a bright photo of the {}.", 61 | "a cropped photo of a {}.", 62 | "a photo of the {}.", 63 | "a good photo of the {}.", 64 | "a rendering of the {}.", 65 | "a close-up photo of the {}.", 66 | "a low resolution photo of a {}.", 67 | "a rendition of the {}.", 68 | "a photo of the clean {}.", 69 | "a photo of a large {}.", 70 | "a blurry photo of a {}.", 71 | "a pixelated photo of a {}.", 72 | "itap of the {}.", 73 | "a jpeg corrupted photo of the {}.", 74 | "a good photo of a {}.", 75 | ] 76 | PREC: "amp" 77 | LASP_LOSS_WEIGHT: 10.0 78 | -------------------------------------------------------------------------------- /datasets/food101.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | 4 | from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase 5 | from dassl.utils import mkdir_if_missing 6 | 7 | from .oxford_pets import OxfordPets 8 | from .dtd import DescribableTextures as DTD 9 | 10 | @DATASET_REGISTRY.register() 11 | class Food101(DatasetBase): 12 | 13 | dataset_dir = "food-101" 14 | 15 | def __init__(self, cfg): 16 | root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) 17 | self.dataset_dir = os.path.join(root, 
self.dataset_dir) 18 | self.image_dir = os.path.join(self.dataset_dir, "images") 19 | self.split_path = os.path.join(self.dataset_dir, "split_zhou_Food101.json") 20 | self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot") 21 | mkdir_if_missing(self.split_fewshot_dir) 22 | 23 | if os.path.exists(self.split_path): 24 | train, val, test = OxfordPets.read_split(self.split_path, self.image_dir) 25 | else: 26 | train, val, test = DTD.read_and_split_data(self.image_dir) 27 | OxfordPets.save_split(train, val, test, self.split_path, self.image_dir) 28 | 29 | num_shots = cfg.DATASET.NUM_SHOTS 30 | if num_shots >= 1: 31 | seed = cfg.SEED 32 | preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl") 33 | 34 | if os.path.exists(preprocessed): 35 | print(f"Loading preprocessed few-shot data from {preprocessed}") 36 | with open(preprocessed, "rb") as file: 37 | data = pickle.load(file) 38 | train, val = data["train"], data["val"] 39 | else: 40 | train = self.generate_fewshot_dataset(train, num_shots=num_shots) 41 | val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4)) 42 | data = {"train": train, "val": val} 43 | print(f"Saving preprocessed few-shot data to {preprocessed}") 44 | with open(preprocessed, "wb") as file: 45 | pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL) 46 | 47 | self.all_class_names = list(set(OxfordPets.get_all_classnames(train, val, test))) 48 | subsample = cfg.DATASET.SUBSAMPLE_CLASSES 49 | train, val, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample) 50 | 51 | super().__init__(train_x=train, val=val, test=test) 52 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | output/ -------------------------------------------------------------------------------- /datasets/caltech101.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | 4 | from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase 5 | from dassl.utils import mkdir_if_missing 6 | 7 | from .oxford_pets import OxfordPets 8 | from .dtd import DescribableTextures as DTD 9 | 10 | IGNORED = ["BACKGROUND_Google", "Faces_easy"] 11 | NEW_CNAMES = { 12 | "airplanes": "airplane", 13 | "Faces": "face", 14 | "Leopards": "leopard", 15 | "Motorbikes": "motorbike", 16 | } 17 | 18 | 19 | @DATASET_REGISTRY.register() 20 | class Caltech101(DatasetBase): 21 | 22 | dataset_dir = "caltech-101" 23 | 24 | def __init__(self, cfg): 25 | root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) 26 | self.dataset_dir = os.path.join(root, self.dataset_dir) 27 | self.image_dir = os.path.join(self.dataset_dir, "101_ObjectCategories") 28 | self.split_path = os.path.join(self.dataset_dir, "split_zhou_Caltech101.json") 29 | self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot") 30 | mkdir_if_missing(self.split_fewshot_dir) 31 | 32 | if os.path.exists(self.split_path): 33 | train, val, test = OxfordPets.read_split(self.split_path, self.image_dir) 34 | else: 35 | train, val, test = DTD.read_and_split_data(self.image_dir, ignored=IGNORED, new_cnames=NEW_CNAMES) 36 | OxfordPets.save_split(train, val, test, self.split_path, self.image_dir) 37 | 38 | num_shots = cfg.DATASET.NUM_SHOTS 39 | if num_shots >= 1: 40 | seed = cfg.SEED 41 | preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl") 42 | 43 | if os.path.exists(preprocessed): 44 | print(f"Loading preprocessed few-shot data from {preprocessed}") 45 | with open(preprocessed, "rb") as file: 46 | data = pickle.load(file) 47 | train, val = data["train"], data["val"] 48 | else: 49 | train = self.generate_fewshot_dataset(train, num_shots=num_shots) 50 | val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4)) 51 | data = {"train": train, "val": val} 52 | print(f"Saving preprocessed few-shot data to {preprocessed}") 53 | with open(preprocessed, "wb") as file: 54 | pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL) 55 | 56 | self.all_class_names = OxfordPets.get_all_classnames(train, val, test) 57 | subsample = cfg.DATASET.SUBSAMPLE_CLASSES 58 | train, val, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample) 59 | 60 | super().__init__(train_x=train, val=val, test=test) 61 | 
-------------------------------------------------------------------------------- /datasets/fgvc_aircraft.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | 4 | from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase 5 | from dassl.utils import mkdir_if_missing 6 | 7 | from .oxford_pets import OxfordPets 8 | 9 | 10 | @DATASET_REGISTRY.register() 11 | class FGVCAircraft(DatasetBase): 12 | 13 | dataset_dir = "fgvc_aircraft" 14 | 15 | def __init__(self, cfg): 16 | root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) 17 | self.dataset_dir = os.path.join(root, self.dataset_dir) 18 | self.image_dir = os.path.join(self.dataset_dir, "images") 19 | self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot") 20 | mkdir_if_missing(self.split_fewshot_dir) 21 | 22 | classnames = [] 23 | with open(os.path.join(self.dataset_dir, "variants.txt"), "r") as f: 24 | lines = f.readlines() 25 | for line in lines: 26 | classnames.append(line.strip()) 27 | cname2lab = {c: i for i, c in enumerate(classnames)} 28 | 29 | train = self.read_data(cname2lab, "images_variant_train.txt") 30 | val = self.read_data(cname2lab, "images_variant_val.txt") 31 | test = self.read_data(cname2lab, "images_variant_test.txt") 32 | 33 | num_shots = cfg.DATASET.NUM_SHOTS 34 | if num_shots >= 1: 35 | seed = cfg.SEED 36 | preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl") 37 | 38 | if os.path.exists(preprocessed): 39 | print(f"Loading preprocessed few-shot data from {preprocessed}") 40 | with open(preprocessed, "rb") as file: 41 | data = pickle.load(file) 42 | train, val = data["train"], data["val"] 43 | else: 44 | train = self.generate_fewshot_dataset(train, num_shots=num_shots) 45 | val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4)) 46 | data = {"train": train, "val": val} 47 | print(f"Saving preprocessed few-shot data to {preprocessed}") 48 | with open(preprocessed, "wb") as file: 49 | pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL) 50 | 51 | self.all_class_names = OxfordPets.get_all_classnames(train, val, test) 52 | subsample = cfg.DATASET.SUBSAMPLE_CLASSES 53 | train, val, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample) 54 | 55 | super().__init__(train_x=train, val=val, test=test) 56 | 57 | def read_data(self, cname2lab, split_file): 58 | filepath = os.path.join(self.dataset_dir, split_file) 59 | items = [] 60 | 61 | with open(filepath, "r") as f: 62 | lines = f.readlines() 63 | for line in lines: 64 | line = line.strip().split(" ") 65 | imname = line[0] + ".jpg" 66 | classname = " ".join(line[1:]) 67 | impath = os.path.join(self.image_dir, imname) 68 | label = cname2lab[classname] 69 | item = Datum(impath=impath, label=label, classname=classname) 70 | items.append(item) 71 | 72 | return items 73 | -------------------------------------------------------------------------------- /datasets/eurosat.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | 4 | from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase 5 | from dassl.utils import mkdir_if_missing 6 | 7 | from .oxford_pets import OxfordPets 8 | from .dtd import DescribableTextures as DTD 9 | 10 | NEW_CNAMES = { 11 | "AnnualCrop": "Annual Crop Land", 12 | "Forest": "Forest", 13 | "HerbaceousVegetation": "Herbaceous Vegetation Land", 14 | "Highway": "Highway or Road", 15 | "Industrial": "Industrial Buildings", 
16 | "Pasture": "Pasture Land", 17 | "PermanentCrop": "Permanent Crop Land", 18 | "Residential": "Residential Buildings", 19 | "River": "River", 20 | "SeaLake": "Sea or Lake", 21 | } 22 | 23 | 24 | @DATASET_REGISTRY.register() 25 | class EuroSAT(DatasetBase): 26 | 27 | dataset_dir = "eurosat" 28 | 29 | def __init__(self, cfg): 30 | root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) 31 | self.dataset_dir = os.path.join(root, self.dataset_dir) 32 | self.image_dir = os.path.join(self.dataset_dir, "2750") 33 | self.split_path = os.path.join(self.dataset_dir, "split_zhou_EuroSAT.json") 34 | self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot") 35 | mkdir_if_missing(self.split_fewshot_dir) 36 | 37 | if os.path.exists(self.split_path): 38 | train, val, test = OxfordPets.read_split(self.split_path, self.image_dir) 39 | else: 40 | train, val, test = DTD.read_and_split_data(self.image_dir, new_cnames=NEW_CNAMES) 41 | OxfordPets.save_split(train, val, test, self.split_path, self.image_dir) 42 | 43 | num_shots = cfg.DATASET.NUM_SHOTS 44 | if num_shots >= 1: 45 | seed = cfg.SEED 46 | preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl") 47 | 48 | if os.path.exists(preprocessed): 49 | print(f"Loading preprocessed few-shot data from {preprocessed}") 50 | with open(preprocessed, "rb") as file: 51 | data = pickle.load(file) 52 | train, val = data["train"], data["val"] 53 | else: 54 | train = self.generate_fewshot_dataset(train, num_shots=num_shots) 55 | val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4)) 56 | data = {"train": train, "val": val} 57 | print(f"Saving preprocessed few-shot data to {preprocessed}") 58 | with open(preprocessed, "wb") as file: 59 | pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL) 60 | 61 | self.all_class_names = OxfordPets.get_all_classnames(train, val, test) 62 | subsample = cfg.DATASET.SUBSAMPLE_CLASSES 63 | train, val, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample) 64 | 65 | super().__init__(train_x=train, val=val, test=test) 66 | 67 | def update_classname(self, dataset_old): 68 | dataset_new = [] 69 | for item_old in dataset_old: 70 | cname_old = item_old.classname 71 | cname_new = NEW_CLASSNAMES[cname_old] 72 | item_new = Datum(impath=item_old.impath, label=item_old.label, classname=cname_new) 73 | dataset_new.append(item_new) 74 | return dataset_new 75 | -------------------------------------------------------------------------------- /datasets/stanford_cars.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | from scipy.io import loadmat 4 | 5 | from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase 6 | from dassl.utils import mkdir_if_missing 7 | 8 | from .oxford_pets import OxfordPets 9 | 10 | 11 | @DATASET_REGISTRY.register() 12 | class StanfordCars(DatasetBase): 13 | 14 | dataset_dir = "stanford_cars" 15 | 16 | def __init__(self, cfg): 17 | root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) 18 | self.dataset_dir = os.path.join(root, self.dataset_dir) 19 | self.split_path = os.path.join(self.dataset_dir, "split_zhou_StanfordCars.json") 20 | self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot") 21 | mkdir_if_missing(self.split_fewshot_dir) 22 | 23 | if os.path.exists(self.split_path): 24 | train, val, test = OxfordPets.read_split(self.split_path, self.dataset_dir) 25 | else: 26 | trainval_file = os.path.join(self.dataset_dir, "devkit", 
"cars_train_annos.mat") 27 | test_file = os.path.join(self.dataset_dir, "cars_test_annos_withlabels.mat") 28 | meta_file = os.path.join(self.dataset_dir, "devkit", "cars_meta.mat") 29 | trainval = self.read_data("cars_train", trainval_file, meta_file) 30 | test = self.read_data("cars_test", test_file, meta_file) 31 | train, val = OxfordPets.split_trainval(trainval) 32 | OxfordPets.save_split(train, val, test, self.split_path, self.dataset_dir) 33 | 34 | num_shots = cfg.DATASET.NUM_SHOTS 35 | if num_shots >= 1: 36 | seed = cfg.SEED 37 | preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl") 38 | 39 | if os.path.exists(preprocessed): 40 | print(f"Loading preprocessed few-shot data from {preprocessed}") 41 | with open(preprocessed, "rb") as file: 42 | data = pickle.load(file) 43 | train, val = data["train"], data["val"] 44 | else: 45 | train = self.generate_fewshot_dataset(train, num_shots=num_shots) 46 | val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4)) 47 | data = {"train": train, "val": val} 48 | print(f"Saving preprocessed few-shot data to {preprocessed}") 49 | with open(preprocessed, "wb") as file: 50 | pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL) 51 | 52 | self.all_class_names = OxfordPets.get_all_classnames(train, val, test) 53 | subsample = cfg.DATASET.SUBSAMPLE_CLASSES 54 | train, val, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample) 55 | 56 | super().__init__(train_x=train, val=val, test=test) 57 | 58 | def read_data(self, image_dir, anno_file, meta_file): 59 | anno_file = loadmat(anno_file)["annotations"][0] 60 | meta_file = loadmat(meta_file)["class_names"][0] 61 | items = [] 62 | 63 | for i in range(len(anno_file)): 64 | imname = anno_file[i]["fname"][0] 65 | impath = os.path.join(self.dataset_dir, image_dir, imname) 66 | label = anno_file[i]["class"][0, 0] 67 | label = int(label) - 1 # convert to 0-based index 68 | classname = meta_file[label][0] 69 | names = classname.split(" ") 70 | year = names.pop(-1) 71 | names.insert(0, year) 72 | classname = " ".join(names) 73 | item = Datum(impath=impath, label=label, classname=classname) 74 | items.append(item) 75 | 76 | return items 77 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## LASP: Text-to-Text Optimization for Language-Aware Soft Prompting of Vision & Language Models 2 | 3 | PDF with the paper: [[here]](https://www.adrianbulat.com/downloads/CVPR2023/LASP.pdf) 4 | 5 | Soft prompt learning has recently emerged as one of the 6 | methods of choice for adapting V&L models to a down- 7 | stream task using a few training examples. However, cur- 8 | rent methods significantly overfit the training data, suffering from large accuracy degradation when tested on unseen classes from the same domain. To this end, in this paper, we make the following 4 contributions: (1) To alleviate base class overfitting, we propose a novel Language-Aware Soft Prompting (LASP) learning method by means of a text-to-text cross-entropy loss that maximizes the probability of the learned prompts to be correctly classified with respect to pre-defined hand-crafted textual prompts. (2) To increase the representation capacity of the prompts, we propose grouped LASP where each group of prompts is optimized with respect to a separate subset of textual prompts. 
(3) We identify a visual-language misalignment introduced by prompt learning and LASP, and more importantly, propose a re-calibration mechanism to address it. (4) We show that LASP is inherently amenable to including, during training, virtual classes, i.e. class names for which no visual samples are available, further increasing the robustness of the learned prompts. Through evaluations on 11 datasets, we show that our approach (a) significantly outperforms all prior works on soft prompting, and (b) matches and surpasses, for the first time, the accuracy on novel classes obtained by hand-crafted prompts and CLIP for 8 out of 11 test datasets. 9 | 10 | ## Setup 11 | 12 | Install the required dependencies: 13 | ```bash 14 | pip install -r requirements.txt 15 | ``` 16 | Download and prepare the datasets using the instructions listed [here](https://github.com/KaiyangZhou/CoOp/blob/main/DATASETS.md). Alternatively, you can use the download helper utility, simply running: ``python datasets/utils/download_datasets.py --root path_to_datasets`` 17 | 18 | ## Usage 19 | 20 | *Note:* The schedulers were tweaked compared with the original paper in the interest of training speed, switching from the batch_size=1 setup of CoCoOp to batch_size=N. As a result, the numbers differ from those reported in the paper. Overall, the new numbers are higher than the original ones, even for G=1: 21 | 22 | Evaluation results: 23 | 24 | | | ImageNet | Caltech101 | Oxford Pets | Stanford Cars | Flowers 102 | Food 101 | FGVC | SUN397 | DTD | EuroSAT | UCF101 | Avg. | 25 | | ---- | -------- | ---------- | ----------- | ------------- | ----------- | -------- | ----- | ------ | ----- | ------- | ------ | ----- | 26 | | Base | 76.25 | 98.45 | 95.64 | 76.25 | 97.62 | 90.84 | 35.51 | 81.34 | 80.63 | 94.57 | 85.47 | 82.96 | 27 | | New | 71.17 | 94.43 | 97.39 | 71.99 | 74.16 | 91.62 | 38.15 | 78.45 | 63.36 | 85.05 | 77.33 | 76.64 | 28 | | H | 73.62 | 96.39 | 96.50 | 74.06 | 84.29 | 91.23 | 36.78 | 79.87 | 70.96 | 89.55 | 81.20 | 79.67 | 29 | 30 | Logs can be found in the ```logs/``` folder. An example training and evaluation command is shown at the end of this README. 31 | 32 | 33 | ## Citation 34 | 35 | ```bibtex 36 | @inproceedings{bulat2023lasp, 37 | title={LASP: Text-to-Text Optimization for Language-Aware Soft Prompting of Vision \& Language Models}, 38 | author={Bulat, Adrian and Tzimiropoulos, Georgios}, 39 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, 40 | pages={23232--23241}, 41 | year={2023} 42 | } 43 | ``` 44 | 45 | ## Acknowledgment 46 | 47 | Code based on the [CoOp repository](https://github.com/KaiyangZhou/CoOp). 
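Example base-to-new training and evaluation (a minimal sketch: the dataset root, the output directories, and the `--load-epoch 50` value below are placeholders that mirror the arguments recorded in the bundled Caltech101 log; adapt them to your setup — the shell scripts under `scripts/` such as `base2new_train.sh` and `base2new_test.sh` are provided to automate this workflow):

```bash
# Train LASP on the base half of the Caltech101 classes with 16 shots per class
python train.py \
  --root /path/to/datasets \
  --seed 1 \
  --trainer LASP \
  --dataset-config-file configs/datasets/caltech101.yaml \
  --config-file configs/LASP/vit_b16_c4_ep50_batch32_cls_t2t_10_wcl_25_g1_b_lr32.yaml \
  --output-dir output/base2new/train_base/caltech101/shots_16/LASP/vit_b16_c4_ep50_batch32_cls_t2t_10_wcl_25_g1_b_lr32/seed1 \
  DATASET.NUM_SHOTS 16 DATASET.SUBSAMPLE_CLASSES base

# Evaluate the learned prompts on the held-out (new) half of the classes
python train.py \
  --root /path/to/datasets \
  --seed 1 \
  --trainer LASP \
  --dataset-config-file configs/datasets/caltech101.yaml \
  --config-file configs/LASP/vit_b16_c4_ep50_batch32_cls_t2t_10_wcl_25_g1_b_lr32.yaml \
  --output-dir output/base2new/test_new/caltech101/shots_16/LASP/vit_b16_c4_ep50_batch32_cls_t2t_10_wcl_25_g1_b_lr32/seed1 \
  --model-dir output/base2new/train_base/caltech101/shots_16/LASP/vit_b16_c4_ep50_batch32_cls_t2t_10_wcl_25_g1_b_lr32/seed1 \
  --load-epoch 50 \
  --eval-only \
  DATASET.NUM_SHOTS 16 DATASET.SUBSAMPLE_CLASSES new
```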
-------------------------------------------------------------------------------- /datasets/sun397.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | 4 | from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase 5 | from dassl.utils import mkdir_if_missing 6 | 7 | from .oxford_pets import OxfordPets 8 | 9 | 10 | @DATASET_REGISTRY.register() 11 | class SUN397(DatasetBase): 12 | 13 | dataset_dir = "sun397" 14 | 15 | def __init__(self, cfg): 16 | root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) 17 | self.dataset_dir = os.path.join(root, self.dataset_dir) 18 | self.image_dir = os.path.join(self.dataset_dir, "SUN397") 19 | self.split_path = os.path.join(self.dataset_dir, "split_zhou_SUN397.json") 20 | self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot") 21 | mkdir_if_missing(self.split_fewshot_dir) 22 | 23 | if os.path.exists(self.split_path): 24 | train, val, test = OxfordPets.read_split(self.split_path, self.image_dir) 25 | else: 26 | classnames = [] 27 | with open(os.path.join(self.dataset_dir, "ClassName.txt"), "r") as f: 28 | lines = f.readlines() 29 | for line in lines: 30 | line = line.strip()[1:] # remove / 31 | classnames.append(line) 32 | cname2lab = {c: i for i, c in enumerate(classnames)} 33 | trainval = self.read_data(cname2lab, "Training_01.txt") 34 | test = self.read_data(cname2lab, "Testing_01.txt") 35 | train, val = OxfordPets.split_trainval(trainval) 36 | OxfordPets.save_split(train, val, test, self.split_path, self.image_dir) 37 | 38 | num_shots = cfg.DATASET.NUM_SHOTS 39 | if num_shots >= 1: 40 | seed = cfg.SEED 41 | preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl") 42 | 43 | if os.path.exists(preprocessed): 44 | print(f"Loading preprocessed few-shot data from {preprocessed}") 45 | with open(preprocessed, "rb") as file: 46 | data = pickle.load(file) 47 | train, val = data["train"], data["val"] 48 | else: 49 | train = self.generate_fewshot_dataset(train, num_shots=num_shots) 50 | val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4)) 51 | data = {"train": train, "val": val} 52 | print(f"Saving preprocessed few-shot data to {preprocessed}") 53 | with open(preprocessed, "wb") as file: 54 | pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL) 55 | 56 | self.all_class_names = OxfordPets.get_all_classnames(train, val, test) 57 | subsample = cfg.DATASET.SUBSAMPLE_CLASSES 58 | train, val, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample) 59 | 60 | super().__init__(train_x=train, val=val, test=test) 61 | 62 | def read_data(self, cname2lab, text_file): 63 | text_file = os.path.join(self.dataset_dir, text_file) 64 | items = [] 65 | 66 | with open(text_file, "r") as f: 67 | lines = f.readlines() 68 | for line in lines: 69 | imname = line.strip()[1:] # remove / 70 | classname = os.path.dirname(imname) 71 | label = cname2lab[classname] 72 | impath = os.path.join(self.image_dir, imname) 73 | 74 | names = classname.split("/")[1:] # remove 1st letter 75 | names = names[::-1] # put words like indoor/outdoor at first 76 | classname = " ".join(names) 77 | 78 | item = Datum(impath=impath, label=label, classname=classname) 79 | items.append(item) 80 | 81 | return items 82 | -------------------------------------------------------------------------------- /datasets/ucf101.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import re 4 | 5 
| from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase 6 | from dassl.utils import mkdir_if_missing 7 | 8 | from .oxford_pets import OxfordPets 9 | 10 | 11 | @DATASET_REGISTRY.register() 12 | class UCF101(DatasetBase): 13 | 14 | dataset_dir = "ucf101" 15 | 16 | def __init__(self, cfg): 17 | root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) 18 | self.dataset_dir = os.path.join(root, self.dataset_dir) 19 | self.image_dir = os.path.join(self.dataset_dir, "UCF-101-midframes") 20 | self.split_path = os.path.join(self.dataset_dir, "split_zhou_UCF101.json") 21 | self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot") 22 | mkdir_if_missing(self.split_fewshot_dir) 23 | 24 | if os.path.exists(self.split_path): 25 | train, val, test = OxfordPets.read_split(self.split_path, self.image_dir) 26 | else: 27 | cname2lab = {} 28 | filepath = os.path.join(self.dataset_dir, "ucfTrainTestlist/classInd.txt") 29 | with open(filepath, "r") as f: 30 | lines = f.readlines() 31 | for line in lines: 32 | label, classname = line.strip().split(" ") 33 | label = int(label) - 1 # conver to 0-based index 34 | cname2lab[classname] = label 35 | 36 | trainval = self.read_data(cname2lab, "ucfTrainTestlist/trainlist01.txt") 37 | test = self.read_data(cname2lab, "ucfTrainTestlist/testlist01.txt") 38 | train, val = OxfordPets.split_trainval(trainval) 39 | OxfordPets.save_split(train, val, test, self.split_path, self.image_dir) 40 | 41 | num_shots = cfg.DATASET.NUM_SHOTS 42 | if num_shots >= 1: 43 | seed = cfg.SEED 44 | preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl") 45 | 46 | if os.path.exists(preprocessed): 47 | print(f"Loading preprocessed few-shot data from {preprocessed}") 48 | with open(preprocessed, "rb") as file: 49 | data = pickle.load(file) 50 | train, val = data["train"], data["val"] 51 | else: 52 | train = self.generate_fewshot_dataset(train, num_shots=num_shots) 53 | val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4)) 54 | data = {"train": train, "val": val} 55 | print(f"Saving preprocessed few-shot data to {preprocessed}") 56 | with open(preprocessed, "wb") as file: 57 | pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL) 58 | 59 | self.all_class_names = OxfordPets.get_all_classnames(train, val, test) 60 | subsample = cfg.DATASET.SUBSAMPLE_CLASSES 61 | train, val, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample) 62 | 63 | super().__init__(train_x=train, val=val, test=test) 64 | 65 | def read_data(self, cname2lab, text_file): 66 | text_file = os.path.join(self.dataset_dir, text_file) 67 | items = [] 68 | 69 | with open(text_file, "r") as f: 70 | lines = f.readlines() 71 | for line in lines: 72 | line = line.strip().split(" ")[0] # trainlist: filename, label 73 | action, filename = line.split("/") 74 | label = cname2lab[action] 75 | 76 | elements = re.findall("[A-Z][^A-Z]*", action) 77 | renamed_action = "_".join(elements) 78 | 79 | filename = filename.replace(".avi", ".jpg") 80 | impath = os.path.join(self.image_dir, renamed_action, filename) 81 | 82 | item = Datum(impath=impath, label=label, classname=renamed_action) 83 | items.append(item) 84 | 85 | return items 86 | -------------------------------------------------------------------------------- /datasets/imagenet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | from collections import OrderedDict 4 | 5 | from dassl.data.datasets import DATASET_REGISTRY, Datum, 
DatasetBase 6 | from dassl.utils import listdir_nohidden, mkdir_if_missing 7 | 8 | from .oxford_pets import OxfordPets 9 | 10 | 11 | @DATASET_REGISTRY.register() 12 | class ImageNet(DatasetBase): 13 | 14 | dataset_dir = "imagenet" 15 | 16 | def __init__(self, cfg): 17 | root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) 18 | self.dataset_dir = os.path.join(root, self.dataset_dir) 19 | self.image_dir = os.path.join(self.dataset_dir, "images") 20 | self.preprocessed = os.path.join(self.dataset_dir, "preprocessed.pkl") 21 | self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot") 22 | mkdir_if_missing(self.split_fewshot_dir) 23 | 24 | if os.path.exists(self.preprocessed): 25 | with open(self.preprocessed, "rb") as f: 26 | preprocessed = pickle.load(f) 27 | train = preprocessed["train"] 28 | test = preprocessed["test"] 29 | else: 30 | text_file = os.path.join(self.dataset_dir, "classnames.txt") 31 | classnames = self.read_classnames(text_file) 32 | train = self.read_data(classnames, "train") 33 | # Follow standard practice to perform evaluation on the val set 34 | # Also used as the val set (so evaluate the last-step model) 35 | test = self.read_data(classnames, "val") 36 | 37 | preprocessed = {"train": train, "test": test} 38 | with open(self.preprocessed, "wb") as f: 39 | pickle.dump(preprocessed, f, protocol=pickle.HIGHEST_PROTOCOL) 40 | 41 | num_shots = cfg.DATASET.NUM_SHOTS 42 | if num_shots >= 1: 43 | seed = cfg.SEED 44 | preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl") 45 | 46 | if os.path.exists(preprocessed): 47 | print(f"Loading preprocessed few-shot data from {preprocessed}") 48 | with open(preprocessed, "rb") as file: 49 | data = pickle.load(file) 50 | train = data["train"] 51 | else: 52 | train = self.generate_fewshot_dataset(train, num_shots=num_shots) 53 | data = {"train": train} 54 | print(f"Saving preprocessed few-shot data to {preprocessed}") 55 | with open(preprocessed, "wb") as file: 56 | pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL) 57 | 58 | self.all_class_names = OxfordPets.get_all_classnames(train, test) 59 | 60 | subsample = cfg.DATASET.SUBSAMPLE_CLASSES 61 | train, test = OxfordPets.subsample_classes(train, test, subsample=subsample) 62 | 63 | super().__init__(train_x=train, val=test, test=test) 64 | 65 | @staticmethod 66 | def read_classnames(text_file): 67 | """Return a dictionary containing 68 | key-value pairs of : . 
69 | """ 70 | classnames = OrderedDict() 71 | with open(text_file, "r") as f: 72 | lines = f.readlines() 73 | for line in lines: 74 | line = line.strip().split(" ") 75 | folder = line[0] 76 | classname = " ".join(line[1:]) 77 | classnames[folder] = classname 78 | return classnames 79 | 80 | def read_data(self, classnames, split_dir): 81 | split_dir = os.path.join(self.image_dir, split_dir) 82 | folders = sorted(f.name for f in os.scandir(split_dir) if f.is_dir()) 83 | items = [] 84 | 85 | for label, folder in enumerate(folders): 86 | imnames = listdir_nohidden(os.path.join(split_dir, folder)) 87 | classname = classnames[folder] 88 | for imname in imnames: 89 | impath = os.path.join(split_dir, folder, imname) 90 | item = Datum(impath=impath, label=label, classname=classname) 91 | items.append(item) 92 | 93 | return items 94 | -------------------------------------------------------------------------------- /datasets/oxford_flowers.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import random 4 | from scipy.io import loadmat 5 | from collections import defaultdict 6 | 7 | from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase 8 | from dassl.utils import read_json, mkdir_if_missing 9 | 10 | from .oxford_pets import OxfordPets 11 | 12 | 13 | 14 | @DATASET_REGISTRY.register() 15 | class OxfordFlowers(DatasetBase): 16 | 17 | dataset_dir = "oxford_flowers" 18 | 19 | def __init__(self, cfg): 20 | root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) 21 | self.dataset_dir = os.path.join(root, self.dataset_dir) 22 | self.image_dir = os.path.join(self.dataset_dir, "jpg") 23 | self.label_file = os.path.join(self.dataset_dir, "imagelabels.mat") 24 | self.lab2cname_file = os.path.join(self.dataset_dir, "cat_to_name.json") 25 | self.split_path = os.path.join(self.dataset_dir, "split_zhou_OxfordFlowers.json") 26 | self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot") 27 | mkdir_if_missing(self.split_fewshot_dir) 28 | 29 | if os.path.exists(self.split_path): 30 | train, val, test = OxfordPets.read_split(self.split_path, self.image_dir) 31 | else: 32 | train, val, test = self.read_data() 33 | OxfordPets.save_split(train, val, test, self.split_path, self.image_dir) 34 | 35 | num_shots = cfg.DATASET.NUM_SHOTS 36 | if num_shots >= 1: 37 | seed = cfg.SEED 38 | preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl") 39 | 40 | if os.path.exists(preprocessed): 41 | print(f"Loading preprocessed few-shot data from {preprocessed}") 42 | with open(preprocessed, "rb") as file: 43 | data = pickle.load(file) 44 | train, val = data["train"], data["val"] 45 | else: 46 | train = self.generate_fewshot_dataset(train, num_shots=num_shots) 47 | val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4)) 48 | data = {"train": train, "val": val} 49 | print(f"Saving preprocessed few-shot data to {preprocessed}") 50 | with open(preprocessed, "wb") as file: 51 | pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL) 52 | 53 | 54 | self.all_class_names = OxfordPets.get_all_classnames(train, val, test) 55 | subsample = cfg.DATASET.SUBSAMPLE_CLASSES 56 | train, val, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample) 57 | 58 | super().__init__(train_x=train, val=val, test=test) 59 | 60 | def read_data(self): 61 | tracker = defaultdict(list) 62 | label_file = loadmat(self.label_file)["labels"][0] 63 | for i, label in enumerate(label_file): 64 | imname = 
f"image_{str(i + 1).zfill(5)}.jpg" 65 | impath = os.path.join(self.image_dir, imname) 66 | label = int(label) 67 | tracker[label].append(impath) 68 | 69 | print("Splitting data into 50% train, 20% val, and 30% test") 70 | 71 | def _collate(ims, y, c): 72 | items = [] 73 | for im in ims: 74 | item = Datum(impath=im, label=y - 1, classname=c) # convert to 0-based label 75 | items.append(item) 76 | return items 77 | 78 | lab2cname = read_json(self.lab2cname_file) 79 | train, val, test = [], [], [] 80 | for label, impaths in tracker.items(): 81 | random.shuffle(impaths) 82 | n_total = len(impaths) 83 | n_train = round(n_total * 0.5) 84 | n_val = round(n_total * 0.2) 85 | n_test = n_total - n_train - n_val 86 | assert n_train > 0 and n_val > 0 and n_test > 0 87 | cname = lab2cname[str(label)] 88 | train.extend(_collate(impaths[:n_train], label, cname)) 89 | val.extend(_collate(impaths[n_train : n_train + n_val], label, cname)) 90 | test.extend(_collate(impaths[n_train + n_val :], label, cname)) 91 | 92 | return train, val, test 93 | -------------------------------------------------------------------------------- /datasets/dtd.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import random 4 | 5 | from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase 6 | from dassl.utils import listdir_nohidden, mkdir_if_missing 7 | 8 | from .oxford_pets import OxfordPets 9 | 10 | 11 | @DATASET_REGISTRY.register() 12 | class DescribableTextures(DatasetBase): 13 | 14 | dataset_dir = "dtd" 15 | 16 | def __init__(self, cfg): 17 | root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) 18 | self.dataset_dir = os.path.join(root, self.dataset_dir) 19 | self.image_dir = os.path.join(self.dataset_dir, "images") 20 | self.split_path = os.path.join(self.dataset_dir, "split_zhou_DescribableTextures.json") 21 | self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot") 22 | mkdir_if_missing(self.split_fewshot_dir) 23 | 24 | if os.path.exists(self.split_path): 25 | train, val, test = OxfordPets.read_split(self.split_path, self.image_dir) 26 | else: 27 | train, val, test = self.read_and_split_data(self.image_dir) 28 | OxfordPets.save_split(train, val, test, self.split_path, self.image_dir) 29 | 30 | num_shots = cfg.DATASET.NUM_SHOTS 31 | if num_shots >= 1: 32 | seed = cfg.SEED 33 | preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl") 34 | 35 | if os.path.exists(preprocessed): 36 | print(f"Loading preprocessed few-shot data from {preprocessed}") 37 | with open(preprocessed, "rb") as file: 38 | data = pickle.load(file) 39 | train, val = data["train"], data["val"] 40 | else: 41 | train = self.generate_fewshot_dataset(train, num_shots=num_shots) 42 | val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4)) 43 | data = {"train": train, "val": val} 44 | print(f"Saving preprocessed few-shot data to {preprocessed}") 45 | with open(preprocessed, "wb") as file: 46 | pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL) 47 | 48 | self.all_class_names = OxfordPets.get_all_classnames(train, val, test) 49 | subsample = cfg.DATASET.SUBSAMPLE_CLASSES 50 | train, val, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample) 51 | 52 | super().__init__(train_x=train, val=val, test=test) 53 | 54 | @staticmethod 55 | def read_and_split_data(image_dir, p_trn=0.5, p_val=0.2, ignored=[], new_cnames=None): 56 | # The data are supposed to be organized into the following 
structure 57 | # ============= 58 | # images/ 59 | # dog/ 60 | # cat/ 61 | # horse/ 62 | # ============= 63 | categories = listdir_nohidden(image_dir) 64 | categories = [c for c in categories if c not in ignored] 65 | categories.sort() 66 | 67 | p_tst = 1 - p_trn - p_val 68 | print(f"Splitting into {p_trn:.0%} train, {p_val:.0%} val, and {p_tst:.0%} test") 69 | 70 | def _collate(ims, y, c): 71 | items = [] 72 | for im in ims: 73 | item = Datum(impath=im, label=y, classname=c) # is already 0-based 74 | items.append(item) 75 | return items 76 | 77 | train, val, test = [], [], [] 78 | for label, category in enumerate(categories): 79 | category_dir = os.path.join(image_dir, category) 80 | images = listdir_nohidden(category_dir) 81 | images = [os.path.join(category_dir, im) for im in images] 82 | random.shuffle(images) 83 | n_total = len(images) 84 | n_train = round(n_total * p_trn) 85 | n_val = round(n_total * p_val) 86 | n_test = n_total - n_train - n_val 87 | assert n_train > 0 and n_val > 0 and n_test > 0 88 | 89 | if new_cnames is not None and category in new_cnames: 90 | category = new_cnames[category] 91 | 92 | train.extend(_collate(images[:n_train], label, category)) 93 | val.extend(_collate(images[n_train : n_train + n_val], label, category)) 94 | test.extend(_collate(images[n_train + n_val :], label, category)) 95 | 96 | return train, val, test 97 | -------------------------------------------------------------------------------- /clip/simple_tokenizer.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import html 3 | import os 4 | from functools import lru_cache 5 | 6 | import ftfy 7 | import regex as re 8 | 9 | 10 | @lru_cache() 11 | def default_bpe(): 12 | return os.path.join(os.path.dirname(os.path.abspath(__file__)), "bpe_simple_vocab_16e6.txt.gz") 13 | 14 | 15 | @lru_cache() 16 | def bytes_to_unicode(): 17 | """ 18 | Returns list of utf-8 byte and a corresponding list of unicode strings. 19 | The reversible bpe codes work on unicode strings. 20 | This means you need a large # of unicode characters in your vocab if you want to avoid UNKs. 21 | When you're at something like a 10B token dataset you end up needing around 5K for decent coverage. 22 | This is a signficant percentage of your normal, say, 32K bpe vocab. 23 | To avoid that, we want lookup tables between utf-8 bytes and unicode strings. 24 | And avoids mapping to whitespace/control characters the bpe code barfs on. 25 | """ 26 | bs = list(range(ord("!"), ord("~")+1))+list(range(ord("¡"), ord("¬")+1))+list(range(ord("®"), ord("ÿ")+1)) 27 | cs = bs[:] 28 | n = 0 29 | for b in range(2**8): 30 | if b not in bs: 31 | bs.append(b) 32 | cs.append(2**8+n) 33 | n += 1 34 | cs = [chr(n) for n in cs] 35 | return dict(zip(bs, cs)) 36 | 37 | 38 | def get_pairs(word): 39 | """Return set of symbol pairs in a word. 40 | Word is represented as tuple of symbols (symbols being variable-length strings). 
41 | """ 42 | pairs = set() 43 | prev_char = word[0] 44 | for char in word[1:]: 45 | pairs.add((prev_char, char)) 46 | prev_char = char 47 | return pairs 48 | 49 | 50 | def basic_clean(text): 51 | text = ftfy.fix_text(text) 52 | text = html.unescape(html.unescape(text)) 53 | return text.strip() 54 | 55 | 56 | def whitespace_clean(text): 57 | text = re.sub(r'\s+', ' ', text) 58 | text = text.strip() 59 | return text 60 | 61 | 62 | class SimpleTokenizer(object): 63 | def __init__(self, bpe_path: str = default_bpe()): 64 | self.byte_encoder = bytes_to_unicode() 65 | self.byte_decoder = {v: k for k, v in self.byte_encoder.items()} 66 | merges = gzip.open(bpe_path).read().decode("utf-8").split('\n') 67 | merges = merges[1:49152-256-2+1] 68 | merges = [tuple(merge.split()) for merge in merges] 69 | vocab = list(bytes_to_unicode().values()) 70 | vocab = vocab + [v+'' for v in vocab] 71 | for merge in merges: 72 | vocab.append(''.join(merge)) 73 | vocab.extend(['<|startoftext|>', '<|endoftext|>']) 74 | self.encoder = dict(zip(vocab, range(len(vocab)))) 75 | self.decoder = {v: k for k, v in self.encoder.items()} 76 | self.bpe_ranks = dict(zip(merges, range(len(merges)))) 77 | self.cache = {'<|startoftext|>': '<|startoftext|>', '<|endoftext|>': '<|endoftext|>'} 78 | self.pat = re.compile(r"""<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[\p{L}]+|[\p{N}]|[^\s\p{L}\p{N}]+""", re.IGNORECASE) 79 | 80 | def bpe(self, token): 81 | if token in self.cache: 82 | return self.cache[token] 83 | word = tuple(token[:-1]) + ( token[-1] + '',) 84 | pairs = get_pairs(word) 85 | 86 | if not pairs: 87 | return token+'' 88 | 89 | while True: 90 | bigram = min(pairs, key = lambda pair: self.bpe_ranks.get(pair, float('inf'))) 91 | if bigram not in self.bpe_ranks: 92 | break 93 | first, second = bigram 94 | new_word = [] 95 | i = 0 96 | while i < len(word): 97 | try: 98 | j = word.index(first, i) 99 | new_word.extend(word[i:j]) 100 | i = j 101 | except: 102 | new_word.extend(word[i:]) 103 | break 104 | 105 | if word[i] == first and i < len(word)-1 and word[i+1] == second: 106 | new_word.append(first+second) 107 | i += 2 108 | else: 109 | new_word.append(word[i]) 110 | i += 1 111 | new_word = tuple(new_word) 112 | word = new_word 113 | if len(word) == 1: 114 | break 115 | else: 116 | pairs = get_pairs(word) 117 | word = ' '.join(word) 118 | self.cache[token] = word 119 | return word 120 | 121 | def encode(self, text): 122 | bpe_tokens = [] 123 | text = whitespace_clean(basic_clean(text)).lower() 124 | for token in re.findall(self.pat, text): 125 | token = ''.join(self.byte_encoder[b] for b in token.encode('utf-8')) 126 | bpe_tokens.extend(self.encoder[bpe_token] for bpe_token in self.bpe(token).split(' ')) 127 | return bpe_tokens 128 | 129 | def decode(self, tokens): 130 | text = ''.join([self.decoder[token] for token in tokens]) 131 | text = bytearray([self.byte_decoder[c] for c in text]).decode('utf-8', errors="replace").replace('', ' ') 132 | return text 133 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | 4 | from dassl.utils import setup_logger, set_random_seed, collect_env_info 5 | from dassl.config import get_cfg_default 6 | from dassl.engine import build_trainer 7 | 8 | # custom 9 | import datasets.oxford_pets 10 | import datasets.oxford_flowers 11 | import datasets.fgvc_aircraft 12 | import datasets.dtd 13 | import datasets.eurosat 14 | 
import datasets.stanford_cars 15 | import datasets.food101 16 | import datasets.sun397 17 | import datasets.caltech101 18 | import datasets.ucf101 19 | import datasets.imagenet 20 | 21 | import datasets.imagenet_sketch 22 | import datasets.imagenetv2 23 | import datasets.imagenet_a 24 | import datasets.imagenet_r 25 | 26 | import prompting.lasp 27 | 28 | 29 | def print_args(args, cfg): 30 | print("***************") 31 | print("** Arguments **") 32 | print("***************") 33 | optkeys = list(args.__dict__.keys()) 34 | optkeys.sort() 35 | for key in optkeys: 36 | print("{}: {}".format(key, args.__dict__[key])) 37 | print("************") 38 | print("** Config **") 39 | print("************") 40 | print(cfg) 41 | 42 | 43 | def reset_cfg(cfg, args): 44 | if args.root: 45 | cfg.DATASET.ROOT = args.root 46 | 47 | if args.output_dir: 48 | cfg.OUTPUT_DIR = args.output_dir 49 | 50 | if args.resume: 51 | cfg.RESUME = args.resume 52 | 53 | if args.seed: 54 | cfg.SEED = args.seed 55 | 56 | if args.source_domains: 57 | cfg.DATASET.SOURCE_DOMAINS = args.source_domains 58 | 59 | if args.target_domains: 60 | cfg.DATASET.TARGET_DOMAINS = args.target_domains 61 | 62 | if args.transforms: 63 | cfg.INPUT.TRANSFORMS = args.transforms 64 | 65 | if args.trainer: 66 | cfg.TRAINER.NAME = args.trainer 67 | 68 | if args.backbone: 69 | cfg.MODEL.BACKBONE.NAME = args.backbone 70 | 71 | if args.head: 72 | cfg.MODEL.HEAD.NAME = args.head 73 | 74 | 75 | def extend_cfg(cfg): 76 | """ 77 | Add new config variables. 78 | 79 | E.g. 80 | from yacs.config import CfgNode as CN 81 | cfg.TRAINER.MY_MODEL = CN() 82 | cfg.TRAINER.MY_MODEL.PARAM_A = 1. 83 | cfg.TRAINER.MY_MODEL.PARAM_B = 0.5 84 | cfg.TRAINER.MY_MODEL.PARAM_C = False 85 | """ 86 | from yacs.config import CfgNode as CN 87 | 88 | cfg.DATASET.SUBSAMPLE_CLASSES = "all" # all, base or new 89 | cfg.DATASET.INCLUDE_ALL_CLASSES = False 90 | 91 | ##### ------ UniveralOP ------- ###### 92 | cfg.TRAINER.LASP = CN() 93 | cfg.TRAINER.LASP.ENABLE = True # LARS loss 94 | cfg.TRAINER.LASP.LASP_PROMPTS = ['a photo of {}'] # List of textual prompts for LARS 95 | cfg.TRAINER.LASP.LASP_LOSS_WEIGHT = 1.0 # weighf of LARS text-to-text loss 96 | cfg.TRAINER.LASP.N_CTX = 16 # number of context vectors 97 | cfg.TRAINER.LASP.CTX_INIT = "" # initialization words 98 | cfg.TRAINER.LASP.PREC = "amp" # fp16, fp32, amp 99 | 100 | cfg.TRAINER.LASP.ENABLE_CORRECTION = False 101 | cfg.TRAINER.LASP.ENABLE_IMPLICIT_OP = 'sum' # mul 102 | cfg.TRAINER.LASP.PRETRAINED_PROMPTS_DIR = None 103 | cfg.TRAINER.LASP.TRAIN_W = True 104 | cfg.TRAINER.LASP.FINETUNE_VIT_LN = True 105 | 106 | 107 | 108 | 109 | def setup_cfg(args): 110 | cfg = get_cfg_default() 111 | extend_cfg(cfg) 112 | 113 | # 1. From the dataset config file 114 | if args.dataset_config_file: 115 | cfg.merge_from_file(args.dataset_config_file) 116 | 117 | # 2. From the method config file 118 | if args.config_file: 119 | cfg.merge_from_file(args.config_file) 120 | 121 | # 3. From input arguments 122 | reset_cfg(cfg, args) 123 | 124 | # 4. 
From optional input arguments 125 | cfg.merge_from_list(args.opts) 126 | 127 | cfg.freeze() 128 | 129 | return cfg 130 | 131 | 132 | def main(args): 133 | cfg = setup_cfg(args) 134 | if cfg.SEED >= 0: 135 | print("Setting fixed seed: {}".format(cfg.SEED)) 136 | set_random_seed(cfg.SEED) 137 | setup_logger(cfg.OUTPUT_DIR) 138 | 139 | if torch.cuda.is_available() and cfg.USE_CUDA: 140 | torch.backends.cudnn.benchmark = True 141 | 142 | print_args(args, cfg) 143 | print("Collecting env info ...") 144 | print("** System info **\n{}\n".format(collect_env_info())) 145 | 146 | trainer = build_trainer(cfg) 147 | 148 | if args.eval_only: 149 | trainer.load_model(args.model_dir, epoch=args.load_epoch) 150 | trainer.test() 151 | return 152 | 153 | trainer.load_model(None, 10) 154 | 155 | if not args.no_train: 156 | trainer.train() 157 | 158 | 159 | if __name__ == "__main__": 160 | parser = argparse.ArgumentParser() 161 | parser.add_argument("--root", type=str, default="", help="path to dataset") 162 | parser.add_argument("--output-dir", type=str, default="", help="output directory") 163 | parser.add_argument( 164 | "--resume", 165 | type=str, 166 | default="", 167 | help="checkpoint directory (from which the training resumes)", 168 | ) 169 | parser.add_argument( 170 | "--seed", type=int, default=-1, help="only positive value enables a fixed seed" 171 | ) 172 | parser.add_argument( 173 | "--source-domains", type=str, nargs="+", help="source domains for DA/DG" 174 | ) 175 | parser.add_argument( 176 | "--target-domains", type=str, nargs="+", help="target domains for DA/DG" 177 | ) 178 | parser.add_argument( 179 | "--transforms", type=str, nargs="+", help="data augmentation methods" 180 | ) 181 | parser.add_argument( 182 | "--config-file", type=str, default="", help="path to config file" 183 | ) 184 | parser.add_argument( 185 | "--dataset-config-file", 186 | type=str, 187 | default="", 188 | help="path to config file for dataset setup", 189 | ) 190 | parser.add_argument("--trainer", type=str, default="", help="name of trainer") 191 | parser.add_argument("--backbone", type=str, default="", help="name of CNN backbone") 192 | parser.add_argument("--head", type=str, default="", help="name of head") 193 | parser.add_argument("--eval-only", action="store_true", help="evaluation only") 194 | parser.add_argument( 195 | "--model-dir", 196 | type=str, 197 | default="", 198 | help="load model from this directory for eval-only mode", 199 | ) 200 | parser.add_argument( 201 | "--load-epoch", type=int, help="load model weights at this epoch for evaluation" 202 | ) 203 | parser.add_argument( 204 | "--no-train", action="store_true", help="do not call trainer.train()" 205 | ) 206 | parser.add_argument( 207 | "opts", 208 | default=None, 209 | nargs=argparse.REMAINDER, 210 | help="modify config options using the command-line", 211 | ) 212 | args = parser.parse_args() 213 | main(args) 214 | -------------------------------------------------------------------------------- /datasets/oxford_pets.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import math 4 | import random 5 | from collections import defaultdict 6 | 7 | from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase 8 | from dassl.utils import read_json, write_json, mkdir_if_missing 9 | 10 | 11 | @DATASET_REGISTRY.register() 12 | class OxfordPets(DatasetBase): 13 | 14 | dataset_dir = "oxford_pets" 15 | 16 | def __init__(self, cfg): 17 | root = 
os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) 18 | self.dataset_dir = os.path.join(root, self.dataset_dir) 19 | self.image_dir = os.path.join(self.dataset_dir, "images") 20 | self.anno_dir = os.path.join(self.dataset_dir, "annotations") 21 | self.split_path = os.path.join(self.dataset_dir, "split_zhou_OxfordPets.json") 22 | self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot") 23 | mkdir_if_missing(self.split_fewshot_dir) 24 | 25 | if os.path.exists(self.split_path): 26 | train, val, test = self.read_split(self.split_path, self.image_dir) 27 | else: 28 | trainval = self.read_data(split_file="trainval.txt") 29 | test = self.read_data(split_file="test.txt") 30 | train, val = self.split_trainval(trainval) 31 | self.save_split(train, val, test, self.split_path, self.image_dir) 32 | 33 | num_shots = cfg.DATASET.NUM_SHOTS 34 | if num_shots >= 1: 35 | seed = cfg.SEED 36 | preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl") 37 | 38 | if os.path.exists(preprocessed): 39 | print(f"Loading preprocessed few-shot data from {preprocessed}") 40 | with open(preprocessed, "rb") as file: 41 | data = pickle.load(file) 42 | train, val = data["train"], data["val"] 43 | else: 44 | train = self.generate_fewshot_dataset(train, num_shots=num_shots) 45 | val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4)) 46 | data = {"train": train, "val": val} 47 | print(f"Saving preprocessed few-shot data to {preprocessed}") 48 | with open(preprocessed, "wb") as file: 49 | pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL) 50 | 51 | self.all_classnames = OxfordPets.get_all_classnames(train, val, test) 52 | subsample = cfg.DATASET.SUBSAMPLE_CLASSES 53 | train, val, test = self.subsample_classes(train, val, test, subsample=subsample) 54 | 55 | super().__init__(train_x=train, val=val, test=test) 56 | 57 | def read_data(self, split_file): 58 | filepath = os.path.join(self.anno_dir, split_file) 59 | items = [] 60 | 61 | with open(filepath, "r") as f: 62 | lines = f.readlines() 63 | for line in lines: 64 | line = line.strip() 65 | imname, label, species, _ = line.split(" ") 66 | breed = imname.split("_")[:-1] 67 | breed = "_".join(breed) 68 | breed = breed.lower() 69 | imname += ".jpg" 70 | impath = os.path.join(self.image_dir, imname) 71 | label = int(label) - 1 # convert to 0-based index 72 | item = Datum(impath=impath, label=label, classname=breed) 73 | items.append(item) 74 | 75 | return items 76 | 77 | @staticmethod 78 | def split_trainval(trainval, p_val=0.2): 79 | p_trn = 1 - p_val 80 | print(f"Splitting trainval into {p_trn:.0%} train and {p_val:.0%} val") 81 | tracker = defaultdict(list) 82 | for idx, item in enumerate(trainval): 83 | label = item.label 84 | tracker[label].append(idx) 85 | 86 | train, val = [], [] 87 | for label, idxs in tracker.items(): 88 | n_val = round(len(idxs) * p_val) 89 | assert n_val > 0 90 | random.shuffle(idxs) 91 | for n, idx in enumerate(idxs): 92 | item = trainval[idx] 93 | if n < n_val: 94 | val.append(item) 95 | else: 96 | train.append(item) 97 | 98 | return train, val 99 | 100 | @staticmethod 101 | def save_split(train, val, test, filepath, path_prefix): 102 | def _extract(items): 103 | out = [] 104 | for item in items: 105 | impath = item.impath 106 | label = item.label 107 | classname = item.classname 108 | impath = impath.replace(path_prefix, "") 109 | if impath.startswith("/"): 110 | impath = impath[1:] 111 | out.append((impath, label, classname)) 112 | return out 113 | 114 | train = _extract(train) 
115 | val = _extract(val) 116 | test = _extract(test) 117 | 118 | split = {"train": train, "val": val, "test": test} 119 | 120 | write_json(split, filepath) 121 | print(f"Saved split to {filepath}") 122 | 123 | @staticmethod 124 | def read_split(filepath, path_prefix): 125 | def _convert(items): 126 | out = [] 127 | for impath, label, classname in items: 128 | impath = os.path.join(path_prefix, impath) 129 | item = Datum(impath=impath, label=int(label), classname=classname) 130 | out.append(item) 131 | return out 132 | 133 | print(f"Reading split from {filepath}") 134 | split = read_json(filepath) 135 | train = _convert(split["train"]) 136 | val = _convert(split["val"]) 137 | test = _convert(split["test"]) 138 | 139 | return train, val, test 140 | 141 | @staticmethod 142 | def get_all_classnames(*args): 143 | classnames = [] 144 | for dataset in args: 145 | for item in dataset: 146 | classnames.append(item.classname) 147 | return list(set(classnames)) 148 | 149 | @staticmethod 150 | def subsample_classes(*args, subsample="all"): 151 | """Divide classes into two groups. The first group 152 | represents base classes while the second group represents 153 | new classes. 154 | 155 | Args: 156 | args: a list of datasets, e.g. train, val and test. 157 | subsample (str): what classes to subsample. 158 | """ 159 | assert subsample in ["all", "base", "new"] 160 | 161 | if subsample == "all": 162 | return args 163 | 164 | dataset = args[0] 165 | labels = set() 166 | for item in dataset: 167 | labels.add(item.label) 168 | labels = list(labels) 169 | labels.sort() 170 | n = len(labels) 171 | # Divide classes into two halves 172 | m = math.ceil(n / 2) 173 | 174 | print(f"SUBSAMPLE {subsample.upper()} CLASSES!") 175 | if subsample == "base": 176 | selected = labels[:m] # take the first half 177 | else: 178 | selected = labels[m:] # take the second half 179 | relabeler = {y: y_new for y_new, y in enumerate(selected)} 180 | 181 | output = [] 182 | for dataset in args: 183 | dataset_new = [] 184 | for item in dataset: 185 | if item.label not in selected: 186 | continue 187 | item_new = Datum( 188 | impath=item.impath, 189 | label=relabeler[item.label], 190 | classname=item.classname 191 | ) 192 | dataset_new.append(item_new) 193 | output.append(dataset_new) 194 | 195 | return output 196 | -------------------------------------------------------------------------------- /datasets/utils/download_datasets.py: -------------------------------------------------------------------------------- 1 | import os 2 | from subprocess import call 3 | import gdown 4 | import argparse 5 | 6 | args = argparse.ArgumentParser() 7 | args.add_argument('--root', required=True, help='path to datasets') 8 | args.add_argument('--dataset_name', default=None, help='name of dataset to download. 
If not specified all datasets will be downloaded') 9 | 10 | args = args.parse_args() 11 | 12 | def download_datasets(root, dataset_names=None): 13 | if dataset_names is None: 14 | dataset_names = ['imagenet', 'caltech101', 'oxford_pets', 'stanford_cars', 'flowers102', 'food101', 15 | 'fgvc_aircraft', 'sun397', 'dtd', 'eurosat', 'ucf101'] 16 | for dataset_name in dataset_names: 17 | download_dataset(dataset_name, f"{root}/{dataset_name}") 18 | 19 | 20 | def download_dataset(dataset_name, root="root"): 21 | if dataset_name == 'imagenet': 22 | base_root = root 23 | root = os.path.join(root, 'images') 24 | if not os.path.exists(root): 25 | os.makedirs(root, exist_ok=True) 26 | else: 27 | return 28 | else: 29 | if not os.path.exists(root): 30 | os.makedirs(root, exist_ok=True) 31 | else: 32 | return 33 | 34 | if dataset_name == 'imagenet': 35 | call(f"wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar --output-document={root}/ILSVRC2012_img_val.tar", shell=True) 36 | call(f"wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_train.tar --output-document={root}/ILSVRC2012_img_train.tar", shell=True) 37 | 38 | call(f"untar -xf {root}/ILSVRC2012_img_val.tar", shell=True) 39 | call(f"untar -xf {root}/ILSVRC2012_img_train.tar", shell=True) 40 | 41 | call(f"rm {root}/ILSVRC2012_img_val.tar", shell=True) 42 | call(f"rm {root}/ILSVRC2012_img_train.tar", shell=True) 43 | 44 | url = 'https://drive.google.com/uc?id=1-61f_ol79pViBFDG_IDlUQSwoLcn2XXF' 45 | gdown.download(url, f"{base_root}/classnames.txt", quiet=False) 46 | elif dataset_name == "caltech101": 47 | call(f"wget http://www.vision.caltech.edu/Image_Datasets/Caltech101/101_ObjectCategories.tar.gz --output-document={root}/101_ObjectCategories.tar.gz", shell=True) 48 | call(f"untar -xf {root}/101_ObjectCategories.tar.gz", shell=True) 49 | call(f"rm {root}/101_ObjectCategories.tar.gz", shell=True) 50 | 51 | url = 'https://drive.google.com/uc?id=1hyarUivQE36mY6jSomru6Fjd-JzwcCzN' 52 | gdown.download(url, f"{root}/split_zhou_Caltech101.json", quiet=False) 53 | elif dataset_name == "oxford_pets": 54 | call(f"wget https://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz --output-document={root}/images.tar.gz", shell=True) 55 | call(f"wget https://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz --output-document={root}/annotations.tar.gz", shell=True) 56 | call(f"untar -xf {root}/images.tar.gz", shell=True) 57 | call(f"untar -xf {root}/annotations.tar.gz", shell=True) 58 | call(f"rm {root}/images.tar.gz", shell=True) 59 | call(f"rm {root}/annotations.tar.gz", shell=True) 60 | 61 | url = 'https://drive.google.com/uc?id=1501r8Ber4nNKvmlFVQZ8SeUHTcdTTEqs' 62 | gdown.download(url, f"{root}/split_zhou_OxfordPets.json", quiet=False) 63 | elif dataset_name == "stanford_cars": 64 | call(f"wget http://ai.stanford.edu/~jkrause/car196/cars_train.tgz --output-document={root}/cars_train.tgz", shell=True) 65 | call(f"wget http://ai.stanford.edu/~jkrause/car196/cars_test.tgz --output-document={root}/cars_test.tgz", shell=True) 66 | call(f"wget http://ai.stanford.edu/~jkrause/car196/cars_devkit.tgz --output-document={root}/cars_devkit.tgz", shell=True) 67 | call(f"wget http://ai.stanford.edu/~jkrause/car196/cars_test_annos_withlabels.mat --output-document={root}/cars_test_annos_withlabels.mat", shell=True) 68 | call(f"untar -xf {root}/cars_train.tgz", shell=True) 69 | call(f"untar -xf {root}/cars_test.tgz", shell=True) 70 | call(f"untar -xf {root}/cars_devkit.tgz", shell=True) 71 | 72 | url = 
'https://drive.google.com/uc?id=1ObCFbaAgVu0I-k_Au-gIUcefirdAuizT' 73 | gdown.download(url, f"{root}/split_zhou_StanfordCars.json", quiet=False) 74 | elif dataset_name == "flowers102": 75 | call(f"wget http://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz --output-document={root}/102flowers.tgz", shell=True) 76 | call(f"wget http://www.robots.ox.ac.uk/~vgg/data/flowers/102/imagelabels.mat --output-document={root}/imagelabels.mat", shell=True) 77 | 78 | call(f"untar -xf {root}/102flowers.tgz", shell=True) 79 | call(f"rm {root}/102flowers.tgz", shell=True) 80 | 81 | url = 'https://drive.google.com/uc?id=1AkcxCXeK_RCGCEC_GvmWxjcjaNhu-at0' 82 | gdown.download(url, f"{root}/cat_to_name.json", quiet=False) 83 | 84 | url = 'https://drive.google.com/uc?id=1Pp0sRXzZFZq15zVOzKjKBu4A9i01nozT' 85 | gdown.download(url, f"{root}/split_zhou_OxfordFlowers.json", quiet=False) 86 | elif dataset_name == "food101": 87 | call(f"wget http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz --output-document={root}/food-101.tar.gz", shell=True) 88 | call(f"untar -xf {root}/food-101.tar.gz", shell=True) 89 | call(f"rm {root}/food-101.tar.gz", shell=True) 90 | 91 | url = 'https://drive.google.com/uc?id=1QK0tGi096I0Ba6kggatX1ee6dJFIcEJl' 92 | gdown.download(url, f"{root}/split_zhou_Food101.json", quiet=False) 93 | elif dataset_name == "fgvc_aircraft": 94 | call(f"wget http://www.robots.ox.ac.uk/~vgg/data/fgvc-aircraft/archives/fgvc-aircraft-2013b.tar.gz --output-document={root}/fgvc-aircraft-2013b.tar.gz", shell=True) 95 | call(f"untar -xf {root}/fgvc-aircraft-2013b.tar.gz", shell=True) 96 | call(f"rm {root}/fgvc-aircraft-2013b.tar.gz", shell=True) 97 | elif dataset_name == "sun397": 98 | call(f"wget http://vision.princeton.edu/projects/2010/SUN/SUN397.tar.gz --output-document={root}/SUN397.tar.gz", shell=True) 99 | call(f"wget https://vision.princeton.edu/projects/2010/SUN/download/Partitions.zip --output-document={root}/Partitions.zip", shell=True) 100 | call(f"untar -xf {root}/SUN397.tar.gz", shell=True) 101 | call(f"untar -xf {root}/Partitions.zip", shell=True) 102 | call(f"rm {root}/SUN397.tar.gz", shell=True) 103 | call(f"rm {root}/Partitions.zip", shell=True) 104 | 105 | url = 'https://drive.google.com/uc?id=1y2RD81BYuiyvebdN-JymPfyWYcd8_MUq' 106 | gdown.download(url, f"{root}/split_zhou_SUN397.json", quiet=False) 107 | elif dataset_name == "dtd": 108 | call(f"wget https://www.robots.ox.ac.uk/~vgg/data/dtd/download/dtd-r1.0.1.tar.gz --output-document={root}/dtd-r1.0.1.tar.gz", shell=True) 109 | call(f"untar -xf {root}/dtd-r1.0.1.tar.gz", shell=True) 110 | call(f"rm {root}/dtd-r1.0.1.tar.gz", shell=True) 111 | 112 | url = 'https://drive.google.com/uc?id=1u3_QfB467jqHgNXC00UIzbLZRQCg2S7x' 113 | gdown.download(url, f"{root}/split_zhou_DTD.json", quiet=False) 114 | elif dataset_name == "eurosat": 115 | call(f"wget http://madm.dfki.de/files/sentinel/EuroSAT.zip --output-document={root}/EuroSAT.zip", shell=True) 116 | call(f"untar -xf {root}/EuroSAT.zip", shell=True) 117 | call(f"rm {root}/EuroSAT.zip", shell=True) 118 | 119 | url = 'https://drive.google.com/uc?id=1Ip7yaCWFi0eaOFUGga0lUdVi_DDQth1o' 120 | gdown.download(url, f"{root}/split_zhou_EuroSAT.json", quiet=False) 121 | elif dataset_name == "ucf101": 122 | url = 'https://drive.google.com/uc?id=10Jqome3vtUA2keJkNanAiFpgbyC9Hc2O' 123 | gdown.download(url, f"{root}/UCF-101-midframes.zip", quiet=False) 124 | call(f"unzip {root}/UCF-101-midframes.zip -d {root}", shell=True) 125 | call(f"rm -rf {root}/UCF-101-midframes.zip", shell=True) 126 | 127 | url = 
'https://drive.google.com/uc?id=1I0S0q91hJfsV9Gf4xDIjgDq4AqBNJb1y' 128 | gdown.download(url, f"{root}/split_zhou_UCF101.json", quiet=False) 129 | else: 130 | raise Exception('Unknown dataset.') 131 | 132 | download_datasets(args.root) 133 | 134 | 135 | 136 | -------------------------------------------------------------------------------- /clip/clip.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import os 3 | import urllib 4 | import warnings 5 | from typing import Union, List 6 | from pkg_resources import packaging 7 | 8 | import torch 9 | from PIL import Image 10 | from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize 11 | from tqdm import tqdm 12 | 13 | from .model import build_model 14 | from .simple_tokenizer import SimpleTokenizer as _Tokenizer 15 | 16 | try: 17 | from torchvision.transforms import InterpolationMode 18 | BICUBIC = InterpolationMode.BICUBIC 19 | except ImportError: 20 | BICUBIC = Image.BICUBIC 21 | 22 | 23 | if torch.__version__.split(".") < ["1", "7", "1"]: 24 | warnings.warn("PyTorch version 1.7.1 or higher is recommended") 25 | 26 | 27 | __all__ = ["available_models", "load", "tokenize"] 28 | _tokenizer = _Tokenizer() 29 | 30 | _MODELS = { 31 | "RN50": "https://openaipublic.azureedge.net/clip/models/afeb0e10f9e5a86da6080e35cf09123aca3b358a0c3e3b6c78a7b63bc04b6762/RN50.pt", 32 | "RN101": "https://openaipublic.azureedge.net/clip/models/8fa8567bab74a42d41c5915025a8e4538c3bdbe8804a470a72f30b0d94fab599/RN101.pt", 33 | "RN50x4": "https://openaipublic.azureedge.net/clip/models/7e526bd135e493cef0776de27d5f42653e6b4c8bf9e0f653bb11773263205fdd/RN50x4.pt", 34 | "RN50x16": "https://openaipublic.azureedge.net/clip/models/52378b407f34354e150460fe41077663dd5b39c54cd0bfd2b27167a4a06ec9aa/RN50x16.pt", 35 | "RN50x64": "https://openaipublic.azureedge.net/clip/models/be1cfb55d75a9666199fb2206c106743da0f6468c9d327f3e0d0a543a9919d9c/RN50x64.pt", 36 | "ViT-B/32": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt", 37 | "ViT-B/16": "https://openaipublic.azureedge.net/clip/models/5806e77cd80f8b59890b7e101eabd078d9fb84e6937f9e85e4ecb61988df416f/ViT-B-16.pt", 38 | "ViT-L/14": "https://openaipublic.azureedge.net/clip/models/b8cca3fd41ae0c99ba7e8951adf17d267cdb84cd88be6f7c2e0eca1737a03836/ViT-L-14.pt", 39 | "ViT-L/14@336px": "https://openaipublic.azureedge.net/clip/models/3035c92b350959924f9f00213499208652fc7ea050643e8b385c2dac08641f02/ViT-L-14-336px.pt", 40 | } 41 | 42 | 43 | def _download(url: str, root: str = "~/.cache/clip"): 44 | os.makedirs(root, exist_ok=True) 45 | filename = os.path.basename(url) 46 | 47 | expected_sha256 = url.split("/")[-2] 48 | download_target = os.path.join(root, filename) 49 | 50 | if os.path.exists(download_target) and not os.path.isfile(download_target): 51 | raise RuntimeError(f"{download_target} exists and is not a regular file") 52 | 53 | if os.path.isfile(download_target): 54 | if hashlib.sha256(open(download_target, "rb").read()).hexdigest() == expected_sha256: 55 | return download_target 56 | else: 57 | warnings.warn(f"{download_target} exists, but the SHA256 checksum does not match; re-downloading the file") 58 | 59 | with urllib.request.urlopen(url) as source, open(download_target, "wb") as output: 60 | with tqdm(total=int(source.info().get("Content-Length")), ncols=80, unit='iB', unit_scale=True, unit_divisor=1024) as loop: 61 | while True: 62 | buffer = source.read(8192) 63 | if not buffer: 
64 | break 65 | 66 | output.write(buffer) 67 | loop.update(len(buffer)) 68 | 69 | if hashlib.sha256(open(download_target, "rb").read()).hexdigest() != expected_sha256: 70 | raise RuntimeError("Model has been downloaded but the SHA256 checksum does not not match") 71 | 72 | return download_target 73 | 74 | 75 | def _transform(n_px): 76 | return Compose([ 77 | Resize(n_px, interpolation=BICUBIC), 78 | CenterCrop(n_px), 79 | lambda image: image.convert("RGB"), 80 | ToTensor(), 81 | Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)), 82 | ]) 83 | 84 | 85 | def available_models() -> List[str]: 86 | """Returns the names of available CLIP models""" 87 | return list(_MODELS.keys()) 88 | 89 | 90 | 91 | def load(name: str, device: Union[str, torch.device] = "cuda" if torch.cuda.is_available() else "cpu", jit: bool = False, download_root: str = None): 92 | """Load a CLIP model 93 | Parameters 94 | ---------- 95 | name : str 96 | A model name listed by `clip.available_models()`, or the path to a model checkpoint containing the state_dict 97 | device : Union[str, torch.device] 98 | The device to put the loaded model 99 | jit : bool 100 | Whether to load the optimized JIT model or more hackable non-JIT model (default). 101 | download_root: str 102 | path to download the model files; by default, it uses "~/.cache/clip" 103 | Returns 104 | ------- 105 | model : torch.nn.Module 106 | The CLIP model 107 | preprocess : Callable[[PIL.Image], torch.Tensor] 108 | A torchvision transform that converts a PIL image into a tensor that the returned model can take as its input 109 | """ 110 | if name in _MODELS: 111 | model_path = _download(_MODELS[name], download_root or os.path.expanduser("~/.cache/clip")) 112 | elif os.path.isfile(name): 113 | model_path = name 114 | else: 115 | raise RuntimeError(f"Model {name} not found; available models = {available_models()}") 116 | 117 | with open(model_path, 'rb') as opened_file: 118 | try: 119 | # loading JIT archive 120 | model = torch.jit.load(opened_file, map_location=device if jit else "cpu").eval() 121 | state_dict = None 122 | except RuntimeError: 123 | # loading saved state dict 124 | if jit: 125 | warnings.warn(f"File {model_path} is not a JIT archive. 
Loading as a state dict instead") 126 | jit = False 127 | state_dict = torch.load(opened_file, map_location="cpu") 128 | 129 | if not jit: 130 | model = build_model(state_dict or model.state_dict()).to(device) 131 | if str(device) == "cpu": 132 | model.float() 133 | return model, _transform(model.visual.input_resolution) 134 | 135 | # patch the device names 136 | device_holder = torch.jit.trace(lambda: torch.ones([]).to(torch.device(device)), example_inputs=[]) 137 | device_node = [n for n in device_holder.graph.findAllNodes("prim::Constant") if "Device" in repr(n)][-1] 138 | 139 | def patch_device(module): 140 | try: 141 | graphs = [module.graph] if hasattr(module, "graph") else [] 142 | except RuntimeError: 143 | graphs = [] 144 | 145 | if hasattr(module, "forward1"): 146 | graphs.append(module.forward1.graph) 147 | 148 | for graph in graphs: 149 | for node in graph.findAllNodes("prim::Constant"): 150 | if "value" in node.attributeNames() and str(node["value"]).startswith("cuda"): 151 | node.copyAttributes(device_node) 152 | 153 | model.apply(patch_device) 154 | patch_device(model.encode_image) 155 | patch_device(model.encode_text) 156 | 157 | # patch dtype to float32 on CPU 158 | if str(device) == "cpu": 159 | float_holder = torch.jit.trace(lambda: torch.ones([]).float(), example_inputs=[]) 160 | float_input = list(float_holder.graph.findNode("aten::to").inputs())[1] 161 | float_node = float_input.node() 162 | 163 | def patch_float(module): 164 | try: 165 | graphs = [module.graph] if hasattr(module, "graph") else [] 166 | except RuntimeError: 167 | graphs = [] 168 | 169 | if hasattr(module, "forward1"): 170 | graphs.append(module.forward1.graph) 171 | 172 | for graph in graphs: 173 | for node in graph.findAllNodes("aten::to"): 174 | inputs = list(node.inputs()) 175 | for i in [1, 2]: # dtype can be the second or third argument to aten::to() 176 | if inputs[i].node()["value"] == 5: 177 | inputs[i].node().copyAttributes(float_node) 178 | 179 | model.apply(patch_float) 180 | patch_float(model.encode_image) 181 | patch_float(model.encode_text) 182 | 183 | model.float() 184 | 185 | return model, _transform(model.input_resolution.item()) 186 | 187 | 188 | def tokenize(texts: Union[str, List[str]], context_length: int = 77, truncate: bool = False) -> Union[torch.IntTensor, torch.LongTensor]: 189 | """ 190 | Returns the tokenized representation of given input string(s) 191 | Parameters 192 | ---------- 193 | texts : Union[str, List[str]] 194 | An input string or a list of input strings to tokenize 195 | context_length : int 196 | The context length to use; all CLIP models use 77 as the context length 197 | truncate: bool 198 | Whether to truncate the text in case its encoding is longer than the context length 199 | Returns 200 | ------- 201 | A two-dimensional tensor containing the resulting tokens, shape = [number of input strings, context_length]. 202 | We return LongTensor when torch version is <1.8.0, since older index_select requires indices to be long. 
203 | """ 204 | if isinstance(texts, str): 205 | texts = [texts] 206 | 207 | sot_token = _tokenizer.encoder["<|startoftext|>"] 208 | eot_token = _tokenizer.encoder["<|endoftext|>"] 209 | all_tokens = [[sot_token] + _tokenizer.encode(text) + [eot_token] for text in texts] 210 | if packaging.version.parse(torch.__version__) < packaging.version.parse("1.8.0"): 211 | result = torch.zeros(len(all_tokens), context_length, dtype=torch.long) 212 | else: 213 | result = torch.zeros(len(all_tokens), context_length, dtype=torch.int) 214 | 215 | for i, tokens in enumerate(all_tokens): 216 | if len(tokens) > context_length: 217 | if truncate: 218 | tokens = tokens[:context_length] 219 | tokens[-1] = eot_token 220 | else: 221 | raise RuntimeError(f"Input {texts[i]} is too long for context length {context_length}") 222 | result[i, :len(tokens)] = torch.tensor(tokens) 223 | 224 | return result 225 | -------------------------------------------------------------------------------- /logs/caltech101/vit_b16_c4_ep50_batch32_cls_t2t_10_wcl_25_g1_b_lr32/seed1/log.txt-2023-03-17-20-00-54: -------------------------------------------------------------------------------- 1 | *************** 2 | ** Arguments ** 3 | *************** 4 | backbone: 5 | config_file: configs/LASP/vit_b16_c4_ep50_batch32_cls_t2t_10_wcl_25_g1_b_lr32.yaml 6 | dataset_config_file: configs/datasets/caltech101.yaml 7 | eval_only: False 8 | head: 9 | load_epoch: None 10 | model_dir: 11 | no_train: False 12 | opts: ['DATASET.NUM_SHOTS', '16', 'DATASET.SUBSAMPLE_CLASSES', 'base'] 13 | output_dir: output/base2new/train_base/caltech101/shots_16/LASP/vit_b16_c4_ep50_batch32_cls_t2t_10_wcl_25_g1_b_lr32/seed1 14 | resume: 15 | root: /home/work/shared-fi-datasets-01/users/adrian.bulat/data/fs_datasets/ 16 | seed: 1 17 | source_domains: None 18 | target_domains: None 19 | trainer: LASP 20 | transforms: None 21 | ************ 22 | ** Config ** 23 | ************ 24 | DATALOADER: 25 | K_TRANSFORMS: 1 26 | NUM_WORKERS: 8 27 | RETURN_IMG0: False 28 | TEST: 29 | BATCH_SIZE: 32 30 | SAMPLER: SequentialSampler 31 | TRAIN_U: 32 | BATCH_SIZE: 32 33 | N_DOMAIN: 0 34 | N_INS: 16 35 | SAME_AS_X: True 36 | SAMPLER: RandomSampler 37 | TRAIN_X: 38 | BATCH_SIZE: 32 39 | N_DOMAIN: 0 40 | N_INS: 16 41 | SAMPLER: RandomSampler 42 | DATASET: 43 | ALL_AS_UNLABELED: False 44 | CIFAR_C_LEVEL: 1 45 | CIFAR_C_TYPE: 46 | INCLUDE_ALL_CLASSES: False 47 | NAME: Caltech101 48 | NUM_LABELED: -1 49 | NUM_SHOTS: 16 50 | ROOT: /home/work/shared-fi-datasets-01/users/adrian.bulat/data/fs_datasets/ 51 | SOURCE_DOMAINS: () 52 | STL10_FOLD: -1 53 | SUBSAMPLE_CLASSES: base 54 | TARGET_DOMAINS: () 55 | VAL_PERCENT: 0.1 56 | INPUT: 57 | COLORJITTER_B: 0.4 58 | COLORJITTER_C: 0.4 59 | COLORJITTER_H: 0.1 60 | COLORJITTER_S: 0.4 61 | CROP_PADDING: 4 62 | CUTOUT_LEN: 16 63 | CUTOUT_N: 1 64 | GB_K: 21 65 | GB_P: 0.5 66 | GN_MEAN: 0.0 67 | GN_STD: 0.15 68 | INTERPOLATION: bicubic 69 | NO_TRANSFORM: False 70 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 71 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 72 | RANDAUGMENT_M: 10 73 | RANDAUGMENT_N: 2 74 | RGS_P: 0.2 75 | RRCROP_SCALE: (0.08, 1.0) 76 | SIZE: (224, 224) 77 | TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') 78 | MODEL: 79 | BACKBONE: 80 | NAME: ViT-B/16 81 | PRETRAINED: True 82 | HEAD: 83 | ACTIVATION: relu 84 | BN: True 85 | DROPOUT: 0.0 86 | HIDDEN_LAYERS: () 87 | NAME: 88 | INIT_WEIGHTS: 89 | OPTIM: 90 | ADAM_BETA1: 0.9 91 | ADAM_BETA2: 0.999 92 | BASE_LR_MULT: 0.1 93 | GAMMA: 0.1 94 | LR: 0.032 95 | LR_SCHEDULER: cosine 
96 | MAX_EPOCH: 50 97 | MOMENTUM: 0.9 98 | NAME: sgd 99 | NEW_LAYERS: () 100 | RMSPROP_ALPHA: 0.99 101 | SGD_DAMPNING: 0 102 | SGD_NESTEROV: False 103 | STAGED_LR: False 104 | STEPSIZE: (-1,) 105 | WARMUP_CONS_LR: 1e-05 106 | WARMUP_EPOCH: 5 107 | WARMUP_MIN_LR: 1e-05 108 | WARMUP_RECOUNT: True 109 | WARMUP_TYPE: constant 110 | WEIGHT_DECAY: 0.0005 111 | OUTPUT_DIR: output/base2new/train_base/caltech101/shots_16/LASP/vit_b16_c4_ep50_batch32_cls_t2t_10_wcl_25_g1_b_lr32/seed1 112 | RESUME: 113 | SEED: 1 114 | TEST: 115 | COMPUTE_CMAT: False 116 | EVALUATOR: Classification 117 | FINAL_MODEL: last_step 118 | NO_TEST: False 119 | PER_CLASS_RESULT: False 120 | SPLIT: test 121 | TRAIN: 122 | CHECKPOINT_FREQ: 0 123 | COUNT_ITER: train_x 124 | PRINT_FREQ: 20 125 | TRAINER: 126 | CDAC: 127 | CLASS_LR_MULTI: 10 128 | P_THRESH: 0.95 129 | RAMPUP_COEF: 30 130 | RAMPUP_ITRS: 1000 131 | STRONG_TRANSFORMS: () 132 | TOPK_MATCH: 5 133 | CROSSGRAD: 134 | ALPHA_D: 0.5 135 | ALPHA_F: 0.5 136 | EPS_D: 1.0 137 | EPS_F: 1.0 138 | DAEL: 139 | CONF_THRE: 0.95 140 | STRONG_TRANSFORMS: () 141 | WEIGHT_U: 0.5 142 | DAELDG: 143 | CONF_THRE: 0.95 144 | STRONG_TRANSFORMS: () 145 | WEIGHT_U: 0.5 146 | DDAIG: 147 | ALPHA: 0.5 148 | CLAMP: False 149 | CLAMP_MAX: 1.0 150 | CLAMP_MIN: -1.0 151 | G_ARCH: 152 | LMDA: 0.3 153 | WARMUP: 0 154 | DOMAINMIX: 155 | ALPHA: 1.0 156 | BETA: 1.0 157 | TYPE: crossdomain 158 | ENTMIN: 159 | LMDA: 0.001 160 | FIXMATCH: 161 | CONF_THRE: 0.95 162 | STRONG_TRANSFORMS: () 163 | WEIGHT_U: 1.0 164 | LASP: 165 | CTX_INIT: a photo of a 166 | ENABLE: True 167 | ENABLE_CORRECTION: True 168 | ENABLE_IMPLICIT_OP: sum 169 | FINETUNE_VIT_LN: True 170 | LASP_LOSS_WEIGHT: 10.0 171 | LASP_PROMPTS: ['a photo of a {}, a type of flower.', 'a photo of a person doing {}.', 'a centered satellite photo of {}.', 'a photo of a {}, a type of aircraft.', '{} texture.', 'itap of a {}.', 'a bad photo of the {}.', 'a origami {}.', 'a photo of the large {}.', 'a {} in a video game.', 'art of the {}.', 'a photo of the small {}.', 'a photo of a {}.', 'a photo of many {}.', 'a photo of the hard to see {}.', 'a low resolution photo of the {}.', 'a rendering of a {}.', 'a bad photo of the {}.', 'a cropped photo of the {}.', 'a pixelated photo of the {}.', 'a bright photo of the {}.', 'a cropped photo of a {}.', 'a photo of the {}.', 'a good photo of the {}.', 'a rendering of the {}.', 'a close-up photo of the {}.', 'a low resolution photo of a {}.', 'a rendition of the {}.', 'a photo of the clean {}.', 'a photo of a large {}.', 'a blurry photo of a {}.', 'a pixelated photo of a {}.', 'itap of the {}.', 'a jpeg corrupted photo of the {}.', 'a good photo of a {}.'] 172 | N_CTX: 4 173 | PREC: amp 174 | PRETRAINED_PROMPTS_DIR: None 175 | TRAIN_W: True 176 | M3SDA: 177 | LMDA: 0.5 178 | N_STEP_F: 4 179 | MCD: 180 | N_STEP_F: 4 181 | MEANTEACHER: 182 | EMA_ALPHA: 0.999 183 | RAMPUP: 5 184 | WEIGHT_U: 1.0 185 | MIXMATCH: 186 | MIXUP_BETA: 0.75 187 | RAMPUP: 20000 188 | TEMP: 2.0 189 | WEIGHT_U: 100.0 190 | MME: 191 | LMDA: 0.1 192 | NAME: LASP 193 | SE: 194 | CONF_THRE: 0.95 195 | EMA_ALPHA: 0.999 196 | RAMPUP: 300 197 | USE_CUDA: True 198 | VERBOSE: True 199 | VERSION: 1 200 | Collecting env info ... 
201 | ** System info ** 202 | PyTorch version: 2.0.0 203 | Is debug build: False 204 | CUDA used to build PyTorch: 11.8 205 | ROCM used to build PyTorch: N/A 206 | 207 | OS: Ubuntu 20.04.4 LTS (x86_64) 208 | GCC version: (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0 209 | Clang version: Could not collect 210 | CMake version: version 3.18.4 211 | Libc version: glibc-2.31 212 | 213 | Python version: 3.10.9 (main, Mar 8 2023, 10:47:38) [GCC 11.2.0] (64-bit runtime) 214 | Python platform: Linux-5.4.0-100-generic-x86_64-with-glibc2.31 215 | Is CUDA available: True 216 | CUDA runtime version: 11.7.64 217 | CUDA_MODULE_LOADING set to: LAZY 218 | GPU models and configuration: 219 | GPU 0: CUDA GPU 220 | GPU 1: CUDA GPU 221 | GPU 2: CUDA GPU 222 | GPU 3: CUDA GPU 223 | 224 | Nvidia driver version: 520.61.05 225 | cuDNN version: Probably one of the following: 226 | /usr/lib/x86_64-linux-gnu/libcudnn.so.8.4.0 227 | /usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.4.0 228 | /usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.4.0 229 | /usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.4.0 230 | /usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.4.0 231 | /usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.4.0 232 | /usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.4.0 233 | HIP runtime version: N/A 234 | MIOpen runtime version: N/A 235 | Is XNNPACK available: True 236 | 237 | CPU: 238 | Architecture: x86_64 239 | CPU op-mode(s): 32-bit, 64-bit 240 | Byte Order: Little Endian 241 | Address sizes: 48 bits physical, 48 bits virtual 242 | CPU(s): 128 243 | On-line CPU(s) list: 0-127 244 | Thread(s) per core: 2 245 | Core(s) per socket: 32 246 | Socket(s): 2 247 | NUMA node(s): 4 248 | Vendor ID: AuthenticAMD 249 | CPU family: 23 250 | Model: 49 251 | Model name: AMD EPYC 7452 32-Core Processor 252 | Stepping: 0 253 | CPU MHz: 3258.089 254 | BogoMIPS: 4691.32 255 | Virtualization: AMD-V 256 | L1d cache: 2 MiB 257 | L1i cache: 2 MiB 258 | L2 cache: 32 MiB 259 | L3 cache: 256 MiB 260 | NUMA node0 CPU(s): 0-15,64-79 261 | NUMA node1 CPU(s): 16-31,80-95 262 | NUMA node2 CPU(s): 32-47,96-111 263 | NUMA node3 CPU(s): 48-63,112-127 264 | Vulnerability Itlb multihit: Not affected 265 | Vulnerability L1tf: Not affected 266 | Vulnerability Mds: Not affected 267 | Vulnerability Meltdown: Not affected 268 | Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp 269 | Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization 270 | Vulnerability Spectre v2: Mitigation; Full AMD retpoline, IBPB conditional, IBRS_FW, STIBP conditional, RSB filling 271 | Vulnerability Srbds: Not affected 272 | Vulnerability Tsx async abort: Not affected 273 | Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold 
avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca 274 | 275 | Versions of relevant libraries: 276 | [pip3] numpy==1.23.5 277 | [pip3] open-clip-torch==2.16.0 278 | [pip3] torch==2.0.0 279 | [pip3] torchaudio==2.0.0 280 | [pip3] torchvision==0.15.0 281 | [conda] blas 1.0 mkl 282 | [conda] ffmpeg 4.3 hf484d3e_0 pytorch 283 | [conda] mkl 2021.4.0 h06a4308_640 284 | [conda] mkl-service 2.4.0 py310h7f8727e_0 285 | [conda] mkl_fft 1.3.1 py310hd6ae3a3_0 286 | [conda] mkl_random 1.2.2 py310h00e6091_0 287 | [conda] numpy 1.23.5 py310hd5efca6_0 288 | [conda] numpy-base 1.23.5 py310h8e6c178_0 289 | [conda] open-clip-torch 2.16.0 dev_0 290 | [conda] pytorch 2.0.0 py3.10_cuda11.8_cudnn8.7.0_0 pytorch 291 | [conda] pytorch-cuda 11.8 h7e8668a_3 pytorch 292 | [conda] pytorch-mutex 1.0 cuda pytorch 293 | [conda] torchaudio 2.0.0 py310_cu118 pytorch 294 | [conda] torchtriton 2.0.0 py310 pytorch 295 | [conda] torchvision 0.15.0 py310_cu118 pytorch 296 | Pillow (9.4.0) 297 | 298 | Loading trainer: LASP 299 | Loading dataset: Caltech101 300 | Reading split from /home/work/shared-fi-datasets-01/users/adrian.bulat/data/fs_datasets/caltech-101/split_zhou_Caltech101.json 301 | Loading preprocessed few-shot data from /home/work/shared-fi-datasets-01/users/adrian.bulat/data/fs_datasets/caltech-101/split_fewshot/shot_16-seed_1.pkl 302 | SUBSAMPLE BASE CLASSES! 303 | Building transform_train 304 | + random resized crop (size=(224, 224), scale=(0.08, 1.0)) 305 | + random flip 306 | + to torch tensor of range [0, 1] 307 | + normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) 308 | Building transform_test 309 | + resize the smaller edge to 224 310 | + 224x224 center crop 311 | + to torch tensor of range [0, 1] 312 | + normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) 313 | --------- ---------- 314 | Dataset Caltech101 315 | # classes 50 316 | # train_x 800 317 | # val 200 318 | # test 1,549 319 | --------- ---------- 320 | Loading CLIP (backbone: ViT-B/16) 321 | Building custom CLIP 322 | Initial context: "a photo of a" 323 | Number of context words (tokens): 4 324 | Initializing LASP prompts... 325 | Num classes used for LASP: 100 326 | Turning off gradients in both the image and the text encoder 327 | Re-enabling LN... 
328 | Parameters to be updated: {'image_encoder.transformer.resblocks.9.ln_1.bias', 'image_encoder.transformer.resblocks.11.ln_2.weight', 'image_encoder.transformer.resblocks.10.ln_2.weight', 'image_encoder.transformer.resblocks.10.ln_1.weight', 'image_encoder.transformer.resblocks.3.ln_2.bias', 'image_encoder.transformer.resblocks.6.ln_1.weight', 'image_encoder.transformer.resblocks.0.ln_2.weight', 'image_encoder.transformer.resblocks.8.ln_2.weight', 'image_encoder.transformer.resblocks.2.ln_2.weight', 'image_encoder.transformer.resblocks.0.ln_2.bias', 'image_encoder.transformer.resblocks.5.ln_2.weight', 'image_encoder.transformer.resblocks.4.ln_1.bias', 'image_encoder.transformer.resblocks.5.ln_2.bias', 'image_encoder.transformer.resblocks.4.ln_1.weight', 'image_encoder.transformer.resblocks.7.ln_1.bias', 'image_encoder.transformer.resblocks.6.ln_2.bias', 'image_encoder.transformer.resblocks.7.ln_2.weight', 'image_encoder.transformer.resblocks.1.ln_1.bias', 'image_encoder.transformer.resblocks.10.ln_2.bias', 'image_encoder.transformer.resblocks.11.ln_1.bias', 'image_encoder.transformer.resblocks.2.ln_1.bias', 'image_encoder.transformer.resblocks.8.ln_1.weight', 'image_encoder.transformer.resblocks.7.ln_1.weight', 'image_encoder.transformer.resblocks.9.ln_1.weight', 'image_encoder.transformer.resblocks.6.ln_2.weight', 'image_encoder.transformer.resblocks.11.ln_2.bias', 'image_encoder.transformer.resblocks.9.ln_2.bias', 'image_encoder.transformer.resblocks.3.ln_1.weight', 'image_encoder.transformer.resblocks.1.ln_1.weight', 'image_encoder.transformer.resblocks.8.ln_1.bias', 'image_encoder.transformer.resblocks.1.ln_2.weight', 'image_encoder.transformer.resblocks.11.ln_1.weight', 'image_encoder.transformer.resblocks.8.ln_2.bias', 'image_encoder.transformer.resblocks.4.ln_2.weight', 'image_encoder.transformer.resblocks.0.ln_1.bias', 'image_encoder.transformer.resblocks.5.ln_1.bias', 'image_encoder.transformer.resblocks.7.ln_2.bias', 'image_encoder.transformer.resblocks.3.ln_1.bias', 'image_encoder.transformer.resblocks.6.ln_1.bias', 'image_encoder.transformer.resblocks.10.ln_1.bias', 'image_encoder.transformer.resblocks.1.ln_2.bias', 'image_encoder.transformer.resblocks.3.ln_2.weight', 'image_encoder.transformer.resblocks.4.ln_2.bias', 'image_encoder.transformer.resblocks.9.ln_2.weight', 'image_encoder.transformer.resblocks.0.ln_1.weight', 'prompt_learner.w', 'image_encoder.transformer.resblocks.5.ln_1.weight', 'image_encoder.transformer.resblocks.2.ln_2.bias', 'image_encoder.transformer.resblocks.2.ln_1.weight', 'prompt_learner.ctx'} 329 | Loading evaluator: Classification 330 | Note that load_model() is skipped as no pretrained model is given 331 | Found checkpoint at output/base2new/train_base/caltech101/shots_16/LASP/vit_b16_c4_ep50_batch32_cls_t2t_10_wcl_25_g1_b_lr32/seed1 (will resume training) 332 | Loading checkpoint from "output/base2new/train_base/caltech101/shots_16/LASP/vit_b16_c4_ep50_batch32_cls_t2t_10_wcl_25_g1_b_lr32/seed1/prompt_learner/model.pth.tar-50" 333 | Loaded model weights 334 | Loaded optimizer 335 | Loaded scheduler 336 | Previous epoch: 50 337 | Initialize tensorboard (log_dir=output/base2new/train_base/caltech101/shots_16/LASP/vit_b16_c4_ep50_batch32_cls_t2t_10_wcl_25_g1_b_lr32/seed1/tensorboard) 338 | Finish training 339 | Deploy the last-epoch model 340 | Evaluate on the *test* set 341 | => result 342 | * total: 1,549 343 | * correct: 1,524 344 | * accuracy: 98.4% 345 | * error: 1.6% 346 | * macro_f1: 97.0% 347 | Elapsed: 0:00:06 348 | 
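
For reference, a minimal usage sketch of the tokenize() helper from clip/clip.py shown earlier in this dump. It assumes the local clip package is importable as `clip` (as set up by clip/__init__.py); the prompts and variable names below are illustrative only and are not taken from the logged runs.

import torch
from clip import tokenize

# Two short prompts -> a [2, 77] tensor of token ids (77 is CLIP's default context length).
# Per clip/clip.py, the dtype is torch.int on torch >= 1.8.0 and torch.long on older versions.
tokens = tokenize(["a photo of a dog.", "a photo of a cat."])
print(tokens.shape, tokens.dtype)

# Prompts longer than the context length raise RuntimeError unless truncate=True is passed,
# in which case the ids are clipped and the final position is reset to the <|endoftext|> token.
long_tokens = tokenize("a photo of " * 40, truncate=True)
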
-------------------------------------------------------------------------------- /logs/stanford_cars/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed1/log.txt-2023-03-17-19-28-06: -------------------------------------------------------------------------------- 1 | *************** 2 | ** Arguments ** 3 | *************** 4 | backbone: 5 | config_file: configs/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32.yaml 6 | dataset_config_file: configs/datasets/stanford_cars.yaml 7 | eval_only: False 8 | head: 9 | load_epoch: None 10 | model_dir: 11 | no_train: False 12 | opts: ['DATASET.NUM_SHOTS', '16', 'DATASET.SUBSAMPLE_CLASSES', 'base'] 13 | output_dir: output/base2new/train_base/stanford_cars/shots_16/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed1 14 | resume: 15 | root: /home/work/shared-fi-datasets-01/users/adrian.bulat/data/fs_datasets/ 16 | seed: 1 17 | source_domains: None 18 | target_domains: None 19 | trainer: LASP 20 | transforms: None 21 | ************ 22 | ** Config ** 23 | ************ 24 | DATALOADER: 25 | K_TRANSFORMS: 1 26 | NUM_WORKERS: 8 27 | RETURN_IMG0: False 28 | TEST: 29 | BATCH_SIZE: 32 30 | SAMPLER: SequentialSampler 31 | TRAIN_U: 32 | BATCH_SIZE: 32 33 | N_DOMAIN: 0 34 | N_INS: 16 35 | SAME_AS_X: True 36 | SAMPLER: RandomSampler 37 | TRAIN_X: 38 | BATCH_SIZE: 32 39 | N_DOMAIN: 0 40 | N_INS: 16 41 | SAMPLER: RandomSampler 42 | DATASET: 43 | ALL_AS_UNLABELED: False 44 | CIFAR_C_LEVEL: 1 45 | CIFAR_C_TYPE: 46 | INCLUDE_ALL_CLASSES: False 47 | NAME: StanfordCars 48 | NUM_LABELED: -1 49 | NUM_SHOTS: 16 50 | ROOT: /home/work/shared-fi-datasets-01/users/adrian.bulat/data/fs_datasets/ 51 | SOURCE_DOMAINS: () 52 | STL10_FOLD: -1 53 | SUBSAMPLE_CLASSES: base 54 | TARGET_DOMAINS: () 55 | VAL_PERCENT: 0.1 56 | INPUT: 57 | COLORJITTER_B: 0.4 58 | COLORJITTER_C: 0.4 59 | COLORJITTER_H: 0.1 60 | COLORJITTER_S: 0.4 61 | CROP_PADDING: 4 62 | CUTOUT_LEN: 16 63 | CUTOUT_N: 1 64 | GB_K: 21 65 | GB_P: 0.5 66 | GN_MEAN: 0.0 67 | GN_STD: 0.15 68 | INTERPOLATION: bicubic 69 | NO_TRANSFORM: False 70 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 71 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 72 | RANDAUGMENT_M: 10 73 | RANDAUGMENT_N: 2 74 | RGS_P: 0.2 75 | RRCROP_SCALE: (0.08, 1.0) 76 | SIZE: (224, 224) 77 | TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') 78 | MODEL: 79 | BACKBONE: 80 | NAME: ViT-B/16 81 | PRETRAINED: True 82 | HEAD: 83 | ACTIVATION: relu 84 | BN: True 85 | DROPOUT: 0.0 86 | HIDDEN_LAYERS: () 87 | NAME: 88 | INIT_WEIGHTS: 89 | OPTIM: 90 | ADAM_BETA1: 0.9 91 | ADAM_BETA2: 0.999 92 | BASE_LR_MULT: 0.1 93 | GAMMA: 0.1 94 | LR: 0.032 95 | LR_SCHEDULER: cosine 96 | MAX_EPOCH: 25 97 | MOMENTUM: 0.9 98 | NAME: sgd 99 | NEW_LAYERS: () 100 | RMSPROP_ALPHA: 0.99 101 | SGD_DAMPNING: 0 102 | SGD_NESTEROV: False 103 | STAGED_LR: False 104 | STEPSIZE: (-1,) 105 | WARMUP_CONS_LR: 1e-05 106 | WARMUP_EPOCH: 3 107 | WARMUP_MIN_LR: 1e-05 108 | WARMUP_RECOUNT: True 109 | WARMUP_TYPE: constant 110 | WEIGHT_DECAY: 0.0005 111 | OUTPUT_DIR: output/base2new/train_base/stanford_cars/shots_16/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed1 112 | RESUME: 113 | SEED: 1 114 | TEST: 115 | COMPUTE_CMAT: False 116 | EVALUATOR: Classification 117 | FINAL_MODEL: last_step 118 | NO_TEST: False 119 | PER_CLASS_RESULT: False 120 | SPLIT: test 121 | TRAIN: 122 | CHECKPOINT_FREQ: 0 123 | COUNT_ITER: train_x 124 | PRINT_FREQ: 20 125 | TRAINER: 126 | CDAC: 127 | CLASS_LR_MULTI: 10 128 | P_THRESH: 0.95 129 | RAMPUP_COEF: 30 130 | RAMPUP_ITRS: 1000 131 | 
STRONG_TRANSFORMS: () 132 | TOPK_MATCH: 5 133 | CROSSGRAD: 134 | ALPHA_D: 0.5 135 | ALPHA_F: 0.5 136 | EPS_D: 1.0 137 | EPS_F: 1.0 138 | DAEL: 139 | CONF_THRE: 0.95 140 | STRONG_TRANSFORMS: () 141 | WEIGHT_U: 0.5 142 | DAELDG: 143 | CONF_THRE: 0.95 144 | STRONG_TRANSFORMS: () 145 | WEIGHT_U: 0.5 146 | DDAIG: 147 | ALPHA: 0.5 148 | CLAMP: False 149 | CLAMP_MAX: 1.0 150 | CLAMP_MIN: -1.0 151 | G_ARCH: 152 | LMDA: 0.3 153 | WARMUP: 0 154 | DOMAINMIX: 155 | ALPHA: 1.0 156 | BETA: 1.0 157 | TYPE: crossdomain 158 | ENTMIN: 159 | LMDA: 0.001 160 | FIXMATCH: 161 | CONF_THRE: 0.95 162 | STRONG_TRANSFORMS: () 163 | WEIGHT_U: 1.0 164 | LASP: 165 | CTX_INIT: a photo of a 166 | ENABLE: True 167 | ENABLE_CORRECTION: True 168 | ENABLE_IMPLICIT_OP: sum 169 | FINETUNE_VIT_LN: True 170 | LASP_LOSS_WEIGHT: 5.0 171 | LASP_PROMPTS: ['a photo of a {}, a type of flower.', 'a photo of a person doing {}.', 'a centered satellite photo of {}.', 'a photo of a {}, a type of aircraft.', '{} texture.', 'itap of a {}.', 'a bad photo of the {}.', 'a origami {}.', 'a photo of the large {}.', 'a {} in a video game.', 'art of the {}.', 'a photo of the small {}.', 'a photo of a {}.', 'a photo of many {}.', 'a photo of the hard to see {}.', 'a low resolution photo of the {}.', 'a rendering of a {}.', 'a bad photo of the {}.', 'a cropped photo of the {}.', 'a pixelated photo of the {}.', 'a bright photo of the {}.', 'a cropped photo of a {}.', 'a photo of the {}.', 'a good photo of the {}.', 'a rendering of the {}.', 'a close-up photo of the {}.', 'a low resolution photo of a {}.', 'a rendition of the {}.', 'a photo of the clean {}.', 'a photo of a large {}.', 'a blurry photo of a {}.', 'a pixelated photo of a {}.', 'itap of the {}.', 'a jpeg corrupted photo of the {}.', 'a good photo of a {}.'] 172 | N_CTX: 4 173 | PREC: amp 174 | PRETRAINED_PROMPTS_DIR: None 175 | TRAIN_W: True 176 | M3SDA: 177 | LMDA: 0.5 178 | N_STEP_F: 4 179 | MCD: 180 | N_STEP_F: 4 181 | MEANTEACHER: 182 | EMA_ALPHA: 0.999 183 | RAMPUP: 5 184 | WEIGHT_U: 1.0 185 | MIXMATCH: 186 | MIXUP_BETA: 0.75 187 | RAMPUP: 20000 188 | TEMP: 2.0 189 | WEIGHT_U: 100.0 190 | MME: 191 | LMDA: 0.1 192 | NAME: LASP 193 | SE: 194 | CONF_THRE: 0.95 195 | EMA_ALPHA: 0.999 196 | RAMPUP: 300 197 | USE_CUDA: True 198 | VERBOSE: True 199 | VERSION: 1 200 | Collecting env info ... 
201 | ** System info ** 202 | PyTorch version: 2.0.0 203 | Is debug build: False 204 | CUDA used to build PyTorch: 11.8 205 | ROCM used to build PyTorch: N/A 206 | 207 | OS: Ubuntu 20.04.4 LTS (x86_64) 208 | GCC version: (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0 209 | Clang version: Could not collect 210 | CMake version: version 3.18.4 211 | Libc version: glibc-2.31 212 | 213 | Python version: 3.10.9 (main, Mar 8 2023, 10:47:38) [GCC 11.2.0] (64-bit runtime) 214 | Python platform: Linux-5.4.0-100-generic-x86_64-with-glibc2.31 215 | Is CUDA available: True 216 | CUDA runtime version: 11.7.64 217 | CUDA_MODULE_LOADING set to: LAZY 218 | GPU models and configuration: 219 | GPU 0: CUDA GPU 220 | GPU 1: CUDA GPU 221 | GPU 2: CUDA GPU 222 | GPU 3: CUDA GPU 223 | 224 | Nvidia driver version: 520.61.05 225 | cuDNN version: Probably one of the following: 226 | /usr/lib/x86_64-linux-gnu/libcudnn.so.8.4.0 227 | /usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.4.0 228 | /usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.4.0 229 | /usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.4.0 230 | /usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.4.0 231 | /usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.4.0 232 | /usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.4.0 233 | HIP runtime version: N/A 234 | MIOpen runtime version: N/A 235 | Is XNNPACK available: True 236 | 237 | CPU: 238 | Architecture: x86_64 239 | CPU op-mode(s): 32-bit, 64-bit 240 | Byte Order: Little Endian 241 | Address sizes: 48 bits physical, 48 bits virtual 242 | CPU(s): 128 243 | On-line CPU(s) list: 0-127 244 | Thread(s) per core: 2 245 | Core(s) per socket: 32 246 | Socket(s): 2 247 | NUMA node(s): 4 248 | Vendor ID: AuthenticAMD 249 | CPU family: 23 250 | Model: 49 251 | Model name: AMD EPYC 7452 32-Core Processor 252 | Stepping: 0 253 | CPU MHz: 3271.511 254 | BogoMIPS: 4691.32 255 | Virtualization: AMD-V 256 | L1d cache: 2 MiB 257 | L1i cache: 2 MiB 258 | L2 cache: 32 MiB 259 | L3 cache: 256 MiB 260 | NUMA node0 CPU(s): 0-15,64-79 261 | NUMA node1 CPU(s): 16-31,80-95 262 | NUMA node2 CPU(s): 32-47,96-111 263 | NUMA node3 CPU(s): 48-63,112-127 264 | Vulnerability Itlb multihit: Not affected 265 | Vulnerability L1tf: Not affected 266 | Vulnerability Mds: Not affected 267 | Vulnerability Meltdown: Not affected 268 | Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp 269 | Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization 270 | Vulnerability Spectre v2: Mitigation; Full AMD retpoline, IBPB conditional, IBRS_FW, STIBP conditional, RSB filling 271 | Vulnerability Srbds: Not affected 272 | Vulnerability Tsx async abort: Not affected 273 | Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold 
avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca 274 | 275 | Versions of relevant libraries: 276 | [pip3] numpy==1.23.5 277 | [pip3] open-clip-torch==2.16.0 278 | [pip3] torch==2.0.0 279 | [pip3] torchaudio==2.0.0 280 | [pip3] torchvision==0.15.0 281 | [conda] blas 1.0 mkl 282 | [conda] ffmpeg 4.3 hf484d3e_0 pytorch 283 | [conda] mkl 2021.4.0 h06a4308_640 284 | [conda] mkl-service 2.4.0 py310h7f8727e_0 285 | [conda] mkl_fft 1.3.1 py310hd6ae3a3_0 286 | [conda] mkl_random 1.2.2 py310h00e6091_0 287 | [conda] numpy 1.23.5 py310hd5efca6_0 288 | [conda] numpy-base 1.23.5 py310h8e6c178_0 289 | [conda] open-clip-torch 2.16.0 dev_0 290 | [conda] pytorch 2.0.0 py3.10_cuda11.8_cudnn8.7.0_0 pytorch 291 | [conda] pytorch-cuda 11.8 h7e8668a_3 pytorch 292 | [conda] pytorch-mutex 1.0 cuda pytorch 293 | [conda] torchaudio 2.0.0 py310_cu118 pytorch 294 | [conda] torchtriton 2.0.0 py310 pytorch 295 | [conda] torchvision 0.15.0 py310_cu118 pytorch 296 | Pillow (9.4.0) 297 | 298 | Loading trainer: LASP 299 | Loading dataset: StanfordCars 300 | Reading split from /home/work/shared-fi-datasets-01/users/adrian.bulat/data/fs_datasets/stanford_cars/split_zhou_StanfordCars.json 301 | Loading preprocessed few-shot data from /home/work/shared-fi-datasets-01/users/adrian.bulat/data/fs_datasets/stanford_cars/split_fewshot/shot_16-seed_1.pkl 302 | SUBSAMPLE BASE CLASSES! 303 | Building transform_train 304 | + random resized crop (size=(224, 224), scale=(0.08, 1.0)) 305 | + random flip 306 | + to torch tensor of range [0, 1] 307 | + normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) 308 | Building transform_test 309 | + resize the smaller edge to 224 310 | + 224x224 center crop 311 | + to torch tensor of range [0, 1] 312 | + normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) 313 | --------- ------------ 314 | Dataset StanfordCars 315 | # classes 98 316 | # train_x 1,568 317 | # val 392 318 | # test 4,002 319 | --------- ------------ 320 | Loading CLIP (backbone: ViT-B/16) 321 | Building custom CLIP 322 | Initial context: "a photo of a" 323 | Number of context words (tokens): 4 324 | Initializing LASP prompts... 325 | Num classes used for LASP: 196 326 | Turning off gradients in both the image and the text encoder 327 | Re-enabling LN... 
328 | Parameters to be updated: {'image_encoder.transformer.resblocks.10.ln_2.weight', 'image_encoder.transformer.resblocks.9.ln_1.bias', 'image_encoder.transformer.resblocks.4.ln_1.weight', 'image_encoder.transformer.resblocks.5.ln_1.weight', 'image_encoder.transformer.resblocks.5.ln_1.bias', 'prompt_learner.w', 'image_encoder.transformer.resblocks.6.ln_2.weight', 'image_encoder.transformer.resblocks.9.ln_2.bias', 'image_encoder.transformer.resblocks.5.ln_2.weight', 'image_encoder.transformer.resblocks.7.ln_1.bias', 'image_encoder.transformer.resblocks.4.ln_1.bias', 'image_encoder.transformer.resblocks.1.ln_1.bias', 'image_encoder.transformer.resblocks.11.ln_2.weight', 'image_encoder.transformer.resblocks.8.ln_2.weight', 'image_encoder.transformer.resblocks.1.ln_2.bias', 'image_encoder.transformer.resblocks.10.ln_2.bias', 'image_encoder.transformer.resblocks.4.ln_2.weight', 'image_encoder.transformer.resblocks.8.ln_1.weight', 'image_encoder.transformer.resblocks.3.ln_2.weight', 'image_encoder.transformer.resblocks.5.ln_2.bias', 'image_encoder.transformer.resblocks.2.ln_1.bias', 'image_encoder.transformer.resblocks.4.ln_2.bias', 'image_encoder.transformer.resblocks.2.ln_2.weight', 'image_encoder.transformer.resblocks.11.ln_2.bias', 'image_encoder.transformer.resblocks.9.ln_1.weight', 'image_encoder.transformer.resblocks.6.ln_2.bias', 'image_encoder.transformer.resblocks.0.ln_1.bias', 'image_encoder.transformer.resblocks.7.ln_2.weight', 'image_encoder.transformer.resblocks.9.ln_2.weight', 'image_encoder.transformer.resblocks.3.ln_2.bias', 'image_encoder.transformer.resblocks.8.ln_2.bias', 'prompt_learner.ctx', 'image_encoder.transformer.resblocks.6.ln_1.bias', 'image_encoder.transformer.resblocks.2.ln_1.weight', 'image_encoder.transformer.resblocks.10.ln_1.bias', 'image_encoder.transformer.resblocks.3.ln_1.bias', 'image_encoder.transformer.resblocks.0.ln_1.weight', 'image_encoder.transformer.resblocks.8.ln_1.bias', 'image_encoder.transformer.resblocks.11.ln_1.weight', 'image_encoder.transformer.resblocks.7.ln_2.bias', 'image_encoder.transformer.resblocks.10.ln_1.weight', 'image_encoder.transformer.resblocks.7.ln_1.weight', 'image_encoder.transformer.resblocks.1.ln_2.weight', 'image_encoder.transformer.resblocks.1.ln_1.weight', 'image_encoder.transformer.resblocks.2.ln_2.bias', 'image_encoder.transformer.resblocks.3.ln_1.weight', 'image_encoder.transformer.resblocks.11.ln_1.bias', 'image_encoder.transformer.resblocks.0.ln_2.weight', 'image_encoder.transformer.resblocks.6.ln_1.weight', 'image_encoder.transformer.resblocks.0.ln_2.bias'} 329 | Loading evaluator: Classification 330 | Note that load_model() is skipped as no pretrained model is given 331 | Found checkpoint at output/base2new/train_base/stanford_cars/shots_16/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed1 (will resume training) 332 | Loading checkpoint from "output/base2new/train_base/stanford_cars/shots_16/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed1/prompt_learner/model.pth.tar-25" 333 | Loaded model weights 334 | Loaded optimizer 335 | Loaded scheduler 336 | Previous epoch: 25 337 | Initialize tensorboard (log_dir=output/base2new/train_base/stanford_cars/shots_16/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed1/tensorboard) 338 | Finish training 339 | Deploy the last-epoch model 340 | Evaluate on the *test* set 341 | => result 342 | * total: 4,002 343 | * correct: 3,056 344 | * accuracy: 76.4% 345 | * error: 23.6% 346 | * macro_f1: 75.9% 347 | Elapsed: 0:00:17 348 | 
-------------------------------------------------------------------------------- /logs/stanford_cars/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed2/log.txt-2023-03-17-19-29-19: -------------------------------------------------------------------------------- 1 | *************** 2 | ** Arguments ** 3 | *************** 4 | backbone: 5 | config_file: configs/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32.yaml 6 | dataset_config_file: configs/datasets/stanford_cars.yaml 7 | eval_only: False 8 | head: 9 | load_epoch: None 10 | model_dir: 11 | no_train: False 12 | opts: ['DATASET.NUM_SHOTS', '16', 'DATASET.SUBSAMPLE_CLASSES', 'base'] 13 | output_dir: output/base2new/train_base/stanford_cars/shots_16/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed2 14 | resume: 15 | root: /home/work/shared-fi-datasets-01/users/adrian.bulat/data/fs_datasets/ 16 | seed: 2 17 | source_domains: None 18 | target_domains: None 19 | trainer: LASP 20 | transforms: None 21 | ************ 22 | ** Config ** 23 | ************ 24 | DATALOADER: 25 | K_TRANSFORMS: 1 26 | NUM_WORKERS: 8 27 | RETURN_IMG0: False 28 | TEST: 29 | BATCH_SIZE: 32 30 | SAMPLER: SequentialSampler 31 | TRAIN_U: 32 | BATCH_SIZE: 32 33 | N_DOMAIN: 0 34 | N_INS: 16 35 | SAME_AS_X: True 36 | SAMPLER: RandomSampler 37 | TRAIN_X: 38 | BATCH_SIZE: 32 39 | N_DOMAIN: 0 40 | N_INS: 16 41 | SAMPLER: RandomSampler 42 | DATASET: 43 | ALL_AS_UNLABELED: False 44 | CIFAR_C_LEVEL: 1 45 | CIFAR_C_TYPE: 46 | INCLUDE_ALL_CLASSES: False 47 | NAME: StanfordCars 48 | NUM_LABELED: -1 49 | NUM_SHOTS: 16 50 | ROOT: /home/work/shared-fi-datasets-01/users/adrian.bulat/data/fs_datasets/ 51 | SOURCE_DOMAINS: () 52 | STL10_FOLD: -1 53 | SUBSAMPLE_CLASSES: base 54 | TARGET_DOMAINS: () 55 | VAL_PERCENT: 0.1 56 | INPUT: 57 | COLORJITTER_B: 0.4 58 | COLORJITTER_C: 0.4 59 | COLORJITTER_H: 0.1 60 | COLORJITTER_S: 0.4 61 | CROP_PADDING: 4 62 | CUTOUT_LEN: 16 63 | CUTOUT_N: 1 64 | GB_K: 21 65 | GB_P: 0.5 66 | GN_MEAN: 0.0 67 | GN_STD: 0.15 68 | INTERPOLATION: bicubic 69 | NO_TRANSFORM: False 70 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 71 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 72 | RANDAUGMENT_M: 10 73 | RANDAUGMENT_N: 2 74 | RGS_P: 0.2 75 | RRCROP_SCALE: (0.08, 1.0) 76 | SIZE: (224, 224) 77 | TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') 78 | MODEL: 79 | BACKBONE: 80 | NAME: ViT-B/16 81 | PRETRAINED: True 82 | HEAD: 83 | ACTIVATION: relu 84 | BN: True 85 | DROPOUT: 0.0 86 | HIDDEN_LAYERS: () 87 | NAME: 88 | INIT_WEIGHTS: 89 | OPTIM: 90 | ADAM_BETA1: 0.9 91 | ADAM_BETA2: 0.999 92 | BASE_LR_MULT: 0.1 93 | GAMMA: 0.1 94 | LR: 0.032 95 | LR_SCHEDULER: cosine 96 | MAX_EPOCH: 25 97 | MOMENTUM: 0.9 98 | NAME: sgd 99 | NEW_LAYERS: () 100 | RMSPROP_ALPHA: 0.99 101 | SGD_DAMPNING: 0 102 | SGD_NESTEROV: False 103 | STAGED_LR: False 104 | STEPSIZE: (-1,) 105 | WARMUP_CONS_LR: 1e-05 106 | WARMUP_EPOCH: 3 107 | WARMUP_MIN_LR: 1e-05 108 | WARMUP_RECOUNT: True 109 | WARMUP_TYPE: constant 110 | WEIGHT_DECAY: 0.0005 111 | OUTPUT_DIR: output/base2new/train_base/stanford_cars/shots_16/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed2 112 | RESUME: 113 | SEED: 2 114 | TEST: 115 | COMPUTE_CMAT: False 116 | EVALUATOR: Classification 117 | FINAL_MODEL: last_step 118 | NO_TEST: False 119 | PER_CLASS_RESULT: False 120 | SPLIT: test 121 | TRAIN: 122 | CHECKPOINT_FREQ: 0 123 | COUNT_ITER: train_x 124 | PRINT_FREQ: 20 125 | TRAINER: 126 | CDAC: 127 | CLASS_LR_MULTI: 10 128 | P_THRESH: 0.95 129 | RAMPUP_COEF: 30 130 | RAMPUP_ITRS: 1000 131 | 
STRONG_TRANSFORMS: () 132 | TOPK_MATCH: 5 133 | CROSSGRAD: 134 | ALPHA_D: 0.5 135 | ALPHA_F: 0.5 136 | EPS_D: 1.0 137 | EPS_F: 1.0 138 | DAEL: 139 | CONF_THRE: 0.95 140 | STRONG_TRANSFORMS: () 141 | WEIGHT_U: 0.5 142 | DAELDG: 143 | CONF_THRE: 0.95 144 | STRONG_TRANSFORMS: () 145 | WEIGHT_U: 0.5 146 | DDAIG: 147 | ALPHA: 0.5 148 | CLAMP: False 149 | CLAMP_MAX: 1.0 150 | CLAMP_MIN: -1.0 151 | G_ARCH: 152 | LMDA: 0.3 153 | WARMUP: 0 154 | DOMAINMIX: 155 | ALPHA: 1.0 156 | BETA: 1.0 157 | TYPE: crossdomain 158 | ENTMIN: 159 | LMDA: 0.001 160 | FIXMATCH: 161 | CONF_THRE: 0.95 162 | STRONG_TRANSFORMS: () 163 | WEIGHT_U: 1.0 164 | LASP: 165 | CTX_INIT: a photo of a 166 | ENABLE: True 167 | ENABLE_CORRECTION: True 168 | ENABLE_IMPLICIT_OP: sum 169 | FINETUNE_VIT_LN: True 170 | LASP_LOSS_WEIGHT: 5.0 171 | LASP_PROMPTS: ['a photo of a {}, a type of flower.', 'a photo of a person doing {}.', 'a centered satellite photo of {}.', 'a photo of a {}, a type of aircraft.', '{} texture.', 'itap of a {}.', 'a bad photo of the {}.', 'a origami {}.', 'a photo of the large {}.', 'a {} in a video game.', 'art of the {}.', 'a photo of the small {}.', 'a photo of a {}.', 'a photo of many {}.', 'a photo of the hard to see {}.', 'a low resolution photo of the {}.', 'a rendering of a {}.', 'a bad photo of the {}.', 'a cropped photo of the {}.', 'a pixelated photo of the {}.', 'a bright photo of the {}.', 'a cropped photo of a {}.', 'a photo of the {}.', 'a good photo of the {}.', 'a rendering of the {}.', 'a close-up photo of the {}.', 'a low resolution photo of a {}.', 'a rendition of the {}.', 'a photo of the clean {}.', 'a photo of a large {}.', 'a blurry photo of a {}.', 'a pixelated photo of a {}.', 'itap of the {}.', 'a jpeg corrupted photo of the {}.', 'a good photo of a {}.'] 172 | N_CTX: 4 173 | PREC: amp 174 | PRETRAINED_PROMPTS_DIR: None 175 | TRAIN_W: True 176 | M3SDA: 177 | LMDA: 0.5 178 | N_STEP_F: 4 179 | MCD: 180 | N_STEP_F: 4 181 | MEANTEACHER: 182 | EMA_ALPHA: 0.999 183 | RAMPUP: 5 184 | WEIGHT_U: 1.0 185 | MIXMATCH: 186 | MIXUP_BETA: 0.75 187 | RAMPUP: 20000 188 | TEMP: 2.0 189 | WEIGHT_U: 100.0 190 | MME: 191 | LMDA: 0.1 192 | NAME: LASP 193 | SE: 194 | CONF_THRE: 0.95 195 | EMA_ALPHA: 0.999 196 | RAMPUP: 300 197 | USE_CUDA: True 198 | VERBOSE: True 199 | VERSION: 1 200 | Collecting env info ... 
201 | ** System info ** 202 | PyTorch version: 2.0.0 203 | Is debug build: False 204 | CUDA used to build PyTorch: 11.8 205 | ROCM used to build PyTorch: N/A 206 | 207 | OS: Ubuntu 20.04.4 LTS (x86_64) 208 | GCC version: (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0 209 | Clang version: Could not collect 210 | CMake version: version 3.18.4 211 | Libc version: glibc-2.31 212 | 213 | Python version: 3.10.9 (main, Mar 8 2023, 10:47:38) [GCC 11.2.0] (64-bit runtime) 214 | Python platform: Linux-5.4.0-100-generic-x86_64-with-glibc2.31 215 | Is CUDA available: True 216 | CUDA runtime version: 11.7.64 217 | CUDA_MODULE_LOADING set to: LAZY 218 | GPU models and configuration: 219 | GPU 0: CUDA GPU 220 | GPU 1: CUDA GPU 221 | GPU 2: CUDA GPU 222 | GPU 3: CUDA GPU 223 | 224 | Nvidia driver version: 520.61.05 225 | cuDNN version: Probably one of the following: 226 | /usr/lib/x86_64-linux-gnu/libcudnn.so.8.4.0 227 | /usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.4.0 228 | /usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.4.0 229 | /usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.4.0 230 | /usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.4.0 231 | /usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.4.0 232 | /usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.4.0 233 | HIP runtime version: N/A 234 | MIOpen runtime version: N/A 235 | Is XNNPACK available: True 236 | 237 | CPU: 238 | Architecture: x86_64 239 | CPU op-mode(s): 32-bit, 64-bit 240 | Byte Order: Little Endian 241 | Address sizes: 48 bits physical, 48 bits virtual 242 | CPU(s): 128 243 | On-line CPU(s) list: 0-127 244 | Thread(s) per core: 2 245 | Core(s) per socket: 32 246 | Socket(s): 2 247 | NUMA node(s): 4 248 | Vendor ID: AuthenticAMD 249 | CPU family: 23 250 | Model: 49 251 | Model name: AMD EPYC 7452 32-Core Processor 252 | Stepping: 0 253 | CPU MHz: 3216.970 254 | BogoMIPS: 4691.32 255 | Virtualization: AMD-V 256 | L1d cache: 2 MiB 257 | L1i cache: 2 MiB 258 | L2 cache: 32 MiB 259 | L3 cache: 256 MiB 260 | NUMA node0 CPU(s): 0-15,64-79 261 | NUMA node1 CPU(s): 16-31,80-95 262 | NUMA node2 CPU(s): 32-47,96-111 263 | NUMA node3 CPU(s): 48-63,112-127 264 | Vulnerability Itlb multihit: Not affected 265 | Vulnerability L1tf: Not affected 266 | Vulnerability Mds: Not affected 267 | Vulnerability Meltdown: Not affected 268 | Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp 269 | Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization 270 | Vulnerability Spectre v2: Mitigation; Full AMD retpoline, IBPB conditional, IBRS_FW, STIBP conditional, RSB filling 271 | Vulnerability Srbds: Not affected 272 | Vulnerability Tsx async abort: Not affected 273 | Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold 
avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca 274 | 275 | Versions of relevant libraries: 276 | [pip3] numpy==1.23.5 277 | [pip3] open-clip-torch==2.16.0 278 | [pip3] torch==2.0.0 279 | [pip3] torchaudio==2.0.0 280 | [pip3] torchvision==0.15.0 281 | [conda] blas 1.0 mkl 282 | [conda] ffmpeg 4.3 hf484d3e_0 pytorch 283 | [conda] mkl 2021.4.0 h06a4308_640 284 | [conda] mkl-service 2.4.0 py310h7f8727e_0 285 | [conda] mkl_fft 1.3.1 py310hd6ae3a3_0 286 | [conda] mkl_random 1.2.2 py310h00e6091_0 287 | [conda] numpy 1.23.5 py310hd5efca6_0 288 | [conda] numpy-base 1.23.5 py310h8e6c178_0 289 | [conda] open-clip-torch 2.16.0 dev_0 290 | [conda] pytorch 2.0.0 py3.10_cuda11.8_cudnn8.7.0_0 pytorch 291 | [conda] pytorch-cuda 11.8 h7e8668a_3 pytorch 292 | [conda] pytorch-mutex 1.0 cuda pytorch 293 | [conda] torchaudio 2.0.0 py310_cu118 pytorch 294 | [conda] torchtriton 2.0.0 py310 pytorch 295 | [conda] torchvision 0.15.0 py310_cu118 pytorch 296 | Pillow (9.4.0) 297 | 298 | Loading trainer: LASP 299 | Loading dataset: StanfordCars 300 | Reading split from /home/work/shared-fi-datasets-01/users/adrian.bulat/data/fs_datasets/stanford_cars/split_zhou_StanfordCars.json 301 | Loading preprocessed few-shot data from /home/work/shared-fi-datasets-01/users/adrian.bulat/data/fs_datasets/stanford_cars/split_fewshot/shot_16-seed_2.pkl 302 | SUBSAMPLE BASE CLASSES! 303 | Building transform_train 304 | + random resized crop (size=(224, 224), scale=(0.08, 1.0)) 305 | + random flip 306 | + to torch tensor of range [0, 1] 307 | + normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) 308 | Building transform_test 309 | + resize the smaller edge to 224 310 | + 224x224 center crop 311 | + to torch tensor of range [0, 1] 312 | + normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) 313 | --------- ------------ 314 | Dataset StanfordCars 315 | # classes 98 316 | # train_x 1,568 317 | # val 392 318 | # test 4,002 319 | --------- ------------ 320 | Loading CLIP (backbone: ViT-B/16) 321 | Building custom CLIP 322 | Initial context: "a photo of a" 323 | Number of context words (tokens): 4 324 | Initializing LASP prompts... 325 | Num classes used for LASP: 196 326 | Turning off gradients in both the image and the text encoder 327 | Re-enabling LN... 
328 | Parameters to be updated: {'image_encoder.transformer.resblocks.6.ln_2.weight', 'image_encoder.transformer.resblocks.0.ln_2.weight', 'image_encoder.transformer.resblocks.5.ln_1.bias', 'prompt_learner.ctx', 'image_encoder.transformer.resblocks.7.ln_1.bias', 'image_encoder.transformer.resblocks.11.ln_1.weight', 'image_encoder.transformer.resblocks.2.ln_2.bias', 'image_encoder.transformer.resblocks.10.ln_2.bias', 'image_encoder.transformer.resblocks.11.ln_2.weight', 'image_encoder.transformer.resblocks.6.ln_1.bias', 'image_encoder.transformer.resblocks.0.ln_2.bias', 'image_encoder.transformer.resblocks.5.ln_1.weight', 'image_encoder.transformer.resblocks.7.ln_1.weight', 'image_encoder.transformer.resblocks.5.ln_2.weight', 'image_encoder.transformer.resblocks.8.ln_1.bias', 'image_encoder.transformer.resblocks.3.ln_1.weight', 'image_encoder.transformer.resblocks.8.ln_2.weight', 'image_encoder.transformer.resblocks.8.ln_1.weight', 'image_encoder.transformer.resblocks.10.ln_1.bias', 'image_encoder.transformer.resblocks.4.ln_2.bias', 'image_encoder.transformer.resblocks.3.ln_2.bias', 'image_encoder.transformer.resblocks.4.ln_1.bias', 'image_encoder.transformer.resblocks.11.ln_2.bias', 'image_encoder.transformer.resblocks.0.ln_1.bias', 'image_encoder.transformer.resblocks.7.ln_2.bias', 'image_encoder.transformer.resblocks.1.ln_2.weight', 'image_encoder.transformer.resblocks.7.ln_2.weight', 'image_encoder.transformer.resblocks.3.ln_2.weight', 'image_encoder.transformer.resblocks.1.ln_1.weight', 'image_encoder.transformer.resblocks.9.ln_1.bias', 'image_encoder.transformer.resblocks.9.ln_2.bias', 'image_encoder.transformer.resblocks.3.ln_1.bias', 'image_encoder.transformer.resblocks.1.ln_2.bias', 'image_encoder.transformer.resblocks.4.ln_2.weight', 'image_encoder.transformer.resblocks.10.ln_2.weight', 'image_encoder.transformer.resblocks.4.ln_1.weight', 'image_encoder.transformer.resblocks.9.ln_1.weight', 'image_encoder.transformer.resblocks.0.ln_1.weight', 'image_encoder.transformer.resblocks.2.ln_1.bias', 'image_encoder.transformer.resblocks.9.ln_2.weight', 'image_encoder.transformer.resblocks.10.ln_1.weight', 'image_encoder.transformer.resblocks.11.ln_1.bias', 'prompt_learner.w', 'image_encoder.transformer.resblocks.5.ln_2.bias', 'image_encoder.transformer.resblocks.1.ln_1.bias', 'image_encoder.transformer.resblocks.6.ln_1.weight', 'image_encoder.transformer.resblocks.8.ln_2.bias', 'image_encoder.transformer.resblocks.6.ln_2.bias', 'image_encoder.transformer.resblocks.2.ln_2.weight', 'image_encoder.transformer.resblocks.2.ln_1.weight'} 329 | Loading evaluator: Classification 330 | Note that load_model() is skipped as no pretrained model is given 331 | Found checkpoint at output/base2new/train_base/stanford_cars/shots_16/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed2 (will resume training) 332 | Loading checkpoint from "output/base2new/train_base/stanford_cars/shots_16/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed2/prompt_learner/model.pth.tar-25" 333 | Loaded model weights 334 | Loaded optimizer 335 | Loaded scheduler 336 | Previous epoch: 25 337 | Initialize tensorboard (log_dir=output/base2new/train_base/stanford_cars/shots_16/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed2/tensorboard) 338 | Finish training 339 | Deploy the last-epoch model 340 | Evaluate on the *test* set 341 | => result 342 | * total: 4,002 343 | * correct: 3,042 344 | * accuracy: 76.0% 345 | * error: 24.0% 346 | * macro_f1: 75.6% 347 | Elapsed: 0:00:17 348 | 
-------------------------------------------------------------------------------- /logs/stanford_cars/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed3/log.txt-2023-03-17-19-30-31: -------------------------------------------------------------------------------- 1 | *************** 2 | ** Arguments ** 3 | *************** 4 | backbone: 5 | config_file: configs/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32.yaml 6 | dataset_config_file: configs/datasets/stanford_cars.yaml 7 | eval_only: False 8 | head: 9 | load_epoch: None 10 | model_dir: 11 | no_train: False 12 | opts: ['DATASET.NUM_SHOTS', '16', 'DATASET.SUBSAMPLE_CLASSES', 'base'] 13 | output_dir: output/base2new/train_base/stanford_cars/shots_16/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed3 14 | resume: 15 | root: /home/work/shared-fi-datasets-01/users/adrian.bulat/data/fs_datasets/ 16 | seed: 3 17 | source_domains: None 18 | target_domains: None 19 | trainer: LASP 20 | transforms: None 21 | ************ 22 | ** Config ** 23 | ************ 24 | DATALOADER: 25 | K_TRANSFORMS: 1 26 | NUM_WORKERS: 8 27 | RETURN_IMG0: False 28 | TEST: 29 | BATCH_SIZE: 32 30 | SAMPLER: SequentialSampler 31 | TRAIN_U: 32 | BATCH_SIZE: 32 33 | N_DOMAIN: 0 34 | N_INS: 16 35 | SAME_AS_X: True 36 | SAMPLER: RandomSampler 37 | TRAIN_X: 38 | BATCH_SIZE: 32 39 | N_DOMAIN: 0 40 | N_INS: 16 41 | SAMPLER: RandomSampler 42 | DATASET: 43 | ALL_AS_UNLABELED: False 44 | CIFAR_C_LEVEL: 1 45 | CIFAR_C_TYPE: 46 | INCLUDE_ALL_CLASSES: False 47 | NAME: StanfordCars 48 | NUM_LABELED: -1 49 | NUM_SHOTS: 16 50 | ROOT: /home/work/shared-fi-datasets-01/users/adrian.bulat/data/fs_datasets/ 51 | SOURCE_DOMAINS: () 52 | STL10_FOLD: -1 53 | SUBSAMPLE_CLASSES: base 54 | TARGET_DOMAINS: () 55 | VAL_PERCENT: 0.1 56 | INPUT: 57 | COLORJITTER_B: 0.4 58 | COLORJITTER_C: 0.4 59 | COLORJITTER_H: 0.1 60 | COLORJITTER_S: 0.4 61 | CROP_PADDING: 4 62 | CUTOUT_LEN: 16 63 | CUTOUT_N: 1 64 | GB_K: 21 65 | GB_P: 0.5 66 | GN_MEAN: 0.0 67 | GN_STD: 0.15 68 | INTERPOLATION: bicubic 69 | NO_TRANSFORM: False 70 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 71 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 72 | RANDAUGMENT_M: 10 73 | RANDAUGMENT_N: 2 74 | RGS_P: 0.2 75 | RRCROP_SCALE: (0.08, 1.0) 76 | SIZE: (224, 224) 77 | TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') 78 | MODEL: 79 | BACKBONE: 80 | NAME: ViT-B/16 81 | PRETRAINED: True 82 | HEAD: 83 | ACTIVATION: relu 84 | BN: True 85 | DROPOUT: 0.0 86 | HIDDEN_LAYERS: () 87 | NAME: 88 | INIT_WEIGHTS: 89 | OPTIM: 90 | ADAM_BETA1: 0.9 91 | ADAM_BETA2: 0.999 92 | BASE_LR_MULT: 0.1 93 | GAMMA: 0.1 94 | LR: 0.032 95 | LR_SCHEDULER: cosine 96 | MAX_EPOCH: 25 97 | MOMENTUM: 0.9 98 | NAME: sgd 99 | NEW_LAYERS: () 100 | RMSPROP_ALPHA: 0.99 101 | SGD_DAMPNING: 0 102 | SGD_NESTEROV: False 103 | STAGED_LR: False 104 | STEPSIZE: (-1,) 105 | WARMUP_CONS_LR: 1e-05 106 | WARMUP_EPOCH: 3 107 | WARMUP_MIN_LR: 1e-05 108 | WARMUP_RECOUNT: True 109 | WARMUP_TYPE: constant 110 | WEIGHT_DECAY: 0.0005 111 | OUTPUT_DIR: output/base2new/train_base/stanford_cars/shots_16/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed3 112 | RESUME: 113 | SEED: 3 114 | TEST: 115 | COMPUTE_CMAT: False 116 | EVALUATOR: Classification 117 | FINAL_MODEL: last_step 118 | NO_TEST: False 119 | PER_CLASS_RESULT: False 120 | SPLIT: test 121 | TRAIN: 122 | CHECKPOINT_FREQ: 0 123 | COUNT_ITER: train_x 124 | PRINT_FREQ: 20 125 | TRAINER: 126 | CDAC: 127 | CLASS_LR_MULTI: 10 128 | P_THRESH: 0.95 129 | RAMPUP_COEF: 30 130 | RAMPUP_ITRS: 1000 131 | 
STRONG_TRANSFORMS: () 132 | TOPK_MATCH: 5 133 | CROSSGRAD: 134 | ALPHA_D: 0.5 135 | ALPHA_F: 0.5 136 | EPS_D: 1.0 137 | EPS_F: 1.0 138 | DAEL: 139 | CONF_THRE: 0.95 140 | STRONG_TRANSFORMS: () 141 | WEIGHT_U: 0.5 142 | DAELDG: 143 | CONF_THRE: 0.95 144 | STRONG_TRANSFORMS: () 145 | WEIGHT_U: 0.5 146 | DDAIG: 147 | ALPHA: 0.5 148 | CLAMP: False 149 | CLAMP_MAX: 1.0 150 | CLAMP_MIN: -1.0 151 | G_ARCH: 152 | LMDA: 0.3 153 | WARMUP: 0 154 | DOMAINMIX: 155 | ALPHA: 1.0 156 | BETA: 1.0 157 | TYPE: crossdomain 158 | ENTMIN: 159 | LMDA: 0.001 160 | FIXMATCH: 161 | CONF_THRE: 0.95 162 | STRONG_TRANSFORMS: () 163 | WEIGHT_U: 1.0 164 | LASP: 165 | CTX_INIT: a photo of a 166 | ENABLE: True 167 | ENABLE_CORRECTION: True 168 | ENABLE_IMPLICIT_OP: sum 169 | FINETUNE_VIT_LN: True 170 | LASP_LOSS_WEIGHT: 5.0 171 | LASP_PROMPTS: ['a photo of a {}, a type of flower.', 'a photo of a person doing {}.', 'a centered satellite photo of {}.', 'a photo of a {}, a type of aircraft.', '{} texture.', 'itap of a {}.', 'a bad photo of the {}.', 'a origami {}.', 'a photo of the large {}.', 'a {} in a video game.', 'art of the {}.', 'a photo of the small {}.', 'a photo of a {}.', 'a photo of many {}.', 'a photo of the hard to see {}.', 'a low resolution photo of the {}.', 'a rendering of a {}.', 'a bad photo of the {}.', 'a cropped photo of the {}.', 'a pixelated photo of the {}.', 'a bright photo of the {}.', 'a cropped photo of a {}.', 'a photo of the {}.', 'a good photo of the {}.', 'a rendering of the {}.', 'a close-up photo of the {}.', 'a low resolution photo of a {}.', 'a rendition of the {}.', 'a photo of the clean {}.', 'a photo of a large {}.', 'a blurry photo of a {}.', 'a pixelated photo of a {}.', 'itap of the {}.', 'a jpeg corrupted photo of the {}.', 'a good photo of a {}.'] 172 | N_CTX: 4 173 | PREC: amp 174 | PRETRAINED_PROMPTS_DIR: None 175 | TRAIN_W: True 176 | M3SDA: 177 | LMDA: 0.5 178 | N_STEP_F: 4 179 | MCD: 180 | N_STEP_F: 4 181 | MEANTEACHER: 182 | EMA_ALPHA: 0.999 183 | RAMPUP: 5 184 | WEIGHT_U: 1.0 185 | MIXMATCH: 186 | MIXUP_BETA: 0.75 187 | RAMPUP: 20000 188 | TEMP: 2.0 189 | WEIGHT_U: 100.0 190 | MME: 191 | LMDA: 0.1 192 | NAME: LASP 193 | SE: 194 | CONF_THRE: 0.95 195 | EMA_ALPHA: 0.999 196 | RAMPUP: 300 197 | USE_CUDA: True 198 | VERBOSE: True 199 | VERSION: 1 200 | Collecting env info ... 
201 | ** System info ** 202 | PyTorch version: 2.0.0 203 | Is debug build: False 204 | CUDA used to build PyTorch: 11.8 205 | ROCM used to build PyTorch: N/A 206 | 207 | OS: Ubuntu 20.04.4 LTS (x86_64) 208 | GCC version: (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0 209 | Clang version: Could not collect 210 | CMake version: version 3.18.4 211 | Libc version: glibc-2.31 212 | 213 | Python version: 3.10.9 (main, Mar 8 2023, 10:47:38) [GCC 11.2.0] (64-bit runtime) 214 | Python platform: Linux-5.4.0-100-generic-x86_64-with-glibc2.31 215 | Is CUDA available: True 216 | CUDA runtime version: 11.7.64 217 | CUDA_MODULE_LOADING set to: LAZY 218 | GPU models and configuration: 219 | GPU 0: CUDA GPU 220 | GPU 1: CUDA GPU 221 | GPU 2: CUDA GPU 222 | GPU 3: CUDA GPU 223 | 224 | Nvidia driver version: 520.61.05 225 | cuDNN version: Probably one of the following: 226 | /usr/lib/x86_64-linux-gnu/libcudnn.so.8.4.0 227 | /usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.4.0 228 | /usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.4.0 229 | /usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.4.0 230 | /usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.4.0 231 | /usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.4.0 232 | /usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.4.0 233 | HIP runtime version: N/A 234 | MIOpen runtime version: N/A 235 | Is XNNPACK available: True 236 | 237 | CPU: 238 | Architecture: x86_64 239 | CPU op-mode(s): 32-bit, 64-bit 240 | Byte Order: Little Endian 241 | Address sizes: 48 bits physical, 48 bits virtual 242 | CPU(s): 128 243 | On-line CPU(s) list: 0-127 244 | Thread(s) per core: 2 245 | Core(s) per socket: 32 246 | Socket(s): 2 247 | NUMA node(s): 4 248 | Vendor ID: AuthenticAMD 249 | CPU family: 23 250 | Model: 49 251 | Model name: AMD EPYC 7452 32-Core Processor 252 | Stepping: 0 253 | CPU MHz: 3182.509 254 | BogoMIPS: 4691.32 255 | Virtualization: AMD-V 256 | L1d cache: 2 MiB 257 | L1i cache: 2 MiB 258 | L2 cache: 32 MiB 259 | L3 cache: 256 MiB 260 | NUMA node0 CPU(s): 0-15,64-79 261 | NUMA node1 CPU(s): 16-31,80-95 262 | NUMA node2 CPU(s): 32-47,96-111 263 | NUMA node3 CPU(s): 48-63,112-127 264 | Vulnerability Itlb multihit: Not affected 265 | Vulnerability L1tf: Not affected 266 | Vulnerability Mds: Not affected 267 | Vulnerability Meltdown: Not affected 268 | Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp 269 | Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization 270 | Vulnerability Spectre v2: Mitigation; Full AMD retpoline, IBPB conditional, IBRS_FW, STIBP conditional, RSB filling 271 | Vulnerability Srbds: Not affected 272 | Vulnerability Tsx async abort: Not affected 273 | Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold 
avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca 274 | 275 | Versions of relevant libraries: 276 | [pip3] numpy==1.23.5 277 | [pip3] open-clip-torch==2.16.0 278 | [pip3] torch==2.0.0 279 | [pip3] torchaudio==2.0.0 280 | [pip3] torchvision==0.15.0 281 | [conda] blas 1.0 mkl 282 | [conda] ffmpeg 4.3 hf484d3e_0 pytorch 283 | [conda] mkl 2021.4.0 h06a4308_640 284 | [conda] mkl-service 2.4.0 py310h7f8727e_0 285 | [conda] mkl_fft 1.3.1 py310hd6ae3a3_0 286 | [conda] mkl_random 1.2.2 py310h00e6091_0 287 | [conda] numpy 1.23.5 py310hd5efca6_0 288 | [conda] numpy-base 1.23.5 py310h8e6c178_0 289 | [conda] open-clip-torch 2.16.0 dev_0 290 | [conda] pytorch 2.0.0 py3.10_cuda11.8_cudnn8.7.0_0 pytorch 291 | [conda] pytorch-cuda 11.8 h7e8668a_3 pytorch 292 | [conda] pytorch-mutex 1.0 cuda pytorch 293 | [conda] torchaudio 2.0.0 py310_cu118 pytorch 294 | [conda] torchtriton 2.0.0 py310 pytorch 295 | [conda] torchvision 0.15.0 py310_cu118 pytorch 296 | Pillow (9.4.0) 297 | 298 | Loading trainer: LASP 299 | Loading dataset: StanfordCars 300 | Reading split from /home/work/shared-fi-datasets-01/users/adrian.bulat/data/fs_datasets/stanford_cars/split_zhou_StanfordCars.json 301 | Loading preprocessed few-shot data from /home/work/shared-fi-datasets-01/users/adrian.bulat/data/fs_datasets/stanford_cars/split_fewshot/shot_16-seed_3.pkl 302 | SUBSAMPLE BASE CLASSES! 303 | Building transform_train 304 | + random resized crop (size=(224, 224), scale=(0.08, 1.0)) 305 | + random flip 306 | + to torch tensor of range [0, 1] 307 | + normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) 308 | Building transform_test 309 | + resize the smaller edge to 224 310 | + 224x224 center crop 311 | + to torch tensor of range [0, 1] 312 | + normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) 313 | --------- ------------ 314 | Dataset StanfordCars 315 | # classes 98 316 | # train_x 1,568 317 | # val 392 318 | # test 4,002 319 | --------- ------------ 320 | Loading CLIP (backbone: ViT-B/16) 321 | Building custom CLIP 322 | Initial context: "a photo of a" 323 | Number of context words (tokens): 4 324 | Initializing LASP prompts... 325 | Num classes used for LASP: 196 326 | Turning off gradients in both the image and the text encoder 327 | Re-enabling LN... 
328 | Parameters to be updated: {'image_encoder.transformer.resblocks.1.ln_1.bias', 'image_encoder.transformer.resblocks.9.ln_1.weight', 'image_encoder.transformer.resblocks.11.ln_2.bias', 'image_encoder.transformer.resblocks.7.ln_2.bias', 'image_encoder.transformer.resblocks.6.ln_2.weight', 'image_encoder.transformer.resblocks.6.ln_2.bias', 'image_encoder.transformer.resblocks.5.ln_2.weight', 'image_encoder.transformer.resblocks.11.ln_2.weight', 'image_encoder.transformer.resblocks.8.ln_2.weight', 'image_encoder.transformer.resblocks.4.ln_2.bias', 'image_encoder.transformer.resblocks.8.ln_1.weight', 'image_encoder.transformer.resblocks.6.ln_1.weight', 'image_encoder.transformer.resblocks.7.ln_1.bias', 'image_encoder.transformer.resblocks.10.ln_1.weight', 'image_encoder.transformer.resblocks.10.ln_2.weight', 'image_encoder.transformer.resblocks.1.ln_1.weight', 'image_encoder.transformer.resblocks.0.ln_2.bias', 'image_encoder.transformer.resblocks.5.ln_1.bias', 'image_encoder.transformer.resblocks.7.ln_2.weight', 'image_encoder.transformer.resblocks.5.ln_2.bias', 'image_encoder.transformer.resblocks.8.ln_1.bias', 'image_encoder.transformer.resblocks.4.ln_1.weight', 'image_encoder.transformer.resblocks.3.ln_1.bias', 'image_encoder.transformer.resblocks.2.ln_1.bias', 'image_encoder.transformer.resblocks.11.ln_1.weight', 'image_encoder.transformer.resblocks.0.ln_1.bias', 'image_encoder.transformer.resblocks.2.ln_2.bias', 'image_encoder.transformer.resblocks.3.ln_2.bias', 'prompt_learner.w', 'image_encoder.transformer.resblocks.9.ln_2.bias', 'image_encoder.transformer.resblocks.9.ln_2.weight', 'image_encoder.transformer.resblocks.3.ln_2.weight', 'image_encoder.transformer.resblocks.3.ln_1.weight', 'image_encoder.transformer.resblocks.4.ln_2.weight', 'image_encoder.transformer.resblocks.5.ln_1.weight', 'image_encoder.transformer.resblocks.6.ln_1.bias', 'prompt_learner.ctx', 'image_encoder.transformer.resblocks.0.ln_1.weight', 'image_encoder.transformer.resblocks.2.ln_2.weight', 'image_encoder.transformer.resblocks.11.ln_1.bias', 'image_encoder.transformer.resblocks.9.ln_1.bias', 'image_encoder.transformer.resblocks.8.ln_2.bias', 'image_encoder.transformer.resblocks.2.ln_1.weight', 'image_encoder.transformer.resblocks.0.ln_2.weight', 'image_encoder.transformer.resblocks.1.ln_2.weight', 'image_encoder.transformer.resblocks.4.ln_1.bias', 'image_encoder.transformer.resblocks.7.ln_1.weight', 'image_encoder.transformer.resblocks.10.ln_1.bias', 'image_encoder.transformer.resblocks.10.ln_2.bias', 'image_encoder.transformer.resblocks.1.ln_2.bias'} 329 | Loading evaluator: Classification 330 | Note that load_model() is skipped as no pretrained model is given 331 | Found checkpoint at output/base2new/train_base/stanford_cars/shots_16/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed3 (will resume training) 332 | Loading checkpoint from "output/base2new/train_base/stanford_cars/shots_16/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed3/prompt_learner/model.pth.tar-25" 333 | Loaded model weights 334 | Loaded optimizer 335 | Loaded scheduler 336 | Previous epoch: 25 337 | Initialize tensorboard (log_dir=output/base2new/train_base/stanford_cars/shots_16/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed3/tensorboard) 338 | Finish training 339 | Deploy the last-epoch model 340 | Evaluate on the *test* set 341 | => result 342 | * total: 4,002 343 | * correct: 3,057 344 | * accuracy: 76.4% 345 | * error: 23.6% 346 | * macro_f1: 75.8% 347 | Elapsed: 0:00:17 348 | 
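
The three stanford_cars logs above report base-class test accuracies of 76.4%, 76.0%, and 76.4% for seeds 1-3. Below is a minimal sketch of averaging such per-seed numbers; the helper is hypothetical and not part of this repository, and the accuracy values are simply copied from the logs above.

# Hypothetical aggregation sketch (not part of the repo): mean accuracy over the
# three StanfordCars base-class runs reported above.
seed_accs = {"seed1": 76.4, "seed2": 76.0, "seed3": 76.4}
mean_acc = sum(seed_accs.values()) / len(seed_accs)
print(f"StanfordCars (base, 16 shots): {mean_acc:.2f}% over {len(seed_accs)} seeds")  # ~76.27%
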
-------------------------------------------------------------------------------- /logs/oxford_flowers/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed1/log.txt: -------------------------------------------------------------------------------- 1 | *************** 2 | ** Arguments ** 3 | *************** 4 | backbone: 5 | config_file: configs/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32.yaml 6 | dataset_config_file: configs/datasets/oxford_flowers.yaml 7 | eval_only: False 8 | head: 9 | load_epoch: None 10 | model_dir: 11 | no_train: False 12 | opts: ['DATASET.NUM_SHOTS', '16', 'DATASET.SUBSAMPLE_CLASSES', 'base'] 13 | output_dir: output/base2new/train_base/oxford_flowers/shots_16/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed1 14 | resume: 15 | root: /home/work/shared-fi-datasets-01/users/adrian.bulat/data/fs_datasets/ 16 | seed: 1 17 | source_domains: None 18 | target_domains: None 19 | trainer: LASP 20 | transforms: None 21 | ************ 22 | ** Config ** 23 | ************ 24 | DATALOADER: 25 | K_TRANSFORMS: 1 26 | NUM_WORKERS: 8 27 | RETURN_IMG0: False 28 | TEST: 29 | BATCH_SIZE: 32 30 | SAMPLER: SequentialSampler 31 | TRAIN_U: 32 | BATCH_SIZE: 32 33 | N_DOMAIN: 0 34 | N_INS: 16 35 | SAME_AS_X: True 36 | SAMPLER: RandomSampler 37 | TRAIN_X: 38 | BATCH_SIZE: 32 39 | N_DOMAIN: 0 40 | N_INS: 16 41 | SAMPLER: RandomSampler 42 | DATASET: 43 | ALL_AS_UNLABELED: False 44 | CIFAR_C_LEVEL: 1 45 | CIFAR_C_TYPE: 46 | INCLUDE_ALL_CLASSES: False 47 | NAME: OxfordFlowers 48 | NUM_LABELED: -1 49 | NUM_SHOTS: 16 50 | ROOT: /home/work/shared-fi-datasets-01/users/adrian.bulat/data/fs_datasets/ 51 | SOURCE_DOMAINS: () 52 | STL10_FOLD: -1 53 | SUBSAMPLE_CLASSES: base 54 | TARGET_DOMAINS: () 55 | VAL_PERCENT: 0.1 56 | INPUT: 57 | COLORJITTER_B: 0.4 58 | COLORJITTER_C: 0.4 59 | COLORJITTER_H: 0.1 60 | COLORJITTER_S: 0.4 61 | CROP_PADDING: 4 62 | CUTOUT_LEN: 16 63 | CUTOUT_N: 1 64 | GB_K: 21 65 | GB_P: 0.5 66 | GN_MEAN: 0.0 67 | GN_STD: 0.15 68 | INTERPOLATION: bicubic 69 | NO_TRANSFORM: False 70 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 71 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 72 | RANDAUGMENT_M: 10 73 | RANDAUGMENT_N: 2 74 | RGS_P: 0.2 75 | RRCROP_SCALE: (0.08, 1.0) 76 | SIZE: (224, 224) 77 | TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') 78 | MODEL: 79 | BACKBONE: 80 | NAME: ViT-B/16 81 | PRETRAINED: True 82 | HEAD: 83 | ACTIVATION: relu 84 | BN: True 85 | DROPOUT: 0.0 86 | HIDDEN_LAYERS: () 87 | NAME: 88 | INIT_WEIGHTS: 89 | OPTIM: 90 | ADAM_BETA1: 0.9 91 | ADAM_BETA2: 0.999 92 | BASE_LR_MULT: 0.1 93 | GAMMA: 0.1 94 | LR: 0.032 95 | LR_SCHEDULER: cosine 96 | MAX_EPOCH: 25 97 | MOMENTUM: 0.9 98 | NAME: sgd 99 | NEW_LAYERS: () 100 | RMSPROP_ALPHA: 0.99 101 | SGD_DAMPNING: 0 102 | SGD_NESTEROV: False 103 | STAGED_LR: False 104 | STEPSIZE: (-1,) 105 | WARMUP_CONS_LR: 1e-05 106 | WARMUP_EPOCH: 3 107 | WARMUP_MIN_LR: 1e-05 108 | WARMUP_RECOUNT: True 109 | WARMUP_TYPE: constant 110 | WEIGHT_DECAY: 0.0005 111 | OUTPUT_DIR: output/base2new/train_base/oxford_flowers/shots_16/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed1 112 | RESUME: 113 | SEED: 1 114 | TEST: 115 | COMPUTE_CMAT: False 116 | EVALUATOR: Classification 117 | FINAL_MODEL: last_step 118 | NO_TEST: False 119 | PER_CLASS_RESULT: False 120 | SPLIT: test 121 | TRAIN: 122 | CHECKPOINT_FREQ: 0 123 | COUNT_ITER: train_x 124 | PRINT_FREQ: 20 125 | TRAINER: 126 | CDAC: 127 | CLASS_LR_MULTI: 10 128 | P_THRESH: 0.95 129 | RAMPUP_COEF: 30 130 | RAMPUP_ITRS: 1000 131 | 
STRONG_TRANSFORMS: () 132 | TOPK_MATCH: 5 133 | CROSSGRAD: 134 | ALPHA_D: 0.5 135 | ALPHA_F: 0.5 136 | EPS_D: 1.0 137 | EPS_F: 1.0 138 | DAEL: 139 | CONF_THRE: 0.95 140 | STRONG_TRANSFORMS: () 141 | WEIGHT_U: 0.5 142 | DAELDG: 143 | CONF_THRE: 0.95 144 | STRONG_TRANSFORMS: () 145 | WEIGHT_U: 0.5 146 | DDAIG: 147 | ALPHA: 0.5 148 | CLAMP: False 149 | CLAMP_MAX: 1.0 150 | CLAMP_MIN: -1.0 151 | G_ARCH: 152 | LMDA: 0.3 153 | WARMUP: 0 154 | DOMAINMIX: 155 | ALPHA: 1.0 156 | BETA: 1.0 157 | TYPE: crossdomain 158 | ENTMIN: 159 | LMDA: 0.001 160 | FIXMATCH: 161 | CONF_THRE: 0.95 162 | STRONG_TRANSFORMS: () 163 | WEIGHT_U: 1.0 164 | LASP: 165 | CTX_INIT: a photo of a 166 | ENABLE: True 167 | ENABLE_CORRECTION: True 168 | ENABLE_IMPLICIT_OP: sum 169 | FINETUNE_VIT_LN: True 170 | LASP_LOSS_WEIGHT: 5.0 171 | LASP_PROMPTS: ['a photo of a {}, a type of flower.', 'a photo of a person doing {}.', 'a centered satellite photo of {}.', 'a photo of a {}, a type of aircraft.', '{} texture.', 'itap of a {}.', 'a bad photo of the {}.', 'a origami {}.', 'a photo of the large {}.', 'a {} in a video game.', 'art of the {}.', 'a photo of the small {}.', 'a photo of a {}.', 'a photo of many {}.', 'a photo of the hard to see {}.', 'a low resolution photo of the {}.', 'a rendering of a {}.', 'a bad photo of the {}.', 'a cropped photo of the {}.', 'a pixelated photo of the {}.', 'a bright photo of the {}.', 'a cropped photo of a {}.', 'a photo of the {}.', 'a good photo of the {}.', 'a rendering of the {}.', 'a close-up photo of the {}.', 'a low resolution photo of a {}.', 'a rendition of the {}.', 'a photo of the clean {}.', 'a photo of a large {}.', 'a blurry photo of a {}.', 'a pixelated photo of a {}.', 'itap of the {}.', 'a jpeg corrupted photo of the {}.', 'a good photo of a {}.'] 172 | N_CTX: 4 173 | PREC: amp 174 | PRETRAINED_PROMPTS_DIR: None 175 | TRAIN_W: True 176 | M3SDA: 177 | LMDA: 0.5 178 | N_STEP_F: 4 179 | MCD: 180 | N_STEP_F: 4 181 | MEANTEACHER: 182 | EMA_ALPHA: 0.999 183 | RAMPUP: 5 184 | WEIGHT_U: 1.0 185 | MIXMATCH: 186 | MIXUP_BETA: 0.75 187 | RAMPUP: 20000 188 | TEMP: 2.0 189 | WEIGHT_U: 100.0 190 | MME: 191 | LMDA: 0.1 192 | NAME: LASP 193 | SE: 194 | CONF_THRE: 0.95 195 | EMA_ALPHA: 0.999 196 | RAMPUP: 300 197 | USE_CUDA: True 198 | VERBOSE: True 199 | VERSION: 1 200 | Collecting env info ... 
201 | ** System info ** 202 | PyTorch version: 2.0.0 203 | Is debug build: False 204 | CUDA used to build PyTorch: 11.8 205 | ROCM used to build PyTorch: N/A 206 | 207 | OS: Ubuntu 20.04.4 LTS (x86_64) 208 | GCC version: (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0 209 | Clang version: Could not collect 210 | CMake version: version 3.18.4 211 | Libc version: glibc-2.31 212 | 213 | Python version: 3.10.9 (main, Mar 8 2023, 10:47:38) [GCC 11.2.0] (64-bit runtime) 214 | Python platform: Linux-5.4.0-100-generic-x86_64-with-glibc2.31 215 | Is CUDA available: True 216 | CUDA runtime version: 11.7.64 217 | CUDA_MODULE_LOADING set to: LAZY 218 | GPU models and configuration: 219 | GPU 0: CUDA GPU 220 | GPU 1: CUDA GPU 221 | GPU 2: CUDA GPU 222 | GPU 3: CUDA GPU 223 | 224 | Nvidia driver version: 520.61.05 225 | cuDNN version: Probably one of the following: 226 | /usr/lib/x86_64-linux-gnu/libcudnn.so.8.4.0 227 | /usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.4.0 228 | /usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.4.0 229 | /usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.4.0 230 | /usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.4.0 231 | /usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.4.0 232 | /usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.4.0 233 | HIP runtime version: N/A 234 | MIOpen runtime version: N/A 235 | Is XNNPACK available: True 236 | 237 | CPU: 238 | Architecture: x86_64 239 | CPU op-mode(s): 32-bit, 64-bit 240 | Byte Order: Little Endian 241 | Address sizes: 48 bits physical, 48 bits virtual 242 | CPU(s): 128 243 | On-line CPU(s) list: 0-127 244 | Thread(s) per core: 2 245 | Core(s) per socket: 32 246 | Socket(s): 2 247 | NUMA node(s): 4 248 | Vendor ID: AuthenticAMD 249 | CPU family: 23 250 | Model: 49 251 | Model name: AMD EPYC 7452 32-Core Processor 252 | Stepping: 0 253 | CPU MHz: 3231.615 254 | BogoMIPS: 4691.32 255 | Virtualization: AMD-V 256 | L1d cache: 2 MiB 257 | L1i cache: 2 MiB 258 | L2 cache: 32 MiB 259 | L3 cache: 256 MiB 260 | NUMA node0 CPU(s): 0-15,64-79 261 | NUMA node1 CPU(s): 16-31,80-95 262 | NUMA node2 CPU(s): 32-47,96-111 263 | NUMA node3 CPU(s): 48-63,112-127 264 | Vulnerability Itlb multihit: Not affected 265 | Vulnerability L1tf: Not affected 266 | Vulnerability Mds: Not affected 267 | Vulnerability Meltdown: Not affected 268 | Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp 269 | Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization 270 | Vulnerability Spectre v2: Mitigation; Full AMD retpoline, IBPB conditional, IBRS_FW, STIBP conditional, RSB filling 271 | Vulnerability Srbds: Not affected 272 | Vulnerability Tsx async abort: Not affected 273 | Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold 
avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca 274 | 275 | Versions of relevant libraries: 276 | [pip3] numpy==1.23.5 277 | [pip3] open-clip-torch==2.16.0 278 | [pip3] torch==2.0.0 279 | [pip3] torchaudio==2.0.0 280 | [pip3] torchvision==0.15.0 281 | [conda] blas 1.0 mkl 282 | [conda] ffmpeg 4.3 hf484d3e_0 pytorch 283 | [conda] mkl 2021.4.0 h06a4308_640 284 | [conda] mkl-service 2.4.0 py310h7f8727e_0 285 | [conda] mkl_fft 1.3.1 py310hd6ae3a3_0 286 | [conda] mkl_random 1.2.2 py310h00e6091_0 287 | [conda] numpy 1.23.5 py310hd5efca6_0 288 | [conda] numpy-base 1.23.5 py310h8e6c178_0 289 | [conda] open-clip-torch 2.16.0 dev_0 290 | [conda] pytorch 2.0.0 py3.10_cuda11.8_cudnn8.7.0_0 pytorch 291 | [conda] pytorch-cuda 11.8 h7e8668a_3 pytorch 292 | [conda] pytorch-mutex 1.0 cuda pytorch 293 | [conda] torchaudio 2.0.0 py310_cu118 pytorch 294 | [conda] torchtriton 2.0.0 py310 pytorch 295 | [conda] torchvision 0.15.0 py310_cu118 pytorch 296 | Pillow (9.4.0) 297 | 298 | Loading trainer: LASP 299 | Loading dataset: OxfordFlowers 300 | Reading split from /home/work/shared-fi-datasets-01/users/adrian.bulat/data/fs_datasets/oxford_flowers/split_zhou_OxfordFlowers.json 301 | Loading preprocessed few-shot data from /home/work/shared-fi-datasets-01/users/adrian.bulat/data/fs_datasets/oxford_flowers/split_fewshot/shot_16-seed_1.pkl 302 | SUBSAMPLE BASE CLASSES! 303 | Building transform_train 304 | + random resized crop (size=(224, 224), scale=(0.08, 1.0)) 305 | + random flip 306 | + to torch tensor of range [0, 1] 307 | + normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) 308 | Building transform_test 309 | + resize the smaller edge to 224 310 | + 224x224 center crop 311 | + to torch tensor of range [0, 1] 312 | + normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) 313 | --------- ------------- 314 | Dataset OxfordFlowers 315 | # classes 51 316 | # train_x 816 317 | # val 204 318 | # test 1,053 319 | --------- ------------- 320 | Loading CLIP (backbone: ViT-B/16) 321 | Building custom CLIP 322 | Initial context: "a photo of a" 323 | Number of context words (tokens): 4 324 | Initializing LASP prompts... 325 | Num classes used for LASP: 102 326 | Turning off gradients in both the image and the text encoder 327 | Re-enabling LN... 
328 | Parameters to be updated: {'image_encoder.transformer.resblocks.2.ln_2.bias', 'image_encoder.transformer.resblocks.7.ln_1.bias', 'image_encoder.transformer.resblocks.10.ln_2.weight', 'image_encoder.transformer.resblocks.5.ln_1.weight', 'image_encoder.transformer.resblocks.7.ln_1.weight', 'image_encoder.transformer.resblocks.8.ln_1.bias', 'image_encoder.transformer.resblocks.1.ln_1.weight', 'image_encoder.transformer.resblocks.0.ln_1.bias', 'image_encoder.transformer.resblocks.1.ln_2.bias', 'image_encoder.transformer.resblocks.9.ln_2.weight', 'image_encoder.transformer.resblocks.4.ln_2.weight', 'image_encoder.transformer.resblocks.10.ln_2.bias', 'image_encoder.transformer.resblocks.3.ln_1.bias', 'image_encoder.transformer.resblocks.4.ln_2.bias', 'image_encoder.transformer.resblocks.3.ln_2.weight', 'image_encoder.transformer.resblocks.11.ln_2.bias', 'image_encoder.transformer.resblocks.5.ln_1.bias', 'image_encoder.transformer.resblocks.3.ln_2.bias', 'image_encoder.transformer.resblocks.2.ln_1.bias', 'image_encoder.transformer.resblocks.5.ln_2.weight', 'image_encoder.transformer.resblocks.5.ln_2.bias', 'image_encoder.transformer.resblocks.1.ln_2.weight', 'image_encoder.transformer.resblocks.6.ln_2.weight', 'image_encoder.transformer.resblocks.4.ln_1.bias', 'image_encoder.transformer.resblocks.2.ln_2.weight', 'image_encoder.transformer.resblocks.0.ln_2.weight', 'image_encoder.transformer.resblocks.4.ln_1.weight', 'image_encoder.transformer.resblocks.9.ln_1.bias', 'image_encoder.transformer.resblocks.8.ln_2.bias', 'image_encoder.transformer.resblocks.11.ln_2.weight', 'image_encoder.transformer.resblocks.0.ln_2.bias', 'image_encoder.transformer.resblocks.0.ln_1.weight', 'image_encoder.transformer.resblocks.11.ln_1.bias', 'image_encoder.transformer.resblocks.6.ln_1.bias', 'image_encoder.transformer.resblocks.6.ln_1.weight', 'image_encoder.transformer.resblocks.7.ln_2.weight', 'image_encoder.transformer.resblocks.8.ln_1.weight', 'image_encoder.transformer.resblocks.9.ln_2.bias', 'image_encoder.transformer.resblocks.6.ln_2.bias', 'image_encoder.transformer.resblocks.9.ln_1.weight', 'image_encoder.transformer.resblocks.2.ln_1.weight', 'prompt_learner.ctx', 'prompt_learner.w', 'image_encoder.transformer.resblocks.1.ln_1.bias', 'image_encoder.transformer.resblocks.3.ln_1.weight', 'image_encoder.transformer.resblocks.8.ln_2.weight', 'image_encoder.transformer.resblocks.10.ln_1.weight', 'image_encoder.transformer.resblocks.11.ln_1.weight', 'image_encoder.transformer.resblocks.10.ln_1.bias', 'image_encoder.transformer.resblocks.7.ln_2.bias'} 329 | Loading evaluator: Classification 330 | Note that load_model() is skipped as no pretrained model is given 331 | No checkpoint found, train from scratch 332 | Initialize tensorboard (log_dir=output/base2new/train_base/oxford_flowers/shots_16/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed1/tensorboard) 333 | epoch [1/25] batch [20/25] time 0.088 (0.187) data 0.000 (0.049) loss 1.5808 (1.9021) lr 1.0000e-05 eta 0:01:53 334 | epoch [2/25] batch [20/25] time 0.087 (0.104) data 0.000 (0.014) loss 2.2961 (2.0192) lr 1.0000e-05 eta 0:01:00 335 | epoch [3/25] batch [20/25] time 0.087 (0.101) data 0.000 (0.013) loss 1.6456 (1.7757) lr 1.0000e-05 eta 0:00:56 336 | epoch [4/25] batch [20/25] time 0.088 (0.101) data 0.000 (0.011) loss 1.1334 (1.5285) lr 3.2000e-03 eta 0:00:53 337 | epoch [5/25] batch [20/25] time 0.088 (0.103) data 0.000 (0.012) loss 0.7612 (1.0363) lr 3.1874e-03 eta 0:00:51 338 | epoch [6/25] batch [20/25] time 0.089 (0.103) data 0.000 
(0.013) loss 1.1014 (0.8641) lr 3.1497e-03 eta 0:00:49 339 | epoch [7/25] batch [20/25] time 0.090 (0.103) data 0.000 (0.012) loss 0.6053 (0.6282) lr 3.0876e-03 eta 0:00:46 340 | epoch [8/25] batch [20/25] time 0.090 (0.102) data 0.000 (0.011) loss 0.5999 (0.5411) lr 3.0021e-03 eta 0:00:43 341 | epoch [9/25] batch [20/25] time 0.089 (0.103) data 0.000 (0.012) loss 0.3241 (0.4644) lr 2.8944e-03 eta 0:00:41 342 | epoch [10/25] batch [20/25] time 0.090 (0.103) data 0.000 (0.014) loss 0.1757 (0.4572) lr 2.7663e-03 eta 0:00:39 343 | epoch [11/25] batch [20/25] time 0.088 (0.102) data 0.000 (0.013) loss 0.3630 (0.4413) lr 2.6199e-03 eta 0:00:36 344 | epoch [12/25] batch [20/25] time 0.090 (0.104) data 0.000 (0.014) loss 0.7310 (0.4246) lr 2.4573e-03 eta 0:00:34 345 | epoch [13/25] batch [20/25] time 0.088 (0.103) data 0.000 (0.012) loss 0.2795 (0.3392) lr 2.2812e-03 eta 0:00:31 346 | epoch [14/25] batch [20/25] time 0.088 (0.103) data 0.000 (0.014) loss 0.4073 (0.3622) lr 2.0944e-03 eta 0:00:28 347 | epoch [15/25] batch [20/25] time 0.090 (0.105) data 0.000 (0.014) loss 0.0933 (0.3985) lr 1.8998e-03 eta 0:00:26 348 | epoch [16/25] batch [20/25] time 0.088 (0.105) data 0.000 (0.014) loss 0.2926 (0.2836) lr 1.7005e-03 eta 0:00:24 349 | epoch [17/25] batch [20/25] time 0.089 (0.103) data 0.000 (0.014) loss 0.3485 (0.3052) lr 1.4995e-03 eta 0:00:21 350 | epoch [18/25] batch [20/25] time 0.088 (0.102) data 0.000 (0.012) loss 0.2122 (0.3120) lr 1.3002e-03 eta 0:00:18 351 | epoch [19/25] batch [20/25] time 0.089 (0.103) data 0.000 (0.012) loss 0.3261 (0.2796) lr 1.1056e-03 eta 0:00:15 352 | epoch [20/25] batch [20/25] time 0.088 (0.102) data 0.000 (0.012) loss 0.2720 (0.3299) lr 9.1875e-04 eta 0:00:13 353 | epoch [21/25] batch [20/25] time 0.089 (0.103) data 0.000 (0.013) loss 0.1388 (0.2329) lr 7.4268e-04 eta 0:00:10 354 | epoch [22/25] batch [20/25] time 0.091 (0.103) data 0.000 (0.013) loss 0.2645 (0.2705) lr 5.8012e-04 eta 0:00:08 355 | epoch [23/25] batch [20/25] time 0.089 (0.103) data 0.000 (0.012) loss 0.3810 (0.2237) lr 4.3365e-04 eta 0:00:05 356 | epoch [24/25] batch [20/25] time 0.092 (0.106) data 0.000 (0.014) loss 0.1024 (0.2480) lr 3.0557e-04 eta 0:00:03 357 | epoch [25/25] batch [20/25] time 0.092 (0.104) data 0.000 (0.013) loss 0.2363 (0.2637) lr 1.9791e-04 eta 0:00:00 358 | Checkpoint saved to output/base2new/train_base/oxford_flowers/shots_16/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed1/prompt_learner/model.pth.tar-25 359 | Finish training 360 | Deploy the last-epoch model 361 | Evaluate on the *test* set 362 | => result 363 | * total: 1,053 364 | * correct: 1,025 365 | * accuracy: 97.3% 366 | * error: 2.7% 367 | * macro_f1: 97.4% 368 | Elapsed: 0:01:09 369 | -------------------------------------------------------------------------------- /logs/oxford_flowers/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed2/log.txt: -------------------------------------------------------------------------------- 1 | *************** 2 | ** Arguments ** 3 | *************** 4 | backbone: 5 | config_file: configs/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32.yaml 6 | dataset_config_file: configs/datasets/oxford_flowers.yaml 7 | eval_only: False 8 | head: 9 | load_epoch: None 10 | model_dir: 11 | no_train: False 12 | opts: ['DATASET.NUM_SHOTS', '16', 'DATASET.SUBSAMPLE_CLASSES', 'base'] 13 | output_dir: output/base2new/train_base/oxford_flowers/shots_16/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed2 14 | resume: 15 | root: 
/home/work/shared-fi-datasets-01/users/adrian.bulat/data/fs_datasets/ 16 | seed: 2 17 | source_domains: None 18 | target_domains: None 19 | trainer: LASP 20 | transforms: None 21 | ************ 22 | ** Config ** 23 | ************ 24 | DATALOADER: 25 | K_TRANSFORMS: 1 26 | NUM_WORKERS: 8 27 | RETURN_IMG0: False 28 | TEST: 29 | BATCH_SIZE: 32 30 | SAMPLER: SequentialSampler 31 | TRAIN_U: 32 | BATCH_SIZE: 32 33 | N_DOMAIN: 0 34 | N_INS: 16 35 | SAME_AS_X: True 36 | SAMPLER: RandomSampler 37 | TRAIN_X: 38 | BATCH_SIZE: 32 39 | N_DOMAIN: 0 40 | N_INS: 16 41 | SAMPLER: RandomSampler 42 | DATASET: 43 | ALL_AS_UNLABELED: False 44 | CIFAR_C_LEVEL: 1 45 | CIFAR_C_TYPE: 46 | INCLUDE_ALL_CLASSES: False 47 | NAME: OxfordFlowers 48 | NUM_LABELED: -1 49 | NUM_SHOTS: 16 50 | ROOT: /home/work/shared-fi-datasets-01/users/adrian.bulat/data/fs_datasets/ 51 | SOURCE_DOMAINS: () 52 | STL10_FOLD: -1 53 | SUBSAMPLE_CLASSES: base 54 | TARGET_DOMAINS: () 55 | VAL_PERCENT: 0.1 56 | INPUT: 57 | COLORJITTER_B: 0.4 58 | COLORJITTER_C: 0.4 59 | COLORJITTER_H: 0.1 60 | COLORJITTER_S: 0.4 61 | CROP_PADDING: 4 62 | CUTOUT_LEN: 16 63 | CUTOUT_N: 1 64 | GB_K: 21 65 | GB_P: 0.5 66 | GN_MEAN: 0.0 67 | GN_STD: 0.15 68 | INTERPOLATION: bicubic 69 | NO_TRANSFORM: False 70 | PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] 71 | PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] 72 | RANDAUGMENT_M: 10 73 | RANDAUGMENT_N: 2 74 | RGS_P: 0.2 75 | RRCROP_SCALE: (0.08, 1.0) 76 | SIZE: (224, 224) 77 | TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') 78 | MODEL: 79 | BACKBONE: 80 | NAME: ViT-B/16 81 | PRETRAINED: True 82 | HEAD: 83 | ACTIVATION: relu 84 | BN: True 85 | DROPOUT: 0.0 86 | HIDDEN_LAYERS: () 87 | NAME: 88 | INIT_WEIGHTS: 89 | OPTIM: 90 | ADAM_BETA1: 0.9 91 | ADAM_BETA2: 0.999 92 | BASE_LR_MULT: 0.1 93 | GAMMA: 0.1 94 | LR: 0.032 95 | LR_SCHEDULER: cosine 96 | MAX_EPOCH: 25 97 | MOMENTUM: 0.9 98 | NAME: sgd 99 | NEW_LAYERS: () 100 | RMSPROP_ALPHA: 0.99 101 | SGD_DAMPNING: 0 102 | SGD_NESTEROV: False 103 | STAGED_LR: False 104 | STEPSIZE: (-1,) 105 | WARMUP_CONS_LR: 1e-05 106 | WARMUP_EPOCH: 3 107 | WARMUP_MIN_LR: 1e-05 108 | WARMUP_RECOUNT: True 109 | WARMUP_TYPE: constant 110 | WEIGHT_DECAY: 0.0005 111 | OUTPUT_DIR: output/base2new/train_base/oxford_flowers/shots_16/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed2 112 | RESUME: 113 | SEED: 2 114 | TEST: 115 | COMPUTE_CMAT: False 116 | EVALUATOR: Classification 117 | FINAL_MODEL: last_step 118 | NO_TEST: False 119 | PER_CLASS_RESULT: False 120 | SPLIT: test 121 | TRAIN: 122 | CHECKPOINT_FREQ: 0 123 | COUNT_ITER: train_x 124 | PRINT_FREQ: 20 125 | TRAINER: 126 | CDAC: 127 | CLASS_LR_MULTI: 10 128 | P_THRESH: 0.95 129 | RAMPUP_COEF: 30 130 | RAMPUP_ITRS: 1000 131 | STRONG_TRANSFORMS: () 132 | TOPK_MATCH: 5 133 | CROSSGRAD: 134 | ALPHA_D: 0.5 135 | ALPHA_F: 0.5 136 | EPS_D: 1.0 137 | EPS_F: 1.0 138 | DAEL: 139 | CONF_THRE: 0.95 140 | STRONG_TRANSFORMS: () 141 | WEIGHT_U: 0.5 142 | DAELDG: 143 | CONF_THRE: 0.95 144 | STRONG_TRANSFORMS: () 145 | WEIGHT_U: 0.5 146 | DDAIG: 147 | ALPHA: 0.5 148 | CLAMP: False 149 | CLAMP_MAX: 1.0 150 | CLAMP_MIN: -1.0 151 | G_ARCH: 152 | LMDA: 0.3 153 | WARMUP: 0 154 | DOMAINMIX: 155 | ALPHA: 1.0 156 | BETA: 1.0 157 | TYPE: crossdomain 158 | ENTMIN: 159 | LMDA: 0.001 160 | FIXMATCH: 161 | CONF_THRE: 0.95 162 | STRONG_TRANSFORMS: () 163 | WEIGHT_U: 1.0 164 | LASP: 165 | CTX_INIT: a photo of a 166 | ENABLE: True 167 | ENABLE_CORRECTION: True 168 | ENABLE_IMPLICIT_OP: sum 169 | FINETUNE_VIT_LN: True 170 | LASP_LOSS_WEIGHT: 5.0 
171 | LASP_PROMPTS: ['a photo of a {}, a type of flower.', 'a photo of a person doing {}.', 'a centered satellite photo of {}.', 'a photo of a {}, a type of aircraft.', '{} texture.', 'itap of a {}.', 'a bad photo of the {}.', 'a origami {}.', 'a photo of the large {}.', 'a {} in a video game.', 'art of the {}.', 'a photo of the small {}.', 'a photo of a {}.', 'a photo of many {}.', 'a photo of the hard to see {}.', 'a low resolution photo of the {}.', 'a rendering of a {}.', 'a bad photo of the {}.', 'a cropped photo of the {}.', 'a pixelated photo of the {}.', 'a bright photo of the {}.', 'a cropped photo of a {}.', 'a photo of the {}.', 'a good photo of the {}.', 'a rendering of the {}.', 'a close-up photo of the {}.', 'a low resolution photo of a {}.', 'a rendition of the {}.', 'a photo of the clean {}.', 'a photo of a large {}.', 'a blurry photo of a {}.', 'a pixelated photo of a {}.', 'itap of the {}.', 'a jpeg corrupted photo of the {}.', 'a good photo of a {}.'] 172 | N_CTX: 4 173 | PREC: amp 174 | PRETRAINED_PROMPTS_DIR: None 175 | TRAIN_W: True 176 | M3SDA: 177 | LMDA: 0.5 178 | N_STEP_F: 4 179 | MCD: 180 | N_STEP_F: 4 181 | MEANTEACHER: 182 | EMA_ALPHA: 0.999 183 | RAMPUP: 5 184 | WEIGHT_U: 1.0 185 | MIXMATCH: 186 | MIXUP_BETA: 0.75 187 | RAMPUP: 20000 188 | TEMP: 2.0 189 | WEIGHT_U: 100.0 190 | MME: 191 | LMDA: 0.1 192 | NAME: LASP 193 | SE: 194 | CONF_THRE: 0.95 195 | EMA_ALPHA: 0.999 196 | RAMPUP: 300 197 | USE_CUDA: True 198 | VERBOSE: True 199 | VERSION: 1 200 | Collecting env info ... 201 | ** System info ** 202 | PyTorch version: 2.0.0 203 | Is debug build: False 204 | CUDA used to build PyTorch: 11.8 205 | ROCM used to build PyTorch: N/A 206 | 207 | OS: Ubuntu 20.04.4 LTS (x86_64) 208 | GCC version: (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0 209 | Clang version: Could not collect 210 | CMake version: version 3.18.4 211 | Libc version: glibc-2.31 212 | 213 | Python version: 3.10.9 (main, Mar 8 2023, 10:47:38) [GCC 11.2.0] (64-bit runtime) 214 | Python platform: Linux-5.4.0-100-generic-x86_64-with-glibc2.31 215 | Is CUDA available: True 216 | CUDA runtime version: 11.7.64 217 | CUDA_MODULE_LOADING set to: LAZY 218 | GPU models and configuration: 219 | GPU 0: CUDA GPU 220 | GPU 1: CUDA GPU 221 | GPU 2: CUDA GPU 222 | GPU 3: CUDA GPU 223 | 224 | Nvidia driver version: 520.61.05 225 | cuDNN version: Probably one of the following: 226 | /usr/lib/x86_64-linux-gnu/libcudnn.so.8.4.0 227 | /usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.4.0 228 | /usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.4.0 229 | /usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.4.0 230 | /usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.4.0 231 | /usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.4.0 232 | /usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.4.0 233 | HIP runtime version: N/A 234 | MIOpen runtime version: N/A 235 | Is XNNPACK available: True 236 | 237 | CPU: 238 | Architecture: x86_64 239 | CPU op-mode(s): 32-bit, 64-bit 240 | Byte Order: Little Endian 241 | Address sizes: 48 bits physical, 48 bits virtual 242 | CPU(s): 128 243 | On-line CPU(s) list: 0-127 244 | Thread(s) per core: 2 245 | Core(s) per socket: 32 246 | Socket(s): 2 247 | NUMA node(s): 4 248 | Vendor ID: AuthenticAMD 249 | CPU family: 23 250 | Model: 49 251 | Model name: AMD EPYC 7452 32-Core Processor 252 | Stepping: 0 253 | CPU MHz: 3268.857 254 | BogoMIPS: 4691.32 255 | Virtualization: AMD-V 256 | L1d cache: 2 MiB 257 | L1i cache: 2 MiB 258 | L2 cache: 32 MiB 259 | L3 cache: 256 MiB 260 | NUMA node0 CPU(s): 0-15,64-79 261 | 
NUMA node1 CPU(s): 16-31,80-95 262 | NUMA node2 CPU(s): 32-47,96-111 263 | NUMA node3 CPU(s): 48-63,112-127 264 | Vulnerability Itlb multihit: Not affected 265 | Vulnerability L1tf: Not affected 266 | Vulnerability Mds: Not affected 267 | Vulnerability Meltdown: Not affected 268 | Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp 269 | Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization 270 | Vulnerability Spectre v2: Mitigation; Full AMD retpoline, IBPB conditional, IBRS_FW, STIBP conditional, RSB filling 271 | Vulnerability Srbds: Not affected 272 | Vulnerability Tsx async abort: Not affected 273 | Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca 274 | 275 | Versions of relevant libraries: 276 | [pip3] numpy==1.23.5 277 | [pip3] open-clip-torch==2.16.0 278 | [pip3] torch==2.0.0 279 | [pip3] torchaudio==2.0.0 280 | [pip3] torchvision==0.15.0 281 | [conda] blas 1.0 mkl 282 | [conda] ffmpeg 4.3 hf484d3e_0 pytorch 283 | [conda] mkl 2021.4.0 h06a4308_640 284 | [conda] mkl-service 2.4.0 py310h7f8727e_0 285 | [conda] mkl_fft 1.3.1 py310hd6ae3a3_0 286 | [conda] mkl_random 1.2.2 py310h00e6091_0 287 | [conda] numpy 1.23.5 py310hd5efca6_0 288 | [conda] numpy-base 1.23.5 py310h8e6c178_0 289 | [conda] open-clip-torch 2.16.0 dev_0 290 | [conda] pytorch 2.0.0 py3.10_cuda11.8_cudnn8.7.0_0 pytorch 291 | [conda] pytorch-cuda 11.8 h7e8668a_3 pytorch 292 | [conda] pytorch-mutex 1.0 cuda pytorch 293 | [conda] torchaudio 2.0.0 py310_cu118 pytorch 294 | [conda] torchtriton 2.0.0 py310 pytorch 295 | [conda] torchvision 0.15.0 py310_cu118 pytorch 296 | Pillow (9.4.0) 297 | 298 | Loading trainer: LASP 299 | Loading dataset: OxfordFlowers 300 | Reading split from /home/work/shared-fi-datasets-01/users/adrian.bulat/data/fs_datasets/oxford_flowers/split_zhou_OxfordFlowers.json 301 | Loading preprocessed few-shot data from /home/work/shared-fi-datasets-01/users/adrian.bulat/data/fs_datasets/oxford_flowers/split_fewshot/shot_16-seed_2.pkl 302 | SUBSAMPLE BASE CLASSES! 
303 | Building transform_train 304 | + random resized crop (size=(224, 224), scale=(0.08, 1.0)) 305 | + random flip 306 | + to torch tensor of range [0, 1] 307 | + normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) 308 | Building transform_test 309 | + resize the smaller edge to 224 310 | + 224x224 center crop 311 | + to torch tensor of range [0, 1] 312 | + normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) 313 | --------- ------------- 314 | Dataset OxfordFlowers 315 | # classes 51 316 | # train_x 816 317 | # val 204 318 | # test 1,053 319 | --------- ------------- 320 | Loading CLIP (backbone: ViT-B/16) 321 | Building custom CLIP 322 | Initial context: "a photo of a" 323 | Number of context words (tokens): 4 324 | Initializing LASP prompts... 325 | Num classes used for LASP: 102 326 | Turning off gradients in both the image and the text encoder 327 | Re-enabling LN... 328 | Parameters to be updated: {'image_encoder.transformer.resblocks.11.ln_2.weight', 'prompt_learner.w', 'image_encoder.transformer.resblocks.2.ln_1.weight', 'image_encoder.transformer.resblocks.6.ln_2.bias', 'image_encoder.transformer.resblocks.8.ln_2.bias', 'image_encoder.transformer.resblocks.0.ln_1.weight', 'image_encoder.transformer.resblocks.7.ln_1.weight', 'image_encoder.transformer.resblocks.2.ln_2.bias', 'image_encoder.transformer.resblocks.3.ln_2.bias', 'image_encoder.transformer.resblocks.4.ln_2.bias', 'image_encoder.transformer.resblocks.8.ln_2.weight', 'image_encoder.transformer.resblocks.11.ln_2.bias', 'image_encoder.transformer.resblocks.10.ln_2.weight', 'image_encoder.transformer.resblocks.4.ln_1.bias', 'image_encoder.transformer.resblocks.10.ln_1.weight', 'image_encoder.transformer.resblocks.5.ln_1.weight', 'image_encoder.transformer.resblocks.1.ln_2.weight', 'image_encoder.transformer.resblocks.4.ln_1.weight', 'image_encoder.transformer.resblocks.6.ln_1.weight', 'image_encoder.transformer.resblocks.0.ln_2.weight', 'image_encoder.transformer.resblocks.3.ln_2.weight', 'image_encoder.transformer.resblocks.5.ln_2.bias', 'image_encoder.transformer.resblocks.7.ln_1.bias', 'prompt_learner.ctx', 'image_encoder.transformer.resblocks.9.ln_1.weight', 'image_encoder.transformer.resblocks.4.ln_2.weight', 'image_encoder.transformer.resblocks.3.ln_1.weight', 'image_encoder.transformer.resblocks.5.ln_2.weight', 'image_encoder.transformer.resblocks.6.ln_1.bias', 'image_encoder.transformer.resblocks.8.ln_1.bias', 'image_encoder.transformer.resblocks.2.ln_1.bias', 'image_encoder.transformer.resblocks.6.ln_2.weight', 'image_encoder.transformer.resblocks.9.ln_2.weight', 'image_encoder.transformer.resblocks.0.ln_1.bias', 'image_encoder.transformer.resblocks.9.ln_2.bias', 'image_encoder.transformer.resblocks.10.ln_2.bias', 'image_encoder.transformer.resblocks.11.ln_1.weight', 'image_encoder.transformer.resblocks.7.ln_2.bias', 'image_encoder.transformer.resblocks.1.ln_1.bias', 'image_encoder.transformer.resblocks.1.ln_2.bias', 'image_encoder.transformer.resblocks.8.ln_1.weight', 'image_encoder.transformer.resblocks.10.ln_1.bias', 'image_encoder.transformer.resblocks.3.ln_1.bias', 'image_encoder.transformer.resblocks.5.ln_1.bias', 'image_encoder.transformer.resblocks.0.ln_2.bias', 'image_encoder.transformer.resblocks.7.ln_2.weight', 'image_encoder.transformer.resblocks.9.ln_1.bias', 'image_encoder.transformer.resblocks.11.ln_1.bias', 'image_encoder.transformer.resblocks.1.ln_1.weight', 
'image_encoder.transformer.resblocks.2.ln_2.weight'} 329 | Loading evaluator: Classification 330 | Note that load_model() is skipped as no pretrained model is given 331 | No checkpoint found, train from scratch 332 | Initialize tensorboard (log_dir=output/base2new/train_base/oxford_flowers/shots_16/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed2/tensorboard) 333 | epoch [1/25] batch [20/25] time 0.089 (0.183) data 0.000 (0.042) loss 1.3814 (2.0584) lr 1.0000e-05 eta 0:01:50 334 | epoch [2/25] batch [20/25] time 0.091 (0.106) data 0.000 (0.014) loss 2.1721 (1.8544) lr 1.0000e-05 eta 0:01:01 335 | epoch [3/25] batch [20/25] time 0.090 (0.103) data 0.000 (0.012) loss 2.7486 (1.8380) lr 1.0000e-05 eta 0:00:57 336 | epoch [4/25] batch [20/25] time 0.087 (0.103) data 0.000 (0.015) loss 1.2678 (1.4053) lr 3.2000e-03 eta 0:00:54 337 | epoch [5/25] batch [20/25] time 0.087 (0.103) data 0.000 (0.015) loss 1.1212 (0.9990) lr 3.1874e-03 eta 0:00:51 338 | epoch [6/25] batch [20/25] time 0.087 (0.102) data 0.000 (0.014) loss 1.0785 (0.7719) lr 3.1497e-03 eta 0:00:49 339 | epoch [7/25] batch [20/25] time 0.087 (0.102) data 0.000 (0.013) loss 0.6342 (0.6016) lr 3.0876e-03 eta 0:00:46 340 | epoch [8/25] batch [20/25] time 0.087 (0.102) data 0.000 (0.012) loss 0.5911 (0.5483) lr 3.0021e-03 eta 0:00:43 341 | epoch [9/25] batch [20/25] time 0.087 (0.102) data 0.000 (0.014) loss 0.6032 (0.4615) lr 2.8944e-03 eta 0:00:41 342 | epoch [10/25] batch [20/25] time 0.087 (0.102) data 0.000 (0.012) loss 0.4821 (0.4357) lr 2.7663e-03 eta 0:00:38 343 | epoch [11/25] batch [20/25] time 0.087 (0.102) data 0.000 (0.013) loss 0.5367 (0.4507) lr 2.6199e-03 eta 0:00:36 344 | epoch [12/25] batch [20/25] time 0.089 (0.104) data 0.000 (0.013) loss 0.5392 (0.4471) lr 2.4573e-03 eta 0:00:34 345 | epoch [13/25] batch [20/25] time 0.089 (0.104) data 0.000 (0.013) loss 0.6111 (0.3849) lr 2.2812e-03 eta 0:00:31 346 | epoch [14/25] batch [20/25] time 0.089 (0.104) data 0.000 (0.014) loss 0.2560 (0.3652) lr 2.0944e-03 eta 0:00:29 347 | epoch [15/25] batch [20/25] time 0.089 (0.104) data 0.000 (0.014) loss 0.2976 (0.3451) lr 1.8998e-03 eta 0:00:26 348 | epoch [16/25] batch [20/25] time 0.090 (0.105) data 0.000 (0.015) loss 0.2512 (0.3081) lr 1.7005e-03 eta 0:00:24 349 | epoch [17/25] batch [20/25] time 0.089 (0.104) data 0.000 (0.013) loss 0.1983 (0.3171) lr 1.4995e-03 eta 0:00:21 350 | epoch [18/25] batch [20/25] time 0.092 (0.106) data 0.000 (0.015) loss 0.2670 (0.3097) lr 1.3002e-03 eta 0:00:19 351 | epoch [19/25] batch [20/25] time 0.089 (0.105) data 0.000 (0.015) loss 0.3784 (0.3188) lr 1.1056e-03 eta 0:00:16 352 | epoch [20/25] batch [20/25] time 0.089 (0.104) data 0.000 (0.013) loss 0.1434 (0.2694) lr 9.1875e-04 eta 0:00:13 353 | epoch [21/25] batch [20/25] time 0.089 (0.106) data 0.000 (0.014) loss 0.2248 (0.2209) lr 7.4268e-04 eta 0:00:11 354 | epoch [22/25] batch [20/25] time 0.090 (0.104) data 0.000 (0.013) loss 0.1904 (0.2727) lr 5.8012e-04 eta 0:00:08 355 | epoch [23/25] batch [20/25] time 0.089 (0.105) data 0.000 (0.014) loss 0.7144 (0.3395) lr 4.3365e-04 eta 0:00:05 356 | epoch [24/25] batch [20/25] time 0.090 (0.104) data 0.000 (0.013) loss 0.1510 (0.2931) lr 3.0557e-04 eta 0:00:03 357 | epoch [25/25] batch [20/25] time 0.091 (0.105) data 0.000 (0.013) loss 0.5624 (0.2557) lr 1.9791e-04 eta 0:00:00 358 | Checkpoint saved to output/base2new/train_base/oxford_flowers/shots_16/LASP/vit_b16_c4_ep25_batch32_cls_t2t_5_wcl_25_g1_b_lr32/seed2/prompt_learner/model.pth.tar-25 359 | Finish training 360 | Deploy the 
last-epoch model 361 | Evaluate on the *test* set 362 | => result 363 | * total: 1,053 364 | * correct: 1,031 365 | * accuracy: 97.9% 366 | * error: 2.1% 367 | * macro_f1: 97.8% 368 | Elapsed: 0:01:09 369 | --------------------------------------------------------------------------------
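Note on the lr column in the epoch lines above: it follows the OPTIM settings from the config dump, namely a constant warmup at 1e-05 for the first WARMUP_EPOCH=3 epochs, then cosine annealing over MAX_EPOCH=25 epochs whose counter restarts after warmup (WARMUP_RECOUNT: True). The post-warmup starting value printed at epoch 4 is 3.2e-03, a tenth of the configured LR: 0.032, so the sketch below works from the printed value; it is a reconstruction from the logged numbers, not the scheduler implementation used by the trainer.

import math

def lr_at_epoch(epoch: int,
                base_lr: float = 3.2e-3,
                warmup_epochs: int = 3,
                warmup_lr: float = 1e-5,
                max_epoch: int = 25) -> float:
    # Constant warmup for the first `warmup_epochs` epochs
    if epoch <= warmup_epochs:
        return warmup_lr
    # Cosine annealing; step counter restarts at the first post-warmup epoch
    t = epoch - warmup_epochs - 1
    return 0.5 * base_lr * (1.0 + math.cos(math.pi * t / max_epoch))

# lr_at_epoch(4) = 3.2e-03, lr_at_epoch(5) ~ 3.1874e-03, lr_at_epoch(25) ~ 1.9791e-04,
# matching the values printed in the epoch lines of the logs above.
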