├── CHANGELOG.md
├── semilearn
│   ├── algorithms
│   │   ├── clss
│   │   │   ├── __init__.py
│   │   │   ├── ordinal_entropy.py
│   │   │   └── clss.py
│   │   ├── rda
│   │   │   ├── __init__.py
│   │   │   ├── utils.py
│   │   │   └── rda.py
│   │   ├── ucvme
│   │   │   └── __init__.py
│   │   ├── rankup
│   │   │   ├── __init__.py
│   │   │   └── rankup_net.py
│   │   ├── mixmatch
│   │   │   └── __init__.py
│   │   ├── pimodel
│   │   │   ├── __init__.py
│   │   │   └── pimodel.py
│   │   ├── rankuprda
│   │   │   ├── __init__.py
│   │   │   ├── rda.py
│   │   │   └── rankup_net.py
│   │   ├── meanteacher
│   │   │   ├── __init__.py
│   │   │   └── meanteacher.py
│   │   ├── utils
│   │   │   ├── __init__.py
│   │   │   ├── misc.py
│   │   │   └── ops.py
│   │   ├── fullysupervised
│   │   │   ├── __init__.py
│   │   │   └── fullysupervised.py
│   │   ├── hooks
│   │   │   ├── __init__.py
│   │   │   ├── pseudo_label.py
│   │   │   └── masking.py
│   │   └── __init__.py
│   ├── core
│   │   ├── utils
│   │   │   ├── __init__.py
│   │   │   └── registry.py
│   │   ├── __init__.py
│   │   ├── criterions
│   │   │   ├── __init__.py
│   │   │   ├── cross_entropy.py
│   │   │   ├── consistency.py
│   │   │   └── cls_consistency.py
│   │   └── hooks
│   │       ├── __init__.py
│   │       ├── ema.py
│   │       ├── sampler_seed.py
│   │       ├── checkpoint.py
│   │       ├── timer.py
│   │       ├── hook.py
│   │       ├── param_update.py
│   │       ├── evaluation.py
│   │       ├── priority.py
│   │       ├── logging.py
│   │       ├── wandb.py
│   │       └── aim.py
│   ├── nets
│   │   ├── bert
│   │   │   ├── __init__.py
│   │   │   └── bert.py
│   │   ├── resnet
│   │   │   └── __init__.py
│   │   ├── hubert
│   │   │   ├── __init__.py
│   │   │   └── hubert.py
│   │   ├── whisper
│   │   │   ├── __init__.py
│   │   │   └── whisper.py
│   │   ├── wave2vecv2
│   │   │   ├── __init__.py
│   │   │   └── wave2vecv2.py
│   │   ├── vit
│   │   │   └── __init__.py
│   │   ├── wrn
│   │   │   └── __init__.py
│   │   └── __init__.py
│   ├── datasets
│   │   ├── samplers
│   │   │   ├── __init__.py
│   │   │   └── sampler.py
│   │   ├── audio_datasets
│   │   │   ├── augmentation
│   │   │   │   ├── __init__.py
│   │   │   │   ├── transforms.py
│   │   │   │   └── subsample.py
│   │   │   ├── __init__.py
│   │   │   ├── vcc2018.py
│   │   │   ├── get_dataset.py
│   │   │   └── datasetbase.py
│   │   ├── cv_datasets
│   │   │   ├── __init__.py
│   │   │   ├── augmentation
│   │   │   │   ├── __init__.py
│   │   │   │   └── transforms.py
│   │   │   ├── get_dataset.py
│   │   │   ├── utkface.py
│   │   │   └── datasetbase.py
│   │   ├── collactors
│   │   │   ├── __init__.py
│   │   │   ├── nlp_collactor.py
│   │   │   └── audio_collactor.py
│   │   ├── nlp_datasets
│   │   │   ├── __init__.py
│   │   │   ├── get_dataset.py
│   │   │   ├── datasetbase.py
│   │   │   ├── yelp_review.py
│   │   │   └── amazon_review.py
│   │   └── __init__.py
│   └── __init__.py
├── visualization
│   ├── requirements.txt
│   └── plot_multi.py
├── requirements.txt
├── config
│   ├── classic_cv
│   │   ├── fullysupervised
│   │   │   └── fullysupervised_utkface_s0.yaml
│   │   ├── supervised
│   │   │   ├── supervised_utkface_lb50_s0.yaml
│   │   │   ├── supervised_utkface_lb250_s0.yaml
│   │   │   └── supervised_utkface_lb2000_s0.yaml
│   │   ├── pimodel
│   │   │   ├── pimodel_utkface_lb50_s0.yaml
│   │   │   ├── pimodel_utkface_lb250_s0.yaml
│   │   │   └── pimodel_utkface_lb2000_s0.yaml
│   │   ├── rda
│   │   │   ├── rda_utkface_lb50_s0.yaml
│   │   │   ├── rda_utkface_lb250_s0.yaml
│   │   │   └── rda_utkface_lb2000_s0.yaml
│   │   ├── ucvme
│   │   │   ├── ucvme_utkface_lb50_s0.yaml
│   │   │   ├── ucvme_utkface_lb250_s0.yaml
│   │   │   └── ucvme_utkface_lb2000_s0.yaml
│   │   ├── meanteacher
│   │   │   ├── meanteacher_utkface_lb50_s0.yaml
│   │   │   ├── meanteacher_utkface_lb250_s0.yaml
│   │   │   └── meanteacher_utkface_lb2000_s0.yaml
│   │   ├── mixmatch
│   │   │   ├── mixmatch_utkface_lb50_s0.yaml
│   │   │   ├── mixmatch_utkface_lb250_s0.yaml
│   │   │   └── mixmatch_utkface_lb2000_s0.yaml
│   │   ├── clss
│   │   │   ├── clss_utkface_lb50_s0.yaml
│   │   │   ├── clss_utkface_lb2000_s0.yaml
│   │   │   └── clss_utkface_lb250_s0.yaml
│   │   ├── rankup
│   │   │   ├── rankup_utkface_lb50_s0.yaml
│   │   │   ├── rankup_utkface_lb2000_s0.yaml
│   │   │   └── rankup_utkface_lb250_s0.yaml
│   │   └── rankuprda
│   │       ├── rankuprda_utkface_lb50_s0.yaml
│   │       ├── rankuprda_utkface_lb2000_s0.yaml
│   │       └── rankuprda_utkface_lb250_s0.yaml
│   ├── nlp
│   │   ├── fullysupervised
│   │   │   └── fullysupervised_yelp_review_s0.yaml
│   │   ├── supervised
│   │   │   └── supervised_yelp_review_lb250_s0.yaml
│   │   ├── pimodel
│   │   │   └── pimodel_yelp_review_lb250_s0.yaml
│   │   ├── rda
│   │   │   └── rda_yelp_review_lb250_s0.yaml
│   │   ├── ucvme
│   │   │   └── ucvme_yelp_review_lb250_s0.yaml
│   │   ├── meanteacher
│   │   │   └── meanteacher_yelp_review_lb250_s0.yaml
│   │   ├── clss
│   │   │   └── clss_yelp_review_lb250_s0.yaml
│   │   ├── mixmatch
│   │   │   └── mixmatch_yelp_review_lb250_s0.yaml
│   │   ├── rankup
│   │   │   └── rankup_yelp_review_lb250_s0.yaml
│   │   └── rankuprda
│   │       └── rankuprda_yelp_review_lb250_s0.yaml
│   └── audio
│       ├── fullysupervised
│       │   └── fullysupervised_bvcc_s0.yaml
│       ├── supervised
│       │   └── supervised_bvcc_lb250_s0.yaml
│       ├── pimodel
│       │   └── pimodel_bvcc_lb250_s0.yaml
│       ├── rda
│       │   └── rda_bvcc_lb250_s0.yaml
│       ├── ucvme
│       │   └── ucvme_bvcc_lb250_s0.yaml
│       ├── meanteacher
│       │   └── meanteacher_bvcc_lb250_s0.yaml
│       ├── mixmatch
│       │   └── mixmatch_bvcc_lb250_s0.yaml
│       ├── clss
│       │   └── clss_bvcc_lb250_s0.yaml
│       ├── rankup
│       │   └── rankup_bvcc_lb250_s0.yaml
│       └── rankuprda
│           └── rankuprda_bvcc_lb250_s0.yaml
├── results
│   ├── audio_average_log.csv
│   ├── nlp_average_log.csv
│   ├── README.md
│   └── classic_cv_average_log.csv
├── LICENSE
└── .gitignore

/CHANGELOG.md:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/semilearn/algorithms/clss/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .clss import CLSS
--------------------------------------------------------------------------------
/semilearn/algorithms/rda/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .rda import RDA
--------------------------------------------------------------------------------
/visualization/requirements.txt:
--------------------------------------------------------------------------------
matplotlib>=3.7.5
numpy>=1.24.4
pandas>=2.0.3
umap-learn>=0.5.7
plotly>=5.24.1
seaborn>=0.13.2
--------------------------------------------------------------------------------
/semilearn/algorithms/ucvme/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .ucvme import UCVME
--------------------------------------------------------------------------------
/semilearn/algorithms/rankup/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .rankup import RankUp
--------------------------------------------------------------------------------
/semilearn/algorithms/mixmatch/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .mixmatch import MixMatch
--------------------------------------------------------------------------------
/semilearn/algorithms/pimodel/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .pimodel import PiModel
--------------------------------------------------------------------------------
/semilearn/algorithms/rankuprda/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .rankuprda import RankUpRDA
--------------------------------------------------------------------------------
/semilearn/algorithms/meanteacher/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .meanteacher import MeanTeacher
--------------------------------------------------------------------------------
/semilearn/algorithms/utils/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .misc import *
from .ops import *
--------------------------------------------------------------------------------
/semilearn/algorithms/fullysupervised/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .fullysupervised import FullySupervised
--------------------------------------------------------------------------------
/semilearn/core/utils/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .build import *
from .misc import *
from .registry import *
--------------------------------------------------------------------------------
/semilearn/nets/bert/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .bert import bert_base
--------------------------------------------------------------------------------
/semilearn/nets/resnet/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .resnet import resnet50
--------------------------------------------------------------------------------
/semilearn/nets/hubert/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .hubert import hubert_base
--------------------------------------------------------------------------------
/semilearn/nets/whisper/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .whisper import whisper_base
--------------------------------------------------------------------------------
/semilearn/nets/wave2vecv2/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .wave2vecv2 import wave2vecv2_base
--------------------------------------------------------------------------------
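
The nets packages above each export a flat factory name (bert_base, resnet50, hubert_base, whisper_base, wave2vecv2_base), and the configs later select one via net: wrn_28_2 with net_from_name: False. A plausible resolution scheme is attribute lookup on semilearn.nets; this sketch is illustrative only and is not the repository's actual get_net_builder:

import semilearn.nets as nets

def get_builder_by_name(net_name: str):
    """Resolve e.g. 'wrn_28_2' to the factory re-exported by semilearn.nets."""
    return getattr(nets, net_name)
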
/semilearn/algorithms/hooks/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .pseudo_label import PseudoLabelingHook
from .masking import MaskingHook, FixedThresholdingHook
--------------------------------------------------------------------------------
/semilearn/datasets/samplers/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .sampler import name2sampler, DistributedSampler
--------------------------------------------------------------------------------
/semilearn/datasets/audio_datasets/augmentation/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .subsample import Subsample, RandomSubsample
from .transforms import AudioTransforms
--------------------------------------------------------------------------------
/semilearn/datasets/cv_datasets/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .get_dataset import get_cv_dataset
from .utkface import UTKFACE
--------------------------------------------------------------------------------
/semilearn/nets/vit/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .vit import vit_tiny_patch2_32, vit_small_patch2_32, vit_small_patch16_224, vit_base_patch16_224, vit_base_patch16_96
from .vit import VisionTransformer
--------------------------------------------------------------------------------
/semilearn/nets/wrn/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .wrn import wrn_28_2, wrn_28_8
from .wrn_var import wrn_var_37_2
from .wrn import WideResNet
--------------------------------------------------------------------------------
/semilearn/datasets/audio_datasets/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .get_dataset import get_audio_dataset
from .bvcc import BVCC
from .vcc2018 import VCC2018
--------------------------------------------------------------------------------
/semilearn/datasets/collactors/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .nlp_collactor import get_bert_base_collactor
from .audio_collactor import get_wave2vecv2_base_collactor, get_hubert_base_collactor, get_whisper_base_collactor
--------------------------------------------------------------------------------
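
The audio augmentation package above exports Subsample and RandomSubsample for cropping waveforms to a fixed length; the audio configs later set max_length_seconds: 6.0 and sample_rate: 16000. A minimal sketch of what such a random crop can look like, assuming a 1-D numpy waveform; the helper name random_subsample is hypothetical, not the repository's API:

import numpy as np

def random_subsample(wav: np.ndarray, max_length_seconds: float, sample_rate: int) -> np.ndarray:
    """Randomly crop a 1-D waveform to at most max_length_seconds."""
    max_samples = int(max_length_seconds * sample_rate)  # 6.0 s * 16000 Hz = 96000
    if len(wav) <= max_samples:
        return wav  # already short enough
    start = np.random.randint(0, len(wav) - max_samples + 1)
    return wav[start : start + max_samples]
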
/semilearn/core/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .algorithmbase import AlgorithmBase
from .utils.registry import import_all_modules_for_register

import_all_modules_for_register()
--------------------------------------------------------------------------------
/semilearn/datasets/nlp_datasets/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .get_dataset import get_nlp_dataset
from .yelp_review import YELP_REVIEW
from .amazon_review import AMAZON_REVIEW
--------------------------------------------------------------------------------
/semilearn/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .core.utils import get_dataset, get_data_loader, get_net_builder
from .algorithms import get_algorithm
from .datasets import split_ssl_data
from .datasets.cv_datasets.datasetbase import BasicDataset
--------------------------------------------------------------------------------
/semilearn/datasets/cv_datasets/augmentation/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .randaugment import RandAugment
from .transforms import get_val_transforms, get_weak_transforms, get_strong_transforms
--------------------------------------------------------------------------------
/semilearn/core/criterions/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .cross_entropy import ce_loss, CELoss
from .cls_consistency import cls_consistency_loss, ClsConsistencyLoss
from .consistency import consistency_loss, ConsistencyLoss
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
audiomentations[extras]>=0.37.0
librosa>=0.10.1
matplotlib>=3.7.5
numpy>=1.24.4
pandas>=2.0.3
Pillow>=10.4.0
progress>=1.6
ruamel.yaml>=0.18.6
ruamel.yaml.clib>=0.2.8
scikit-image>=0.21.0
scikit-learn>=1.3.2
scipy>=1.10.1
tensorboard>=2.14.0
timm>=1.0.11
torch>=2.4.0
torchaudio>=2.4.0
torchvision>=0.19.0
tqdm>=4.66.5
transformers>=4.46.0
wandb
wget
aim
--------------------------------------------------------------------------------
/semilearn/datasets/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from semilearn.datasets.utils import split_ssl_data, get_collactor
from semilearn.datasets.cv_datasets import get_cv_dataset
from semilearn.datasets.nlp_datasets import get_nlp_dataset
from semilearn.datasets.audio_datasets import get_audio_dataset
from semilearn.datasets.samplers import name2sampler, DistributedSampler
--------------------------------------------------------------------------------
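
split_ssl_data, exported above, is what carves a dataset into the labeled and unlabeled pools controlled by num_labels in the configs. Its exact signature is not shown in this dump; a generic index split, for intuition only:

import numpy as np

def split_labeled_unlabeled(num_samples: int, num_labels: int, seed: int = 0):
    """Pick num_labels indices as the labeled set; the rest become unlabeled."""
    rng = np.random.default_rng(seed)
    perm = rng.permutation(num_samples)
    return perm[:num_labels], perm[num_labels:]

lb_idx, ulb_idx = split_labeled_unlabeled(num_samples=20000, num_labels=250)
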
/semilearn/nets/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .resnet import resnet50
from .wrn import wrn_28_2, wrn_28_8, wrn_var_37_2
from .vit import vit_base_patch16_224, vit_small_patch16_224, vit_small_patch2_32, vit_tiny_patch2_32, vit_base_patch16_96
from .bert import bert_base
from .wave2vecv2 import wave2vecv2_base
from .hubert import hubert_base
from .whisper import whisper_base
--------------------------------------------------------------------------------
/semilearn/core/hooks/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .hook import Hook
from .checkpoint import CheckpointHook
from .evaluation import EvaluationHook
from .logging import LoggingHook
from .param_update import ParamUpdateHook
from .priority import Priority, get_priority
from .sampler_seed import DistSamplerSeedHook
from .timer import TimerHook
from .ema import EMAHook
from .wandb import WANDBHook
from .aim import AimHook
--------------------------------------------------------------------------------
/semilearn/algorithms/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from semilearn.core.utils import ALGORITHMS

name2alg = ALGORITHMS


def get_algorithm(args, net_builder, tb_log, logger):
    if args.algorithm in ALGORITHMS:
        alg = ALGORITHMS[args.algorithm](args=args, net_builder=net_builder, tb_log=tb_log, logger=logger)
        return alg
    else:
        raise KeyError(f"Unknown algorithm: {args.algorithm}")
--------------------------------------------------------------------------------
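
get_algorithm above resolves args.algorithm through the ALGORITHMS registry that semilearn.core populates via import_all_modules_for_register(). A sketch of the registry pattern this implies; the Registry class and register decorator here are illustrative, not the repository's exact semilearn.core.utils.registry implementation:

class Registry(dict):
    """Map a string key to a class via a decorator."""
    def register(self, name):
        def _wrap(cls):
            self[name] = cls
            return cls
        return _wrap

ALGORITHMS = Registry()

@ALGORITHMS.register("supervised")
class Supervised:
    def __init__(self, args, net_builder, tb_log, logger):
        ...

# get_algorithm(args, ...) then amounts to ALGORITHMS[args.algorithm](...).
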
/semilearn/core/hooks/ema.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .hook import Hook
from semilearn.core.utils import EMA


class EMAHook(Hook):
    """
    EMA model Hook for updating the EMA version of the model
    """

    def before_run(self, algorithm):
        algorithm.ema = EMA(algorithm.model, algorithm.ema_m)
        algorithm.ema.register()
        if algorithm.resume:
            algorithm.ema.load(algorithm.ema_model)

    def after_train_step(self, algorithm):
        if algorithm.ema is not None:
            algorithm.ema.update()
            # Copy the raw weights first, then overwrite with the EMA shadow
            # (non-strict, since the shadow only tracks registered parameters).
            algorithm.ema_model.load_state_dict(algorithm.model.state_dict())
            algorithm.ema_model.load_state_dict(algorithm.ema.shadow, strict=False)
--------------------------------------------------------------------------------
/semilearn/core/hooks/sampler_seed.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# Ref: https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/sampler_seed.py

from torch.utils.data import DataLoader

from .hook import Hook
from semilearn.datasets import DistributedSampler


class DistSamplerSeedHook(Hook):
    """
    Distributed sampler seed Hook

    update the samplers' epoch in the data loaders
    """

    def before_train_epoch(self, algorithm):
        for name, dataloader in algorithm.loader_dict.items():
            if not isinstance(dataloader, DataLoader):
                continue

            if isinstance(dataloader.sampler, DistributedSampler):
                algorithm.loader_dict[name].sampler.set_epoch(algorithm.epoch)
--------------------------------------------------------------------------------
/semilearn/algorithms/utils/misc.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

import argparse


class SSL_Argument(object):
    """
    Algorithm-specific argument
    """

    def __init__(self, name, type, default, help=""):
        """
        Model-specific arguments should be added via this class.
        """
        self.name = name
        self.type = type
        self.default = default
        self.help = help


def str2bool(v):
    """
    str to bool
    """
    if isinstance(v, bool):
        return v
    if v.lower() in ("yes", "true", "t", "y", "1"):
        return True
    elif v.lower() in ("no", "false", "f", "n", "0"):
        return False
    else:
        raise argparse.ArgumentTypeError("Boolean value expected.")
--------------------------------------------------------------------------------
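
str2bool above is what lets the YAML-style booleans in the configs below (amp: False, use_cat: True, ...) round-trip through argparse. A usage sketch:

import argparse

parser = argparse.ArgumentParser()
# str2bool accepts yes/no, true/false, t/f, y/n, 1/0 on the command line
parser.add_argument("--amp", type=str2bool, default=False)
parser.add_argument("--use_cat", type=str2bool, default=True)

args = parser.parse_args(["--amp", "true", "--use_cat", "no"])
assert args.amp is True and args.use_cat is False
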
/semilearn/core/hooks/checkpoint.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# Ref: https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/checkpoint.py

import os

from .hook import Hook


class CheckpointHook(Hook):
    """
    Checkpoint Hook for saving checkpoints
    """

    def after_train_step(self, algorithm):
        # must be called after evaluation for saving the best
        if self.every_n_iters(algorithm, algorithm.num_eval_iter) or self.is_last_iter(algorithm):
            save_path = os.path.join(algorithm.save_dir, algorithm.save_name)

            if (not algorithm.distributed) or (algorithm.distributed and algorithm.rank % algorithm.ngpus_per_node == 0):
                algorithm.save_model("latest_model.pth", save_path)

                if algorithm.it == algorithm.best_it:
                    algorithm.save_model("model_best.pth", save_path)
--------------------------------------------------------------------------------
/semilearn/core/hooks/timer.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import torch

from .hook import Hook


class TimerHook(Hook):
    """
    Timer Hook
    """

    def before_run(self, algorithm):
        algorithm.start_batch = torch.cuda.Event(enable_timing=True)
        algorithm.end_batch = torch.cuda.Event(enable_timing=True)

        algorithm.start_run = torch.cuda.Event(enable_timing=True)
        algorithm.end_run = torch.cuda.Event(enable_timing=True)
        algorithm.start_batch.record()

    def before_train_step(self, algorithm):
        algorithm.end_batch.record()

    def after_train_step(self, algorithm):
        algorithm.log_dict["lr"] = algorithm.optimizer.param_groups[-1]["lr"]
        algorithm.log_dict["train/prefetch_time"] = algorithm.start_batch.elapsed_time(algorithm.end_batch) / 1000.0
        algorithm.start_batch.record()
--------------------------------------------------------------------------------
/config/classic_cv/fullysupervised/fullysupervised_utkface_s0.yaml:
--------------------------------------------------------------------------------
algorithm: fullysupervised
save_dir: ./saved_models/classic_cv
save_name: fullysupervised_utkface_s0
resume: True
load_path: ./saved_models/classic_cv/fullysupervised_utkface_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10003
dist_backend: nccl
--------------------------------------------------------------------------------
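
The iteration counts in the classic_cv configs are internally consistent: num_train_iter equals epoch * num_eval_iter, so there is exactly one evaluation pass per epoch (and CheckpointHook above fires on the same num_eval_iter boundary). A quick check:

epoch, num_eval_iter = 256, 1024
num_train_iter = epoch * num_eval_iter
assert num_train_iter == 262144  # matches the value in the configs
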
/config/classic_cv/supervised/supervised_utkface_lb50_s0.yaml:
--------------------------------------------------------------------------------
algorithm: supervised
save_dir: ./saved_models/classic_cv
save_name: supervised_utkface_lb50_s0
resume: True
load_path: ./saved_models/classic_cv/supervised_utkface_lb50_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 50
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10001
dist_backend: nccl
--------------------------------------------------------------------------------
/config/classic_cv/supervised/supervised_utkface_lb250_s0.yaml:
--------------------------------------------------------------------------------
algorithm: supervised
save_dir: ./saved_models/classic_cv
save_name: supervised_utkface_lb250_s0
resume: True
load_path: ./saved_models/classic_cv/supervised_utkface_lb250_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 250
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10002
dist_backend: nccl
--------------------------------------------------------------------------------
/config/classic_cv/supervised/supervised_utkface_lb2000_s0.yaml:
--------------------------------------------------------------------------------
algorithm: supervised
save_dir: ./saved_models/classic_cv
save_name: supervised_utkface_lb2000_s0
resume: True
load_path: ./saved_models/classic_cv/supervised_utkface_lb2000_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 2000
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10003
dist_backend: nccl
--------------------------------------------------------------------------------
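
The three supervised configs above differ only in num_labels (50/250/2000), the save/load paths, and the dist_url port. A sketch that stamps out such variants with ruamel.yaml (already pinned in requirements.txt); the base-file path is taken from this repository, the loop itself is illustrative:

from ruamel.yaml import YAML

yaml = YAML()
base = "config/classic_cv/supervised/supervised_utkface_lb50_s0.yaml"
with open(base) as f:
    cfg = yaml.load(f)

# ports 10001..10003 line up with the lb50/lb250/lb2000 files above
for port, num_labels in enumerate([50, 250, 2000], start=10001):
    name = f"supervised_utkface_lb{num_labels}_s0"
    cfg["num_labels"] = num_labels
    cfg["save_name"] = name
    cfg["load_path"] = f"./saved_models/classic_cv/{name}/latest_model.pth"
    cfg["dist_url"] = f"tcp://127.0.0.1:{port}"
    with open(f"config/classic_cv/supervised/{name}.yaml", "w") as f:
        yaml.dump(cfg, f)
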
/config/classic_cv/pimodel/pimodel_utkface_lb50_s0.yaml:
--------------------------------------------------------------------------------
algorithm: pimodel
save_dir: ./saved_models/classic_cv
save_name: pimodel_utkface_lb50_s0
resume: True
load_path: ./saved_models/classic_cv/pimodel_utkface_lb50_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 50
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 1
ulb_loss_ratio: 0.1
unsup_warm_up: 0.4
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10001
dist_backend: nccl
--------------------------------------------------------------------------------
/config/classic_cv/pimodel/pimodel_utkface_lb250_s0.yaml:
--------------------------------------------------------------------------------
algorithm: pimodel
save_dir: ./saved_models/classic_cv
save_name: pimodel_utkface_lb250_s0
resume: True
load_path: ./saved_models/classic_cv/pimodel_utkface_lb250_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 250
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 1
ulb_loss_ratio: 0.1
unsup_warm_up: 0.4
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10002
dist_backend: nccl
--------------------------------------------------------------------------------
/config/nlp/fullysupervised/fullysupervised_yelp_review_s0.yaml:
--------------------------------------------------------------------------------
algorithm: fullysupervised
save_dir: ./saved_models/nlp
save_name: fullysupervised_yelp_review_s0
resume: True
load_path: ./saved_models/nlp/fullysupervised_yelp_review_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 100
num_train_iter: 102400
num_eval_iter: 1024
num_log_iter: 256
num_warmup_iter: 5120
batch_size: 8
eval_batch_size: 8
ema_m: 0.0
optim: AdamW
lr: 1e-05
momentum: 0.9
weight_decay: 0.0005
layer_decay: 0.75
amp: False
clip_grad: 0.0
use_cat: False
criterion: l1
net: bert_base
net_from_name: False
use_pretrain: True
pretrain_path: google/bert_uncased_L-4_H-512_A-8
data_dir: ./data
dataset: yelp_review
train_sampler: RandomSampler
num_workers: 4
max_length: 512
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10001
dist_backend: nccl
--------------------------------------------------------------------------------
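
layer_decay: 0.75 in the NLP (and audio) configs signals layer-wise learning-rate decay for the pretrained backbone: the top layer keeps the base lr while earlier layers are scaled down geometrically. A sketch of the usual scheme; whether the repository counts the embedding as layer 0 exactly this way is an assumption:

def layer_lr(base_lr: float, layer: int, num_layers: int, decay: float = 0.75) -> float:
    """Earlier layers get base_lr * decay^(distance from the top layer)."""
    return base_lr * decay ** (num_layers - layer)

# With lr: 1e-05 from the config and a 4-layer encoder (bert_uncased_L-4):
print([layer_lr(1e-05, i, 4) for i in range(5)])  # embedding ... top layer
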
/results/audio_average_log.csv:
--------------------------------------------------------------------------------
exp_name,num_exp,min_MAE,min_MSE,max_R2,max_LCC,max_SRCC,max_KTAU,min_GMAE
clss_bvcc_lb250,3,0.499±0.010,0.385±0.022,0.534±0.027,0.747±0.012,0.748±0.008,0.559±0.009,0.329±0.002
fullysupervised_bvcc,3,0.351±0.003,0.195±0.002,0.764±0.002,0.876±0.001,0.874±0.001,0.698±0.001,0.227±0.004
meanteacher_bvcc_lb250,3,0.532±0.006,0.419±0.014,0.492±0.018,0.735±0.008,0.742±0.008,0.550±0.008,0.362±0.002
mixmatch_bvcc_lb250,3,0.597±0.017,0.535±0.036,0.353±0.044,0.614±0.029,0.626±0.031,0.446±0.026,0.401±0.011
pimodel_bvcc_lb250,3,0.534±0.008,0.422±0.017,0.489±0.021,0.734±0.009,0.740±0.009,0.549±0.009,0.360±0.003
rankuprda_bvcc_lb250,3,0.463±0.013,0.332±0.023,0.598±0.027,0.781±0.011,0.783±0.011,0.591±0.012,0.305±0.007
rankup_bvcc_lb250,3,0.470±0.012,0.340±0.023,0.588±0.028,0.774±0.012,0.776±0.010,0.584±0.010,0.312±0.011
supervised_bvcc_lb250,3,0.533±0.006,0.421±0.014,0.490±0.018,0.734±0.008,0.741±0.009,0.549±0.009,0.362±0.001
ucvme_bvcc_lb250,3,0.498±0.003,0.370±0.009,0.553±0.011,0.770±0.010,0.774±0.008,0.582±0.009,0.333±0.003
--------------------------------------------------------------------------------
/config/classic_cv/pimodel/pimodel_utkface_lb2000_s0.yaml:
--------------------------------------------------------------------------------
algorithm: pimodel
save_dir: ./saved_models/classic_cv
save_name: pimodel_utkface_lb2000_s0
resume: True
load_path: ./saved_models/classic_cv/pimodel_utkface_lb2000_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 2000
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 1
ulb_loss_ratio: 0.1
unsup_warm_up: 0.4
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10003
dist_backend: nccl
--------------------------------------------------------------------------------
/config/classic_cv/rda/rda_utkface_lb50_s0.yaml:
--------------------------------------------------------------------------------
algorithm: rda
save_dir: ./saved_models/classic_cv
save_name: rda_utkface_lb50_s0
resume: True
load_path: ./saved_models/classic_cv/rda_utkface_lb50_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 50
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 7
ulb_loss_ratio: 1.0
unsup_warm_up: 0.4
rda_num_refine_iter: 1024
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10001
dist_backend: nccl
--------------------------------------------------------------------------------
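
results/audio_average_log.csv above stores each metric as mean±std over num_exp runs. A sketch for splitting those cells back into numbers with pandas (pinned in requirements.txt):

import pandas as pd

df = pd.read_csv("results/audio_average_log.csv")
# "0.499±0.010" -> mean 0.499, std 0.010
mean_std = df["min_MAE"].str.split("±", expand=True).astype(float)
df["min_MAE_mean"], df["min_MAE_std"] = mean_std[0], mean_std[1]
print(df.sort_values("min_MAE_mean")[["exp_name", "min_MAE_mean"]])
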
/config/classic_cv/ucvme/ucvme_utkface_lb50_s0.yaml:
--------------------------------------------------------------------------------
algorithm: ucvme
save_dir: ./saved_models/classic_cv
save_name: ucvme_utkface_lb50_s0
resume: True
load_path: ./saved_models/classic_cv/ucvme_utkface_lb50_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 50
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 1
ulb_loss_ratio: 0.05
dropout_rate: 0.05
num_ensemble: 5
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10001
dist_backend: nccl
--------------------------------------------------------------------------------
/config/classic_cv/rda/rda_utkface_lb250_s0.yaml:
--------------------------------------------------------------------------------
algorithm: rda
save_dir: ./saved_models/classic_cv
save_name: rda_utkface_lb250_s0
resume: True
load_path: ./saved_models/classic_cv/rda_utkface_lb250_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 250
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 7
ulb_loss_ratio: 1.0
unsup_warm_up: 0.4
rda_num_refine_iter: 1024
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10002
dist_backend: nccl
--------------------------------------------------------------------------------
/config/classic_cv/ucvme/ucvme_utkface_lb250_s0.yaml:
--------------------------------------------------------------------------------
algorithm: ucvme
save_dir: ./saved_models/classic_cv
save_name: ucvme_utkface_lb250_s0
resume: True
load_path: ./saved_models/classic_cv/ucvme_utkface_lb250_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 250
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 1
ulb_loss_ratio: 0.05
dropout_rate: 0.05
num_ensemble: 5
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10002
dist_backend: nccl
--------------------------------------------------------------------------------
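
The ucvme configs expose dropout_rate: 0.05 and num_ensemble: 5; UCVME-style methods estimate uncertainty by averaging several stochastic forward passes. A minimal MC-dropout sketch in PyTorch, independent of the repository's actual UCVME implementation:

import torch

@torch.no_grad()
def mc_dropout_predict(model: torch.nn.Module, x: torch.Tensor, num_ensemble: int = 5):
    """Average num_ensemble stochastic passes; the variance acts as uncertainty."""
    model.train()  # keep dropout active at inference time
    preds = torch.stack([model(x) for _ in range(num_ensemble)])
    return preds.mean(dim=0), preds.var(dim=0)
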
/config/nlp/supervised/supervised_yelp_review_lb250_s0.yaml:
--------------------------------------------------------------------------------
algorithm: supervised
save_dir: ./saved_models/nlp
save_name: supervised_yelp_review_lb250_s0
resume: True
load_path: ./saved_models/nlp/supervised_yelp_review_lb250_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 100
num_train_iter: 102400
num_eval_iter: 1024
num_log_iter: 256
num_warmup_iter: 5120
num_labels: 250
batch_size: 8
eval_batch_size: 8
ema_m: 0.0
optim: AdamW
lr: 1e-05
momentum: 0.9
weight_decay: 0.0005
layer_decay: 0.75
amp: False
clip_grad: 0.0
use_cat: False
criterion: l1
net: bert_base
net_from_name: False
use_pretrain: True
pretrain_path: google/bert_uncased_L-4_H-512_A-8
data_dir: ./data
dataset: yelp_review
train_sampler: RandomSampler
num_workers: 4
max_length: 512
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10001
dist_backend: nccl
--------------------------------------------------------------------------------
/config/audio/fullysupervised/fullysupervised_bvcc_s0.yaml:
--------------------------------------------------------------------------------
algorithm: fullysupervised
save_dir: ./saved_models/audio
save_name: fullysupervised_bvcc_s0
resume: True
load_path: ./saved_models/audio/fullysupervised_bvcc_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 100
num_train_iter: 102400
num_eval_iter: 1024
num_log_iter: 256
num_warmup_iter: 5120
batch_size: 8
eval_batch_size: 16
ema_m: 0.0
optim: AdamW
lr: 2e-06
momentum: 0.9
weight_decay: 2e-05
layer_decay: 0.75
amp: False
clip_grad: 0.0
use_cat: False
criterion: l1
net: whisper_base
net_from_name: False
use_pretrain: True
pretrain_path: openai/whisper-base
data_dir: ./data
dataset: bvcc
train_sampler: RandomSampler
num_workers: 8
max_length_seconds: 6.0
sample_rate: 16000
preload: True
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10001
dist_backend: nccl
--------------------------------------------------------------------------------
/config/classic_cv/meanteacher/meanteacher_utkface_lb50_s0.yaml:
--------------------------------------------------------------------------------
algorithm: meanteacher
save_dir: ./saved_models/classic_cv
save_name: meanteacher_utkface_lb50_s0
resume: True
load_path: ./saved_models/classic_cv/meanteacher_utkface_lb50_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 50
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 1
ulb_loss_ratio: 0.1
unsup_warm_up: 0.4
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10001
dist_backend: nccl
--------------------------------------------------------------------------------
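
ema_m: 0.999 is the decay used both for the evaluation EMA (core/hooks/ema.py above) and, in Mean Teacher, for the teacher weights. The classic exponential-moving-average update, sketched independently of the EMA class this repository imports:

import torch

@torch.no_grad()
def ema_update(teacher: torch.nn.Module, student: torch.nn.Module, ema_m: float = 0.999):
    """teacher <- ema_m * teacher + (1 - ema_m) * student, parameter-wise."""
    for t_param, s_param in zip(teacher.parameters(), student.parameters()):
        t_param.mul_(ema_m).add_(s_param, alpha=1.0 - ema_m)
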
/config/classic_cv/rda/rda_utkface_lb2000_s0.yaml:
--------------------------------------------------------------------------------
algorithm: rda
save_dir: ./saved_models/classic_cv
save_name: rda_utkface_lb2000_s0
resume: True
load_path: ./saved_models/classic_cv/rda_utkface_lb2000_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 2000
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 7
ulb_loss_ratio: 1.0
unsup_warm_up: 0.4
rda_num_refine_iter: 1024
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10003
dist_backend: nccl
--------------------------------------------------------------------------------
/config/classic_cv/ucvme/ucvme_utkface_lb2000_s0.yaml:
--------------------------------------------------------------------------------
algorithm: ucvme
save_dir: ./saved_models/classic_cv
save_name: ucvme_utkface_lb2000_s0
resume: True
load_path: ./saved_models/classic_cv/ucvme_utkface_lb2000_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 2000
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 1
ulb_loss_ratio: 0.05
dropout_rate: 0.05
num_ensemble: 5
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10003
dist_backend: nccl
--------------------------------------------------------------------------------
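
unsup_warm_up: 0.4 in the pimodel/meanteacher/rda/mixmatch configs ramps the unlabeled-loss weight from 0 up to ulb_loss_ratio over the first 40% of training. Whether the ramp is linear or sigmoid is not shown in this dump; the linear form is the illustrative default:

def unsup_weight(it: int, num_train_iter: int, ulb_loss_ratio: float, warm_up: float = 0.4) -> float:
    """Linearly ramp the unlabeled-loss weight over warm_up * num_train_iter."""
    ramp = min(1.0, it / (warm_up * num_train_iter))
    return ulb_loss_ratio * ramp

assert unsup_weight(0, 262144, 0.1) == 0.0
assert unsup_weight(262144, 262144, 0.1) == 0.1
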
/config/classic_cv/meanteacher/meanteacher_utkface_lb250_s0.yaml:
--------------------------------------------------------------------------------
algorithm: meanteacher
save_dir: ./saved_models/classic_cv
save_name: meanteacher_utkface_lb250_s0
resume: True
load_path: ./saved_models/classic_cv/meanteacher_utkface_lb250_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 250
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 1
ulb_loss_ratio: 0.1
unsup_warm_up: 0.4
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10002
dist_backend: nccl
--------------------------------------------------------------------------------
/config/classic_cv/mixmatch/mixmatch_utkface_lb50_s0.yaml:
--------------------------------------------------------------------------------
algorithm: mixmatch
save_dir: ./saved_models/classic_cv
save_name: mixmatch_utkface_lb50_s0
resume: True
load_path: ./saved_models/classic_cv/mixmatch_utkface_lb50_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 50
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 1
ulb_loss_ratio: 0.1
unsup_warm_up: 0.4
mixup_alpha: 0.5
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10001
dist_backend: nccl
--------------------------------------------------------------------------------
/config/classic_cv/meanteacher/meanteacher_utkface_lb2000_s0.yaml:
--------------------------------------------------------------------------------
algorithm: meanteacher
save_dir: ./saved_models/classic_cv
save_name: meanteacher_utkface_lb2000_s0
resume: True
load_path: ./saved_models/classic_cv/meanteacher_utkface_lb2000_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 2000
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 1
ulb_loss_ratio: 0.1
unsup_warm_up: 0.4
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10003
dist_backend: nccl
--------------------------------------------------------------------------------
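
mixup_alpha: 0.5 in the mixmatch configs parameterizes the Beta distribution used for mixup; for regression, both the inputs and the scalar targets are interpolated. A sketch of the standard MixMatch-style mixing:

import numpy as np
import torch

def mixup(x: torch.Tensor, y: torch.Tensor, alpha: float = 0.5):
    """Mix a batch with a shuffled copy of itself."""
    lam = np.random.beta(alpha, alpha)
    lam = max(lam, 1.0 - lam)  # MixMatch keeps the larger coefficient
    idx = torch.randperm(x.size(0))
    return lam * x + (1 - lam) * x[idx], lam * y + (1 - lam) * y[idx]
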
/config/classic_cv/mixmatch/mixmatch_utkface_lb250_s0.yaml:
--------------------------------------------------------------------------------
algorithm: mixmatch
save_dir: ./saved_models/classic_cv
save_name: mixmatch_utkface_lb250_s0
resume: True
load_path: ./saved_models/classic_cv/mixmatch_utkface_lb250_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 250
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 1
ulb_loss_ratio: 0.1
unsup_warm_up: 0.4
mixup_alpha: 0.5
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10002
dist_backend: nccl
--------------------------------------------------------------------------------
/config/audio/supervised/supervised_bvcc_lb250_s0.yaml:
--------------------------------------------------------------------------------
algorithm: supervised
save_dir: ./saved_models/audio
save_name: supervised_bvcc_lb250_s0
resume: True
load_path: ./saved_models/audio/supervised_bvcc_lb250_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 100
num_train_iter: 102400
num_eval_iter: 1024
num_log_iter: 256
num_warmup_iter: 5120
num_labels: 250
batch_size: 8
eval_batch_size: 16
ema_m: 0.0
optim: AdamW
lr: 2e-06
momentum: 0.9
weight_decay: 2e-05
layer_decay: 0.75
amp: False
clip_grad: 0.0
use_cat: False
criterion: l1
net: whisper_base
net_from_name: False
use_pretrain: True
pretrain_path: openai/whisper-base
data_dir: ./data
dataset: bvcc
train_sampler: RandomSampler
num_workers: 8
max_length_seconds: 6.0
sample_rate: 16000
preload: True
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10001
dist_backend: nccl
--------------------------------------------------------------------------------
/config/classic_cv/mixmatch/mixmatch_utkface_lb2000_s0.yaml:
--------------------------------------------------------------------------------
algorithm: mixmatch
save_dir: ./saved_models/classic_cv
save_name: mixmatch_utkface_lb2000_s0
resume: True
load_path: ./saved_models/classic_cv/mixmatch_utkface_lb2000_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 2000
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 1
ulb_loss_ratio: 0.1
unsup_warm_up: 0.4
mixup_alpha: 0.5
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10003
dist_backend: nccl
--------------------------------------------------------------------------------
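
use_cat: True in the classic_cv configs (and False for the NLP/audio ones) typically means the labeled and unlabeled batches are concatenated into a single forward pass and split afterwards, which is faster on GPU at the cost of memory; variable-length text and audio batches don't concatenate as cleanly. A sketch of the pattern, hedged as an assumption about what the flag controls here:

import torch

def cat_forward(model, x_lb: torch.Tensor, x_ulb: torch.Tensor):
    """One concatenated forward pass, then split labeled/unlabeled outputs."""
    num_lb = x_lb.size(0)
    outputs = model(torch.cat((x_lb, x_ulb)))
    return outputs[:num_lb], outputs[num_lb:]
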
/config/classic_cv/clss/clss_utkface_lb50_s0.yaml:
--------------------------------------------------------------------------------
algorithm: clss
save_dir: ./saved_models/classic_cv
save_name: clss_utkface_lb50_s0
resume: True
load_path: ./saved_models/classic_cv/clss_utkface_lb50_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 50
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 0.25
lb_ctr_loss_ratio: 1.0
ulb_ctr_loss_ratio: 0.05
ulb_rank_loss_ratio: 0.01
lambda_val: 2.0
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10001
dist_backend: nccl
--------------------------------------------------------------------------------
/config/classic_cv/clss/clss_utkface_lb2000_s0.yaml:
--------------------------------------------------------------------------------
algorithm: clss
save_dir: ./saved_models/classic_cv
save_name: clss_utkface_lb2000_s0
resume: True
load_path: ./saved_models/classic_cv/clss_utkface_lb2000_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 2000
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 0.25
lb_ctr_loss_ratio: 1.0
ulb_ctr_loss_ratio: 0.05
ulb_rank_loss_ratio: 0.01
lambda_val: 2.0
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10003
dist_backend: nccl
--------------------------------------------------------------------------------
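
The clss configs weight four terms: the supervised criterion (criterion: l1) plus contrastive and ranking losses scaled by lb_ctr_loss_ratio: 1.0, ulb_ctr_loss_ratio: 0.05, and ulb_rank_loss_ratio: 0.01. A sketch of how such ratios typically combine into the total loss; the term names are illustrative, not CLSS's internal variable names:

def clss_total_loss(sup_loss, lb_ctr_loss, ulb_ctr_loss, ulb_rank_loss,
                    lb_ctr_ratio=1.0, ulb_ctr_ratio=0.05, ulb_rank_ratio=0.01):
    """Weighted sum matching the ratios in the clss_utkface configs."""
    return (sup_loss
            + lb_ctr_ratio * lb_ctr_loss
            + ulb_ctr_ratio * ulb_ctr_loss
            + ulb_rank_ratio * ulb_rank_loss)
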
-------------------------------------------------------------------------------- 1 | algorithm: clss 2 | save_dir: ./saved_models/classic_cv 3 | save_name: clss_utkface_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/classic_cv/clss_utkface_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 256 11 | num_train_iter: 262144 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_labels: 250 15 | batch_size: 32 16 | eval_batch_size: 256 17 | ema_m: 0.999 18 | uratio: 0.25 19 | lb_ctr_loss_ratio: 1.0 20 | ulb_ctr_loss_ratio: 0.05 21 | ulb_rank_loss_ratio: 0.01 22 | lambda_val: 2.0 23 | optim: SGD 24 | lr: 0.01 25 | momentum: 0.9 26 | weight_decay: 0.001 27 | layer_decay: 1.0 28 | amp: False 29 | clip_grad: 0.0 30 | use_cat: True 31 | criterion: l1 32 | net: wrn_28_2 33 | net_from_name: False 34 | data_dir: ./data 35 | dataset: utkface 36 | train_sampler: RandomSampler 37 | num_workers: 4 38 | crop_ratio: 0.875 39 | img_size: 40 40 | preload: False 41 | seed: 0 42 | world_size: 1 43 | rank: 0 44 | multiprocessing_distributed: False 45 | dist_url: tcp://127.0.0.1:10002 46 | dist_backend: nccl 47 | -------------------------------------------------------------------------------- /config/classic_cv/rankup/rankup_utkface_lb50_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: rankup 2 | save_dir: ./saved_models/classic_cv 3 | save_name: rankup_utkface_lb50_s0 4 | resume: True 5 | load_path: ./saved_models/classic_cv/rankup_utkface_lb50_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 256 11 | num_train_iter: 262144 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_labels: 50 15 | batch_size: 32 16 | eval_batch_size: 256 17 | ema_m: 0.999 18 | uratio: 7 19 | arc_loss_ratio: 0.2 20 | arc_ulb_loss_ratio: 1.0 21 | hard_label: True 22 | T: 0.5 23 | p_cutoff: 0.95 24 | optim: SGD 25 | lr: 0.01 26 | momentum: 0.9 27 | weight_decay: 0.001 28 | layer_decay: 1.0 29 | amp: False 30 | clip_grad: 0.0 31 | use_cat: True 32 | criterion: l1 33 | net: wrn_28_2 34 | net_from_name: False 35 | data_dir: ./data 36 | dataset: utkface 37 | train_sampler: RandomSampler 38 | num_workers: 4 39 | crop_ratio: 0.875 40 | img_size: 40 41 | preload: False 42 | seed: 0 43 | world_size: 1 44 | rank: 0 45 | multiprocessing_distributed: False 46 | dist_url: tcp://127.0.0.1:10001 47 | dist_backend: nccl 48 | -------------------------------------------------------------------------------- /config/classic_cv/rankup/rankup_utkface_lb2000_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: rankup 2 | save_dir: ./saved_models/classic_cv 3 | save_name: rankup_utkface_lb2000_s0 4 | resume: True 5 | load_path: ./saved_models/classic_cv/rankup_utkface_lb2000_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 256 11 | num_train_iter: 262144 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_labels: 2000 15 | batch_size: 32 16 | eval_batch_size: 256 17 | ema_m: 0.999 18 | uratio: 7 19 | arc_loss_ratio: 0.2 20 | arc_ulb_loss_ratio: 1.0 21 | hard_label: True 22 | T: 0.5 23 | p_cutoff: 0.95 24 | optim: SGD 25 | lr: 0.01 26 | momentum: 0.9 27 | weight_decay: 0.001 28 | layer_decay: 1.0 29 | amp: False 30 | clip_grad: 0.0 31 | use_cat: True 32 | criterion: l1 33 | net: wrn_28_2 34 | net_from_name: False 35 | data_dir: 
./data 36 | dataset: utkface 37 | train_sampler: RandomSampler 38 | num_workers: 4 39 | crop_ratio: 0.875 40 | img_size: 40 41 | preload: False 42 | seed: 0 43 | world_size: 1 44 | rank: 0 45 | multiprocessing_distributed: False 46 | dist_url: tcp://127.0.0.1:10003 47 | dist_backend: nccl 48 | -------------------------------------------------------------------------------- /config/classic_cv/rankup/rankup_utkface_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: rankup 2 | save_dir: ./saved_models/classic_cv 3 | save_name: rankup_utkface_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/classic_cv/rankup_utkface_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 256 11 | num_train_iter: 262144 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_labels: 250 15 | batch_size: 32 16 | eval_batch_size: 256 17 | ema_m: 0.999 18 | uratio: 7 19 | arc_loss_ratio: 0.2 20 | arc_ulb_loss_ratio: 1.0 21 | hard_label: True 22 | T: 0.5 23 | p_cutoff: 0.95 24 | optim: SGD 25 | lr: 0.01 26 | momentum: 0.9 27 | weight_decay: 0.001 28 | layer_decay: 1.0 29 | amp: False 30 | clip_grad: 0.0 31 | use_cat: True 32 | criterion: l1 33 | net: wrn_28_2 34 | net_from_name: False 35 | data_dir: ./data 36 | dataset: utkface 37 | train_sampler: RandomSampler 38 | num_workers: 4 39 | crop_ratio: 0.875 40 | img_size: 40 41 | preload: False 42 | seed: 0 43 | world_size: 1 44 | rank: 0 45 | multiprocessing_distributed: False 46 | dist_url: tcp://127.0.0.1:10002 47 | dist_backend: nccl 48 | -------------------------------------------------------------------------------- /config/nlp/pimodel/pimodel_yelp_review_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: pimodel 2 | save_dir: ./saved_models/nlp 3 | save_name: pimodel_yelp_review_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/nlp/pimodel_yelp_review_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 8 18 | ema_m: 0.0 19 | uratio: 1 20 | ulb_loss_ratio: 0.1 21 | unsup_warm_up: 0.4 22 | optim: AdamW 23 | lr: 1e-05 24 | momentum: 0.9 25 | weight_decay: 0.0005 26 | layer_decay: 0.75 27 | amp: False 28 | clip_grad: 0.0 29 | use_cat: False 30 | criterion: l1 31 | net: bert_base 32 | net_from_name: False 33 | use_pretrain: True 34 | pretrain_path: google/bert_uncased_L-4_H-512_A-8 35 | data_dir: ./data 36 | dataset: yelp_review 37 | train_sampler: RandomSampler 38 | num_workers: 4 39 | max_length: 512 40 | seed: 0 41 | world_size: 1 42 | rank: 0 43 | multiprocessing_distributed: False 44 | dist_url: tcp://127.0.0.1:10001 45 | dist_backend: nccl 46 | -------------------------------------------------------------------------------- /results/nlp_average_log.csv: -------------------------------------------------------------------------------- 1 | exp_name,num_exp,min_MAE,min_MSE,max_R2,max_LCC,max_SRCC,max_KTAU,min_GMAE 2 | clss_yelp_review_lb250,3,0.721±0.010,0.913±0.022,0.543±0.011,0.744±0.001,0.748±0.002,0.599±0.002,0.307±0.026 3 | fullysupervised_yelp_review,3,0.418±0.003,0.402±0.004,0.799±0.002,0.898±0.001,0.896±0.001,0.766±0.001,0.150±0.005 4 | 
meanteacher_yelp_review_lb250,3,0.730±0.024,0.870±0.037,0.565±0.019,0.763±0.011,0.769±0.009,0.619±0.009,0.420±0.018 5 | mixmatch_yelp_review_lb250,3,0.886±0.004,1.238±0.017,0.381±0.008,0.643±0.003,0.660±0.004,0.511±0.003,0.587±0.003 6 | pimodel_yelp_review_lb250,3,0.730±0.024,0.870±0.037,0.565±0.019,0.763±0.011,0.769±0.009,0.619±0.009,0.420±0.018 7 | rankuprda_yelp_review_lb250,3,0.632±0.009,0.698±0.015,0.651±0.007,0.809±0.005,0.810±0.005,0.659±0.005,0.389±0.011 8 | rankup_yelp_review_lb250,3,0.661±0.018,0.711±0.025,0.645±0.013,0.817±0.003,0.829±0.002,0.681±0.002,0.391±0.013 9 | supervised_yelp_review_lb250,3,0.723±0.023,0.868±0.038,0.566±0.019,0.762±0.012,0.769±0.010,0.619±0.009,0.392±0.015 10 | ucvme_yelp_review_lb250,3,0.775±0.006,0.921±0.011,0.540±0.005,0.754±0.007,0.763±0.005,0.611±0.005,0.526±0.007 11 | -------------------------------------------------------------------------------- /config/nlp/rda/rda_yelp_review_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: rda 2 | save_dir: ./saved_models/nlp 3 | save_name: rda_yelp_review_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/nlp/rda_yelp_review_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 8 18 | ema_m: 0.0 19 | uratio: 1 20 | ulb_loss_ratio: 1.0 21 | unsup_warm_up: 0.4 22 | rda_num_refine_iter: 1024 23 | optim: AdamW 24 | lr: 1e-05 25 | momentum: 0.9 26 | weight_decay: 0.0005 27 | layer_decay: 0.75 28 | amp: False 29 | clip_grad: 0.0 30 | use_cat: False 31 | criterion: l1 32 | net: bert_base 33 | net_from_name: False 34 | use_pretrain: True 35 | pretrain_path: google/bert_uncased_L-4_H-512_A-8 36 | data_dir: ./data 37 | dataset: yelp_review 38 | train_sampler: RandomSampler 39 | num_workers: 4 40 | max_length: 512 41 | seed: 0 42 | world_size: 1 43 | rank: 0 44 | multiprocessing_distributed: False 45 | dist_url: tcp://127.0.0.1:10001 46 | dist_backend: nccl 47 | -------------------------------------------------------------------------------- /config/nlp/ucvme/ucvme_yelp_review_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: ucvme 2 | save_dir: ./saved_models/nlp 3 | save_name: ucvme_yelp_review_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/nlp/ucvme_yelp_review_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 8 18 | ema_m: 0.0 19 | uratio: 1 20 | ulb_loss_ratio: 0.05 21 | dropout_rate: 0.05 22 | num_ensemble: 5 23 | optim: AdamW 24 | lr: 1e-05 25 | momentum: 0.9 26 | weight_decay: 0.0005 27 | layer_decay: 0.75 28 | amp: False 29 | clip_grad: 0.0 30 | use_cat: False 31 | criterion: l1 32 | net: bert_base 33 | net_from_name: False 34 | use_pretrain: True 35 | pretrain_path: google/bert_uncased_L-4_H-512_A-8 36 | data_dir: ./data 37 | dataset: yelp_review 38 | train_sampler: RandomSampler 39 | num_workers: 4 40 | max_length: 512 41 | seed: 0 42 | world_size: 1 43 | rank: 0 44 | multiprocessing_distributed: False 45 | dist_url: tcp://127.0.0.1:10001 46 | dist_backend: nccl 47 | 
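The rows in the average-log CSVs above report each experiment aggregated over its repeated runs: per run, the best value of each metric reached during training is taken (minimum for error metrics such as MAE, maximum for correlation metrics such as SRCC), and the mean and standard deviation across the num_exp runs are written as mean±std. A minimal sketch of that aggregation step in Python; the per-seed values and the ddof=0 choice below are illustrative assumptions, not numbers or conventions taken from the repository:

import numpy as np

# hypothetical best-MAE values for one experiment, one entry per seed
per_seed_min_mae = [0.709, 0.721, 0.733]

values = np.array(per_seed_min_mae)
mean = values.mean()
std = values.std(ddof=0)  # population std; whether ddof=0 or 1 is used here is an assumption
print(f"{mean:.3f}±{std:.3f}")  # -> "0.721±0.010", the cell format used in results/*_average_log.csv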
-------------------------------------------------------------------------------- /config/audio/pimodel/pimodel_bvcc_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: pimodel 2 | save_dir: ./saved_models/audio 3 | save_name: pimodel_bvcc_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/audio/pimodel_bvcc_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 16 18 | ema_m: 0.0 19 | uratio: 1 20 | ulb_loss_ratio: 0.1 21 | unsup_warm_up: 0.4 22 | optim: AdamW 23 | lr: 2e-06 24 | momentum: 0.9 25 | weight_decay: 2e-05 26 | layer_decay: 0.75 27 | amp: False 28 | clip_grad: 0.0 29 | use_cat: False 30 | criterion: l1 31 | net: whisper_base 32 | net_from_name: False 33 | use_pretrain: True 34 | pretrain_path: openai/whisper-base 35 | data_dir: ./data 36 | dataset: bvcc 37 | train_sampler: RandomSampler 38 | num_workers: 8 39 | max_length_seconds: 6.0 40 | sample_rate: 16000 41 | preload: True 42 | seed: 0 43 | world_size: 1 44 | rank: 0 45 | multiprocessing_distributed: False 46 | dist_url: tcp://127.0.0.1:10001 47 | dist_backend: nccl 48 | -------------------------------------------------------------------------------- /config/nlp/meanteacher/meanteacher_yelp_review_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: meanteacher 2 | save_dir: ./saved_models/nlp 3 | save_name: meanteacher_yelp_review_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/nlp/meanteacher_yelp_review_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 8 18 | ema_m: 0.0 19 | uratio: 1 20 | ulb_loss_ratio: 0.1 21 | unsup_warm_up: 0.4 22 | optim: AdamW 23 | lr: 1e-05 24 | momentum: 0.9 25 | weight_decay: 0.0005 26 | layer_decay: 0.75 27 | amp: False 28 | clip_grad: 0.0 29 | use_cat: False 30 | criterion: l1 31 | net: bert_base 32 | net_from_name: False 33 | use_pretrain: True 34 | pretrain_path: google/bert_uncased_L-4_H-512_A-8 35 | data_dir: ./data 36 | dataset: yelp_review 37 | train_sampler: RandomSampler 38 | num_workers: 4 39 | max_length: 512 40 | seed: 0 41 | world_size: 1 42 | rank: 0 43 | multiprocessing_distributed: False 44 | dist_url: tcp://127.0.0.1:10001 45 | dist_backend: nccl 46 | -------------------------------------------------------------------------------- /config/audio/rda/rda_bvcc_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: rda 2 | save_dir: ./saved_models/audio 3 | save_name: rda_bvcc_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/audio/rda_bvcc_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 16 18 | ema_m: 0.0 19 | uratio: 1 20 | ulb_loss_ratio: 1.0 21 | unsup_warm_up: 0.4 22 | rda_num_refine_iter: 1024 23 | optim: AdamW 24 | lr: 2e-06 25 | momentum: 0.9 26 | weight_decay: 2e-05 27 | 
layer_decay: 0.75 28 | amp: False 29 | clip_grad: 0.0 30 | use_cat: False 31 | criterion: l1 32 | net: whisper_base 33 | net_from_name: False 34 | use_pretrain: True 35 | pretrain_path: openai/whisper-base 36 | data_dir: ./data 37 | dataset: bvcc 38 | train_sampler: RandomSampler 39 | num_workers: 8 40 | max_length_seconds: 6.0 41 | sample_rate: 16000 42 | preload: True 43 | seed: 0 44 | world_size: 1 45 | rank: 0 46 | multiprocessing_distributed: False 47 | dist_url: tcp://127.0.0.1:10001 48 | dist_backend: nccl 49 | -------------------------------------------------------------------------------- /config/audio/ucvme/ucvme_bvcc_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: ucvme 2 | save_dir: ./saved_models/audio 3 | save_name: ucvme_bvcc_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/audio/ucvme_bvcc_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 16 18 | ema_m: 0.0 19 | uratio: 1 20 | ulb_loss_ratio: 0.05 21 | dropout_rate: 0.05 22 | num_ensemble: 5 23 | optim: AdamW 24 | lr: 2e-06 25 | momentum: 0.9 26 | weight_decay: 2e-05 27 | layer_decay: 0.75 28 | amp: False 29 | clip_grad: 0.0 30 | use_cat: False 31 | criterion: l1 32 | net: whisper_base 33 | net_from_name: False 34 | use_pretrain: True 35 | pretrain_path: openai/whisper-base 36 | data_dir: ./data 37 | dataset: bvcc 38 | train_sampler: RandomSampler 39 | num_workers: 8 40 | max_length_seconds: 6.0 41 | sample_rate: 16000 42 | preload: True 43 | seed: 0 44 | world_size: 1 45 | rank: 0 46 | multiprocessing_distributed: False 47 | dist_url: tcp://127.0.0.1:10001 48 | dist_backend: nccl 49 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation 4 | Copyright (c) 2024 Pin-Yen Huang 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
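Every file under config/ (including the meanteacher config that follows) is a flat key: value YAML mapping, so it can be loaded directly into an argparse-style namespace. A minimal sketch, assuming PyYAML is installed; load_config is an illustrative helper, not a function defined in this repository:

import argparse

import yaml  # PyYAML, assumed available


def load_config(path):
    # the configs are flat mappings, so no nested merging is required
    with open(path) as f:
        cfg = yaml.safe_load(f)
    return argparse.Namespace(**cfg)


args = load_config("config/audio/meanteacher/meanteacher_bvcc_lb250_s0.yaml")
print(args.algorithm, args.batch_size)  # -> meanteacher 8
# note: PyYAML parses exponent-only literals such as "2e-06" as strings,
# so numeric fields like lr may need an explicit float() cast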
-------------------------------------------------------------------------------- /config/audio/meanteacher/meanteacher_bvcc_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: meanteacher 2 | save_dir: ./saved_models/audio 3 | save_name: meanteacher_bvcc_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/audio/meanteacher_bvcc_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 16 18 | ema_m: 0.0 19 | uratio: 1 20 | ulb_loss_ratio: 0.1 21 | unsup_warm_up: 0.4 22 | optim: AdamW 23 | lr: 2e-06 24 | momentum: 0.9 25 | weight_decay: 2e-05 26 | layer_decay: 0.75 27 | amp: False 28 | clip_grad: 0.0 29 | use_cat: False 30 | criterion: l1 31 | net: whisper_base 32 | net_from_name: False 33 | use_pretrain: True 34 | pretrain_path: openai/whisper-base 35 | data_dir: ./data 36 | dataset: bvcc 37 | train_sampler: RandomSampler 38 | num_workers: 8 39 | max_length_seconds: 6.0 40 | sample_rate: 16000 41 | preload: True 42 | seed: 0 43 | world_size: 1 44 | rank: 0 45 | multiprocessing_distributed: False 46 | dist_url: tcp://127.0.0.1:10001 47 | dist_backend: nccl 48 | -------------------------------------------------------------------------------- /config/audio/mixmatch/mixmatch_bvcc_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: mixmatch 2 | save_dir: ./saved_models/audio 3 | save_name: mixmatch_bvcc_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/audio/mixmatch_bvcc_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 16 18 | ema_m: 0.0 19 | uratio: 1 20 | ulb_loss_ratio: 0.1 21 | unsup_warm_up: 0.4 22 | mixup_alpha: 0.5 23 | optim: AdamW 24 | lr: 2e-06 25 | momentum: 0.9 26 | weight_decay: 2e-05 27 | layer_decay: 0.75 28 | amp: False 29 | clip_grad: 0.0 30 | use_cat: False 31 | criterion: l1 32 | net: whisper_base 33 | net_from_name: False 34 | use_pretrain: True 35 | pretrain_path: openai/whisper-base 36 | data_dir: ./data 37 | dataset: bvcc 38 | train_sampler: RandomSampler 39 | num_workers: 8 40 | max_length_seconds: 6.0 41 | sample_rate: 16000 42 | preload: True 43 | seed: 0 44 | world_size: 1 45 | rank: 0 46 | multiprocessing_distributed: False 47 | dist_url: tcp://127.0.0.1:10001 48 | dist_backend: nccl 49 | -------------------------------------------------------------------------------- /config/nlp/clss/clss_yelp_review_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: clss 2 | save_dir: ./saved_models/nlp 3 | save_name: clss_yelp_review_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/nlp/clss_yelp_review_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 8 18 | ema_m: 0.0 19 | uratio: 0.25 20 | lb_ctr_loss_ratio: 1.0 21 | ulb_ctr_loss_ratio: 0.05 22 | ulb_rank_loss_ratio: 0.01 23 | 
lambda_val: 2.0 24 | optim: AdamW 25 | lr: 1e-05 26 | momentum: 0.9 27 | weight_decay: 0.0005 28 | layer_decay: 0.75 29 | amp: False 30 | clip_grad: 0.0 31 | use_cat: False 32 | criterion: l1 33 | net: bert_base 34 | net_from_name: False 35 | use_pretrain: True 36 | pretrain_path: google/bert_uncased_L-4_H-512_A-8 37 | data_dir: ./data 38 | dataset: yelp_review 39 | train_sampler: RandomSampler 40 | num_workers: 4 41 | max_length: 512 42 | seed: 0 43 | world_size: 1 44 | rank: 0 45 | multiprocessing_distributed: False 46 | dist_url: tcp://127.0.0.1:10001 47 | dist_backend: nccl 48 | -------------------------------------------------------------------------------- /config/nlp/mixmatch/mixmatch_yelp_review_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: mixmatch 2 | save_dir: ./saved_models/nlp 3 | save_name: mixmatch_yelp_review_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/nlp/mixmatch_yelp_review_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 8 18 | ema_m: 0.0 19 | uratio: 1 20 | ulb_loss_ratio: 0.1 21 | unsup_warm_up: 0.4 22 | mixup_alpha: 0.5 23 | mixup_manifold: True 24 | optim: AdamW 25 | lr: 1e-05 26 | momentum: 0.9 27 | weight_decay: 0.0005 28 | layer_decay: 0.75 29 | amp: False 30 | clip_grad: 0.0 31 | use_cat: False 32 | criterion: l1 33 | net: bert_base 34 | net_from_name: False 35 | use_pretrain: True 36 | pretrain_path: google/bert_uncased_L-4_H-512_A-8 37 | data_dir: ./data 38 | dataset: yelp_review 39 | train_sampler: RandomSampler 40 | num_workers: 4 41 | max_length: 512 42 | seed: 0 43 | world_size: 1 44 | rank: 0 45 | multiprocessing_distributed: False 46 | dist_url: tcp://127.0.0.1:10001 47 | dist_backend: nccl 48 | -------------------------------------------------------------------------------- /config/nlp/rankup/rankup_yelp_review_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: rankup 2 | save_dir: ./saved_models/nlp 3 | save_name: rankup_yelp_review_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/nlp/rankup_yelp_review_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 8 18 | ema_m: 0.0 19 | uratio: 1 20 | arc_loss_ratio: 0.2 21 | arc_ulb_loss_ratio: 1.0 22 | hard_label: True 23 | T: 0.5 24 | p_cutoff: 0.95 25 | optim: AdamW 26 | lr: 1e-05 27 | momentum: 0.9 28 | weight_decay: 0.0005 29 | layer_decay: 0.75 30 | amp: False 31 | clip_grad: 0.0 32 | use_cat: False 33 | criterion: l1 34 | net: bert_base 35 | net_from_name: False 36 | use_pretrain: True 37 | pretrain_path: google/bert_uncased_L-4_H-512_A-8 38 | data_dir: ./data 39 | dataset: yelp_review 40 | train_sampler: RandomSampler 41 | num_workers: 4 42 | max_length: 512 43 | seed: 0 44 | world_size: 1 45 | rank: 0 46 | multiprocessing_distributed: False 47 | dist_url: tcp://127.0.0.1:10001 48 | dist_backend: nccl 49 | -------------------------------------------------------------------------------- /config/audio/clss/clss_bvcc_lb250_s0.yaml: 
-------------------------------------------------------------------------------- 1 | algorithm: clss 2 | save_dir: ./saved_models/audio 3 | save_name: clss_bvcc_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/audio/clss_bvcc_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 16 18 | ema_m: 0.0 19 | uratio: 1 20 | lb_ctr_loss_ratio: 1.0 21 | ulb_ctr_loss_ratio: 0.05 22 | ulb_rank_loss_ratio: 0.01 23 | lambda_val: 2.0 24 | optim: AdamW 25 | lr: 2e-06 26 | momentum: 0.9 27 | weight_decay: 2e-05 28 | layer_decay: 0.75 29 | amp: False 30 | clip_grad: 0.0 31 | use_cat: False 32 | criterion: l1 33 | net: whisper_base 34 | net_from_name: False 35 | use_pretrain: True 36 | pretrain_path: openai/whisper-base 37 | data_dir: ./data 38 | dataset: bvcc 39 | train_sampler: RandomSampler 40 | num_workers: 8 41 | max_length_seconds: 6.0 42 | sample_rate: 16000 43 | preload: True 44 | seed: 0 45 | world_size: 1 46 | rank: 0 47 | multiprocessing_distributed: False 48 | dist_url: tcp://127.0.0.1:10001 49 | dist_backend: nccl 50 | -------------------------------------------------------------------------------- /config/audio/rankup/rankup_bvcc_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: rankup 2 | save_dir: ./saved_models/audio 3 | save_name: rankup_bvcc_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/audio/rankup_bvcc_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 16 18 | ema_m: 0.0 19 | uratio: 1 20 | arc_loss_ratio: 0.2 21 | arc_ulb_loss_ratio: 1.0 22 | hard_label: True 23 | T: 0.5 24 | p_cutoff: 0.95 25 | optim: AdamW 26 | lr: 2e-06 27 | momentum: 0.9 28 | weight_decay: 2e-05 29 | layer_decay: 0.75 30 | amp: False 31 | clip_grad: 0.0 32 | use_cat: False 33 | criterion: l1 34 | net: whisper_base 35 | net_from_name: False 36 | use_pretrain: True 37 | pretrain_path: openai/whisper-base 38 | data_dir: ./data 39 | dataset: bvcc 40 | train_sampler: RandomSampler 41 | num_workers: 8 42 | max_length_seconds: 6.0 43 | sample_rate: 16000 44 | preload: True 45 | seed: 0 46 | world_size: 1 47 | rank: 0 48 | multiprocessing_distributed: False 49 | dist_url: tcp://127.0.0.1:10001 50 | dist_backend: nccl 51 | -------------------------------------------------------------------------------- /config/classic_cv/rankuprda/rankuprda_utkface_lb50_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: rankuprda 2 | save_dir: ./saved_models/classic_cv 3 | save_name: rankuprda_utkface_lb50_s0 4 | resume: True 5 | load_path: ./saved_models/classic_cv/rankuprda_utkface_lb50_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 256 11 | num_train_iter: 262144 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_labels: 50 15 | batch_size: 32 16 | eval_batch_size: 256 17 | ema_m: 0.999 18 | uratio: 7 19 | ulb_loss_ratio: 1.0 20 | unsup_warm_up: 0.4 21 | rda_num_refine_iter: 1024 22 | arc_loss_ratio: 0.2 23 | arc_ulb_loss_ratio: 1.0 24 | hard_label: True 25 | 
T: 0.5 26 | p_cutoff: 0.95 27 | optim: SGD 28 | lr: 0.01 29 | momentum: 0.9 30 | weight_decay: 0.001 31 | layer_decay: 1.0 32 | amp: False 33 | clip_grad: 0.0 34 | use_cat: True 35 | criterion: l1 36 | net: wrn_28_2 37 | net_from_name: False 38 | data_dir: ./data 39 | dataset: utkface 40 | train_sampler: RandomSampler 41 | num_workers: 4 42 | crop_ratio: 0.875 43 | img_size: 40 44 | preload: False 45 | seed: 0 46 | world_size: 1 47 | rank: 0 48 | multiprocessing_distributed: False 49 | dist_url: tcp://127.0.0.1:10001 50 | dist_backend: nccl 51 | -------------------------------------------------------------------------------- /config/classic_cv/rankuprda/rankuprda_utkface_lb2000_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: rankuprda 2 | save_dir: ./saved_models/classic_cv 3 | save_name: rankuprda_utkface_lb2000_s0 4 | resume: True 5 | load_path: ./saved_models/classic_cv/rankuprda_utkface_lb2000_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 256 11 | num_train_iter: 262144 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_labels: 2000 15 | batch_size: 32 16 | eval_batch_size: 256 17 | ema_m: 0.999 18 | uratio: 7 19 | ulb_loss_ratio: 1.0 20 | unsup_warm_up: 0.4 21 | rda_num_refine_iter: 1024 22 | arc_loss_ratio: 0.2 23 | arc_ulb_loss_ratio: 1.0 24 | hard_label: True 25 | T: 0.5 26 | p_cutoff: 0.95 27 | optim: SGD 28 | lr: 0.01 29 | momentum: 0.9 30 | weight_decay: 0.001 31 | layer_decay: 1.0 32 | amp: False 33 | clip_grad: 0.0 34 | use_cat: True 35 | criterion: l1 36 | net: wrn_28_2 37 | net_from_name: False 38 | data_dir: ./data 39 | dataset: utkface 40 | train_sampler: RandomSampler 41 | num_workers: 4 42 | crop_ratio: 0.875 43 | img_size: 40 44 | preload: False 45 | seed: 0 46 | world_size: 1 47 | rank: 0 48 | multiprocessing_distributed: False 49 | dist_url: tcp://127.0.0.1:10003 50 | dist_backend: nccl 51 | -------------------------------------------------------------------------------- /config/classic_cv/rankuprda/rankuprda_utkface_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: rankuprda 2 | save_dir: ./saved_models/classic_cv 3 | save_name: rankuprda_utkface_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/classic_cv/rankuprda_utkface_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 256 11 | num_train_iter: 262144 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_labels: 250 15 | batch_size: 32 16 | eval_batch_size: 256 17 | ema_m: 0.999 18 | uratio: 7 19 | ulb_loss_ratio: 1.0 20 | unsup_warm_up: 0.4 21 | rda_num_refine_iter: 1024 22 | arc_loss_ratio: 0.2 23 | arc_ulb_loss_ratio: 1.0 24 | hard_label: True 25 | T: 0.5 26 | p_cutoff: 0.95 27 | optim: SGD 28 | lr: 0.01 29 | momentum: 0.9 30 | weight_decay: 0.001 31 | layer_decay: 1.0 32 | amp: False 33 | clip_grad: 0.0 34 | use_cat: True 35 | criterion: l1 36 | net: wrn_28_2 37 | net_from_name: False 38 | data_dir: ./data 39 | dataset: utkface 40 | train_sampler: RandomSampler 41 | num_workers: 4 42 | crop_ratio: 0.875 43 | img_size: 40 44 | preload: False 45 | seed: 0 46 | world_size: 1 47 | rank: 0 48 | multiprocessing_distributed: False 49 | dist_url: tcp://127.0.0.1:10002 50 | dist_backend: nccl 51 | -------------------------------------------------------------------------------- /config/nlp/rankuprda/rankuprda_yelp_review_lb250_s0.yaml: 
-------------------------------------------------------------------------------- 1 | algorithm: rankuprda 2 | save_dir: ./saved_models/nlp 3 | save_name: rankuprda_yelp_review_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/nlp/rankuprda_yelp_review_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 8 18 | ema_m: 0.0 19 | uratio: 1 20 | ulb_loss_ratio: 1.0 21 | unsup_warm_up: 0.4 22 | rda_num_refine_iter: 1024 23 | arc_loss_ratio: 0.2 24 | arc_ulb_loss_ratio: 1.0 25 | hard_label: True 26 | T: 0.5 27 | p_cutoff: 0.95 28 | optim: AdamW 29 | lr: 1e-05 30 | momentum: 0.9 31 | weight_decay: 0.0005 32 | layer_decay: 0.75 33 | amp: False 34 | clip_grad: 0.0 35 | use_cat: False 36 | criterion: l1 37 | net: bert_base 38 | net_from_name: False 39 | use_pretrain: True 40 | pretrain_path: google/bert_uncased_L-4_H-512_A-8 41 | data_dir: ./data 42 | dataset: yelp_review 43 | train_sampler: RandomSampler 44 | num_workers: 4 45 | max_length: 512 46 | seed: 0 47 | world_size: 1 48 | rank: 0 49 | multiprocessing_distributed: False 50 | dist_url: tcp://127.0.0.1:10001 51 | dist_backend: nccl 52 | -------------------------------------------------------------------------------- /config/audio/rankuprda/rankuprda_bvcc_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: rankuprda 2 | save_dir: ./saved_models/audio 3 | save_name: rankuprda_bvcc_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/audio/rankuprda_bvcc_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 16 18 | ema_m: 0.0 19 | uratio: 1 20 | ulb_loss_ratio: 1.0 21 | unsup_warm_up: 0.4 22 | rda_num_refine_iter: 1024 23 | arc_loss_ratio: 0.2 24 | arc_ulb_loss_ratio: 1.0 25 | hard_label: True 26 | T: 0.5 27 | p_cutoff: 0.95 28 | optim: AdamW 29 | lr: 2e-06 30 | momentum: 0.9 31 | weight_decay: 2e-05 32 | layer_decay: 0.75 33 | amp: False 34 | clip_grad: 0.0 35 | use_cat: False 36 | criterion: l1 37 | net: whisper_base 38 | net_from_name: False 39 | use_pretrain: True 40 | pretrain_path: openai/whisper-base 41 | data_dir: ./data 42 | dataset: bvcc 43 | train_sampler: RandomSampler 44 | num_workers: 8 45 | max_length_seconds: 6.0 46 | sample_rate: 16000 47 | preload: True 48 | seed: 0 49 | world_size: 1 50 | rank: 0 51 | multiprocessing_distributed: False 52 | dist_url: tcp://127.0.0.1:10001 53 | dist_backend: nccl 54 | -------------------------------------------------------------------------------- /semilearn/core/hooks/hook.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
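# Base class for training-loop callbacks: Hook defines a no-op method for each
# stage listed in `stages`, and concrete hooks (e.g. ParamUpdateHook,
# EvaluationHook, and LoggingHook later in this listing) override only the
# stages they need; the every_n_* helpers let them fire on a fixed period.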
3 | # Ref: https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/hook.py 4 | 5 | 6 | class Hook: 7 | stages = ("before_run", "before_train_epoch", "before_train_step", "after_train_step", "after_train_epoch", "after_run") 8 | 9 | def before_train_epoch(self, algorithm): 10 | pass 11 | 12 | def after_train_epoch(self, algorithm): 13 | pass 14 | 15 | def before_train_step(self, algorithm): 16 | pass 17 | 18 | def after_train_step(self, algorithm): 19 | pass 20 | 21 | def before_run(self, algorithm): 22 | pass 23 | 24 | def after_run(self, algorithm): 25 | pass 26 | 27 | def every_n_epochs(self, algorithm, n): 28 | return (algorithm.epoch + 1) % n == 0 if n > 0 else False 29 | 30 | def every_n_iters(self, algorithm, n): 31 | return (algorithm.it + 1) % n == 0 if n > 0 else False 32 | 33 | def end_of_epoch(self, algorithm): 34 | return (algorithm.it + 1) % len(algorithm.data_loader["train_lb"]) == 0 35 | 36 | def is_last_epoch(self, algorithm): 37 | return algorithm.epoch + 1 == algorithm.epochs 38 | 39 | def is_last_iter(self, algorithm): 40 | return algorithm.it + 1 == algorithm.num_train_iter 41 | -------------------------------------------------------------------------------- /semilearn/core/criterions/cross_entropy.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from torch.nn import functional as F 8 | 9 | 10 | def ce_loss(logits, targets, reduction="none"): 11 | """ 12 | Cross-entropy loss in PyTorch. 13 | 14 | Args: 15 | logits: logit values, shape=[Batch size, # of classes] 16 | targets: integer or vector, shape=[Batch size] or [Batch size, # of classes] 17 | (integer targets of shape [Batch size] are treated as hard labels; targets with the same shape as logits are treated as soft-label distributions) 18 | reduction: the reduction argument 19 | """ 20 | if logits.shape == targets.shape: 21 | # soft / one-hot target: cross-entropy against the full distribution 22 | log_pred = F.log_softmax(logits, dim=-1) 23 | nll_loss = torch.sum(-targets * log_pred, dim=1) 24 | if reduction == "none": 25 | return nll_loss 26 | else: 27 | return nll_loss.mean() 28 | else: 29 | log_pred = F.log_softmax(logits, dim=-1) 30 | return F.nll_loss(log_pred, targets, reduction=reduction) 31 | 32 | 33 | class CELoss(nn.Module): 34 | """ 35 | Wrapper for cross-entropy loss 36 | """ 37 | 38 | def forward(self, logits, targets, reduction="none"): 39 | return ce_loss(logits, targets, reduction) 40 | -------------------------------------------------------------------------------- /semilearn/core/criterions/consistency.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import torch.nn as nn 6 | from torch.nn import functional as F 7 | 8 | 9 | def consistency_loss(logits, targets, name="mse", mask=None): 10 | """ 11 | Consistency regularization loss for semi-supervised regression.
12 | 13 | Args: 14 | logits: logits to compute the loss on (gradients back-propagate through these), usually from the strongly augmented unlabeled samples 15 | targets: pseudo-labels (either hard labels or soft labels) 16 | name: use mean-absolute-error ('l1') or mean-squared-error ('mse') as the loss 17 | mask: per-sample mask applied when computing the loss, typically used for confidence masking 18 | """ 19 | 20 | assert name in ["l1", "mse"] 21 | # targets are pseudo-labels and are expected to be detached already 22 | if name == "l1": 23 | loss = F.l1_loss(logits, targets, reduction="none") 24 | else: 25 | loss = F.mse_loss(logits, targets, reduction="none") 26 | 27 | if mask is not None: 28 | # mask must be a float tensor (not boolean) so it scales the per-sample loss 29 | loss = loss * mask 30 | 31 | return loss.mean() 32 | 33 | 34 | class ConsistencyLoss(nn.Module): 35 | """ 36 | Wrapper for consistency loss 37 | """ 38 | 39 | def forward(self, logits, targets, name="mse", mask=None): 40 | return consistency_loss(logits, targets, name, mask) 41 | -------------------------------------------------------------------------------- /semilearn/algorithms/utils/ops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import torch 6 | import numpy as np 7 | 8 | 9 | @torch.no_grad() 10 | def concat_all_gather(tensor): 11 | """ 12 | Performs all_gather operation on the provided tensors. 13 | *** Warning ***: torch.distributed.all_gather has no gradient. 14 | """ 15 | tensors_gather = [torch.ones_like(tensor) for _ in range(torch.distributed.get_world_size())] 16 | torch.distributed.all_gather(tensors_gather, tensor) 17 | 18 | output = torch.cat(tensors_gather, dim=0) 19 | return output 20 | 21 | 22 | @torch.no_grad() 23 | def mixup_one_target(x, y, alpha=1.0, is_bias=False): 24 | """Returns mixed inputs, mixed targets, and lambda""" 25 | if alpha > 0: 26 | lam = np.random.beta(alpha, alpha) 27 | else: 28 | lam = 1 29 | if is_bias: 30 | lam = max(lam, 1 - lam) 31 | 32 | index = torch.randperm(x.size(0)).to(x.device) 33 | 34 | mixed_x = lam * x + (1 - lam) * x[index] 35 | mixed_y = lam * y + (1 - lam) * y[index] 36 | return mixed_x, mixed_y, lam 37 | 38 | 39 | @torch.no_grad() 40 | def smooth_targets(logits, targets, smoothing=0.1): 41 | """ 42 | Label smoothing: spread `smoothing` mass uniformly over the non-target classes. 43 | """ 44 | true_dist = torch.zeros_like(logits) 45 | true_dist.fill_(smoothing / (logits.shape[-1] - 1)) 46 | true_dist.scatter_(1, targets.data.unsqueeze(1), (1 - smoothing)) 47 | return true_dist 48 | -------------------------------------------------------------------------------- /semilearn/core/criterions/cls_consistency.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import torch 6 | import torch.nn as nn 7 | from torch.nn import functional as F 8 | 9 | from .cross_entropy import ce_loss 10 | 11 | 12 | def cls_consistency_loss(logits, targets, name="ce", mask=None): 13 | """ 14 | Consistency regularization loss for semi-supervised classification.
15 | 16 | Args: 17 | logits: logits to compute the loss on (gradients back-propagate through these), usually from the strongly augmented unlabeled samples 18 | targets: pseudo-labels (either hard labels or soft labels) 19 | name: use cross-entropy ('ce') or mean-squared-error ('mse') as the loss 20 | mask: per-sample mask applied when computing the loss, typically used for confidence masking 21 | """ 22 | 23 | assert name in ["ce", "mse"] 24 | # targets are pseudo-labels and are expected to be detached already 25 | if name == "mse": 26 | probs = torch.softmax(logits, dim=-1) 27 | loss = F.mse_loss(probs, targets, reduction="none").mean(dim=1) 28 | else: 29 | loss = ce_loss(logits, targets, reduction="none") 30 | 31 | if mask is not None: 32 | # mask must be a float tensor (not boolean) so it scales the per-sample loss 33 | loss = loss * mask 34 | 35 | return loss.mean() 36 | 37 | 38 | class ClsConsistencyLoss(nn.Module): 39 | """ 40 | Wrapper for classification consistency loss 41 | """ 42 | 43 | def forward(self, logits, targets, name="ce", mask=None): 44 | return cls_consistency_loss(logits, targets, name, mask) 45 | -------------------------------------------------------------------------------- /semilearn/datasets/audio_datasets/augmentation/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Pin-Yen Huang. 2 | # Licensed under the MIT License. 3 | 4 | import random 5 | import warnings 6 | 7 | from audiomentations import * 8 | 9 | 10 | class AudioTransforms: 11 | """ 12 | Strong transformation function for audio data. 13 | 14 | Args: 15 | max_length_seconds (float): Maximum output length of the audio in seconds. 16 | dataset_name (str): Name of the dataset (selects which set of effects is applied). 17 | """ 18 | 19 | def __init__(self, max_length_seconds, dataset_name=""): 20 | self.max_length_seconds = max_length_seconds 21 | self.effects_list, self.n = self.get_effects_list(dataset_name) 22 | self.adjust_duration = AdjustDuration(duration_seconds=max_length_seconds, p=1.0) 23 | 24 | def get_effects_list(self, dataset_name): 25 | if dataset_name.lower() in ["bvcc", "vcc2018"]: 26 | effects_list = [TimeMask(p=1.0), Trim(p=1.0), Padding(p=1.0)] 27 | num_effects = 1 28 | else: 29 | effects_list = [Gain(p=1.0), PitchShift(p=1.0), TimeStretch(p=1.0), RoomSimulator(p=1.0)] 30 | num_effects = 2 31 | return effects_list, num_effects 32 | 33 | def __call__(self, audio, sample_rate): 34 | transform = Compose(random.choices(self.effects_list, k=self.n)) 35 | with warnings.catch_warnings(): 36 | warnings.filterwarnings("ignore", message="Possible clipped samples in output.") 37 | aug_wav = transform(samples=audio, sample_rate=sample_rate) 38 | aug_wav = self.adjust_duration(aug_wav, sample_rate=sample_rate) 39 | return aug_wav 40 | -------------------------------------------------------------------------------- /semilearn/datasets/cv_datasets/augmentation/transforms.py: -------------------------------------------------------------------------------- 1 | from torchvision import transforms 2 | 3 | from .randaugment import RandAugment 4 | 5 | 6 | mean, std = {}, {} 7 | mean["utkface"] = [0.59632254, 0.45671629, 0.39103324] 8 | std["utkface"] = [0.25907077, 0.23132719, 0.22686818] 9 | 10 | 11 | def get_val_transforms(crop_size, dataset_name): 12 | return transforms.Compose( 13 | [ 14 | transforms.Resize(crop_size), 15 | transforms.ToTensor(), 16 | transforms.Normalize( 17 | mean[dataset_name.lower()], 18 | std[dataset_name.lower()], 19 | ), 20 | ] 21 | ) 22 | 23 | 24 | def get_weak_transforms(crop_size, crop_ratio, dataset_name): 25 | return transforms.Compose( 26 | [ 27 |
transforms.Resize(crop_size), 28 | transforms.RandomCrop(crop_size, padding=int(crop_size * (1 - crop_ratio)), padding_mode="reflect"), 29 | transforms.RandomHorizontalFlip(), 30 | transforms.ToTensor(), 31 | transforms.Normalize(mean[dataset_name.lower()], std[dataset_name.lower()]), 32 | ] 33 | ) 34 | 35 | 36 | def get_strong_transforms(crop_size, crop_ratio, dataset_name): 37 | return transforms.Compose( 38 | [ 39 | transforms.Resize(crop_size), 40 | transforms.RandomCrop(crop_size, padding=int(crop_size * (1 - crop_ratio)), padding_mode="reflect"), 41 | transforms.RandomHorizontalFlip(), 42 | RandAugment(3, 5), 43 | transforms.ToTensor(), 44 | transforms.Normalize(mean[dataset_name.lower()], std[dataset_name.lower()]), 45 | ] 46 | ) 47 | -------------------------------------------------------------------------------- /semilearn/core/hooks/param_update.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import torch 5 | 6 | from .hook import Hook 7 | 8 | 9 | class ParamUpdateHook(Hook): 10 | """ 11 | Parameter Update Hook 12 | 13 | necessary for updating the model parameters 14 | """ 15 | 16 | def before_train_step(self, algorithm): 17 | if hasattr(algorithm, "start_run"): 18 | torch.cuda.synchronize() 19 | algorithm.start_run.record() 20 | 21 | # called after each train_step to update parameters 22 | def after_train_step(self, algorithm): 23 | loss = algorithm.out_dict["loss"] 24 | # gradients are cleared with algorithm.model.zero_grad() after the optimizer step below 25 | # update parameters 26 | if algorithm.use_amp: 27 | algorithm.loss_scaler.scale(loss).backward() 28 | if algorithm.clip_grad > 0: 29 | algorithm.loss_scaler.unscale_(algorithm.optimizer) 30 | torch.nn.utils.clip_grad_norm_(algorithm.model.parameters(), algorithm.clip_grad) 31 | algorithm.loss_scaler.step(algorithm.optimizer) 32 | algorithm.loss_scaler.update() 33 | else: 34 | loss.backward() 35 | if algorithm.clip_grad > 0: 36 | torch.nn.utils.clip_grad_norm_(algorithm.model.parameters(), algorithm.clip_grad) 37 | algorithm.optimizer.step() 38 | 39 | if algorithm.scheduler is not None: 40 | algorithm.scheduler.step() 41 | algorithm.model.zero_grad() 42 | 43 | if hasattr(algorithm, "end_run"): 44 | algorithm.end_run.record() 45 | torch.cuda.synchronize() 46 | algorithm.log_dict["train/run_time"] = algorithm.start_run.elapsed_time(algorithm.end_run) / 1000.0 47 | -------------------------------------------------------------------------------- /semilearn/algorithms/hooks/pseudo_label.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
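# PseudoLabelingHook turns model outputs on unlabeled data into training
# targets: hard labels via argmax (optionally label-smoothed), or soft labels
# via a temperature-scaled probability distribution.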
3 | 4 | import torch 5 | 6 | from semilearn.core.hooks import Hook 7 | from semilearn.algorithms.utils import smooth_targets 8 | 9 | 10 | class PseudoLabelingHook(Hook): 11 | """ 12 | Pseudo Labeling Hook 13 | """ 14 | 15 | def __init__(self): 16 | super().__init__() 17 | 18 | @torch.no_grad() 19 | def gen_ulb_targets( 20 | self, 21 | algorithm, 22 | logits, 23 | use_hard_label=True, 24 | T=1.0, 25 | softmax=True, # whether to apply softmax; if True, the input must be raw logits 26 | label_smoothing=0.0, 27 | ): 28 | """ 29 | generate pseudo-labels from logits/probs 30 | 31 | Args: 32 | algorithm: base algorithm 33 | logits: logits (or probs, need to set softmax to False) 34 | use_hard_label: flag of using hard labels instead of soft labels 35 | T: temperature parameter 36 | softmax: flag of using softmax on logits 37 | label_smoothing: label_smoothing parameter 38 | """ 39 | 40 | logits = logits.detach() 41 | if use_hard_label: 42 | # return hard label directly 43 | pseudo_label = torch.argmax(logits, dim=-1) 44 | if label_smoothing: 45 | pseudo_label = smooth_targets(logits, pseudo_label, label_smoothing) 46 | return pseudo_label 47 | 48 | # return soft label 49 | if softmax: 50 | # pseudo_label = torch.softmax(logits / T, dim=-1) 51 | pseudo_label = algorithm.compute_prob(logits / T) 52 | else: 53 | # input is already a probability distribution; use it as-is 54 | pseudo_label = logits 55 | return pseudo_label 56 | -------------------------------------------------------------------------------- /semilearn/core/hooks/evaluation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | # Ref: https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/evaluation.py 5 | 6 | import os 7 | 8 | from .hook import Hook 9 | 10 | 11 | class EvaluationHook(Hook): 12 | """ 13 | Evaluation Hook for validation during training 14 | """ 15 | 16 | def after_train_step(self, algorithm): 17 | if self.every_n_iters(algorithm, algorithm.num_eval_iter) or self.is_last_iter(algorithm): 18 | algorithm.print_fn("validating...") 19 | eval_dict = algorithm.evaluate("eval") 20 | algorithm.log_dict.update(eval_dict) 21 | 22 | # update best metrics 23 | if algorithm.log_dict["eval/mae"] < algorithm.best_eval_mae: 24 | algorithm.best_eval_mae = algorithm.log_dict["eval/mae"] 25 | algorithm.best_it = algorithm.it 26 | 27 | def after_run(self, algorithm): 28 | if not algorithm.args.multiprocessing_distributed or ( 29 | algorithm.args.multiprocessing_distributed and algorithm.args.rank % algorithm.ngpus_per_node == 0 30 | ): 31 | save_path = os.path.join(algorithm.save_dir, algorithm.save_name) 32 | algorithm.save_model("latest_model.pth", save_path) 33 | 34 | results_dict = {"eval/mae": algorithm.best_eval_mae, "eval/best_it": algorithm.best_it} 35 | if "test" in algorithm.loader_dict: 36 | # load the best model and evaluate on test dataset 37 | best_model_path = os.path.join(algorithm.args.save_dir, algorithm.args.save_name, "model_best.pth") 38 | algorithm.load_model(best_model_path) 39 | test_dict = algorithm.evaluate("test") 40 | results_dict["test/best_mae"] = test_dict["test/mae"] 41 | algorithm.results_dict = results_dict 42 | -------------------------------------------------------------------------------- /semilearn/algorithms/rda/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Pin-Yen
Huang. 2 | # Licensed under the MIT License. 3 | 4 | import torch 5 | import numpy as np 6 | 7 | from semilearn.core.hooks import Hook 8 | 9 | 10 | class RDAHook(Hook): 11 | """ 12 | RDA Hook 13 | """ 14 | 15 | def __init__(self, train_ulb_len, lb_targets, num_refine_iter=1024): 16 | super(RDAHook, self).__init__() 17 | self.train_ulb_len = train_ulb_len 18 | self.sorted_lb_targets, _ = torch.sort(torch.tensor(lb_targets)) 19 | self.num_refine_iter = num_refine_iter 20 | 21 | self.pseudo_raw = torch.ones(self.train_ulb_len, dtype=torch.float32) 22 | self.pseudo_refine = torch.ones(self.train_ulb_len, dtype=torch.float32) 23 | 24 | @torch.no_grad() 25 | def gen_ulb_targets(self, algorithm, logits): 26 | logits = logits.detach() 27 | pseudo_label = self.refine_pseudo_labels(algorithm.idx_ulb, logits, algorithm.it, algorithm.epoch) 28 | return pseudo_label.to(logits.device) 29 | 30 | @torch.no_grad() 31 | def refine_pseudo_labels(self, idx_ulb, logits_x_ulb, it, epoch): 32 | self.pseudo_raw[idx_ulb.to(self.pseudo_raw.device)] = logits_x_ulb.data.cpu().to(self.pseudo_raw.dtype) 33 | if it % self.num_refine_iter == 0: 34 | self.apply_dist_align() 35 | if epoch > 0: 36 | logits_x_ulb = self.pseudo_refine[idx_ulb.to(self.pseudo_raw.device)].detach() 37 | return logits_x_ulb 38 | 39 | @torch.no_grad() 40 | def apply_dist_align(self): 41 | """ 42 | Apply distribution alignment to refine pseudo labels. 43 | """ 44 | cdf_pseudo = np.linspace(0, 1, len(self.pseudo_raw)) 45 | cdf_target = np.linspace(0, 1, len(self.sorted_lb_targets)) 46 | pseudo_refine = np.interp(cdf_pseudo, cdf_target, self.sorted_lb_targets.cpu().numpy()) 47 | idxes = torch.argsort(self.pseudo_raw) 48 | self.pseudo_refine[idxes] = torch.FloatTensor(pseudo_refine).to(self.pseudo_refine.device) 49 | -------------------------------------------------------------------------------- /semilearn/core/hooks/priority.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | # Ref: https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/priority.py 4 | 5 | from enum import Enum 6 | from typing import Union 7 | 8 | 9 | class Priority(Enum): 10 | """Hook priority levels. 11 | +--------------+------------+ 12 | | Level | Value | 13 | +==============+============+ 14 | | HIGHEST | 0 | 15 | +--------------+------------+ 16 | | VERY_HIGH | 10 | 17 | +--------------+------------+ 18 | | HIGH | 30 | 19 | +--------------+------------+ 20 | | ABOVE_NORMAL | 40 | 21 | +--------------+------------+ 22 | | NORMAL | 50 | 23 | +--------------+------------+ 24 | | BELOW_NORMAL | 60 | 25 | +--------------+------------+ 26 | | LOW | 70 | 27 | +--------------+------------+ 28 | | VERY_LOW | 90 | 29 | +--------------+------------+ 30 | | LOWEST | 100 | 31 | +--------------+------------+ 32 | """ 33 | 34 | HIGHEST = 0 35 | VERY_HIGH = 10 36 | HIGH = 30 37 | ABOVE_NORMAL = 40 38 | NORMAL = 50 39 | BELOW_NORMAL = 60 40 | LOW = 70 41 | VERY_LOW = 90 42 | LOWEST = 100 43 | 44 | 45 | def get_priority(priority: Union[int, str, Priority]) -> int: 46 | """Get priority value. 47 | Args: 48 | priority (int or str or :obj:`Priority`): Priority. 49 | Returns: 50 | int: The priority value. 
51 | """ 52 | if isinstance(priority, int): 53 | if priority < 0 or priority > 100: 54 | raise ValueError("priority must be between 0 and 100") 55 | return priority 56 | elif isinstance(priority, Priority): 57 | return priority.value 58 | elif isinstance(priority, str): 59 | return Priority[priority.upper()].value 60 | else: 61 | raise TypeError("priority must be an integer or Priority enum value") 62 | -------------------------------------------------------------------------------- /semilearn/algorithms/rankuprda/rda.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Pin-Yen Huang. 2 | # Licensed under the MIT License. 3 | 4 | import torch 5 | import numpy as np 6 | 7 | from semilearn.core.hooks import Hook 8 | 9 | 10 | class RDAHook(Hook): 11 | """ 12 | RDA Hook 13 | """ 14 | 15 | def __init__(self, train_ulb_len, lb_targets, num_refine_iter=1024): 16 | super(RDAHook, self).__init__() 17 | self.train_ulb_len = train_ulb_len 18 | self.sorted_lb_targets, _ = torch.sort(torch.tensor(lb_targets)) 19 | self.num_refine_iter = num_refine_iter 20 | 21 | self.pseudo_raw = torch.ones(self.train_ulb_len, dtype=torch.float32) 22 | self.pseudo_refine = torch.ones(self.train_ulb_len, dtype=torch.float32) 23 | 24 | @torch.no_grad() 25 | def gen_ulb_targets(self, algorithm, logits): 26 | logits = logits.detach() 27 | pseudo_label = self.refine_pseudo_labels(algorithm.idx_ulb, logits, algorithm.it, algorithm.epoch) 28 | return pseudo_label.to(logits.device) 29 | 30 | @torch.no_grad() 31 | def refine_pseudo_labels(self, idx_ulb, logits_x_ulb, it, epoch): 32 | self.pseudo_raw[idx_ulb.to(self.pseudo_raw.device)] = logits_x_ulb.data.cpu().to(self.pseudo_raw.dtype) 33 | if it % self.num_refine_iter == 0: 34 | self.apply_dist_align() 35 | if epoch > 0: 36 | logits_x_ulb = self.pseudo_refine[idx_ulb.to(self.pseudo_raw.device)].detach() 37 | return logits_x_ulb 38 | 39 | @torch.no_grad() 40 | def apply_dist_align(self): 41 | """ 42 | Apply distribution alignment to refine pseudo labels. 43 | """ 44 | cdf_pseudo = np.linspace(0, 1, len(self.pseudo_raw)) 45 | cdf_target = np.linspace(0, 1, len(self.sorted_lb_targets)) 46 | pseudo_refine = np.interp(cdf_pseudo, cdf_target, self.sorted_lb_targets.cpu().numpy()) 47 | idxes = torch.argsort(self.pseudo_raw) 48 | self.pseudo_refine[idxes] = torch.FloatTensor(pseudo_refine).to(self.pseudo_refine.device) 49 | -------------------------------------------------------------------------------- /semilearn/core/hooks/logging.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 
4 | # Ref: https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/logger/base.py 5 | 6 | from .hook import Hook 7 | 8 | 9 | class LoggingHook(Hook): 10 | """ 11 | Logging Hook for printing information and logging to TensorBoard 12 | """ 13 | 14 | def after_train_step(self, algorithm): 15 | """must be called after evaluation""" 16 | if self.every_n_iters(algorithm, algorithm.num_eval_iter): 17 | if not algorithm.distributed or (algorithm.distributed and algorithm.rank % algorithm.ngpus_per_node == 0): 18 | print_text = f"[{algorithm.it + 1} iteration] USE_EMA: {algorithm.ema_m != 0}, " 19 | for i, (key, item) in enumerate(algorithm.log_dict.items()): 20 | print_text += "{:s}: {:.4f}, ".format(key, item) 21 | print_text += "BEST_EVAL_MAE: {:.4f}, at {:d} iters".format(algorithm.best_eval_mae, algorithm.best_it + 1) 22 | algorithm.print_fn(print_text) 23 | 24 | if algorithm.tb_log is not None: 25 | algorithm.tb_log.update(algorithm.log_dict, algorithm.it) 26 | algorithm.tb_log.update({"eval/best-mae": algorithm.best_eval_mae}, algorithm.it) 27 | 28 | elif self.every_n_iters(algorithm, algorithm.num_log_iter): 29 | if not algorithm.distributed or (algorithm.distributed and algorithm.rank % algorithm.ngpus_per_node == 0): 30 | print_text = f"{algorithm.it + 1} iteration, " 31 | for i, (key, item) in enumerate(algorithm.log_dict.items()): 32 | print_text += "{:s}: {:.4f}".format(key, item) 33 | if i != len(algorithm.log_dict) - 1: 34 | print_text += ", " 35 | else: 36 | print_text += " " 37 | algorithm.print_fn(print_text) 38 | 39 | if algorithm.tb_log is not None: 40 | algorithm.tb_log.update(algorithm.log_dict, algorithm.it) 41 | -------------------------------------------------------------------------------- /semilearn/algorithms/rankup/rankup_net.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Pin-Yen Huang. 2 | # Licensed under the MIT License. 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from semilearn.nets.utils import init_weights 8 | 9 | 10 | class RankUp_Net(nn.Module): 11 | """ 12 | RankUp_Net implementation. 13 | 14 | Attributes: 15 | backbone (nn.Module): The underlying backbone model. 16 | num_features (int): Number of features from the model's hidden layer. 17 | arc_classifier (nn.Linear): Linear layer for Auxiliary Ranking Classifier (ARC) with two output classes.
18 | """ 19 | 20 | def __init__(self, backbone): 21 | super().__init__() 22 | self.backbone = backbone 23 | self.num_features = backbone.num_features 24 | 25 | # Auxiliary Ranking Classifier (ARC) 26 | self.arc_classifier = nn.Linear(self.num_features, 2) 27 | self.arc_classifier.apply(init_weights) 28 | 29 | def forward(self, x, use_arc=False, targets=None, **kwargs): 30 | if not use_arc: 31 | return self.backbone(x, **kwargs) 32 | feat = self.backbone(x, only_feat=True) 33 | logits = self.backbone(feat, only_fc=True) 34 | logits_arc = self.arc_classifier(feat) 35 | logits_mat, targets_mat = self.compute_rank_logits(logits_arc, targets) 36 | return {"logits": logits, "logits_arc": logits_mat, "feat": feat, "targets_arc": targets_mat} 37 | 38 | def compute_rank_logits(self, logits, targets=None): 39 | logits_mat = logits.unsqueeze(dim=0) - logits.unsqueeze(dim=1) 40 | logits_mat = logits_mat.flatten(0, 1) 41 | if targets is not None: 42 | targets_mat = (1 + torch.sign(targets.unsqueeze(dim=0) - targets.unsqueeze(dim=1))) / 2 43 | targets_mat = targets_mat.flatten(0, 1) 44 | # one-hot encode the targets_mat 45 | targets_onehot = torch.zeros((targets_mat.shape[0], 2)).to(targets_mat.device) 46 | targets_onehot[:, 0] = targets_mat 47 | targets_onehot[:, 1] = 1 - targets_mat 48 | return logits_mat, targets_onehot 49 | return logits_mat, None 50 | 51 | def group_matcher(self, coarse=False): 52 | matcher = self.backbone.group_matcher(coarse, prefix="backbone.") 53 | return matcher 54 | -------------------------------------------------------------------------------- /semilearn/algorithms/rankuprda/rankup_net.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Pin-Yen Huang. 2 | # Licensed under the MIT License. 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from semilearn.nets.utils import init_weights 8 | 9 | 10 | class RankUp_Net(nn.Module): 11 | """ 12 | RankUp_Net implementation. 13 | 14 | Attributes: 15 | backbone (nn.Module): The underlying backbone model. 16 | num_features (int): Number of features from the model's hidden layer. 17 | arc_classifier (nn.Linear): Linear layer for Auxiliary Ranking Classifier (ARC) with two output classes. 
18 | """ 19 | 20 | def __init__(self, backbone): 21 | super().__init__() 22 | self.backbone = backbone 23 | self.num_features = backbone.num_features 24 | 25 | # Auxiliary Ranking Classifier (ARC) 26 | self.arc_classifier = nn.Linear(self.num_features, 2) 27 | self.arc_classifier.apply(init_weights) 28 | 29 | def forward(self, x, use_arc=False, targets=None, **kwargs): 30 | if not use_arc: 31 | return self.backbone(x, **kwargs) 32 | feat = self.backbone(x, only_feat=True) 33 | logits = self.backbone(feat, only_fc=True) 34 | logits_arc = self.arc_classifier(feat) 35 | logits_mat, targets_mat = self.compute_rank_logits(logits_arc, targets) 36 | return {"logits": logits, "logits_arc": logits_mat, "feat": feat, "targets_arc": targets_mat} 37 | 38 | def compute_rank_logits(self, logits, targets=None): 39 | logits_mat = logits.unsqueeze(dim=0) - logits.unsqueeze(dim=1) 40 | logits_mat = logits_mat.flatten(0, 1) 41 | if targets is not None: 42 | targets_mat = (1 + torch.sign(targets.unsqueeze(dim=0) - targets.unsqueeze(dim=1))) / 2 43 | targets_mat = targets_mat.flatten(0, 1) 44 | # one-hot encode the targets_mat 45 | targets_onehot = torch.zeros((targets_mat.shape[0], 2)).to(targets_mat.device) 46 | targets_onehot[:, 0] = targets_mat 47 | targets_onehot[:, 1] = 1 - targets_mat 48 | return logits_mat, targets_onehot 49 | return logits_mat, None 50 | 51 | def group_matcher(self, coarse=False): 52 | matcher = self.backbone.group_matcher(coarse, prefix="backbone.") 53 | return matcher 54 | -------------------------------------------------------------------------------- /semilearn/algorithms/hooks/masking.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import torch 5 | import numpy as np 6 | from semilearn.core.hooks import Hook 7 | 8 | 9 | class MaskingHook(Hook): 10 | """ 11 | Base MaskingHook, used for computing the mask of unlabeled (consistency) loss 12 | define MaskingHook in each algorithm when needed, and call hook inside each train_step 13 | easy support for other settings 14 | """ 15 | 16 | def __init__(self, *args, **kwargs) -> None: 17 | super().__init__() 18 | 19 | def update(self, *args, **kwargs): 20 | pass 21 | 22 | @torch.no_grad() 23 | def masking( 24 | self, algorithm, logits_x_lb=None, logits_x_ulb=None, idx_lb=None, idx_ulb=None, softmax_x_lb=True, softmax_x_ulb=True, *args, **kwargs 25 | ): 26 | """ 27 | generate mask for unlabeled loss 28 | 29 | Args: 30 | algorithm: base algorithm 31 | logits_x_lb: labeled batch logits (or probs, need to set softmax_x_lb to False) 32 | logits_x_ulb: unlabeled batch logits (or probs, need to set softmax_x_ulb to False) 33 | idx_lb: labeled batch index 34 | idx_ulb: unlabeled batch index 35 | softmax_x_lb: flag of using softmax on labeled logits 36 | softmax_x_ulb: flag of using softmax on unlabeled logits 37 | """ 38 | raise NotImplementedError 39 | 40 | 41 | class FixedThresholdingHook(MaskingHook): 42 | """ 43 | Common Fixed Threshold used in fixmatch, uda, pseudo label, et. al. 
44 | """ 45 | 46 | @torch.no_grad() 47 | def masking(self, algorithm, logits_x_ulb, softmax_x_ulb=True, *args, **kwargs): 48 | if softmax_x_ulb: 49 | # probs_x_ulb = torch.softmax(logits_x_ulb.detach(), dim=-1) 50 | probs_x_ulb = algorithm.compute_prob(logits_x_ulb.detach()) 51 | else: 52 | # logits is already probs 53 | probs_x_ulb = logits_x_ulb.detach() 54 | max_probs, _ = torch.max(probs_x_ulb, dim=-1) 55 | mask = max_probs.ge(algorithm.p_cutoff).to(max_probs.dtype) 56 | return mask 57 | 58 | 59 | # class RampupWeightingHook(MaskingHook): 60 | # """ 61 | # Common Rampup weight used in mean teacher, pi model, et. al. 62 | # """ 63 | # def masking(self, algorithm, *args, **kwargs): 64 | # return np.clip(algorithm.it / (algorithm.unsup_warm_up * algorithm.num_train_iter), a_min=0.0, a_max=1.0) 65 | -------------------------------------------------------------------------------- /results/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Benchmark Results 4 | 5 |
6 | 📋 Table of Contents
7 | 
8 | 1. Classic CV Results
9 | 2. NLP Results
10 | 3. Audio Results
11 | 
12 | 
13 | 
14 | This folder contains benchmark results and experiment logs for semi-supervised regression algorithms. For each run, we report the best evaluation metrics (e.g., MAE, MSE, R², SRCC) reached across training iterations. Each setting is run with 3 different random seeds, and we report the average performance together with its standard deviation.
15 | 
16 | ## 📷 Classic CV Results
17 | 
18 | ### a.) Datasets
19 | 
20 | | Dataset | # Labels | # Training Data | # Test Data | Target Range |
21 | | ------- | --------------- | --------------- | ----------- | ------------ |
22 | | UTKFace | 50 / 250 / 2000 | 18,964 | 4,741 | [1, 116] |
23 | 
24 | ### b.) Results
25 | 
26 | The results for the Classic CV benchmarks can be found in [classic_cv_average_log.csv](classic_cv_average_log.csv).
27 | 
28 | ### c.) Logs
29 | 
30 | For detailed experiment logs, check out our **WandB project page** (🚧 Coming Soon).
31 | 
32 | 

([back to top](#benchmark-results))

33 | 34 | ## 📝 NLP Results 35 | 36 | ### a.) Datasets 37 | 38 | | Dataset | # Labels | # Training Data | # Test Data | Target Range | 39 | | ----------- | -------- | --------------- | ----------- | ------------ | 40 | | Yelp Review | 250 | 250,000 | 25,000 | [0, 4] | 41 | 42 | ### b.) Results 43 | 44 | The results for NLP benchmarks can be found in [nlp_average_log.csv](nlp_average_log.csv). 45 | 46 | ### c.) Logs 47 | 48 | For detailed experiment logs, check out our **WandB project page** (🚧 Coming Soon). 49 | 50 |

([back to top](#benchmark-results))

51 | 52 | ## 🎧 Audio Results 53 | 54 | ### a.) Datasets 55 | 56 | | Dataset | # Labels | # Training Data | # Test Data | Target Range | 57 | | ------- | -------- | --------------- | ----------- | ------------ | 58 | | BVCC | 250 | 4,974 | 1,066 | [1, 5] | 59 | 60 | ### b.) Results 61 | 62 | The results for Audio benchmarks can be found in [audio_average_log.csv](audio_average_log.csv). 63 | 64 | ### c.) Logs 65 | 66 | For detailed experiment logs, check out our **WandB project page** (🚧 Coming Soon). 67 | 68 |

([back to top](#benchmark-results))

69 | -------------------------------------------------------------------------------- /semilearn/core/hooks/wandb.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import os 6 | import wandb 7 | 8 | from .hook import Hook 9 | 10 | 11 | class WANDBHook(Hook): 12 | """ 13 | Wandb Hook 14 | """ 15 | 16 | def __init__(self): 17 | super().__init__() 18 | self.log_key_list = [ 19 | "train/sup_loss", 20 | "train/unsup_loss", 21 | "train/total_loss", 22 | "train/util_ratio", 23 | "train/run_time", 24 | "train/prefetch_time", 25 | "lr", 26 | "eval/mae", 27 | "eval/mse", 28 | "eval/r2", 29 | "eval/lcc", 30 | "eval/srcc", 31 | "eval/ktau", 32 | "eval/gmae", 33 | ] 34 | 35 | def before_run(self, algorithm): 36 | name = algorithm.save_name 37 | project = "ssr_" + algorithm.save_dir.split("/")[-1] 38 | group = "_".join(algorithm.args.save_name.split("_")[:-1]) 39 | 40 | # tags 41 | benchmark = f"benchmark: {project}" 42 | dataset = f"dataset: {algorithm.args.dataset}" 43 | data_setting = f"setting: {algorithm.args.dataset}_lb{algorithm.args.num_labels}_ulb{algorithm.args.ulb_num_labels}" 44 | alg = f"alg: {algorithm.args.algorithm}" 45 | tags = [benchmark, dataset, data_setting, alg] 46 | if algorithm.args.resume: 47 | resume = "auto" 48 | else: 49 | resume = "never" 50 | # resume = 'never' 51 | 52 | save_dir = os.path.join(algorithm.args.save_dir, "wandb", algorithm.args.save_name) 53 | if not os.path.exists(save_dir): 54 | os.makedirs(save_dir) 55 | 56 | self.run = wandb.init(name=name, tags=tags, config=algorithm.args.__dict__, project=project, group=group, resume=resume, dir=save_dir) 57 | 58 | def after_train_step(self, algorithm): 59 | if self.every_n_iters(algorithm, algorithm.num_log_iter): 60 | log_dict = {} 61 | for key, item in algorithm.log_dict.items(): 62 | if key in self.log_key_list: 63 | log_dict[key] = item 64 | self.run.log(log_dict, step=algorithm.it) 65 | 66 | if self.every_n_iters(algorithm, algorithm.num_eval_iter): 67 | self.run.log({"eval/best-mae": algorithm.best_eval_mae}, step=algorithm.it) 68 | 69 | def after_run(self, algorithm): 70 | self.run.finish() 71 | -------------------------------------------------------------------------------- /semilearn/core/hooks/aim.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import aim 6 | 7 | from .hook import Hook 8 | 9 | 10 | class AimHook(Hook): 11 | """ 12 | A hook for tracking training progress with Aim. 13 | """ 14 | 15 | def __init__(self): 16 | super().__init__() 17 | self.log_key_list = [ 18 | "train/sup_loss", 19 | "train/unsup_loss", 20 | "train/total_loss", 21 | "train/util_ratio", 22 | "train/run_time", 23 | "train/prefetch_time", 24 | "lr", 25 | "eval/mae", 26 | "eval/mse", 27 | "eval/r2", 28 | "eval/lcc", 29 | "eval/srcc", 30 | "eval/ktau", 31 | "eval/gmae", 32 | ] 33 | 34 | def before_run(self, algorithm): 35 | """Setup the Aim tracking. 36 | 37 | Args: 38 | algorithm (AlgorithmBase): The training algorithm. 
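        Added note (illustrative; the path below is hypothetical): the Aim repo is
        taken from the parent directory of ``save_dir``, e.g.
        ``save_dir='./saved_models/classic_cv'`` tracks runs under the
        ``saved_models`` repo with experiment name ``save_name``.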
39 | """ 40 | # initialize aim run 41 | name = algorithm.save_name 42 | project = algorithm.save_dir.split("/")[-1] 43 | repo = algorithm.args.save_dir.split("/")[-2] 44 | self.run = aim.Run(experiment=name, repo=repo, log_system_params=True) 45 | 46 | # set configuration 47 | self.run["hparams"] = algorithm.args.__dict__ 48 | 49 | # set tags 50 | benchmark = f"benchmark: {project}" 51 | dataset = f"dataset: {algorithm.args.dataset}" 52 | data_setting = f"setting: {algorithm.args.dataset}_lb{algorithm.args.num_labels}_ulb{algorithm.args.ulb_num_labels}" 53 | alg = f"alg: {algorithm.args.algorithm}" 54 | self.run.add_tag(benchmark) 55 | self.run.add_tag(dataset) 56 | self.run.add_tag(data_setting) 57 | self.run.add_tag(alg) 58 | 59 | def after_train_step(self, algorithm): 60 | """Log the metric values in the log dictionary to Aim. 61 | 62 | Args: 63 | algorithm (AlgorithmBase): The training algorithm. 64 | """ 65 | if self.every_n_iters(algorithm, algorithm.num_log_iter): 66 | for key, item in algorithm.log_dict.items(): 67 | if key in self.log_key_list: 68 | self.run.track(item, name=key, step=algorithm.it) 69 | 70 | if self.every_n_iters(algorithm, algorithm.num_eval_iter): 71 | self.run.track(algorithm.best_eval_mae, name="eval/best-mae", step=algorithm.it) 72 | -------------------------------------------------------------------------------- /semilearn/core/utils/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import importlib 6 | 7 | __all__ = ["ALGORITHMS"] 8 | 9 | 10 | class Register: 11 | def __init__(self, registry_name): 12 | self._dict = {} 13 | self._name = registry_name 14 | 15 | def __setitem__(self, key, value): 16 | if not callable(value): 17 | raise Exception(f"Value of a Registry must be a callable!\nValue: {value}") 18 | if key is None: 19 | key = value.__name__ 20 | if key in self._dict: 21 | print("Key %s already in registry %s." % (key, self._name)) 22 | self._dict[key] = value 23 | 24 | def register(self, target): 25 | """Decorator to register a function or class.""" 26 | 27 | def add(key, value): 28 | self[key] = value 29 | return value 30 | 31 | if callable(target): 32 | # @reg.register 33 | return add(None, target) 34 | # @reg.register('alias') 35 | return lambda x: add(target, x) 36 | 37 | def __getitem__(self, key): 38 | return self._dict[key] 39 | 40 | def __contains__(self, key): 41 | return key in self._dict 42 | 43 | def keys(self): 44 | """key""" 45 | return self._dict.keys() 46 | 47 | 48 | ALGORITHMS = Register("algorithms") 49 | 50 | 51 | def _handle_errors(errors): 52 | """ 53 | Log out and possibly reraise errors during import. 54 | """ 55 | if not errors: 56 | return 57 | 58 | for name, err in errors: 59 | print("Module {} import failed: {}".format(name, err)) 60 | 61 | 62 | ALL_MODULES = [ 63 | # NOTE: add all algorithms here 64 | ("semilearn.algorithms", ["fullysupervised", "meanteacher", "mixmatch", "pimodel", "ucvme", "clss", "rankup", "rda", "rankuprda"]) 65 | ] 66 | 67 | 68 | def import_all_modules_for_register(): 69 | """ 70 | Import all modules for register. 71 | """ 72 | all_modules = ALL_MODULES 73 | errors = [] 74 | for base_dir, modules in all_modules: 75 | for name in modules: 76 | try: 77 | if base_dir != "": 78 | full_name = base_dir + "." 
+ name 79 | else: 80 | full_name = name 81 | importlib.import_module(full_name) 82 | except ImportError as error: 83 | errors.append((name, error)) 84 | _handle_errors(errors) 85 | -------------------------------------------------------------------------------- /semilearn/datasets/audio_datasets/augmentation/subsample.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Pin-Yen Huang. 2 | # Licensed under the MIT License. 3 | 4 | import random 5 | from audiomentations import AdjustDuration 6 | 7 | 8 | class Subsample: 9 | """Sample the first `max_length` seconds from the input audio. 10 | 11 | Args: 12 | max_length_seconds (float): Maximum output length of the audio in seconds. 13 | Output will be padded or truncated to this length. 14 | """ 15 | 16 | def __init__(self, max_length_seconds): 17 | self.max_length_seconds = max_length_seconds 18 | self.adjust_duration = AdjustDuration(duration_seconds=max_length_seconds, p=1.0) 19 | 20 | def __call__(self, audio, sample_rate): 21 | max_sample_length = int(round(sample_rate * self.max_length_seconds)) 22 | if len(audio) > max_sample_length: 23 | audio = audio[:max_sample_length] 24 | audio = self.adjust_duration(audio, sample_rate) # padding to the `max_length_seconds` 25 | return audio 26 | 27 | 28 | class RandomSubsample: 29 | """Randomly samples a chunk of audio of length between [`min_length`, `max_length`] seconds and pads it to `max_length` seconds. 30 | 31 | Args: 32 | max_length_seconds (float): Maximum output length of the audio in seconds. 33 | Output will be padded or truncated to this length. 34 | min_ratio (float): Minimum ratio of the maximum length for subsampling, 35 | should be between 0.0 and 1.0. 36 | """ 37 | 38 | def __init__(self, max_length_seconds, min_ratio=1.0): 39 | if not (0.0 <= min_ratio <= 1.0): 40 | raise ValueError("min_ratio should be between 0 and 1") 41 | 42 | self.max_length_seconds = max_length_seconds 43 | self.min_ratio = min_ratio 44 | self.adjust_duration = AdjustDuration(duration_seconds=max_length_seconds, p=1.0) 45 | self.min_length_seconds = max_length_seconds * self.min_ratio 46 | 47 | def __call__(self, audio, sample_rate): 48 | subsample_seconds = random.uniform(self.min_length_seconds, self.max_length_seconds) 49 | subsample_length = int(round(sample_rate * subsample_seconds)) 50 | if len(audio) > subsample_length: 51 | max_offset = len(audio) - subsample_length 52 | random_offset = random.randint(0, max_offset) 53 | audio = audio[random_offset : random_offset + subsample_length] 54 | audio = self.adjust_duration(audio, sample_rate) # padding to the `max_length_seconds` 55 | return audio 56 | -------------------------------------------------------------------------------- /semilearn/datasets/nlp_datasets/get_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | from semilearn.datasets import nlp_datasets 6 | from semilearn.datasets.utils import split_ssl_data 7 | 8 | from .datasetbase import BasicDataset 9 | 10 | 11 | def get_nlp_dataset(args, alg, dataset_name, num_labels=40, data_dir="./data", include_lb_to_ulb=True): 12 | """ 13 | Get the NLP dataset and split the training samples into labeled and unlabeled sets. 14 | 15 | Args: 16 | alg (str): Algorithm. 17 | dataset_name (str): The name of the dataset to load. 
18 | num_labels (int): The number of labeled samples for the training set. 19 | data_dir (str): The directory from which to load the dataset. 20 | include_lb_to_ulb (bool): Indicates whether to include labeled data in the unlabeled set. 21 | 22 | Returns: 23 | Tuple[Dataset, Dataset, Dataset, Dataset]: 24 | A tuple containing: 25 | - train labeled dataset 26 | - train unlabeled dataset 27 | - evaluation dataset 28 | - test dataset 29 | """ 30 | dataset = getattr(nlp_datasets, dataset_name.upper()) 31 | 32 | train_dataset = dataset(data_dir, split="train", download=True) 33 | train_texts, train_targets = train_dataset._texts, train_dataset._labels 34 | 35 | test_dataset = dataset(data_dir, split="test", download=True) 36 | test_texts, test_targets = test_dataset._texts, test_dataset._labels 37 | 38 | eval_dset = BasicDataset(alg, test_texts, test_targets, False) 39 | test_dset = None 40 | 41 | if dataset_name.lower() in ["yelp_review", "amazon_review"]: 42 | dev_dataset = dataset(data_dir, split="dev", download=True) 43 | dev_texts, dev_targets = dev_dataset._texts, dev_dataset._labels 44 | eval_dset = BasicDataset(alg, dev_texts, dev_targets, False) 45 | test_dset = BasicDataset(alg, test_texts, test_targets, False) 46 | 47 | if alg == "fullysupervised": 48 | lb_dset = BasicDataset(alg, train_texts, train_targets, False) 49 | return lb_dset, None, eval_dset, test_dset 50 | 51 | lb_texts, lb_targets, ulb_texts, ulb_targets = split_ssl_data( 52 | args, 53 | train_texts, 54 | train_targets, 55 | lb_num_labels=num_labels, 56 | ulb_num_labels=args.ulb_num_labels, 57 | include_lb_to_ulb=include_lb_to_ulb, 58 | ) 59 | 60 | lb_dset = BasicDataset(alg, lb_texts, lb_targets, False) 61 | ulb_dset = BasicDataset(alg, ulb_texts, ulb_targets, True) 62 | 63 | if alg == "supervised": 64 | ulb_dset = None 65 | 66 | return lb_dset, ulb_dset, eval_dset, test_dset 67 | -------------------------------------------------------------------------------- /semilearn/algorithms/fullysupervised/fullysupervised.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | 6 | from semilearn.core import AlgorithmBase 7 | from semilearn.core.utils import ALGORITHMS 8 | 9 | 10 | @ALGORITHMS.register("fullysupervised") 11 | class FullySupervised(AlgorithmBase): 12 | """ 13 | Train a fully supervised model using labeled data only. This serves as a baseline for comparison. 14 | 15 | Args: 16 | - args (`argparse`): 17 | algorithm arguments 18 | - net_builder (`callable`): 19 | network loading function 20 | - tb_log (`TBLog`): 21 | tensorboard logger 22 | - logger (`logging.Logger`): 23 | logger to use 24 | """ 25 | 26 | def __init__(self, args, net_builder, tb_log=None, logger=None): 27 | super().__init__(args, net_builder, tb_log, logger) 28 | 29 | def train_step(self, x_lb, y_lb, **kwargs): 30 | # inference and calculate sup losses 31 | with self.amp_cm(): 32 | outs_x_lb = self.model(x_lb) 33 | logits_x_lb = outs_x_lb["logits"] 34 | feats_x_lb = outs_x_lb["feat"] 35 | sup_loss = self.reg_loss(logits_x_lb, y_lb, reduction="mean") 36 | 37 | # extract features for further use in the classification algorithm. 
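            # (Added note: extra input batches passed via kwargs are encoded with
            # only_feat=True below, so wrapper algorithms can reuse their features.)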
38 | feat_dict = {"x_lb": feats_x_lb} 39 | for k in kwargs: 40 | feat_dict[k] = self.model(kwargs[k], only_feat=True) 41 | 42 | out_dict = self.process_out_dict(loss=sup_loss, feat=feat_dict) 43 | log_dict = self.process_log_dict(sup_loss=sup_loss.item(), total_loss=sup_loss.item()) 44 | return out_dict, log_dict 45 | 46 | def train(self): 47 | # lb: labeled, ulb: unlabeled 48 | self.model.train() 49 | self.call_hook("before_run") 50 | 51 | for epoch in range(self.start_epoch, self.epochs): 52 | self.epoch = epoch 53 | 54 | # prevent the training iterations exceed args.num_train_iter 55 | if self.it > self.num_train_iter: 56 | break 57 | 58 | self.call_hook("before_train_epoch") 59 | 60 | for data_lb in self.loader_dict["train_lb"]: 61 | # prevent the training iterations exceed args.num_train_iter 62 | if self.it > self.num_train_iter: 63 | break 64 | 65 | self.call_hook("before_train_step") 66 | self.out_dict, self.log_dict = self.train_step(**self.process_batch(**data_lb)) 67 | self.call_hook("after_train_step") 68 | self.it += 1 69 | 70 | self.call_hook("after_train_epoch") 71 | self.call_hook("after_run") 72 | 73 | 74 | ALGORITHMS["supervised"] = FullySupervised 75 | -------------------------------------------------------------------------------- /semilearn/datasets/samplers/sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import torch 6 | import torch.distributed as dist 7 | from torch.utils.data.sampler import Sampler 8 | 9 | 10 | class DistributedSampler(Sampler): 11 | """Sampler that restricts data loading to a subset of the dataset. 12 | 13 | It is especially useful in conjunction with 14 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 15 | process can pass a DistributedSampler instance as a DataLoader sampler, 16 | and load a subset of the original dataset that is exclusive to it. 17 | 18 | .. note:: 19 | Dataset is assumed to be of constant size. 20 | 21 | Arguments: 22 | dataset: Dataset used for sampling. 23 | num_replicas (optional): Number of processes participating in 24 | distributed training. 25 | rank (optional): Rank of the current process within num_replicas. 26 | """ 27 | 28 | def __init__(self, dataset, num_replicas=None, rank=None, num_samples=None, **kwargs): 29 | if not isinstance(num_samples, int) or num_samples <= 0: 30 | raise ValueError("num_samples should be a positive integer " "value, but got num_samples={}".format(num_samples)) 31 | 32 | if num_replicas is None: 33 | if not dist.is_available(): 34 | raise RuntimeError("Requires distributed package to be available") 35 | else: 36 | num_replicas = dist.get_world_size() 37 | if rank is None: 38 | if not dist.is_available(): 39 | raise RuntimeError("Requires distributed package to be available") 40 | else: 41 | rank = dist.get_rank() 42 | self.dataset = dataset 43 | self.num_replicas = num_replicas 44 | self.rank = rank 45 | self.epoch = 0 46 | 47 | self.total_size = num_samples 48 | assert num_samples % self.num_replicas == 0, f"{num_samples} samples cant" f"be evenly distributed among {num_replicas} devices." 
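        # Per-replica quota. Illustrative example (added; numbers are hypothetical):
        # with num_samples=8 and num_replicas=2, each process gets 4 indices;
        # __iter__ concatenates repeated random permutations of the dataset up to
        # total_size, then strides the shuffled index list by rank to pick this share.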
49 | self.num_samples = int(num_samples // self.num_replicas) 50 | 51 | def __iter__(self): 52 | # deterministically shuffle based on epoch 53 | g = torch.Generator() 54 | g.manual_seed(self.epoch) 55 | 56 | n = len(self.dataset) 57 | n_repeats = self.total_size // n 58 | n_remain = self.total_size % n 59 | indices = [torch.randperm(n, generator=g) for _ in range(n_repeats)] 60 | indices.append(torch.randperm(n, generator=g)[:n_remain]) 61 | indices = torch.cat(indices, dim=0).tolist() 62 | 63 | assert len(indices) == self.total_size 64 | 65 | # subsample 66 | indices = indices[self.rank : self.total_size : self.num_replicas] 67 | assert len(indices) == self.num_samples 68 | 69 | return iter(indices) 70 | 71 | def __len__(self): 72 | return self.num_samples 73 | 74 | def set_epoch(self, epoch): 75 | self.epoch = epoch 76 | 77 | 78 | name2sampler = {"RandomSampler": DistributedSampler} 79 | -------------------------------------------------------------------------------- /semilearn/algorithms/pimodel/pimodel.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import numpy as np 6 | from semilearn.core import AlgorithmBase 7 | from semilearn.core.utils import ALGORITHMS 8 | from semilearn.algorithms.utils import SSL_Argument 9 | 10 | 11 | @ALGORITHMS.register("pimodel") 12 | class PiModel(AlgorithmBase): 13 | """ 14 | Pi-Model algorithm (https://arxiv.org/abs/1610.02242). 15 | 16 | Args: 17 | - args (`argparse`): 18 | algorithm arguments 19 | - net_builder (`callable`): 20 | network loading function 21 | - tb_log (`TBLog`): 22 | tensorboard logger 23 | - logger (`logging.Logger`): 24 | logger to use 25 | - unsup_warm_up (`float`, *optional*, defaults to 0.4): 26 | Ramp up for weights for unsupervised loss 27 | """ 28 | 29 | def __init__(self, args, net_builder, tb_log=None, logger=None, **kwargs): 30 | super().__init__(args, net_builder, tb_log, logger, **kwargs) 31 | self.init(unsup_warm_up=args.unsup_warm_up) 32 | 33 | def init(self, unsup_warm_up=0.4): 34 | self.unsup_warm_up = unsup_warm_up 35 | 36 | def train_step(self, x_lb, y_lb, x_ulb_w, x_ulb_w_2, **kwargs): 37 | # inference and calculate sup/unsup losses 38 | with self.amp_cm(): 39 | outs_x_lb = self.model(x_lb) 40 | logits_x_lb = outs_x_lb["logits"] 41 | feats_x_lb = outs_x_lb["feat"] 42 | 43 | # calculate BN only for the first batch 44 | self.bn_controller.freeze_bn(self.model) 45 | outs_x_ulb_w = self.model(x_ulb_w) 46 | logits_x_ulb_w = outs_x_ulb_w["logits"] 47 | feats_x_ulb_w = outs_x_ulb_w["feat"] 48 | outs_x_ulb_w_2 = self.model(x_ulb_w_2) 49 | logits_x_ulb_w_2 = outs_x_ulb_w_2["logits"] 50 | feats_x_ulb_w_2 = outs_x_ulb_w_2["feat"] 51 | self.bn_controller.unfreeze_bn(self.model) 52 | 53 | # extract features for further use in the classification algorithm. 
54 | feat_dict = {"x_lb": feats_x_lb, "x_ulb_w": feats_x_ulb_w, "x_ulb_w_2": feats_x_ulb_w_2} 55 | for k in kwargs: 56 | feat_dict[k] = self.model(kwargs[k], only_feat=True) 57 | 58 | sup_loss = self.reg_loss(logits_x_lb, y_lb, reduction="mean") 59 | unsup_loss = self.consistency_loss(logits_x_ulb_w_2, logits_x_ulb_w.detach(), "mse") 60 | 61 | unsup_warmup = np.clip(self.it / (self.unsup_warm_up * self.num_train_iter), a_min=0.0, a_max=1.0) 62 | total_loss = sup_loss + self.ulb_loss_ratio * unsup_loss * unsup_warmup 63 | 64 | out_dict = self.process_out_dict(loss=total_loss, feat=feat_dict) 65 | log_dict = self.process_log_dict(sup_loss=sup_loss.item(), unsup_loss=unsup_loss.item(), total_loss=total_loss.item()) 66 | return out_dict, log_dict 67 | 68 | @staticmethod 69 | def get_argument(): 70 | return [ 71 | SSL_Argument("--unsup_warm_up", float, 0.4, "warm up ratio for regression unsupervised loss"), 72 | ] 73 | -------------------------------------------------------------------------------- /semilearn/algorithms/clss/ordinal_entropy.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Pin-Yen Huang. 2 | # Licensed under the MIT License. 3 | # Code in this file is adapted from xmed-lab/CLSS 4 | # https://github.com/xmed-lab/CLSS/blob/main/age_estimation/models/OrdinalEntropy.py 5 | 6 | import torch 7 | import torch.nn.functional as F 8 | 9 | 10 | def ordinal_entropy(features, targets): 11 | """ 12 | Compute the ordinal entropy of features given targets. 13 | 14 | Args: 15 | features (torch.Tensor): Input features. Shape: (batch_size, feat_dim). 16 | targets (torch.Tensor): Target labels. Shape: (batch_size,). 17 | 18 | Returns: 19 | torch.Tensor: Ordinal entropy. 20 | """ 21 | if features.dim() != 2 or targets.dim() != 1 or features.size(0) != targets.size(0): 22 | raise ValueError("Input shapes are invalid.") 23 | 24 | batch_size, feat_dim = features.size() 25 | 26 | uni_values, uni_indices, uni_counts = torch.unique(targets, return_inverse=True, return_counts=True) 27 | 28 | center_feats = torch.zeros([len(uni_values), feat_dim], device=features.device) 29 | center_feats.index_add_(0, uni_indices, features) 30 | center_feats = center_feats / uni_counts.unsqueeze(1) 31 | 32 | norm_center_feats = F.normalize(center_feats, dim=1) 33 | distance = euclidean_dist(norm_center_feats, norm_center_feats) 34 | distance = flatten_upper_triangular(distance) 35 | 36 | _uni_values = uni_values.unsqueeze(1) 37 | weight = euclidean_dist(_uni_values, _uni_values) 38 | weight = flatten_upper_triangular(weight) 39 | weight = (weight - torch.min(weight)) / torch.max(weight) if len(weight) != 0 else 0 40 | 41 | distance = distance * weight 42 | entropy = torch.mean(distance) 43 | 44 | norm_feats = F.normalize(features, dim=1) 45 | norm_feats -= norm_center_feats[uni_indices, :] 46 | tightness = torch.sum(norm_feats.pow(2), dim=1) 47 | tightness = tightness[tightness > 0].mean() 48 | 49 | return tightness - entropy 50 | 51 | 52 | def euclidean_dist(x, y): 53 | """ 54 | Calculate Euclidean distance between two sets of vectors. 55 | 56 | Args: 57 | x (torch.Tensor): Set of vectors. Shape: (m, d). 58 | y (torch.Tensor): Set of vectors. Shape: (n, d). 59 | 60 | Returns: 61 | torch.Tensor: Pairwise Euclidean distance. Shape: (m, n). 
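    Illustrative example (added; not in the original docstring):

        >>> x = torch.zeros(1, 2)
        >>> y = torch.tensor([[3.0, 4.0]])
        >>> euclidean_dist(x, y)
        tensor([[5.]])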
62 | """ 63 | m, n = x.size(0), y.size(0) 64 | xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, n) 65 | yy = torch.pow(y, 2).sum(1, keepdim=True).expand(n, m).t() 66 | dist = xx + yy 67 | dist.addmm_(x, y.t(), beta=1, alpha=-2) 68 | dist = dist.clamp(min=1e-12).sqrt() # for numerical stability 69 | return dist 70 | 71 | 72 | def flatten_upper_triangular(x): 73 | """ 74 | Flatten the upper triangular elements of a square matrix. 75 | 76 | Args: 77 | x (torch.Tensor): Square matrix. 78 | 79 | Returns: 80 | torch.Tensor: Flattened upper triangular elements. 81 | """ 82 | if len(x.shape) != 2 or x.shape[0] != x.shape[1]: 83 | raise ValueError(f"Input tensor must be a square matrix, but got shape {x.shape}") 84 | n = x.shape[0] 85 | mask = torch.triu(torch.ones(n, n), diagonal=1).to(torch.bool) 86 | return x[mask] 87 | -------------------------------------------------------------------------------- /semilearn/datasets/cv_datasets/get_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | from semilearn.datasets import cv_datasets 6 | from semilearn.datasets.utils import split_ssl_data, load_image_files 7 | 8 | from .datasetbase import BasicDataset, ImagePathDataset 9 | from .augmentation import get_val_transforms, get_weak_transforms, get_strong_transforms 10 | 11 | 12 | def get_cv_dataset(args, alg, dataset_name, num_labels, data_dir="./data", include_lb_to_ulb=True): 13 | """ 14 | Get the computer vision dataset and split the training samples into labeled and unlabeled sets. 15 | 16 | Args: 17 | alg (str): Algorithm. 18 | dataset_name (str): The name of the dataset to load. 19 | num_labels (int): The number of labeled samples for the training set. 20 | data_dir (str): The directory from which to load the dataset. 21 | include_lb_to_ulb (bool): Indicates whether to include labeled data in the unlabeled set. 
22 | 23 | Returns: 24 | Tuple[Dataset, Dataset, Dataset, Dataset]: 25 | A tuple containing: 26 | - train labeled dataset 27 | - train unlabeled dataset 28 | - evaluation dataset 29 | - test dataset 30 | """ 31 | 32 | dataset = getattr(cv_datasets, dataset_name.upper()) 33 | 34 | train_dataset = dataset(data_dir, split="train", download=True) 35 | train_paths, train_targets = train_dataset._file_paths, train_dataset._labels 36 | 37 | test_dataset = dataset(data_dir, split="test", download=True) 38 | test_paths, test_targets = test_dataset._file_paths, test_dataset._labels 39 | 40 | if args.preload: 41 | train_data = load_image_files(train_paths) 42 | test_data = load_image_files(test_paths) 43 | ImageDataset = BasicDataset 44 | else: 45 | train_data = train_paths 46 | test_data = test_paths 47 | ImageDataset = ImagePathDataset 48 | 49 | transform_weak = get_weak_transforms(crop_size=args.img_size, crop_ratio=args.crop_ratio, dataset_name=dataset_name) 50 | transform_strong = get_strong_transforms(crop_size=args.img_size, crop_ratio=args.crop_ratio, dataset_name=dataset_name) 51 | transform_val = get_val_transforms(crop_size=args.img_size, dataset_name=dataset_name) 52 | 53 | eval_dset = ImageDataset(alg, test_data, test_targets, transform_val, False, None) 54 | test_dset = None 55 | 56 | if alg == "fullysupervised": 57 | lb_dset = ImageDataset(alg, train_data, train_targets, transform_weak, False, transform_strong) 58 | return lb_dset, None, eval_dset, test_dset 59 | 60 | lb_data, lb_targets, ulb_data, ulb_targets = split_ssl_data( 61 | args, 62 | train_data, 63 | train_targets, 64 | lb_num_labels=num_labels, 65 | ulb_num_labels=args.ulb_num_labels, 66 | include_lb_to_ulb=include_lb_to_ulb, 67 | ) 68 | 69 | lb_dset = ImageDataset(alg, lb_data, lb_targets, transform_weak, False, transform_strong) 70 | ulb_dset = ImageDataset(alg, ulb_data, ulb_targets, transform_weak, True, transform_strong) 71 | 72 | if alg == "supervised": 73 | ulb_dset = None 74 | 75 | return lb_dset, ulb_dset, eval_dset, test_dset 76 | -------------------------------------------------------------------------------- /semilearn/datasets/nlp_datasets/datasetbase.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import random 6 | 7 | from torch.utils.data import Dataset 8 | 9 | 10 | class BasicDataset(Dataset): 11 | """ 12 | BasicDataset returns a pair of text and labels (targets). 13 | If targets are not given, BasicDataset returns None as the label. 14 | Returns both original and augmented text. Augmented texts can be None. 15 | """ 16 | 17 | def __init__(self, alg, data, targets=None, is_ulb=False, *args, **kwargs): 18 | """ 19 | Args: 20 | alg (str): Algorithm. 21 | data (list): List of text data along with two augmented texts (e.g., [text, aug_text1 (or None), aug_text2 (or None)]). 22 | targets (list or None): Target labels corresponding to the images. 23 | is_ulb (bool): Indicates if the dataset is unlabeled. 
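        Illustrative entry (added; the strings are hypothetical):

            data = [("great food!", "the food was great", "really good food")]
            targets = [4.0]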
24 | """ 25 | super(BasicDataset, self).__init__() 26 | self.alg = alg 27 | self.data = data 28 | self.targets = targets 29 | self.is_ulb = is_ulb 30 | self.transform = None 31 | 32 | def random_choose_sen(self): 33 | """Randomly choose one of the augmented sentences.""" 34 | return random.randint(1, 2) 35 | 36 | def __sample__(self, idx): 37 | """Retrieve the text and corresponding target at a specific index.""" 38 | sen = self.data[idx] 39 | target = None if self.targets is None else self.targets[idx] 40 | return sen, target 41 | 42 | def __getitem__(self, idx): 43 | """ 44 | Returns raw and/or augmented text based on the algorithm and dataset type. 45 | """ 46 | sen, target = self.__sample__(idx) 47 | 48 | data_dict = { 49 | "idx_lb": lambda: idx, 50 | "x_lb": lambda: sen[0], 51 | "x_lb_s": lambda: sen[self.random_choose_sen()], 52 | "y_lb": lambda: target, 53 | "idx_ulb": lambda: idx, 54 | "x_ulb_w": lambda: sen[0], 55 | "x_ulb_w_2": lambda: sen[0], 56 | "x_ulb_s": lambda: sen[self.random_choose_sen()], 57 | "x_ulb_s_2": lambda: sen[self.random_choose_sen()], 58 | } 59 | 60 | data_keys = self._determine_data_keys() 61 | return {k: data_dict[k]() for k in data_keys} 62 | 63 | def __len__(self): 64 | return len(self.data) 65 | 66 | def _determine_data_keys(self): 67 | """Determine the required output data based on the algorithm type.""" 68 | data_keys = set() 69 | 70 | if not self.is_ulb: 71 | data_keys.update({"idx_lb", "x_lb", "y_lb"}) 72 | return data_keys 73 | 74 | # for regression algorithms 75 | if self.alg == "fullysupervised" or self.alg == "supervised": 76 | data_keys.update({"idx_ulb"}) 77 | elif self.alg == "rankup" or self.alg == "rankuprda": 78 | data_keys.update({"idx_ulb", "x_ulb_w", "x_ulb_s"}) 79 | elif self.alg == "pimodel" or self.alg == "meanteacher" or self.alg == "mixmatch": 80 | data_keys.update({"idx_ulb", "x_ulb_w", "x_ulb_w_2"}) 81 | else: 82 | data_keys.update({"idx_ulb", "x_ulb_w"}) 83 | 84 | return data_keys 85 | -------------------------------------------------------------------------------- /semilearn/algorithms/meanteacher/meanteacher.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import torch 6 | import numpy as np 7 | 8 | from semilearn.core import AlgorithmBase 9 | from semilearn.core.utils import ALGORITHMS 10 | from semilearn.algorithms.utils import SSL_Argument 11 | 12 | 13 | @ALGORITHMS.register("meanteacher") 14 | class MeanTeacher(AlgorithmBase): 15 | """ 16 | MeanTeacher algorithm (https://arxiv.org/abs/1703.01780). 
17 | 18 | Args: 19 | - args (`argparse`): 20 | algorithm arguments 21 | - net_builder (`callable`): 22 | network loading function 23 | - tb_log (`TBLog`): 24 | tensorboard logger 25 | - logger (`logging.Logger`): 26 | logger to use 27 | - unsup_warm_up (`float`, *optional*, defaults to 0.4): 28 | Ramp up for weights for unsupervised loss 29 | """ 30 | 31 | def __init__(self, args, net_builder, tb_log=None, logger=None, **kwargs): 32 | super().__init__(args, net_builder, tb_log, logger, **kwargs) 33 | # mean teacher specified arguments 34 | self.init(unsup_warm_up=args.unsup_warm_up) 35 | 36 | def init(self, unsup_warm_up=0.4): 37 | self.unsup_warm_up = unsup_warm_up 38 | 39 | def train_step(self, x_lb, y_lb, x_ulb_w, x_ulb_w_2, **kwargs): 40 | # inference and calculate sup/unsup losses 41 | with self.amp_cm(): 42 | outs_x_lb = self.model(x_lb) 43 | logits_x_lb = outs_x_lb["logits"] 44 | feats_x_lb = outs_x_lb["feat"] 45 | 46 | self.ema.apply_shadow() 47 | with torch.no_grad(): 48 | self.bn_controller.freeze_bn(self.model) 49 | outs_x_ulb_w = self.model(x_ulb_w) 50 | logits_x_ulb_w = outs_x_ulb_w["logits"] # self.model(x_ulb_w) 51 | feats_x_ulb_w = outs_x_ulb_w["feat"] 52 | self.bn_controller.unfreeze_bn(self.model) 53 | self.ema.restore() 54 | 55 | self.bn_controller.freeze_bn(self.model) 56 | outs_x_ulb_w_2 = self.model(x_ulb_w_2) 57 | logits_x_ulb_w_2 = outs_x_ulb_w_2["logits"] 58 | feats_x_ulb_w_2 = outs_x_ulb_w_2["feat"] 59 | self.bn_controller.unfreeze_bn(self.model) 60 | 61 | # extract features for further use in the classification algorithm. 62 | feat_dict = {"x_lb": feats_x_lb, "x_ulb_w": feats_x_ulb_w, "x_ulb_w_2": feats_x_ulb_w_2} 63 | for k in kwargs: 64 | feat_dict[k] = self.model(kwargs[k], only_feat=True) 65 | 66 | sup_loss = self.reg_loss(logits_x_lb, y_lb, reduction="mean") 67 | unsup_loss = self.consistency_loss(logits_x_ulb_w_2, logits_x_ulb_w.detach(), "mse") 68 | 69 | unsup_warmup = np.clip(self.it / (self.unsup_warm_up * self.num_train_iter), a_min=0.0, a_max=1.0) 70 | total_loss = sup_loss + self.ulb_loss_ratio * unsup_loss * unsup_warmup 71 | 72 | out_dict = self.process_out_dict(loss=total_loss, feat=feat_dict) 73 | log_dict = self.process_log_dict(sup_loss=sup_loss.item(), unsup_loss=unsup_loss.item(), total_loss=total_loss.item()) 74 | return out_dict, log_dict 75 | 76 | @staticmethod 77 | def get_argument(): 78 | return [ 79 | SSL_Argument("--unsup_warm_up", float, 0.4, "warm up ratio for regression unsupervised loss"), 80 | ] 81 | -------------------------------------------------------------------------------- /semilearn/datasets/audio_datasets/vcc2018.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Pin-Yen Huang. 2 | # Licensed under the MIT License. 3 | # Code in this file is adapted from pytorch/pytorch 4 | # https://github.com/pytorch/vision/blob/main/torchvision/datasets/food101.py 5 | 6 | import librosa 7 | import numpy as np 8 | import pandas as pd 9 | from pathlib import Path 10 | from typing import Any, Tuple 11 | 12 | from torch.utils.data import Dataset 13 | from torchvision.datasets.utils import download_and_extract_archive, verify_str_arg 14 | 15 | 16 | class VCC2018(Dataset): 17 | """`The VCC2018 Data Set ` 18 | 19 | The Voice Conversion Challenge 2018 (VCC2018) dataset is an audio quality assessment dataset, 20 | where the objective is to predict the quality of an audio sample. The labels, ranging from 1 21 | to 5, are obtained by averaging the scores provided by multiple listeners. 
The dataset 22 | comprises over 20,000 audio files, which we split into 16,464 training samples and 4,116 test samples. 23 | 24 | Args: 25 | root (string): Root directory of the dataset. 26 | split (string, optional): The dataset split, supports ``"train"`` (default) and ``"test"``. 27 | download (bool, optional): If True, downloads the dataset from the internet and 28 | puts it in root directory. If dataset is already downloaded, it is not 29 | downloaded again. Default is False. 30 | """ 31 | 32 | _URL_MD5 = { 33 | "data": ( 34 | "https://datashare.ed.ac.uk/bitstream/handle/10283/3061/vcc2018_submitted_systems_converted_speech.tar.gz", 35 | "75b0f937240f6850a56ec2cbad34b4ad", 36 | ), 37 | "meta": ("https://github.com/pm25/regression-datasets/raw/refs/heads/main/data/vcc2018/meta.zip", "66ea41b35ffbc1ad6565e538320f011d"), 38 | } 39 | 40 | def __init__( 41 | self, 42 | root: str, 43 | split: str = "train", 44 | download: bool = False, 45 | ) -> None: 46 | super().__init__() 47 | self._split = verify_str_arg(split, "split", ("train", "test")) 48 | self._base_folder = Path(root) / "vcc2018" 49 | self._meta_folder = self._base_folder / "meta" 50 | self._audio_folder = self._base_folder / "mnt/sysope/test_files/testVCC2" 51 | 52 | if download: 53 | self._download() 54 | 55 | if not self._check_exists(): 56 | raise RuntimeError("Dataset not found. You can use download=True to download it") 57 | 58 | metadata = pd.read_csv(self._meta_folder / f"{split}.csv") 59 | self._file_paths = metadata["file_name"].apply(lambda x: self._audio_folder / x).to_numpy(dtype="object") 60 | self._labels = metadata["label"].to_numpy(dtype=np.float32) 61 | 62 | def __len__(self) -> int: 63 | return len(self._file_paths) 64 | 65 | def __getitem__(self, idx: int) -> Tuple[Any, Any]: 66 | audio_file, label = self._file_paths[idx], self._labels[idx] 67 | waveform, sample_rate = librosa.load(audio_file, sr=None, mono=True) 68 | return waveform, sample_rate, label 69 | 70 | def extra_repr(self) -> str: 71 | return f"split={self._split}" 72 | 73 | def _check_exists(self) -> bool: 74 | return all(folder.exists() and folder.is_dir() for folder in (self._meta_folder, self._audio_folder)) 75 | 76 | def _download(self) -> None: 77 | if self._check_exists(): 78 | return 79 | for url, md5 in self._URL_MD5.values(): 80 | download_and_extract_archive(url, download_root=self._base_folder, md5=md5) 81 | -------------------------------------------------------------------------------- /visualization/plot_multi.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Pin-Yen Huang. 2 | # Licensed under the MIT License. 
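# Note (added; inferred from the __main__ block below): each features/*.npy file is
# expected to hold a pickled dict {"feat": ndarray of shape (N, D), "label": ndarray
# of shape (N,)}, loaded via np.load(path, allow_pickle=True).item().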
3 | 4 | import numpy as np 5 | import pandas as pd 6 | import seaborn as sns 7 | import matplotlib 8 | import matplotlib.pyplot as plt 9 | from matplotlib.gridspec import GridSpec 10 | 11 | from umap import UMAP 12 | from pathlib import Path 13 | from sklearn.manifold import TSNE 14 | 15 | font = {"size": 18} 16 | matplotlib.rc("font", **font) 17 | 18 | 19 | def plot_by_seaborn(ax, x_feats, y_labels, method="t-sne"): 20 | n_components = 2 21 | 22 | if method.lower() == "t-sne": 23 | m = TSNE( 24 | n_components=n_components, 25 | perplexity=50, 26 | learning_rate="auto", 27 | init="pca", 28 | random_state=222, 29 | ) 30 | elif method.lower() == "umap": 31 | m = UMAP(n_components=n_components, n_neighbors=50, init="pca", random_state=222) 32 | 33 | projections = m.fit_transform(x_feats) 34 | 35 | projections_df = pd.DataFrame( 36 | { 37 | "Dimension 1": projections[:, 0], 38 | "Dimension 2": projections[:, 1], 39 | "label": y_labels, 40 | } 41 | ) 42 | 43 | sns.scatterplot( 44 | ax=ax, 45 | x="Dimension 1", 46 | y="Dimension 2", 47 | hue="label", 48 | palette=sns.color_palette("coolwarm", as_cmap=True), 49 | data=projections_df, 50 | legend=False, 51 | s=9, 52 | ) 53 | 54 | x_min, x_max = ax.get_xlim() 55 | y_min, y_max = ax.get_ylim() 56 | xy_lim = (min(x_min, y_min), max(x_max, y_max)) 57 | ax.set_xlim(xy_lim) 58 | ax.set_ylim(xy_lim) 59 | ax.set_aspect("equal") 60 | 61 | 62 | if __name__ == "__main__": 63 | data_info_list = [ 64 | {"path": "features/supervised_utkface_lb250_s0.npy", "name": "Supervised"}, 65 | {"path": "features/mixmatch_utkface_lb250_s0.npy", "name": "MixMatch"}, 66 | {"path": "features/supervised_fixmatch_utkface_lb250_s0.npy", "name": "RankUp"}, 67 | ] 68 | method = "t-SNE" # UMAP 69 | 70 | features = [np.load(d["path"], allow_pickle=True).item() for d in data_info_list] 71 | 72 | n_data = len(data_info_list) 73 | fig = plt.figure(figsize=(8 * n_data, 6)) 74 | gs = GridSpec(1, n_data + 1, width_ratios=[1] * n_data + [0.05], wspace=0.0) 75 | 76 | axes = [fig.add_subplot(gs[i]) for i in range(n_data)] 77 | cbar_ax = fig.add_subplot(gs[-1]) 78 | 79 | for i, ax in enumerate(axes): 80 | data_name = data_info_list[i]["name"] 81 | print(f"Running {method} for {data_name} features ...") 82 | plot_by_seaborn(ax, features[i]["feat"], features[i]["label"], method=method) 83 | ax.set_title(data_info_list[i]["name"]) 84 | 85 | norm = plt.Normalize( 86 | min([feat["label"].min() for feat in features]), 87 | max([feat["label"].max() for feat in features]), 88 | ) 89 | 90 | cbar = fig.colorbar( 91 | plt.cm.ScalarMappable(cmap=sns.color_palette("coolwarm", as_cmap=True), norm=norm), 92 | cax=cbar_ax, 93 | orientation="vertical", 94 | label="Label (Age)", 95 | ) 96 | 97 | # Ensure output directory exists 98 | output_dir = Path("plots") 99 | output_dir.mkdir(exist_ok=True, parents=True) 100 | 101 | # Save the plot 102 | save_name = "_".join([d["name"].lower() for d in data_info_list]) + f"_{method.lower()}_2d.png" 103 | save_path = output_dir / save_name 104 | plt.savefig(save_path, dpi=300, bbox_inches="tight") 105 | plt.show() 106 | print(f"Plot saved to {save_path}") 107 | -------------------------------------------------------------------------------- /semilearn/algorithms/clss/clss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Pin-Yen Huang. 2 | # Licensed under the MIT License. 
3 | # Code in this file is adapted from xmed-lab/CLSS 4 | # https://github.com/xmed-lab/CLSS 5 | 6 | from semilearn.core import AlgorithmBase 7 | from semilearn.core.utils import ALGORITHMS 8 | from semilearn.algorithms.utils import SSL_Argument 9 | 10 | from .ordinal_entropy import ordinal_entropy 11 | from .ulb_rank import ulb_rank, ulb_rank_prdlb 12 | 13 | 14 | @ALGORITHMS.register("clss") 15 | class CLSS(AlgorithmBase): 16 | """ 17 | CLSS algorithm (https://proceedings.neurips.cc/paper_files/paper/2023/file/b2d4051f03a7038a2771dfbbe5c7b54e-Paper-Conference.pdf). 18 | 19 | Args: 20 | - args (`argparse`): 21 | algorithm arguments 22 | - net_builder (`callable`): 23 | network loading function 24 | - tb_log (`TBLog`): 25 | tensorboard logger 26 | - logger (`logging.Logger`): 27 | logger to use 28 | """ 29 | 30 | def __init__(self, args, net_builder, tb_log=None, logger=None, **kwargs): 31 | super().__init__(args, net_builder, tb_log, logger, **kwargs) 32 | self.init( 33 | lambda_val=args.lambda_val, 34 | lb_ctr_loss_ratio=args.lb_ctr_loss_ratio, 35 | ulb_ctr_loss_ratio=args.ulb_ctr_loss_ratio, 36 | ulb_rank_loss_ratio=args.ulb_rank_loss_ratio, 37 | ) 38 | 39 | def init( 40 | self, 41 | lambda_val=2, 42 | lb_ctr_loss_ratio=1.0, 43 | ulb_ctr_loss_ratio=0.05, 44 | ulb_rank_loss_ratio=0.01, 45 | ): 46 | self.lambda_val = lambda_val 47 | self.lb_ctr_loss_ratio = lb_ctr_loss_ratio 48 | self.ulb_ctr_loss_ratio = ulb_ctr_loss_ratio 49 | self.ulb_rank_loss_ratio = ulb_rank_loss_ratio 50 | 51 | def train_step(self, x_lb, y_lb, x_ulb_w, **kwargs): 52 | # inference and calculate sup/unsup losses 53 | with self.amp_cm(): 54 | x_lb_outputs = self.model(x_lb) 55 | logits_x_lb = x_lb_outputs["logits"] 56 | feats_x_lb = x_lb_outputs["feat"] 57 | 58 | x_ulb_outputs = self.model(x_ulb_w) 59 | logits_x_ulb_w = x_ulb_outputs["logits"] 60 | feats_x_ulb_w = x_ulb_outputs["feat"] 61 | 62 | # extract features for further use in the classification algorithm. 63 | feat_dict = {"x_lb": feats_x_lb, "x_ulb_w": feats_x_ulb_w} 64 | for k in kwargs: 65 | feat_dict[k] = self.model(kwargs[k], only_feat=True) 66 | 67 | sup_reg_loss = self.reg_loss(logits_x_lb, y_lb, reduction="mean") 68 | sup_ctr_loss = ordinal_entropy(feats_x_lb, y_lb) 69 | sup_loss = sup_reg_loss + self.lb_ctr_loss_ratio * sup_ctr_loss 70 | 71 | unsup_ctr_loss, ft_rank = ulb_rank(feats_x_ulb_w, self.lambda_val) 72 | unsup_rank_loss = ulb_rank_prdlb(logits_x_ulb_w.unsqueeze(1), self.lambda_val, pred_inp=ft_rank) 73 | unsup_loss = self.ulb_ctr_loss_ratio * unsup_ctr_loss + self.ulb_rank_loss_ratio * unsup_rank_loss 74 | 75 | total_loss = sup_loss + unsup_loss 76 | 77 | out_dict = self.process_out_dict(loss=total_loss, feat=feat_dict) 78 | log_dict = self.process_log_dict(sup_loss=sup_loss.item(), unsup_loss=unsup_loss.item(), total_loss=total_loss.item()) 79 | return out_dict, log_dict 80 | 81 | @staticmethod 82 | def get_argument(): 83 | return [ 84 | SSL_Argument("--lambda_val", float, 2.0), 85 | SSL_Argument("--lb_ctr_loss_ratio", float, 1.0), 86 | SSL_Argument("--ulb_ctr_loss_ratio", float, 0.05), 87 | SSL_Argument("--ulb_rank_loss_ratio", float, 0.01), 88 | ] 89 | -------------------------------------------------------------------------------- /semilearn/datasets/audio_datasets/get_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 
4 | 5 | from semilearn.datasets import audio_datasets 6 | from semilearn.datasets.utils import split_ssl_data, load_audio_files 7 | 8 | from .datasetbase import BasicDataset, AudioPathDataset 9 | from .augmentation import Subsample, RandomSubsample, AudioTransforms 10 | 11 | 12 | def get_audio_dataset(args, alg, dataset_name, num_labels, data_dir="./data", include_lb_to_ulb=True): 13 | """ 14 | Get the audio dataset and split the training samples into labeled and unlabeled sets. 15 | 16 | Args: 17 | alg (str): Algorithm. 18 | dataset_name (str): The name of the dataset to load. 19 | num_labels (int): The number of labeled samples for the training set. 20 | data_dir (str): The directory from which to load the dataset. 21 | include_lb_to_ulb (bool): Indicates whether to include labeled data in the unlabeled set. 22 | 23 | Returns: 24 | Tuple[Dataset, Dataset, Dataset, Dataset]: 25 | A tuple containing: 26 | - train labeled dataset 27 | - train unlabeled dataset 28 | - evaluation dataset 29 | - test dataset 30 | """ 31 | dataset = getattr(audio_datasets, dataset_name.upper()) 32 | 33 | train_dataset = dataset(data_dir, split="train", download=True) 34 | train_paths, train_targets = train_dataset._file_paths, train_dataset._labels 35 | 36 | test_dataset = dataset(data_dir, split="test", download=True) 37 | test_paths, test_targets = test_dataset._file_paths, test_dataset._labels 38 | 39 | if args.preload: 40 | train_data = load_audio_files(train_paths, args.sample_rate) 41 | test_data = load_audio_files(test_paths, args.sample_rate) 42 | AudioDataset = BasicDataset 43 | else: 44 | train_data = train_paths 45 | test_data = test_paths 46 | AudioDataset = AudioPathDataset 47 | 48 | transform_weak = RandomSubsample(max_length_seconds=args.max_length_seconds) 49 | transform_strong = AudioTransforms(max_length_seconds=args.max_length_seconds, dataset_name=dataset_name) 50 | transform_val = Subsample(max_length_seconds=args.max_length_seconds) 51 | 52 | eval_dset = AudioDataset(alg, test_data, test_targets, transform_val, False, None, args.sample_rate) 53 | test_dset = None 54 | 55 | if dataset_name.lower() in ["bvcc"]: 56 | dev_dataset = dataset(data_dir, split="dev", download=True) 57 | dev_paths, dev_targets = dev_dataset._file_paths, dev_dataset._labels 58 | dev_data = load_audio_files(dev_paths, args.sample_rate) if args.preload else dev_paths 59 | eval_dset = AudioDataset(alg, dev_data, dev_targets, transform_val, False, None, args.sample_rate) 60 | test_dset = AudioDataset(alg, test_data, test_targets, transform_val, False, None, args.sample_rate) 61 | 62 | if alg == "fullysupervised": 63 | lb_dset = AudioDataset(alg, train_data, train_targets, transform_weak, False, transform_strong, args.sample_rate) 64 | return lb_dset, None, eval_dset, test_dset 65 | 66 | lb_data, lb_targets, ulb_data, ulb_targets = split_ssl_data( 67 | args, 68 | train_data, 69 | train_targets, 70 | lb_num_labels=num_labels, 71 | ulb_num_labels=args.ulb_num_labels, 72 | include_lb_to_ulb=include_lb_to_ulb, 73 | ) 74 | 75 | lb_dset = AudioDataset(alg, lb_data, lb_targets, transform_weak, False, transform_strong, args.sample_rate) 76 | ulb_dset = AudioDataset(alg, ulb_data, ulb_targets, transform_weak, True, transform_strong, args.sample_rate) 77 | 78 | if alg == "supervised": 79 | ulb_dset = None 80 | 81 | return lb_dset, ulb_dset, eval_dset, test_dset 82 | -------------------------------------------------------------------------------- /semilearn/algorithms/rda/rda.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Pin-Yen Huang. 2 | # Licensed under the MIT License. 3 | 4 | import numpy as np 5 | 6 | from .utils import RDAHook 7 | 8 | from semilearn.core import AlgorithmBase 9 | from semilearn.core.utils import ALGORITHMS 10 | from semilearn.algorithms.utils import SSL_Argument 11 | 12 | 13 | @ALGORITHMS.register("rda") 14 | class RDA(AlgorithmBase): 15 | """ 16 | RDA algorithm (https://arxiv.org/abs/2410.22124). 17 | 18 | Args: 19 | - args (`argparse`): 20 | algorithm arguments 21 | - net_builder (`callable`): 22 | network loading function 23 | - tb_log (`TBLog`): 24 | tensorboard logger 25 | - logger (`logging.Logger`): 26 | logger to use 27 | - unsup_warm_up (`float`, *optional*, defaults to 0.4): 28 | Ramp up for weights for unsupervised loss 29 | - rda_num_refine_iter (`int`): 30 | Number of iterations to apply RDA. 31 | """ 32 | 33 | def __init__(self, args, net_builder, tb_log=None, logger=None): 34 | self.init( 35 | unsup_warm_up=args.unsup_warm_up, 36 | rda_num_refine_iter=args.rda_num_refine_iter, 37 | ) 38 | super().__init__(args, net_builder, tb_log, logger) 39 | 40 | def init(self, unsup_warm_up, rda_num_refine_iter): 41 | self.unsup_warm_up = unsup_warm_up 42 | self.rda_num_refine_iter = rda_num_refine_iter 43 | 44 | def set_hooks(self): 45 | super().set_hooks() 46 | # reset PseudoLabelingHook hook 47 | self.register_hook( 48 | RDAHook( 49 | train_ulb_len=len(self.dataset_dict["train_ulb"]), 50 | lb_targets=np.copy(self.dataset_dict["train_lb"].targets), 51 | num_refine_iter=self.rda_num_refine_iter, 52 | ), 53 | "RDAHook", 54 | ) 55 | 56 | def train_step(self, x_lb, y_lb, idx_ulb, x_ulb_w, **kwargs): 57 | self.idx_ulb = idx_ulb 58 | 59 | # inference and calculate sup losses 60 | with self.amp_cm(): 61 | outs_x_lb = self.model(x_lb) 62 | logits_x_lb = outs_x_lb["logits"] 63 | feats_x_lb = outs_x_lb["feat"] 64 | sup_loss = self.reg_loss(logits_x_lb, y_lb, reduction="mean") 65 | 66 | self.bn_controller.freeze_bn(self.model) 67 | outs_x_ulb_w = self.model(x_ulb_w) 68 | logits_x_ulb_w = outs_x_ulb_w["logits"] 69 | feats_x_ulb_w = outs_x_ulb_w["feat"] 70 | self.bn_controller.unfreeze_bn(self.model) 71 | 72 | # extract features for further use in the classification algorithm. 
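            # (Added note: the "gen_ulb_targets" hook call below hands the raw
            # predictions to RDAHook, which refreshes distribution-aligned pseudo
            # labels every rda_num_refine_iter iterations and returns the refined
            # values from the second epoch onward.)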
73 | feat_dict = {"x_lb": feats_x_lb, "x_ulb_w": feats_x_ulb_w} 74 | for k in kwargs: 75 | feat_dict[k] = self.model(kwargs[k], only_feat=True) 76 | 77 | # generate unlabeled targets (pseudo-labels) using the RDA hook 78 | pseudo_label = self.call_hook( 79 | "gen_ulb_targets", 80 | "RDAHook", 81 | logits=logits_x_ulb_w, 82 | ) 83 | 84 | unsup_loss = self.consistency_loss(logits_x_ulb_w, pseudo_label.detach(), "mse") 85 | 86 | unsup_warmup = np.clip(self.it / (self.unsup_warm_up * self.num_train_iter), a_min=0.0, a_max=1.0) 87 | total_loss = sup_loss + self.ulb_loss_ratio * unsup_loss * unsup_warmup 88 | 89 | out_dict = self.process_out_dict(loss=total_loss, feat=feat_dict) 90 | log_dict = self.process_log_dict(sup_loss=sup_loss.item(), unsup_loss=unsup_loss.item(), total_loss=total_loss.item()) 91 | return out_dict, log_dict 92 | 93 | @staticmethod 94 | def get_argument(): 95 | return [ 96 | SSL_Argument("--unsup_warm_up", float, 0.4), 97 | SSL_Argument("--rda_num_refine_iter", int, 1024), 98 | ] 99 | -------------------------------------------------------------------------------- /semilearn/datasets/nlp_datasets/yelp_review.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Pin-Yen Huang. 2 | # Licensed under the MIT License. 3 | # Code in this file is adapted from pytorch/vision 4 | # https://github.com/pytorch/vision/blob/main/torchvision/datasets/food101.py 5 | 6 | import json 7 | import numpy as np 8 | from pathlib import Path 9 | from typing import Any, Tuple 10 | 11 | from torch.utils.data import Dataset 12 | from torchvision.datasets.utils import download_and_extract_archive, verify_str_arg 13 | 14 | 15 | class YELP_REVIEW(Dataset): 16 | """`Yelp Review Dataset ` 17 | 18 | The Yelp Review dataset is a sentiment ordinal regression dataset, where the goal 19 | is to predict the rating of a customer based on their comment. The labels are divided 20 | into 5 classes (scores ranging from 0 to 4). Originally, each class contains 130,000 21 | training samples and 10,000 test samples. 22 | 23 | This version uses a processed Yelp Review dataset provided by USB 24 | (https://github.com/microsoft/semi-supervised-learning). It contains: 25 | - 50,000 samples per class for the training split (250,000 samples total) 26 | - 5,000 samples per class for the validation split (25,000 samples total) 27 | - The original test dataset remains unchanged (50,000 samples total) 28 | 29 | Additionally, the dataset includes preprocessed augmented text data (aug_0 and aug_1) 30 | generated using back-translation, along with the original text (ori). 31 | 32 | Args: 33 | root (string): Root directory of the dataset. 34 | split (string, optional): The dataset split, supports ``"train"`` (default), ``"dev"``, and ``"test"``. 35 | download (bool, optional): If True, downloads the dataset from the internet and 36 | puts it in root directory. If dataset is already downloaded, it is not 37 | downloaded again. Default is False.
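Each record in the split's JSON file maps a sample index to a dict with the keys
``"ori"`` (original text), ``"label"`` (rating), and optionally ``"aug_0"`` / ``"aug_1"``
(back-translated augmentations); ``__init__`` below parses these into
``(ori, aug_0, aug_1)`` text tuples and float32 labels.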
38 | """ 39 | 40 | _URL = "https://huggingface.co/datasets/py97/Yelp-Review/resolve/main/YelpReview.tar.gz" 41 | _MD5 = "4c3e3736f3dc2c175f5ff9b0f69e6043" 42 | 43 | def __init__( 44 | self, 45 | root: str, 46 | split: str = "train", 47 | download: bool = False, 48 | ) -> None: 49 | super().__init__() 50 | self._split = verify_str_arg(split, "split", ("train", "dev", "test")) 51 | self._base_folder = Path(root) / "yelp_review" 52 | self._text_folder = self._base_folder / "YelpReview" 53 | 54 | if download: 55 | self._download() 56 | 57 | if not self._check_exists(): 58 | raise RuntimeError("Dataset not found. You can use download=True to download it") 59 | 60 | with open(self._text_folder / f"{split}.json", "r") as f: 61 | data = json.load(f) 62 | 63 | texts, labels = [], [] 64 | for idx in data: 65 | aug_0 = data[idx].get("aug_0", None) 66 | aug_1 = data[idx].get("aug_1", None) 67 | texts.append((data[idx]["ori"], aug_0, aug_1)) 68 | labels.append(float(data[idx]["label"])) 69 | 70 | self._texts = np.array(texts, dtype="object") 71 | self._labels = np.array(labels, dtype=np.float32) 72 | 73 | def __len__(self) -> int: 74 | return len(self._texts) 75 | 76 | def __getitem__(self, idx: int) -> Tuple[Any, Any]: 77 | text, label = self._texts[idx], self._labels[idx] 78 | return text, label 79 | 80 | def extra_repr(self) -> str: 81 | return f"split={self._split}" 82 | 83 | def _check_exists(self) -> bool: 84 | return self._text_folder.exists() and self._text_folder.is_dir() 85 | 86 | def _download(self) -> None: 87 | if self._check_exists(): 88 | return 89 | download_and_extract_archive(self._URL, download_root=self._base_folder, md5=self._MD5) 90 | -------------------------------------------------------------------------------- /semilearn/datasets/collactors/nlp_collactor.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | from dataclasses import dataclass 6 | from typing import Optional, Union 7 | 8 | from transformers import BertTokenizerFast 9 | from transformers.file_utils import PaddingStrategy 10 | from transformers.tokenization_utils_base import PreTrainedTokenizerBase 11 | from transformers.data import default_data_collator 12 | 13 | 14 | @dataclass 15 | class DataCollatorWithPadding: 16 | """ 17 | Data collator that will dynamically pad the inputs received. 18 | 19 | Args: 20 | tokenizer ([`PreTrainedTokenizer`] or [`PreTrainedTokenizerFast`]): 21 | The tokenizer used for encoding the data. 22 | padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `True`): 23 | Select a strategy to pad the returned sequences (according to the model's padding side and padding index) 24 | among: 25 | - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single sequence 26 | if provided). 27 | - `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the maximum 28 | acceptable input length for the model if that argument is not provided. 29 | - `False` or `'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of different 30 | lengths). 31 | max_length (`int`, *optional*): 32 | Maximum length of the returned list and optionally padding length (see above). 33 | pad_to_multiple_of (`int`, *optional*): 34 | If set will pad the sequence to a multiple of the provided value. 
This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= 36 | 7.0 (Volta). 37 | return_tensors (`str`): 38 | The type of Tensor to return. Allowable values are "np", "pt" and "tf". 39 | """ 40 | 41 | tokenizer: PreTrainedTokenizerBase 42 | padding: Union[bool, str, PaddingStrategy] = True 43 | max_length: Optional[int] = None 44 | pad_to_multiple_of: Optional[int] = None 45 | return_tensors: str = "pt" 46 | 47 | def __call__(self, features): 48 | text_keys = ["x_lb", "x_lb_s", "x_ulb_w", "x_ulb_w_2", "x_ulb_s", "x_ulb_s_2"] 49 | text_features = {k: [] for k in text_keys} 50 | other_features = [] 51 | for f in features: 52 | exist_ks = [k for k in text_keys if k in f] 53 | for k in exist_ks: 54 | text = f.pop(k) 55 | input_ids = self.tokenizer(text, max_length=self.max_length, truncation=True, padding=False)["input_ids"] 56 | text_features[k].append({"input_ids": input_ids}) 57 | other_features.append(f) 58 | 59 | batch = default_data_collator(other_features, return_tensors="pt") 60 | 61 | for key, feats in text_features.items(): 62 | if len(feats) > 0: 63 | out = self.tokenizer.pad( 64 | feats, 65 | padding=True, 66 | max_length=None, 67 | pad_to_multiple_of=self.pad_to_multiple_of, 68 | return_tensors=self.return_tensors, 69 | ) 70 | batch[key] = {"input_ids": out["input_ids"], "attention_mask": out["attention_mask"]} 71 | 72 | return batch 73 | 74 | 75 | def get_bert_base_collactor(pretrain_path="bert-base-uncased", max_length=512): 76 | tokenizer = BertTokenizerFast.from_pretrained(pretrain_path) 77 | tokenizer.deprecation_warnings["Asking-to-pad-a-fast-tokenizer"] = True  # suppress the fast-tokenizer padding warning 78 | collact_fn = DataCollatorWithPadding(tokenizer, max_length=max_length) 79 | return collact_fn 80 | -------------------------------------------------------------------------------- /semilearn/datasets/nlp_datasets/amazon_review.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Pin-Yen Huang. 2 | # Licensed under the MIT License. 3 | # Code in this file is adapted from pytorch/vision 4 | # https://github.com/pytorch/vision/blob/main/torchvision/datasets/food101.py 5 | 6 | import json 7 | import numpy as np 8 | from pathlib import Path 9 | from typing import Any, Tuple 10 | 11 | from torch.utils.data import Dataset 12 | from torchvision.datasets.utils import download_and_extract_archive, verify_str_arg 13 | 14 | 15 | class AMAZON_REVIEW(Dataset): 16 | """`Amazon Review Dataset ` 17 | 18 | The Amazon Review dataset is a sentiment ordinal regression dataset, where the goal 19 | is to predict the rating of a customer based on their comment. The labels are divided 20 | into 5 classes (scores ranging from 0 to 4). Originally, each class contains 600,000 21 | training samples and 130,000 test samples. 22 | 23 | This version uses a processed Amazon Review dataset provided by USB 24 | (https://github.com/microsoft/semi-supervised-learning). It contains: 25 | - 50,000 samples per class for the training split (250,000 samples total) 26 | - 5,000 samples per class for the validation split (25,000 samples total) 27 | - The original test dataset remains unchanged (650,000 samples total) 28 | 29 | Additionally, the dataset includes preprocessed augmented text data (aug_0 and aug_1) 30 | generated using back-translation, along with the original text (ori). 31 | 32 | Args: 33 | root (string): Root directory of the dataset.
34 | split (string, optional): The dataset split, supports ``"train"`` (default) and ``"test"``. 35 | download (bool, optional): If True, downloads the dataset from the internet and 36 | puts it in root directory. If dataset is already downloaded, it is not 37 | downloaded again. Default is False. 38 | """ 39 | 40 | _URL = "https://huggingface.co/datasets/py97/Amazon-Review/resolve/main/AmazonReview.tar.gz" 41 | _MD5 = "080761d785bd86ed6ebcd8e388e401e3" 42 | 43 | def __init__( 44 | self, 45 | root: str, 46 | split: str = "train", 47 | download: bool = False, 48 | ) -> None: 49 | super().__init__() 50 | self._split = verify_str_arg(split, "split", ("train", "dev", "test")) 51 | self._base_folder = Path(root) / "amazon_review" 52 | self._text_folder = self._base_folder / "AmazonReview" 53 | 54 | if download: 55 | self._download() 56 | 57 | if not self._check_exists(): 58 | raise RuntimeError("Dataset not found. You can use download=True to download it") 59 | 60 | with open(self._text_folder / f"{split}.json", "r") as f: 61 | data = json.load(f) 62 | 63 | texts, labels = [], [] 64 | for idx in data: 65 | aug_0 = data[idx].get("aug_0", None) 66 | aug_1 = data[idx].get("aug_1", None) 67 | texts.append((data[idx]["ori"], aug_0, aug_1)) 68 | labels.append(float(data[idx]["label"])) 69 | 70 | self._texts = np.array(texts, dtype="object") 71 | self._labels = np.array(labels, dtype=np.float32) 72 | 73 | def __len__(self) -> int: 74 | return len(self._texts) 75 | 76 | def __getitem__(self, idx: int) -> Tuple[Any, Any]: 77 | text, label = self._texts[idx], self._labels[idx] 78 | return text, label 79 | 80 | def extra_repr(self) -> str: 81 | return f"split={self._split}" 82 | 83 | def _check_exists(self) -> bool: 84 | return self._text_folder.exists() and self._text_folder.is_dir() 85 | 86 | def _download(self) -> None: 87 | if self._check_exists(): 88 | return 89 | download_and_extract_archive(self._URL, download_root=self._base_folder, md5=self._MD5) 90 | -------------------------------------------------------------------------------- /results/classic_cv_average_log.csv: -------------------------------------------------------------------------------- 1 | exp_name,num_exp,min_MAE,min_MSE,max_R2,max_LCC,max_SRCC,max_KTAU,min_GMAE 2 | clss_utkface_lb2000,3,6.288±0.013,81.669±1.321,0.794±0.003,0.892±0.002,0.862±0.001,0.700±0.001,3.405±0.021 3 | clss_utkface_lb250,3,9.096±0.150,163.917±6.450,0.586±0.016,0.768±0.011,0.737±0.014,0.566±0.010,4.994±0.198 4 | clss_utkface_lb50,3,13.609±0.917,340.943±39.913,0.138±0.101,0.438±0.084,0.447±0.074,0.320±0.057,7.564±0.403 5 | fullysupervised_utkface,3,4.851±0.006,49.599±0.079,0.875±0.000,0.936±0.000,0.910±0.001,0.765±0.001,2.564±0.014 6 | rankup(meanteacher-fixmatch)_utkface_lb250,3,7.006±0.172,98.049±5.135,0.752±0.013,0.868±0.008,0.831±0.004,0.662±0.006,3.809±0.128 7 | meanteacher_utkface_lb2000,3,6.291±0.029,81.706±1.547,0.793±0.004,0.892±0.002,0.862±0.001,0.700±0.000,3.256±0.030 8 | meanteacher_utkface_lb250,3,8.849±0.249,163.861±7.975,0.586±0.020,0.771±0.012,0.745±0.013,0.572±0.012,4.631±0.167 9 | meanteacher_utkface_lb50,3,13.925±0.197,345.331±14.528,0.127±0.037,0.417±0.030,0.423±0.023,0.300±0.017,7.942±0.164 10 | rankup(mixmatch-fixmatch)_utkface_lb250,3,7.117±0.092,91.350±2.525,0.769±0.006,0.893±0.003,0.866±0.002,0.701±0.001,4.443±0.113 11 | mixmatch_utkface_lb2000,3,6.033±0.070,69.569±1.441,0.824±0.004,0.911±0.002,0.883±0.002,0.724±0.002,3.584±0.024 12 | 
mixmatch_utkface_lb250,3,7.951±0.146,121.761±5.062,0.692±0.013,0.852±0.005,0.832±0.008,0.658±0.008,4.674±0.103 13 | mixmatch_utkface_lb50,3,11.441±0.451,237.182±11.271,0.401±0.028,0.677±0.029,0.674±0.035,0.500±0.028,6.979±0.361 14 | rankup(pimodel-fixmatch)_utkface_lb250,3,6.953±0.161,95.715±3.946,0.758±0.010,0.872±0.006,0.837±0.005,0.668±0.006,3.765±0.109 15 | pimodel_utkface_lb2000,3,6.308±0.101,83.168±2.448,0.790±0.006,0.890±0.004,0.860±0.003,0.698±0.003,3.267±0.021 16 | pimodel_utkface_lb250,3,9.453±0.302,184.454±11.701,0.534±0.030,0.737±0.019,0.706±0.015,0.533±0.013,4.890±0.141 17 | pimodel_utkface_lb50,3,13.821±1.020,356.119±33.893,0.100±0.086,0.385±0.100,0.387±0.092,0.275±0.069,7.820±0.707 18 | rankuprda_utkface_lb2000,3,5.513±0.074,61.921±1.524,0.844±0.004,0.919±0.002,0.890±0.003,0.736±0.004,2.967±0.041 19 | rankuprda_utkface_lb250,3,6.570±0.184,86.350±4.743,0.782±0.012,0.886±0.007,0.856±0.005,0.690±0.005,3.523±0.121 20 | rankuprda_utkface_lb50,3,9.329±0.539,177.405±16.268,0.552±0.041,0.757±0.026,0.770±0.009,0.595±0.005,4.938±0.476 21 | rda_utkface_lb250,3,8.644±0.217,154.906±9.034,0.609±0.023,0.794±0.014,0.772±0.012,0.599±0.009,4.497±0.172 22 | rda_utkface_lb50,3,14.342±1.273,371.893±49.433,0.060±0.125,0.409±0.108,0.442±0.104,0.317±0.078,7.834±0.703 23 | rankup_utkface_lb2000,3,5.614±0.068,64.245±1.032,0.838±0.003,0.916±0.002,0.887±0.003,0.732±0.004,2.988±0.061 24 | rankup_utkface_lb250,3,7.055±0.115,98.548±4.235,0.751±0.011,0.868±0.007,0.835±0.008,0.664±0.010,3.820±0.055 25 | rankup_utkface_lb50,3,9.959±0.620,192.124±17.021,0.514±0.043,0.720±0.030,0.703±0.019,0.530±0.018,5.511±0.528 26 | rankup(supervised-meanteacher)_utkface_lb250,3,8.757±0.135,155.536±6.919,0.607±0.018,0.782±0.010,0.750±0.005,0.577±0.006,4.623±0.072 27 | supervised_utkface_lb2000,3,6.281±0.064,81.616±1.769,0.794±0.004,0.892±0.002,0.862±0.001,0.700±0.001,3.262±0.043 28 | supervised_utkface_lb250,3,9.421±0.160,181.946±5.435,0.540±0.014,0.740±0.009,0.712±0.010,0.540±0.008,4.923±0.136 29 | supervised_utkface_lb50,3,14.128±0.555,360.227±36.492,0.090±0.092,0.357±0.082,0.371±0.071,0.262±0.052,8.035±0.148 30 | rankup(supervised-pimodel)_utkface_lb250,3,8.812±0.109,161.721±4.977,0.591±0.013,0.776±0.006,0.751±0.012,0.577±0.010,4.594±0.061 31 | rankup(supervised-supervised)_utkface_lb250,3,9.027±0.087,163.129±7.112,0.588±0.018,0.769±0.013,0.746±0.008,0.571±0.008,4.852±0.103 32 | ucvme_utkface_lb2000,3,5.902±0.066,70.910±2.583,0.821±0.007,0.906±0.004,0.877±0.002,0.718±0.003,3.203±0.045 33 | ucvme_utkface_lb250,3,8.630±0.170,148.059±2.562,0.626±0.006,0.794±0.006,0.767±0.007,0.593±0.008,4.773±0.143 34 | ucvme_utkface_lb50,3,13.491±0.954,333.775±43.387,0.157±0.110,0.409±0.132,0.412±0.127,0.294±0.093,7.960±0.585 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | 162 | .vscode/ 163 | 164 | data/ 165 | saved_models/ 166 | visualization/features/ 167 | visualization/figures/ 168 | _*/ -------------------------------------------------------------------------------- /semilearn/datasets/cv_datasets/utkface.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Pin-Yen Huang. 2 | # Licensed under the MIT License. 3 | # Code in this file is adapted from pytorch/vision 4 | # https://github.com/pytorch/vision/blob/main/torchvision/datasets/food101.py 5 | 6 | import numpy as np 7 | import pandas as pd 8 | from pathlib import Path 9 | from typing import Any, Callable, Optional, Tuple 10 | 11 | import PIL.Image 12 | 13 | from torchvision.datasets.utils import download_and_extract_archive, verify_str_arg 14 | from torchvision.datasets.vision import VisionDataset 15 | 16 | 17 | class UTKFACE(VisionDataset): 18 | """`The UTKFace Data Set ` 19 | 20 | The UTKFace dataset is an image age estimation dataset, where the goal is to predict the age of the person in an image. 21 | The labels range from 1 to 116 years old. The dataset consists of 23,705 face images, which we split into 18,964 22 | training samples and 4,741 test samples. The dataset is available in two versions: the original images and an aligned 23 | and cropped version. We use the aligned and cropped version of the UTKFace dataset here. 24 | 25 | Args: 26 | root (string): Root directory of the dataset. 27 | split (string, optional): The dataset split, supports ``"train"`` (default) and ``"test"``. 28 | transform (callable, optional): A function/transform that takes in a PIL image and returns a transformed 29 | version, e.g., ``transforms.RandomCrop``. 30 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 31 | download (bool, optional): If True, downloads the dataset from the internet and 32 | puts it in root directory. If dataset is already downloaded, it is not 33 | downloaded again. Default is False. 34 | """ 35 | 36 | _URL_MD5 = { 37 | "data": ("https://huggingface.co/datasets/py97/UTKFace-Cropped/resolve/main/UTKFace.tar.gz", "ae1a16905fbd795db921ff1d940df9cc"), 38 | "meta": ("https://github.com/pm25/regression-datasets/raw/refs/heads/main/data/utkface/meta.zip", "0983459bcfddbd93d6abdb821ae176c4"), 39 | } 40 | 41 | def __init__( 42 | self, 43 | root: str, 44 | split: str = "train", 45 | transform: Optional[Callable] = None, 46 | target_transform: Optional[Callable] = None, 47 | download: bool = False, 48 | ) -> None: 49 | super().__init__(root, transform=transform, target_transform=target_transform) 50 | self._split = verify_str_arg(split, "split", ("train", "test")) 51 | self._base_folder = Path(self.root) / "utkface" 52 | self._meta_folder = self._base_folder / "meta" 53 | self._images_folder = self._base_folder / "UTKFace" 54 | 55 | if download: 56 | self._download() 57 | 58 | if not self._check_exists(): 59 | raise RuntimeError("Dataset not found.
You can use download=True to download it") 60 | 61 | metadata = pd.read_csv(self._meta_folder / f"{split}.csv") 62 | self._file_paths = metadata["file_name"].apply(lambda x: self._images_folder / x).to_numpy(dtype="object") 63 | self._labels = metadata["label"].to_numpy(dtype=np.float32) 64 | 65 | def __len__(self) -> int: 66 | return len(self._file_paths) 67 | 68 | def __getitem__(self, idx: int) -> Tuple[Any, Any]: 69 | image_file, label = self._file_paths[idx], self._labels[idx] 70 | image = PIL.Image.open(image_file).convert("RGB") 71 | 72 | if self.transform: 73 | image = self.transform(image) 74 | 75 | if self.target_transform: 76 | label = self.target_transform(label) 77 | 78 | return image, label 79 | 80 | def extra_repr(self) -> str: 81 | return f"split={self._split}" 82 | 83 | def _check_exists(self) -> bool: 84 | return all(folder.exists() and folder.is_dir() for folder in (self._meta_folder, self._images_folder)) 85 | 86 | def _download(self) -> None: 87 | if self._check_exists(): 88 | return 89 | for url, md5 in self._URL_MD5.values(): 90 | download_and_extract_archive(url, download_root=self._base_folder, md5=md5) 91 | -------------------------------------------------------------------------------- /semilearn/datasets/cv_datasets/datasetbase.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import numpy as np 6 | from PIL import Image 7 | 8 | from torchvision import transforms 9 | from torch.utils.data import Dataset 10 | 11 | 12 | class BasicDataset(Dataset): 13 | """ 14 | BasicDataset returns a pair of image and labels (targets). 15 | If targets are not given, BasicDataset returns None as the label. 16 | This class supports strong augmentation, 17 | and returns both weakly and strongly augmented images. 18 | """ 19 | 20 | def __init__(self, alg, data, targets=None, transform=None, is_ulb=False, strong_transform=None, *args, **kwargs): 21 | """ 22 | Args: 23 | alg (str): Algorithm. 24 | data (list): List of PIL images or numpy arrays. 25 | targets (list or None): Target labels corresponding to the images. 26 | transform (callable or None): Basic transformation function applied to the image. 27 | is_ulb (bool): Indicates if the dataset is unlabeled. 28 | strong_transform (callable or None): Strong transformation function applied to the image. 29 | """ 30 | super(BasicDataset, self).__init__() 31 | self.alg = alg 32 | self.data = data 33 | self.targets = targets 34 | self.transform = transform 35 | self.strong_transform = strong_transform 36 | self.is_ulb = is_ulb 37 | 38 | self._check_transform() 39 | 40 | def __sample__(self, idx): 41 | """Retrieve the image and corresponding target at a specific index.""" 42 | img = self.data[idx] 43 | target = None if self.targets is None else self.targets[idx] 44 | return img, target 45 | 46 | def __getitem__(self, idx): 47 | """ 48 | Returns weakly and/or strongly augmented images based on the algorithm and dataset type.
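For example, a labeled sample is returned as ``{"idx_lb": ..., "x_lb": ..., "y_lb": ...}``,
while an unlabeled sample under ``alg="rankup"`` is returned as
``{"idx_ulb": ..., "x_ulb_w": ..., "x_ulb_s": ...}`` (see ``_determine_data_keys`` below).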
49 | """ 50 | img, target = self.__sample__(idx) 51 | 52 | if isinstance(img, np.ndarray): 53 | img = Image.fromarray(img) 54 | 55 | if self.transform is None: 56 | return {"x_lb": transforms.ToTensor()(img), "y_lb": target} 57 | 58 | data_dict = { 59 | "idx_lb": lambda: idx, 60 | "x_lb": lambda: self.transform(img), 61 | "x_lb_s": lambda: self.strong_transform(img), 62 | "y_lb": lambda: target, 63 | "idx_ulb": lambda: idx, 64 | "x_ulb_w": lambda: self.transform(img), 65 | "x_ulb_w_2": lambda: self.transform(img), 66 | "x_ulb_s": lambda: self.strong_transform(img), 67 | "x_ulb_s_2": lambda: self.strong_transform(img), 68 | } 69 | 70 | data_keys = self._determine_data_keys() 71 | return {k: data_dict[k]() for k in data_keys} 72 | 73 | def __len__(self): 74 | return len(self.data) 75 | 76 | def _check_transform(self): 77 | """Ensure strong augmentation is used if required by the algorithm.""" 78 | if self.strong_transform is None and self.is_ulb: 79 | assert self.alg not in ["rankup"], f"alg {self.alg} requires strong augmentation" 80 | 81 | def _determine_data_keys(self): 82 | """Determine the required output data based on the algorithm type.""" 83 | data_keys = set() 84 | 85 | if not self.is_ulb: 86 | data_keys.update({"idx_lb", "x_lb", "y_lb"}) 87 | return data_keys 88 | 89 | # for regression algorithms 90 | if self.alg == "fullysupervised" or self.alg == "supervised": 91 | data_keys.update({"idx_ulb"}) 92 | elif self.alg == "rankup" or self.alg == "rankuprda": 93 | data_keys.update({"idx_ulb", "x_ulb_w", "x_ulb_s"}) 94 | elif self.alg == "pimodel" or self.alg == "meanteacher" or self.alg == "mixmatch": 95 | data_keys.update({"idx_ulb", "x_ulb_w", "x_ulb_w_2"}) 96 | else: 97 | data_keys.update({"idx_ulb", "x_ulb_w"}) 98 | 99 | return data_keys 100 | 101 | 102 | class ImagePathDataset(BasicDataset): 103 | """Dataset subclass that directly opens images from file paths.""" 104 | 105 | def __sample__(self, idx): 106 | path, target = super().__sample__(idx) 107 | img = Image.open(path).convert("RGB") 108 | return img, target 109 | -------------------------------------------------------------------------------- /semilearn/datasets/audio_datasets/datasetbase.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import librosa 6 | 7 | from torch.utils.data import Dataset 8 | 9 | 10 | class BasicDataset(Dataset): 11 | """ 12 | BasicDataset returns a pair of audio and labels (targets). 13 | If targets are not given, BasicDataset returns None as the label. 14 | This class supports strong augmentation, 15 | and return both weakly and strongly augmented images. 16 | """ 17 | 18 | def __init__(self, alg, data, targets=None, transform=None, is_ulb=False, strong_transform=None, sample_rate=16000, *args, **kwargs): 19 | """ 20 | Args: 21 | alg (str): Algorithm. 22 | data (list): List of audio data. 23 | targets (list or None): Target labels corresponding to the images. 24 | is_ulb (bool): Indicates if the dataset is unlabeled. 25 | transform (callable or None): Basic transformation function applied to the image. 26 | is_ulb (bool): Indicates if the dataset is unlabeled. 27 | strong_transform (callable or None): Strong transformation function applied to the image. 
28 | """ 29 | super(BasicDataset, self).__init__() 30 | self.alg = alg 31 | self.data = data 32 | self.targets = targets 33 | self.transform = transform 34 | self.strong_transform = strong_transform 35 | self.is_ulb = is_ulb 36 | self.sample_rate = sample_rate 37 | 38 | self._check_transform() 39 | 40 | def __sample__(self, idx): 41 | """Retrieve the audio and corresponding target at a specific index.""" 42 | audio = self.data[idx] 43 | target = None if self.targets is None else self.targets[idx] 44 | return audio, target 45 | 46 | def __getitem__(self, idx): 47 | """ 48 | Returns weakly and/or strongly augmented images based on the algorithm and dataset type. 49 | """ 50 | wav, target = self.__sample__(idx) 51 | 52 | if self.transform is None: 53 | return {"x_lb": wav, "y_lb": target} 54 | 55 | data_dict = { 56 | "idx_lb": lambda: idx, 57 | "x_lb": lambda: self.transform(wav, sample_rate=self.sample_rate), 58 | "x_lb_s": lambda: self.strong_transform(wav, sample_rate=self.sample_rate), 59 | "y_lb": lambda: target, 60 | "idx_ulb": lambda: idx, 61 | "x_ulb_w": lambda: self.transform(wav, sample_rate=self.sample_rate), 62 | "x_ulb_w_2": lambda: self.transform(wav, sample_rate=self.sample_rate), 63 | "x_ulb_s": lambda: self.strong_transform(wav, sample_rate=self.sample_rate), 64 | "x_ulb_s_2": lambda: self.strong_transform(wav, sample_rate=self.sample_rate), 65 | } 66 | 67 | data_keys = self._determine_data_keys() 68 | return {k: data_dict[k]() for k in data_keys} 69 | 70 | def __len__(self): 71 | return len(self.data) 72 | 73 | def _check_transform(self): 74 | """Ensure strong augmentation is used if required by the algorithm.""" 75 | if self.strong_transform is None and self.is_ulb: 76 | assert self.alg not in ["rankup"], f"alg {self.alg} requires strong augmentation" 77 | 78 | def _determine_data_keys(self): 79 | """Determine the required output data based on the algorithm type.""" 80 | data_keys = set() 81 | 82 | if not self.is_ulb: 83 | data_keys.update({"idx_lb", "x_lb", "y_lb"}) 84 | return data_keys 85 | 86 | # for regression algorithms 87 | if self.alg == "fullysupervised" or self.alg == "supervised": 88 | data_keys.update({"idx_ulb"}) 89 | elif self.alg == "rankup" or self.alg == "rankuprda": 90 | data_keys.update({"idx_ulb", "x_ulb_w", "x_ulb_s"}) 91 | elif self.alg == "pimodel" or self.alg == "meanteacher" or self.alg == "mixmatch": 92 | data_keys.update({"idx_ulb", "x_ulb_w", "x_ulb_w_2"}) 93 | else: 94 | data_keys.update({"idx_ulb", "x_ulb_w"}) 95 | 96 | return data_keys 97 | 98 | 99 | class AudioPathDataset(BasicDataset): 100 | """Dataset subclass that directly opens audio from file paths.""" 101 | 102 | def __sample__(self, idx): 103 | path, target = super().__sample__(idx) 104 | waveform, _ = librosa.load(path, sr=self.sample_rate, mono=True) 105 | return waveform, target 106 | -------------------------------------------------------------------------------- /semilearn/nets/bert/bert.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import torch 6 | import torch.nn as nn 7 | from transformers import BertModel, BertConfig 8 | 9 | from semilearn.nets.utils import init_weights 10 | 11 | 12 | class RegressionBert(nn.Module): 13 | """ 14 | A regression model based on the Bert architecture (https://arxiv.org/abs/1810.04805). 15 | 16 | Args: 17 | use_pretrained (bool): Whether to use a pretrained weights. 
If `pretrained_path` is set and 18 | `use_pretrained` is `False`, the model will load the architecture 19 | without pretrained weights. Defaults to True. 20 | pretrained_path (str or None): The Hugging Face path to the pretrained model. 21 | If None, a model with default configuration will be created. 22 | drop_rate (float): The dropout rate applied before the regression layer. Defaults to 0.1. 23 | Note: unlike the audio backbones in this repository, RegressionBert does not 24 | expose a freeze_encoder option; the full encoder is always fine-tuned 25 | together with the regressor head. 26 | 27 | Attributes: 28 | model (BertModel): The underlying Bert model. 29 | config (BertConfig): Configuration of the Bert model. 30 | dropout (nn.Dropout): Dropout layer for regularization. 31 | num_features (int): Number of features from the model's hidden layer. 32 | regressor (nn.Sequential): The regressor head consisting of linear layers and activation. 33 | """ 34 | 35 | def __init__(self, use_pretrained=False, pretrained_path=None, drop_rate=0.1): 36 | super(RegressionBert, self).__init__() 37 | # Load pre-trained bert model 38 | self.model, self.config = self.load_model(use_pretrained, pretrained_path) 39 | self.dropout = torch.nn.Dropout(p=drop_rate, inplace=False) 40 | self.num_features = self.config.hidden_size 41 | self.regressor = nn.Sequential(*[nn.Linear(self.num_features, self.num_features), nn.GELU(), nn.Linear(self.num_features, 1)]) 42 | 43 | self.regressor.apply(init_weights) 44 | 45 | def load_model(self, use_pretrained=True, pretrained_path=None): 46 | if use_pretrained and pretrained_path: 47 | config = BertConfig.from_pretrained(pretrained_path) 48 | model = BertModel.from_pretrained(pretrained_path) 49 | return model, config 50 | 51 | config = BertConfig() if not pretrained_path else BertConfig.from_pretrained(pretrained_path) 52 | model = BertModel(config) 53 | return model, config 54 | 55 | def forward(self, x, only_fc=False, only_feat=False, return_embed=False, **kwargs): 56 | """ 57 | Args: 58 | x: input tensor, depends on only_fc and only_feat flag 59 | only_fc: only use classifier, input should be features before classifier 60 | only_feat: only return pooled features 61 | return_embed: return word embedding, used for vat 62 | """ 63 | if only_fc: 64 | logits = self.regressor(x).flatten() 65 | return logits 66 | 67 | out_dict = self.model(**x, output_hidden_states=True, return_dict=True) 68 | last_hidden = out_dict["last_hidden_state"] 69 | drop_hidden = self.dropout(last_hidden) 70 | pooled_output = torch.mean(drop_hidden, 1) 71 | 72 | if only_feat: 73 | return pooled_output 74 | 75 | logits = self.regressor(pooled_output).flatten() 76 | result_dict = {"logits": logits, "feat": pooled_output} 77 | 78 | if return_embed: 79 | result_dict["embed"] = out_dict["hidden_states"][0] 80 | 81 | return result_dict 82 | 83 | def extract(self, x): 84 | out_dict = self.model(**x, output_hidden_states=True, return_dict=True) 85 | last_hidden = out_dict["last_hidden_state"] 86 | drop_hidden = self.dropout(last_hidden) 87 | pooled_output = torch.mean(drop_hidden, 1) 88 | return pooled_output 89 | 90 | def group_matcher(self, coarse=False, prefix=""): 91 | matcher = dict(stem=r"^{}model.embeddings".format(prefix), blocks=r"^{}model.encoder.layer.(\d+)".format(prefix)) 92 | return matcher 93 | 94 | def no_weight_decay(self): 95 | return [] 96 | 97 | 98 | def bert_base(pretrained=True, pretrained_path="bert-base-cased", **kwargs): 99 | model = RegressionBert(use_pretrained=pretrained,
pretrained_path=pretrained_path, **kwargs) 100 | return model 101 | -------------------------------------------------------------------------------- /semilearn/nets/hubert/hubert.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | from transformers import HubertModel, HubertConfig 9 | 10 | from semilearn.nets.utils import init_weights 11 | 12 | 13 | class RegressionHubert(nn.Module): 14 | """ 15 | A regression model based on the Hubert architecture (https://arxiv.org/abs/2106.07447). 16 | 17 | Args: 18 | use_pretrained (bool): Whether to use a pretrained weights. If `pretrained_path` is set and 19 | `use_pretrained` is `False`, the model will load architecture 20 | without pretrained weights. Defaults to True. 21 | pretrained_path (str or None): The Hugging Face path to the pretrained model. 22 | If None, a model with default configuration will be created. 23 | drop_rate (float): The dropout rate applied before the regression layer. Defaults to 0.1. 24 | freeze_encoder (bool): If True, the encoder will be frozen during training, 25 | and only the regressor head will be trained. 26 | Do not freeze the encoder when using with RankUp or ARC. 27 | 28 | Attributes: 29 | model (HubertModel): The underlying Hubert model. 30 | config (HubertConfig): Configuration of the Hubert model. 31 | dropout (nn.Dropout): Dropout layer for regularization. 32 | num_features (int): Number of features from the model's hidden layer. 33 | regressor (nn.Sequential): The regressor head consisting of linear layers and activation. 34 | """ 35 | 36 | def __init__(self, use_pretrained=False, pretrained_path=None, drop_rate=0.1, freeze_encoder=True): 37 | super(RegressionHubert, self).__init__() 38 | self.model, self.config = self.load_model(use_pretrained, pretrained_path) 39 | if freeze_encoder: 40 | self.model.freeze_feature_encoder() 41 | self.dropout = torch.nn.Dropout(p=drop_rate, inplace=False) 42 | self.num_features = self.config.hidden_size 43 | self.regressor = nn.Sequential(*[nn.Linear(self.num_features, self.num_features), nn.GELU(), nn.Linear(self.num_features, 1)]) 44 | 45 | self.regressor.apply(init_weights) 46 | 47 | def load_model(self, use_pretrained=True, pretrained_path=None): 48 | if use_pretrained and pretrained_path: 49 | config = HubertConfig.from_pretrained(pretrained_path) 50 | model = HubertModel.from_pretrained(pretrained_path) 51 | return model, config 52 | 53 | config = HubertConfig() if not pretrained_path else HubertConfig.from_pretrained(pretrained_path) 54 | model = HubertModel(config) 55 | return model, config 56 | 57 | def forward(self, x, only_fc=False, only_feat=False, **kwargs): 58 | """ 59 | Args: 60 | x: input tensor, depends on only_fc and only_feat flag 61 | only_fc: only use classifier, input should be features before classifier 62 | only_feat: only return pooled features 63 | """ 64 | if only_fc: 65 | logits = self.regressor(x).flatten() 66 | return logits 67 | 68 | pooled_output = self.extract(x) 69 | 70 | if only_feat: 71 | return pooled_output 72 | 73 | logits = self.regressor(pooled_output).flatten() 74 | result_dict = {"logits": logits, "feat": pooled_output} 75 | return result_dict 76 | 77 | def extract(self, x): 78 | out_dict = self.model(x, output_hidden_states=True, return_dict=True) 79 | last_hidden = out_dict["last_hidden_state"] 80 | embed = 
out_dict["hidden_states"][0] 81 | drop_hidden = self.dropout(last_hidden) 82 | pooled_output = torch.mean(drop_hidden, 1) 83 | return pooled_output 84 | 85 | def group_matcher(self, coarse=False, prefix=""): 86 | matcher = dict( 87 | stem=r"^{}model.feature_projection|^{}model.feature_extractor|^{}model.encoder.pos_conv_embed".format(prefix, prefix, prefix), 88 | blocks=r"^{}model.encoder.layers.(\d+)".format(prefix), 89 | ) 90 | return matcher 91 | 92 | def no_weight_decay(self): 93 | return [] 94 | 95 | 96 | def hubert_base(pretrained=False, pretrained_path="facebook/hubert-base-ls960", **kwargs): 97 | model = RegressionHubert(use_pretrained=pretrained, pretrained_path=pretrained_path, **kwargs) 98 | return model 99 | 100 | 101 | if __name__ == "__main__": 102 | model = hubert_base() 103 | print(model) 104 | -------------------------------------------------------------------------------- /semilearn/nets/wave2vecv2/wave2vecv2.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | from transformers import Wav2Vec2Model, Wav2Vec2Config 9 | 10 | from semilearn.nets.utils import init_weights 11 | 12 | 13 | class RegressionWav2Vec2(nn.Module): 14 | """ 15 | A regression model based on the Wav2Vec2 architecture (https://arxiv.org/abs/2006.11477). 16 | 17 | Args: 18 | use_pretrained (bool): Whether to use a pretrained weights. If `pretrained_path` is set and 19 | `use_pretrained` is `False`, the model will load architecture 20 | without pretrained weights. Defaults to True. 21 | pretrained_path (str or None): The Hugging Face path to the pretrained model. 22 | If None, a model with default configuration will be created. 23 | drop_rate (float): The dropout rate applied before the regression layer. Defaults to 0.1. 24 | freeze_encoder (bool): If True, the encoder will be frozen during training, 25 | and only the regressor head will be trained. 26 | Do not freeze the encoder when using with RankUp or ARC. 27 | 28 | Attributes: 29 | model (Wav2Vec2Model): The underlying Wav2Vec2 model. 30 | config (Wav2Vec2Config): Configuration of the Wav2Vec2 model. 31 | dropout (nn.Dropout): Dropout layer for regularization. 32 | num_features (int): Number of features from the model's hidden layer. 33 | regressor (nn.Sequential): The regressor head consisting of linear layers and activation. 
34 | """ 35 | 36 | def __init__(self, use_pretrained=True, pretrained_path=None, drop_rate=0.1, freeze_encoder=True): 37 | super(RegressionWav2Vec2, self).__init__() 38 | self.model, self.config = self.load_model(use_pretrained, pretrained_path) 39 | if freeze_encoder: 40 | self.model.freeze_feature_encoder() 41 | self.dropout = torch.nn.Dropout(p=drop_rate, inplace=False) 42 | self.num_features = self.config.hidden_size 43 | self.regressor = nn.Sequential( 44 | *[nn.Linear(self.config.hidden_size, self.config.hidden_size), nn.GELU(), nn.Linear(self.config.hidden_size, 1)] 45 | ) 46 | 47 | self.regressor.apply(init_weights) 48 | 49 | def load_model(self, use_pretrained=True, pretrained_path=None): 50 | if use_pretrained and pretrained_path: 51 | config = Wav2Vec2Config.from_pretrained(pretrained_path) 52 | model = Wav2Vec2Model.from_pretrained(pretrained_path) 53 | return model, config 54 | 55 | config = Wav2Vec2Config() if not pretrained_path else Wav2Vec2Config.from_pretrained(pretrained_path) 56 | model = Wav2Vec2Model(config) 57 | return model, config 58 | 59 | def forward(self, x, only_fc=False, only_feat=False, **kwargs): 60 | """ 61 | Args: 62 | x: input tensor, depends on only_fc and only_feat flag 63 | only_fc: only use classifier, input should be features before classifier 64 | only_feat: only return pooled features 65 | """ 66 | if only_fc: 67 | logits = self.regressor(x).flatten() 68 | return logits 69 | 70 | pooled_output = self.extract(x) 71 | 72 | if only_feat: 73 | return pooled_output 74 | 75 | logits = self.regressor(pooled_output).flatten() 76 | result_dict = {"logits": logits, "feat": pooled_output} 77 | return result_dict 78 | 79 | def extract(self, x): 80 | out_dict = self.model(x, output_hidden_states=True, return_dict=True) 81 | last_hidden = out_dict["last_hidden_state"] 82 | embed = out_dict["hidden_states"][0] 83 | drop_hidden = self.dropout(last_hidden) 84 | pooled_output = torch.mean(drop_hidden, 1) 85 | return pooled_output 86 | 87 | def group_matcher(self, coarse=False, prefix=""): 88 | matcher = dict( 89 | stem=r"^{}model.feature_projection|^{}model.feature_extractor".format(prefix, prefix), 90 | blocks=r"^{}model.encoder.layers.(\d+)".format(prefix), 91 | ) 92 | return matcher 93 | 94 | def no_weight_decay(self): 95 | return [] 96 | 97 | 98 | def wave2vecv2_base(pretrained=True, pretrained_path="facebook/wav2vec2-base-960h", **kwargs): 99 | model = RegressionWav2Vec2(use_pretrained=pretrained, pretrained_path=pretrained_path, **kwargs) 100 | return model 101 | 102 | 103 | if __name__ == "__main__": 104 | model = wave2vecv2_base(True, "facebook/wav2vec2-base-960h") 105 | print(model) 106 | -------------------------------------------------------------------------------- /semilearn/nets/whisper/whisper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | from transformers import WhisperModel, WhisperConfig 9 | 10 | from .whisper_encoder import MyWhisperEncoder 11 | from semilearn.nets.utils import init_weights 12 | 13 | 14 | class RegressionWhisper(nn.Module): 15 | """ 16 | A regression model based on the Whisper architecture (https://arxiv.org/abs/2212.04356). 17 | 18 | Args: 19 | use_pretrained (bool): Whether to use a pretrained weights. 
If `pretrained_path` is set and 20 | `use_pretrained` is `False`, the model will load architecture 21 | without pretrained weights. Defaults to True. 22 | pretrained_path (str or None): The Hugging Face path to the pretrained model. 23 | If None, a model with default configuration will be created. 24 | drop_rate (float): The dropout rate applied before the regression layer. Defaults to 0.1. 25 | freeze_encoder (bool): If True, the encoder will be frozen during training, 26 | and only the regressor head will be trained. 27 | Do not freeze the encoder when using with RankUp or ARC. 28 | 29 | Attributes: 30 | model (WhisperModel): The underlying Whisper model. 31 | config (WhisperConfig): Configuration of the Whisper model. 32 | dropout (nn.Dropout): Dropout layer for regularization. 33 | num_features (int): Number of features from the model's hidden layer. 34 | regressor (nn.Sequential): The regressor head consisting of linear layers and activation. 35 | """ 36 | 37 | def __init__(self, use_pretrained=True, pretrained_path=None, drop_rate=0.1, freeze_encoder=False): 38 | super(RegressionWhisper, self).__init__() 39 | self.model, self.config = self.load_model(use_pretrained, pretrained_path) 40 | if freeze_encoder: 41 | self.model.freeze_encoder() 42 | self.model = MyWhisperEncoder.cast(self.model.encoder) # overwrite default WhisperEncoder forward() function 43 | self.dropout = torch.nn.Dropout(p=drop_rate, inplace=False) 44 | self.num_features = self.config.hidden_size 45 | self.regressor = nn.Sequential( 46 | *[nn.Linear(self.config.hidden_size, self.config.hidden_size), nn.GELU(), nn.Linear(self.config.hidden_size, 1)] 47 | ) 48 | 49 | self.regressor.apply(init_weights) 50 | 51 | def load_model(self, use_pretrained=True, pretrained_path=None): 52 | if use_pretrained and pretrained_path: 53 | config = WhisperConfig.from_pretrained(pretrained_path) 54 | model = WhisperModel.from_pretrained(pretrained_path) 55 | return model, config 56 | 57 | config = WhisperConfig() if not pretrained_path else WhisperConfig.from_pretrained(pretrained_path) 58 | model = WhisperModel(config) 59 | return model, config 60 | 61 | def forward(self, x, only_fc=False, only_feat=False, **kwargs): 62 | """ 63 | Args: 64 | x: input tensor, depends on only_fc and only_feat flag 65 | only_fc: only use classifier, input should be features before classifier 66 | only_feat: only return pooled features 67 | """ 68 | if only_fc: 69 | logits = self.regressor(x).flatten() 70 | return logits 71 | 72 | pooled_output = self.extract(x) 73 | 74 | if only_feat: 75 | return pooled_output 76 | 77 | logits = self.regressor(pooled_output).flatten() 78 | result_dict = {"logits": logits, "feat": pooled_output} 79 | return result_dict 80 | 81 | def extract(self, x): 82 | out_dict = self.model(x, output_hidden_states=True, return_dict=True) 83 | last_hidden = out_dict["last_hidden_state"] 84 | embed = out_dict["hidden_states"][0] 85 | drop_hidden = self.dropout(last_hidden) 86 | pooled_output = torch.mean(drop_hidden, 1) 87 | return pooled_output 88 | 89 | def group_matcher(self, coarse=False, prefix=""): 90 | matcher = dict( 91 | stem=r"^{}model.feature_projection|^{}model.feature_extractor".format(prefix, prefix), 92 | blocks=r"^{}model.encoder.layers.(\d+)".format(prefix), 93 | ) 94 | return matcher 95 | 96 | def no_weight_decay(self): 97 | return [] 98 | 99 | 100 | def whisper_base(pretrained=True, pretrained_path="openai/whisper-base", **kwargs): 101 | model = RegressionWhisper(use_pretrained=pretrained, 
pretrained_path=pretrained_path, **kwargs) 102 | return model 103 | 104 | 105 | if __name__ == "__main__": 106 | model = whisper_base(True, "openai/whisper-tiny") 107 | print(model) 108 | -------------------------------------------------------------------------------- /semilearn/datasets/collactors/audio_collactor.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | from dataclasses import dataclass 6 | from typing import Any, Dict, List, Optional, Union 7 | 8 | from transformers import AutoFeatureExtractor 9 | from transformers.file_utils import PaddingStrategy 10 | from transformers.tokenization_utils_base import PreTrainedTokenizerBase 11 | from transformers.data import default_data_collator 12 | 13 | 14 | @dataclass 15 | class DataCollatorWithPadding: 16 | """ 17 | Data collator that will dynamically pad the inputs received. 18 | 19 | Args: 20 | tokenizer ([`PreTrainedTokenizer`] or [`PreTrainedTokenizerFast`]): 21 | The tokenizer or audio feature extractor used for encoding the data. 22 | padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `True`): 23 | Select a strategy to pad the returned sequences (according to the model's padding side and padding index) 24 | among: 25 | - `True` or `'longest'` (default): Pad to the longest sequence in the batch (or no padding if only a single sequence 26 | is provided). 27 | - `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the maximum 28 | acceptable input length for the model if that argument is not provided. 29 | - `False` or `'do_not_pad'`: No padding (i.e., can output a batch with sequences of different 30 | lengths). 31 | max_length (`int`, *optional*): 32 | Maximum audio length in seconds; converted to a length in samples as `max_length * sample_rate` in `__call__`. 33 | pad_to_multiple_of (`int`, *optional*): 34 | If set will pad the sequence to a multiple of the provided value. 35 | This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= 36 | 7.0 (Volta). 37 | return_tensors (`str`): 38 | The type of Tensor to return. Allowable values are "np", "pt" and "tf".
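sample_rate (`int`, *optional*, defaults to 16000):
    The sampling rate assumed for the raw audio inputs; used together with
    `max_length` to compute the padding/truncation length in samples.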
39 | """ 40 | 41 | tokenizer: PreTrainedTokenizerBase 42 | padding: Union[bool, str, PaddingStrategy] = True 43 | max_length: Optional[int] = None 44 | sample_rate: Optional[int] = 16000 45 | pad_to_multiple_of: Optional[int] = None 46 | return_tensors: str = "pt" 47 | 48 | def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, Any]: 49 | wav_keys = ["x_lb", "x_lb_s", "x_ulb_w", "x_ulb_w_2", "x_ulb_s", "x_ulb_s2"] 50 | wav_features = {k: [] for k in wav_keys} 51 | other_features = [] 52 | for f in features: 53 | exist_ks = [k for k in wav_keys if k in f] 54 | for k in exist_ks: 55 | feat = f.pop(k) 56 | wav_features[k].append(feat) 57 | other_features.append(f) 58 | 59 | batch = default_data_collator(other_features, return_tensors="pt") 60 | 61 | for key, feats in wav_features.items(): 62 | if len(feats) > 0: 63 | out = self.tokenizer( 64 | feats, 65 | padding=True if key == "x_lb" else "max_length", 66 | max_length=int(self.max_length * self.sample_rate), 67 | sampling_rate=self.sample_rate, 68 | pad_to_multiple_of=self.pad_to_multiple_of, 69 | return_tensors=self.return_tensors, 70 | truncation=True, 71 | ) 72 | if "input_values" in out: 73 | input_values = out["input_values"] 74 | elif "input_features" in out: 75 | input_values = out["input_features"] 76 | batch[key] = input_values 77 | 78 | return batch 79 | 80 | 81 | def get_wave2vecv2_base_collactor(pretrain_path="facebook/wav2vec2-base-960h", max_length=4, sample_rate=16000): 82 | pretrain_path = "facebook/wav2vec2-base-960h" if pretrain_path == "" else pretrain_path 83 | feature_extractor = AutoFeatureExtractor.from_pretrained(pretrain_path) 84 | collator = DataCollatorWithPadding(feature_extractor, max_length=max_length, sample_rate=sample_rate) 85 | return collator 86 | 87 | 88 | def get_hubert_base_collactor(pretrain_path="facebook/hubert-base-ls960", max_length=4, sample_rate=16000): 89 | pretrain_path = "facebook/hubert-base-ls960" if pretrain_path == "" else pretrain_path 90 | feature_extractor = AutoFeatureExtractor.from_pretrained(pretrain_path) 91 | collator = DataCollatorWithPadding(feature_extractor, max_length=max_length, sample_rate=sample_rate) 92 | return collator 93 | 94 | 95 | def get_whisper_base_collactor(pretrain_path="openai/whisper-base", max_length=30, sample_rate=16000): 96 | pretrain_path = "openai/whisper-base" if pretrain_path == "" else pretrain_path 97 | feature_extractor = AutoFeatureExtractor.from_pretrained(pretrain_path) 98 | collator = DataCollatorWithPadding(feature_extractor, max_length=max_length, sample_rate=sample_rate) 99 | return collator 100 | --------------------------------------------------------------------------------