├── CHANGELOG.md
├── semilearn
│   ├── algorithms
│   │   ├── clss
│   │   │   ├── __init__.py
│   │   │   ├── ordinal_entropy.py
│   │   │   └── clss.py
│   │   ├── rda
│   │   │   ├── __init__.py
│   │   │   ├── utils.py
│   │   │   └── rda.py
│   │   ├── ucvme
│   │   │   └── __init__.py
│   │   ├── rankup
│   │   │   ├── __init__.py
│   │   │   └── rankup_net.py
│   │   ├── mixmatch
│   │   │   └── __init__.py
│   │   ├── pimodel
│   │   │   ├── __init__.py
│   │   │   └── pimodel.py
│   │   ├── rankuprda
│   │   │   ├── __init__.py
│   │   │   ├── rda.py
│   │   │   └── rankup_net.py
│   │   ├── meanteacher
│   │   │   ├── __init__.py
│   │   │   └── meanteacher.py
│   │   ├── utils
│   │   │   ├── __init__.py
│   │   │   ├── misc.py
│   │   │   └── ops.py
│   │   ├── fullysupervised
│   │   │   ├── __init__.py
│   │   │   └── fullysupervised.py
│   │   ├── hooks
│   │   │   ├── __init__.py
│   │   │   ├── pseudo_label.py
│   │   │   └── masking.py
│   │   └── __init__.py
│   ├── core
│   │   ├── utils
│   │   │   ├── __init__.py
│   │   │   └── registry.py
│   │   ├── __init__.py
│   │   ├── criterions
│   │   │   ├── __init__.py
│   │   │   ├── cross_entropy.py
│   │   │   ├── consistency.py
│   │   │   └── cls_consistency.py
│   │   └── hooks
│   │       ├── __init__.py
│   │       ├── ema.py
│   │       ├── sampler_seed.py
│   │       ├── checkpoint.py
│   │       ├── timer.py
│   │       ├── hook.py
│   │       ├── param_update.py
│   │       ├── evaluation.py
│   │       ├── priority.py
│   │       ├── logging.py
│   │       ├── wandb.py
│   │       └── aim.py
│   ├── nets
│   │   ├── bert
│   │   │   ├── __init__.py
│   │   │   └── bert.py
│   │   ├── resnet
│   │   │   └── __init__.py
│   │   ├── hubert
│   │   │   ├── __init__.py
│   │   │   └── hubert.py
│   │   ├── whisper
│   │   │   ├── __init__.py
│   │   │   └── whisper.py
│   │   ├── wave2vecv2
│   │   │   ├── __init__.py
│   │   │   └── wave2vecv2.py
│   │   ├── vit
│   │   │   └── __init__.py
│   │   ├── wrn
│   │   │   └── __init__.py
│   │   └── __init__.py
│   ├── datasets
│   │   ├── samplers
│   │   │   ├── __init__.py
│   │   │   └── sampler.py
│   │   ├── audio_datasets
│   │   │   ├── augmentation
│   │   │   │   ├── __init__.py
│   │   │   │   ├── transforms.py
│   │   │   │   └── subsample.py
│   │   │   ├── __init__.py
│   │   │   ├── vcc2018.py
│   │   │   ├── get_dataset.py
│   │   │   └── datasetbase.py
│   │   ├── cv_datasets
│   │   │   ├── __init__.py
│   │   │   ├── augmentation
│   │   │   │   ├── __init__.py
│   │   │   │   └── transforms.py
│   │   │   ├── get_dataset.py
│   │   │   ├── utkface.py
│   │   │   └── datasetbase.py
│   │   ├── collactors
│   │   │   ├── __init__.py
│   │   │   ├── nlp_collactor.py
│   │   │   └── audio_collactor.py
│   │   ├── nlp_datasets
│   │   │   ├── __init__.py
│   │   │   ├── get_dataset.py
│   │   │   ├── datasetbase.py
│   │   │   ├── yelp_review.py
│   │   │   └── amazon_review.py
│   │   └── __init__.py
│   └── __init__.py
├── visualization
│   ├── requirements.txt
│   └── plot_multi.py
├── requirements.txt
├── config
│   ├── classic_cv
│   │   ├── fullysupervised
│   │   │   └── fullysupervised_utkface_s0.yaml
│   │   ├── supervised
│   │   │   ├── supervised_utkface_lb50_s0.yaml
│   │   │   ├── supervised_utkface_lb250_s0.yaml
│   │   │   └── supervised_utkface_lb2000_s0.yaml
│   │   ├── pimodel
│   │   │   ├── pimodel_utkface_lb50_s0.yaml
│   │   │   ├── pimodel_utkface_lb250_s0.yaml
│   │   │   └── pimodel_utkface_lb2000_s0.yaml
│   │   ├── rda
│   │   │   ├── rda_utkface_lb50_s0.yaml
│   │   │   ├── rda_utkface_lb250_s0.yaml
│   │   │   └── rda_utkface_lb2000_s0.yaml
│   │   ├── ucvme
│   │   │   ├── ucvme_utkface_lb50_s0.yaml
│   │   │   ├── ucvme_utkface_lb250_s0.yaml
│   │   │   └── ucvme_utkface_lb2000_s0.yaml
│   │   ├── meanteacher
│   │   │   ├── meanteacher_utkface_lb50_s0.yaml
│   │   │   ├── meanteacher_utkface_lb250_s0.yaml
│   │   │   └── meanteacher_utkface_lb2000_s0.yaml
│   │   ├── mixmatch
│   │   │   ├── mixmatch_utkface_lb50_s0.yaml
│   │   │   ├── mixmatch_utkface_lb250_s0.yaml
│   │   │   └── mixmatch_utkface_lb2000_s0.yaml
│   │   ├── clss
│   │   │   ├── clss_utkface_lb50_s0.yaml
│   │   │   ├── clss_utkface_lb2000_s0.yaml
│   │   │   └── clss_utkface_lb250_s0.yaml
│   │   ├── rankup
│   │   │   ├── rankup_utkface_lb50_s0.yaml
│   │   │   ├── rankup_utkface_lb2000_s0.yaml
│   │   │   └── rankup_utkface_lb250_s0.yaml
│   │   └── rankuprda
│   │       ├── rankuprda_utkface_lb50_s0.yaml
│   │       ├── rankuprda_utkface_lb2000_s0.yaml
│   │       └── rankuprda_utkface_lb250_s0.yaml
│   ├── nlp
│   │   ├── fullysupervised
│   │   │   └── fullysupervised_yelp_review_s0.yaml
│   │   ├── supervised
│   │   │   └── supervised_yelp_review_lb250_s0.yaml
│   │   ├── pimodel
│   │   │   └── pimodel_yelp_review_lb250_s0.yaml
│   │   ├── rda
│   │   │   └── rda_yelp_review_lb250_s0.yaml
│   │   ├── ucvme
│   │   │   └── ucvme_yelp_review_lb250_s0.yaml
│   │   ├── meanteacher
│   │   │   └── meanteacher_yelp_review_lb250_s0.yaml
│   │   ├── clss
│   │   │   └── clss_yelp_review_lb250_s0.yaml
│   │   ├── mixmatch
│   │   │   └── mixmatch_yelp_review_lb250_s0.yaml
│   │   ├── rankup
│   │   │   └── rankup_yelp_review_lb250_s0.yaml
│   │   └── rankuprda
│   │       └── rankuprda_yelp_review_lb250_s0.yaml
│   └── audio
│       ├── fullysupervised
│       │   └── fullysupervised_bvcc_s0.yaml
│       ├── supervised
│       │   └── supervised_bvcc_lb250_s0.yaml
│       ├── pimodel
│       │   └── pimodel_bvcc_lb250_s0.yaml
│       ├── rda
│       │   └── rda_bvcc_lb250_s0.yaml
│       ├── ucvme
│       │   └── ucvme_bvcc_lb250_s0.yaml
│       ├── meanteacher
│       │   └── meanteacher_bvcc_lb250_s0.yaml
│       ├── mixmatch
│       │   └── mixmatch_bvcc_lb250_s0.yaml
│       ├── clss
│       │   └── clss_bvcc_lb250_s0.yaml
│       ├── rankup
│       │   └── rankup_bvcc_lb250_s0.yaml
│       └── rankuprda
│           └── rankuprda_bvcc_lb250_s0.yaml
├── results
│   ├── audio_average_log.csv
│   ├── nlp_average_log.csv
│   ├── README.md
│   └── classic_cv_average_log.csv
├── LICENSE
└── .gitignore

/CHANGELOG.md:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/semilearn/algorithms/clss/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .clss import CLSS
--------------------------------------------------------------------------------
/semilearn/algorithms/rda/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .rda import RDA
--------------------------------------------------------------------------------
/visualization/requirements.txt:
--------------------------------------------------------------------------------
matplotlib>=3.7.5
numpy>=1.24.4
pandas>=2.0.3
umap-learn>=0.5.7
plotly>=5.24.1
seaborn>=0.13.2
--------------------------------------------------------------------------------
/semilearn/algorithms/ucvme/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .ucvme import UCVME
--------------------------------------------------------------------------------
/semilearn/algorithms/rankup/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .rankup import RankUp
--------------------------------------------------------------------------------
/semilearn/algorithms/mixmatch/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .mixmatch import MixMatch
--------------------------------------------------------------------------------
/semilearn/algorithms/pimodel/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .pimodel import PiModel
--------------------------------------------------------------------------------
/semilearn/algorithms/rankuprda/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .rankuprda import RankUpRDA
--------------------------------------------------------------------------------
/semilearn/algorithms/meanteacher/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .meanteacher import MeanTeacher
--------------------------------------------------------------------------------
/semilearn/algorithms/utils/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .misc import *
from .ops import *
--------------------------------------------------------------------------------
/semilearn/algorithms/fullysupervised/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .fullysupervised import FullySupervised
--------------------------------------------------------------------------------
/semilearn/core/utils/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .build import *
from .misc import *
from .registry import *
--------------------------------------------------------------------------------
/semilearn/nets/bert/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .bert import bert_base
--------------------------------------------------------------------------------
/semilearn/nets/resnet/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .resnet import resnet50
--------------------------------------------------------------------------------
/semilearn/nets/hubert/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .hubert import hubert_base
--------------------------------------------------------------------------------
/semilearn/nets/whisper/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .whisper import whisper_base
--------------------------------------------------------------------------------
/semilearn/nets/wave2vecv2/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .wave2vecv2 import wave2vecv2_base
--------------------------------------------------------------------------------
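
The nets packages above each export a flat factory name (bert_base, resnet50, hubert_base, whisper_base, wave2vecv2_base), and the configs later select one via net: wrn_28_2 with net_from_name: False. A plausible resolution scheme is attribute lookup on semilearn.nets; this sketch is illustrative only and is not the repository's actual get_net_builder:

import semilearn.nets as nets

def get_builder_by_name(net_name: str):
    """Resolve e.g. 'wrn_28_2' to the factory re-exported by semilearn.nets."""
    return getattr(nets, net_name)
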
/semilearn/algorithms/hooks/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .pseudo_label import PseudoLabelingHook
from .masking import MaskingHook, FixedThresholdingHook
--------------------------------------------------------------------------------
/semilearn/datasets/samplers/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .sampler import name2sampler, DistributedSampler
--------------------------------------------------------------------------------
/semilearn/datasets/audio_datasets/augmentation/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .subsample import Subsample, RandomSubsample
from .transforms import AudioTransforms
--------------------------------------------------------------------------------
/semilearn/datasets/cv_datasets/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .get_dataset import get_cv_dataset
from .utkface import UTKFACE
--------------------------------------------------------------------------------
/semilearn/nets/vit/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .vit import vit_tiny_patch2_32, vit_small_patch2_32, vit_small_patch16_224, vit_base_patch16_224, vit_base_patch16_96
from .vit import VisionTransformer
--------------------------------------------------------------------------------
/semilearn/nets/wrn/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .wrn import wrn_28_2, wrn_28_8
from .wrn_var import wrn_var_37_2
from .wrn import WideResNet
--------------------------------------------------------------------------------
/semilearn/datasets/audio_datasets/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .get_dataset import get_audio_dataset
from .bvcc import BVCC
from .vcc2018 import VCC2018
--------------------------------------------------------------------------------
/semilearn/datasets/collactors/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .nlp_collactor import get_bert_base_collactor
from .audio_collactor import get_wave2vecv2_base_collactor, get_hubert_base_collactor, get_whisper_base_collactor
--------------------------------------------------------------------------------
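
The audio augmentation package above exports Subsample and RandomSubsample for cropping waveforms to a fixed length; the audio configs later set max_length_seconds: 6.0 and sample_rate: 16000. A minimal sketch of what such a random crop can look like, assuming a 1-D numpy waveform; the helper name random_subsample is hypothetical, not the repository's API:

import numpy as np

def random_subsample(wav: np.ndarray, max_length_seconds: float, sample_rate: int) -> np.ndarray:
    """Randomly crop a 1-D waveform to at most max_length_seconds."""
    max_samples = int(max_length_seconds * sample_rate)  # 6.0 s * 16000 Hz = 96000
    if len(wav) <= max_samples:
        return wav  # already short enough
    start = np.random.randint(0, len(wav) - max_samples + 1)
    return wav[start : start + max_samples]
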
/semilearn/core/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .algorithmbase import AlgorithmBase
from .utils.registry import import_all_modules_for_register

import_all_modules_for_register()
--------------------------------------------------------------------------------
/semilearn/datasets/nlp_datasets/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .get_dataset import get_nlp_dataset
from .yelp_review import YELP_REVIEW
from .amazon_review import AMAZON_REVIEW
--------------------------------------------------------------------------------
/semilearn/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .core.utils import get_dataset, get_data_loader, get_net_builder
from .algorithms import get_algorithm
from .datasets import split_ssl_data
from .datasets.cv_datasets.datasetbase import BasicDataset
--------------------------------------------------------------------------------
/semilearn/datasets/cv_datasets/augmentation/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .randaugment import RandAugment
from .transforms import get_val_transforms, get_weak_transforms, get_strong_transforms
--------------------------------------------------------------------------------
/semilearn/core/criterions/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .cross_entropy import ce_loss, CELoss
from .cls_consistency import cls_consistency_loss, ClsConsistencyLoss
from .consistency import consistency_loss, ConsistencyLoss
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
audiomentations[extras]>=0.37.0
librosa>=0.10.1
matplotlib>=3.7.5
numpy>=1.24.4
pandas>=2.0.3
Pillow>=10.4.0
progress>=1.6
ruamel.yaml>=0.18.6
ruamel.yaml.clib>=0.2.8
scikit-image>=0.21.0
scikit-learn>=1.3.2
scipy>=1.10.1
tensorboard>=2.14.0
timm>=1.0.11
torch>=2.4.0
torchaudio>=2.4.0
torchvision>=0.19.0
tqdm>=4.66.5
transformers>=4.46.0
wandb
wget
aim
--------------------------------------------------------------------------------
/semilearn/datasets/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from semilearn.datasets.utils import split_ssl_data, get_collactor
from semilearn.datasets.cv_datasets import get_cv_dataset
from semilearn.datasets.nlp_datasets import get_nlp_dataset
from semilearn.datasets.audio_datasets import get_audio_dataset
from semilearn.datasets.samplers import name2sampler, DistributedSampler
--------------------------------------------------------------------------------
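
split_ssl_data, exported above, is what carves a dataset into the labeled and unlabeled pools controlled by num_labels in the configs. Its exact signature is not shown in this dump; a generic index split, for intuition only:

import numpy as np

def split_labeled_unlabeled(num_samples: int, num_labels: int, seed: int = 0):
    """Pick num_labels indices as the labeled set; the rest become unlabeled."""
    rng = np.random.default_rng(seed)
    perm = rng.permutation(num_samples)
    return perm[:num_labels], perm[num_labels:]

lb_idx, ulb_idx = split_labeled_unlabeled(num_samples=20000, num_labels=250)
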
/semilearn/nets/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

from .resnet import resnet50
from .wrn import wrn_28_2, wrn_28_8, wrn_var_37_2
from .vit import vit_base_patch16_224, vit_small_patch16_224, vit_small_patch2_32, vit_tiny_patch2_32, vit_base_patch16_96
from .bert import bert_base
from .wave2vecv2 import wave2vecv2_base
from .hubert import hubert_base
from .whisper import whisper_base
--------------------------------------------------------------------------------
/semilearn/core/hooks/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .hook import Hook
from .checkpoint import CheckpointHook
from .evaluation import EvaluationHook
from .logging import LoggingHook
from .param_update import ParamUpdateHook
from .priority import Priority, get_priority
from .sampler_seed import DistSamplerSeedHook
from .timer import TimerHook
from .ema import EMAHook
from .wandb import WANDBHook
from .aim import AimHook
--------------------------------------------------------------------------------
/semilearn/algorithms/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from semilearn.core.utils import ALGORITHMS

name2alg = ALGORITHMS


def get_algorithm(args, net_builder, tb_log, logger):
    if args.algorithm in ALGORITHMS:
        alg = ALGORITHMS[args.algorithm](args=args, net_builder=net_builder, tb_log=tb_log, logger=logger)
        return alg
    else:
        raise KeyError(f"Unknown algorithm: {args.algorithm}")
--------------------------------------------------------------------------------
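
get_algorithm above resolves args.algorithm through the ALGORITHMS registry that semilearn.core populates via import_all_modules_for_register(). A sketch of the registry pattern this implies; the Registry class and register decorator here are illustrative, not the repository's exact semilearn.core.utils.registry implementation:

class Registry(dict):
    """Map a string key to a class via a decorator."""
    def register(self, name):
        def _wrap(cls):
            self[name] = cls
            return cls
        return _wrap

ALGORITHMS = Registry()

@ALGORITHMS.register("supervised")
class Supervised:
    def __init__(self, args, net_builder, tb_log, logger):
        ...

# get_algorithm(args, ...) then amounts to ALGORITHMS[args.algorithm](...).
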
/semilearn/core/hooks/ema.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from .hook import Hook
from semilearn.core.utils import EMA


class EMAHook(Hook):
    """
    EMA model Hook for updating the EMA version of the model
    """

    def before_run(self, algorithm):
        algorithm.ema = EMA(algorithm.model, algorithm.ema_m)
        algorithm.ema.register()
        if algorithm.resume:
            algorithm.ema.load(algorithm.ema_model)

    def after_train_step(self, algorithm):
        if algorithm.ema is not None:
            algorithm.ema.update()
            # Copy the raw weights first, then overwrite with the EMA shadow
            # (non-strict, since the shadow only tracks registered parameters).
            algorithm.ema_model.load_state_dict(algorithm.model.state_dict())
            algorithm.ema_model.load_state_dict(algorithm.ema.shadow, strict=False)
--------------------------------------------------------------------------------
/semilearn/core/hooks/sampler_seed.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# Ref: https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/sampler_seed.py

from torch.utils.data import DataLoader

from .hook import Hook
from semilearn.datasets import DistributedSampler


class DistSamplerSeedHook(Hook):
    """
    Distributed sampler seed Hook

    update the samplers' epoch in the data loaders
    """

    def before_train_epoch(self, algorithm):
        for name, dataloader in algorithm.loader_dict.items():
            if not isinstance(dataloader, DataLoader):
                continue

            if isinstance(dataloader.sampler, DistributedSampler):
                algorithm.loader_dict[name].sampler.set_epoch(algorithm.epoch)
--------------------------------------------------------------------------------
/semilearn/algorithms/utils/misc.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Modifications Copyright (c) 2024 Pin-Yen Huang.
# Licensed under the MIT License.

import argparse


class SSL_Argument(object):
    """
    Algorithm-specific argument
    """

    def __init__(self, name, type, default, help=""):
        """
        Model-specific arguments should be added via this class.
        """
        self.name = name
        self.type = type
        self.default = default
        self.help = help


def str2bool(v):
    """
    str to bool
    """
    if isinstance(v, bool):
        return v
    if v.lower() in ("yes", "true", "t", "y", "1"):
        return True
    elif v.lower() in ("no", "false", "f", "n", "0"):
        return False
    else:
        raise argparse.ArgumentTypeError("Boolean value expected.")
--------------------------------------------------------------------------------
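
str2bool above is what lets the YAML-style booleans in the configs below (amp: False, use_cat: True, ...) round-trip through argparse. A usage sketch:

import argparse

parser = argparse.ArgumentParser()
# str2bool accepts yes/no, true/false, t/f, y/n, 1/0 on the command line
parser.add_argument("--amp", type=str2bool, default=False)
parser.add_argument("--use_cat", type=str2bool, default=True)

args = parser.parse_args(["--amp", "true", "--use_cat", "no"])
assert args.amp is True and args.use_cat is False
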
/semilearn/core/hooks/checkpoint.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# Ref: https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/checkpoint.py

import os

from .hook import Hook


class CheckpointHook(Hook):
    """
    Checkpoint Hook for saving checkpoints
    """

    def after_train_step(self, algorithm):
        # must be called after evaluation for saving the best
        if self.every_n_iters(algorithm, algorithm.num_eval_iter) or self.is_last_iter(algorithm):
            save_path = os.path.join(algorithm.save_dir, algorithm.save_name)

            if (not algorithm.distributed) or (algorithm.distributed and algorithm.rank % algorithm.ngpus_per_node == 0):
                algorithm.save_model("latest_model.pth", save_path)

                if algorithm.it == algorithm.best_it:
                    algorithm.save_model("model_best.pth", save_path)
--------------------------------------------------------------------------------
/semilearn/core/hooks/timer.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import torch

from .hook import Hook


class TimerHook(Hook):
    """
    Timer Hook
    """

    def before_run(self, algorithm):
        algorithm.start_batch = torch.cuda.Event(enable_timing=True)
        algorithm.end_batch = torch.cuda.Event(enable_timing=True)

        algorithm.start_run = torch.cuda.Event(enable_timing=True)
        algorithm.end_run = torch.cuda.Event(enable_timing=True)
        algorithm.start_batch.record()

    def before_train_step(self, algorithm):
        algorithm.end_batch.record()

    def after_train_step(self, algorithm):
        algorithm.log_dict["lr"] = algorithm.optimizer.param_groups[-1]["lr"]
        algorithm.log_dict["train/prefetch_time"] = algorithm.start_batch.elapsed_time(algorithm.end_batch) / 1000.0
        algorithm.start_batch.record()
--------------------------------------------------------------------------------
/config/classic_cv/fullysupervised/fullysupervised_utkface_s0.yaml:
--------------------------------------------------------------------------------
algorithm: fullysupervised
save_dir: ./saved_models/classic_cv
save_name: fullysupervised_utkface_s0
resume: True
load_path: ./saved_models/classic_cv/fullysupervised_utkface_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10003
dist_backend: nccl
--------------------------------------------------------------------------------
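
The iteration counts in the classic_cv configs are internally consistent: num_train_iter equals epoch * num_eval_iter, so there is exactly one evaluation pass per epoch (and CheckpointHook above fires on the same num_eval_iter boundary). A quick check:

epoch, num_eval_iter = 256, 1024
num_train_iter = epoch * num_eval_iter
assert num_train_iter == 262144  # matches the value in the configs
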
/config/classic_cv/supervised/supervised_utkface_lb50_s0.yaml:
--------------------------------------------------------------------------------
algorithm: supervised
save_dir: ./saved_models/classic_cv
save_name: supervised_utkface_lb50_s0
resume: True
load_path: ./saved_models/classic_cv/supervised_utkface_lb50_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 50
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10001
dist_backend: nccl
--------------------------------------------------------------------------------
/config/classic_cv/supervised/supervised_utkface_lb250_s0.yaml:
--------------------------------------------------------------------------------
algorithm: supervised
save_dir: ./saved_models/classic_cv
save_name: supervised_utkface_lb250_s0
resume: True
load_path: ./saved_models/classic_cv/supervised_utkface_lb250_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 250
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10002
dist_backend: nccl
--------------------------------------------------------------------------------
/config/classic_cv/supervised/supervised_utkface_lb2000_s0.yaml:
--------------------------------------------------------------------------------
algorithm: supervised
save_dir: ./saved_models/classic_cv
save_name: supervised_utkface_lb2000_s0
resume: True
load_path: ./saved_models/classic_cv/supervised_utkface_lb2000_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 2000
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10003
dist_backend: nccl
--------------------------------------------------------------------------------
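
The three supervised configs above differ only in num_labels (50/250/2000), the save/load paths, and the dist_url port. A sketch that stamps out such variants with ruamel.yaml (already pinned in requirements.txt); the base-file path is taken from this repository, the loop itself is illustrative:

from ruamel.yaml import YAML

yaml = YAML()
base = "config/classic_cv/supervised/supervised_utkface_lb50_s0.yaml"
with open(base) as f:
    cfg = yaml.load(f)

# ports 10001..10003 line up with the lb50/lb250/lb2000 files above
for port, num_labels in enumerate([50, 250, 2000], start=10001):
    name = f"supervised_utkface_lb{num_labels}_s0"
    cfg["num_labels"] = num_labels
    cfg["save_name"] = name
    cfg["load_path"] = f"./saved_models/classic_cv/{name}/latest_model.pth"
    cfg["dist_url"] = f"tcp://127.0.0.1:{port}"
    with open(f"config/classic_cv/supervised/{name}.yaml", "w") as f:
        yaml.dump(cfg, f)
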
/config/classic_cv/pimodel/pimodel_utkface_lb50_s0.yaml:
--------------------------------------------------------------------------------
algorithm: pimodel
save_dir: ./saved_models/classic_cv
save_name: pimodel_utkface_lb50_s0
resume: True
load_path: ./saved_models/classic_cv/pimodel_utkface_lb50_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 50
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 1
ulb_loss_ratio: 0.1
unsup_warm_up: 0.4
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10001
dist_backend: nccl
--------------------------------------------------------------------------------
/config/classic_cv/pimodel/pimodel_utkface_lb250_s0.yaml:
--------------------------------------------------------------------------------
algorithm: pimodel
save_dir: ./saved_models/classic_cv
save_name: pimodel_utkface_lb250_s0
resume: True
load_path: ./saved_models/classic_cv/pimodel_utkface_lb250_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 250
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 1
ulb_loss_ratio: 0.1
unsup_warm_up: 0.4
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10002
dist_backend: nccl
--------------------------------------------------------------------------------
/config/nlp/fullysupervised/fullysupervised_yelp_review_s0.yaml:
--------------------------------------------------------------------------------
algorithm: fullysupervised
save_dir: ./saved_models/nlp
save_name: fullysupervised_yelp_review_s0
resume: True
load_path: ./saved_models/nlp/fullysupervised_yelp_review_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 100
num_train_iter: 102400
num_eval_iter: 1024
num_log_iter: 256
num_warmup_iter: 5120
batch_size: 8
eval_batch_size: 8
ema_m: 0.0
optim: AdamW
lr: 1e-05
momentum: 0.9
weight_decay: 0.0005
layer_decay: 0.75
amp: False
clip_grad: 0.0
use_cat: False
criterion: l1
net: bert_base
net_from_name: False
use_pretrain: True
pretrain_path: google/bert_uncased_L-4_H-512_A-8
data_dir: ./data
dataset: yelp_review
train_sampler: RandomSampler
num_workers: 4
max_length: 512
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10001
dist_backend: nccl
--------------------------------------------------------------------------------
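
layer_decay: 0.75 in the NLP (and audio) configs signals layer-wise learning-rate decay for the pretrained backbone: the top layer keeps the base lr while earlier layers are scaled down geometrically. A sketch of the usual scheme; whether the repository counts the embedding as layer 0 exactly this way is an assumption:

def layer_lr(base_lr: float, layer: int, num_layers: int, decay: float = 0.75) -> float:
    """Earlier layers get base_lr * decay^(distance from the top layer)."""
    return base_lr * decay ** (num_layers - layer)

# With lr: 1e-05 from the config and a 4-layer encoder (bert_uncased_L-4):
print([layer_lr(1e-05, i, 4) for i in range(5)])  # embedding ... top layer
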
/results/audio_average_log.csv:
--------------------------------------------------------------------------------
exp_name,num_exp,min_MAE,min_MSE,max_R2,max_LCC,max_SRCC,max_KTAU,min_GMAE
clss_bvcc_lb250,3,0.499±0.010,0.385±0.022,0.534±0.027,0.747±0.012,0.748±0.008,0.559±0.009,0.329±0.002
fullysupervised_bvcc,3,0.351±0.003,0.195±0.002,0.764±0.002,0.876±0.001,0.874±0.001,0.698±0.001,0.227±0.004
meanteacher_bvcc_lb250,3,0.532±0.006,0.419±0.014,0.492±0.018,0.735±0.008,0.742±0.008,0.550±0.008,0.362±0.002
mixmatch_bvcc_lb250,3,0.597±0.017,0.535±0.036,0.353±0.044,0.614±0.029,0.626±0.031,0.446±0.026,0.401±0.011
pimodel_bvcc_lb250,3,0.534±0.008,0.422±0.017,0.489±0.021,0.734±0.009,0.740±0.009,0.549±0.009,0.360±0.003
rankuprda_bvcc_lb250,3,0.463±0.013,0.332±0.023,0.598±0.027,0.781±0.011,0.783±0.011,0.591±0.012,0.305±0.007
rankup_bvcc_lb250,3,0.470±0.012,0.340±0.023,0.588±0.028,0.774±0.012,0.776±0.010,0.584±0.010,0.312±0.011
supervised_bvcc_lb250,3,0.533±0.006,0.421±0.014,0.490±0.018,0.734±0.008,0.741±0.009,0.549±0.009,0.362±0.001
ucvme_bvcc_lb250,3,0.498±0.003,0.370±0.009,0.553±0.011,0.770±0.010,0.774±0.008,0.582±0.009,0.333±0.003
--------------------------------------------------------------------------------
/config/classic_cv/pimodel/pimodel_utkface_lb2000_s0.yaml:
--------------------------------------------------------------------------------
algorithm: pimodel
save_dir: ./saved_models/classic_cv
save_name: pimodel_utkface_lb2000_s0
resume: True
load_path: ./saved_models/classic_cv/pimodel_utkface_lb2000_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 2000
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 1
ulb_loss_ratio: 0.1
unsup_warm_up: 0.4
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10003
dist_backend: nccl
--------------------------------------------------------------------------------
/config/classic_cv/rda/rda_utkface_lb50_s0.yaml:
--------------------------------------------------------------------------------
algorithm: rda
save_dir: ./saved_models/classic_cv
save_name: rda_utkface_lb50_s0
resume: True
load_path: ./saved_models/classic_cv/rda_utkface_lb50_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 50
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 7
ulb_loss_ratio: 1.0
unsup_warm_up: 0.4
rda_num_refine_iter: 1024
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10001
dist_backend: nccl
--------------------------------------------------------------------------------
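
results/audio_average_log.csv above stores each metric as mean±std over num_exp runs. A sketch for splitting those cells back into numbers with pandas (pinned in requirements.txt):

import pandas as pd

df = pd.read_csv("results/audio_average_log.csv")
# "0.499±0.010" -> mean 0.499, std 0.010
mean_std = df["min_MAE"].str.split("±", expand=True).astype(float)
df["min_MAE_mean"], df["min_MAE_std"] = mean_std[0], mean_std[1]
print(df.sort_values("min_MAE_mean")[["exp_name", "min_MAE_mean"]])
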
/config/classic_cv/ucvme/ucvme_utkface_lb50_s0.yaml:
--------------------------------------------------------------------------------
algorithm: ucvme
save_dir: ./saved_models/classic_cv
save_name: ucvme_utkface_lb50_s0
resume: True
load_path: ./saved_models/classic_cv/ucvme_utkface_lb50_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 50
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 1
ulb_loss_ratio: 0.05
dropout_rate: 0.05
num_ensemble: 5
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10001
dist_backend: nccl
--------------------------------------------------------------------------------
/config/classic_cv/rda/rda_utkface_lb250_s0.yaml:
--------------------------------------------------------------------------------
algorithm: rda
save_dir: ./saved_models/classic_cv
save_name: rda_utkface_lb250_s0
resume: True
load_path: ./saved_models/classic_cv/rda_utkface_lb250_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 250
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 7
ulb_loss_ratio: 1.0
unsup_warm_up: 0.4
rda_num_refine_iter: 1024
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10002
dist_backend: nccl
--------------------------------------------------------------------------------
/config/classic_cv/ucvme/ucvme_utkface_lb250_s0.yaml:
--------------------------------------------------------------------------------
algorithm: ucvme
save_dir: ./saved_models/classic_cv
save_name: ucvme_utkface_lb250_s0
resume: True
load_path: ./saved_models/classic_cv/ucvme_utkface_lb250_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 250
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 1
ulb_loss_ratio: 0.05
dropout_rate: 0.05
num_ensemble: 5
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10002
dist_backend: nccl
--------------------------------------------------------------------------------
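
The ucvme configs expose dropout_rate: 0.05 and num_ensemble: 5; UCVME-style methods estimate uncertainty by averaging several stochastic forward passes. A minimal MC-dropout sketch in PyTorch, independent of the repository's actual UCVME implementation:

import torch

@torch.no_grad()
def mc_dropout_predict(model: torch.nn.Module, x: torch.Tensor, num_ensemble: int = 5):
    """Average num_ensemble stochastic passes; the variance acts as uncertainty."""
    model.train()  # keep dropout active at inference time
    preds = torch.stack([model(x) for _ in range(num_ensemble)])
    return preds.mean(dim=0), preds.var(dim=0)
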
/config/nlp/supervised/supervised_yelp_review_lb250_s0.yaml:
--------------------------------------------------------------------------------
algorithm: supervised
save_dir: ./saved_models/nlp
save_name: supervised_yelp_review_lb250_s0
resume: True
load_path: ./saved_models/nlp/supervised_yelp_review_lb250_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 100
num_train_iter: 102400
num_eval_iter: 1024
num_log_iter: 256
num_warmup_iter: 5120
num_labels: 250
batch_size: 8
eval_batch_size: 8
ema_m: 0.0
optim: AdamW
lr: 1e-05
momentum: 0.9
weight_decay: 0.0005
layer_decay: 0.75
amp: False
clip_grad: 0.0
use_cat: False
criterion: l1
net: bert_base
net_from_name: False
use_pretrain: True
pretrain_path: google/bert_uncased_L-4_H-512_A-8
data_dir: ./data
dataset: yelp_review
train_sampler: RandomSampler
num_workers: 4
max_length: 512
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10001
dist_backend: nccl
--------------------------------------------------------------------------------
/config/audio/fullysupervised/fullysupervised_bvcc_s0.yaml:
--------------------------------------------------------------------------------
algorithm: fullysupervised
save_dir: ./saved_models/audio
save_name: fullysupervised_bvcc_s0
resume: True
load_path: ./saved_models/audio/fullysupervised_bvcc_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 100
num_train_iter: 102400
num_eval_iter: 1024
num_log_iter: 256
num_warmup_iter: 5120
batch_size: 8
eval_batch_size: 16
ema_m: 0.0
optim: AdamW
lr: 2e-06
momentum: 0.9
weight_decay: 2e-05
layer_decay: 0.75
amp: False
clip_grad: 0.0
use_cat: False
criterion: l1
net: whisper_base
net_from_name: False
use_pretrain: True
pretrain_path: openai/whisper-base
data_dir: ./data
dataset: bvcc
train_sampler: RandomSampler
num_workers: 8
max_length_seconds: 6.0
sample_rate: 16000
preload: True
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10001
dist_backend: nccl
--------------------------------------------------------------------------------
/config/classic_cv/meanteacher/meanteacher_utkface_lb50_s0.yaml:
--------------------------------------------------------------------------------
algorithm: meanteacher
save_dir: ./saved_models/classic_cv
save_name: meanteacher_utkface_lb50_s0
resume: True
load_path: ./saved_models/classic_cv/meanteacher_utkface_lb50_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 50
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 1
ulb_loss_ratio: 0.1
unsup_warm_up: 0.4
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10001
dist_backend: nccl
--------------------------------------------------------------------------------
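
ema_m: 0.999 is the decay used both for the evaluation EMA (core/hooks/ema.py above) and, in Mean Teacher, for the teacher weights. The classic exponential-moving-average update, sketched independently of the EMA class this repository imports:

import torch

@torch.no_grad()
def ema_update(teacher: torch.nn.Module, student: torch.nn.Module, ema_m: float = 0.999):
    """teacher <- ema_m * teacher + (1 - ema_m) * student, parameter-wise."""
    for t_param, s_param in zip(teacher.parameters(), student.parameters()):
        t_param.mul_(ema_m).add_(s_param, alpha=1.0 - ema_m)
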
/config/classic_cv/rda/rda_utkface_lb2000_s0.yaml:
--------------------------------------------------------------------------------
algorithm: rda
save_dir: ./saved_models/classic_cv
save_name: rda_utkface_lb2000_s0
resume: True
load_path: ./saved_models/classic_cv/rda_utkface_lb2000_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 2000
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 7
ulb_loss_ratio: 1.0
unsup_warm_up: 0.4
rda_num_refine_iter: 1024
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10003
dist_backend: nccl
--------------------------------------------------------------------------------
/config/classic_cv/ucvme/ucvme_utkface_lb2000_s0.yaml:
--------------------------------------------------------------------------------
algorithm: ucvme
save_dir: ./saved_models/classic_cv
save_name: ucvme_utkface_lb2000_s0
resume: True
load_path: ./saved_models/classic_cv/ucvme_utkface_lb2000_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 2000
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 1
ulb_loss_ratio: 0.05
dropout_rate: 0.05
num_ensemble: 5
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10003
dist_backend: nccl
--------------------------------------------------------------------------------
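
unsup_warm_up: 0.4 in the pimodel/meanteacher/rda/mixmatch configs ramps the unlabeled-loss weight from 0 up to ulb_loss_ratio over the first 40% of training. Whether the ramp is linear or sigmoid is not shown in this dump; the linear form is the illustrative default:

def unsup_weight(it: int, num_train_iter: int, ulb_loss_ratio: float, warm_up: float = 0.4) -> float:
    """Linearly ramp the unlabeled-loss weight over warm_up * num_train_iter."""
    ramp = min(1.0, it / (warm_up * num_train_iter))
    return ulb_loss_ratio * ramp

assert unsup_weight(0, 262144, 0.1) == 0.0
assert unsup_weight(262144, 262144, 0.1) == 0.1
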
/config/classic_cv/meanteacher/meanteacher_utkface_lb250_s0.yaml:
--------------------------------------------------------------------------------
algorithm: meanteacher
save_dir: ./saved_models/classic_cv
save_name: meanteacher_utkface_lb250_s0
resume: True
load_path: ./saved_models/classic_cv/meanteacher_utkface_lb250_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 250
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 1
ulb_loss_ratio: 0.1
unsup_warm_up: 0.4
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10002
dist_backend: nccl
--------------------------------------------------------------------------------
/config/classic_cv/mixmatch/mixmatch_utkface_lb50_s0.yaml:
--------------------------------------------------------------------------------
algorithm: mixmatch
save_dir: ./saved_models/classic_cv
save_name: mixmatch_utkface_lb50_s0
resume: True
load_path: ./saved_models/classic_cv/mixmatch_utkface_lb50_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 50
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 1
ulb_loss_ratio: 0.1
unsup_warm_up: 0.4
mixup_alpha: 0.5
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10001
dist_backend: nccl
--------------------------------------------------------------------------------
/config/classic_cv/meanteacher/meanteacher_utkface_lb2000_s0.yaml:
--------------------------------------------------------------------------------
algorithm: meanteacher
save_dir: ./saved_models/classic_cv
save_name: meanteacher_utkface_lb2000_s0
resume: True
load_path: ./saved_models/classic_cv/meanteacher_utkface_lb2000_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 2000
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 1
ulb_loss_ratio: 0.1
unsup_warm_up: 0.4
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10003
dist_backend: nccl
--------------------------------------------------------------------------------
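
mixup_alpha: 0.5 in the mixmatch configs parameterizes the Beta distribution used for mixup; for regression, both the inputs and the scalar targets are interpolated. A sketch of the standard MixMatch-style mixing:

import numpy as np
import torch

def mixup(x: torch.Tensor, y: torch.Tensor, alpha: float = 0.5):
    """Mix a batch with a shuffled copy of itself."""
    lam = np.random.beta(alpha, alpha)
    lam = max(lam, 1.0 - lam)  # MixMatch keeps the larger coefficient
    idx = torch.randperm(x.size(0))
    return lam * x + (1 - lam) * x[idx], lam * y + (1 - lam) * y[idx]
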
/config/classic_cv/mixmatch/mixmatch_utkface_lb250_s0.yaml:
--------------------------------------------------------------------------------
algorithm: mixmatch
save_dir: ./saved_models/classic_cv
save_name: mixmatch_utkface_lb250_s0
resume: True
load_path: ./saved_models/classic_cv/mixmatch_utkface_lb250_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 250
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 1
ulb_loss_ratio: 0.1
unsup_warm_up: 0.4
mixup_alpha: 0.5
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10002
dist_backend: nccl
--------------------------------------------------------------------------------
/config/audio/supervised/supervised_bvcc_lb250_s0.yaml:
--------------------------------------------------------------------------------
algorithm: supervised
save_dir: ./saved_models/audio
save_name: supervised_bvcc_lb250_s0
resume: True
load_path: ./saved_models/audio/supervised_bvcc_lb250_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 100
num_train_iter: 102400
num_eval_iter: 1024
num_log_iter: 256
num_warmup_iter: 5120
num_labels: 250
batch_size: 8
eval_batch_size: 16
ema_m: 0.0
optim: AdamW
lr: 2e-06
momentum: 0.9
weight_decay: 2e-05
layer_decay: 0.75
amp: False
clip_grad: 0.0
use_cat: False
criterion: l1
net: whisper_base
net_from_name: False
use_pretrain: True
pretrain_path: openai/whisper-base
data_dir: ./data
dataset: bvcc
train_sampler: RandomSampler
num_workers: 8
max_length_seconds: 6.0
sample_rate: 16000
preload: True
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10001
dist_backend: nccl
--------------------------------------------------------------------------------
/config/classic_cv/mixmatch/mixmatch_utkface_lb2000_s0.yaml:
--------------------------------------------------------------------------------
algorithm: mixmatch
save_dir: ./saved_models/classic_cv
save_name: mixmatch_utkface_lb2000_s0
resume: True
load_path: ./saved_models/classic_cv/mixmatch_utkface_lb2000_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 2000
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 1
ulb_loss_ratio: 0.1
unsup_warm_up: 0.4
mixup_alpha: 0.5
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10003
dist_backend: nccl
--------------------------------------------------------------------------------
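
use_cat: True in the classic_cv configs (and False for the NLP/audio ones) typically means the labeled and unlabeled batches are concatenated into a single forward pass and split afterwards, which is faster on GPU at the cost of memory; variable-length text and audio batches don't concatenate as cleanly. A sketch of the pattern, hedged as an assumption about what the flag controls here:

import torch

def cat_forward(model, x_lb: torch.Tensor, x_ulb: torch.Tensor):
    """One concatenated forward pass, then split labeled/unlabeled outputs."""
    num_lb = x_lb.size(0)
    outputs = model(torch.cat((x_lb, x_ulb)))
    return outputs[:num_lb], outputs[num_lb:]
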
/config/classic_cv/clss/clss_utkface_lb50_s0.yaml:
--------------------------------------------------------------------------------
algorithm: clss
save_dir: ./saved_models/classic_cv
save_name: clss_utkface_lb50_s0
resume: True
load_path: ./saved_models/classic_cv/clss_utkface_lb50_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 50
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 0.25
lb_ctr_loss_ratio: 1.0
ulb_ctr_loss_ratio: 0.05
ulb_rank_loss_ratio: 0.01
lambda_val: 2.0
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10001
dist_backend: nccl
--------------------------------------------------------------------------------
/config/classic_cv/clss/clss_utkface_lb2000_s0.yaml:
--------------------------------------------------------------------------------
algorithm: clss
save_dir: ./saved_models/classic_cv
save_name: clss_utkface_lb2000_s0
resume: True
load_path: ./saved_models/classic_cv/clss_utkface_lb2000_s0/latest_model.pth
overwrite: True
use_tensorboard: True
use_wandb: False
use_aim: False
epoch: 256
num_train_iter: 262144
num_eval_iter: 1024
num_log_iter: 256
num_labels: 2000
batch_size: 32
eval_batch_size: 256
ema_m: 0.999
uratio: 0.25
lb_ctr_loss_ratio: 1.0
ulb_ctr_loss_ratio: 0.05
ulb_rank_loss_ratio: 0.01
lambda_val: 2.0
optim: SGD
lr: 0.01
momentum: 0.9
weight_decay: 0.001
layer_decay: 1.0
amp: False
clip_grad: 0.0
use_cat: True
criterion: l1
net: wrn_28_2
net_from_name: False
data_dir: ./data
dataset: utkface
train_sampler: RandomSampler
num_workers: 4
crop_ratio: 0.875
img_size: 40
preload: False
seed: 0
world_size: 1
rank: 0
multiprocessing_distributed: False
dist_url: tcp://127.0.0.1:10003
dist_backend: nccl
--------------------------------------------------------------------------------
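
The clss configs weight four terms: the supervised criterion (criterion: l1) plus contrastive and ranking losses scaled by lb_ctr_loss_ratio: 1.0, ulb_ctr_loss_ratio: 0.05, and ulb_rank_loss_ratio: 0.01. A sketch of how such ratios typically combine into the total loss; the term names are illustrative, not CLSS's internal variable names:

def clss_total_loss(sup_loss, lb_ctr_loss, ulb_ctr_loss, ulb_rank_loss,
                    lb_ctr_ratio=1.0, ulb_ctr_ratio=0.05, ulb_rank_ratio=0.01):
    """Weighted sum matching the ratios in the clss_utkface configs."""
    return (sup_loss
            + lb_ctr_ratio * lb_ctr_loss
            + ulb_ctr_ratio * ulb_ctr_loss
            + ulb_rank_ratio * ulb_rank_loss)
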
-------------------------------------------------------------------------------- 1 | algorithm: clss 2 | save_dir: ./saved_models/classic_cv 3 | save_name: clss_utkface_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/classic_cv/clss_utkface_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 256 11 | num_train_iter: 262144 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_labels: 250 15 | batch_size: 32 16 | eval_batch_size: 256 17 | ema_m: 0.999 18 | uratio: 0.25 19 | lb_ctr_loss_ratio: 1.0 20 | ulb_ctr_loss_ratio: 0.05 21 | ulb_rank_loss_ratio: 0.01 22 | lambda_val: 2.0 23 | optim: SGD 24 | lr: 0.01 25 | momentum: 0.9 26 | weight_decay: 0.001 27 | layer_decay: 1.0 28 | amp: False 29 | clip_grad: 0.0 30 | use_cat: True 31 | criterion: l1 32 | net: wrn_28_2 33 | net_from_name: False 34 | data_dir: ./data 35 | dataset: utkface 36 | train_sampler: RandomSampler 37 | num_workers: 4 38 | crop_ratio: 0.875 39 | img_size: 40 40 | preload: False 41 | seed: 0 42 | world_size: 1 43 | rank: 0 44 | multiprocessing_distributed: False 45 | dist_url: tcp://127.0.0.1:10002 46 | dist_backend: nccl 47 | -------------------------------------------------------------------------------- /config/classic_cv/rankup/rankup_utkface_lb50_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: rankup 2 | save_dir: ./saved_models/classic_cv 3 | save_name: rankup_utkface_lb50_s0 4 | resume: True 5 | load_path: ./saved_models/classic_cv/rankup_utkface_lb50_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 256 11 | num_train_iter: 262144 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_labels: 50 15 | batch_size: 32 16 | eval_batch_size: 256 17 | ema_m: 0.999 18 | uratio: 7 19 | arc_loss_ratio: 0.2 20 | arc_ulb_loss_ratio: 1.0 21 | hard_label: True 22 | T: 0.5 23 | p_cutoff: 0.95 24 | optim: SGD 25 | lr: 0.01 26 | momentum: 0.9 27 | weight_decay: 0.001 28 | layer_decay: 1.0 29 | amp: False 30 | clip_grad: 0.0 31 | use_cat: True 32 | criterion: l1 33 | net: wrn_28_2 34 | net_from_name: False 35 | data_dir: ./data 36 | dataset: utkface 37 | train_sampler: RandomSampler 38 | num_workers: 4 39 | crop_ratio: 0.875 40 | img_size: 40 41 | preload: False 42 | seed: 0 43 | world_size: 1 44 | rank: 0 45 | multiprocessing_distributed: False 46 | dist_url: tcp://127.0.0.1:10001 47 | dist_backend: nccl 48 | -------------------------------------------------------------------------------- /config/classic_cv/rankup/rankup_utkface_lb2000_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: rankup 2 | save_dir: ./saved_models/classic_cv 3 | save_name: rankup_utkface_lb2000_s0 4 | resume: True 5 | load_path: ./saved_models/classic_cv/rankup_utkface_lb2000_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 256 11 | num_train_iter: 262144 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_labels: 2000 15 | batch_size: 32 16 | eval_batch_size: 256 17 | ema_m: 0.999 18 | uratio: 7 19 | arc_loss_ratio: 0.2 20 | arc_ulb_loss_ratio: 1.0 21 | hard_label: True 22 | T: 0.5 23 | p_cutoff: 0.95 24 | optim: SGD 25 | lr: 0.01 26 | momentum: 0.9 27 | weight_decay: 0.001 28 | layer_decay: 1.0 29 | amp: False 30 | clip_grad: 0.0 31 | use_cat: True 32 | criterion: l1 33 | net: wrn_28_2 34 | net_from_name: False 35 | data_dir: 
./data 36 | dataset: utkface 37 | train_sampler: RandomSampler 38 | num_workers: 4 39 | crop_ratio: 0.875 40 | img_size: 40 41 | preload: False 42 | seed: 0 43 | world_size: 1 44 | rank: 0 45 | multiprocessing_distributed: False 46 | dist_url: tcp://127.0.0.1:10003 47 | dist_backend: nccl 48 | -------------------------------------------------------------------------------- /config/classic_cv/rankup/rankup_utkface_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: rankup 2 | save_dir: ./saved_models/classic_cv 3 | save_name: rankup_utkface_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/classic_cv/rankup_utkface_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 256 11 | num_train_iter: 262144 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_labels: 250 15 | batch_size: 32 16 | eval_batch_size: 256 17 | ema_m: 0.999 18 | uratio: 7 19 | arc_loss_ratio: 0.2 20 | arc_ulb_loss_ratio: 1.0 21 | hard_label: True 22 | T: 0.5 23 | p_cutoff: 0.95 24 | optim: SGD 25 | lr: 0.01 26 | momentum: 0.9 27 | weight_decay: 0.001 28 | layer_decay: 1.0 29 | amp: False 30 | clip_grad: 0.0 31 | use_cat: True 32 | criterion: l1 33 | net: wrn_28_2 34 | net_from_name: False 35 | data_dir: ./data 36 | dataset: utkface 37 | train_sampler: RandomSampler 38 | num_workers: 4 39 | crop_ratio: 0.875 40 | img_size: 40 41 | preload: False 42 | seed: 0 43 | world_size: 1 44 | rank: 0 45 | multiprocessing_distributed: False 46 | dist_url: tcp://127.0.0.1:10002 47 | dist_backend: nccl 48 | -------------------------------------------------------------------------------- /config/nlp/pimodel/pimodel_yelp_review_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: pimodel 2 | save_dir: ./saved_models/nlp 3 | save_name: pimodel_yelp_review_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/nlp/pimodel_yelp_review_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 8 18 | ema_m: 0.0 19 | uratio: 1 20 | ulb_loss_ratio: 0.1 21 | unsup_warm_up: 0.4 22 | optim: AdamW 23 | lr: 1e-05 24 | momentum: 0.9 25 | weight_decay: 0.0005 26 | layer_decay: 0.75 27 | amp: False 28 | clip_grad: 0.0 29 | use_cat: False 30 | criterion: l1 31 | net: bert_base 32 | net_from_name: False 33 | use_pretrain: True 34 | pretrain_path: google/bert_uncased_L-4_H-512_A-8 35 | data_dir: ./data 36 | dataset: yelp_review 37 | train_sampler: RandomSampler 38 | num_workers: 4 39 | max_length: 512 40 | seed: 0 41 | world_size: 1 42 | rank: 0 43 | multiprocessing_distributed: False 44 | dist_url: tcp://127.0.0.1:10001 45 | dist_backend: nccl 46 | -------------------------------------------------------------------------------- /results/nlp_average_log.csv: -------------------------------------------------------------------------------- 1 | exp_name,num_exp,min_MAE,min_MSE,max_R2,max_LCC,max_SRCC,max_KTAU,min_GMAE 2 | clss_yelp_review_lb250,3,0.721±0.010,0.913±0.022,0.543±0.011,0.744±0.001,0.748±0.002,0.599±0.002,0.307±0.026 3 | fullysupervised_yelp_review,3,0.418±0.003,0.402±0.004,0.799±0.002,0.898±0.001,0.896±0.001,0.766±0.001,0.150±0.005 4 | 
meanteacher_yelp_review_lb250,3,0.730±0.024,0.870±0.037,0.565±0.019,0.763±0.011,0.769±0.009,0.619±0.009,0.420±0.018 5 | mixmatch_yelp_review_lb250,3,0.886±0.004,1.238±0.017,0.381±0.008,0.643±0.003,0.660±0.004,0.511±0.003,0.587±0.003 6 | pimodel_yelp_review_lb250,3,0.730±0.024,0.870±0.037,0.565±0.019,0.763±0.011,0.769±0.009,0.619±0.009,0.420±0.018 7 | rankuprda_yelp_review_lb250,3,0.632±0.009,0.698±0.015,0.651±0.007,0.809±0.005,0.810±0.005,0.659±0.005,0.389±0.011 8 | rankup_yelp_review_lb250,3,0.661±0.018,0.711±0.025,0.645±0.013,0.817±0.003,0.829±0.002,0.681±0.002,0.391±0.013 9 | supervised_yelp_review_lb250,3,0.723±0.023,0.868±0.038,0.566±0.019,0.762±0.012,0.769±0.010,0.619±0.009,0.392±0.015 10 | ucvme_yelp_review_lb250,3,0.775±0.006,0.921±0.011,0.540±0.005,0.754±0.007,0.763±0.005,0.611±0.005,0.526±0.007 11 | -------------------------------------------------------------------------------- /config/nlp/rda/rda_yelp_review_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: rda 2 | save_dir: ./saved_models/nlp 3 | save_name: rda_yelp_review_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/nlp/rda_yelp_review_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 8 18 | ema_m: 0.0 19 | uratio: 1 20 | ulb_loss_ratio: 1.0 21 | unsup_warm_up: 0.4 22 | rda_num_refine_iter: 1024 23 | optim: AdamW 24 | lr: 1e-05 25 | momentum: 0.9 26 | weight_decay: 0.0005 27 | layer_decay: 0.75 28 | amp: False 29 | clip_grad: 0.0 30 | use_cat: False 31 | criterion: l1 32 | net: bert_base 33 | net_from_name: False 34 | use_pretrain: True 35 | pretrain_path: google/bert_uncased_L-4_H-512_A-8 36 | data_dir: ./data 37 | dataset: yelp_review 38 | train_sampler: RandomSampler 39 | num_workers: 4 40 | max_length: 512 41 | seed: 0 42 | world_size: 1 43 | rank: 0 44 | multiprocessing_distributed: False 45 | dist_url: tcp://127.0.0.1:10001 46 | dist_backend: nccl 47 | -------------------------------------------------------------------------------- /config/nlp/ucvme/ucvme_yelp_review_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: ucvme 2 | save_dir: ./saved_models/nlp 3 | save_name: ucvme_yelp_review_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/nlp/ucvme_yelp_review_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 8 18 | ema_m: 0.0 19 | uratio: 1 20 | ulb_loss_ratio: 0.05 21 | dropout_rate: 0.05 22 | num_ensemble: 5 23 | optim: AdamW 24 | lr: 1e-05 25 | momentum: 0.9 26 | weight_decay: 0.0005 27 | layer_decay: 0.75 28 | amp: False 29 | clip_grad: 0.0 30 | use_cat: False 31 | criterion: l1 32 | net: bert_base 33 | net_from_name: False 34 | use_pretrain: True 35 | pretrain_path: google/bert_uncased_L-4_H-512_A-8 36 | data_dir: ./data 37 | dataset: yelp_review 38 | train_sampler: RandomSampler 39 | num_workers: 4 40 | max_length: 512 41 | seed: 0 42 | world_size: 1 43 | rank: 0 44 | multiprocessing_distributed: False 45 | dist_url: tcp://127.0.0.1:10001 46 | dist_backend: nccl 47 | 
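The rows in the average-log CSVs above report each experiment aggregated over its repeated runs: per run, the best value of each metric reached during training is taken (minimum for error metrics such as MAE, maximum for correlation metrics such as SRCC), and the mean and standard deviation across the num_exp runs are written as mean±std. A minimal sketch of that aggregation step in Python; the per-seed values and the ddof=0 choice below are illustrative assumptions, not numbers or conventions taken from the repository:

import numpy as np

# hypothetical best-MAE values for one experiment, one entry per seed
per_seed_min_mae = [0.709, 0.721, 0.733]

values = np.array(per_seed_min_mae)
mean = values.mean()
std = values.std(ddof=0)  # population std; whether ddof=0 or 1 is used here is an assumption
print(f"{mean:.3f}±{std:.3f}")  # -> "0.721±0.010", the cell format used in results/*_average_log.csv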
-------------------------------------------------------------------------------- /config/audio/pimodel/pimodel_bvcc_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: pimodel 2 | save_dir: ./saved_models/audio 3 | save_name: pimodel_bvcc_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/audio/pimodel_bvcc_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 16 18 | ema_m: 0.0 19 | uratio: 1 20 | ulb_loss_ratio: 0.1 21 | unsup_warm_up: 0.4 22 | optim: AdamW 23 | lr: 2e-06 24 | momentum: 0.9 25 | weight_decay: 2e-05 26 | layer_decay: 0.75 27 | amp: False 28 | clip_grad: 0.0 29 | use_cat: False 30 | criterion: l1 31 | net: whisper_base 32 | net_from_name: False 33 | use_pretrain: True 34 | pretrain_path: openai/whisper-base 35 | data_dir: ./data 36 | dataset: bvcc 37 | train_sampler: RandomSampler 38 | num_workers: 8 39 | max_length_seconds: 6.0 40 | sample_rate: 16000 41 | preload: True 42 | seed: 0 43 | world_size: 1 44 | rank: 0 45 | multiprocessing_distributed: False 46 | dist_url: tcp://127.0.0.1:10001 47 | dist_backend: nccl 48 | -------------------------------------------------------------------------------- /config/nlp/meanteacher/meanteacher_yelp_review_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: meanteacher 2 | save_dir: ./saved_models/nlp 3 | save_name: meanteacher_yelp_review_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/nlp/meanteacher_yelp_review_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 8 18 | ema_m: 0.0 19 | uratio: 1 20 | ulb_loss_ratio: 0.1 21 | unsup_warm_up: 0.4 22 | optim: AdamW 23 | lr: 1e-05 24 | momentum: 0.9 25 | weight_decay: 0.0005 26 | layer_decay: 0.75 27 | amp: False 28 | clip_grad: 0.0 29 | use_cat: False 30 | criterion: l1 31 | net: bert_base 32 | net_from_name: False 33 | use_pretrain: True 34 | pretrain_path: google/bert_uncased_L-4_H-512_A-8 35 | data_dir: ./data 36 | dataset: yelp_review 37 | train_sampler: RandomSampler 38 | num_workers: 4 39 | max_length: 512 40 | seed: 0 41 | world_size: 1 42 | rank: 0 43 | multiprocessing_distributed: False 44 | dist_url: tcp://127.0.0.1:10001 45 | dist_backend: nccl 46 | -------------------------------------------------------------------------------- /config/audio/rda/rda_bvcc_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: rda 2 | save_dir: ./saved_models/audio 3 | save_name: rda_bvcc_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/audio/rda_bvcc_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 16 18 | ema_m: 0.0 19 | uratio: 1 20 | ulb_loss_ratio: 1.0 21 | unsup_warm_up: 0.4 22 | rda_num_refine_iter: 1024 23 | optim: AdamW 24 | lr: 2e-06 25 | momentum: 0.9 26 | weight_decay: 2e-05 27 | 
layer_decay: 0.75 28 | amp: False 29 | clip_grad: 0.0 30 | use_cat: False 31 | criterion: l1 32 | net: whisper_base 33 | net_from_name: False 34 | use_pretrain: True 35 | pretrain_path: openai/whisper-base 36 | data_dir: ./data 37 | dataset: bvcc 38 | train_sampler: RandomSampler 39 | num_workers: 8 40 | max_length_seconds: 6.0 41 | sample_rate: 16000 42 | preload: True 43 | seed: 0 44 | world_size: 1 45 | rank: 0 46 | multiprocessing_distributed: False 47 | dist_url: tcp://127.0.0.1:10001 48 | dist_backend: nccl 49 | -------------------------------------------------------------------------------- /config/audio/ucvme/ucvme_bvcc_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: ucvme 2 | save_dir: ./saved_models/audio 3 | save_name: ucvme_bvcc_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/audio/ucvme_bvcc_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 16 18 | ema_m: 0.0 19 | uratio: 1 20 | ulb_loss_ratio: 0.05 21 | dropout_rate: 0.05 22 | num_ensemble: 5 23 | optim: AdamW 24 | lr: 2e-06 25 | momentum: 0.9 26 | weight_decay: 2e-05 27 | layer_decay: 0.75 28 | amp: False 29 | clip_grad: 0.0 30 | use_cat: False 31 | criterion: l1 32 | net: whisper_base 33 | net_from_name: False 34 | use_pretrain: True 35 | pretrain_path: openai/whisper-base 36 | data_dir: ./data 37 | dataset: bvcc 38 | train_sampler: RandomSampler 39 | num_workers: 8 40 | max_length_seconds: 6.0 41 | sample_rate: 16000 42 | preload: True 43 | seed: 0 44 | world_size: 1 45 | rank: 0 46 | multiprocessing_distributed: False 47 | dist_url: tcp://127.0.0.1:10001 48 | dist_backend: nccl 49 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation 4 | Copyright (c) 2024 Pin-Yen Huang 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
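Every file under config/ (including the meanteacher config that follows) is a flat key: value YAML mapping, so it can be loaded directly into an argparse-style namespace. A minimal sketch, assuming PyYAML is installed; load_config is an illustrative helper, not a function defined in this repository:

import argparse

import yaml  # PyYAML, assumed available


def load_config(path):
    # the configs are flat mappings, so no nested merging is required
    with open(path) as f:
        cfg = yaml.safe_load(f)
    return argparse.Namespace(**cfg)


args = load_config("config/audio/meanteacher/meanteacher_bvcc_lb250_s0.yaml")
print(args.algorithm, args.batch_size)  # -> meanteacher 8
# note: PyYAML parses exponent-only literals such as "2e-06" as strings,
# so numeric fields like lr may need an explicit float() cast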
-------------------------------------------------------------------------------- /config/audio/meanteacher/meanteacher_bvcc_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: meanteacher 2 | save_dir: ./saved_models/audio 3 | save_name: meanteacher_bvcc_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/audio/meanteacher_bvcc_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 16 18 | ema_m: 0.0 19 | uratio: 1 20 | ulb_loss_ratio: 0.1 21 | unsup_warm_up: 0.4 22 | optim: AdamW 23 | lr: 2e-06 24 | momentum: 0.9 25 | weight_decay: 2e-05 26 | layer_decay: 0.75 27 | amp: False 28 | clip_grad: 0.0 29 | use_cat: False 30 | criterion: l1 31 | net: whisper_base 32 | net_from_name: False 33 | use_pretrain: True 34 | pretrain_path: openai/whisper-base 35 | data_dir: ./data 36 | dataset: bvcc 37 | train_sampler: RandomSampler 38 | num_workers: 8 39 | max_length_seconds: 6.0 40 | sample_rate: 16000 41 | preload: True 42 | seed: 0 43 | world_size: 1 44 | rank: 0 45 | multiprocessing_distributed: False 46 | dist_url: tcp://127.0.0.1:10001 47 | dist_backend: nccl 48 | -------------------------------------------------------------------------------- /config/audio/mixmatch/mixmatch_bvcc_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: mixmatch 2 | save_dir: ./saved_models/audio 3 | save_name: mixmatch_bvcc_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/audio/mixmatch_bvcc_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 16 18 | ema_m: 0.0 19 | uratio: 1 20 | ulb_loss_ratio: 0.1 21 | unsup_warm_up: 0.4 22 | mixup_alpha: 0.5 23 | optim: AdamW 24 | lr: 2e-06 25 | momentum: 0.9 26 | weight_decay: 2e-05 27 | layer_decay: 0.75 28 | amp: False 29 | clip_grad: 0.0 30 | use_cat: False 31 | criterion: l1 32 | net: whisper_base 33 | net_from_name: False 34 | use_pretrain: True 35 | pretrain_path: openai/whisper-base 36 | data_dir: ./data 37 | dataset: bvcc 38 | train_sampler: RandomSampler 39 | num_workers: 8 40 | max_length_seconds: 6.0 41 | sample_rate: 16000 42 | preload: True 43 | seed: 0 44 | world_size: 1 45 | rank: 0 46 | multiprocessing_distributed: False 47 | dist_url: tcp://127.0.0.1:10001 48 | dist_backend: nccl 49 | -------------------------------------------------------------------------------- /config/nlp/clss/clss_yelp_review_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: clss 2 | save_dir: ./saved_models/nlp 3 | save_name: clss_yelp_review_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/nlp/clss_yelp_review_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 8 18 | ema_m: 0.0 19 | uratio: 0.25 20 | lb_ctr_loss_ratio: 1.0 21 | ulb_ctr_loss_ratio: 0.05 22 | ulb_rank_loss_ratio: 0.01 23 | 
lambda_val: 2.0 24 | optim: AdamW 25 | lr: 1e-05 26 | momentum: 0.9 27 | weight_decay: 0.0005 28 | layer_decay: 0.75 29 | amp: False 30 | clip_grad: 0.0 31 | use_cat: False 32 | criterion: l1 33 | net: bert_base 34 | net_from_name: False 35 | use_pretrain: True 36 | pretrain_path: google/bert_uncased_L-4_H-512_A-8 37 | data_dir: ./data 38 | dataset: yelp_review 39 | train_sampler: RandomSampler 40 | num_workers: 4 41 | max_length: 512 42 | seed: 0 43 | world_size: 1 44 | rank: 0 45 | multiprocessing_distributed: False 46 | dist_url: tcp://127.0.0.1:10001 47 | dist_backend: nccl 48 | -------------------------------------------------------------------------------- /config/nlp/mixmatch/mixmatch_yelp_review_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: mixmatch 2 | save_dir: ./saved_models/nlp 3 | save_name: mixmatch_yelp_review_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/nlp/mixmatch_yelp_review_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 8 18 | ema_m: 0.0 19 | uratio: 1 20 | ulb_loss_ratio: 0.1 21 | unsup_warm_up: 0.4 22 | mixup_alpha: 0.5 23 | mixup_manifold: True 24 | optim: AdamW 25 | lr: 1e-05 26 | momentum: 0.9 27 | weight_decay: 0.0005 28 | layer_decay: 0.75 29 | amp: False 30 | clip_grad: 0.0 31 | use_cat: False 32 | criterion: l1 33 | net: bert_base 34 | net_from_name: False 35 | use_pretrain: True 36 | pretrain_path: google/bert_uncased_L-4_H-512_A-8 37 | data_dir: ./data 38 | dataset: yelp_review 39 | train_sampler: RandomSampler 40 | num_workers: 4 41 | max_length: 512 42 | seed: 0 43 | world_size: 1 44 | rank: 0 45 | multiprocessing_distributed: False 46 | dist_url: tcp://127.0.0.1:10001 47 | dist_backend: nccl 48 | -------------------------------------------------------------------------------- /config/nlp/rankup/rankup_yelp_review_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: rankup 2 | save_dir: ./saved_models/nlp 3 | save_name: rankup_yelp_review_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/nlp/rankup_yelp_review_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 8 18 | ema_m: 0.0 19 | uratio: 1 20 | arc_loss_ratio: 0.2 21 | arc_ulb_loss_ratio: 1.0 22 | hard_label: True 23 | T: 0.5 24 | p_cutoff: 0.95 25 | optim: AdamW 26 | lr: 1e-05 27 | momentum: 0.9 28 | weight_decay: 0.0005 29 | layer_decay: 0.75 30 | amp: False 31 | clip_grad: 0.0 32 | use_cat: False 33 | criterion: l1 34 | net: bert_base 35 | net_from_name: False 36 | use_pretrain: True 37 | pretrain_path: google/bert_uncased_L-4_H-512_A-8 38 | data_dir: ./data 39 | dataset: yelp_review 40 | train_sampler: RandomSampler 41 | num_workers: 4 42 | max_length: 512 43 | seed: 0 44 | world_size: 1 45 | rank: 0 46 | multiprocessing_distributed: False 47 | dist_url: tcp://127.0.0.1:10001 48 | dist_backend: nccl 49 | -------------------------------------------------------------------------------- /config/audio/clss/clss_bvcc_lb250_s0.yaml: 
-------------------------------------------------------------------------------- 1 | algorithm: clss 2 | save_dir: ./saved_models/audio 3 | save_name: clss_bvcc_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/audio/clss_bvcc_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 16 18 | ema_m: 0.0 19 | uratio: 1 20 | lb_ctr_loss_ratio: 1.0 21 | ulb_ctr_loss_ratio: 0.05 22 | ulb_rank_loss_ratio: 0.01 23 | lambda_val: 2.0 24 | optim: AdamW 25 | lr: 2e-06 26 | momentum: 0.9 27 | weight_decay: 2e-05 28 | layer_decay: 0.75 29 | amp: False 30 | clip_grad: 0.0 31 | use_cat: False 32 | criterion: l1 33 | net: whisper_base 34 | net_from_name: False 35 | use_pretrain: True 36 | pretrain_path: openai/whisper-base 37 | data_dir: ./data 38 | dataset: bvcc 39 | train_sampler: RandomSampler 40 | num_workers: 8 41 | max_length_seconds: 6.0 42 | sample_rate: 16000 43 | preload: True 44 | seed: 0 45 | world_size: 1 46 | rank: 0 47 | multiprocessing_distributed: False 48 | dist_url: tcp://127.0.0.1:10001 49 | dist_backend: nccl 50 | -------------------------------------------------------------------------------- /config/audio/rankup/rankup_bvcc_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: rankup 2 | save_dir: ./saved_models/audio 3 | save_name: rankup_bvcc_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/audio/rankup_bvcc_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 16 18 | ema_m: 0.0 19 | uratio: 1 20 | arc_loss_ratio: 0.2 21 | arc_ulb_loss_ratio: 1.0 22 | hard_label: True 23 | T: 0.5 24 | p_cutoff: 0.95 25 | optim: AdamW 26 | lr: 2e-06 27 | momentum: 0.9 28 | weight_decay: 2e-05 29 | layer_decay: 0.75 30 | amp: False 31 | clip_grad: 0.0 32 | use_cat: False 33 | criterion: l1 34 | net: whisper_base 35 | net_from_name: False 36 | use_pretrain: True 37 | pretrain_path: openai/whisper-base 38 | data_dir: ./data 39 | dataset: bvcc 40 | train_sampler: RandomSampler 41 | num_workers: 8 42 | max_length_seconds: 6.0 43 | sample_rate: 16000 44 | preload: True 45 | seed: 0 46 | world_size: 1 47 | rank: 0 48 | multiprocessing_distributed: False 49 | dist_url: tcp://127.0.0.1:10001 50 | dist_backend: nccl 51 | -------------------------------------------------------------------------------- /config/classic_cv/rankuprda/rankuprda_utkface_lb50_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: rankuprda 2 | save_dir: ./saved_models/classic_cv 3 | save_name: rankuprda_utkface_lb50_s0 4 | resume: True 5 | load_path: ./saved_models/classic_cv/rankuprda_utkface_lb50_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 256 11 | num_train_iter: 262144 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_labels: 50 15 | batch_size: 32 16 | eval_batch_size: 256 17 | ema_m: 0.999 18 | uratio: 7 19 | ulb_loss_ratio: 1.0 20 | unsup_warm_up: 0.4 21 | rda_num_refine_iter: 1024 22 | arc_loss_ratio: 0.2 23 | arc_ulb_loss_ratio: 1.0 24 | hard_label: True 25 | 
T: 0.5 26 | p_cutoff: 0.95 27 | optim: SGD 28 | lr: 0.01 29 | momentum: 0.9 30 | weight_decay: 0.001 31 | layer_decay: 1.0 32 | amp: False 33 | clip_grad: 0.0 34 | use_cat: True 35 | criterion: l1 36 | net: wrn_28_2 37 | net_from_name: False 38 | data_dir: ./data 39 | dataset: utkface 40 | train_sampler: RandomSampler 41 | num_workers: 4 42 | crop_ratio: 0.875 43 | img_size: 40 44 | preload: False 45 | seed: 0 46 | world_size: 1 47 | rank: 0 48 | multiprocessing_distributed: False 49 | dist_url: tcp://127.0.0.1:10001 50 | dist_backend: nccl 51 | -------------------------------------------------------------------------------- /config/classic_cv/rankuprda/rankuprda_utkface_lb2000_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: rankuprda 2 | save_dir: ./saved_models/classic_cv 3 | save_name: rankuprda_utkface_lb2000_s0 4 | resume: True 5 | load_path: ./saved_models/classic_cv/rankuprda_utkface_lb2000_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 256 11 | num_train_iter: 262144 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_labels: 2000 15 | batch_size: 32 16 | eval_batch_size: 256 17 | ema_m: 0.999 18 | uratio: 7 19 | ulb_loss_ratio: 1.0 20 | unsup_warm_up: 0.4 21 | rda_num_refine_iter: 1024 22 | arc_loss_ratio: 0.2 23 | arc_ulb_loss_ratio: 1.0 24 | hard_label: True 25 | T: 0.5 26 | p_cutoff: 0.95 27 | optim: SGD 28 | lr: 0.01 29 | momentum: 0.9 30 | weight_decay: 0.001 31 | layer_decay: 1.0 32 | amp: False 33 | clip_grad: 0.0 34 | use_cat: True 35 | criterion: l1 36 | net: wrn_28_2 37 | net_from_name: False 38 | data_dir: ./data 39 | dataset: utkface 40 | train_sampler: RandomSampler 41 | num_workers: 4 42 | crop_ratio: 0.875 43 | img_size: 40 44 | preload: False 45 | seed: 0 46 | world_size: 1 47 | rank: 0 48 | multiprocessing_distributed: False 49 | dist_url: tcp://127.0.0.1:10003 50 | dist_backend: nccl 51 | -------------------------------------------------------------------------------- /config/classic_cv/rankuprda/rankuprda_utkface_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: rankuprda 2 | save_dir: ./saved_models/classic_cv 3 | save_name: rankuprda_utkface_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/classic_cv/rankuprda_utkface_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 256 11 | num_train_iter: 262144 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_labels: 250 15 | batch_size: 32 16 | eval_batch_size: 256 17 | ema_m: 0.999 18 | uratio: 7 19 | ulb_loss_ratio: 1.0 20 | unsup_warm_up: 0.4 21 | rda_num_refine_iter: 1024 22 | arc_loss_ratio: 0.2 23 | arc_ulb_loss_ratio: 1.0 24 | hard_label: True 25 | T: 0.5 26 | p_cutoff: 0.95 27 | optim: SGD 28 | lr: 0.01 29 | momentum: 0.9 30 | weight_decay: 0.001 31 | layer_decay: 1.0 32 | amp: False 33 | clip_grad: 0.0 34 | use_cat: True 35 | criterion: l1 36 | net: wrn_28_2 37 | net_from_name: False 38 | data_dir: ./data 39 | dataset: utkface 40 | train_sampler: RandomSampler 41 | num_workers: 4 42 | crop_ratio: 0.875 43 | img_size: 40 44 | preload: False 45 | seed: 0 46 | world_size: 1 47 | rank: 0 48 | multiprocessing_distributed: False 49 | dist_url: tcp://127.0.0.1:10002 50 | dist_backend: nccl 51 | -------------------------------------------------------------------------------- /config/nlp/rankuprda/rankuprda_yelp_review_lb250_s0.yaml: 
-------------------------------------------------------------------------------- 1 | algorithm: rankuprda 2 | save_dir: ./saved_models/nlp 3 | save_name: rankuprda_yelp_review_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/nlp/rankuprda_yelp_review_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 8 18 | ema_m: 0.0 19 | uratio: 1 20 | ulb_loss_ratio: 1.0 21 | unsup_warm_up: 0.4 22 | rda_num_refine_iter: 1024 23 | arc_loss_ratio: 0.2 24 | arc_ulb_loss_ratio: 1.0 25 | hard_label: True 26 | T: 0.5 27 | p_cutoff: 0.95 28 | optim: AdamW 29 | lr: 1e-05 30 | momentum: 0.9 31 | weight_decay: 0.0005 32 | layer_decay: 0.75 33 | amp: False 34 | clip_grad: 0.0 35 | use_cat: False 36 | criterion: l1 37 | net: bert_base 38 | net_from_name: False 39 | use_pretrain: True 40 | pretrain_path: google/bert_uncased_L-4_H-512_A-8 41 | data_dir: ./data 42 | dataset: yelp_review 43 | train_sampler: RandomSampler 44 | num_workers: 4 45 | max_length: 512 46 | seed: 0 47 | world_size: 1 48 | rank: 0 49 | multiprocessing_distributed: False 50 | dist_url: tcp://127.0.0.1:10001 51 | dist_backend: nccl 52 | -------------------------------------------------------------------------------- /config/audio/rankuprda/rankuprda_bvcc_lb250_s0.yaml: -------------------------------------------------------------------------------- 1 | algorithm: rankuprda 2 | save_dir: ./saved_models/audio 3 | save_name: rankuprda_bvcc_lb250_s0 4 | resume: True 5 | load_path: ./saved_models/audio/rankuprda_bvcc_lb250_s0/latest_model.pth 6 | overwrite: True 7 | use_tensorboard: True 8 | use_wandb: False 9 | use_aim: False 10 | epoch: 100 11 | num_train_iter: 102400 12 | num_eval_iter: 1024 13 | num_log_iter: 256 14 | num_warmup_iter: 5120 15 | num_labels: 250 16 | batch_size: 8 17 | eval_batch_size: 16 18 | ema_m: 0.0 19 | uratio: 1 20 | ulb_loss_ratio: 1.0 21 | unsup_warm_up: 0.4 22 | rda_num_refine_iter: 1024 23 | arc_loss_ratio: 0.2 24 | arc_ulb_loss_ratio: 1.0 25 | hard_label: True 26 | T: 0.5 27 | p_cutoff: 0.95 28 | optim: AdamW 29 | lr: 2e-06 30 | momentum: 0.9 31 | weight_decay: 2e-05 32 | layer_decay: 0.75 33 | amp: False 34 | clip_grad: 0.0 35 | use_cat: False 36 | criterion: l1 37 | net: whisper_base 38 | net_from_name: False 39 | use_pretrain: True 40 | pretrain_path: openai/whisper-base 41 | data_dir: ./data 42 | dataset: bvcc 43 | train_sampler: RandomSampler 44 | num_workers: 8 45 | max_length_seconds: 6.0 46 | sample_rate: 16000 47 | preload: True 48 | seed: 0 49 | world_size: 1 50 | rank: 0 51 | multiprocessing_distributed: False 52 | dist_url: tcp://127.0.0.1:10001 53 | dist_backend: nccl 54 | -------------------------------------------------------------------------------- /semilearn/core/hooks/hook.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
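# Base class for training-loop callbacks: Hook defines a no-op method for each
# stage listed in `stages`, and concrete hooks (e.g. ParamUpdateHook,
# EvaluationHook, and LoggingHook later in this listing) override only the
# stages they need; the every_n_* helpers let them fire on a fixed period.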
3 | # Ref: https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/hook.py 4 | 5 | 6 | class Hook: 7 | stages = ("before_run", "before_train_epoch", "before_train_step", "after_train_step", "after_train_epoch", "after_run") 8 | 9 | def before_train_epoch(self, algorithm): 10 | pass 11 | 12 | def after_train_epoch(self, algorithm): 13 | pass 14 | 15 | def before_train_step(self, algorithm): 16 | pass 17 | 18 | def after_train_step(self, algorithm): 19 | pass 20 | 21 | def before_run(self, algorithm): 22 | pass 23 | 24 | def after_run(self, algorithm): 25 | pass 26 | 27 | def every_n_epochs(self, algorithm, n): 28 | return (algorithm.epoch + 1) % n == 0 if n > 0 else False 29 | 30 | def every_n_iters(self, algorithm, n): 31 | return (algorithm.it + 1) % n == 0 if n > 0 else False 32 | 33 | def end_of_epoch(self, algorithm): 34 | return (algorithm.it + 1) % len(algorithm.data_loader["train_lb"]) == 0 35 | 36 | def is_last_epoch(self, algorithm): 37 | return algorithm.epoch + 1 == algorithm.epochs 38 | 39 | def is_last_iter(self, algorithm): 40 | return algorithm.it + 1 == algorithm.num_train_iter 41 | -------------------------------------------------------------------------------- /semilearn/core/criterions/cross_entropy.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from torch.nn import functional as F 8 | 9 | 10 | def ce_loss(logits, targets, reduction="none"): 11 | """ 12 | Cross-entropy loss in PyTorch. 13 | 14 | Args: 15 | logits: logit values, shape=[Batch size, # of classes] 16 | targets: integer or vector, shape=[Batch size] or [Batch size, # of classes] 17 | (integer targets of shape [Batch size] are treated as hard labels; targets with the same shape as logits are treated as soft-label distributions) 18 | reduction: the reduction argument 19 | """ 20 | if logits.shape == targets.shape: 21 | # soft / one-hot target: cross-entropy against the full distribution 22 | log_pred = F.log_softmax(logits, dim=-1) 23 | nll_loss = torch.sum(-targets * log_pred, dim=1) 24 | if reduction == "none": 25 | return nll_loss 26 | else: 27 | return nll_loss.mean() 28 | else: 29 | log_pred = F.log_softmax(logits, dim=-1) 30 | return F.nll_loss(log_pred, targets, reduction=reduction) 31 | 32 | 33 | class CELoss(nn.Module): 34 | """ 35 | Wrapper for cross-entropy loss 36 | """ 37 | 38 | def forward(self, logits, targets, reduction="none"): 39 | return ce_loss(logits, targets, reduction) 40 | -------------------------------------------------------------------------------- /semilearn/core/criterions/consistency.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import torch.nn as nn 6 | from torch.nn import functional as F 7 | 8 | 9 | def consistency_loss(logits, targets, name="mse", mask=None): 10 | """ 11 | Consistency regularization loss for semi-supervised regression.
12 | 13 | Args: 14 | logits: logits to compute the loss on (gradients back-propagate through these), usually from the strongly augmented unlabeled samples 15 | targets: pseudo-labels (either hard labels or soft labels) 16 | name: use mean-absolute-error ('l1') or mean-squared-error ('mse') as the loss 17 | mask: per-sample mask applied when computing the loss, typically used for confidence masking 18 | """ 19 | 20 | assert name in ["l1", "mse"] 21 | # targets are pseudo-labels and are expected to be detached already 22 | if name == "l1": 23 | loss = F.l1_loss(logits, targets, reduction="none") 24 | else: 25 | loss = F.mse_loss(logits, targets, reduction="none") 26 | 27 | if mask is not None: 28 | # mask must be a float tensor (not boolean) so it scales the per-sample loss 29 | loss = loss * mask 30 | 31 | return loss.mean() 32 | 33 | 34 | class ConsistencyLoss(nn.Module): 35 | """ 36 | Wrapper for consistency loss 37 | """ 38 | 39 | def forward(self, logits, targets, name="mse", mask=None): 40 | return consistency_loss(logits, targets, name, mask) 41 | -------------------------------------------------------------------------------- /semilearn/algorithms/utils/ops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import torch 6 | import numpy as np 7 | 8 | 9 | @torch.no_grad() 10 | def concat_all_gather(tensor): 11 | """ 12 | Performs all_gather operation on the provided tensors. 13 | *** Warning ***: torch.distributed.all_gather has no gradient. 14 | """ 15 | tensors_gather = [torch.ones_like(tensor) for _ in range(torch.distributed.get_world_size())] 16 | torch.distributed.all_gather(tensors_gather, tensor) 17 | 18 | output = torch.cat(tensors_gather, dim=0) 19 | return output 20 | 21 | 22 | @torch.no_grad() 23 | def mixup_one_target(x, y, alpha=1.0, is_bias=False): 24 | """Returns mixed inputs, mixed targets, and lambda""" 25 | if alpha > 0: 26 | lam = np.random.beta(alpha, alpha) 27 | else: 28 | lam = 1 29 | if is_bias: 30 | lam = max(lam, 1 - lam) 31 | 32 | index = torch.randperm(x.size(0)).to(x.device) 33 | 34 | mixed_x = lam * x + (1 - lam) * x[index] 35 | mixed_y = lam * y + (1 - lam) * y[index] 36 | return mixed_x, mixed_y, lam 37 | 38 | 39 | @torch.no_grad() 40 | def smooth_targets(logits, targets, smoothing=0.1): 41 | """ 42 | Label smoothing: spread `smoothing` mass uniformly over the non-target classes. 43 | """ 44 | true_dist = torch.zeros_like(logits) 45 | true_dist.fill_(smoothing / (logits.shape[-1] - 1)) 46 | true_dist.scatter_(1, targets.data.unsqueeze(1), (1 - smoothing)) 47 | return true_dist 48 | -------------------------------------------------------------------------------- /semilearn/core/criterions/cls_consistency.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import torch 6 | import torch.nn as nn 7 | from torch.nn import functional as F 8 | 9 | from .cross_entropy import ce_loss 10 | 11 | 12 | def cls_consistency_loss(logits, targets, name="ce", mask=None): 13 | """ 14 | Consistency regularization loss for semi-supervised classification.
15 | 16 | Args: 17 | logits: logits to compute the loss on (gradients back-propagate through these), usually from the strongly augmented unlabeled samples 18 | targets: pseudo-labels (either hard labels or soft labels) 19 | name: use cross-entropy ('ce') or mean-squared-error ('mse') as the loss 20 | mask: per-sample mask applied when computing the loss, typically used for confidence masking 21 | """ 22 | 23 | assert name in ["ce", "mse"] 24 | # targets are pseudo-labels and are expected to be detached already 25 | if name == "mse": 26 | probs = torch.softmax(logits, dim=-1) 27 | loss = F.mse_loss(probs, targets, reduction="none").mean(dim=1) 28 | else: 29 | loss = ce_loss(logits, targets, reduction="none") 30 | 31 | if mask is not None: 32 | # mask must be a float tensor (not boolean) so it scales the per-sample loss 33 | loss = loss * mask 34 | 35 | return loss.mean() 36 | 37 | 38 | class ClsConsistencyLoss(nn.Module): 39 | """ 40 | Wrapper for classification consistency loss 41 | """ 42 | 43 | def forward(self, logits, targets, name="ce", mask=None): 44 | return cls_consistency_loss(logits, targets, name, mask) 45 | -------------------------------------------------------------------------------- /semilearn/datasets/audio_datasets/augmentation/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Pin-Yen Huang. 2 | # Licensed under the MIT License. 3 | 4 | import random 5 | import warnings 6 | 7 | from audiomentations import * 8 | 9 | 10 | class AudioTransforms: 11 | """ 12 | Strong transformation function for audio data. 13 | 14 | Args: 15 | max_length_seconds (float): Maximum output length of the audio in seconds. 16 | dataset_name (str): Name of the dataset (selects which set of effects is applied). 17 | """ 18 | 19 | def __init__(self, max_length_seconds, dataset_name=""): 20 | self.max_length_seconds = max_length_seconds 21 | self.effects_list, self.n = self.get_effects_list(dataset_name) 22 | self.adjust_duration = AdjustDuration(duration_seconds=max_length_seconds, p=1.0) 23 | 24 | def get_effects_list(self, dataset_name): 25 | if dataset_name.lower() in ["bvcc", "vcc2018"]: 26 | effects_list = [TimeMask(p=1.0), Trim(p=1.0), Padding(p=1.0)] 27 | num_effects = 1 28 | else: 29 | effects_list = [Gain(p=1.0), PitchShift(p=1.0), TimeStretch(p=1.0), RoomSimulator(p=1.0)] 30 | num_effects = 2 31 | return effects_list, num_effects 32 | 33 | def __call__(self, audio, sample_rate): 34 | transform = Compose(random.choices(self.effects_list, k=self.n)) 35 | with warnings.catch_warnings(): 36 | warnings.filterwarnings("ignore", message="Possible clipped samples in output.") 37 | aug_wav = transform(samples=audio, sample_rate=sample_rate) 38 | aug_wav = self.adjust_duration(aug_wav, sample_rate=sample_rate) 39 | return aug_wav 40 | -------------------------------------------------------------------------------- /semilearn/datasets/cv_datasets/augmentation/transforms.py: -------------------------------------------------------------------------------- 1 | from torchvision import transforms 2 | 3 | from .randaugment import RandAugment 4 | 5 | 6 | mean, std = {}, {} 7 | mean["utkface"] = [0.59632254, 0.45671629, 0.39103324] 8 | std["utkface"] = [0.25907077, 0.23132719, 0.22686818] 9 | 10 | 11 | def get_val_transforms(crop_size, dataset_name): 12 | return transforms.Compose( 13 | [ 14 | transforms.Resize(crop_size), 15 | transforms.ToTensor(), 16 | transforms.Normalize( 17 | mean[dataset_name.lower()], 18 | std[dataset_name.lower()], 19 | ), 20 | ] 21 | ) 22 | 23 | 24 | def get_weak_transforms(crop_size, crop_ratio, dataset_name): 25 | return transforms.Compose( 26 | [ 27 |
transforms.Resize(crop_size), 28 | transforms.RandomCrop(crop_size, padding=int(crop_size * (1 - crop_ratio)), padding_mode="reflect"), 29 | transforms.RandomHorizontalFlip(), 30 | transforms.ToTensor(), 31 | transforms.Normalize(mean[dataset_name.lower()], std[dataset_name.lower()]), 32 | ] 33 | ) 34 | 35 | 36 | def get_strong_transforms(crop_size, crop_ratio, dataset_name): 37 | return transforms.Compose( 38 | [ 39 | transforms.Resize(crop_size), 40 | transforms.RandomCrop(crop_size, padding=int(crop_size * (1 - crop_ratio)), padding_mode="reflect"), 41 | transforms.RandomHorizontalFlip(), 42 | RandAugment(3, 5), 43 | transforms.ToTensor(), 44 | transforms.Normalize(mean[dataset_name.lower()], std[dataset_name.lower()]), 45 | ] 46 | ) 47 | -------------------------------------------------------------------------------- /semilearn/core/hooks/param_update.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import torch 5 | 6 | from .hook import Hook 7 | 8 | 9 | class ParamUpdateHook(Hook): 10 | """ 11 | Parameter Update Hook 12 | 13 | necessary for updating the model parameters 14 | """ 15 | 16 | def before_train_step(self, algorithm): 17 | if hasattr(algorithm, "start_run"): 18 | torch.cuda.synchronize() 19 | algorithm.start_run.record() 20 | 21 | # called after each train_step to update parameters 22 | def after_train_step(self, algorithm): 23 | loss = algorithm.out_dict["loss"] 24 | # gradients are cleared with algorithm.model.zero_grad() after the optimizer step below 25 | # update parameters 26 | if algorithm.use_amp: 27 | algorithm.loss_scaler.scale(loss).backward() 28 | if algorithm.clip_grad > 0: 29 | algorithm.loss_scaler.unscale_(algorithm.optimizer) 30 | torch.nn.utils.clip_grad_norm_(algorithm.model.parameters(), algorithm.clip_grad) 31 | algorithm.loss_scaler.step(algorithm.optimizer) 32 | algorithm.loss_scaler.update() 33 | else: 34 | loss.backward() 35 | if algorithm.clip_grad > 0: 36 | torch.nn.utils.clip_grad_norm_(algorithm.model.parameters(), algorithm.clip_grad) 37 | algorithm.optimizer.step() 38 | 39 | if algorithm.scheduler is not None: 40 | algorithm.scheduler.step() 41 | algorithm.model.zero_grad() 42 | 43 | if hasattr(algorithm, "end_run"): 44 | algorithm.end_run.record() 45 | torch.cuda.synchronize() 46 | algorithm.log_dict["train/run_time"] = algorithm.start_run.elapsed_time(algorithm.end_run) / 1000.0 47 | -------------------------------------------------------------------------------- /semilearn/algorithms/hooks/pseudo_label.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License.
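# PseudoLabelingHook turns model outputs on unlabeled data into training
# targets: hard labels via argmax (optionally label-smoothed), or soft labels
# via a temperature-scaled probability distribution.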
3 | 4 | import torch 5 | 6 | from semilearn.core.hooks import Hook 7 | from semilearn.algorithms.utils import smooth_targets 8 | 9 | 10 | class PseudoLabelingHook(Hook): 11 | """ 12 | Pseudo Labeling Hook 13 | """ 14 | 15 | def __init__(self): 16 | super().__init__() 17 | 18 | @torch.no_grad() 19 | def gen_ulb_targets( 20 | self, 21 | algorithm, 22 | logits, 23 | use_hard_label=True, 24 | T=1.0, 25 | softmax=True, # whether to apply softmax; if True, the input must be raw logits 26 | label_smoothing=0.0, 27 | ): 28 | """ 29 | generate pseudo-labels from logits/probs 30 | 31 | Args: 32 | algorithm: base algorithm 33 | logits: logits (or probs, need to set softmax to False) 34 | use_hard_label: flag of using hard labels instead of soft labels 35 | T: temperature parameter 36 | softmax: flag of using softmax on logits 37 | label_smoothing: label_smoothing parameter 38 | """ 39 | 40 | logits = logits.detach() 41 | if use_hard_label: 42 | # return hard label directly 43 | pseudo_label = torch.argmax(logits, dim=-1) 44 | if label_smoothing: 45 | pseudo_label = smooth_targets(logits, pseudo_label, label_smoothing) 46 | return pseudo_label 47 | 48 | # return soft label 49 | if softmax: 50 | # pseudo_label = torch.softmax(logits / T, dim=-1) 51 | pseudo_label = algorithm.compute_prob(logits / T) 52 | else: 53 | # input is already a probability distribution; use it as-is 54 | pseudo_label = logits 55 | return pseudo_label 56 | -------------------------------------------------------------------------------- /semilearn/core/hooks/evaluation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | # Ref: https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/evaluation.py 5 | 6 | import os 7 | 8 | from .hook import Hook 9 | 10 | 11 | class EvaluationHook(Hook): 12 | """ 13 | Evaluation Hook for validation during training 14 | """ 15 | 16 | def after_train_step(self, algorithm): 17 | if self.every_n_iters(algorithm, algorithm.num_eval_iter) or self.is_last_iter(algorithm): 18 | algorithm.print_fn("validating...") 19 | eval_dict = algorithm.evaluate("eval") 20 | algorithm.log_dict.update(eval_dict) 21 | 22 | # update best metrics 23 | if algorithm.log_dict["eval/mae"] < algorithm.best_eval_mae: 24 | algorithm.best_eval_mae = algorithm.log_dict["eval/mae"] 25 | algorithm.best_it = algorithm.it 26 | 27 | def after_run(self, algorithm): 28 | if not algorithm.args.multiprocessing_distributed or ( 29 | algorithm.args.multiprocessing_distributed and algorithm.args.rank % algorithm.ngpus_per_node == 0 30 | ): 31 | save_path = os.path.join(algorithm.save_dir, algorithm.save_name) 32 | algorithm.save_model("latest_model.pth", save_path) 33 | 34 | results_dict = {"eval/mae": algorithm.best_eval_mae, "eval/best_it": algorithm.best_it} 35 | if "test" in algorithm.loader_dict: 36 | # load the best model and evaluate on test dataset 37 | best_model_path = os.path.join(algorithm.args.save_dir, algorithm.args.save_name, "model_best.pth") 38 | algorithm.load_model(best_model_path) 39 | test_dict = algorithm.evaluate("test") 40 | results_dict["test/best_mae"] = test_dict["test/mae"] 41 | algorithm.results_dict = results_dict 42 | -------------------------------------------------------------------------------- /semilearn/algorithms/rda/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Pin-Yen
Huang. 2 | # Licensed under the MIT License. 3 | 4 | import torch 5 | import numpy as np 6 | 7 | from semilearn.core.hooks import Hook 8 | 9 | 10 | class RDAHook(Hook): 11 | """ 12 | RDA Hook 13 | """ 14 | 15 | def __init__(self, train_ulb_len, lb_targets, num_refine_iter=1024): 16 | super(RDAHook, self).__init__() 17 | self.train_ulb_len = train_ulb_len 18 | self.sorted_lb_targets, _ = torch.sort(torch.tensor(lb_targets)) 19 | self.num_refine_iter = num_refine_iter 20 | 21 | self.pseudo_raw = torch.ones(self.train_ulb_len, dtype=torch.float32) 22 | self.pseudo_refine = torch.ones(self.train_ulb_len, dtype=torch.float32) 23 | 24 | @torch.no_grad() 25 | def gen_ulb_targets(self, algorithm, logits): 26 | logits = logits.detach() 27 | pseudo_label = self.refine_pseudo_labels(algorithm.idx_ulb, logits, algorithm.it, algorithm.epoch) 28 | return pseudo_label.to(logits.device) 29 | 30 | @torch.no_grad() 31 | def refine_pseudo_labels(self, idx_ulb, logits_x_ulb, it, epoch): 32 | self.pseudo_raw[idx_ulb.to(self.pseudo_raw.device)] = logits_x_ulb.data.cpu().to(self.pseudo_raw.dtype) 33 | if it % self.num_refine_iter == 0: 34 | self.apply_dist_align() 35 | if epoch > 0: 36 | logits_x_ulb = self.pseudo_refine[idx_ulb.to(self.pseudo_raw.device)].detach() 37 | return logits_x_ulb 38 | 39 | @torch.no_grad() 40 | def apply_dist_align(self): 41 | """ 42 | Apply distribution alignment to refine pseudo labels. 43 | """ 44 | cdf_pseudo = np.linspace(0, 1, len(self.pseudo_raw)) 45 | cdf_target = np.linspace(0, 1, len(self.sorted_lb_targets)) 46 | pseudo_refine = np.interp(cdf_pseudo, cdf_target, self.sorted_lb_targets.cpu().numpy()) 47 | idxes = torch.argsort(self.pseudo_raw) 48 | self.pseudo_refine[idxes] = torch.FloatTensor(pseudo_refine).to(self.pseudo_refine.device) 49 | -------------------------------------------------------------------------------- /semilearn/core/hooks/priority.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | # Ref: https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/priority.py 4 | 5 | from enum import Enum 6 | from typing import Union 7 | 8 | 9 | class Priority(Enum): 10 | """Hook priority levels. 11 | +--------------+------------+ 12 | | Level | Value | 13 | +==============+============+ 14 | | HIGHEST | 0 | 15 | +--------------+------------+ 16 | | VERY_HIGH | 10 | 17 | +--------------+------------+ 18 | | HIGH | 30 | 19 | +--------------+------------+ 20 | | ABOVE_NORMAL | 40 | 21 | +--------------+------------+ 22 | | NORMAL | 50 | 23 | +--------------+------------+ 24 | | BELOW_NORMAL | 60 | 25 | +--------------+------------+ 26 | | LOW | 70 | 27 | +--------------+------------+ 28 | | VERY_LOW | 90 | 29 | +--------------+------------+ 30 | | LOWEST | 100 | 31 | +--------------+------------+ 32 | """ 33 | 34 | HIGHEST = 0 35 | VERY_HIGH = 10 36 | HIGH = 30 37 | ABOVE_NORMAL = 40 38 | NORMAL = 50 39 | BELOW_NORMAL = 60 40 | LOW = 70 41 | VERY_LOW = 90 42 | LOWEST = 100 43 | 44 | 45 | def get_priority(priority: Union[int, str, Priority]) -> int: 46 | """Get priority value. 47 | Args: 48 | priority (int or str or :obj:`Priority`): Priority. 49 | Returns: 50 | int: The priority value. 
51 | """ 52 | if isinstance(priority, int): 53 | if priority < 0 or priority > 100: 54 | raise ValueError("priority must be between 0 and 100") 55 | return priority 56 | elif isinstance(priority, Priority): 57 | return priority.value 58 | elif isinstance(priority, str): 59 | return Priority[priority.upper()].value 60 | else: 61 | raise TypeError("priority must be an integer or Priority enum value") 62 | -------------------------------------------------------------------------------- /semilearn/algorithms/rankuprda/rda.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Pin-Yen Huang. 2 | # Licensed under the MIT License. 3 | 4 | import torch 5 | import numpy as np 6 | 7 | from semilearn.core.hooks import Hook 8 | 9 | 10 | class RDAHook(Hook): 11 | """ 12 | RDA Hook 13 | """ 14 | 15 | def __init__(self, train_ulb_len, lb_targets, num_refine_iter=1024): 16 | super(RDAHook, self).__init__() 17 | self.train_ulb_len = train_ulb_len 18 | self.sorted_lb_targets, _ = torch.sort(torch.tensor(lb_targets)) 19 | self.num_refine_iter = num_refine_iter 20 | 21 | self.pseudo_raw = torch.ones(self.train_ulb_len, dtype=torch.float32) 22 | self.pseudo_refine = torch.ones(self.train_ulb_len, dtype=torch.float32) 23 | 24 | @torch.no_grad() 25 | def gen_ulb_targets(self, algorithm, logits): 26 | logits = logits.detach() 27 | pseudo_label = self.refine_pseudo_labels(algorithm.idx_ulb, logits, algorithm.it, algorithm.epoch) 28 | return pseudo_label.to(logits.device) 29 | 30 | @torch.no_grad() 31 | def refine_pseudo_labels(self, idx_ulb, logits_x_ulb, it, epoch): 32 | self.pseudo_raw[idx_ulb.to(self.pseudo_raw.device)] = logits_x_ulb.data.cpu().to(self.pseudo_raw.dtype) 33 | if it % self.num_refine_iter == 0: 34 | self.apply_dist_align() 35 | if epoch > 0: 36 | logits_x_ulb = self.pseudo_refine[idx_ulb.to(self.pseudo_raw.device)].detach() 37 | return logits_x_ulb 38 | 39 | @torch.no_grad() 40 | def apply_dist_align(self): 41 | """ 42 | Apply distribution alignment to refine pseudo labels. 43 | """ 44 | cdf_pseudo = np.linspace(0, 1, len(self.pseudo_raw)) 45 | cdf_target = np.linspace(0, 1, len(self.sorted_lb_targets)) 46 | pseudo_refine = np.interp(cdf_pseudo, cdf_target, self.sorted_lb_targets.cpu().numpy()) 47 | idxes = torch.argsort(self.pseudo_raw) 48 | self.pseudo_refine[idxes] = torch.FloatTensor(pseudo_refine).to(self.pseudo_refine.device) 49 | -------------------------------------------------------------------------------- /semilearn/core/hooks/logging.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 
4 | # Ref: https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/logger/base.py 5 | 6 | from .hook import Hook 7 | 8 | 9 | class LoggingHook(Hook): 10 | """ 11 | Logging Hook for printing information and logging to TensorBoard 12 | """ 13 | 14 | def after_train_step(self, algorithm): 15 | """must be called after evaluation""" 16 | if self.every_n_iters(algorithm, algorithm.num_eval_iter): 17 | if not algorithm.distributed or (algorithm.distributed and algorithm.rank % algorithm.ngpus_per_node == 0): 18 | print_text = f"[{algorithm.it + 1} iteration] USE_EMA: {algorithm.ema_m != 0}, " 19 | for i, (key, item) in enumerate(algorithm.log_dict.items()): 20 | print_text += "{:s}: {:.4f}, ".format(key, item) 21 | print_text += "BEST_EVAL_MAE: {:.4f}, at {:d} iters".format(algorithm.best_eval_mae, algorithm.best_it + 1) 22 | algorithm.print_fn(print_text) 23 | 24 | if algorithm.tb_log is not None: 25 | algorithm.tb_log.update(algorithm.log_dict, algorithm.it) 26 | algorithm.tb_log.update({"eval/best-mae": algorithm.best_eval_mae}, algorithm.it) 27 | 28 | elif self.every_n_iters(algorithm, algorithm.num_log_iter): 29 | if not algorithm.distributed or (algorithm.distributed and algorithm.rank % algorithm.ngpus_per_node == 0): 30 | print_text = f"{algorithm.it + 1} iteration, " 31 | for i, (key, item) in enumerate(algorithm.log_dict.items()): 32 | print_text += "{:s}: {:.4f}".format(key, item) 33 | if i != len(algorithm.log_dict) - 1: 34 | print_text += ", " 35 | else: 36 | print_text += " " 37 | algorithm.print_fn(print_text) 38 | 39 | if algorithm.tb_log is not None: 40 | algorithm.tb_log.update(algorithm.log_dict, algorithm.it) 41 | -------------------------------------------------------------------------------- /semilearn/algorithms/rankup/rankup_net.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Pin-Yen Huang. 2 | # Licensed under the MIT License. 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from semilearn.nets.utils import init_weights 8 | 9 | 10 | class RankUp_Net(nn.Module): 11 | """ 12 | RankUp_Net implementation. 13 | 14 | Attributes: 15 | backbone (nn.Module): The underlying backbone model. 16 | num_features (int): Number of features from the model's hidden layer. 17 | arc_classifier (nn.Linear): Linear layer for Auxiliary Ranking Classifier (ARC) with two output classes.
18 | """ 19 | 20 | def __init__(self, backbone): 21 | super().__init__() 22 | self.backbone = backbone 23 | self.num_features = backbone.num_features 24 | 25 | # Auxiliary Ranking Classifier (ARC) 26 | self.arc_classifier = nn.Linear(self.num_features, 2) 27 | self.arc_classifier.apply(init_weights) 28 | 29 | def forward(self, x, use_arc=False, targets=None, **kwargs): 30 | if not use_arc: 31 | return self.backbone(x, **kwargs) 32 | feat = self.backbone(x, only_feat=True) 33 | logits = self.backbone(feat, only_fc=True) 34 | logits_arc = self.arc_classifier(feat) 35 | logits_mat, targets_mat = self.compute_rank_logits(logits_arc, targets) 36 | return {"logits": logits, "logits_arc": logits_mat, "feat": feat, "targets_arc": targets_mat} 37 | 38 | def compute_rank_logits(self, logits, targets=None): 39 | logits_mat = logits.unsqueeze(dim=0) - logits.unsqueeze(dim=1) 40 | logits_mat = logits_mat.flatten(0, 1) 41 | if targets is not None: 42 | targets_mat = (1 + torch.sign(targets.unsqueeze(dim=0) - targets.unsqueeze(dim=1))) / 2 43 | targets_mat = targets_mat.flatten(0, 1) 44 | # one-hot encode the targets_mat 45 | targets_onehot = torch.zeros((targets_mat.shape[0], 2)).to(targets_mat.device) 46 | targets_onehot[:, 0] = targets_mat 47 | targets_onehot[:, 1] = 1 - targets_mat 48 | return logits_mat, targets_onehot 49 | return logits_mat, None 50 | 51 | def group_matcher(self, coarse=False): 52 | matcher = self.backbone.group_matcher(coarse, prefix="backbone.") 53 | return matcher 54 | -------------------------------------------------------------------------------- /semilearn/algorithms/rankuprda/rankup_net.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Pin-Yen Huang. 2 | # Licensed under the MIT License. 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from semilearn.nets.utils import init_weights 8 | 9 | 10 | class RankUp_Net(nn.Module): 11 | """ 12 | RankUp_Net implementation. 13 | 14 | Attributes: 15 | backbone (nn.Module): The underlying backbone model. 16 | num_features (int): Number of features from the model's hidden layer. 17 | arc_classifier (nn.Linear): Linear layer for Auxiliary Ranking Classifier (ARC) with two output classes. 
18 | """ 19 | 20 | def __init__(self, backbone): 21 | super().__init__() 22 | self.backbone = backbone 23 | self.num_features = backbone.num_features 24 | 25 | # Auxiliary Ranking Classifier (ARC) 26 | self.arc_classifier = nn.Linear(self.num_features, 2) 27 | self.arc_classifier.apply(init_weights) 28 | 29 | def forward(self, x, use_arc=False, targets=None, **kwargs): 30 | if not use_arc: 31 | return self.backbone(x, **kwargs) 32 | feat = self.backbone(x, only_feat=True) 33 | logits = self.backbone(feat, only_fc=True) 34 | logits_arc = self.arc_classifier(feat) 35 | logits_mat, targets_mat = self.compute_rank_logits(logits_arc, targets) 36 | return {"logits": logits, "logits_arc": logits_mat, "feat": feat, "targets_arc": targets_mat} 37 | 38 | def compute_rank_logits(self, logits, targets=None): 39 | logits_mat = logits.unsqueeze(dim=0) - logits.unsqueeze(dim=1) 40 | logits_mat = logits_mat.flatten(0, 1) 41 | if targets is not None: 42 | targets_mat = (1 + torch.sign(targets.unsqueeze(dim=0) - targets.unsqueeze(dim=1))) / 2 43 | targets_mat = targets_mat.flatten(0, 1) 44 | # one-hot encode the targets_mat 45 | targets_onehot = torch.zeros((targets_mat.shape[0], 2)).to(targets_mat.device) 46 | targets_onehot[:, 0] = targets_mat 47 | targets_onehot[:, 1] = 1 - targets_mat 48 | return logits_mat, targets_onehot 49 | return logits_mat, None 50 | 51 | def group_matcher(self, coarse=False): 52 | matcher = self.backbone.group_matcher(coarse, prefix="backbone.") 53 | return matcher 54 | -------------------------------------------------------------------------------- /semilearn/algorithms/hooks/masking.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import torch 5 | import numpy as np 6 | from semilearn.core.hooks import Hook 7 | 8 | 9 | class MaskingHook(Hook): 10 | """ 11 | Base MaskingHook, used for computing the mask of unlabeled (consistency) loss 12 | define MaskingHook in each algorithm when needed, and call hook inside each train_step 13 | easy support for other settings 14 | """ 15 | 16 | def __init__(self, *args, **kwargs) -> None: 17 | super().__init__() 18 | 19 | def update(self, *args, **kwargs): 20 | pass 21 | 22 | @torch.no_grad() 23 | def masking( 24 | self, algorithm, logits_x_lb=None, logits_x_ulb=None, idx_lb=None, idx_ulb=None, softmax_x_lb=True, softmax_x_ulb=True, *args, **kwargs 25 | ): 26 | """ 27 | generate mask for unlabeled loss 28 | 29 | Args: 30 | algorithm: base algorithm 31 | logits_x_lb: labeled batch logits (or probs, need to set softmax_x_lb to False) 32 | logits_x_ulb: unlabeled batch logits (or probs, need to set softmax_x_ulb to False) 33 | idx_lb: labeled batch index 34 | idx_ulb: unlabeled batch index 35 | softmax_x_lb: flag of using softmax on labeled logits 36 | softmax_x_ulb: flag of using softmax on unlabeled logits 37 | """ 38 | raise NotImplementedError 39 | 40 | 41 | class FixedThresholdingHook(MaskingHook): 42 | """ 43 | Common Fixed Threshold used in fixmatch, uda, pseudo label, et. al. 
44 | """ 45 | 46 | @torch.no_grad() 47 | def masking(self, algorithm, logits_x_ulb, softmax_x_ulb=True, *args, **kwargs): 48 | if softmax_x_ulb: 49 | # probs_x_ulb = torch.softmax(logits_x_ulb.detach(), dim=-1) 50 | probs_x_ulb = algorithm.compute_prob(logits_x_ulb.detach()) 51 | else: 52 | # logits is already probs 53 | probs_x_ulb = logits_x_ulb.detach() 54 | max_probs, _ = torch.max(probs_x_ulb, dim=-1) 55 | mask = max_probs.ge(algorithm.p_cutoff).to(max_probs.dtype) 56 | return mask 57 | 58 | 59 | # class RampupWeightingHook(MaskingHook): 60 | # """ 61 | # Common Rampup weight used in mean teacher, pi model, et. al. 62 | # """ 63 | # def masking(self, algorithm, *args, **kwargs): 64 | # return np.clip(algorithm.it / (algorithm.unsup_warm_up * algorithm.num_train_iter), a_min=0.0, a_max=1.0) 65 | -------------------------------------------------------------------------------- /results/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Benchmark Results 4 | 5 |
6 | 📋 Table of Contents
7 | 
8 | 1. Classic CV Results
9 | 2. NLP Results
10 | 3. Audio Results
11 | 
12 | 
13 | 
14 | This folder contains benchmark results and experiment logs for semi-supervised regression algorithms. For each run, we report the best evaluation metrics (e.g., MAE, MSE, R², SRCC) reached across training iterations. Each setting is run with 3 different random seeds, and we report the average performance together with its standard deviation.
15 | 
16 | ## 📷 Classic CV Results
17 | 
18 | ### a.) Datasets
19 | 
20 | | Dataset | # Labels | # Training Data | # Test Data | Target Range |
21 | | ------- | --------------- | --------------- | ----------- | ------------ |
22 | | UTKFace | 50 / 250 / 2000 | 18,964 | 4,741 | [1, 116] |
23 | 
24 | ### b.) Results
25 | 
26 | The results for the Classic CV benchmarks can be found in [classic_cv_average_log.csv](classic_cv_average_log.csv).
27 | 
28 | ### c.) Logs
29 | 
30 | For detailed experiment logs, check out our **WandB project page** (🚧 Coming Soon).
31 | 
32 | 

([back to top](#benchmark-results))

33 | 34 | ## 📝 NLP Results 35 | 36 | ### a.) Datasets 37 | 38 | | Dataset | # Labels | # Training Data | # Test Data | Target Range | 39 | | ----------- | -------- | --------------- | ----------- | ------------ | 40 | | Yelp Review | 250 | 250,000 | 25,000 | [0, 4] | 41 | 42 | ### b.) Results 43 | 44 | The results for NLP benchmarks can be found in [nlp_average_log.csv](nlp_average_log.csv). 45 | 46 | ### c.) Logs 47 | 48 | For detailed experiment logs, check out our **WandB project page** (🚧 Coming Soon). 49 | 50 |

([back to top](#benchmark-results))

51 | 52 | ## 🎧 Audio Results 53 | 54 | ### a.) Datasets 55 | 56 | | Dataset | # Labels | # Training Data | # Test Data | Target Range | 57 | | ------- | -------- | --------------- | ----------- | ------------ | 58 | | BVCC | 250 | 4,974 | 1,066 | [1, 5] | 59 | 60 | ### b.) Results 61 | 62 | The results for Audio benchmarks can be found in [audio_average_log.csv](audio_average_log.csv). 63 | 64 | ### c.) Logs 65 | 66 | For detailed experiment logs, check out our **WandB project page** (🚧 Coming Soon). 67 | 68 |

([back to top](#benchmark-results))

69 | -------------------------------------------------------------------------------- /semilearn/core/hooks/wandb.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import os 6 | import wandb 7 | 8 | from .hook import Hook 9 | 10 | 11 | class WANDBHook(Hook): 12 | """ 13 | Wandb Hook 14 | """ 15 | 16 | def __init__(self): 17 | super().__init__() 18 | self.log_key_list = [ 19 | "train/sup_loss", 20 | "train/unsup_loss", 21 | "train/total_loss", 22 | "train/util_ratio", 23 | "train/run_time", 24 | "train/prefetch_time", 25 | "lr", 26 | "eval/mae", 27 | "eval/mse", 28 | "eval/r2", 29 | "eval/lcc", 30 | "eval/srcc", 31 | "eval/ktau", 32 | "eval/gmae", 33 | ] 34 | 35 | def before_run(self, algorithm): 36 | name = algorithm.save_name 37 | project = "ssr_" + algorithm.save_dir.split("/")[-1] 38 | group = "_".join(algorithm.args.save_name.split("_")[:-1]) 39 | 40 | # tags 41 | benchmark = f"benchmark: {project}" 42 | dataset = f"dataset: {algorithm.args.dataset}" 43 | data_setting = f"setting: {algorithm.args.dataset}_lb{algorithm.args.num_labels}_ulb{algorithm.args.ulb_num_labels}" 44 | alg = f"alg: {algorithm.args.algorithm}" 45 | tags = [benchmark, dataset, data_setting, alg] 46 | if algorithm.args.resume: 47 | resume = "auto" 48 | else: 49 | resume = "never" 50 | # resume = 'never' 51 | 52 | save_dir = os.path.join(algorithm.args.save_dir, "wandb", algorithm.args.save_name) 53 | if not os.path.exists(save_dir): 54 | os.makedirs(save_dir) 55 | 56 | self.run = wandb.init(name=name, tags=tags, config=algorithm.args.__dict__, project=project, group=group, resume=resume, dir=save_dir) 57 | 58 | def after_train_step(self, algorithm): 59 | if self.every_n_iters(algorithm, algorithm.num_log_iter): 60 | log_dict = {} 61 | for key, item in algorithm.log_dict.items(): 62 | if key in self.log_key_list: 63 | log_dict[key] = item 64 | self.run.log(log_dict, step=algorithm.it) 65 | 66 | if self.every_n_iters(algorithm, algorithm.num_eval_iter): 67 | self.run.log({"eval/best-mae": algorithm.best_eval_mae}, step=algorithm.it) 68 | 69 | def after_run(self, algorithm): 70 | self.run.finish() 71 | -------------------------------------------------------------------------------- /semilearn/core/hooks/aim.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import aim 6 | 7 | from .hook import Hook 8 | 9 | 10 | class AimHook(Hook): 11 | """ 12 | A hook for tracking training progress with Aim. 13 | """ 14 | 15 | def __init__(self): 16 | super().__init__() 17 | self.log_key_list = [ 18 | "train/sup_loss", 19 | "train/unsup_loss", 20 | "train/total_loss", 21 | "train/util_ratio", 22 | "train/run_time", 23 | "train/prefetch_time", 24 | "lr", 25 | "eval/mae", 26 | "eval/mse", 27 | "eval/r2", 28 | "eval/lcc", 29 | "eval/srcc", 30 | "eval/ktau", 31 | "eval/gmae", 32 | ] 33 | 34 | def before_run(self, algorithm): 35 | """Setup the Aim tracking. 36 | 37 | Args: 38 | algorithm (AlgorithmBase): The training algorithm. 
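        Added note (illustrative; the path below is hypothetical): the Aim repo is
        taken from the parent directory of ``save_dir``, e.g.
        ``save_dir='./saved_models/classic_cv'`` tracks runs under the
        ``saved_models`` repo with experiment name ``save_name``.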
39 | """ 40 | # initialize aim run 41 | name = algorithm.save_name 42 | project = algorithm.save_dir.split("/")[-1] 43 | repo = algorithm.args.save_dir.split("/")[-2] 44 | self.run = aim.Run(experiment=name, repo=repo, log_system_params=True) 45 | 46 | # set configuration 47 | self.run["hparams"] = algorithm.args.__dict__ 48 | 49 | # set tags 50 | benchmark = f"benchmark: {project}" 51 | dataset = f"dataset: {algorithm.args.dataset}" 52 | data_setting = f"setting: {algorithm.args.dataset}_lb{algorithm.args.num_labels}_ulb{algorithm.args.ulb_num_labels}" 53 | alg = f"alg: {algorithm.args.algorithm}" 54 | self.run.add_tag(benchmark) 55 | self.run.add_tag(dataset) 56 | self.run.add_tag(data_setting) 57 | self.run.add_tag(alg) 58 | 59 | def after_train_step(self, algorithm): 60 | """Log the metric values in the log dictionary to Aim. 61 | 62 | Args: 63 | algorithm (AlgorithmBase): The training algorithm. 64 | """ 65 | if self.every_n_iters(algorithm, algorithm.num_log_iter): 66 | for key, item in algorithm.log_dict.items(): 67 | if key in self.log_key_list: 68 | self.run.track(item, name=key, step=algorithm.it) 69 | 70 | if self.every_n_iters(algorithm, algorithm.num_eval_iter): 71 | self.run.track(algorithm.best_eval_mae, name="eval/best-mae", step=algorithm.it) 72 | -------------------------------------------------------------------------------- /semilearn/core/utils/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import importlib 6 | 7 | __all__ = ["ALGORITHMS"] 8 | 9 | 10 | class Register: 11 | def __init__(self, registry_name): 12 | self._dict = {} 13 | self._name = registry_name 14 | 15 | def __setitem__(self, key, value): 16 | if not callable(value): 17 | raise Exception(f"Value of a Registry must be a callable!\nValue: {value}") 18 | if key is None: 19 | key = value.__name__ 20 | if key in self._dict: 21 | print("Key %s already in registry %s." % (key, self._name)) 22 | self._dict[key] = value 23 | 24 | def register(self, target): 25 | """Decorator to register a function or class.""" 26 | 27 | def add(key, value): 28 | self[key] = value 29 | return value 30 | 31 | if callable(target): 32 | # @reg.register 33 | return add(None, target) 34 | # @reg.register('alias') 35 | return lambda x: add(target, x) 36 | 37 | def __getitem__(self, key): 38 | return self._dict[key] 39 | 40 | def __contains__(self, key): 41 | return key in self._dict 42 | 43 | def keys(self): 44 | """key""" 45 | return self._dict.keys() 46 | 47 | 48 | ALGORITHMS = Register("algorithms") 49 | 50 | 51 | def _handle_errors(errors): 52 | """ 53 | Log out and possibly reraise errors during import. 54 | """ 55 | if not errors: 56 | return 57 | 58 | for name, err in errors: 59 | print("Module {} import failed: {}".format(name, err)) 60 | 61 | 62 | ALL_MODULES = [ 63 | # NOTE: add all algorithms here 64 | ("semilearn.algorithms", ["fullysupervised", "meanteacher", "mixmatch", "pimodel", "ucvme", "clss", "rankup", "rda", "rankuprda"]) 65 | ] 66 | 67 | 68 | def import_all_modules_for_register(): 69 | """ 70 | Import all modules for register. 71 | """ 72 | all_modules = ALL_MODULES 73 | errors = [] 74 | for base_dir, modules in all_modules: 75 | for name in modules: 76 | try: 77 | if base_dir != "": 78 | full_name = base_dir + "." 
+ name 79 | else: 80 | full_name = name 81 | importlib.import_module(full_name) 82 | except ImportError as error: 83 | errors.append((name, error)) 84 | _handle_errors(errors) 85 | -------------------------------------------------------------------------------- /semilearn/datasets/audio_datasets/augmentation/subsample.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Pin-Yen Huang. 2 | # Licensed under the MIT License. 3 | 4 | import random 5 | from audiomentations import AdjustDuration 6 | 7 | 8 | class Subsample: 9 | """Sample the first `max_length` seconds from the input audio. 10 | 11 | Args: 12 | max_length_seconds (float): Maximum output length of the audio in seconds. 13 | Output will be padded or truncated to this length. 14 | """ 15 | 16 | def __init__(self, max_length_seconds): 17 | self.max_length_seconds = max_length_seconds 18 | self.adjust_duration = AdjustDuration(duration_seconds=max_length_seconds, p=1.0) 19 | 20 | def __call__(self, audio, sample_rate): 21 | max_sample_length = int(round(sample_rate * self.max_length_seconds)) 22 | if len(audio) > max_sample_length: 23 | audio = audio[:max_sample_length] 24 | audio = self.adjust_duration(audio, sample_rate) # padding to the `max_length_seconds` 25 | return audio 26 | 27 | 28 | class RandomSubsample: 29 | """Randomly samples a chunk of audio of length between [`min_length`, `max_length`] seconds and pads it to `max_length` seconds. 30 | 31 | Args: 32 | max_length_seconds (float): Maximum output length of the audio in seconds. 33 | Output will be padded or truncated to this length. 34 | min_ratio (float): Minimum ratio of the maximum length for subsampling, 35 | should be between 0.0 and 1.0. 36 | """ 37 | 38 | def __init__(self, max_length_seconds, min_ratio=1.0): 39 | if not (0.0 <= min_ratio <= 1.0): 40 | raise ValueError("min_ratio should be between 0 and 1") 41 | 42 | self.max_length_seconds = max_length_seconds 43 | self.min_ratio = min_ratio 44 | self.adjust_duration = AdjustDuration(duration_seconds=max_length_seconds, p=1.0) 45 | self.min_length_seconds = max_length_seconds * self.min_ratio 46 | 47 | def __call__(self, audio, sample_rate): 48 | subsample_seconds = random.uniform(self.min_length_seconds, self.max_length_seconds) 49 | subsample_length = int(round(sample_rate * subsample_seconds)) 50 | if len(audio) > subsample_length: 51 | max_offset = len(audio) - subsample_length 52 | random_offset = random.randint(0, max_offset) 53 | audio = audio[random_offset : random_offset + subsample_length] 54 | audio = self.adjust_duration(audio, sample_rate) # padding to the `max_length_seconds` 55 | return audio 56 | -------------------------------------------------------------------------------- /semilearn/datasets/nlp_datasets/get_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | from semilearn.datasets import nlp_datasets 6 | from semilearn.datasets.utils import split_ssl_data 7 | 8 | from .datasetbase import BasicDataset 9 | 10 | 11 | def get_nlp_dataset(args, alg, dataset_name, num_labels=40, data_dir="./data", include_lb_to_ulb=True): 12 | """ 13 | Get the NLP dataset and split the training samples into labeled and unlabeled sets. 14 | 15 | Args: 16 | alg (str): Algorithm. 17 | dataset_name (str): The name of the dataset to load. 
18 | num_labels (int): The number of labeled samples for the training set. 19 | data_dir (str): The directory from which to load the dataset. 20 | include_lb_to_ulb (bool): Indicates whether to include labeled data in the unlabeled set. 21 | 22 | Returns: 23 | Tuple[Dataset, Dataset, Dataset, Dataset]: 24 | A tuple containing: 25 | - train labeled dataset 26 | - train unlabeled dataset 27 | - evaluation dataset 28 | - test dataset 29 | """ 30 | dataset = getattr(nlp_datasets, dataset_name.upper()) 31 | 32 | train_dataset = dataset(data_dir, split="train", download=True) 33 | train_texts, train_targets = train_dataset._texts, train_dataset._labels 34 | 35 | test_dataset = dataset(data_dir, split="test", download=True) 36 | test_texts, test_targets = test_dataset._texts, test_dataset._labels 37 | 38 | eval_dset = BasicDataset(alg, test_texts, test_targets, False) 39 | test_dset = None 40 | 41 | if dataset_name.lower() in ["yelp_review", "amazon_review"]: 42 | dev_dataset = dataset(data_dir, split="dev", download=True) 43 | dev_texts, dev_targets = dev_dataset._texts, dev_dataset._labels 44 | eval_dset = BasicDataset(alg, dev_texts, dev_targets, False) 45 | test_dset = BasicDataset(alg, test_texts, test_targets, False) 46 | 47 | if alg == "fullysupervised": 48 | lb_dset = BasicDataset(alg, train_texts, train_targets, False) 49 | return lb_dset, None, eval_dset, test_dset 50 | 51 | lb_texts, lb_targets, ulb_texts, ulb_targets = split_ssl_data( 52 | args, 53 | train_texts, 54 | train_targets, 55 | lb_num_labels=num_labels, 56 | ulb_num_labels=args.ulb_num_labels, 57 | include_lb_to_ulb=include_lb_to_ulb, 58 | ) 59 | 60 | lb_dset = BasicDataset(alg, lb_texts, lb_targets, False) 61 | ulb_dset = BasicDataset(alg, ulb_texts, ulb_targets, True) 62 | 63 | if alg == "supervised": 64 | ulb_dset = None 65 | 66 | return lb_dset, ulb_dset, eval_dset, test_dset 67 | -------------------------------------------------------------------------------- /semilearn/algorithms/fullysupervised/fullysupervised.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | 6 | from semilearn.core import AlgorithmBase 7 | from semilearn.core.utils import ALGORITHMS 8 | 9 | 10 | @ALGORITHMS.register("fullysupervised") 11 | class FullySupervised(AlgorithmBase): 12 | """ 13 | Train a fully supervised model using labeled data only. This serves as a baseline for comparison. 14 | 15 | Args: 16 | - args (`argparse`): 17 | algorithm arguments 18 | - net_builder (`callable`): 19 | network loading function 20 | - tb_log (`TBLog`): 21 | tensorboard logger 22 | - logger (`logging.Logger`): 23 | logger to use 24 | """ 25 | 26 | def __init__(self, args, net_builder, tb_log=None, logger=None): 27 | super().__init__(args, net_builder, tb_log, logger) 28 | 29 | def train_step(self, x_lb, y_lb, **kwargs): 30 | # inference and calculate sup losses 31 | with self.amp_cm(): 32 | outs_x_lb = self.model(x_lb) 33 | logits_x_lb = outs_x_lb["logits"] 34 | feats_x_lb = outs_x_lb["feat"] 35 | sup_loss = self.reg_loss(logits_x_lb, y_lb, reduction="mean") 36 | 37 | # extract features for further use in the classification algorithm. 
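            # (Added note: extra input batches passed via kwargs are encoded with
            # only_feat=True below, so wrapper algorithms can reuse their features.)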
38 | feat_dict = {"x_lb": feats_x_lb} 39 | for k in kwargs: 40 | feat_dict[k] = self.model(kwargs[k], only_feat=True) 41 | 42 | out_dict = self.process_out_dict(loss=sup_loss, feat=feat_dict) 43 | log_dict = self.process_log_dict(sup_loss=sup_loss.item(), total_loss=sup_loss.item()) 44 | return out_dict, log_dict 45 | 46 | def train(self): 47 | # lb: labeled, ulb: unlabeled 48 | self.model.train() 49 | self.call_hook("before_run") 50 | 51 | for epoch in range(self.start_epoch, self.epochs): 52 | self.epoch = epoch 53 | 54 | # prevent the training iterations exceed args.num_train_iter 55 | if self.it > self.num_train_iter: 56 | break 57 | 58 | self.call_hook("before_train_epoch") 59 | 60 | for data_lb in self.loader_dict["train_lb"]: 61 | # prevent the training iterations exceed args.num_train_iter 62 | if self.it > self.num_train_iter: 63 | break 64 | 65 | self.call_hook("before_train_step") 66 | self.out_dict, self.log_dict = self.train_step(**self.process_batch(**data_lb)) 67 | self.call_hook("after_train_step") 68 | self.it += 1 69 | 70 | self.call_hook("after_train_epoch") 71 | self.call_hook("after_run") 72 | 73 | 74 | ALGORITHMS["supervised"] = FullySupervised 75 | -------------------------------------------------------------------------------- /semilearn/datasets/samplers/sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import torch 6 | import torch.distributed as dist 7 | from torch.utils.data.sampler import Sampler 8 | 9 | 10 | class DistributedSampler(Sampler): 11 | """Sampler that restricts data loading to a subset of the dataset. 12 | 13 | It is especially useful in conjunction with 14 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 15 | process can pass a DistributedSampler instance as a DataLoader sampler, 16 | and load a subset of the original dataset that is exclusive to it. 17 | 18 | .. note:: 19 | Dataset is assumed to be of constant size. 20 | 21 | Arguments: 22 | dataset: Dataset used for sampling. 23 | num_replicas (optional): Number of processes participating in 24 | distributed training. 25 | rank (optional): Rank of the current process within num_replicas. 26 | """ 27 | 28 | def __init__(self, dataset, num_replicas=None, rank=None, num_samples=None, **kwargs): 29 | if not isinstance(num_samples, int) or num_samples <= 0: 30 | raise ValueError("num_samples should be a positive integer " "value, but got num_samples={}".format(num_samples)) 31 | 32 | if num_replicas is None: 33 | if not dist.is_available(): 34 | raise RuntimeError("Requires distributed package to be available") 35 | else: 36 | num_replicas = dist.get_world_size() 37 | if rank is None: 38 | if not dist.is_available(): 39 | raise RuntimeError("Requires distributed package to be available") 40 | else: 41 | rank = dist.get_rank() 42 | self.dataset = dataset 43 | self.num_replicas = num_replicas 44 | self.rank = rank 45 | self.epoch = 0 46 | 47 | self.total_size = num_samples 48 | assert num_samples % self.num_replicas == 0, f"{num_samples} samples cant" f"be evenly distributed among {num_replicas} devices." 
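        # Per-replica quota. Illustrative example (added; numbers are hypothetical):
        # with num_samples=8 and num_replicas=2, each process gets 4 indices;
        # __iter__ concatenates repeated random permutations of the dataset up to
        # total_size, then strides the shuffled index list by rank to pick this share.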
49 | self.num_samples = int(num_samples // self.num_replicas) 50 | 51 | def __iter__(self): 52 | # deterministically shuffle based on epoch 53 | g = torch.Generator() 54 | g.manual_seed(self.epoch) 55 | 56 | n = len(self.dataset) 57 | n_repeats = self.total_size // n 58 | n_remain = self.total_size % n 59 | indices = [torch.randperm(n, generator=g) for _ in range(n_repeats)] 60 | indices.append(torch.randperm(n, generator=g)[:n_remain]) 61 | indices = torch.cat(indices, dim=0).tolist() 62 | 63 | assert len(indices) == self.total_size 64 | 65 | # subsample 66 | indices = indices[self.rank : self.total_size : self.num_replicas] 67 | assert len(indices) == self.num_samples 68 | 69 | return iter(indices) 70 | 71 | def __len__(self): 72 | return self.num_samples 73 | 74 | def set_epoch(self, epoch): 75 | self.epoch = epoch 76 | 77 | 78 | name2sampler = {"RandomSampler": DistributedSampler} 79 | -------------------------------------------------------------------------------- /semilearn/algorithms/pimodel/pimodel.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import numpy as np 6 | from semilearn.core import AlgorithmBase 7 | from semilearn.core.utils import ALGORITHMS 8 | from semilearn.algorithms.utils import SSL_Argument 9 | 10 | 11 | @ALGORITHMS.register("pimodel") 12 | class PiModel(AlgorithmBase): 13 | """ 14 | Pi-Model algorithm (https://arxiv.org/abs/1610.02242). 15 | 16 | Args: 17 | - args (`argparse`): 18 | algorithm arguments 19 | - net_builder (`callable`): 20 | network loading function 21 | - tb_log (`TBLog`): 22 | tensorboard logger 23 | - logger (`logging.Logger`): 24 | logger to use 25 | - unsup_warm_up (`float`, *optional*, defaults to 0.4): 26 | Ramp up for weights for unsupervised loss 27 | """ 28 | 29 | def __init__(self, args, net_builder, tb_log=None, logger=None, **kwargs): 30 | super().__init__(args, net_builder, tb_log, logger, **kwargs) 31 | self.init(unsup_warm_up=args.unsup_warm_up) 32 | 33 | def init(self, unsup_warm_up=0.4): 34 | self.unsup_warm_up = unsup_warm_up 35 | 36 | def train_step(self, x_lb, y_lb, x_ulb_w, x_ulb_w_2, **kwargs): 37 | # inference and calculate sup/unsup losses 38 | with self.amp_cm(): 39 | outs_x_lb = self.model(x_lb) 40 | logits_x_lb = outs_x_lb["logits"] 41 | feats_x_lb = outs_x_lb["feat"] 42 | 43 | # calculate BN only for the first batch 44 | self.bn_controller.freeze_bn(self.model) 45 | outs_x_ulb_w = self.model(x_ulb_w) 46 | logits_x_ulb_w = outs_x_ulb_w["logits"] 47 | feats_x_ulb_w = outs_x_ulb_w["feat"] 48 | outs_x_ulb_w_2 = self.model(x_ulb_w_2) 49 | logits_x_ulb_w_2 = outs_x_ulb_w_2["logits"] 50 | feats_x_ulb_w_2 = outs_x_ulb_w_2["feat"] 51 | self.bn_controller.unfreeze_bn(self.model) 52 | 53 | # extract features for further use in the classification algorithm. 
54 | feat_dict = {"x_lb": feats_x_lb, "x_ulb_w": feats_x_ulb_w, "x_ulb_w_2": feats_x_ulb_w_2} 55 | for k in kwargs: 56 | feat_dict[k] = self.model(kwargs[k], only_feat=True) 57 | 58 | sup_loss = self.reg_loss(logits_x_lb, y_lb, reduction="mean") 59 | unsup_loss = self.consistency_loss(logits_x_ulb_w_2, logits_x_ulb_w.detach(), "mse") 60 | 61 | unsup_warmup = np.clip(self.it / (self.unsup_warm_up * self.num_train_iter), a_min=0.0, a_max=1.0) 62 | total_loss = sup_loss + self.ulb_loss_ratio * unsup_loss * unsup_warmup 63 | 64 | out_dict = self.process_out_dict(loss=total_loss, feat=feat_dict) 65 | log_dict = self.process_log_dict(sup_loss=sup_loss.item(), unsup_loss=unsup_loss.item(), total_loss=total_loss.item()) 66 | return out_dict, log_dict 67 | 68 | @staticmethod 69 | def get_argument(): 70 | return [ 71 | SSL_Argument("--unsup_warm_up", float, 0.4, "warm up ratio for regression unsupervised loss"), 72 | ] 73 | -------------------------------------------------------------------------------- /semilearn/algorithms/clss/ordinal_entropy.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Pin-Yen Huang. 2 | # Licensed under the MIT License. 3 | # Code in this file is adapted from xmed-lab/CLSS 4 | # https://github.com/xmed-lab/CLSS/blob/main/age_estimation/models/OrdinalEntropy.py 5 | 6 | import torch 7 | import torch.nn.functional as F 8 | 9 | 10 | def ordinal_entropy(features, targets): 11 | """ 12 | Compute the ordinal entropy of features given targets. 13 | 14 | Args: 15 | features (torch.Tensor): Input features. Shape: (batch_size, feat_dim). 16 | targets (torch.Tensor): Target labels. Shape: (batch_size,). 17 | 18 | Returns: 19 | torch.Tensor: Ordinal entropy. 20 | """ 21 | if features.dim() != 2 or targets.dim() != 1 or features.size(0) != targets.size(0): 22 | raise ValueError("Input shapes are invalid.") 23 | 24 | batch_size, feat_dim = features.size() 25 | 26 | uni_values, uni_indices, uni_counts = torch.unique(targets, return_inverse=True, return_counts=True) 27 | 28 | center_feats = torch.zeros([len(uni_values), feat_dim], device=features.device) 29 | center_feats.index_add_(0, uni_indices, features) 30 | center_feats = center_feats / uni_counts.unsqueeze(1) 31 | 32 | norm_center_feats = F.normalize(center_feats, dim=1) 33 | distance = euclidean_dist(norm_center_feats, norm_center_feats) 34 | distance = flatten_upper_triangular(distance) 35 | 36 | _uni_values = uni_values.unsqueeze(1) 37 | weight = euclidean_dist(_uni_values, _uni_values) 38 | weight = flatten_upper_triangular(weight) 39 | weight = (weight - torch.min(weight)) / torch.max(weight) if len(weight) != 0 else 0 40 | 41 | distance = distance * weight 42 | entropy = torch.mean(distance) 43 | 44 | norm_feats = F.normalize(features, dim=1) 45 | norm_feats -= norm_center_feats[uni_indices, :] 46 | tightness = torch.sum(norm_feats.pow(2), dim=1) 47 | tightness = tightness[tightness > 0].mean() 48 | 49 | return tightness - entropy 50 | 51 | 52 | def euclidean_dist(x, y): 53 | """ 54 | Calculate Euclidean distance between two sets of vectors. 55 | 56 | Args: 57 | x (torch.Tensor): Set of vectors. Shape: (m, d). 58 | y (torch.Tensor): Set of vectors. Shape: (n, d). 59 | 60 | Returns: 61 | torch.Tensor: Pairwise Euclidean distance. Shape: (m, n). 
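    Illustrative example (added; not in the original docstring):

        >>> x = torch.zeros(1, 2)
        >>> y = torch.tensor([[3.0, 4.0]])
        >>> euclidean_dist(x, y)
        tensor([[5.]])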
62 | """ 63 | m, n = x.size(0), y.size(0) 64 | xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, n) 65 | yy = torch.pow(y, 2).sum(1, keepdim=True).expand(n, m).t() 66 | dist = xx + yy 67 | dist.addmm_(x, y.t(), beta=1, alpha=-2) 68 | dist = dist.clamp(min=1e-12).sqrt() # for numerical stability 69 | return dist 70 | 71 | 72 | def flatten_upper_triangular(x): 73 | """ 74 | Flatten the upper triangular elements of a square matrix. 75 | 76 | Args: 77 | x (torch.Tensor): Square matrix. 78 | 79 | Returns: 80 | torch.Tensor: Flattened upper triangular elements. 81 | """ 82 | if len(x.shape) != 2 or x.shape[0] != x.shape[1]: 83 | raise ValueError(f"Input tensor must be a square matrix, but got shape {x.shape}") 84 | n = x.shape[0] 85 | mask = torch.triu(torch.ones(n, n), diagonal=1).to(torch.bool) 86 | return x[mask] 87 | -------------------------------------------------------------------------------- /semilearn/datasets/cv_datasets/get_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | from semilearn.datasets import cv_datasets 6 | from semilearn.datasets.utils import split_ssl_data, load_image_files 7 | 8 | from .datasetbase import BasicDataset, ImagePathDataset 9 | from .augmentation import get_val_transforms, get_weak_transforms, get_strong_transforms 10 | 11 | 12 | def get_cv_dataset(args, alg, dataset_name, num_labels, data_dir="./data", include_lb_to_ulb=True): 13 | """ 14 | Get the computer vision dataset and split the training samples into labeled and unlabeled sets. 15 | 16 | Args: 17 | alg (str): Algorithm. 18 | dataset_name (str): The name of the dataset to load. 19 | num_labels (int): The number of labeled samples for the training set. 20 | data_dir (str): The directory from which to load the dataset. 21 | include_lb_to_ulb (bool): Indicates whether to include labeled data in the unlabeled set. 
22 | 23 | Returns: 24 | Tuple[Dataset, Dataset, Dataset, Dataset]: 25 | A tuple containing: 26 | - train labeled dataset 27 | - train unlabeled dataset 28 | - evaluation dataset 29 | - test dataset 30 | """ 31 | 32 | dataset = getattr(cv_datasets, dataset_name.upper()) 33 | 34 | train_dataset = dataset(data_dir, split="train", download=True) 35 | train_paths, train_targets = train_dataset._file_paths, train_dataset._labels 36 | 37 | test_dataset = dataset(data_dir, split="test", download=True) 38 | test_paths, test_targets = test_dataset._file_paths, test_dataset._labels 39 | 40 | if args.preload: 41 | train_data = load_image_files(train_paths) 42 | test_data = load_image_files(test_paths) 43 | ImageDataset = BasicDataset 44 | else: 45 | train_data = train_paths 46 | test_data = test_paths 47 | ImageDataset = ImagePathDataset 48 | 49 | transform_weak = get_weak_transforms(crop_size=args.img_size, crop_ratio=args.crop_ratio, dataset_name=dataset_name) 50 | transform_strong = get_strong_transforms(crop_size=args.img_size, crop_ratio=args.crop_ratio, dataset_name=dataset_name) 51 | transform_val = get_val_transforms(crop_size=args.img_size, dataset_name=dataset_name) 52 | 53 | eval_dset = ImageDataset(alg, test_data, test_targets, transform_val, False, None) 54 | test_dset = None 55 | 56 | if alg == "fullysupervised": 57 | lb_dset = ImageDataset(alg, train_data, train_targets, transform_weak, False, transform_strong) 58 | return lb_dset, None, eval_dset, test_dset 59 | 60 | lb_data, lb_targets, ulb_data, ulb_targets = split_ssl_data( 61 | args, 62 | train_data, 63 | train_targets, 64 | lb_num_labels=num_labels, 65 | ulb_num_labels=args.ulb_num_labels, 66 | include_lb_to_ulb=include_lb_to_ulb, 67 | ) 68 | 69 | lb_dset = ImageDataset(alg, lb_data, lb_targets, transform_weak, False, transform_strong) 70 | ulb_dset = ImageDataset(alg, ulb_data, ulb_targets, transform_weak, True, transform_strong) 71 | 72 | if alg == "supervised": 73 | ulb_dset = None 74 | 75 | return lb_dset, ulb_dset, eval_dset, test_dset 76 | -------------------------------------------------------------------------------- /semilearn/datasets/nlp_datasets/datasetbase.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import random 6 | 7 | from torch.utils.data import Dataset 8 | 9 | 10 | class BasicDataset(Dataset): 11 | """ 12 | BasicDataset returns a pair of text and labels (targets). 13 | If targets are not given, BasicDataset returns None as the label. 14 | Returns both original and augmented text. Augmented texts can be None. 15 | """ 16 | 17 | def __init__(self, alg, data, targets=None, is_ulb=False, *args, **kwargs): 18 | """ 19 | Args: 20 | alg (str): Algorithm. 21 | data (list): List of text data along with two augmented texts (e.g., [text, aug_text1 (or None), aug_text2 (or None)]). 22 | targets (list or None): Target labels corresponding to the images. 23 | is_ulb (bool): Indicates if the dataset is unlabeled. 
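        Illustrative entry (added; the strings are hypothetical):

            data = [("great food!", "the food was great", "really good food")]
            targets = [4.0]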
24 | """ 25 | super(BasicDataset, self).__init__() 26 | self.alg = alg 27 | self.data = data 28 | self.targets = targets 29 | self.is_ulb = is_ulb 30 | self.transform = None 31 | 32 | def random_choose_sen(self): 33 | """Randomly choose one of the augmented sentences.""" 34 | return random.randint(1, 2) 35 | 36 | def __sample__(self, idx): 37 | """Retrieve the text and corresponding target at a specific index.""" 38 | sen = self.data[idx] 39 | target = None if self.targets is None else self.targets[idx] 40 | return sen, target 41 | 42 | def __getitem__(self, idx): 43 | """ 44 | Returns raw and/or augmented text based on the algorithm and dataset type. 45 | """ 46 | sen, target = self.__sample__(idx) 47 | 48 | data_dict = { 49 | "idx_lb": lambda: idx, 50 | "x_lb": lambda: sen[0], 51 | "x_lb_s": lambda: sen[self.random_choose_sen()], 52 | "y_lb": lambda: target, 53 | "idx_ulb": lambda: idx, 54 | "x_ulb_w": lambda: sen[0], 55 | "x_ulb_w_2": lambda: sen[0], 56 | "x_ulb_s": lambda: sen[self.random_choose_sen()], 57 | "x_ulb_s_2": lambda: sen[self.random_choose_sen()], 58 | } 59 | 60 | data_keys = self._determine_data_keys() 61 | return {k: data_dict[k]() for k in data_keys} 62 | 63 | def __len__(self): 64 | return len(self.data) 65 | 66 | def _determine_data_keys(self): 67 | """Determine the required output data based on the algorithm type.""" 68 | data_keys = set() 69 | 70 | if not self.is_ulb: 71 | data_keys.update({"idx_lb", "x_lb", "y_lb"}) 72 | return data_keys 73 | 74 | # for regression algorithms 75 | if self.alg == "fullysupervised" or self.alg == "supervised": 76 | data_keys.update({"idx_ulb"}) 77 | elif self.alg == "rankup" or self.alg == "rankuprda": 78 | data_keys.update({"idx_ulb", "x_ulb_w", "x_ulb_s"}) 79 | elif self.alg == "pimodel" or self.alg == "meanteacher" or self.alg == "mixmatch": 80 | data_keys.update({"idx_ulb", "x_ulb_w", "x_ulb_w_2"}) 81 | else: 82 | data_keys.update({"idx_ulb", "x_ulb_w"}) 83 | 84 | return data_keys 85 | -------------------------------------------------------------------------------- /semilearn/algorithms/meanteacher/meanteacher.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import torch 6 | import numpy as np 7 | 8 | from semilearn.core import AlgorithmBase 9 | from semilearn.core.utils import ALGORITHMS 10 | from semilearn.algorithms.utils import SSL_Argument 11 | 12 | 13 | @ALGORITHMS.register("meanteacher") 14 | class MeanTeacher(AlgorithmBase): 15 | """ 16 | MeanTeacher algorithm (https://arxiv.org/abs/1703.01780). 
17 | 18 | Args: 19 | - args (`argparse`): 20 | algorithm arguments 21 | - net_builder (`callable`): 22 | network loading function 23 | - tb_log (`TBLog`): 24 | tensorboard logger 25 | - logger (`logging.Logger`): 26 | logger to use 27 | - unsup_warm_up (`float`, *optional*, defaults to 0.4): 28 | Ramp up for weights for unsupervised loss 29 | """ 30 | 31 | def __init__(self, args, net_builder, tb_log=None, logger=None, **kwargs): 32 | super().__init__(args, net_builder, tb_log, logger, **kwargs) 33 | # mean teacher specified arguments 34 | self.init(unsup_warm_up=args.unsup_warm_up) 35 | 36 | def init(self, unsup_warm_up=0.4): 37 | self.unsup_warm_up = unsup_warm_up 38 | 39 | def train_step(self, x_lb, y_lb, x_ulb_w, x_ulb_w_2, **kwargs): 40 | # inference and calculate sup/unsup losses 41 | with self.amp_cm(): 42 | outs_x_lb = self.model(x_lb) 43 | logits_x_lb = outs_x_lb["logits"] 44 | feats_x_lb = outs_x_lb["feat"] 45 | 46 | self.ema.apply_shadow() 47 | with torch.no_grad(): 48 | self.bn_controller.freeze_bn(self.model) 49 | outs_x_ulb_w = self.model(x_ulb_w) 50 | logits_x_ulb_w = outs_x_ulb_w["logits"] # self.model(x_ulb_w) 51 | feats_x_ulb_w = outs_x_ulb_w["feat"] 52 | self.bn_controller.unfreeze_bn(self.model) 53 | self.ema.restore() 54 | 55 | self.bn_controller.freeze_bn(self.model) 56 | outs_x_ulb_w_2 = self.model(x_ulb_w_2) 57 | logits_x_ulb_w_2 = outs_x_ulb_w_2["logits"] 58 | feats_x_ulb_w_2 = outs_x_ulb_w_2["feat"] 59 | self.bn_controller.unfreeze_bn(self.model) 60 | 61 | # extract features for further use in the classification algorithm. 62 | feat_dict = {"x_lb": feats_x_lb, "x_ulb_w": feats_x_ulb_w, "x_ulb_w_2": feats_x_ulb_w_2} 63 | for k in kwargs: 64 | feat_dict[k] = self.model(kwargs[k], only_feat=True) 65 | 66 | sup_loss = self.reg_loss(logits_x_lb, y_lb, reduction="mean") 67 | unsup_loss = self.consistency_loss(logits_x_ulb_w_2, logits_x_ulb_w.detach(), "mse") 68 | 69 | unsup_warmup = np.clip(self.it / (self.unsup_warm_up * self.num_train_iter), a_min=0.0, a_max=1.0) 70 | total_loss = sup_loss + self.ulb_loss_ratio * unsup_loss * unsup_warmup 71 | 72 | out_dict = self.process_out_dict(loss=total_loss, feat=feat_dict) 73 | log_dict = self.process_log_dict(sup_loss=sup_loss.item(), unsup_loss=unsup_loss.item(), total_loss=total_loss.item()) 74 | return out_dict, log_dict 75 | 76 | @staticmethod 77 | def get_argument(): 78 | return [ 79 | SSL_Argument("--unsup_warm_up", float, 0.4, "warm up ratio for regression unsupervised loss"), 80 | ] 81 | -------------------------------------------------------------------------------- /semilearn/datasets/audio_datasets/vcc2018.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Pin-Yen Huang. 2 | # Licensed under the MIT License. 3 | # Code in this file is adapted from pytorch/pytorch 4 | # https://github.com/pytorch/vision/blob/main/torchvision/datasets/food101.py 5 | 6 | import librosa 7 | import numpy as np 8 | import pandas as pd 9 | from pathlib import Path 10 | from typing import Any, Tuple 11 | 12 | from torch.utils.data import Dataset 13 | from torchvision.datasets.utils import download_and_extract_archive, verify_str_arg 14 | 15 | 16 | class VCC2018(Dataset): 17 | """`The VCC2018 Data Set ` 18 | 19 | The Voice Conversion Challenge 2018 (VCC2018) dataset is an audio quality assessment dataset, 20 | where the objective is to predict the quality of an audio sample. The labels, ranging from 1 21 | to 5, are obtained by averaging the scores provided by multiple listeners. 
The dataset 22 | comprises over 20,000 audio files, which we split into 16,464 training samples and 4,116 test samples. 23 | 24 | Args: 25 | root (string): Root directory of the dataset. 26 | split (string, optional): The dataset split, supports ``"train"`` (default) and ``"test"``. 27 | download (bool, optional): If True, downloads the dataset from the internet and 28 | puts it in root directory. If dataset is already downloaded, it is not 29 | downloaded again. Default is False. 30 | """ 31 | 32 | _URL_MD5 = { 33 | "data": ( 34 | "https://datashare.ed.ac.uk/bitstream/handle/10283/3061/vcc2018_submitted_systems_converted_speech.tar.gz", 35 | "75b0f937240f6850a56ec2cbad34b4ad", 36 | ), 37 | "meta": ("https://github.com/pm25/regression-datasets/raw/refs/heads/main/data/vcc2018/meta.zip", "66ea41b35ffbc1ad6565e538320f011d"), 38 | } 39 | 40 | def __init__( 41 | self, 42 | root: str, 43 | split: str = "train", 44 | download: bool = False, 45 | ) -> None: 46 | super().__init__() 47 | self._split = verify_str_arg(split, "split", ("train", "test")) 48 | self._base_folder = Path(root) / "vcc2018" 49 | self._meta_folder = self._base_folder / "meta" 50 | self._audio_folder = self._base_folder / "mnt/sysope/test_files/testVCC2" 51 | 52 | if download: 53 | self._download() 54 | 55 | if not self._check_exists(): 56 | raise RuntimeError("Dataset not found. You can use download=True to download it") 57 | 58 | metadata = pd.read_csv(self._meta_folder / f"{split}.csv") 59 | self._file_paths = metadata["file_name"].apply(lambda x: self._audio_folder / x).to_numpy(dtype="object") 60 | self._labels = metadata["label"].to_numpy(dtype=np.float32) 61 | 62 | def __len__(self) -> int: 63 | return len(self._file_paths) 64 | 65 | def __getitem__(self, idx: int) -> Tuple[Any, Any]: 66 | audio_file, label = self._file_paths[idx], self._labels[idx] 67 | waveform, sample_rate = librosa.load(audio_file, sr=None, mono=True) 68 | return waveform, sample_rate, label 69 | 70 | def extra_repr(self) -> str: 71 | return f"split={self._split}" 72 | 73 | def _check_exists(self) -> bool: 74 | return all(folder.exists() and folder.is_dir() for folder in (self._meta_folder, self._audio_folder)) 75 | 76 | def _download(self) -> None: 77 | if self._check_exists(): 78 | return 79 | for url, md5 in self._URL_MD5.values(): 80 | download_and_extract_archive(url, download_root=self._base_folder, md5=md5) 81 | -------------------------------------------------------------------------------- /visualization/plot_multi.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Pin-Yen Huang. 2 | # Licensed under the MIT License. 
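# Note (added; inferred from the __main__ block below): each features/*.npy file is
# expected to hold a pickled dict {"feat": ndarray of shape (N, D), "label": ndarray
# of shape (N,)}, loaded via np.load(path, allow_pickle=True).item().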
3 | 4 | import numpy as np 5 | import pandas as pd 6 | import seaborn as sns 7 | import matplotlib 8 | import matplotlib.pyplot as plt 9 | from matplotlib.gridspec import GridSpec 10 | 11 | from umap import UMAP 12 | from pathlib import Path 13 | from sklearn.manifold import TSNE 14 | 15 | font = {"size": 18} 16 | matplotlib.rc("font", **font) 17 | 18 | 19 | def plot_by_seaborn(ax, x_feats, y_labels, method="t-sne"): 20 | n_components = 2 21 | 22 | if method.lower() == "t-sne": 23 | m = TSNE( 24 | n_components=n_components, 25 | perplexity=50, 26 | learning_rate="auto", 27 | init="pca", 28 | random_state=222, 29 | ) 30 | elif method.lower() == "umap": 31 | m = UMAP(n_components=n_components, n_neighbors=50, init="pca", random_state=222) 32 | 33 | projections = m.fit_transform(x_feats) 34 | 35 | projections_df = pd.DataFrame( 36 | { 37 | "Dimension 1": projections[:, 0], 38 | "Dimension 2": projections[:, 1], 39 | "label": y_labels, 40 | } 41 | ) 42 | 43 | sns.scatterplot( 44 | ax=ax, 45 | x="Dimension 1", 46 | y="Dimension 2", 47 | hue="label", 48 | palette=sns.color_palette("coolwarm", as_cmap=True), 49 | data=projections_df, 50 | legend=False, 51 | s=9, 52 | ) 53 | 54 | x_min, x_max = ax.get_xlim() 55 | y_min, y_max = ax.get_ylim() 56 | xy_lim = (min(x_min, y_min), max(x_max, y_max)) 57 | ax.set_xlim(xy_lim) 58 | ax.set_ylim(xy_lim) 59 | ax.set_aspect("equal") 60 | 61 | 62 | if __name__ == "__main__": 63 | data_info_list = [ 64 | {"path": "features/supervised_utkface_lb250_s0.npy", "name": "Supervised"}, 65 | {"path": "features/mixmatch_utkface_lb250_s0.npy", "name": "MixMatch"}, 66 | {"path": "features/supervised_fixmatch_utkface_lb250_s0.npy", "name": "RankUp"}, 67 | ] 68 | method = "t-SNE" # UMAP 69 | 70 | features = [np.load(d["path"], allow_pickle=True).item() for d in data_info_list] 71 | 72 | n_data = len(data_info_list) 73 | fig = plt.figure(figsize=(8 * n_data, 6)) 74 | gs = GridSpec(1, n_data + 1, width_ratios=[1] * n_data + [0.05], wspace=0.0) 75 | 76 | axes = [fig.add_subplot(gs[i]) for i in range(n_data)] 77 | cbar_ax = fig.add_subplot(gs[-1]) 78 | 79 | for i, ax in enumerate(axes): 80 | data_name = data_info_list[i]["name"] 81 | print(f"Running {method} for {data_name} features ...") 82 | plot_by_seaborn(ax, features[i]["feat"], features[i]["label"], method=method) 83 | ax.set_title(data_info_list[i]["name"]) 84 | 85 | norm = plt.Normalize( 86 | min([feat["label"].min() for feat in features]), 87 | max([feat["label"].max() for feat in features]), 88 | ) 89 | 90 | cbar = fig.colorbar( 91 | plt.cm.ScalarMappable(cmap=sns.color_palette("coolwarm", as_cmap=True), norm=norm), 92 | cax=cbar_ax, 93 | orientation="vertical", 94 | label="Label (Age)", 95 | ) 96 | 97 | # Ensure output directory exists 98 | output_dir = Path("plots") 99 | output_dir.mkdir(exist_ok=True, parents=True) 100 | 101 | # Save the plot 102 | save_name = "_".join([d["name"].lower() for d in data_info_list]) + f"_{method.lower()}_2d.png" 103 | save_path = output_dir / save_name 104 | plt.savefig(save_path, dpi=300, bbox_inches="tight") 105 | plt.show() 106 | print(f"Plot saved to {save_path}") 107 | -------------------------------------------------------------------------------- /semilearn/algorithms/clss/clss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Pin-Yen Huang. 2 | # Licensed under the MIT License. 
3 | # Code in this file is adapted from xmed-lab/CLSS 4 | # https://github.com/xmed-lab/CLSS 5 | 6 | from semilearn.core import AlgorithmBase 7 | from semilearn.core.utils import ALGORITHMS 8 | from semilearn.algorithms.utils import SSL_Argument 9 | 10 | from .ordinal_entropy import ordinal_entropy 11 | from .ulb_rank import ulb_rank, ulb_rank_prdlb 12 | 13 | 14 | @ALGORITHMS.register("clss") 15 | class CLSS(AlgorithmBase): 16 | """ 17 | CLSS algorithm (https://proceedings.neurips.cc/paper_files/paper/2023/file/b2d4051f03a7038a2771dfbbe5c7b54e-Paper-Conference.pdf). 18 | 19 | Args: 20 | - args (`argparse`): 21 | algorithm arguments 22 | - net_builder (`callable`): 23 | network loading function 24 | - tb_log (`TBLog`): 25 | tensorboard logger 26 | - logger (`logging.Logger`): 27 | logger to use 28 | """ 29 | 30 | def __init__(self, args, net_builder, tb_log=None, logger=None, **kwargs): 31 | super().__init__(args, net_builder, tb_log, logger, **kwargs) 32 | self.init( 33 | lambda_val=args.lambda_val, 34 | lb_ctr_loss_ratio=args.lb_ctr_loss_ratio, 35 | ulb_ctr_loss_ratio=args.ulb_ctr_loss_ratio, 36 | ulb_rank_loss_ratio=args.ulb_rank_loss_ratio, 37 | ) 38 | 39 | def init( 40 | self, 41 | lambda_val=2, 42 | lb_ctr_loss_ratio=1.0, 43 | ulb_ctr_loss_ratio=0.05, 44 | ulb_rank_loss_ratio=0.01, 45 | ): 46 | self.lambda_val = lambda_val 47 | self.lb_ctr_loss_ratio = lb_ctr_loss_ratio 48 | self.ulb_ctr_loss_ratio = ulb_ctr_loss_ratio 49 | self.ulb_rank_loss_ratio = ulb_rank_loss_ratio 50 | 51 | def train_step(self, x_lb, y_lb, x_ulb_w, **kwargs): 52 | # inference and calculate sup/unsup losses 53 | with self.amp_cm(): 54 | x_lb_outputs = self.model(x_lb) 55 | logits_x_lb = x_lb_outputs["logits"] 56 | feats_x_lb = x_lb_outputs["feat"] 57 | 58 | x_ulb_outputs = self.model(x_ulb_w) 59 | logits_x_ulb_w = x_ulb_outputs["logits"] 60 | feats_x_ulb_w = x_ulb_outputs["feat"] 61 | 62 | # extract features for further use in the classification algorithm. 63 | feat_dict = {"x_lb": feats_x_lb, "x_ulb_w": feats_x_ulb_w} 64 | for k in kwargs: 65 | feat_dict[k] = self.model(kwargs[k], only_feat=True) 66 | 67 | sup_reg_loss = self.reg_loss(logits_x_lb, y_lb, reduction="mean") 68 | sup_ctr_loss = ordinal_entropy(feats_x_lb, y_lb) 69 | sup_loss = sup_reg_loss + self.lb_ctr_loss_ratio * sup_ctr_loss 70 | 71 | unsup_ctr_loss, ft_rank = ulb_rank(feats_x_ulb_w, self.lambda_val) 72 | unsup_rank_loss = ulb_rank_prdlb(logits_x_ulb_w.unsqueeze(1), self.lambda_val, pred_inp=ft_rank) 73 | unsup_loss = self.ulb_ctr_loss_ratio * unsup_ctr_loss + self.ulb_rank_loss_ratio * unsup_rank_loss 74 | 75 | total_loss = sup_loss + unsup_loss 76 | 77 | out_dict = self.process_out_dict(loss=total_loss, feat=feat_dict) 78 | log_dict = self.process_log_dict(sup_loss=sup_loss.item(), unsup_loss=unsup_loss.item(), total_loss=total_loss.item()) 79 | return out_dict, log_dict 80 | 81 | @staticmethod 82 | def get_argument(): 83 | return [ 84 | SSL_Argument("--lambda_val", float, 2.0), 85 | SSL_Argument("--lb_ctr_loss_ratio", float, 1.0), 86 | SSL_Argument("--ulb_ctr_loss_ratio", float, 0.05), 87 | SSL_Argument("--ulb_rank_loss_ratio", float, 0.01), 88 | ] 89 | -------------------------------------------------------------------------------- /semilearn/datasets/audio_datasets/get_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 
4 | 5 | from semilearn.datasets import audio_datasets 6 | from semilearn.datasets.utils import split_ssl_data, load_audio_files 7 | 8 | from .datasetbase import BasicDataset, AudioPathDataset 9 | from .augmentation import Subsample, RandomSubsample, AudioTransforms 10 | 11 | 12 | def get_audio_dataset(args, alg, dataset_name, num_labels, data_dir="./data", include_lb_to_ulb=True): 13 | """ 14 | Get the audio dataset and split the training samples into labeled and unlabeled sets. 15 | 16 | Args: 17 | alg (str): Algorithm. 18 | dataset_name (str): The name of the dataset to load. 19 | num_labels (int): The number of labeled samples for the training set. 20 | data_dir (str): The directory from which to load the dataset. 21 | include_lb_to_ulb (bool): Indicates whether to include labeled data in the unlabeled set. 22 | 23 | Returns: 24 | Tuple[Dataset, Dataset, Dataset, Dataset]: 25 | A tuple containing: 26 | - train labeled dataset 27 | - train unlabeled dataset 28 | - evaluation dataset 29 | - test dataset 30 | """ 31 | dataset = getattr(audio_datasets, dataset_name.upper()) 32 | 33 | train_dataset = dataset(data_dir, split="train", download=True) 34 | train_paths, train_targets = train_dataset._file_paths, train_dataset._labels 35 | 36 | test_dataset = dataset(data_dir, split="test", download=True) 37 | test_paths, test_targets = test_dataset._file_paths, test_dataset._labels 38 | 39 | if args.preload: 40 | train_data = load_audio_files(train_paths, args.sample_rate) 41 | test_data = load_audio_files(test_paths, args.sample_rate) 42 | AudioDataset = BasicDataset 43 | else: 44 | train_data = train_paths 45 | test_data = test_paths 46 | AudioDataset = AudioPathDataset 47 | 48 | transform_weak = RandomSubsample(max_length_seconds=args.max_length_seconds) 49 | transform_strong = AudioTransforms(max_length_seconds=args.max_length_seconds, dataset_name=dataset_name) 50 | transform_val = Subsample(max_length_seconds=args.max_length_seconds) 51 | 52 | eval_dset = AudioDataset(alg, test_data, test_targets, transform_val, False, None, args.sample_rate) 53 | test_dset = None 54 | 55 | if dataset_name.lower() in ["bvcc"]: 56 | dev_dataset = dataset(data_dir, split="dev", download=True) 57 | dev_paths, dev_targets = dev_dataset._file_paths, dev_dataset._labels 58 | dev_data = load_audio_files(dev_paths, args.sample_rate) if args.preload else dev_paths 59 | eval_dset = AudioDataset(alg, dev_data, dev_targets, transform_val, False, None, args.sample_rate) 60 | test_dset = AudioDataset(alg, test_data, test_targets, transform_val, False, None, args.sample_rate) 61 | 62 | if alg == "fullysupervised": 63 | lb_dset = AudioDataset(alg, train_data, train_targets, transform_weak, False, transform_strong, args.sample_rate) 64 | return lb_dset, None, eval_dset, test_dset 65 | 66 | lb_data, lb_targets, ulb_data, ulb_targets = split_ssl_data( 67 | args, 68 | train_data, 69 | train_targets, 70 | lb_num_labels=num_labels, 71 | ulb_num_labels=args.ulb_num_labels, 72 | include_lb_to_ulb=include_lb_to_ulb, 73 | ) 74 | 75 | lb_dset = AudioDataset(alg, lb_data, lb_targets, transform_weak, False, transform_strong, args.sample_rate) 76 | ulb_dset = AudioDataset(alg, ulb_data, ulb_targets, transform_weak, True, transform_strong, args.sample_rate) 77 | 78 | if alg == "supervised": 79 | ulb_dset = None 80 | 81 | return lb_dset, ulb_dset, eval_dset, test_dset 82 | -------------------------------------------------------------------------------- /semilearn/algorithms/rda/rda.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Pin-Yen Huang. 2 | # Licensed under the MIT License. 3 | 4 | import numpy as np 5 | 6 | from .utils import RDAHook 7 | 8 | from semilearn.core import AlgorithmBase 9 | from semilearn.core.utils import ALGORITHMS 10 | from semilearn.algorithms.utils import SSL_Argument 11 | 12 | 13 | @ALGORITHMS.register("rda") 14 | class RDA(AlgorithmBase): 15 | """ 16 | RDA algorithm (https://arxiv.org/abs/2410.22124). 17 | 18 | Args: 19 | - args (`argparse`): 20 | algorithm arguments 21 | - net_builder (`callable`): 22 | network loading function 23 | - tb_log (`TBLog`): 24 | tensorboard logger 25 | - logger (`logging.Logger`): 26 | logger to use 27 | - unsup_warm_up (`float`, *optional*, defaults to 0.4): 28 | Ramp up for weights for unsupervised loss 29 | - rda_num_refine_iter (`int`): 30 | Number of iterations to apply RDA. 31 | """ 32 | 33 | def __init__(self, args, net_builder, tb_log=None, logger=None): 34 | self.init( 35 | unsup_warm_up=args.unsup_warm_up, 36 | rda_num_refine_iter=args.rda_num_refine_iter, 37 | ) 38 | super().__init__(args, net_builder, tb_log, logger) 39 | 40 | def init(self, unsup_warm_up, rda_num_refine_iter): 41 | self.unsup_warm_up = unsup_warm_up 42 | self.rda_num_refine_iter = rda_num_refine_iter 43 | 44 | def set_hooks(self): 45 | super().set_hooks() 46 | # reset PseudoLabelingHook hook 47 | self.register_hook( 48 | RDAHook( 49 | train_ulb_len=len(self.dataset_dict["train_ulb"]), 50 | lb_targets=np.copy(self.dataset_dict["train_lb"].targets), 51 | num_refine_iter=self.rda_num_refine_iter, 52 | ), 53 | "RDAHook", 54 | ) 55 | 56 | def train_step(self, x_lb, y_lb, idx_ulb, x_ulb_w, **kwargs): 57 | self.idx_ulb = idx_ulb 58 | 59 | # inference and calculate sup losses 60 | with self.amp_cm(): 61 | outs_x_lb = self.model(x_lb) 62 | logits_x_lb = outs_x_lb["logits"] 63 | feats_x_lb = outs_x_lb["feat"] 64 | sup_loss = self.reg_loss(logits_x_lb, y_lb, reduction="mean") 65 | 66 | self.bn_controller.freeze_bn(self.model) 67 | outs_x_ulb_w = self.model(x_ulb_w) 68 | logits_x_ulb_w = outs_x_ulb_w["logits"] 69 | feats_x_ulb_w = outs_x_ulb_w["feat"] 70 | self.bn_controller.unfreeze_bn(self.model) 71 | 72 | # extract features for further use in the classification algorithm. 
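            # (Added note: the "gen_ulb_targets" hook call below hands the raw
            # predictions to RDAHook, which refreshes distribution-aligned pseudo
            # labels every rda_num_refine_iter iterations and returns the refined
            # values from the second epoch onward.)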
73 | feat_dict = {"x_lb": feats_x_lb, "x_ulb_w": feats_x_ulb_w} 74 | for k in kwargs: 75 | feat_dict[k] = self.model(kwargs[k], only_feat=True) 76 | 77 | # generate unlabeled targets (pseudo-labels) using the RDA hook 78 | pseudo_label = self.call_hook( 79 | "gen_ulb_targets", 80 | "RDAHook", 81 | logits=logits_x_ulb_w, 82 | ) 83 | 84 | unsup_loss = self.consistency_loss(logits_x_ulb_w, pseudo_label.detach(), "mse") 85 | 86 | unsup_warmup = np.clip(self.it / (self.unsup_warm_up * self.num_train_iter), a_min=0.0, a_max=1.0) 87 | total_loss = sup_loss + self.ulb_loss_ratio * unsup_loss * unsup_warmup 88 | 89 | out_dict = self.process_out_dict(loss=total_loss, feat=feat_dict) 90 | log_dict = self.process_log_dict(sup_loss=sup_loss.item(), unsup_loss=unsup_loss.item(), total_loss=total_loss.item()) 91 | return out_dict, log_dict 92 | 93 | @staticmethod 94 | def get_argument(): 95 | return [ 96 | SSL_Argument("--unsup_warm_up", float, 0.4), 97 | SSL_Argument("--rda_num_refine_iter", int, 1024), 98 | ] 99 | -------------------------------------------------------------------------------- /semilearn/datasets/nlp_datasets/yelp_review.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Pin-Yen Huang. 2 | # Licensed under the MIT License. 3 | # Code in this file is adapted from pytorch/vision 4 | # https://github.com/pytorch/vision/blob/main/torchvision/datasets/food101.py 5 | 6 | import json 7 | import numpy as np 8 | from pathlib import Path 9 | from typing import Any, Tuple 10 | 11 | from torch.utils.data import Dataset 12 | from torchvision.datasets.utils import download_and_extract_archive, verify_str_arg 13 | 14 | 15 | class YELP_REVIEW(Dataset): 16 | """`Yelp Review Dataset ` 17 | 18 | The Yelp Review dataset is a sentiment ordinal regression dataset, where the goal 19 | is to predict the rating of a customer based on their comment. The labels are divided 20 | into 5 classes (scores ranging from 0 to 4). Originally, each class contains 130,000 21 | training samples and 10,000 test samples. 22 | 23 | This version uses a processed Yelp Review dataset provided by USB 24 | (https://github.com/microsoft/semi-supervised-learning). It contains: 25 | - 50,000 samples per class for the training split (250,000 samples total) 26 | - 5,000 samples per class for the validation split (25,000 samples total) 27 | - The original test dataset remains unchanged (50,000 samples total) 28 | 29 | Additionally, the dataset includes preprocessed augmented text data (aug_0 and aug_1) 30 | generated using back-translation, along with the original text (ori). 31 | 32 | Args: 33 | root (string): Root directory of the dataset. 34 | split (string, optional): The dataset split, supports ``"train"`` (default), ``"dev"``, and ``"test"``. 35 | download (bool, optional): If True, downloads the dataset from the internet and 36 | puts it in root directory. If dataset is already downloaded, it is not 37 | downloaded again. Default is False.
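Each record in the split's JSON file maps a sample index to a dict with the keys
``"ori"`` (original text), ``"label"`` (rating), and optionally ``"aug_0"`` / ``"aug_1"``
(back-translated augmentations); ``__init__`` below parses these into
``(ori, aug_0, aug_1)`` text tuples and float32 labels.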
38 | """ 39 | 40 | _URL = "https://huggingface.co/datasets/py97/Yelp-Review/resolve/main/YelpReview.tar.gz" 41 | _MD5 = "4c3e3736f3dc2c175f5ff9b0f69e6043" 42 | 43 | def __init__( 44 | self, 45 | root: str, 46 | split: str = "train", 47 | download: bool = False, 48 | ) -> None: 49 | super().__init__() 50 | self._split = verify_str_arg(split, "split", ("train", "dev", "test")) 51 | self._base_folder = Path(root) / "yelp_review" 52 | self._text_folder = self._base_folder / "YelpReview" 53 | 54 | if download: 55 | self._download() 56 | 57 | if not self._check_exists(): 58 | raise RuntimeError("Dataset not found. You can use download=True to download it") 59 | 60 | with open(self._text_folder / f"{split}.json", "r") as f: 61 | data = json.load(f) 62 | 63 | texts, labels = [], [] 64 | for idx in data: 65 | aug_0 = data[idx].get("aug_0", None) 66 | aug_1 = data[idx].get("aug_1", None) 67 | texts.append((data[idx]["ori"], aug_0, aug_1)) 68 | labels.append(float(data[idx]["label"])) 69 | 70 | self._texts = np.array(texts, dtype="object") 71 | self._labels = np.array(labels, dtype=np.float32) 72 | 73 | def __len__(self) -> int: 74 | return len(self._texts) 75 | 76 | def __getitem__(self, idx: int) -> Tuple[Any, Any]: 77 | text, label = self._texts[idx], self._labels[idx] 78 | return text, label 79 | 80 | def extra_repr(self) -> str: 81 | return f"split={self._split}" 82 | 83 | def _check_exists(self) -> bool: 84 | return self._text_folder.exists() and self._text_folder.is_dir() 85 | 86 | def _download(self) -> None: 87 | if self._check_exists(): 88 | return 89 | download_and_extract_archive(self._URL, download_root=self._base_folder, md5=self._MD5) 90 | -------------------------------------------------------------------------------- /semilearn/datasets/collactors/nlp_collactor.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | from dataclasses import dataclass 6 | from typing import Optional, Union 7 | 8 | from transformers import BertTokenizerFast 9 | from transformers.file_utils import PaddingStrategy 10 | from transformers.tokenization_utils_base import PreTrainedTokenizerBase 11 | from transformers.data import default_data_collator 12 | 13 | 14 | @dataclass 15 | class DataCollatorWithPadding: 16 | """ 17 | Data collator that will dynamically pad the inputs received. 18 | 19 | Args: 20 | tokenizer ([`PreTrainedTokenizer`] or [`PreTrainedTokenizerFast`]): 21 | The tokenizer used for encoding the data. 22 | padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `True`): 23 | Select a strategy to pad the returned sequences (according to the model's padding side and padding index) 24 | among: 25 | - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single sequence 26 | if provided). 27 | - `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the maximum 28 | acceptable input length for the model if that argument is not provided. 29 | - `False` or `'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of different 30 | lengths). 31 | max_length (`int`, *optional*): 32 | Maximum length of the returned list and optionally padding length (see above). 33 | pad_to_multiple_of (`int`, *optional*): 34 | If set will pad the sequence to a multiple of the provided value. 
This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= 36 | 7.0 (Volta). 37 | return_tensors (`str`): 38 | The type of Tensor to return. Allowable values are "np", "pt" and "tf". 39 | """ 40 | 41 | tokenizer: PreTrainedTokenizerBase 42 | padding: Union[bool, str, PaddingStrategy] = True 43 | max_length: Optional[int] = None 44 | pad_to_multiple_of: Optional[int] = None 45 | return_tensors: str = "pt" 46 | 47 | def __call__(self, features): 48 | text_keys = ["x_lb", "x_lb_s", "x_ulb_w", "x_ulb_w_2", "x_ulb_s", "x_ulb_s_2"] 49 | text_features = {k: [] for k in text_keys} 50 | other_features = [] 51 | for f in features: 52 | exist_ks = [k for k in text_keys if k in f] 53 | for k in exist_ks: 54 | text = f.pop(k) 55 | input_ids = self.tokenizer(text, max_length=self.max_length, truncation=True, padding=False)["input_ids"] 56 | text_features[k].append({"input_ids": input_ids}) 57 | other_features.append(f) 58 | 59 | batch = default_data_collator(other_features, return_tensors="pt") 60 | 61 | for key, feats in text_features.items(): 62 | if len(feats) > 0: 63 | out = self.tokenizer.pad( 64 | feats, 65 | padding=True, 66 | max_length=None, 67 | pad_to_multiple_of=self.pad_to_multiple_of, 68 | return_tensors=self.return_tensors, 69 | ) 70 | batch[key] = {"input_ids": out["input_ids"], "attention_mask": out["attention_mask"]} 71 | 72 | return batch 73 | 74 | 75 | def get_bert_base_collactor(pretrain_path="bert-base-uncased", max_length=512): 76 | tokenizer = BertTokenizerFast.from_pretrained(pretrain_path) 77 | tokenizer.deprecation_warnings["Asking-to-pad-a-fast-tokenizer"] = True  # suppress the fast-tokenizer padding warning 78 | collact_fn = DataCollatorWithPadding(tokenizer, max_length=max_length) 79 | return collact_fn 80 | -------------------------------------------------------------------------------- /semilearn/datasets/nlp_datasets/amazon_review.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Pin-Yen Huang. 2 | # Licensed under the MIT License. 3 | # Code in this file is adapted from pytorch/vision 4 | # https://github.com/pytorch/vision/blob/main/torchvision/datasets/food101.py 5 | 6 | import json 7 | import numpy as np 8 | from pathlib import Path 9 | from typing import Any, Tuple 10 | 11 | from torch.utils.data import Dataset 12 | from torchvision.datasets.utils import download_and_extract_archive, verify_str_arg 13 | 14 | 15 | class AMAZON_REVIEW(Dataset): 16 | """`Amazon Review Dataset ` 17 | 18 | The Amazon Review dataset is a sentiment ordinal regression dataset, where the goal 19 | is to predict the rating of a customer based on their comment. The labels are divided 20 | into 5 classes (scores ranging from 0 to 4). Originally, each class contains 600,000 21 | training samples and 130,000 test samples. 22 | 23 | This version uses a processed Amazon Review dataset provided by USB 24 | (https://github.com/microsoft/semi-supervised-learning). It contains: 25 | - 50,000 samples per class for the training split (250,000 samples total) 26 | - 5,000 samples per class for the validation split (25,000 samples total) 27 | - The original test dataset remains unchanged (650,000 samples total) 28 | 29 | Additionally, the dataset includes preprocessed augmented text data (aug_0 and aug_1) 30 | generated using back-translation, along with the original text (ori). 31 | 32 | Args: 33 | root (string): Root directory of the dataset.
34 | split (string, optional): The dataset split, supports ``"train"`` (default) and ``"test"``. 35 | download (bool, optional): If True, downloads the dataset from the internet and 36 | puts it in root directory. If dataset is already downloaded, it is not 37 | downloaded again. Default is False. 38 | """ 39 | 40 | _URL = "https://huggingface.co/datasets/py97/Amazon-Review/resolve/main/AmazonReview.tar.gz" 41 | _MD5 = "080761d785bd86ed6ebcd8e388e401e3" 42 | 43 | def __init__( 44 | self, 45 | root: str, 46 | split: str = "train", 47 | download: bool = False, 48 | ) -> None: 49 | super().__init__() 50 | self._split = verify_str_arg(split, "split", ("train", "dev", "test")) 51 | self._base_folder = Path(root) / "amazon_review" 52 | self._text_folder = self._base_folder / "AmazonReview" 53 | 54 | if download: 55 | self._download() 56 | 57 | if not self._check_exists(): 58 | raise RuntimeError("Dataset not found. You can use download=True to download it") 59 | 60 | with open(self._text_folder / f"{split}.json", "r") as f: 61 | data = json.load(f) 62 | 63 | texts, labels = [], [] 64 | for idx in data: 65 | aug_0 = data[idx].get("aug_0", None) 66 | aug_1 = data[idx].get("aug_1", None) 67 | texts.append((data[idx]["ori"], aug_0, aug_1)) 68 | labels.append(float(data[idx]["label"])) 69 | 70 | self._texts = np.array(texts, dtype="object") 71 | self._labels = np.array(labels, dtype=np.float32) 72 | 73 | def __len__(self) -> int: 74 | return len(self._texts) 75 | 76 | def __getitem__(self, idx: int) -> Tuple[Any, Any]: 77 | text, label = self._texts[idx], self._labels[idx] 78 | return text, label 79 | 80 | def extra_repr(self) -> str: 81 | return f"split={self._split}" 82 | 83 | def _check_exists(self) -> bool: 84 | return self._text_folder.exists() and self._text_folder.is_dir() 85 | 86 | def _download(self) -> None: 87 | if self._check_exists(): 88 | return 89 | download_and_extract_archive(self._URL, download_root=self._base_folder, md5=self._MD5) 90 | -------------------------------------------------------------------------------- /results/classic_cv_average_log.csv: -------------------------------------------------------------------------------- 1 | exp_name,num_exp,min_MAE,min_MSE,max_R2,max_LCC,max_SRCC,max_KTAU,min_GMAE 2 | clss_utkface_lb2000,3,6.288±0.013,81.669±1.321,0.794±0.003,0.892±0.002,0.862±0.001,0.700±0.001,3.405±0.021 3 | clss_utkface_lb250,3,9.096±0.150,163.917±6.450,0.586±0.016,0.768±0.011,0.737±0.014,0.566±0.010,4.994±0.198 4 | clss_utkface_lb50,3,13.609±0.917,340.943±39.913,0.138±0.101,0.438±0.084,0.447±0.074,0.320±0.057,7.564±0.403 5 | fullysupervised_utkface,3,4.851±0.006,49.599±0.079,0.875±0.000,0.936±0.000,0.910±0.001,0.765±0.001,2.564±0.014 6 | rankup(meanteacher-fixmatch)_utkface_lb250,3,7.006±0.172,98.049±5.135,0.752±0.013,0.868±0.008,0.831±0.004,0.662±0.006,3.809±0.128 7 | meanteacher_utkface_lb2000,3,6.291±0.029,81.706±1.547,0.793±0.004,0.892±0.002,0.862±0.001,0.700±0.000,3.256±0.030 8 | meanteacher_utkface_lb250,3,8.849±0.249,163.861±7.975,0.586±0.020,0.771±0.012,0.745±0.013,0.572±0.012,4.631±0.167 9 | meanteacher_utkface_lb50,3,13.925±0.197,345.331±14.528,0.127±0.037,0.417±0.030,0.423±0.023,0.300±0.017,7.942±0.164 10 | rankup(mixmatch-fixmatch)_utkface_lb250,3,7.117±0.092,91.350±2.525,0.769±0.006,0.893±0.003,0.866±0.002,0.701±0.001,4.443±0.113 11 | mixmatch_utkface_lb2000,3,6.033±0.070,69.569±1.441,0.824±0.004,0.911±0.002,0.883±0.002,0.724±0.002,3.584±0.024 12 | 
mixmatch_utkface_lb250,3,7.951±0.146,121.761±5.062,0.692±0.013,0.852±0.005,0.832±0.008,0.658±0.008,4.674±0.103 13 | mixmatch_utkface_lb50,3,11.441±0.451,237.182±11.271,0.401±0.028,0.677±0.029,0.674±0.035,0.500±0.028,6.979±0.361 14 | rankup(pimodel-fixmatch)_utkface_lb250,3,6.953±0.161,95.715±3.946,0.758±0.010,0.872±0.006,0.837±0.005,0.668±0.006,3.765±0.109 15 | pimodel_utkface_lb2000,3,6.308±0.101,83.168±2.448,0.790±0.006,0.890±0.004,0.860±0.003,0.698±0.003,3.267±0.021 16 | pimodel_utkface_lb250,3,9.453±0.302,184.454±11.701,0.534±0.030,0.737±0.019,0.706±0.015,0.533±0.013,4.890±0.141 17 | pimodel_utkface_lb50,3,13.821±1.020,356.119±33.893,0.100±0.086,0.385±0.100,0.387±0.092,0.275±0.069,7.820±0.707 18 | rankuprda_utkface_lb2000,3,5.513±0.074,61.921±1.524,0.844±0.004,0.919±0.002,0.890±0.003,0.736±0.004,2.967±0.041 19 | rankuprda_utkface_lb250,3,6.570±0.184,86.350±4.743,0.782±0.012,0.886±0.007,0.856±0.005,0.690±0.005,3.523±0.121 20 | rankuprda_utkface_lb50,3,9.329±0.539,177.405±16.268,0.552±0.041,0.757±0.026,0.770±0.009,0.595±0.005,4.938±0.476 21 | rda_utkface_lb250,3,8.644±0.217,154.906±9.034,0.609±0.023,0.794±0.014,0.772±0.012,0.599±0.009,4.497±0.172 22 | rda_utkface_lb50,3,14.342±1.273,371.893±49.433,0.060±0.125,0.409±0.108,0.442±0.104,0.317±0.078,7.834±0.703 23 | rankup_utkface_lb2000,3,5.614±0.068,64.245±1.032,0.838±0.003,0.916±0.002,0.887±0.003,0.732±0.004,2.988±0.061 24 | rankup_utkface_lb250,3,7.055±0.115,98.548±4.235,0.751±0.011,0.868±0.007,0.835±0.008,0.664±0.010,3.820±0.055 25 | rankup_utkface_lb50,3,9.959±0.620,192.124±17.021,0.514±0.043,0.720±0.030,0.703±0.019,0.530±0.018,5.511±0.528 26 | rankup(supervised-meanteacher)_utkface_lb250,3,8.757±0.135,155.536±6.919,0.607±0.018,0.782±0.010,0.750±0.005,0.577±0.006,4.623±0.072 27 | supervised_utkface_lb2000,3,6.281±0.064,81.616±1.769,0.794±0.004,0.892±0.002,0.862±0.001,0.700±0.001,3.262±0.043 28 | supervised_utkface_lb250,3,9.421±0.160,181.946±5.435,0.540±0.014,0.740±0.009,0.712±0.010,0.540±0.008,4.923±0.136 29 | supervised_utkface_lb50,3,14.128±0.555,360.227±36.492,0.090±0.092,0.357±0.082,0.371±0.071,0.262±0.052,8.035±0.148 30 | rankup(supervised-pimodel)_utkface_lb250,3,8.812±0.109,161.721±4.977,0.591±0.013,0.776±0.006,0.751±0.012,0.577±0.010,4.594±0.061 31 | rankup(supervised-supervised)_utkface_lb250,3,9.027±0.087,163.129±7.112,0.588±0.018,0.769±0.013,0.746±0.008,0.571±0.008,4.852±0.103 32 | ucvme_utkface_lb2000,3,5.902±0.066,70.910±2.583,0.821±0.007,0.906±0.004,0.877±0.002,0.718±0.003,3.203±0.045 33 | ucvme_utkface_lb250,3,8.630±0.170,148.059±2.562,0.626±0.006,0.794±0.006,0.767±0.007,0.593±0.008,4.773±0.143 34 | ucvme_utkface_lb50,3,13.491±0.954,333.775±43.387,0.157±0.110,0.409±0.132,0.412±0.127,0.294±0.093,7.960±0.585 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | 162 | .vscode/ 163 | 164 | data/ 165 | saved_models/ 166 | visualization/features/ 167 | visualization/figures/ 168 | _*/ -------------------------------------------------------------------------------- /semilearn/datasets/cv_datasets/utkface.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Pin-Yen Huang. 2 | # Licensed under the MIT License. 3 | # Code in this file is adapted from pytorch/vision 4 | # https://github.com/pytorch/vision/blob/main/torchvision/datasets/food101.py 5 | 6 | import numpy as np 7 | import pandas as pd 8 | from pathlib import Path 9 | from typing import Any, Callable, Optional, Tuple 10 | 11 | import PIL.Image 12 | 13 | from torchvision.datasets.utils import download_and_extract_archive, verify_str_arg 14 | from torchvision.datasets.vision import VisionDataset 15 | 16 | 17 | class UTKFACE(VisionDataset): 18 | """`The UTKFace Data Set ` 19 | 20 | The UTKFace dataset is an image age estimation dataset, where the goal is to predict the age of the person in an image. 21 | The labels range from 1 to 116 years old. The dataset consists of 23,705 face images, which we split into 18,964 22 | training samples and 4,741 test samples. The dataset is available in two versions: the original images and an aligned 23 | and cropped version. We use the aligned and cropped version of the UTKFace dataset here. 24 | 25 | Args: 26 | root (string): Root directory of the dataset. 27 | split (string, optional): The dataset split, supports ``"train"`` (default) and ``"test"``. 28 | transform (callable, optional): A function/transform that takes in a PIL image and returns a transformed 29 | version, e.g., ``transforms.RandomCrop``. 30 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 31 | download (bool, optional): If True, downloads the dataset from the internet and 32 | puts it in root directory. If dataset is already downloaded, it is not 33 | downloaded again. Default is False. 34 | """ 35 | 36 | _URL_MD5 = { 37 | "data": ("https://huggingface.co/datasets/py97/UTKFace-Cropped/resolve/main/UTKFace.tar.gz", "ae1a16905fbd795db921ff1d940df9cc"), 38 | "meta": ("https://github.com/pm25/regression-datasets/raw/refs/heads/main/data/utkface/meta.zip", "0983459bcfddbd93d6abdb821ae176c4"), 39 | } 40 | 41 | def __init__( 42 | self, 43 | root: str, 44 | split: str = "train", 45 | transform: Optional[Callable] = None, 46 | target_transform: Optional[Callable] = None, 47 | download: bool = False, 48 | ) -> None: 49 | super().__init__(root, transform=transform, target_transform=target_transform) 50 | self._split = verify_str_arg(split, "split", ("train", "test")) 51 | self._base_folder = Path(self.root) / "utkface" 52 | self._meta_folder = self._base_folder / "meta" 53 | self._images_folder = self._base_folder / "UTKFace" 54 | 55 | if download: 56 | self._download() 57 | 58 | if not self._check_exists(): 59 | raise RuntimeError("Dataset not found.
You can use download=True to download it") 60 | 61 | metadata = pd.read_csv(self._meta_folder / f"{split}.csv") 62 | self._file_paths = metadata["file_name"].apply(lambda x: self._images_folder / x).to_numpy(dtype="object") 63 | self._labels = metadata["label"].to_numpy(dtype=np.float32) 64 | 65 | def __len__(self) -> int: 66 | return len(self._file_paths) 67 | 68 | def __getitem__(self, idx: int) -> Tuple[Any, Any]: 69 | image_file, label = self._file_paths[idx], self._labels[idx] 70 | image = PIL.Image.open(image_file).convert("RGB") 71 | 72 | if self.transform: 73 | image = self.transform(image) 74 | 75 | if self.target_transform: 76 | label = self.target_transform(label) 77 | 78 | return image, label 79 | 80 | def extra_repr(self) -> str: 81 | return f"split={self._split}" 82 | 83 | def _check_exists(self) -> bool: 84 | return all(folder.exists() and folder.is_dir() for folder in (self._meta_folder, self._images_folder)) 85 | 86 | def _download(self) -> None: 87 | if self._check_exists(): 88 | return 89 | for url, md5 in self._URL_MD5.values(): 90 | download_and_extract_archive(url, download_root=self._base_folder, md5=md5) 91 | -------------------------------------------------------------------------------- /semilearn/datasets/cv_datasets/datasetbase.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import numpy as np 6 | from PIL import Image 7 | 8 | from torchvision import transforms 9 | from torch.utils.data import Dataset 10 | 11 | 12 | class BasicDataset(Dataset): 13 | """ 14 | BasicDataset returns a pair of image and labels (targets). 15 | If targets are not given, BasicDataset returns None as the label. 16 | This class supports strong augmentation, 17 | and returns both weakly and strongly augmented images. 18 | """ 19 | 20 | def __init__(self, alg, data, targets=None, transform=None, is_ulb=False, strong_transform=None, *args, **kwargs): 21 | """ 22 | Args: 23 | alg (str): Algorithm. 24 | data (list): List of PIL images or numpy arrays. 25 | targets (list or None): Target labels corresponding to the images. 26 | transform (callable or None): Basic transformation function applied to the image. 27 | is_ulb (bool): Indicates if the dataset is unlabeled. 28 | strong_transform (callable or None): Strong transformation function applied to the image. 29 | """ 30 | super(BasicDataset, self).__init__() 31 | self.alg = alg 32 | self.data = data 33 | self.targets = targets 34 | self.transform = transform 35 | self.strong_transform = strong_transform 36 | self.is_ulb = is_ulb 37 | 38 | self._check_transform() 39 | 40 | def __sample__(self, idx): 41 | """Retrieve the image and corresponding target at a specific index.""" 42 | img = self.data[idx] 43 | target = None if self.targets is None else self.targets[idx] 44 | return img, target 45 | 46 | def __getitem__(self, idx): 47 | """ 48 | Returns weakly and/or strongly augmented images based on the algorithm and dataset type.
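For example, a labeled sample is returned as ``{"idx_lb": ..., "x_lb": ..., "y_lb": ...}``,
while an unlabeled sample under ``alg="rankup"`` is returned as
``{"idx_ulb": ..., "x_ulb_w": ..., "x_ulb_s": ...}`` (see ``_determine_data_keys`` below).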
49 | """ 50 | img, target = self.__sample__(idx) 51 | 52 | if isinstance(img, np.ndarray): 53 | img = Image.fromarray(img) 54 | 55 | if self.transform is None: 56 | return {"x_lb": transforms.ToTensor()(img), "y_lb": target} 57 | 58 | data_dict = { 59 | "idx_lb": lambda: idx, 60 | "x_lb": lambda: self.transform(img), 61 | "x_lb_s": lambda: self.strong_transform(img), 62 | "y_lb": lambda: target, 63 | "idx_ulb": lambda: idx, 64 | "x_ulb_w": lambda: self.transform(img), 65 | "x_ulb_w_2": lambda: self.transform(img), 66 | "x_ulb_s": lambda: self.strong_transform(img), 67 | "x_ulb_s_2": lambda: self.strong_transform(img), 68 | } 69 | 70 | data_keys = self._determine_data_keys() 71 | return {k: data_dict[k]() for k in data_keys} 72 | 73 | def __len__(self): 74 | return len(self.data) 75 | 76 | def _check_transform(self): 77 | """Ensure strong augmentation is used if required by the algorithm.""" 78 | if self.strong_transform is None and self.is_ulb: 79 | assert self.alg not in ["rankup"], f"alg {self.alg} requires strong augmentation" 80 | 81 | def _determine_data_keys(self): 82 | """Determine the required output data based on the algorithm type.""" 83 | data_keys = set() 84 | 85 | if not self.is_ulb: 86 | data_keys.update({"idx_lb", "x_lb", "y_lb"}) 87 | return data_keys 88 | 89 | # for regression algorithms 90 | if self.alg == "fullysupervised" or self.alg == "supervised": 91 | data_keys.update({"idx_ulb"}) 92 | elif self.alg == "rankup" or self.alg == "rankuprda": 93 | data_keys.update({"idx_ulb", "x_ulb_w", "x_ulb_s"}) 94 | elif self.alg == "pimodel" or self.alg == "meanteacher" or self.alg == "mixmatch": 95 | data_keys.update({"idx_ulb", "x_ulb_w", "x_ulb_w_2"}) 96 | else: 97 | data_keys.update({"idx_ulb", "x_ulb_w"}) 98 | 99 | return data_keys 100 | 101 | 102 | class ImagePathDataset(BasicDataset): 103 | """Dataset subclass that directly opens images from file paths.""" 104 | 105 | def __sample__(self, idx): 106 | path, target = super().__sample__(idx) 107 | img = Image.open(path).convert("RGB") 108 | return img, target 109 | -------------------------------------------------------------------------------- /semilearn/datasets/audio_datasets/datasetbase.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import librosa 6 | 7 | from torch.utils.data import Dataset 8 | 9 | 10 | class BasicDataset(Dataset): 11 | """ 12 | BasicDataset returns a pair of audio and labels (targets). 13 | If targets are not given, BasicDataset returns None as the label. 14 | This class supports strong augmentation, 15 | and return both weakly and strongly augmented images. 16 | """ 17 | 18 | def __init__(self, alg, data, targets=None, transform=None, is_ulb=False, strong_transform=None, sample_rate=16000, *args, **kwargs): 19 | """ 20 | Args: 21 | alg (str): Algorithm. 22 | data (list): List of audio data. 23 | targets (list or None): Target labels corresponding to the images. 24 | is_ulb (bool): Indicates if the dataset is unlabeled. 25 | transform (callable or None): Basic transformation function applied to the image. 26 | is_ulb (bool): Indicates if the dataset is unlabeled. 27 | strong_transform (callable or None): Strong transformation function applied to the image. 
28 | """ 29 | super(BasicDataset, self).__init__() 30 | self.alg = alg 31 | self.data = data 32 | self.targets = targets 33 | self.transform = transform 34 | self.strong_transform = strong_transform 35 | self.is_ulb = is_ulb 36 | self.sample_rate = sample_rate 37 | 38 | self._check_transform() 39 | 40 | def __sample__(self, idx): 41 | """Retrieve the audio and corresponding target at a specific index.""" 42 | audio = self.data[idx] 43 | target = None if self.targets is None else self.targets[idx] 44 | return audio, target 45 | 46 | def __getitem__(self, idx): 47 | """ 48 | Returns weakly and/or strongly augmented images based on the algorithm and dataset type. 49 | """ 50 | wav, target = self.__sample__(idx) 51 | 52 | if self.transform is None: 53 | return {"x_lb": wav, "y_lb": target} 54 | 55 | data_dict = { 56 | "idx_lb": lambda: idx, 57 | "x_lb": lambda: self.transform(wav, sample_rate=self.sample_rate), 58 | "x_lb_s": lambda: self.strong_transform(wav, sample_rate=self.sample_rate), 59 | "y_lb": lambda: target, 60 | "idx_ulb": lambda: idx, 61 | "x_ulb_w": lambda: self.transform(wav, sample_rate=self.sample_rate), 62 | "x_ulb_w_2": lambda: self.transform(wav, sample_rate=self.sample_rate), 63 | "x_ulb_s": lambda: self.strong_transform(wav, sample_rate=self.sample_rate), 64 | "x_ulb_s_2": lambda: self.strong_transform(wav, sample_rate=self.sample_rate), 65 | } 66 | 67 | data_keys = self._determine_data_keys() 68 | return {k: data_dict[k]() for k in data_keys} 69 | 70 | def __len__(self): 71 | return len(self.data) 72 | 73 | def _check_transform(self): 74 | """Ensure strong augmentation is used if required by the algorithm.""" 75 | if self.strong_transform is None and self.is_ulb: 76 | assert self.alg not in ["rankup"], f"alg {self.alg} requires strong augmentation" 77 | 78 | def _determine_data_keys(self): 79 | """Determine the required output data based on the algorithm type.""" 80 | data_keys = set() 81 | 82 | if not self.is_ulb: 83 | data_keys.update({"idx_lb", "x_lb", "y_lb"}) 84 | return data_keys 85 | 86 | # for regression algorithms 87 | if self.alg == "fullysupervised" or self.alg == "supervised": 88 | data_keys.update({"idx_ulb"}) 89 | elif self.alg == "rankup" or self.alg == "rankuprda": 90 | data_keys.update({"idx_ulb", "x_ulb_w", "x_ulb_s"}) 91 | elif self.alg == "pimodel" or self.alg == "meanteacher" or self.alg == "mixmatch": 92 | data_keys.update({"idx_ulb", "x_ulb_w", "x_ulb_w_2"}) 93 | else: 94 | data_keys.update({"idx_ulb", "x_ulb_w"}) 95 | 96 | return data_keys 97 | 98 | 99 | class AudioPathDataset(BasicDataset): 100 | """Dataset subclass that directly opens audio from file paths.""" 101 | 102 | def __sample__(self, idx): 103 | path, target = super().__sample__(idx) 104 | waveform, _ = librosa.load(path, sr=self.sample_rate, mono=True) 105 | return waveform, target 106 | -------------------------------------------------------------------------------- /semilearn/nets/bert/bert.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import torch 6 | import torch.nn as nn 7 | from transformers import BertModel, BertConfig 8 | 9 | from semilearn.nets.utils import init_weights 10 | 11 | 12 | class RegressionBert(nn.Module): 13 | """ 14 | A regression model based on the Bert architecture (https://arxiv.org/abs/1810.04805). 15 | 16 | Args: 17 | use_pretrained (bool): Whether to use a pretrained weights. 
If `pretrained_path` is set and 18 | `use_pretrained` is `False`, the model will load the architecture 19 | without pretrained weights. Defaults to True. 20 | pretrained_path (str or None): The Hugging Face path to the pretrained model. 21 | If None, a model with default configuration will be created. 22 | drop_rate (float): The dropout rate applied before the regression layer. Defaults to 0.1. 23 | Note: unlike the audio backbones in this repository, RegressionBert does not 24 | expose a freeze_encoder option; the full encoder is always fine-tuned 25 | together with the regressor head. 26 | 27 | Attributes: 28 | model (BertModel): The underlying Bert model. 29 | config (BertConfig): Configuration of the Bert model. 30 | dropout (nn.Dropout): Dropout layer for regularization. 31 | num_features (int): Number of features from the model's hidden layer. 32 | regressor (nn.Sequential): The regressor head consisting of linear layers and activation. 33 | """ 34 | 35 | def __init__(self, use_pretrained=False, pretrained_path=None, drop_rate=0.1): 36 | super(RegressionBert, self).__init__() 37 | # Load pre-trained bert model 38 | self.model, self.config = self.load_model(use_pretrained, pretrained_path) 39 | self.dropout = torch.nn.Dropout(p=drop_rate, inplace=False) 40 | self.num_features = self.config.hidden_size 41 | self.regressor = nn.Sequential(*[nn.Linear(self.num_features, self.num_features), nn.GELU(), nn.Linear(self.num_features, 1)]) 42 | 43 | self.regressor.apply(init_weights) 44 | 45 | def load_model(self, use_pretrained=True, pretrained_path=None): 46 | if use_pretrained and pretrained_path: 47 | config = BertConfig.from_pretrained(pretrained_path) 48 | model = BertModel.from_pretrained(pretrained_path) 49 | return model, config 50 | 51 | config = BertConfig() if not pretrained_path else BertConfig.from_pretrained(pretrained_path) 52 | model = BertModel(config) 53 | return model, config 54 | 55 | def forward(self, x, only_fc=False, only_feat=False, return_embed=False, **kwargs): 56 | """ 57 | Args: 58 | x: input tensor, depends on only_fc and only_feat flag 59 | only_fc: only use classifier, input should be features before classifier 60 | only_feat: only return pooled features 61 | return_embed: return word embedding, used for vat 62 | """ 63 | if only_fc: 64 | logits = self.regressor(x).flatten() 65 | return logits 66 | 67 | out_dict = self.model(**x, output_hidden_states=True, return_dict=True) 68 | last_hidden = out_dict["last_hidden_state"] 69 | drop_hidden = self.dropout(last_hidden) 70 | pooled_output = torch.mean(drop_hidden, 1) 71 | 72 | if only_feat: 73 | return pooled_output 74 | 75 | logits = self.regressor(pooled_output).flatten() 76 | result_dict = {"logits": logits, "feat": pooled_output} 77 | 78 | if return_embed: 79 | result_dict["embed"] = out_dict["hidden_states"][0] 80 | 81 | return result_dict 82 | 83 | def extract(self, x): 84 | out_dict = self.model(**x, output_hidden_states=True, return_dict=True) 85 | last_hidden = out_dict["last_hidden_state"] 86 | drop_hidden = self.dropout(last_hidden) 87 | pooled_output = torch.mean(drop_hidden, 1) 88 | return pooled_output 89 | 90 | def group_matcher(self, coarse=False, prefix=""): 91 | matcher = dict(stem=r"^{}model.embeddings".format(prefix), blocks=r"^{}model.encoder.layer.(\d+)".format(prefix)) 92 | return matcher 93 | 94 | def no_weight_decay(self): 95 | return [] 96 | 97 | 98 | def bert_base(pretrained=True, pretrained_path="bert-base-cased", **kwargs): 99 | model = RegressionBert(use_pretrained=pretrained,
pretrained_path=pretrained_path, **kwargs) 100 | return model 101 | -------------------------------------------------------------------------------- /semilearn/nets/hubert/hubert.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | from transformers import HubertModel, HubertConfig 9 | 10 | from semilearn.nets.utils import init_weights 11 | 12 | 13 | class RegressionHubert(nn.Module): 14 | """ 15 | A regression model based on the Hubert architecture (https://arxiv.org/abs/2106.07447). 16 | 17 | Args: 18 | use_pretrained (bool): Whether to use a pretrained weights. If `pretrained_path` is set and 19 | `use_pretrained` is `False`, the model will load architecture 20 | without pretrained weights. Defaults to True. 21 | pretrained_path (str or None): The Hugging Face path to the pretrained model. 22 | If None, a model with default configuration will be created. 23 | drop_rate (float): The dropout rate applied before the regression layer. Defaults to 0.1. 24 | freeze_encoder (bool): If True, the encoder will be frozen during training, 25 | and only the regressor head will be trained. 26 | Do not freeze the encoder when using with RankUp or ARC. 27 | 28 | Attributes: 29 | model (HubertModel): The underlying Hubert model. 30 | config (HubertConfig): Configuration of the Hubert model. 31 | dropout (nn.Dropout): Dropout layer for regularization. 32 | num_features (int): Number of features from the model's hidden layer. 33 | regressor (nn.Sequential): The regressor head consisting of linear layers and activation. 34 | """ 35 | 36 | def __init__(self, use_pretrained=False, pretrained_path=None, drop_rate=0.1, freeze_encoder=True): 37 | super(RegressionHubert, self).__init__() 38 | self.model, self.config = self.load_model(use_pretrained, pretrained_path) 39 | if freeze_encoder: 40 | self.model.freeze_feature_encoder() 41 | self.dropout = torch.nn.Dropout(p=drop_rate, inplace=False) 42 | self.num_features = self.config.hidden_size 43 | self.regressor = nn.Sequential(*[nn.Linear(self.num_features, self.num_features), nn.GELU(), nn.Linear(self.num_features, 1)]) 44 | 45 | self.regressor.apply(init_weights) 46 | 47 | def load_model(self, use_pretrained=True, pretrained_path=None): 48 | if use_pretrained and pretrained_path: 49 | config = HubertConfig.from_pretrained(pretrained_path) 50 | model = HubertModel.from_pretrained(pretrained_path) 51 | return model, config 52 | 53 | config = HubertConfig() if not pretrained_path else HubertConfig.from_pretrained(pretrained_path) 54 | model = HubertModel(config) 55 | return model, config 56 | 57 | def forward(self, x, only_fc=False, only_feat=False, **kwargs): 58 | """ 59 | Args: 60 | x: input tensor, depends on only_fc and only_feat flag 61 | only_fc: only use classifier, input should be features before classifier 62 | only_feat: only return pooled features 63 | """ 64 | if only_fc: 65 | logits = self.regressor(x).flatten() 66 | return logits 67 | 68 | pooled_output = self.extract(x) 69 | 70 | if only_feat: 71 | return pooled_output 72 | 73 | logits = self.regressor(pooled_output).flatten() 74 | result_dict = {"logits": logits, "feat": pooled_output} 75 | return result_dict 76 | 77 | def extract(self, x): 78 | out_dict = self.model(x, output_hidden_states=True, return_dict=True) 79 | last_hidden = out_dict["last_hidden_state"] 80 | embed = 
out_dict["hidden_states"][0] 81 | drop_hidden = self.dropout(last_hidden) 82 | pooled_output = torch.mean(drop_hidden, 1) 83 | return pooled_output 84 | 85 | def group_matcher(self, coarse=False, prefix=""): 86 | matcher = dict( 87 | stem=r"^{}model.feature_projection|^{}model.feature_extractor|^{}model.encoder.pos_conv_embed".format(prefix, prefix, prefix), 88 | blocks=r"^{}model.encoder.layers.(\d+)".format(prefix), 89 | ) 90 | return matcher 91 | 92 | def no_weight_decay(self): 93 | return [] 94 | 95 | 96 | def hubert_base(pretrained=False, pretrained_path="facebook/hubert-base-ls960", **kwargs): 97 | model = RegressionHubert(use_pretrained=pretrained, pretrained_path=pretrained_path, **kwargs) 98 | return model 99 | 100 | 101 | if __name__ == "__main__": 102 | model = hubert_base() 103 | print(model) 104 | -------------------------------------------------------------------------------- /semilearn/nets/wave2vecv2/wave2vecv2.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | from transformers import Wav2Vec2Model, Wav2Vec2Config 9 | 10 | from semilearn.nets.utils import init_weights 11 | 12 | 13 | class RegressionWav2Vec2(nn.Module): 14 | """ 15 | A regression model based on the Wav2Vec2 architecture (https://arxiv.org/abs/2006.11477). 16 | 17 | Args: 18 | use_pretrained (bool): Whether to use a pretrained weights. If `pretrained_path` is set and 19 | `use_pretrained` is `False`, the model will load architecture 20 | without pretrained weights. Defaults to True. 21 | pretrained_path (str or None): The Hugging Face path to the pretrained model. 22 | If None, a model with default configuration will be created. 23 | drop_rate (float): The dropout rate applied before the regression layer. Defaults to 0.1. 24 | freeze_encoder (bool): If True, the encoder will be frozen during training, 25 | and only the regressor head will be trained. 26 | Do not freeze the encoder when using with RankUp or ARC. 27 | 28 | Attributes: 29 | model (Wav2Vec2Model): The underlying Wav2Vec2 model. 30 | config (Wav2Vec2Config): Configuration of the Wav2Vec2 model. 31 | dropout (nn.Dropout): Dropout layer for regularization. 32 | num_features (int): Number of features from the model's hidden layer. 33 | regressor (nn.Sequential): The regressor head consisting of linear layers and activation. 
34 | """ 35 | 36 | def __init__(self, use_pretrained=True, pretrained_path=None, drop_rate=0.1, freeze_encoder=True): 37 | super(RegressionWav2Vec2, self).__init__() 38 | self.model, self.config = self.load_model(use_pretrained, pretrained_path) 39 | if freeze_encoder: 40 | self.model.freeze_feature_encoder() 41 | self.dropout = torch.nn.Dropout(p=drop_rate, inplace=False) 42 | self.num_features = self.config.hidden_size 43 | self.regressor = nn.Sequential( 44 | *[nn.Linear(self.config.hidden_size, self.config.hidden_size), nn.GELU(), nn.Linear(self.config.hidden_size, 1)] 45 | ) 46 | 47 | self.regressor.apply(init_weights) 48 | 49 | def load_model(self, use_pretrained=True, pretrained_path=None): 50 | if use_pretrained and pretrained_path: 51 | config = Wav2Vec2Config.from_pretrained(pretrained_path) 52 | model = Wav2Vec2Model.from_pretrained(pretrained_path) 53 | return model, config 54 | 55 | config = Wav2Vec2Config() if not pretrained_path else Wav2Vec2Config.from_pretrained(pretrained_path) 56 | model = Wav2Vec2Model(config) 57 | return model, config 58 | 59 | def forward(self, x, only_fc=False, only_feat=False, **kwargs): 60 | """ 61 | Args: 62 | x: input tensor, depends on only_fc and only_feat flag 63 | only_fc: only use classifier, input should be features before classifier 64 | only_feat: only return pooled features 65 | """ 66 | if only_fc: 67 | logits = self.regressor(x).flatten() 68 | return logits 69 | 70 | pooled_output = self.extract(x) 71 | 72 | if only_feat: 73 | return pooled_output 74 | 75 | logits = self.regressor(pooled_output).flatten() 76 | result_dict = {"logits": logits, "feat": pooled_output} 77 | return result_dict 78 | 79 | def extract(self, x): 80 | out_dict = self.model(x, output_hidden_states=True, return_dict=True) 81 | last_hidden = out_dict["last_hidden_state"] 82 | embed = out_dict["hidden_states"][0] 83 | drop_hidden = self.dropout(last_hidden) 84 | pooled_output = torch.mean(drop_hidden, 1) 85 | return pooled_output 86 | 87 | def group_matcher(self, coarse=False, prefix=""): 88 | matcher = dict( 89 | stem=r"^{}model.feature_projection|^{}model.feature_extractor".format(prefix, prefix), 90 | blocks=r"^{}model.encoder.layers.(\d+)".format(prefix), 91 | ) 92 | return matcher 93 | 94 | def no_weight_decay(self): 95 | return [] 96 | 97 | 98 | def wave2vecv2_base(pretrained=True, pretrained_path="facebook/wav2vec2-base-960h", **kwargs): 99 | model = RegressionWav2Vec2(use_pretrained=pretrained, pretrained_path=pretrained_path, **kwargs) 100 | return model 101 | 102 | 103 | if __name__ == "__main__": 104 | model = wave2vecv2_base(True, "facebook/wav2vec2-base-960h") 105 | print(model) 106 | -------------------------------------------------------------------------------- /semilearn/nets/whisper/whisper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | from transformers import WhisperModel, WhisperConfig 9 | 10 | from .whisper_encoder import MyWhisperEncoder 11 | from semilearn.nets.utils import init_weights 12 | 13 | 14 | class RegressionWhisper(nn.Module): 15 | """ 16 | A regression model based on the Whisper architecture (https://arxiv.org/abs/2212.04356). 17 | 18 | Args: 19 | use_pretrained (bool): Whether to use a pretrained weights. 
If `pretrained_path` is set and 20 | `use_pretrained` is `False`, the model will load architecture 21 | without pretrained weights. Defaults to True. 22 | pretrained_path (str or None): The Hugging Face path to the pretrained model. 23 | If None, a model with default configuration will be created. 24 | drop_rate (float): The dropout rate applied before the regression layer. Defaults to 0.1. 25 | freeze_encoder (bool): If True, the encoder will be frozen during training, 26 | and only the regressor head will be trained. 27 | Do not freeze the encoder when using with RankUp or ARC. 28 | 29 | Attributes: 30 | model (WhisperModel): The underlying Whisper model. 31 | config (WhisperConfig): Configuration of the Whisper model. 32 | dropout (nn.Dropout): Dropout layer for regularization. 33 | num_features (int): Number of features from the model's hidden layer. 34 | regressor (nn.Sequential): The regressor head consisting of linear layers and activation. 35 | """ 36 | 37 | def __init__(self, use_pretrained=True, pretrained_path=None, drop_rate=0.1, freeze_encoder=False): 38 | super(RegressionWhisper, self).__init__() 39 | self.model, self.config = self.load_model(use_pretrained, pretrained_path) 40 | if freeze_encoder: 41 | self.model.freeze_encoder() 42 | self.model = MyWhisperEncoder.cast(self.model.encoder) # overwrite default WhisperEncoder forward() function 43 | self.dropout = torch.nn.Dropout(p=drop_rate, inplace=False) 44 | self.num_features = self.config.hidden_size 45 | self.regressor = nn.Sequential( 46 | *[nn.Linear(self.config.hidden_size, self.config.hidden_size), nn.GELU(), nn.Linear(self.config.hidden_size, 1)] 47 | ) 48 | 49 | self.regressor.apply(init_weights) 50 | 51 | def load_model(self, use_pretrained=True, pretrained_path=None): 52 | if use_pretrained and pretrained_path: 53 | config = WhisperConfig.from_pretrained(pretrained_path) 54 | model = WhisperModel.from_pretrained(pretrained_path) 55 | return model, config 56 | 57 | config = WhisperConfig() if not pretrained_path else WhisperConfig.from_pretrained(pretrained_path) 58 | model = WhisperModel(config) 59 | return model, config 60 | 61 | def forward(self, x, only_fc=False, only_feat=False, **kwargs): 62 | """ 63 | Args: 64 | x: input tensor, depends on only_fc and only_feat flag 65 | only_fc: only use classifier, input should be features before classifier 66 | only_feat: only return pooled features 67 | """ 68 | if only_fc: 69 | logits = self.regressor(x).flatten() 70 | return logits 71 | 72 | pooled_output = self.extract(x) 73 | 74 | if only_feat: 75 | return pooled_output 76 | 77 | logits = self.regressor(pooled_output).flatten() 78 | result_dict = {"logits": logits, "feat": pooled_output} 79 | return result_dict 80 | 81 | def extract(self, x): 82 | out_dict = self.model(x, output_hidden_states=True, return_dict=True) 83 | last_hidden = out_dict["last_hidden_state"] 84 | embed = out_dict["hidden_states"][0] 85 | drop_hidden = self.dropout(last_hidden) 86 | pooled_output = torch.mean(drop_hidden, 1) 87 | return pooled_output 88 | 89 | def group_matcher(self, coarse=False, prefix=""): 90 | matcher = dict( 91 | stem=r"^{}model.feature_projection|^{}model.feature_extractor".format(prefix, prefix), 92 | blocks=r"^{}model.encoder.layers.(\d+)".format(prefix), 93 | ) 94 | return matcher 95 | 96 | def no_weight_decay(self): 97 | return [] 98 | 99 | 100 | def whisper_base(pretrained=True, pretrained_path="openai/whisper-base", **kwargs): 101 | model = RegressionWhisper(use_pretrained=pretrained, 
pretrained_path=pretrained_path, **kwargs) 102 | return model 103 | 104 | 105 | if __name__ == "__main__": 106 | model = whisper_base(True, "openai/whisper-tiny") 107 | print(model) 108 | -------------------------------------------------------------------------------- /semilearn/datasets/collactors/audio_collactor.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Modifications Copyright (c) 2024 Pin-Yen Huang. 3 | # Licensed under the MIT License. 4 | 5 | from dataclasses import dataclass 6 | from typing import Any, Dict, List, Optional, Union 7 | 8 | from transformers import AutoFeatureExtractor 9 | from transformers.file_utils import PaddingStrategy 10 | from transformers.tokenization_utils_base import PreTrainedTokenizerBase 11 | from transformers.data import default_data_collator 12 | 13 | 14 | @dataclass 15 | class DataCollatorWithPadding: 16 | """ 17 | Data collator that will dynamically pad the inputs received. 18 | 19 | Args: 20 | tokenizer ([`PreTrainedTokenizer`] or [`PreTrainedTokenizerFast`]): 21 | The tokenizer or audio feature extractor used for encoding the data. 22 | padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `True`): 23 | Select a strategy to pad the returned sequences (according to the model's padding side and padding index) 24 | among: 25 | - `True` or `'longest'` (default): Pad to the longest sequence in the batch (or no padding if only a single sequence 26 | is provided). 27 | - `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the maximum 28 | acceptable input length for the model if that argument is not provided. 29 | - `False` or `'do_not_pad'`: No padding (i.e., can output a batch with sequences of different 30 | lengths). 31 | max_length (`int`, *optional*): 32 | Maximum audio length in seconds; converted to a length in samples as `max_length * sample_rate` in `__call__`. 33 | pad_to_multiple_of (`int`, *optional*): 34 | If set will pad the sequence to a multiple of the provided value. 35 | This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= 36 | 7.0 (Volta). 37 | return_tensors (`str`): 38 | The type of Tensor to return. Allowable values are "np", "pt" and "tf".
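sample_rate (`int`, *optional*, defaults to 16000):
    The sampling rate assumed for the raw audio inputs; used together with
    `max_length` to compute the padding/truncation length in samples.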
39 | """ 40 | 41 | tokenizer: PreTrainedTokenizerBase 42 | padding: Union[bool, str, PaddingStrategy] = True 43 | max_length: Optional[int] = None 44 | sample_rate: Optional[int] = 16000 45 | pad_to_multiple_of: Optional[int] = None 46 | return_tensors: str = "pt" 47 | 48 | def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, Any]: 49 | wav_keys = ["x_lb", "x_lb_s", "x_ulb_w", "x_ulb_w_2", "x_ulb_s", "x_ulb_s2"] 50 | wav_features = {k: [] for k in wav_keys} 51 | other_features = [] 52 | for f in features: 53 | exist_ks = [k for k in wav_keys if k in f] 54 | for k in exist_ks: 55 | feat = f.pop(k) 56 | wav_features[k].append(feat) 57 | other_features.append(f) 58 | 59 | batch = default_data_collator(other_features, return_tensors="pt") 60 | 61 | for key, feats in wav_features.items(): 62 | if len(feats) > 0: 63 | out = self.tokenizer( 64 | feats, 65 | padding=True if key == "x_lb" else "max_length", 66 | max_length=int(self.max_length * self.sample_rate), 67 | sampling_rate=self.sample_rate, 68 | pad_to_multiple_of=self.pad_to_multiple_of, 69 | return_tensors=self.return_tensors, 70 | truncation=True, 71 | ) 72 | if "input_values" in out: 73 | input_values = out["input_values"] 74 | elif "input_features" in out: 75 | input_values = out["input_features"] 76 | batch[key] = input_values 77 | 78 | return batch 79 | 80 | 81 | def get_wave2vecv2_base_collactor(pretrain_path="facebook/wav2vec2-base-960h", max_length=4, sample_rate=16000): 82 | pretrain_path = "facebook/wav2vec2-base-960h" if pretrain_path == "" else pretrain_path 83 | feature_extractor = AutoFeatureExtractor.from_pretrained(pretrain_path) 84 | collator = DataCollatorWithPadding(feature_extractor, max_length=max_length, sample_rate=sample_rate) 85 | return collator 86 | 87 | 88 | def get_hubert_base_collactor(pretrain_path="facebook/hubert-base-ls960", max_length=4, sample_rate=16000): 89 | pretrain_path = "facebook/hubert-base-ls960" if pretrain_path == "" else pretrain_path 90 | feature_extractor = AutoFeatureExtractor.from_pretrained(pretrain_path) 91 | collator = DataCollatorWithPadding(feature_extractor, max_length=max_length, sample_rate=sample_rate) 92 | return collator 93 | 94 | 95 | def get_whisper_base_collactor(pretrain_path="openai/whisper-base", max_length=30, sample_rate=16000): 96 | pretrain_path = "openai/whisper-base" if pretrain_path == "" else pretrain_path 97 | feature_extractor = AutoFeatureExtractor.from_pretrained(pretrain_path) 98 | collator = DataCollatorWithPadding(feature_extractor, max_length=max_length, sample_rate=sample_rate) 99 | return collator 100 | --------------------------------------------------------------------------------