├── dalib ├── adaptation │ ├── segmentation │ │ ├── __init__.py │ │ ├── fda.py │ │ └── advent.py │ ├── keypoint_detection │ │ └── __init__.py │ ├── __init__.py │ ├── mcd.py │ ├── osbp.py │ ├── mcc.py │ ├── iwan.py │ ├── dann.py │ └── jan.py ├── __init__.py ├── translation │ ├── __init__.py │ ├── cyclegan │ │ ├── __init__.py │ │ ├── transform.py │ │ ├── loss.py │ │ └── util.py │ └── cycada.py └── modules │ ├── __init__.py │ ├── entropy.py │ ├── domain_discriminator.py │ ├── kernels.py │ ├── gl.py │ └── grl.py ├── common ├── __init__.py ├── vision │ ├── __init__.py │ ├── models │ │ ├── __init__.py │ │ ├── segmentation │ │ │ └── __init__.py │ │ ├── keypoint_detection │ │ │ ├── __init__.py │ │ │ ├── loss.py │ │ │ └── pose_resnet.py │ │ └── digits.py │ ├── datasets │ │ ├── regression │ │ │ ├── __init__.py │ │ │ ├── mpi3d.py │ │ │ ├── image_regression.py │ │ │ └── dsprites.py │ │ ├── keypoint_detection │ │ │ ├── __init__.py │ │ │ ├── lsp.py │ │ │ ├── util.py │ │ │ ├── hand_3d_studio.py │ │ │ └── surreal.py │ │ ├── segmentation │ │ │ ├── __init__.py │ │ │ ├── gta5.py │ │ │ ├── synthia.py │ │ │ └── cityscapes.py │ │ ├── __init__.py │ │ ├── _util.py │ │ ├── visda2017.py │ │ ├── office31.py │ │ ├── officecaltech.py │ │ ├── partial │ │ │ └── __init__.py │ │ ├── officehome.py │ │ ├── oxfordpet.py │ │ ├── imagelist.py │ │ ├── coco70.py │ │ ├── aircrafts.py │ │ ├── stanford_cars.py │ │ └── openset │ │ │ └── __init__.py │ └── transforms │ │ └── __init__.py ├── modules │ ├── __init__.py │ ├── regressor.py │ └── classifier.py ├── utils │ ├── __init__.py │ ├── analysis │ │ ├── __init__.py │ │ ├── a_distance.py │ │ └── tsne.py │ ├── data.py │ ├── meter.py │ ├── metric │ │ ├── keypoint_detection.py │ │ └── __init__.py │ └── logger.py └── loss │ └── __init__.py ├── .gitignore ├── LICENSE ├── icon ├── entropy.py ├── transform.py ├── uda_backbone.py ├── icon_utils.py └── cluster.py ├── README.md ├── sam.py └── validate.py /dalib/adaptation/segmentation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dalib/adaptation/keypoint_detection/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /common/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['modules', 'utils', 'vision'] 2 | -------------------------------------------------------------------------------- /dalib/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['adaptation', 'modules', 'translation'] 2 | -------------------------------------------------------------------------------- /common/vision/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['datasets', 'models', 'transforms'] 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .idea 3 | .vscode 4 | build 5 | data 6 | logs 7 | dist 8 | venv 9 | exp* -------------------------------------------------------------------------------- /common/vision/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import * 2 | 3 | __all__ = ['resnet', 'digits'] 4 | 
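The tree above shows that `dalib` (adaptation algorithms), `common` (datasets, backbones, generic heads and utilities) and `icon` (the ICON-specific modules) are ordinary Python packages. Below is a minimal sketch of how they can be imported together in a training script. It is illustrative only, not a file from the repository; the data root, transform, batch size and feature dimension are placeholder assumptions.

import torchvision.transforms as T
from torch.utils.data import DataLoader

from common.vision.datasets import Office31                        # dataset wrappers re-exported in datasets/__init__.py
from common.utils.data import ForeverDataIterator                  # never-ending data iterator from common/utils/data.py
from dalib.modules.domain_discriminator import DomainDiscriminator

# Placeholder preprocessing and paths; the repository's own transforms live in icon/transform.py.
transform = T.Compose([T.Resize(256), T.CenterCrop(224), T.ToTensor()])
source = Office31(root="data/office31", task="A", download=True, transform=transform)
target = Office31(root="data/office31", task="W", download=True, transform=transform)
source_iter = ForeverDataIterator(DataLoader(source, batch_size=32, shuffle=True, drop_last=True))
target_iter = ForeverDataIterator(DataLoader(target, batch_size=32, shuffle=True, drop_last=True))
discriminator = DomainDiscriminator(in_feature=2048, hidden_size=1024)  # 2048 assumes a ResNet-50 feature dimension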
-------------------------------------------------------------------------------- /common/vision/models/segmentation/__init__.py: -------------------------------------------------------------------------------- 1 | from .deeplabv2 import * 2 | 3 | __all__ = ['deeplabv2'] -------------------------------------------------------------------------------- /dalib/translation/__init__.py: -------------------------------------------------------------------------------- 1 | from . import fourier_transform 2 | 3 | __all__ = ['fourier_transform'] -------------------------------------------------------------------------------- /common/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .classifier import * 2 | from .regressor import * 3 | 4 | __all__ = ['classifier', 'regressor'] -------------------------------------------------------------------------------- /common/vision/models/keypoint_detection/__init__.py: -------------------------------------------------------------------------------- 1 | from .pose_resnet import * 2 | from . import loss 3 | 4 | __all__ = ['pose_resnet'] -------------------------------------------------------------------------------- /common/vision/datasets/regression/__init__.py: -------------------------------------------------------------------------------- 1 | from .image_regression import ImageRegression 2 | from .dsprites import DSprites 3 | from .mpi3d import MPI3D -------------------------------------------------------------------------------- /common/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .logger import CompleteLogger 2 | from .meter import * 3 | from .data import ForeverDataIterator 4 | 5 | __all__ = ['metric', 'analysis', 'meter', 'data', 'logger'] -------------------------------------------------------------------------------- /dalib/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .grl import * 2 | from .domain_discriminator import * 3 | from .kernels import * 4 | from .entropy import * 5 | 6 | __all__ = ['grl', 'kernels', 'domain_discriminator', 'entropy'] 7 | -------------------------------------------------------------------------------- /dalib/translation/cyclegan/__init__.py: -------------------------------------------------------------------------------- 1 | from . import discriminator 2 | from . import generator 3 | from . import loss 4 | from . 
import transform 5 | 6 | from .discriminator import * 7 | from .generator import * 8 | from .loss import * 9 | from .transform import * 10 | -------------------------------------------------------------------------------- /common/vision/datasets/keypoint_detection/__init__.py: -------------------------------------------------------------------------------- 1 | from .rendered_hand_pose import RenderedHandPose 2 | from .hand_3d_studio import Hand3DStudio, Hand3DStudioAll 3 | from .freihand import FreiHand 4 | 5 | from .surreal import SURREAL 6 | from .lsp import LSP 7 | from .human36m import Human36M 8 | 9 | -------------------------------------------------------------------------------- /common/vision/datasets/segmentation/__init__.py: -------------------------------------------------------------------------------- 1 | from .segmentation_list import SegmentationList 2 | from .cityscapes import Cityscapes, FoggyCityscapes 3 | from .gta5 import GTA5 4 | from .synthia import Synthia 5 | 6 | __all__ = ["SegmentationList", "Cityscapes", "GTA5", "Synthia", "FoggyCityscapes"] 7 | -------------------------------------------------------------------------------- /dalib/adaptation/__init__.py: -------------------------------------------------------------------------------- 1 | from . import cdan 2 | from . import dann 3 | from . import mdd 4 | from . import dan 5 | from . import jan 6 | from . import mcd 7 | from . import mcc 8 | from . import pada 9 | from . import osbp 10 | from . import iwan 11 | 12 | __all__ = ["cdan", "dann", "mdd", "dan", "jan", "mcd", "mcc", "pada", "osbp", "iwan"] 13 | -------------------------------------------------------------------------------- /common/vision/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .imagelist import ImageList 2 | from .office31 import Office31 3 | from .officehome import OfficeHome 4 | from .visda2017 import VisDA2017 5 | from .officecaltech import OfficeCaltech 6 | from .domainnet import DomainNet 7 | from .aircrafts import Aircraft 8 | from .cub200 import CUB200 9 | from .stanford_cars import StanfordCars 10 | from .stanford_dogs import StanfordDogs 11 | from .coco70 import COCO70 12 | from .oxfordpet import OxfordIIITPet 13 | 14 | __all__ = ['ImageList', 'Office31', 'OfficeHome', "VisDA2017", "OfficeCaltech", "DomainNet", 15 | "Aircraft", "cub200", "StanfordCars", "StanfordDogs", "COCO70", "OxfordIIITPet"] 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Liuhong99 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /dalib/modules/entropy.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def entropy(predictions: torch.Tensor, reduction='none') -> torch.Tensor: 5 | r"""Entropy of prediction. 6 | The definition is: 7 | 8 | .. math:: 9 | entropy(p) = - \sum_{c=1}^C p_c \log p_c 10 | 11 | where C is number of classes. 12 | 13 | Args: 14 | predictions (tensor): Classifier predictions. Expected to contain raw, normalized scores for each class 15 | reduction (str, optional): Specifies the reduction to apply to the output: 16 | ``'none'`` | ``'mean'``. ``'none'``: no reduction will be applied, 17 | ``'mean'``: the sum of the output will be divided by the number of 18 | elements in the output. Default: ``'mean'`` 19 | 20 | Shape: 21 | - predictions: :math:`(minibatch, C)` where C means the number of classes. 22 | - Output: :math:`(minibatch, )` by default. If :attr:`reduction` is ``'mean'``, then scalar. 23 | """ 24 | epsilon = 1e-5 25 | H = -predictions * torch.log(predictions + epsilon) 26 | H = H.sum(dim=1) 27 | if reduction == 'mean': 28 | return H.mean() 29 | else: 30 | return H 31 | -------------------------------------------------------------------------------- /icon/entropy.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | def entropy(predictions: torch.Tensor, reduction='none') -> torch.Tensor: 7 | 8 | epsilon = 1e-5 9 | H = -predictions * torch.log(predictions + epsilon) 10 | H = H.sum(dim=1) 11 | if reduction == 'mean': 12 | return H.mean() 13 | else: 14 | return H 15 | 16 | class TsallisEntropy(nn.Module): 17 | 18 | def __init__(self, temperature: float, alpha: float): 19 | super(TsallisEntropy, self).__init__() 20 | self.temperature = temperature 21 | self.alpha = alpha 22 | 23 | def forward(self, logits: torch.Tensor) -> torch.Tensor: 24 | N, C = logits.shape 25 | 26 | pred = F.softmax(logits / self.temperature, dim=1) 27 | entropy_weight = entropy(pred).detach() 28 | entropy_weight = 1 + torch.exp(-entropy_weight) 29 | entropy_weight = (N * entropy_weight / torch.sum(entropy_weight)).unsqueeze(dim=1) 30 | 31 | sum_dim = torch.sum(pred * entropy_weight, dim = 0).unsqueeze(dim=0) 32 | 33 | return 1 / (self.alpha - 1) * torch.sum((1 / torch.mean(sum_dim) - torch.sum(pred ** self.alpha / sum_dim * entropy_weight, dim = -1))) -------------------------------------------------------------------------------- /common/loss/__init__.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | class KnowledgeDistillationLoss(nn.Module): 6 | """Knowledge Distillation Loss. 7 | 8 | Args: 9 | T (double): Temperature. Default: 1. 10 | reduction (str, optional): Specifies the reduction to apply to the output: 11 | ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, 12 | ``'mean'``: the sum of the output will be divided by the number of 13 | elements in the output, ``'sum'``: the output will be summed. 
Default: ``'batchmean'`` 14 | 15 | Inputs: 16 | - y_student (tensor): logits output of the student 17 | - y_teacher (tensor): logits output of the teacher 18 | 19 | Shape: 20 | - y_student: (minibatch, `num_classes`) 21 | - y_teacher: (minibatch, `num_classes`) 22 | 23 | """ 24 | def __init__(self, T=1., reduction='batchmean'): 25 | super(KnowledgeDistillationLoss, self).__init__() 26 | self.T = T 27 | self.kl = nn.KLDivLoss(reduction=reduction) 28 | 29 | def forward(self, y_student, y_teacher): 30 | """""" 31 | return self.kl(F.log_softmax(y_student / self.T, dim=-1), F.softmax(y_teacher / self.T, dim=-1)) 32 | -------------------------------------------------------------------------------- /dalib/adaptation/segmentation/fda.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | import math 3 | 4 | 5 | def robust_entropy(y, ita=1.5, num_classes=19, reduction='mean'): 6 | """ Robust entropy proposed in `FDA: Fourier Domain Adaptation for Semantic Segmentation (CVPR 2020) `_ 7 | 8 | Args: 9 | y (tensor): logits output of segmentation model in shape of :math:`(N, C, H, W)` 10 | ita (float, optional): parameters for robust entropy. Default: 1.5 11 | num_classes (int, optional): number of classes. Default: 19 12 | reduction (string, optional): Specifies the reduction to apply to the output: 13 | ``'none'`` | ``'mean'``. ``'none'``: no reduction will be applied, 14 | ``'mean'``: the sum of the output will be divided by the number of 15 | elements in the output. Default: ``'mean'`` 16 | 17 | Returns: 18 | Scalar by default. If :attr:`reduction` is ``'none'``, then :math:`(N, )`. 19 | 20 | """ 21 | P = F.softmax(y, dim=1) 22 | logP = F.log_softmax(y, dim=1) 23 | PlogP = P * logP 24 | ent = -1.0 * PlogP.sum(dim=1) 25 | ent = ent / math.log(num_classes) 26 | 27 | # compute robust entropy 28 | ent = ent ** 2.0 + 1e-8 29 | ent = ent ** ita 30 | 31 | if reduction == 'mean': 32 | return ent.mean() 33 | else: 34 | return ent 35 | -------------------------------------------------------------------------------- /dalib/translation/cyclegan/transform.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision.transforms as T 4 | 5 | from common.vision.transforms import Denormalize 6 | 7 | 8 | class Translation(nn.Module): 9 | """ 10 | Image Translation Transform Module 11 | 12 | Args: 13 | generator (torch.nn.Module): An image generator, e.g. :meth:`~dalib.translation.cyclegan.resnet_9_generator` 14 | device (torch.device): device to put the generator. 
Default: 'cpu' 15 | mean (tuple): the normalized mean for image 16 | std (tuple): the normalized std for image 17 | Input: 18 | - image (PIL.Image): raw image in shape H x W x C 19 | 20 | Output: 21 | raw image in shape H x W x 3 22 | 23 | """ 24 | def __init__(self, generator, device=torch.device("cpu"), mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)): 25 | super(Translation, self).__init__() 26 | self.generator = generator.to(device) 27 | self.device = device 28 | self.pre_process = T.Compose([ 29 | T.ToTensor(), 30 | T.Normalize(mean, std) 31 | ]) 32 | self.post_process = T.Compose([ 33 | Denormalize(mean, std), 34 | T.ToPILImage() 35 | ]) 36 | 37 | def forward(self, image): 38 | image = self.pre_process(image.copy()) # C x H x W 39 | image = image.to(self.device) 40 | generated_image = self.generator(image.unsqueeze(dim=0)).squeeze(dim=0).cpu() 41 | return self.post_process(generated_image) 42 | -------------------------------------------------------------------------------- /icon/transform.py: -------------------------------------------------------------------------------- 1 | 2 | from icon.randaugment import rand_augment_transform 3 | from common.vision.transforms import ResizeImage 4 | import torchvision.transforms as T 5 | 6 | rgb_mean = (0.485, 0.456, 0.406) 7 | ra_params = dict(translate_const=int(224 * 0.45), img_mean=tuple([min(255, round(255 * x)) for x in rgb_mean]),) 8 | 9 | 10 | class TransformFixMatch(object): 11 | def __init__(self): 12 | normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 13 | self.weak = T.Compose([ 14 | ResizeImage(256), 15 | T.CenterCrop(224), 16 | T.RandomHorizontalFlip(), 17 | T.ToTensor(), 18 | normalize 19 | ]) 20 | self.strong = T.Compose([ 21 | ResizeImage(256), 22 | T.CenterCrop(224), 23 | T.RandomHorizontalFlip(), 24 | T.RandomApply([ 25 | T.ColorJitter(0.4, 0.4, 0.4, 0.0) 26 | ], p=1.0), 27 | rand_augment_transform('rand-n{}-m{}-mstd0.5'.format(2, 10), ra_params), 28 | T.ToTensor(), 29 | normalize, 30 | ]) 31 | 32 | def __call__(self, x): 33 | weak = self.weak(x) 34 | strong = self.strong(x) 35 | return weak, strong 36 | 37 | 38 | def get_val_trainsform(): 39 | normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 40 | return T.Compose([ 41 | ResizeImage(256), 42 | T.CenterCrop(224), 43 | T.ToTensor(), 44 | normalize 45 | ]) -------------------------------------------------------------------------------- /common/utils/analysis/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import DataLoader 3 | import torch.nn as nn 4 | import tqdm 5 | 6 | 7 | def collect_feature(data_loader: DataLoader, feature_extractor: nn.Module, 8 | device: torch.device, max_num_features=None) -> torch.Tensor: 9 | """ 10 | Fetch data from `data_loader`, and then use `feature_extractor` to collect features 11 | 12 | Args: 13 | data_loader (torch.utils.data.DataLoader): Data loader. 14 | feature_extractor (torch.nn.Module): A feature extractor. 15 | device (torch.device) 16 | max_num_features (int): The max number of features to return 17 | 18 | Returns: 19 | Features in shape (min(len(data_loader), max_num_features), :math:`|\mathcal{F}|`). 
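Note (ICON variant): `data_loader` is expected to yield batches of the form ((images, images2), target, _), i.e. two augmented views per sample plus an ignored third field, and the function returns a tuple (features of the first view, features of the second view, labels) rather than a single tensor. `max_num_features` caps the number of batches iterated rather than the number of feature vectors.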
20 | """ 21 | feature_extractor.eval() 22 | all_features = [] 23 | all_features2 = [] 24 | all_labels = [] 25 | with torch.no_grad(): 26 | for i, ((images, images2), target, _) in enumerate(tqdm.tqdm(data_loader)): 27 | images = images.to(device) 28 | images2 = images2.to(device) 29 | feature = feature_extractor(images).cpu() 30 | feature2 = feature_extractor(images2).cpu() 31 | all_features.append(feature) 32 | all_features2.append(feature2) 33 | all_labels.append(target) 34 | if max_num_features is not None and i >= max_num_features: 35 | break 36 | return torch.cat(all_features, dim=0), torch.cat(all_features2, dim=0), torch.cat(all_labels, dim=0) -------------------------------------------------------------------------------- /common/vision/datasets/segmentation/gta5.py: -------------------------------------------------------------------------------- 1 | import os 2 | from .segmentation_list import SegmentationList 3 | from .cityscapes import Cityscapes 4 | from .._util import download as download_data 5 | 6 | 7 | class GTA5(SegmentationList): 8 | """`GTA5 `_ 9 | 10 | Args: 11 | root (str): Root directory of dataset 12 | split (str, optional): The dataset split, supports ``train``. 13 | data_folder (str, optional): Sub-directory of the image. Default: 'images'. 14 | label_folder (str, optional): Sub-directory of the label. Default: 'labels'. 15 | mean (seq[float]): mean BGR value. Normalize the image if not None. Default: None. 16 | transforms (callable, optional): A function/transform that takes in (PIL image, label) pair \ 17 | and returns a transformed version. E.g, :class:`~common.vision.transforms.segmentation.Resize`. 18 | 19 | .. note:: You need to download GTA5 manually. 20 | Ensure that there exist following directories in the `root` directory before you using this class. 21 | :: 22 | images/ 23 | labels/ 24 | """ 25 | download_list = [ 26 | ("image_list", "image_list.zip", "https://cloud.tsinghua.edu.cn/f/c77ff6fc4eea435791f4/?dl=1"), 27 | ] 28 | 29 | def __init__(self, root, split='train', data_folder='images', label_folder='labels', **kwargs): 30 | assert split in ['train'] 31 | # download meta information from Internet 32 | list(map(lambda args: download_data(root, *args), self.download_list)) 33 | data_list_file = os.path.join(root, "image_list", "{}.txt".format(split)) 34 | self.split = split 35 | super(GTA5, self).__init__(root, Cityscapes.CLASSES, data_list_file, data_list_file, data_folder, label_folder, 36 | id_to_train_id=Cityscapes.ID_TO_TRAIN_ID, train_id_to_color=Cityscapes.TRAIN_ID_TO_COLOR, **kwargs) -------------------------------------------------------------------------------- /dalib/modules/domain_discriminator.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict 2 | import torch.nn as nn 3 | 4 | __all__ = ['DomainDiscriminator'] 5 | 6 | 7 | class DomainDiscriminator(nn.Sequential): 8 | r"""Domain discriminator model from 9 | `"Domain-Adversarial Training of Neural Networks" (ICML 2015) `_ 10 | 11 | Distinguish whether the input features come from the source domain or the target domain. 12 | The source domain label is 1 and the target domain label is 0. 13 | 14 | Args: 15 | in_feature (int): dimension of the input feature 16 | hidden_size (int): dimension of the hidden features 17 | batch_norm (bool): whether use :class:`~torch.nn.BatchNorm1d`. 18 | Use :class:`~torch.nn.Dropout` if ``batch_norm`` is False. Default: True. 
19 | 20 | Shape: 21 | - Inputs: (minibatch, `in_feature`) 22 | - Outputs: :math:`(minibatch, 1)` 23 | """ 24 | 25 | def __init__(self, in_feature: int, hidden_size: int, batch_norm=True): 26 | if batch_norm: 27 | super(DomainDiscriminator, self).__init__( 28 | nn.Linear(in_feature, hidden_size), 29 | nn.BatchNorm1d(hidden_size), 30 | nn.ReLU(), 31 | nn.Linear(hidden_size, hidden_size), 32 | nn.BatchNorm1d(hidden_size), 33 | nn.ReLU(), 34 | nn.Linear(hidden_size, 1), 35 | nn.Sigmoid() 36 | ) 37 | else: 38 | super(DomainDiscriminator, self).__init__( 39 | nn.Linear(in_feature, hidden_size), 40 | nn.ReLU(inplace=True), 41 | nn.Dropout(0.5), 42 | nn.Linear(hidden_size, hidden_size), 43 | nn.ReLU(inplace=True), 44 | nn.Dropout(0.5), 45 | nn.Linear(hidden_size, 1), 46 | nn.Sigmoid() 47 | ) 48 | 49 | def get_parameters(self) -> List[Dict]: 50 | return [{"params": self.parameters(), "lr": 1.}] 51 | 52 | 53 | -------------------------------------------------------------------------------- /common/vision/datasets/_util.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List 3 | from torchvision.datasets.utils import download_and_extract_archive 4 | 5 | 6 | def download(root: str, file_name: str, archive_name: str, url_link: str): 7 | """ 8 | Download file from internet url link. 9 | 10 | Args: 11 | root (str) The directory to put downloaded files. 12 | file_name: (str) The name of the unzipped file. 13 | archive_name: (str) The name of archive(zipped file) downloaded. 14 | url_link: (str) The url link to download data. 15 | 16 | .. note:: 17 | If `file_name` already exists under path `root`, then it is not downloaded again. 18 | Else `archive_name` will be downloaded from `url_link` and extracted to `file_name`. 19 | """ 20 | if not os.path.exists(os.path.join(root, file_name)): 21 | print("Downloading {}".format(file_name)) 22 | # if os.path.exists(os.path.join(root, archive_name)): 23 | # os.remove(os.path.join(root, archive_name)) 24 | try: 25 | download_and_extract_archive(url_link, download_root=root, filename=archive_name, remove_finished=False) 26 | except Exception: 27 | print("Fail to download {} from url link {}".format(archive_name, url_link)) 28 | print('Please check you internet connection or ' 29 | "reinstall DALIB by 'pip install --upgrade dalib'") 30 | exit(0) 31 | 32 | 33 | def check_exits(root: str, file_name: str): 34 | """Check whether `file_name` exists under directory `root`. """ 35 | if not os.path.exists(os.path.join(root, file_name)): 36 | print("Dataset directory {} not found under {}".format(file_name, root)) 37 | exit(-1) 38 | 39 | 40 | def read_list_from_file(file_name: str) -> List[str]: 41 | """Read data from file and convert each line into an element in the list""" 42 | result = [] 43 | with open(file_name, "r") as f: 44 | for line in f.readlines(): 45 | result.append(line.strip()) 46 | return result 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ICON 2 | Code release for "Make the U in UDA Matter: Invariant Consistency Learning for Unsupervised Domain Adaptation" (NeurIPS 2023). Paper is available [here](https://arxiv.org/pdf/2309.12742.pdf). 
3 | 4 | ## Prerequisites 5 | - torch>=1.7.0 6 | - torchvision 7 | - qpsolvers 8 | - numpy 9 | - prettytable 10 | - tqdm 11 | - scikit-learn 12 | - webcolors 13 | - matplotlib 14 | 15 | 16 | ## Training 17 | 18 | Replace {data_dir} with the dataset directory. Missing datasets will be downloaded automatically. Replace {log_dir} with the logging directory (for storing model checkpoints, TensorBoard logs and console logs). For Office-Home, the source (-s) and target (-t) domains take values from {'Ar', 'Cl', 'Rw', 'Pr'}. 19 | 20 | VisDA-2017 21 | ``` 22 | CUDA_VISIBLE_DEVICES=0 python run_icon.py {data_dir} -d VisDA2017 -s Synthetic -t Real -a resnet50 --epochs 50 --lr 0.002 --per-class-eval --temperature 3.0 --center-crop --w-transfer 0.08 --w-st 1.0 --threshold 0.97 --log-root {log_dir} --batch-size 28 --optim sgd --con-start-epoch 5 --con-mode sim --w-inv 0.25 --inv-start-epoch 5 --back-cluster-start-epoch 9 --topk 3 --dim-reduction umap --reduced-dim 50 --eqinv --exp-name visda_reproduce --seed 0 23 | ``` 24 | 25 | Office-Home 26 | ``` 27 | CUDA_VISIBLE_DEVICES=0 python run_icon.py {data_dir} -d OfficeHome -s Ar -t Cl -a resnet50 --epochs 50 --lr 0.005 --temperature 2.5 --bottleneck-dim 2048 --w-transfer 0.015 --w-st 0.5 --threshold 0.97 --log-root {log_dir} --batch-size 28 --con-start-epoch 0 --con-mode stats --back-cluster-start-epoch 0 --topk 5 --seed 0 --w-inv 0.1 --inv-start-epoch 10 --exp-name Ar2Cl --optim sam 28 | ``` 29 | 30 | ## Acknowledgement 31 | This code is implemented based on [CST](https://github.com/Liuhong99/CST), and it is our pleasure to acknowledge their contributions. 32 | 33 | 34 | ## Citation 35 | If you use this code for your research, please consider citing: 36 | ``` 37 | @article{yue2023make, 38 | title={Make the U in UDA Matter: Invariant Consistency Learning for Unsupervised Domain Adaptation}, 39 | author={Yue, Zhongqi and Sun, Qianru and Zhang, Hanwang}, 40 | journal={Advances in neural information processing systems}, 41 | year={2023} 42 | } 43 | ``` 44 | 45 | ## Contact 46 | If you have any problems with our code, feel free to contact 47 | - yuez0003@ntu.edu.sg -------------------------------------------------------------------------------- /sam.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class SAM(torch.optim.Optimizer): 5 | def __init__(self, params, base_optimizer, rho=0.05, adaptive=False, **kwargs): 6 | assert rho >= 0.0, f"Invalid rho, should be non-negative: {rho}" 7 | 8 | defaults = dict(rho=rho, adaptive=adaptive, **kwargs) 9 | super(SAM, self).__init__(params, defaults) 10 | 11 | self.base_optimizer = base_optimizer(self.param_groups, **kwargs) 12 | self.param_groups = self.base_optimizer.param_groups 13 | 14 | @torch.no_grad() 15 | def first_step(self, zero_grad=False): 16 | grad_norm = self._grad_norm() 17 | for group in self.param_groups: 18 | scale = group["rho"] / (grad_norm + 1e-12) 19 | 20 | for p in group["params"]: 21 | if p.grad is None: continue 22 | e_w = (torch.pow(p, 2) if group["adaptive"] else 1.0) * p.grad * scale.to(p) 23 | p.add_(e_w) # climb to the local maximum "w + e(w)" 24 | self.state[p]["e_w"] = e_w 25 | 26 | if zero_grad: self.zero_grad() 27 | 28 | @torch.no_grad() 29 | def second_step(self, zero_grad=False): 30 | for group in self.param_groups: 31 | for p in group["params"]: 32 | if p.grad is None: continue 33 | p.sub_(self.state[p]["e_w"]) # get back to "w" from "w + e(w)" 34 | 35 | self.base_optimizer.step() # do the actual "sharpness-aware" update
36 | 37 | if zero_grad: self.zero_grad() 38 | 39 | @torch.no_grad() 40 | def step(self, closure=None): 41 | assert closure is not None, "Sharpness Aware Minimization requires closure, but it was not provided" 42 | closure = torch.enable_grad()(closure) # the closure should do a full forward-backward pass 43 | 44 | self.first_step(zero_grad=True) 45 | closure() 46 | self.second_step() 47 | 48 | def _grad_norm(self): 49 | shared_device = self.param_groups[0]["params"][0].device # put everything on the same device, in case of model parallelism 50 | norm = torch.norm( 51 | torch.stack([ 52 | ((torch.abs(p) if group["adaptive"] else 1.0) * p.grad).norm(p=2).to(shared_device) 53 | for group in self.param_groups for p in group["params"] 54 | if p.grad is not None 55 | ]), 56 | p=2 57 | ) 58 | return norm 59 | -------------------------------------------------------------------------------- /common/vision/datasets/segmentation/synthia.py: -------------------------------------------------------------------------------- 1 | import os 2 | from .segmentation_list import SegmentationList 3 | from .cityscapes import Cityscapes 4 | from .._util import download as download_data 5 | 6 | 7 | class Synthia(SegmentationList): 8 | """`SYNTHIA `_ 9 | 10 | Args: 11 | root (str): Root directory of dataset 12 | split (str, optional): The dataset split, supports ``train``. 13 | data_folder (str, optional): Sub-directory of the image. Default: 'RGB'. 14 | label_folder (str, optional): Sub-directory of the label. Default: 'synthia_mapped_to_cityscapes'. 15 | mean (seq[float]): mean BGR value. Normalize the image if not None. Default: None. 16 | transforms (callable, optional): A function/transform that takes in (PIL image, label) pair \ 17 | and returns a transformed version. E.g, :class:`~common.vision.transforms.segmentation.Resize`. 18 | 19 | .. note:: You need to download SYNTHIA manually. 20 | Ensure that the following directories exist in the `root` directory before using this class.
21 | :: 22 | RGB/ 23 | synthia_mapped_to_cityscapes/ 24 | """ 25 | ID_TO_TRAIN_ID = { 26 | 3: 0, 4: 1, 2: 2, 21: 3, 5: 4, 7: 5, 27 | 15: 6, 9: 7, 6: 8, 16: 9, 1: 10, 10: 11, 17: 12, 28 | 8: 13, 18: 14, 19: 15, 20: 16, 12: 17, 11: 18 29 | } 30 | download_list = [ 31 | ("image_list", "image_list.zip", "https://cloud.tsinghua.edu.cn/f/15c4d0f8e62e45d9a6b7/?dl=1"), 32 | ] 33 | 34 | def __init__(self, root, split='train', data_folder='RGB', label_folder='synthia_mapped_to_cityscapes', **kwargs): 35 | assert split in ['train'] 36 | # download meta information from Internet 37 | list(map(lambda args: download_data(root, *args), self.download_list)) 38 | data_list_file = os.path.join(root, "image_list", "{}.txt".format(split)) 39 | super(Synthia, self).__init__(root, Cityscapes.CLASSES, data_list_file, data_list_file, data_folder, 40 | label_folder, id_to_train_id=Synthia.ID_TO_TRAIN_ID, 41 | train_id_to_color=Cityscapes.TRAIN_ID_TO_COLOR, **kwargs) 42 | 43 | @property 44 | def evaluate_classes(self): 45 | return [ 46 | 'road', 'sidewalk', 'building', 'traffic light', 'traffic sign', 47 | 'vegetation', 'sky', 'person', 'rider', 'car', 'bus', 'motorcycle', 'bicycle' 48 | ] 49 | -------------------------------------------------------------------------------- /dalib/modules/kernels.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | import torch 3 | import torch.nn as nn 4 | 5 | 6 | __all__ = ['GaussianKernel'] 7 | 8 | 9 | class GaussianKernel(nn.Module): 10 | r"""Gaussian Kernel Matrix 11 | 12 | Gaussian Kernel k is defined by 13 | 14 | .. math:: 15 | k(x_1, x_2) = \exp \left( - \dfrac{\| x_1 - x_2 \|^2}{2\sigma^2} \right) 16 | 17 | where :math:`x_1, x_2 \in R^d` are 1-d tensors. 18 | 19 | Gaussian Kernel Matrix K is defined on input group :math:`X=(x_1, x_2, ..., x_m),` 20 | 21 | .. math:: 22 | K(X)_{i,j} = k(x_i, x_j) 23 | 24 | Also by default, during training this layer keeps running estimates of the 25 | mean of L2 distances, which are then used to set hyperparameter :math:`\sigma`. 26 | Mathematically, the estimation is :math:`\sigma^2 = \dfrac{\alpha}{n^2}\sum_{i,j} \| x_i - x_j \|^2`. 27 | If :attr:`track_running_stats` is set to ``False``, this layer then does not 28 | keep running estimates, and use a fixed :math:`\sigma` instead. 29 | 30 | Args: 31 | sigma (float, optional): bandwidth :math:`\sigma`. Default: None 32 | track_running_stats (bool, optional): If ``True``, this module tracks the running mean of :math:`\sigma^2`. 33 | Otherwise, it won't track such statistics and always uses fix :math:`\sigma^2`. Default: ``True`` 34 | alpha (float, optional): :math:`\alpha` which decides the magnitude of :math:`\sigma^2` when track_running_stats is set to ``True`` 35 | 36 | Inputs: 37 | - X (tensor): input group :math:`X` 38 | 39 | Shape: 40 | - Inputs: :math:`(minibatch, F)` where F means the dimension of input features. 
41 | - Outputs: :math:`(minibatch, minibatch)` 42 | """ 43 | 44 | def __init__(self, sigma: Optional[float] = None, track_running_stats: Optional[bool] = True, 45 | alpha: Optional[float] = 1.): 46 | super(GaussianKernel, self).__init__() 47 | assert track_running_stats or sigma is not None 48 | self.sigma_square = torch.tensor(sigma * sigma) if sigma is not None else None 49 | self.track_running_stats = track_running_stats 50 | self.alpha = alpha 51 | 52 | def forward(self, X: torch.Tensor) -> torch.Tensor: 53 | l2_distance_square = ((X.unsqueeze(0) - X.unsqueeze(1)) ** 2).sum(2) 54 | 55 | if self.track_running_stats: 56 | self.sigma_square = self.alpha * torch.mean(l2_distance_square.detach()) 57 | 58 | return torch.exp(-l2_distance_square / (2 * self.sigma_square)) -------------------------------------------------------------------------------- /common/vision/datasets/visda2017.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional 3 | from .imagelist import ImageList 4 | from ._util import download as download_data, check_exits 5 | 6 | 7 | class VisDA2017(ImageList): 8 | """`VisDA-2017 `_ Dataset 9 | 10 | Args: 11 | root (str): Root directory of dataset 12 | task (str): The task (domain) to create dataset. Choices include ``'Synthetic'``: synthetic images and \ 13 | ``'Real'``: real-world images. 14 | download (bool, optional): If true, downloads the dataset from the internet and puts it \ 15 | in root directory. If dataset is already downloaded, it is not downloaded again. 16 | transform (callable, optional): A function/transform that takes in an PIL image and returns a \ 17 | transformed version. E.g, ``transforms.RandomCrop``. 18 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 19 | 20 | .. note:: In `root`, there will exist following files after downloading. 21 | :: 22 | train/ 23 | aeroplance/ 24 | *.png 25 | ... 
26 | validation/ 27 | image_list/ 28 | train.txt 29 | validation.txt 30 | """ 31 | download_list = [ 32 | ("image_list", "image_list.zip", "https://cloud.tsinghua.edu.cn/f/b25b2b990e8f42e691f0/?dl=1"), 33 | ("train", "train.tar", "http://csr.bu.edu/ftp/visda17/clf/train.tar"), 34 | ("validation", "validation.tar", "http://csr.bu.edu/ftp/visda17/clf/validation.tar") 35 | ] 36 | image_list = { 37 | "Synthetic": "image_list/train.txt", 38 | "Real": "image_list/validation.txt" 39 | } 40 | CLASSES = ['aeroplane', 'bicycle', 'bus', 'car', 'horse', 'knife', 41 | 'motorcycle', 'person', 'plant', 'skateboard', 'train', 'truck'] 42 | 43 | def __init__(self, root: str, task: str, download: Optional[bool] = False, **kwargs): 44 | assert task in self.image_list 45 | data_list_file = os.path.join(root, self.image_list[task]) 46 | 47 | if download: 48 | list(map(lambda args: download_data(root, *args), self.download_list)) 49 | else: 50 | list(map(lambda file_name, _: check_exits(root, file_name), self.download_list)) 51 | 52 | super(VisDA2017, self).__init__(root, VisDA2017.CLASSES, data_list_file=data_list_file, **kwargs) 53 | 54 | @classmethod 55 | def domains(cls): 56 | return list(cls.image_list.keys()) -------------------------------------------------------------------------------- /dalib/translation/cycada.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch import Tensor 3 | 4 | 5 | class SemanticConsistency(nn.Module): 6 | """ 7 | Semantic consistency loss is introduced by 8 | `CyCADA: Cycle-Consistent Adversarial Domain Adaptation (ICML 2018) `_ 9 | 10 | This helps to prevent label flipping during image translation. 11 | 12 | Args: 13 | ignore_index (tuple, optional): Specifies target values that are ignored 14 | and do not contribute to the input gradient. When :attr:`size_average` is 15 | ``True``, the loss is averaged over non-ignored targets. Default: (). 16 | reduction (string, optional): Specifies the reduction to apply to the output: 17 | ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will 18 | be applied, ``'mean'``: the weighted mean of the output is taken, 19 | ``'sum'``: the output will be summed. Note: :attr:`size_average` 20 | and :attr:`reduce` are in the process of being deprecated, and in 21 | the meantime, specifying either of those two args will override 22 | :attr:`reduction`. Default: ``'mean'`` 23 | 24 | Shape: 25 | - Input: :math:`(N, C)` where `C = number of classes`, or 26 | :math:`(N, C, d_1, d_2, ..., d_K)` with :math:`K \geq 1` 27 | in the case of `K`-dimensional loss. 28 | - Target: :math:`(N)` where each value is :math:`0 \leq \text{targets}[i] \leq C-1`, or 29 | :math:`(N, d_1, d_2, ..., d_K)` with :math:`K \geq 1` in the case of 30 | K-dimensional loss. 31 | - Output: scalar. 32 | If :attr:`reduction` is ``'none'``, then the same size as the target: 33 | :math:`(N)`, or 34 | :math:`(N, d_1, d_2, ..., d_K)` with :math:`K \geq 1` in the case 35 | of K-dimensional loss. 
36 | 37 | Examples:: 38 | 39 | >>> loss = SemanticConsistency() 40 | >>> input = torch.randn(3, 5, requires_grad=True) 41 | >>> target = torch.empty(3, dtype=torch.long).random_(5) 42 | >>> output = loss(input, target) 43 | >>> output.backward() 44 | """ 45 | def __init__(self, ignore_index=(), reduction='mean'): 46 | super(SemanticConsistency, self).__init__() 47 | self.ignore_index = ignore_index 48 | self.loss = nn.CrossEntropyLoss(ignore_index=-1, reduction=reduction) 49 | 50 | def forward(self, input: Tensor, target: Tensor) -> Tensor: 51 | for class_idx in self.ignore_index: 52 | target[target == class_idx] = -1 53 | return self.loss(input, target) 54 | -------------------------------------------------------------------------------- /dalib/modules/gl.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Any, Tuple 2 | import numpy as np 3 | import torch.nn as nn 4 | from torch.autograd import Function 5 | import torch 6 | 7 | 8 | class GradientFunction(Function): 9 | 10 | @staticmethod 11 | def forward(ctx: Any, input: torch.Tensor, coeff: Optional[float] = 1.) -> torch.Tensor: 12 | ctx.coeff = coeff 13 | output = input * 1.0 14 | return output 15 | 16 | @staticmethod 17 | def backward(ctx: Any, grad_output: torch.Tensor) -> Tuple[torch.Tensor, Any]: 18 | return grad_output * ctx.coeff, None 19 | 20 | 21 | class WarmStartGradientLayer(nn.Module): 22 | """Warm Start Gradient Layer :math:`\mathcal{R}(x)` with warm start 23 | 24 | The forward and backward behaviours are: 25 | 26 | .. math:: 27 | \mathcal{R}(x) = x, 28 | 29 | \dfrac{ d\mathcal{R}} {dx} = \lambda I. 30 | 31 | :math:`\lambda` is initiated at :math:`lo` and is gradually changed to :math:`hi` using the following schedule: 32 | 33 | .. math:: 34 | \lambda = \dfrac{2(hi-lo)}{1+\exp(- α \dfrac{i}{N})} - (hi-lo) + lo 35 | 36 | where :math:`i` is the iteration step. 37 | 38 | Parameters: 39 | - **alpha** (float, optional): :math:`α`. Default: 1.0 40 | - **lo** (float, optional): Initial value of :math:`\lambda`. Default: 0.0 41 | - **hi** (float, optional): Final value of :math:`\lambda`. Default: 1.0 42 | - **max_iters** (int, optional): :math:`N`. Default: 1000 43 | - **auto_step** (bool, optional): If True, increase :math:`i` each time `forward` is called. 44 | Otherwise use function `step` to increase :math:`i`. 
Default: False 45 | """ 46 | 47 | def __init__(self, alpha: Optional[float] = 1.0, lo: Optional[float] = 0.0, hi: Optional[float] = 1., 48 | max_iters: Optional[int] = 1000., auto_step: Optional[bool] = False): 49 | super(WarmStartGradientLayer, self).__init__() 50 | self.alpha = alpha 51 | self.lo = lo 52 | self.hi = hi 53 | self.iter_num = 0 54 | self.max_iters = max_iters 55 | self.auto_step = auto_step 56 | 57 | def forward(self, input: torch.Tensor) -> torch.Tensor: 58 | """""" 59 | coeff = np.float( 60 | 2.0 * (self.hi - self.lo) / (1.0 + np.exp(-self.alpha * self.iter_num / self.max_iters)) 61 | - (self.hi - self.lo) + self.lo 62 | ) 63 | if self.auto_step: 64 | self.step() 65 | return GradientFunction.apply(input, coeff) 66 | 67 | def step(self): 68 | """Increase iteration number :math:`i` by 1""" 69 | self.iter_num += 1 70 | -------------------------------------------------------------------------------- /common/utils/data.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | from torch.utils.data import DataLoader, Dataset 3 | from typing import TypeVar, Iterable 4 | 5 | 6 | T_co = TypeVar('T_co', covariant=True) 7 | T = TypeVar('T') 8 | 9 | 10 | def send_to_device(tensor, device): 11 | """ 12 | Recursively sends the elements in a nested list/tuple/dictionary of tensors to a given device. 13 | 14 | Args: 15 | tensor (nested list/tuple/dictionary of :obj:`torch.Tensor`): 16 | The data to send to a given device. 17 | device (:obj:`torch.device`): 18 | The device to send the data to 19 | 20 | Returns: 21 | The same data structure as :obj:`tensor` with all tensors sent to the proper device. 22 | """ 23 | if isinstance(tensor, (list, tuple)): 24 | return type(tensor)(send_to_device(t, device) for t in tensor) 25 | elif isinstance(tensor, dict): 26 | return type(tensor)({k: send_to_device(v, device) for k, v in tensor.items()}) 27 | elif not hasattr(tensor, "to"): 28 | return tensor 29 | return tensor.to(device) 30 | 31 | 32 | class ForeverDataIterator: 33 | r"""A data iterator that will never stop producing data""" 34 | def __init__(self, data_loader: DataLoader, device=None): 35 | self.data_loader = data_loader 36 | self.iter = iter(self.data_loader) 37 | self.device = device 38 | 39 | def __next__(self): 40 | try: 41 | data = next(self.iter) 42 | if self.device is not None: 43 | data = send_to_device(data, self.device) 44 | except StopIteration: 45 | self.iter = iter(self.data_loader) 46 | data = next(self.iter) 47 | if self.device is not None: 48 | data = send_to_device(data, self.device) 49 | return data 50 | 51 | def __len__(self): 52 | return len(self.data_loader) 53 | 54 | 55 | class CombineDataset(Dataset): 56 | r"""Dataset as a combination of multiple datasets. 57 | 58 | The element of each dataset must be a list, and the i-th element of the combined dataset 59 | is a list splicing of the i-th element of each sub dataset. 60 | The length of the combined dataset is the minimum of the lengths of all sub datasets. 
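For example, if one sub dataset yields items of the form [image, label] and a second yields [index], item i of the combined dataset is the spliced list [image, label, index].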
61 | 62 | Arguments: 63 | datasets (sequence): List of datasets to be concatenated 64 | """ 65 | def __init__(self, datasets: Iterable[Dataset]) -> None: 66 | super(CombineDataset, self).__init__() 67 | # Cannot verify that datasets is Sized 68 | assert len(datasets) > 0, 'datasets should not be an empty iterable' # type: ignore 69 | self.datasets = list(datasets) 70 | 71 | def __len__(self): 72 | return min([len(d) for d in self.datasets]) 73 | 74 | def __getitem__(self, idx): 75 | return list(itertools.chain(*[d[idx] for d in self.datasets])) 76 | 77 | -------------------------------------------------------------------------------- /common/utils/meter.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List 2 | 3 | 4 | class AverageMeter(object): 5 | r"""Computes and stores the average and current value. 6 | 7 | Examples:: 8 | 9 | >>> # Initialize a meter to record loss 10 | >>> losses = AverageMeter() 11 | >>> # Update meter after every minibatch update 12 | >>> losses.update(loss_value, batch_size) 13 | """ 14 | def __init__(self, name: str, fmt: Optional[str] = ':f'): 15 | self.name = name 16 | self.fmt = fmt 17 | self.reset() 18 | 19 | def reset(self): 20 | self.val = 0 21 | self.avg = 0 22 | self.sum = 0 23 | self.count = 0 24 | 25 | def update(self, val, n=1): 26 | self.val = val 27 | self.sum += val * n 28 | self.count += n 29 | if self.count > 0: 30 | self.avg = self.sum / self.count 31 | 32 | def __str__(self): 33 | fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' 34 | return fmtstr.format(**self.__dict__) 35 | 36 | 37 | class AverageMeterDict(object): 38 | def __init__(self, names: List, fmt: Optional[str] = ':f'): 39 | self.dict = { 40 | name: AverageMeter(name, fmt) for name in names 41 | } 42 | 43 | def reset(self): 44 | for meter in self.dict.values(): 45 | meter.reset() 46 | 47 | def update(self, accuracies, n=1): 48 | for name, acc in accuracies.items(): 49 | self.dict[name].update(acc, n) 50 | 51 | def average(self): 52 | return { 53 | name: meter.avg for name, meter in self.dict.items() 54 | } 55 | 56 | def __getitem__(self, item): 57 | return self.dict[item] 58 | 59 | 60 | class Meter(object): 61 | """Computes and stores the current value.""" 62 | def __init__(self, name: str, fmt: Optional[str] = ':f'): 63 | self.name = name 64 | self.fmt = fmt 65 | self.reset() 66 | 67 | def reset(self): 68 | self.val = 0 69 | 70 | def update(self, val): 71 | self.val = val 72 | 73 | def __str__(self): 74 | fmtstr = '{name} {val' + self.fmt + '}' 75 | return fmtstr.format(**self.__dict__) 76 | 77 | 78 | class ProgressMeter(object): 79 | def __init__(self, num_batches, meters, prefix=""): 80 | self.batch_fmtstr = self._get_batch_fmtstr(num_batches) 81 | self.meters = meters 82 | self.prefix = prefix 83 | 84 | def display(self, batch): 85 | entries = [self.prefix + self.batch_fmtstr.format(batch)] 86 | entries += [str(meter) for meter in self.meters] 87 | print('\t'.join(entries)) 88 | 89 | def _get_batch_fmtstr(self, num_batches): 90 | num_digits = len(str(num_batches // 1)) 91 | fmt = '{:' + str(num_digits) + 'd}' 92 | return '[' + fmt + '/' + fmt.format(num_batches) + ']' 93 | 94 | 95 | -------------------------------------------------------------------------------- /dalib/modules/grl.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Any, Tuple 2 | import numpy as np 3 | import torch.nn as nn 4 | from torch.autograd import Function 5 | 
import torch 6 | 7 | 8 | class GradientReverseFunction(Function): 9 | 10 | @staticmethod 11 | def forward(ctx: Any, input: torch.Tensor, coeff: Optional[float] = 1.) -> torch.Tensor: 12 | ctx.coeff = coeff 13 | output = input * 1.0 14 | return output 15 | 16 | @staticmethod 17 | def backward(ctx: Any, grad_output: torch.Tensor) -> Tuple[torch.Tensor, Any]: 18 | return grad_output.neg() * ctx.coeff, None 19 | 20 | 21 | class GradientReverseLayer(nn.Module): 22 | def __init__(self): 23 | super(GradientReverseLayer, self).__init__() 24 | 25 | def forward(self, *input): 26 | return GradientReverseFunction.apply(*input) 27 | 28 | 29 | class WarmStartGradientReverseLayer(nn.Module): 30 | """Gradient Reverse Layer :math:`\mathcal{R}(x)` with warm start 31 | 32 | The forward and backward behaviours are: 33 | 34 | .. math:: 35 | \mathcal{R}(x) = x, 36 | 37 | \dfrac{ d\mathcal{R}} {dx} = - \lambda I. 38 | 39 | :math:`\lambda` is initiated at :math:`lo` and is gradually changed to :math:`hi` using the following schedule: 40 | 41 | .. math:: 42 | \lambda = \dfrac{2(hi-lo)}{1+\exp(- α \dfrac{i}{N})} - (hi-lo) + lo 43 | 44 | where :math:`i` is the iteration step. 45 | 46 | Args: 47 | alpha (float, optional): :math:`α`. Default: 1.0 48 | lo (float, optional): Initial value of :math:`\lambda`. Default: 0.0 49 | hi (float, optional): Final value of :math:`\lambda`. Default: 1.0 50 | max_iters (int, optional): :math:`N`. Default: 1000 51 | auto_step (bool, optional): If True, increase :math:`i` each time `forward` is called. 52 | Otherwise use function `step` to increase :math:`i`. Default: False 53 | """ 54 | 55 | def __init__(self, alpha: Optional[float] = 1.0, lo: Optional[float] = 0.0, hi: Optional[float] = 1., 56 | max_iters: Optional[int] = 1000., auto_step: Optional[bool] = False): 57 | super(WarmStartGradientReverseLayer, self).__init__() 58 | self.alpha = alpha 59 | self.lo = lo 60 | self.hi = hi 61 | self.iter_num = 0 62 | self.max_iters = max_iters 63 | self.auto_step = auto_step 64 | 65 | def forward(self, input: torch.Tensor) -> torch.Tensor: 66 | """""" 67 | coeff = np.float( 68 | 2.0 * (self.hi - self.lo) / (1.0 + np.exp(-self.alpha * self.iter_num / self.max_iters)) 69 | - (self.hi - self.lo) + self.lo 70 | ) 71 | if self.auto_step: 72 | self.step() 73 | return GradientReverseFunction.apply(input, coeff) 74 | 75 | def step(self): 76 | """Increase iteration number :math:`i` by 1""" 77 | self.iter_num += 1 78 | -------------------------------------------------------------------------------- /dalib/adaptation/mcd.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | import torch.nn as nn 3 | import torch 4 | 5 | 6 | def classifier_discrepancy(predictions1: torch.Tensor, predictions2: torch.Tensor) -> torch.Tensor: 7 | r"""The `Classifier Discrepancy` in 8 | `Maximum Classifier Discrepancy for Unsupervised Domain Adaptation (CVPR 2018) `_. 9 | 10 | The classfier discrepancy between predictions :math:`p_1` and :math:`p_2` can be described as: 11 | 12 | .. math:: 13 | d(p_1, p_2) = \dfrac{1}{K} \sum_{k=1}^K | p_{1k} - p_{2k} |, 14 | 15 | where K is number of classes. 16 | 17 | Args: 18 | predictions1 (torch.Tensor): Classifier predictions :math:`p_1`. 
Expected to contain raw, normalized scores for each class 19 | predictions2 (torch.Tensor): Classifier predictions :math:`p_2` 20 | """ 21 | return torch.mean(torch.abs(predictions1 - predictions2)) 22 | 23 | 24 | def entropy(predictions: torch.Tensor) -> torch.Tensor: 25 | r"""Entropy of N predictions :math:`(p_1, p_2, ..., p_N)`. 26 | The definition is: 27 | 28 | .. math:: 29 | d(p_1, p_2, ..., p_N) = -\dfrac{1}{K} \sum_{k=1}^K \log \left( \dfrac{1}{N} \sum_{i=1}^N p_{ik} \right) 30 | 31 | where K is number of classes. 32 | 33 | .. note:: 34 | This entropy function is specifically used in MCD and different from the usual :meth:`~dalib.modules.entropy.entropy` function. 35 | 36 | Args: 37 | predictions (torch.Tensor): Classifier predictions. Expected to contain raw, normalized scores for each class 38 | """ 39 | return -torch.mean(torch.log(torch.mean(predictions, 0) + 1e-6)) 40 | 41 | 42 | class ImageClassifierHead(nn.Module): 43 | r"""Classifier Head for MCD. 44 | 45 | Args: 46 | in_features (int): Dimension of input features 47 | num_classes (int): Number of classes 48 | bottleneck_dim (int, optional): Feature dimension of the bottleneck layer. Default: 1024 49 | 50 | Shape: 51 | - Inputs: :math:`(minibatch, F)` where F = `in_features`. 52 | - Output: :math:`(minibatch, C)` where C = `num_classes`. 53 | """ 54 | 55 | def __init__(self, in_features: int, num_classes: int, bottleneck_dim: Optional[int] = 1024): 56 | super(ImageClassifierHead, self).__init__() 57 | self.num_classes = num_classes 58 | self.head = nn.Sequential( 59 | nn.AdaptiveAvgPool2d(output_size=(1, 1)), 60 | nn.Flatten(), 61 | nn.Dropout(0.5), 62 | nn.Linear(in_features, bottleneck_dim), 63 | nn.BatchNorm1d(bottleneck_dim), 64 | nn.ReLU(), 65 | nn.Dropout(0.5), 66 | nn.Linear(bottleneck_dim, bottleneck_dim), 67 | nn.BatchNorm1d(bottleneck_dim), 68 | nn.ReLU(), 69 | nn.Linear(bottleneck_dim, num_classes) 70 | ) 71 | 72 | def forward(self, inputs: torch.Tensor) -> torch.Tensor: 73 | return self.head(inputs) -------------------------------------------------------------------------------- /dalib/adaptation/osbp.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from common.modules.classifier import Classifier as ClassifierBase 7 | from ..modules.grl import GradientReverseLayer 8 | 9 | 10 | class UnknownClassBinaryCrossEntropy(nn.Module): 11 | r""" 12 | Binary cross entropy loss to make a boundary for unknown samples, proposed by 13 | `Open Set Domain Adaptation by Backpropagation (ECCV 2018) `_. 14 | 15 | Given a sample on target domain :math:`x_t` and its classifcation outputs :math:`y`, the binary cross entropy 16 | loss is defined as 17 | 18 | .. math:: 19 | L_{adv}(x_t) = -t log(p(y=C+1|x_t)) - (1-t)log(1-p(y=C+1|x_t)) 20 | 21 | where t is a hyper-parameter and C is the number of known classes. 22 | 23 | Args: 24 | t (float): Predefined hyper-parameter. Default: 0.5 25 | 26 | Inputs: 27 | - y (tensor): classification outputs (before softmax). 28 | 29 | Shape: 30 | - y: :math:`(minibatch, C+1)` where C is the number of known classes. 
31 | - Outputs: scalar 32 | 33 | """ 34 | def __init__(self, t: Optional[float]=0.5): 35 | super(UnknownClassBinaryCrossEntropy, self).__init__() 36 | self.t = t 37 | 38 | def forward(self, y): 39 | # y : N x (C+1) 40 | softmax_output = F.softmax(y, dim=1) 41 | unknown_class_prob = softmax_output[:, -1].contiguous().view(-1, 1) 42 | known_class_prob = 1. - unknown_class_prob 43 | 44 | unknown_target = torch.ones((y.size(0), 1)).to(y.device) * self.t 45 | known_target = 1. - unknown_target 46 | return - torch.mean(unknown_target * torch.log(unknown_class_prob + 1e-6)) \ 47 | - torch.mean(known_target * torch.log(known_class_prob + 1e-6)) 48 | 49 | 50 | class ImageClassifier(ClassifierBase): 51 | def __init__(self, backbone: nn.Module, num_classes: int, bottleneck_dim: Optional[int] = 256, **kwargs): 52 | bottleneck = nn.Sequential( 53 | nn.AdaptiveAvgPool2d(output_size=(1, 1)), 54 | nn.Flatten(), 55 | nn.Linear(backbone.out_features, bottleneck_dim), 56 | nn.BatchNorm1d(bottleneck_dim), 57 | nn.ReLU(), 58 | nn.Dropout(), 59 | nn.Linear(bottleneck_dim, bottleneck_dim), 60 | nn.BatchNorm1d(bottleneck_dim), 61 | nn.ReLU(), 62 | nn.Dropout() 63 | ) 64 | super(ImageClassifier, self).__init__(backbone, num_classes, bottleneck, bottleneck_dim, **kwargs) 65 | self.grl = GradientReverseLayer() 66 | 67 | def forward(self, x: torch.Tensor, grad_reverse: Optional[bool] = False): 68 | features = self.backbone(x) 69 | features = self.bottleneck(features) 70 | if grad_reverse: 71 | features = self.grl(features) 72 | outputs = self.head(features) 73 | return outputs, features 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /common/vision/datasets/office31.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | import os 3 | from .imagelist import ImageList 4 | from ._util import download as download_data, check_exits 5 | 6 | 7 | class Office31(ImageList): 8 | """Office31 Dataset. 9 | 10 | Args: 11 | root (str): Root directory of dataset 12 | task (str): The task (domain) to create dataset. Choices include ``'A'``: amazon, \ 13 | ``'D'``: dslr and ``'W'``: webcam. 14 | download (bool, optional): If true, downloads the dataset from the internet and puts it \ 15 | in root directory. If dataset is already downloaded, it is not downloaded again. 16 | transform (callable, optional): A function/transform that takes in an PIL image and returns a \ 17 | transformed version. E.g, :class:`torchvision.transforms.RandomCrop`. 18 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 19 | 20 | .. note:: In `root`, there will exist following files after downloading. 21 | :: 22 | amazon/ 23 | images/ 24 | backpack/ 25 | *.jpg 26 | ... 
27 | dslr/ 28 | webcam/ 29 | image_list/ 30 | amazon.txt 31 | dslr.txt 32 | webcam.txt 33 | """ 34 | download_list = [ 35 | ("image_list", "image_list.zip", "https://cloud.tsinghua.edu.cn/f/d9bca681c71249f19da2/?dl=1"), 36 | ("amazon", "amazon.tgz", "https://cloud.tsinghua.edu.cn/f/edc8d1bba1c740dc821c/?dl=1"), 37 | ("dslr", "dslr.tgz", "https://cloud.tsinghua.edu.cn/f/ca6df562b7e64850ad7f/?dl=1"), 38 | ("webcam", "webcam.tgz", "https://cloud.tsinghua.edu.cn/f/82b24ed2e08f4a3c8888/?dl=1"), 39 | ] 40 | image_list = { 41 | "A": "image_list/amazon.txt", 42 | "D": "image_list/dslr.txt", 43 | "W": "image_list/webcam.txt" 44 | } 45 | CLASSES = ['back_pack', 'bike', 'bike_helmet', 'bookcase', 'bottle', 'calculator', 'desk_chair', 'desk_lamp', 46 | 'desktop_computer', 'file_cabinet', 'headphones', 'keyboard', 'laptop_computer', 'letter_tray', 47 | 'mobile_phone', 'monitor', 'mouse', 'mug', 'paper_notebook', 'pen', 'phone', 'printer', 'projector', 48 | 'punchers', 'ring_binder', 'ruler', 'scissors', 'speaker', 'stapler', 'tape_dispenser', 'trash_can'] 49 | 50 | def __init__(self, root: str, task: str, download: Optional[bool] = True, **kwargs): 51 | assert task in self.image_list 52 | data_list_file = os.path.join(root, self.image_list[task]) 53 | 54 | if download: 55 | list(map(lambda args: download_data(root, *args), self.download_list)) 56 | else: 57 | list(map(lambda file_name, _: check_exits(root, file_name), self.download_list)) 58 | 59 | super(Office31, self).__init__(root, Office31.CLASSES, data_list_file=data_list_file, **kwargs) 60 | 61 | @classmethod 62 | def domains(cls): 63 | return list(cls.image_list.keys()) -------------------------------------------------------------------------------- /common/vision/datasets/officecaltech.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional 3 | from torchvision.datasets.folder import DatasetFolder, IMG_EXTENSIONS, default_loader 4 | from torchvision.datasets.utils import download_and_extract_archive 5 | from ._util import check_exits 6 | 7 | 8 | class OfficeCaltech(DatasetFolder): 9 | """Office+Caltech Dataset. 10 | 11 | Args: 12 | root (str): Root directory of dataset 13 | task (str): The task (domain) to create dataset. Choices include ``'A'``: amazon, \ 14 | ``'D'``: dslr, ``'W'``:webcam and ``'C'``: caltech. 15 | download (bool, optional): If true, downloads the dataset from the internet and puts it \ 16 | in root directory. If dataset is already downloaded, it is not downloaded again. 17 | transform (callable, optional): A function/transform that takes in an PIL image and returns a \ 18 | transformed version. E.g, :class:`torchvision.transforms.RandomCrop`. 19 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 20 | 21 | .. note:: In `root`, there will exist following files after downloading. 22 | :: 23 | amazon/ 24 | images/ 25 | backpack/ 26 | *.jpg 27 | ... 
28 | dslr/
29 | webcam/
30 | caltech/
31 | image_list/
32 | amazon.txt
33 | dslr.txt
34 | webcam.txt
35 | caltech.txt
36 | """
37 | directories = {
38 | "A": "amazon",
39 | "D": "dslr",
40 | "W": "webcam",
41 | "C": "caltech"
42 | }
43 | CLASSES = ['back_pack', 'bike', 'calculator', 'headphones', 'keyboard',
44 | 'laptop_computer', 'monitor', 'mouse', 'mug', 'projector']
45 | 
46 | def __init__(self, root: str, task: str, download: Optional[bool] = False, **kwargs):
47 | if download:
48 | for dir in self.directories.values():
49 | if not os.path.exists(os.path.join(root, dir)):
50 | download_and_extract_archive(url="https://cloud.tsinghua.edu.cn/f/e93f2e07d93243d6b57e/?dl=1",
51 | download_root=os.path.join(root, 'download'),
52 | filename="officecaltech.tgz", remove_finished=False, extract_root=root)
53 | break
54 | else:
55 | list(map(lambda dir, _: check_exits(root, dir), self.directories.values()))
56 | 
57 | super(OfficeCaltech, self).__init__(
58 | os.path.join(root, self.directories[task]), default_loader, extensions=IMG_EXTENSIONS, **kwargs)
59 | self.classes = OfficeCaltech.CLASSES
60 | # map each class name to its index in CLASSES
61 | self.class_to_idx = {cls: idx
62 | for idx, cls in enumerate(self.classes)}
63 | 
64 | @property
65 | def num_classes(self):
66 | """Number of classes"""
67 | return len(self.classes)
68 | 
69 | @classmethod
70 | def domains(cls):
71 | return list(cls.directories.keys())
-------------------------------------------------------------------------------- /validate.py: --------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from common.utils.meter import AverageMeter, ProgressMeter
4 | import argparse
5 | from torch.utils.data import DataLoader
6 | from icon.uda_backbone import ImageClassifier
7 | from common.utils.metric import accuracy, ConfusionMatrix
8 | import time
9 | import torch.nn.functional as F
10 | 
11 | 
12 | def validate_model(val_loader: DataLoader, source_loader: DataLoader, model: ImageClassifier, args: argparse.Namespace, device, identifier="default"):
13 | batch_time = AverageMeter('Time', ':6.3f')
14 | losses = AverageMeter('Loss', ':.4e')
15 | top1 = AverageMeter('Acc@1', ':6.2f')
16 | top5 = AverageMeter('Acc@5', ':6.2f')
17 | progress = ProgressMeter(
18 | len(val_loader),
19 | [batch_time, losses, top1, top5],
20 | prefix='Test: ')
21 | 
22 | # switch to evaluate mode
23 | model.eval()
24 | if args.per_class_eval:
25 | classes = val_loader.dataset.classes
26 | confmat = ConfusionMatrix(len(classes))
27 | else:
28 | confmat = None
29 | 
30 | labels = []
31 | clusters = []
32 | pseudo_labels = []
33 | features = []
34 | with torch.no_grad():
35 | end = time.time()
36 | for i, (images, target, _) in enumerate(val_loader):
37 | images = images.to(device)
38 | target = target.to(device)
39 | 
40 | # compute output
41 | # output, output_alt, _, _, _, _ = model(images)
42 | o = model(images)
43 | output = o["y"]
44 | output_cluster = o["y_cluster_u"]
45 | features_batch = o["bottleneck_feature"]
46 | loss = F.cross_entropy(output, target)
47 | _, pseudo_clusters = torch.max(F.softmax(output_cluster, dim=-1), dim=-1)
48 | _, pseudo_labels_batch = torch.max(F.softmax(output, dim=-1), dim=-1)
49 | 
50 | labels.append(target.cpu())
51 | clusters.append(pseudo_clusters.cpu())
52 | pseudo_labels.append(pseudo_labels_batch.cpu())
53 | features.append(features_batch.cpu())
54 | 
55 | # measure accuracy and record loss
56 | acc1, acc5 = accuracy(output, target, topk=(1, 5))
57 | if confmat:
58 | confmat.update(target, output.argmax(1))
59 |
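# note: AverageMeter.update(value, n) weights each batch by its size, so losses.avg,
# top1.avg and top5.avg below are dataset-level means rather than means of per-batch statistics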
losses.update(loss.item(), images.size(0)) 60 | top1.update(acc1.item(), images.size(0)) 61 | top5.update(acc5.item(), images.size(0)) 62 | 63 | # measure elapsed time 64 | batch_time.update(time.time() - end) 65 | end = time.time() 66 | 67 | if i % args.print_freq == 0: 68 | progress.display(i) 69 | 70 | print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}' 71 | .format(top1=top1, top5=top5)) 72 | if confmat: 73 | _, acc, _ = confmat.compute() 74 | avg_return = acc.mean().item() * 100 75 | print(confmat.format(classes)) 76 | else: 77 | avg_return = top1.avg 78 | 79 | labels = torch.cat(labels, dim=0) 80 | clusters = torch.cat(clusters, dim=0) 81 | pseudo_labels = torch.cat(pseudo_labels, dim=0) 82 | features = torch.cat(features, dim=0) 83 | return avg_return -------------------------------------------------------------------------------- /common/utils/metric/keypoint_detection.py: -------------------------------------------------------------------------------- 1 | # TODO: add documentation 2 | import numpy as np 3 | 4 | 5 | def get_max_preds(batch_heatmaps): 6 | ''' 7 | get predictions from score maps 8 | heatmaps: numpy.ndarray([batch_size, num_joints, height, width]) 9 | ''' 10 | assert isinstance(batch_heatmaps, np.ndarray), \ 11 | 'batch_heatmaps should be numpy.ndarray' 12 | assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim' 13 | 14 | batch_size = batch_heatmaps.shape[0] 15 | num_joints = batch_heatmaps.shape[1] 16 | width = batch_heatmaps.shape[3] 17 | heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1)) 18 | idx = np.argmax(heatmaps_reshaped, 2) 19 | maxvals = np.amax(heatmaps_reshaped, 2) 20 | 21 | maxvals = maxvals.reshape((batch_size, num_joints, 1)) 22 | idx = idx.reshape((batch_size, num_joints, 1)) 23 | 24 | preds = np.tile(idx, (1, 1, 2)).astype(np.float32) 25 | 26 | preds[:, :, 0] = (preds[:, :, 0]) % width 27 | preds[:, :, 1] = np.floor((preds[:, :, 1]) / width) 28 | 29 | pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2)) 30 | pred_mask = pred_mask.astype(np.float32) 31 | 32 | preds *= pred_mask 33 | return preds, maxvals 34 | 35 | 36 | def calc_dists(preds, target, normalize): 37 | preds = preds.astype(np.float32) 38 | target = target.astype(np.float32) 39 | dists = np.zeros((preds.shape[1], preds.shape[0])) 40 | for n in range(preds.shape[0]): 41 | for c in range(preds.shape[1]): 42 | if target[n, c, 0] > 1 and target[n, c, 1] > 1: 43 | normed_preds = preds[n, c, :] / normalize[n] 44 | normed_targets = target[n, c, :] / normalize[n] 45 | dists[c, n] = np.linalg.norm(normed_preds - normed_targets) 46 | else: 47 | dists[c, n] = -1 48 | return dists 49 | 50 | 51 | def dist_acc(dists, thr=0.5): 52 | ''' Return percentage below threshold while ignoring values with a -1 ''' 53 | dist_cal = np.not_equal(dists, -1) 54 | num_dist_cal = dist_cal.sum() 55 | if num_dist_cal > 0: 56 | return np.less(dists[dist_cal], thr).sum() * 1.0 / num_dist_cal 57 | else: 58 | return -1 59 | 60 | 61 | def accuracy(output, target, hm_type='gaussian', thr=0.5): 62 | ''' 63 | Calculate accuracy according to PCK, 64 | but uses ground truth heatmap rather than x,y locations 65 | First value to be returned is average accuracy across 'idxs', 66 | followed by individual accuracies 67 | ''' 68 | idx = list(range(output.shape[1])) 69 | norm = 1.0 70 | if hm_type == 'gaussian': 71 | pred, _ = get_max_preds(output) 72 | target, _ = get_max_preds(target) 73 | h = output.shape[2] 74 | w = output.shape[3] 75 | norm = np.ones((pred.shape[0], 2)) * np.array([h, w]) / 10 76 | 
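# PCK-style normalization: each joint's error is divided by one tenth of the heatmap
# height/width, so with the default thr=0.5 a keypoint counts as correct when it lies
# within roughly 5% of the heatmap size from the ground-truth location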
dists = calc_dists(pred, target, norm) 77 | 78 | acc = np.zeros(len(idx)) 79 | avg_acc = 0 80 | cnt = 0 81 | 82 | for i in range(len(idx)): 83 | acc[i] = dist_acc(dists[idx[i]], thr) 84 | if acc[i] >= 0: 85 | avg_acc = avg_acc + acc[i] 86 | cnt += 1 87 | 88 | avg_acc = avg_acc / cnt if cnt != 0 else 0 89 | 90 | return acc, avg_acc, cnt, pred 91 | -------------------------------------------------------------------------------- /common/utils/analysis/a_distance.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import TensorDataset 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.utils.data import DataLoader 6 | from torch.optim import SGD 7 | from ..meter import AverageMeter 8 | from ..metric import binary_accuracy 9 | 10 | 11 | class ANet(nn.Module): 12 | def __init__(self, in_feature): 13 | super(ANet, self).__init__() 14 | self.layer = nn.Linear(in_feature, 1) 15 | self.sigmoid = nn.Sigmoid() 16 | 17 | def forward(self, x): 18 | x = self.layer(x) 19 | x = self.sigmoid(x) 20 | return x 21 | 22 | 23 | def calculate(source_feature: torch.Tensor, target_feature: torch.Tensor, 24 | device, progress=True, training_epochs=10): 25 | """ 26 | Calculate the :math:`\mathcal{A}`-distance, which is a measure for distribution discrepancy. 27 | 28 | The definition is :math:`dist_\mathcal{A} = 2 (1-2\epsilon)`, where :math:`\epsilon` is the 29 | test error of a classifier trained to discriminate the source from the target. 30 | 31 | Args: 32 | source_feature (tensor): features from source domain in shape :math:`(minibatch, F)` 33 | target_feature (tensor): features from target domain in shape :math:`(minibatch, F)` 34 | device (torch.device) 35 | progress (bool): if True, displays a the progress of training A-Net 36 | training_epochs (int): the number of epochs when training the classifier 37 | 38 | Returns: 39 | :math:`\mathcal{A}`-distance 40 | """ 41 | source_label = torch.ones((source_feature.shape[0], 1)) 42 | target_label = torch.zeros((target_feature.shape[0], 1)) 43 | feature = torch.cat([source_feature, target_feature], dim=0) 44 | label = torch.cat([source_label, target_label], dim=0) 45 | 46 | dataset = TensorDataset(feature, label) 47 | length = len(dataset) 48 | train_size = int(0.8 * length) 49 | val_size = length - train_size 50 | train_set, val_set = torch.utils.data.random_split(dataset, [train_size, val_size]) 51 | train_loader = DataLoader(train_set, batch_size=2, shuffle=True) 52 | val_loader = DataLoader(val_set, batch_size=8, shuffle=False) 53 | 54 | anet = ANet(feature.shape[1]).to(device) 55 | optimizer = SGD(anet.parameters(), lr=0.01) 56 | a_distance = 2.0 57 | for epoch in range(training_epochs): 58 | anet.train() 59 | for (x, label) in train_loader: 60 | x = x.to(device) 61 | label = label.to(device) 62 | anet.zero_grad() 63 | y = anet(x) 64 | loss = F.binary_cross_entropy(y, label) 65 | loss.backward() 66 | optimizer.step() 67 | 68 | anet.eval() 69 | meter = AverageMeter("accuracy", ":4.2f") 70 | with torch.no_grad(): 71 | for (x, label) in val_loader: 72 | x = x.to(device) 73 | label = label.to(device) 74 | y = anet(x) 75 | acc = binary_accuracy(y, label) 76 | meter.update(acc, x.shape[0]) 77 | error = 1 - meter.avg / 100 78 | a_distance = 2 * (1 - 2 * error) 79 | if progress: 80 | print("epoch {} accuracy: {} A-dist: {}".format(epoch, meter.avg, a_distance)) 81 | 82 | return a_distance 83 | 84 | 
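# Minimal usage sketch for `calculate`, assuming the project root is on PYTHONPATH;
# the feature tensors below are random placeholders standing in for features extracted
# from source/target data by a trained backbone.
#
#   import torch
#   from common.utils.analysis.a_distance import calculate
#
#   source_feature = torch.randn(200, 256)   # (num_source_samples, feature_dim)
#   target_feature = torch.randn(200, 256)   # (num_target_samples, feature_dim)
#   device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#   a_dist = calculate(source_feature, target_feature, device, progress=False, training_epochs=5)
#   # values close to 2 indicate easily separable (dissimilar) domains; values near 0 indicate similar domains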
-------------------------------------------------------------------------------- /common/vision/datasets/regression/mpi3d.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Sequence 2 | import os 3 | from .._util import download as download_data, check_exits 4 | from .image_regression import ImageRegression 5 | 6 | 7 | class MPI3D(ImageRegression): 8 | """`MPI3D `_ Dataset. 9 | 10 | Args: 11 | root (str): Root directory of dataset 12 | task (str): The task (domain) to create dataset. Choices include ``'C'``: Color, \ 13 | ``'N'``: Noisy and ``'S'``: Scream. 14 | split (str, optional): The dataset split, supports ``train``, or ``test``. 15 | factors (sequence[str]): Factors selected. Default: ('horizontal axis', 'vertical axis'). 16 | download (bool, optional): If true, downloads the dataset from the internet and puts it \ 17 | in root directory. If dataset is already downloaded, it is not downloaded again. 18 | transform (callable, optional): A function/transform that takes in an PIL image and returns a \ 19 | transformed version. E.g, :class:`torchvision.transforms.RandomCrop`. 20 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 21 | 22 | .. note:: In `root`, there will exist following files after downloading. 23 | :: 24 | real/ 25 | ... 26 | realistic/ 27 | toy/ 28 | image_list/ 29 | real_train.txt 30 | realistic_train.txt 31 | toy_train.txt 32 | real_test.txt 33 | realistic_test.txt 34 | toy_test.txt 35 | """ 36 | download_list = [ 37 | ("image_list", "image_list.zip", "https://cloud.tsinghua.edu.cn/f/f0ff24df967b42479d9e/?dl=1"), 38 | ("real", "real.tgz", "https://cloud.tsinghua.edu.cn/f/04c1318555fc4283862b/?dl=1"), 39 | ("realistic", "realistic.tgz", "https://cloud.tsinghua.edu.cn/f/2c0f7dacc73148cea593/?dl=1"), 40 | ("toy", "toy.tgz", "https://cloud.tsinghua.edu.cn/f/6327912a50374e20af95/?dl=1"), 41 | ] 42 | image_list = { 43 | "RL": "real", 44 | "RC": "realistic", 45 | "T": "toy" 46 | } 47 | FACTORS = ('horizontal axis', 'vertical axis') 48 | 49 | def __init__(self, root: str, task: str, split: Optional[str] = 'train', 50 | factors: Sequence[str] = ('horizontal axis', 'vertical axis'), 51 | download: Optional[bool] = True, target_transform=None, **kwargs): 52 | assert task in self.image_list 53 | assert split in ['train', 'test'] 54 | for factor in factors: 55 | assert factor in self.FACTORS 56 | 57 | factor_index = [self.FACTORS.index(factor) for factor in factors] 58 | 59 | if target_transform is None: 60 | target_transform = lambda x: x[list(factor_index)] / 40. 61 | else: 62 | target_transform = lambda x: target_transform(x[list(factor_index)]) / 40. 
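# the two pose factors ('horizontal axis', 'vertical axis') in MPI3D each take 40 discrete
# values (0-39), so dividing by 40. rescales the regression targets into [0, 1), matching
# the Sigmoid output range of the default Regressor head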
63 | 64 | data_list_file = os.path.join(root, "image_list", "{}_{}.txt".format(self.image_list[task], split)) 65 | 66 | if download: 67 | list(map(lambda args: download_data(root, *args), self.download_list)) 68 | else: 69 | list(map(lambda file_name, _: check_exits(root, file_name), self.download_list)) 70 | 71 | super(MPI3D, self).__init__(root, factors, data_list_file=data_list_file, target_transform=target_transform, **kwargs) 72 | 73 | -------------------------------------------------------------------------------- /common/vision/datasets/regression/image_regression.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional, Callable, Tuple, Any, List, Sequence 3 | import torchvision.datasets as datasets 4 | from torchvision.datasets.folder import default_loader 5 | import numpy as np 6 | 7 | 8 | class ImageRegression(datasets.VisionDataset): 9 | """A generic Dataset class for domain adaptation in image regression 10 | 11 | Args: 12 | root (str): Root directory of dataset 13 | factors (sequence[str]): Factors selected. Default: ('scale', 'position x', 'position y'). 14 | data_list_file (str): File to read the image list from. 15 | transform (callable, optional): A function/transform that takes in an PIL image and returns a \ 16 | transformed version. E.g, :class:`torchvision.transforms.RandomCrop`. 17 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 18 | 19 | .. note:: 20 | In `data_list_file`, each line has `1+len(factors)` values in the following format. 21 | :: 22 | source_dir/dog_xxx.png x11, x12, ... 23 | source_dir/cat_123.png x21, x22, ... 24 | target_dir/dog_xxy.png x31, x32, ... 25 | target_dir/cat_nsdf3.png x41, x42, ... 26 | 27 | The first value is the relative path of an image, and the rest values are the ground truth of the corresponding factors. 28 | If your data_list_file has different formats, please over-ride :meth:`ImageRegression.parse_data_file`. 29 | """ 30 | def __init__(self, root: str, factors: Sequence[str], data_list_file: str, 31 | transform: Optional[Callable] = None, target_transform: Optional[Callable] = None): 32 | super().__init__(root, transform=transform, target_transform=target_transform) 33 | self.samples = self.parse_data_file(data_list_file) 34 | self.factors = factors 35 | self.loader = default_loader 36 | self.data_list_file = data_list_file 37 | 38 | def __getitem__(self, index: int) -> Tuple[Any, Tuple[float]]: 39 | """ 40 | Args: 41 | index (int): Index 42 | 43 | Returns: 44 | (image, target) where target is a numpy float array. 
45 | """ 46 | path, target = self.samples[index] 47 | img = self.loader(path) 48 | if self.transform is not None: 49 | img = self.transform(img) 50 | if self.target_transform is not None and target is not None: 51 | target = self.target_transform(target) 52 | return img, target 53 | 54 | def __len__(self) -> int: 55 | return len(self.samples) 56 | 57 | def parse_data_file(self, file_name: str) -> List[Tuple[str, Any]]: 58 | """Parse file to data list 59 | 60 | Args: 61 | file_name (str): The path of data file 62 | 63 | Returns: 64 | List of (image path, (factors)) tuples 65 | """ 66 | with open(file_name, "r") as f: 67 | data_list = [] 68 | for line in f.readlines(): 69 | data = line.split() 70 | path = str(data[0]) 71 | target = np.array([float(d) for d in data[1:]], dtype=np.float) 72 | if not os.path.isabs(path): 73 | path = os.path.join(self.root, path) 74 | data_list.append((path, target)) 75 | return data_list 76 | 77 | @property 78 | def num_factors(self) -> int: 79 | return len(self.factors) -------------------------------------------------------------------------------- /common/vision/datasets/regression/dsprites.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Sequence 2 | import os 3 | from .._util import download as download_data, check_exits 4 | from .image_regression import ImageRegression 5 | 6 | 7 | class DSprites(ImageRegression): 8 | """`DSprites `_ Dataset. 9 | 10 | Args: 11 | root (str): Root directory of dataset 12 | task (str): The task (domain) to create dataset. Choices include ``'C'``: Color, \ 13 | ``'N'``: Noisy and ``'S'``: Scream. 14 | split (str, optional): The dataset split, supports ``train``, or ``test``. 15 | factors (sequence[str]): Factors selected. Default: ('scale', 'position x', 'position y'). 16 | download (bool, optional): If true, downloads the dataset from the internet and puts it \ 17 | in root directory. If dataset is already downloaded, it is not downloaded again. 18 | transform (callable, optional): A function/transform that takes in an PIL image and returns a \ 19 | transformed version. E.g, :class:`torchvision.transforms.RandomCrop`. 20 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 21 | 22 | .. note:: In `root`, there will exist following files after downloading. 23 | :: 24 | color/ 25 | ... 
26 | noisy/ 27 | scream/ 28 | image_list/ 29 | color_train.txt 30 | noisy_train.txt 31 | scream_train.txt 32 | color_test.txt 33 | noisy_test.txt 34 | scream_test.txt 35 | """ 36 | download_list = [ 37 | ("image_list", "image_list.zip", "https://cloud.tsinghua.edu.cn/f/fbbb6b1a43034712b34d/?dl=1"), 38 | ("color", "color.tgz", "https://cloud.tsinghua.edu.cn/f/9ce9f2abc61f49ed995a/?dl=1"), 39 | ("noisy", "noisy.tgz", "https://cloud.tsinghua.edu.cn/f/674435c8cb914ca0ad10/?dl=1"), 40 | ("scream", "scream.tgz", "https://cloud.tsinghua.edu.cn/f/0613675916ac4c3bb6bd/?dl=1"), 41 | ] 42 | image_list = { 43 | "C": "color", 44 | "N": "noisy", 45 | "S": "scream" 46 | } 47 | FACTORS = ('none', 'shape', 'scale', 'orientation', 'position x', 'position y') 48 | 49 | def __init__(self, root: str, task: str, split: Optional[str] = 'train', 50 | factors: Sequence[str] = ('scale', 'position x', 'position y'), 51 | download: Optional[bool] = True, target_transform=None, **kwargs): 52 | assert task in self.image_list 53 | assert split in ['train', 'test'] 54 | for factor in factors: 55 | assert factor in self.FACTORS 56 | 57 | factor_index = [self.FACTORS.index(factor) for factor in factors] 58 | 59 | if target_transform is None: 60 | target_transform = lambda x: x[list(factor_index)] 61 | else: 62 | target_transform = lambda x: target_transform(x[list(factor_index)]) 63 | 64 | data_list_file = os.path.join(root, "image_list", "{}_{}.txt".format(self.image_list[task], split)) 65 | 66 | if download: 67 | list(map(lambda args: download_data(root, *args), self.download_list)) 68 | else: 69 | list(map(lambda file_name, _: check_exits(root, file_name), self.download_list)) 70 | 71 | super(DSprites, self).__init__(root, factors, data_list_file=data_list_file, target_transform=target_transform, **kwargs) 72 | 73 | -------------------------------------------------------------------------------- /common/vision/datasets/partial/__init__.py: -------------------------------------------------------------------------------- 1 | from ..imagelist import ImageList 2 | from ..office31 import Office31 3 | from ..officehome import OfficeHome 4 | from ..visda2017 import VisDA2017 5 | from ..officecaltech import OfficeCaltech 6 | from .imagenet_caltech import ImageNetCaltech 7 | from .caltech_imagenet import CaltechImageNet 8 | from common.vision.datasets.partial.imagenet_caltech import ImageNetCaltech 9 | from typing import Sequence, ClassVar 10 | 11 | 12 | __all__ = ['Office31', 'OfficeHome', "VisDA2017", "CaltechImageNet", "ImageNetCaltech"] 13 | 14 | 15 | def partial(dataset_class: ClassVar, partial_classes: Sequence[str]) -> ClassVar: 16 | """ 17 | Convert a dataset into its partial version. 18 | 19 | In other words, those samples which doesn't belong to `partial_classes` will be discarded. 20 | Yet `partial` will not change the label space of `dataset_class`. 21 | 22 | Args: 23 | dataset_class (class): Dataset class. Only subclass of ``ImageList`` can be partial. 24 | partial_classes (sequence[str]): A sequence of which categories need to be kept in the partial dataset.\ 25 | Each element of `partial_classes` must belong to the `classes` list of `dataset_class`. 
26 | 27 | Examples:: 28 | 29 | >>> partial_classes = ['back_pack', 'bike', 'calculator', 'headphones', 'keyboard'] 30 | >>> # create a partial dataset class 31 | >>> PartialOffice31 = partial(Office31, partial_classes) 32 | >>> # create an instance of the partial dataset 33 | >>> dataset = PartialDataset(root="data/office31", task="A") 34 | 35 | """ 36 | if not (issubclass(dataset_class, ImageList)): 37 | raise Exception("Only subclass of ImageList can be partial") 38 | 39 | class PartialDataset(dataset_class): 40 | def __init__(self, **kwargs): 41 | super(PartialDataset, self).__init__(**kwargs) 42 | assert all([c in self.classes for c in partial_classes]) 43 | samples = [] 44 | for (path, label) in self.samples: 45 | class_name = self.classes[label] 46 | if class_name in partial_classes: 47 | samples.append((path, label)) 48 | self.samples = samples 49 | self.partial_classes = partial_classes 50 | self.partial_classes_idx = [self.class_to_idx[c] for c in partial_classes] 51 | 52 | return PartialDataset 53 | 54 | 55 | def default_partial(dataset_class: ClassVar) -> ClassVar: 56 | """ 57 | Default partial used in some paper. 58 | 59 | Args: 60 | dataset_class (class): Dataset class. Currently, dataset_class must be one of 61 | :class:`~common.vision.datasets.office31.Office31`, :class:`~common.vision.datasets.officehome.OfficeHome`, 62 | :class:`~common.vision.datasets.visda2017.VisDA2017`, 63 | :class:`~common.vision.datasets.partial.imagenet_caltech.ImageNetCaltech` 64 | and :class:`~common.vision.datasets.partial.caltech_imagenet.CaltechImageNet`. 65 | """ 66 | if dataset_class == Office31: 67 | kept_classes = OfficeCaltech.CLASSES 68 | elif dataset_class == OfficeHome: 69 | kept_classes = sorted(OfficeHome.CLASSES)[:25] 70 | elif dataset_class == VisDA2017: 71 | kept_classes = sorted(VisDA2017.CLASSES)[:6] 72 | elif dataset_class in [ImageNetCaltech, CaltechImageNet]: 73 | kept_classes = dataset_class.CLASSES 74 | else: 75 | raise NotImplementedError("Unknown partial domain adaptation dataset: {}".format(dataset_class.__name__)) 76 | return partial(dataset_class, kept_classes) -------------------------------------------------------------------------------- /icon/uda_backbone.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Optional, List, Dict 2 | import torch.nn as nn 3 | import torch 4 | from torch.nn.utils.weight_norm import WeightNorm 5 | from dalib.modules.grl import WarmStartGradientReverseLayer 6 | import torch.nn.functional as F 7 | 8 | def shift_log(x: torch.Tensor, offset: Optional[float] = 1e-6) -> torch.Tensor: 9 | return torch.log(torch.clamp(x + offset, max=1.)) 10 | 11 | class ImageClassifier(nn.Module): 12 | def __init__(self, backbone: nn.Module, num_classes: int, bottleneck: Optional[nn.Module] = None, 13 | bottleneck_dim: Optional[int] = -1, head: Optional[nn.Module] = None, finetune=True): 14 | super(ImageClassifier, self).__init__() 15 | self.backbone = nn.Sequential(backbone,nn.AdaptiveAvgPool2d(output_size=(1, 1)), 16 | nn.Flatten()) 17 | bottleneck = nn.Sequential( 18 | nn.Linear(backbone.out_features, bottleneck_dim), 19 | nn.BatchNorm1d(bottleneck_dim), 20 | nn.ReLU() 21 | ) 22 | self.num_classes = num_classes 23 | if bottleneck is None: 24 | self.bottleneck = nn.Sequential( 25 | ) 26 | self._features_dim = backbone.out_features 27 | else: 28 | self.bottleneck = bottleneck 29 | assert bottleneck_dim > 0 30 | self._features_dim = bottleneck_dim 31 | # cls head, eqinv head, cluster head 32 | if 
head is None: 33 | self.head = nn.Linear(self._features_dim, num_classes) 34 | else: 35 | self.head = head 36 | self.eqinv_head = nn.Linear(bottleneck_dim, num_classes) 37 | self.cluster_head = nn.Linear(bottleneck_dim, num_classes) 38 | self.finetune = finetune 39 | 40 | @property 41 | def features_dim(self) -> int: 42 | """The dimension of features before the final `head` layer""" 43 | return self._features_dim 44 | 45 | def forward(self, x: torch.Tensor, freeze_feature=False) -> Tuple[torch.Tensor, torch.Tensor]: 46 | """""" 47 | if freeze_feature: 48 | with torch.no_grad(): 49 | f = self.backbone(x) 50 | else: 51 | f = self.backbone(x) 52 | f1 = self.bottleneck(f) 53 | predictions = self.head(f1) 54 | preds_nograd = self.head(f1.detach()) 55 | eqinv_preds = self.eqinv_head(f1) 56 | eqinv_preds_nograd = self.eqinv_head(f1.detach()) 57 | outputs = { 58 | "y": predictions, 59 | "y_cluster_all": eqinv_preds, 60 | "feature": f, 61 | "bottleneck_feature": f1, 62 | "y_nograd": preds_nograd, 63 | "y_cluster_all_nograd": eqinv_preds_nograd 64 | } 65 | outputs["y_cluster_u"] = self.cluster_head(f1) 66 | outputs["y_cluster_u_nograd"] = self.cluster_head(f1.detach()) 67 | return outputs 68 | 69 | def get_parameters(self, base_lr=1.0) -> List[Dict]: 70 | """A parameter list which decides optimization hyper-parameters, 71 | such as the relative learning rate of each layer 72 | """ 73 | params = [ 74 | {"params": self.backbone.parameters(), "lr": 0.1 * base_lr if self.finetune else 1.0 * base_lr}, 75 | {"params": self.bottleneck.parameters(), "lr": 1.0 * base_lr}, 76 | {"params": self.head.parameters(), "lr": 1.0 * base_lr}, 77 | {"params": self.eqinv_head.parameters(), "lr": 1.0 * base_lr}, 78 | {"params": self.cluster_head.parameters(), "lr": 1.0 * base_lr}, 79 | ] 80 | return params -------------------------------------------------------------------------------- /common/vision/models/digits.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | class LeNet: 4 | def __init__(self, num_classes=10): 5 | self.num_classes = num_classes 6 | self.bottleneck_dim = 50 * 4 * 4 7 | 8 | def backbone(self): 9 | return nn.Sequential( 10 | nn.Conv2d(1, 20, kernel_size=5), 11 | nn.MaxPool2d(2), 12 | nn.ReLU(), 13 | nn.Conv2d(20, 50, kernel_size=5), 14 | nn.Dropout2d(p=0.5), 15 | nn.MaxPool2d(2), 16 | nn.ReLU(), 17 | ) 18 | 19 | def bottleneck(self): 20 | return nn.Flatten(start_dim=1) 21 | 22 | def head(self): 23 | return nn.Sequential( 24 | nn.Linear(self.bottleneck_dim, 500), 25 | nn.ReLU(), 26 | nn.Dropout(p=0.5), 27 | nn.Linear(500, self.num_classes) 28 | ) 29 | 30 | def complete(self): 31 | return nn.Sequential( 32 | self.backbone(), 33 | self.bottleneck(), 34 | self.head() 35 | ) 36 | 37 | 38 | class DTN: 39 | def __init__(self, num_classes=10): 40 | self.num_classes = num_classes 41 | self.bottleneck_dim = 256 * 4 * 4 42 | 43 | def backbone(self): 44 | return nn.Sequential( 45 | nn.Conv2d(3, 64, kernel_size=5, stride=2, padding=2), 46 | nn.BatchNorm2d(64), 47 | nn.Dropout2d(0.1), 48 | nn.ReLU(), 49 | nn.Conv2d(64, 128, kernel_size=5, stride=2, padding=2), 50 | nn.BatchNorm2d(128), 51 | nn.Dropout2d(0.3), 52 | nn.ReLU(), 53 | nn.Conv2d(128, 256, kernel_size=5, stride=2, padding=2), 54 | nn.BatchNorm2d(256), 55 | nn.Dropout2d(0.5), 56 | nn.ReLU(), 57 | ) 58 | 59 | def bottleneck(self): 60 | return nn.Flatten(start_dim=1) 61 | 62 | def head(self): 63 | return nn.Sequential( 64 | nn.Linear(self.bottleneck_dim, 512), 65 | nn.BatchNorm1d(512), 66 | 
nn.ReLU(), 67 | nn.Dropout(), 68 | nn.Linear(512, self.num_classes) 69 | ) 70 | 71 | def complete(self): 72 | return nn.Sequential( 73 | self.backbone(), 74 | self.bottleneck(), 75 | self.head() 76 | ) 77 | 78 | 79 | def lenet(**kwargs): 80 | """LeNet model from 81 | `"Gradient-based learning applied to document recognition" `_ 82 | 83 | Args: 84 | num_classes (int): number of classes. Default: 10 85 | 86 | .. note:: 87 | The input image size must be 28 x 28. 88 | 89 | Examples:: 90 | >>> # Get the whole LeNet model 91 | >>> model = lenet().complete() 92 | >>> # Or combine it by yourself 93 | >>> model = nn.Sequential(lenet().backbone(), lenet().bottleneck(), lenet().head()) 94 | """ 95 | return LeNet(**kwargs) 96 | 97 | 98 | def dtn(**kwargs): 99 | """ DTN model 100 | 101 | Args: 102 | num_classes (int): number of classes. Default: 10 103 | 104 | .. note:: 105 | The input image size must be 32 x 32. 106 | 107 | Examples:: 108 | >>> # Get the whole DTN model 109 | >>> model = dtn().complete() 110 | >>> # Or combine it by yourself 111 | >>> model = nn.Sequential(dtn().backbone(), dtn().bottleneck(), dtn().head()) 112 | """ 113 | return DTN(**kwargs) -------------------------------------------------------------------------------- /common/vision/datasets/officehome.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional 3 | from .imagelist import ImageList 4 | from ._util import download as download_data, check_exits 5 | 6 | 7 | class OfficeHome(ImageList): 8 | """`OfficeHome `_ Dataset. 9 | 10 | Args: 11 | root (str): Root directory of dataset 12 | task (str): The task (domain) to create dataset. Choices include ``'Ar'``: Art, \ 13 | ``'Cl'``: Clipart, ``'Pr'``: Product and ``'Rw'``: Real_World. 14 | download (bool, optional): If true, downloads the dataset from the internet and puts it \ 15 | in root directory. If dataset is already downloaded, it is not downloaded again. 16 | transform (callable, optional): A function/transform that takes in an PIL image and returns a \ 17 | transformed version. E.g, :class:`torchvision.transforms.RandomCrop`. 18 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 19 | 20 | .. note:: In `root`, there will exist following files after downloading. 21 | :: 22 | Art/ 23 | Alarm_Clock/*.jpg 24 | ... 
25 | Clipart/ 26 | Product/ 27 | Real_World/ 28 | image_list/ 29 | Art.txt 30 | Clipart.txt 31 | Product.txt 32 | Real_World.txt 33 | """ 34 | download_list = [ 35 | ("image_list", "image_list.zip", "https://cloud.tsinghua.edu.cn/f/ca3a3b6a8d554905b4cd/?dl=1"), 36 | ("Art", "Art.tgz", "https://cloud.tsinghua.edu.cn/f/4691878067d04755beab/?dl=1"), 37 | ("Clipart", "Clipart.tgz", "https://cloud.tsinghua.edu.cn/f/0d41e7da4558408ea5aa/?dl=1"), 38 | ("Product", "Product.tgz", "https://cloud.tsinghua.edu.cn/f/76186deacd7c4fa0a679/?dl=1"), 39 | ("Real_World", "Real_World.tgz", "https://cloud.tsinghua.edu.cn/f/dee961894cc64b1da1d7/?dl=1") 40 | ] 41 | image_list = { 42 | "Ar": "image_list/Art.txt", 43 | "Cl": "image_list/Clipart.txt", 44 | "Pr": "image_list/Product.txt", 45 | "Rw": "image_list/Real_World.txt", 46 | } 47 | CLASSES = ['Drill', 'Exit_Sign', 'Bottle', 'Glasses', 'Computer', 'File_Cabinet', 'Shelf', 'Toys', 'Sink', 48 | 'Laptop', 'Kettle', 'Folder', 'Keyboard', 'Flipflops', 'Pencil', 'Bed', 'Hammer', 'ToothBrush', 'Couch', 49 | 'Bike', 'Postit_Notes', 'Mug', 'Webcam', 'Desk_Lamp', 'Telephone', 'Helmet', 'Mouse', 'Pen', 'Monitor', 50 | 'Mop', 'Sneakers', 'Notebook', 'Backpack', 'Alarm_Clock', 'Push_Pin', 'Paper_Clip', 'Batteries', 'Radio', 51 | 'Fan', 'Ruler', 'Pan', 'Screwdriver', 'Trash_Can', 'Printer', 'Speaker', 'Eraser', 'Bucket', 'Chair', 52 | 'Calendar', 'Calculator', 'Flowers', 'Lamp_Shade', 'Spoon', 'Candles', 'Clipboards', 'Scissors', 'TV', 53 | 'Curtains', 'Fork', 'Soda', 'Table', 'Knives', 'Oven', 'Refrigerator', 'Marker'] 54 | 55 | def __init__(self, root: str, task: str, download: Optional[bool] = False, **kwargs): 56 | assert task in self.image_list 57 | data_list_file = os.path.join(root, self.image_list[task]) 58 | 59 | if download: 60 | list(map(lambda args: download_data(root, *args), self.download_list)) 61 | else: 62 | list(map(lambda file_name, _: check_exits(root, file_name), self.download_list)) 63 | 64 | super(OfficeHome, self).__init__(root, OfficeHome.CLASSES, data_list_file=data_list_file, **kwargs) 65 | 66 | @classmethod 67 | def domains(cls): 68 | return list(cls.image_list.keys()) -------------------------------------------------------------------------------- /common/vision/datasets/oxfordpet.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional 3 | from .imagelist import ImageList 4 | from ._util import download as download_data, check_exits 5 | 6 | 7 | class OxfordIIITPet(ImageList): 8 | """`The Oxford-IIIT Pet `_ Dataset. 9 | 10 | Args: 11 | root (str): Root directory of dataset 12 | split (str, optional): The dataset split, supports ``train``, or ``test``. 13 | sample_rate (int): The sampling rates to sample random ``training`` images for each category. 14 | Choices include 100, 50, 30, 15. Default: 100. 15 | download (bool, optional): If true, downloads the dataset from the internet and puts it \ 16 | in root directory. If dataset is already downloaded, it is not downloaded again. 17 | transform (callable, optional): A function/transform that takes in an PIL image and returns a \ 18 | transformed version. E.g, :class:`torchvision.transforms.RandomCrop`. 19 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 20 | 21 | .. note:: In `root`, there will exist following files after downloading. 
22 | :: 23 | train/ 24 | test/ 25 | image_list/ 26 | train_100.txt 27 | train_50.txt 28 | train_30.txt 29 | train_15.txt 30 | test.txt 31 | """ 32 | download_list = [ 33 | ("image_list", "image_list.zip", "https://cloud.tsinghua.edu.cn/f/738d75de56844bd0951b/?dl=1"), 34 | ("train", "train.tgz", "https://cloud.tsinghua.edu.cn/f/f40cf9f8c9ac4b04ba9b/?dl=1"), 35 | ("test", "test.tgz", "https://cloud.tsinghua.edu.cn/f/72b4bff8b5c84f4ba240/?dl=1"), 36 | ] 37 | image_list = { 38 | "train": "image_list/train_100.txt", 39 | "train100": "image_list/train_100.txt", 40 | "train50": "image_list/train_50.txt", 41 | "train30": "image_list/train_30.txt", 42 | "train15": "image_list/train_15.txt", 43 | "test": "image_list/test.txt", 44 | "test100": "image_list/test.txt", 45 | } 46 | CLASSES = ['Abyssinian', 'american_bulldog', 'american_pit_bull_terrier', 'basset_hound', 'beagle', 'Bengal', 47 | 'Birman', 'Bombay', 'boxer', 'British_Shorthair', 'chihuahua', 'Egyptian_Mau', 'english_cocker_spaniel', 48 | 'english_setter', 'german_shorthaired', 'great_pyrenees', 'havanese', 'japanese_chin', 'keeshond', 'leonberger', 49 | 'Maine_Coon', 'miniature_pinscher', 'newfoundland', 'Persian', 'pomeranian', 'pug', 'Ragdoll', 50 | 'Russian_Blue', 'saint_bernard', 'samoyed', 'scottish_terrier', 'shiba_inu', 'Siamese', 'Sphynx', 51 | 'staffordshire_bull_terrier', 'wheaten_terrier', 'yorkshire_terrier'] 52 | 53 | def __init__(self, root: str, split: str, sample_rate: Optional[int] =100, download: Optional[bool] = False, **kwargs): 54 | 55 | if split == 'train': 56 | list_name = 'train' + str(sample_rate) 57 | assert list_name in self.image_list 58 | data_list_file = os.path.join(root, self.image_list[list_name]) 59 | else: 60 | data_list_file = os.path.join(root, self.image_list['test']) 61 | 62 | if download: 63 | list(map(lambda args: download_data(root, *args), self.download_list)) 64 | else: 65 | list(map(lambda file_name, _: check_exits(root, file_name), self.download_list)) 66 | 67 | super(OxfordIIITPet, self).__init__(root, OxfordIIITPet.CLASSES, data_list_file=data_list_file, **kwargs) 68 | -------------------------------------------------------------------------------- /common/modules/regressor.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Optional, List, Dict 2 | import torch.nn as nn 3 | import torch 4 | 5 | __all__ = ['Regressor'] 6 | 7 | 8 | class Regressor(nn.Module): 9 | """A generic Regressor class for domain adaptation. 10 | 11 | Args: 12 | backbone (torch.nn.Module): Any backbone to extract 2-d features from data 13 | num_factors (int): Number of factors 14 | bottleneck (torch.nn.Module, optional): Any bottleneck layer. Use no bottleneck by default 15 | bottleneck_dim (int, optional): Feature dimension of the bottleneck layer. Default: -1 16 | head (torch.nn.Module, optional): Any classifier head. Use `nn.Linear` by default 17 | finetune (bool): Whether finetune the classifier or train from scratch. Default: True 18 | 19 | .. note:: 20 | The learning rate of this regressor is set 10 times to that of the feature extractor for better accuracy 21 | by default. If you have other optimization strategies, please over-ride :meth:`~Regressor.get_parameters`. 
22 | 23 | Inputs: 24 | - x (tensor): input data fed to `backbone` 25 | 26 | Outputs: 27 | - predictions: regressor's predictions 28 | - features: features after `bottleneck` layer and before `head` layer 29 | 30 | Shape: 31 | - Inputs: (minibatch, *) where * means, any number of additional dimensions 32 | - predictions: (minibatch, `num_factors`) 33 | - features: (minibatch, `features_dim`) 34 | 35 | """ 36 | 37 | def __init__(self, backbone: nn.Module, num_factors: int, bottleneck: Optional[nn.Module] = None, 38 | bottleneck_dim=-1, head: Optional[nn.Module] = None, finetune=True): 39 | super(Regressor, self).__init__() 40 | self.backbone = backbone 41 | self.num_factors = num_factors 42 | if bottleneck is None: 43 | feature_dim = backbone.out_features 44 | self.bottleneck = nn.Sequential( 45 | nn.Conv2d(feature_dim, feature_dim, kernel_size=3, stride=1, padding=1), 46 | nn.BatchNorm2d(feature_dim, feature_dim), 47 | nn.ReLU(), 48 | nn.AdaptiveAvgPool2d(output_size=(1, 1)), 49 | nn.Flatten() 50 | ) 51 | self._features_dim = feature_dim 52 | else: 53 | self.bottleneck = bottleneck 54 | assert bottleneck_dim > 0 55 | self._features_dim = bottleneck_dim 56 | 57 | if head is None: 58 | self.head = nn.Sequential( 59 | nn.Linear(self._features_dim, num_factors), 60 | nn.Sigmoid() 61 | ) 62 | else: 63 | self.head = head 64 | self.finetune = finetune 65 | 66 | @property 67 | def features_dim(self) -> int: 68 | """The dimension of features before the final `head` layer""" 69 | return self._features_dim 70 | 71 | def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: 72 | """""" 73 | f = self.backbone(x) 74 | f = self.bottleneck(f) 75 | predictions = self.head(f) 76 | return predictions, f 77 | 78 | def get_parameters(self, base_lr=1.0) -> List[Dict]: 79 | """A parameter list which decides optimization hyper-parameters, 80 | such as the relative learning rate of each layer 81 | """ 82 | params = [ 83 | {"params": self.backbone.parameters(), "lr": 0.1 * base_lr if self.finetune else 1.0 * base_lr}, 84 | {"params": self.bottleneck.parameters(), "lr": 1.0 * base_lr}, 85 | {"params": self.head.parameters(), "lr": 1.0 * base_lr}, 86 | ] 87 | 88 | return params 89 | 90 | 91 | -------------------------------------------------------------------------------- /common/utils/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import time 4 | 5 | class TextLogger(object): 6 | """Writes stream output to external text file. 7 | 8 | Args: 9 | filename (str): the file to write stream output 10 | stream: the stream to read from. Default: sys.stdout 11 | """ 12 | def __init__(self, filename, stream=sys.stdout): 13 | self.terminal = stream 14 | self.log = open(filename, 'a') 15 | self.file_close = False 16 | 17 | def write(self, message): 18 | self.terminal.write(message) 19 | if not self.file_close: 20 | self.log.write(message) 21 | self.flush() 22 | 23 | def flush(self): 24 | self.terminal.flush() 25 | if not self.file_close: 26 | self.log.flush() 27 | 28 | def close(self): 29 | # self.terminal.close() 30 | self.file_close = True 31 | self.log.close() 32 | 33 | def close_terminal(self): 34 | self.terminal.close() 35 | 36 | 37 | class CompleteLogger: 38 | """ 39 | A useful logger that 40 | 41 | - writes outputs to files and displays them on the console at the same time. 42 | - manages the directory of checkpoints and debugging images. 
43 | 44 | Args: 45 | root (str): the root directory of logger 46 | phase (str): the phase of training. 47 | 48 | """ 49 | 50 | def __init__(self, root, phase='train'): 51 | self.root = root 52 | self.phase = phase 53 | self.visualize_directory = os.path.join(self.root, "visualize") 54 | self.checkpoint_directory = os.path.join(self.root, "checkpoints") 55 | self.epoch = 0 56 | 57 | os.makedirs(self.root, exist_ok=True) 58 | os.makedirs(self.visualize_directory, exist_ok=True) 59 | os.makedirs(self.checkpoint_directory, exist_ok=True) 60 | 61 | # redirect std out 62 | now = time.strftime("%Y-%m-%d-%H_%M_%S", time.localtime(time.time())) 63 | log_filename = os.path.join(self.root, "{}-{}.txt".format(phase, now)) 64 | if os.path.exists(log_filename): 65 | os.remove(log_filename) 66 | self.logger = TextLogger(log_filename) 67 | sys.stdout = self.logger 68 | sys.stderr = self.logger 69 | if phase != 'train': 70 | self.set_epoch(phase) 71 | 72 | def set_epoch(self, epoch): 73 | """Set the epoch number. Please use it during training.""" 74 | os.makedirs(os.path.join(self.visualize_directory, str(epoch)), exist_ok=True) 75 | self.epoch = epoch 76 | 77 | def _get_phase_or_epoch(self): 78 | if self.phase == 'train': 79 | return str(self.epoch) 80 | else: 81 | return self.phase 82 | 83 | def get_image_path(self, filename: str): 84 | """ 85 | Get the full image path for a specific filename 86 | """ 87 | return os.path.join(self.visualize_directory, self._get_phase_or_epoch(), filename) 88 | 89 | def get_checkpoint_path(self, name=None): 90 | """ 91 | Get the full checkpoint path. 92 | 93 | Args: 94 | name (optional): the filename (without file extension) to save checkpoint. 95 | If None, when the phase is ``train``, checkpoint will be saved to ``{epoch}.pth``. 96 | Otherwise, will be saved to ``{phase}.pth``. 97 | 98 | """ 99 | if name is None: 100 | name = self._get_phase_or_epoch() 101 | name = str(name) 102 | return os.path.join(self.checkpoint_directory, name + ".pth") 103 | 104 | def close(self): 105 | self.logger.close() 106 | -------------------------------------------------------------------------------- /common/vision/datasets/keypoint_detection/lsp.py: -------------------------------------------------------------------------------- 1 | import scipy.io as scio 2 | import os 3 | 4 | from PIL import ImageFile 5 | import torch 6 | from .keypoint_dataset import Body16KeypointDataset 7 | from ...transforms.keypoint_detection import * 8 | from .util import * 9 | from .._util import download as download_data, check_exits 10 | 11 | 12 | ImageFile.LOAD_TRUNCATED_IMAGES = True 13 | 14 | 15 | class LSP(Body16KeypointDataset): 16 | """`Leeds Sports Pose Dataset `_ 17 | 18 | Args: 19 | root (str): Root directory of dataset 20 | split (str, optional): PlaceHolder. 21 | task (str, optional): Placeholder. 22 | download (bool, optional): If true, downloads the dataset from the internet and puts it \ 23 | in root directory. If dataset is already downloaded, it is not downloaded again. 24 | transforms (callable, optional): PlaceHolder. 25 | heatmap_size (tuple): (width, height) of the heatmap. Default: (64, 64) 26 | sigma (int): sigma parameter when generate the heatmap. Default: 2 27 | 28 | .. note:: In `root`, there will exist following files after downloading. 29 | :: 30 | lsp/ 31 | images/ 32 | joints.mat 33 | 34 | .. note:: 35 | LSP is only used for target domain. Due to the small dataset size, the whole dataset is used 36 | no matter what ``split`` is. Also, the transform is fixed. 
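    Examples::

        >>> # a minimal construction sketch; "data/lsp" is a placeholder root path
        >>> dataset = LSP(root="data/lsp", split='train', download=True)
        >>> image, target, target_weight, meta = dataset[0]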
37 | """ 38 | def __init__(self, root, split='train', task='all', download=True, image_size=(256, 256), transforms=None, **kwargs): 39 | if download: 40 | download_data(root, "images", "lsp_dataset.zip", 41 | "https://cloud.tsinghua.edu.cn/f/46ea73c89abc46bfb125/?dl=1") 42 | else: 43 | check_exits(root, "lsp") 44 | 45 | assert split in ['train', 'test', 'all'] 46 | self.split = split 47 | 48 | samples = [] 49 | annotations = scio.loadmat(os.path.join(root, "joints.mat"))['joints'].transpose((2, 1, 0)) 50 | for i in range(0, 2000): 51 | image = "im{0:04d}.jpg".format(i+1) 52 | annotation = annotations[i] 53 | samples.append((image, annotation)) 54 | 55 | self.joints_index = (0, 1, 2, 3, 4, 5, 13, 13, 12, 13, 6, 7, 8, 9, 10, 11) 56 | self.visible = np.array([1.] * 6 + [0, 0] + [1.] * 8, dtype=np.float32) 57 | normalize = Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 58 | transforms = Compose([ 59 | ResizePad(image_size[0]), 60 | ToTensor(), 61 | normalize 62 | ]) 63 | super(LSP, self).__init__(root, samples, transforms=transforms, image_size=image_size, **kwargs) 64 | 65 | def __getitem__(self, index): 66 | sample = self.samples[index] 67 | image_name = sample[0] 68 | image = Image.open(os.path.join(self.root, "images", image_name)) 69 | keypoint2d = sample[1][self.joints_index, :2] 70 | image, data = self.transforms(image, keypoint2d=keypoint2d) 71 | keypoint2d = data['keypoint2d'] 72 | visible = self.visible * (1-sample[1][self.joints_index, 2]) 73 | visible = visible[:, np.newaxis] 74 | 75 | # 2D heatmap 76 | target, target_weight = generate_target(keypoint2d, visible, self.heatmap_size, self.sigma, self.image_size) 77 | target = torch.from_numpy(target) 78 | target_weight = torch.from_numpy(target_weight) 79 | 80 | meta = { 81 | 'image': image_name, 82 | 'keypoint2d': keypoint2d, # (NUM_KEYPOINTS x 2) 83 | 'keypoint3d': np.zeros((self.num_keypoints, 3)).astype(keypoint2d.dtype), # (NUM_KEYPOINTS x 3) 84 | } 85 | return image, target, target_weight, meta 86 | -------------------------------------------------------------------------------- /common/modules/classifier.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Optional, List, Dict 2 | import torch.nn as nn 3 | import torch 4 | 5 | __all__ = ['Classifier'] 6 | 7 | 8 | class Classifier(nn.Module): 9 | """A generic Classifier class for domain adaptation. 10 | 11 | Args: 12 | backbone (torch.nn.Module): Any backbone to extract 2-d features from data 13 | num_classes (int): Number of classes 14 | bottleneck (torch.nn.Module, optional): Any bottleneck layer. Use no bottleneck by default 15 | bottleneck_dim (int, optional): Feature dimension of the bottleneck layer. Default: -1 16 | head (torch.nn.Module, optional): Any classifier head. Use :class:`torch.nn.Linear` by default 17 | finetune (bool): Whether finetune the classifier or train from scratch. Default: True 18 | 19 | .. note:: 20 | Different classifiers are used in different domain adaptation algorithms to achieve better accuracy 21 | respectively, and we provide a suggested `Classifier` for different algorithms. 22 | Remember they are not the core of algorithms. You can implement your own `Classifier` and combine it with 23 | the domain adaptation algorithm in this algorithm library. 24 | 25 | .. note:: 26 | The learning rate of this classifier is set 10 times to that of the feature extractor for better accuracy 27 | by default. 
If you have other optimization strategies, please over-ride :meth:`~Classifier.get_parameters`. 28 | 29 | Inputs: 30 | - x (tensor): input data fed to `backbone` 31 | 32 | Outputs: 33 | - predictions: classifier's predictions 34 | - features: features after `bottleneck` layer and before `head` layer 35 | 36 | Shape: 37 | - Inputs: (minibatch, *) where * means, any number of additional dimensions 38 | - predictions: (minibatch, `num_classes`) 39 | - features: (minibatch, `features_dim`) 40 | 41 | """ 42 | 43 | def __init__(self, backbone: nn.Module, num_classes: int, bottleneck: Optional[nn.Module] = None, 44 | bottleneck_dim: Optional[int] = -1, head: Optional[nn.Module] = None, finetune=True): 45 | super(Classifier, self).__init__() 46 | self.backbone = backbone 47 | self.num_classes = num_classes 48 | if bottleneck is None: 49 | self.bottleneck = nn.Sequential( 50 | nn.AdaptiveAvgPool2d(output_size=(1, 1)), 51 | nn.Flatten() 52 | ) 53 | self._features_dim = backbone.out_features 54 | else: 55 | self.bottleneck = bottleneck 56 | assert bottleneck_dim > 0 57 | self._features_dim = bottleneck_dim 58 | 59 | if head is None: 60 | self.head = nn.Linear(self._features_dim, num_classes) 61 | else: 62 | self.head = head 63 | self.finetune = finetune 64 | 65 | @property 66 | def features_dim(self) -> int: 67 | """The dimension of features before the final `head` layer""" 68 | return self._features_dim 69 | 70 | def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: 71 | """""" 72 | f = self.backbone(x) 73 | f = self.bottleneck(f) 74 | predictions = self.head(f) 75 | return predictions, f 76 | 77 | def get_parameters(self, base_lr=1.0) -> List[Dict]: 78 | """A parameter list which decides optimization hyper-parameters, 79 | such as the relative learning rate of each layer 80 | """ 81 | params = [ 82 | {"params": self.backbone.parameters(), "lr": 0.1 * base_lr if self.finetune else 1.0 * base_lr}, 83 | {"params": self.bottleneck.parameters(), "lr": 1.0 * base_lr}, 84 | {"params": self.head.parameters(), "lr": 1.0 * base_lr}, 85 | ] 86 | 87 | return params 88 | -------------------------------------------------------------------------------- /dalib/adaptation/mcc.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from common.modules.classifier import Classifier as ClassifierBase 7 | from ..modules.entropy import entropy 8 | 9 | 10 | __all__ = ['MinimumClassConfusionLoss', 'ImageClassifier'] 11 | 12 | 13 | class MinimumClassConfusionLoss(nn.Module): 14 | r""" 15 | Minimum Class Confusion loss minimizes the class confusion in the target predictions. 16 | 17 | You can see more details in `Minimum Class Confusion for Versatile Domain Adaptation (ECCV 2020) `_ 18 | 19 | Args: 20 | temperature (float) : The temperature for rescaling, the prediction will shrink to vanilla softmax if 21 | temperature is 1.0. 22 | 23 | .. note:: 24 | Make sure that temperature is larger than 0. 25 | 26 | Inputs: g_t 27 | - g_t (tensor): unnormalized classifier predictions on target domain, :math:`g^t` 28 | 29 | Shape: 30 | - g_t: :math:`(minibatch, C)` where C means the number of classes. 31 | - Output: scalar. 
32 | 33 | Examples:: 34 | >>> temperature = 2.0 35 | >>> loss = MinimumClassConfusionLoss(temperature) 36 | >>> # logits output from target domain 37 | >>> g_t = torch.randn(batch_size, num_classes) 38 | >>> output = loss(g_t) 39 | 40 | MCC can also serve as a regularizer for existing methods. 41 | Examples:: 42 | >>> from dalib.modules.domain_discriminator import DomainDiscriminator 43 | >>> num_classes = 2 44 | >>> feature_dim = 1024 45 | >>> batch_size = 10 46 | >>> temperature = 2.0 47 | >>> discriminator = DomainDiscriminator(in_feature=feature_dim, hidden_size=1024) 48 | >>> cdan_loss = ConditionalDomainAdversarialLoss(discriminator, reduction='mean') 49 | >>> mcc_loss = MinimumClassConfusionLoss(temperature) 50 | >>> # features from source domain and target domain 51 | >>> f_s, f_t = torch.randn(batch_size, feature_dim), torch.randn(batch_size, feature_dim) 52 | >>> # logits output from source domain adn target domain 53 | >>> g_s, g_t = torch.randn(batch_size, num_classes), torch.randn(batch_size, num_classes) 54 | >>> total_loss = cdan_loss(g_s, f_s, g_t, f_t) + mcc_loss(g_t) 55 | """ 56 | 57 | def __init__(self, temperature: float): 58 | super(MinimumClassConfusionLoss, self).__init__() 59 | self.temperature = temperature 60 | 61 | def forward(self, logits: torch.Tensor) -> torch.Tensor: 62 | batch_size, num_classes = logits.shape 63 | predictions = F.softmax(logits / self.temperature, dim=1) # batch_size x num_classes 64 | entropy_weight = entropy(predictions).detach() 65 | entropy_weight = 1 + torch.exp(-entropy_weight) 66 | entropy_weight = (batch_size * entropy_weight / torch.sum(entropy_weight)).unsqueeze(dim=1) # batch_size x 1 67 | class_confusion_matrix = torch.mm((predictions * entropy_weight).transpose(1, 0), predictions) # num_classes x num_classes 68 | class_confusion_matrix = class_confusion_matrix / torch.sum(class_confusion_matrix, dim=1) 69 | mcc_loss = (torch.sum(class_confusion_matrix) - torch.trace(class_confusion_matrix)) / num_classes 70 | return mcc_loss 71 | 72 | 73 | class ImageClassifier(ClassifierBase): 74 | def __init__(self, backbone: nn.Module, num_classes: int, bottleneck_dim: Optional[int] = 256, **kwargs): 75 | bottleneck = nn.Sequential( 76 | nn.AdaptiveAvgPool2d(output_size=(1, 1)), 77 | nn.Flatten(), 78 | nn.Linear(backbone.out_features, bottleneck_dim), 79 | nn.BatchNorm1d(bottleneck_dim), 80 | nn.ReLU() 81 | ) 82 | super(ImageClassifier, self).__init__(backbone, num_classes, bottleneck, bottleneck_dim, **kwargs) 83 | -------------------------------------------------------------------------------- /common/vision/datasets/imagelist.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional, Callable, Tuple, Any, List 3 | import torchvision.datasets as datasets 4 | from torchvision.datasets.folder import default_loader 5 | 6 | 7 | class ImageList(datasets.VisionDataset): 8 | """A generic Dataset class for image classification 9 | 10 | Args: 11 | root (str): Root directory of dataset 12 | classes (list[str]): The names of all the classes 13 | data_list_file (str): File to read the image list from. 14 | transform (callable, optional): A function/transform that takes in an PIL image \ 15 | and returns a transformed version. E.g, :class:`torchvision.transforms.RandomCrop`. 16 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 17 | 18 | .. note:: In `data_list_file`, each line has 2 values in the following format. 
19 | :: 20 | source_dir/dog_xxx.png 0 21 | source_dir/cat_123.png 1 22 | target_dir/dog_xxy.png 0 23 | target_dir/cat_nsdf3.png 1 24 | 25 | The first value is the relative path of an image, and the second value is the label of the corresponding image. 26 | If your data_list_file has different formats, please over-ride :meth:`~ImageList.parse_data_file`. 27 | """ 28 | 29 | def __init__(self, root: str, classes: List[str], data_list_file: str, 30 | transform: Optional[Callable] = None, target_transform: Optional[Callable] = None): 31 | super().__init__(root, transform=transform, target_transform=target_transform) 32 | self.samples = self.parse_data_file(data_list_file) 33 | self.classes = classes 34 | self.class_to_idx = {cls: idx 35 | for idx, cls in enumerate(self.classes)} 36 | self.loader = default_loader 37 | self.data_list_file = data_list_file 38 | self.metadata = [{"index": i} for i in range(len(self.samples))] 39 | 40 | def __getitem__(self, index: int) -> Tuple[Any, int]: 41 | """ 42 | Args: 43 | index (int): Index 44 | return (tuple): (image, target) where target is index of the target class. 45 | """ 46 | path, target = self.samples[index] 47 | img = self.loader(path) 48 | if self.transform is not None: 49 | img = self.transform(img) 50 | if self.target_transform is not None and target is not None: 51 | target = self.target_transform(target) 52 | metadata = self.metadata[index] 53 | return img, target, metadata 54 | 55 | def __len__(self) -> int: 56 | return len(self.samples) 57 | 58 | def set_metadata(self, metadata, name): 59 | # assert isinstance(metadata, list) 60 | assert len(metadata) == len(self.samples) 61 | for i in range(len(metadata)): 62 | self.metadata[i][name] = metadata[i] 63 | 64 | def parse_data_file(self, file_name: str) -> List[Tuple[str, int]]: 65 | """Parse file to data list 66 | 67 | Args: 68 | file_name (str): The path of data file 69 | return (list): List of (image path, class_index) tuples 70 | """ 71 | with open(file_name, "r") as f: 72 | data_list = [] 73 | for line in f.readlines(): 74 | split_line = line.split() 75 | target = split_line[-1] 76 | path = ' '.join(split_line[:-1]) 77 | if not os.path.isabs(path): 78 | path = os.path.join(self.root, path) 79 | target = int(target) 80 | data_list.append((path, target)) 81 | return data_list 82 | 83 | @property 84 | def num_classes(self) -> int: 85 | """Number of classes""" 86 | return len(self.classes) 87 | 88 | @classmethod 89 | def domains(cls): 90 | """All possible domain in this dataset""" 91 | raise NotImplemented -------------------------------------------------------------------------------- /common/vision/datasets/coco70.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional 3 | from .imagelist import ImageList 4 | from ._util import download as download_data, check_exits 5 | 6 | 7 | class COCO70(ImageList): 8 | """COCO-70 dataset is a large-scale classification dataset (1000 images per class) created from 9 | `COCO `_ Dataset. 10 | It is used to explore the effect of fine-tuning with a large amount of data. 11 | 12 | Args: 13 | root (str): Root directory of dataset 14 | split (str, optional): The dataset split, supports ``train``, or ``test``. 15 | sample_rate (int): The sampling rates to sample random ``training`` images for each category. 16 | Choices include 100, 50, 30, 15. Default: 100. 17 | download (bool, optional): If true, downloads the dataset from the internet and puts it \ 18 | in root directory. 
If dataset is already downloaded, it is not downloaded again. 19 | transform (callable, optional): A function/transform that takes in an PIL image and returns a \ 20 | transformed version. E.g, :class:`torchvision.transforms.RandomCrop`. 21 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 22 | 23 | .. note:: In `root`, there will exist following files after downloading. 24 | :: 25 | train/ 26 | test/ 27 | image_list/ 28 | train_100.txt 29 | train_50.txt 30 | train_30.txt 31 | train_15.txt 32 | test.txt 33 | """ 34 | download_list = [ 35 | ("image_list", "image_list.zip", "https://cloud.tsinghua.edu.cn/f/d2ffb62fe3d140f1a73c/?dl=1"), 36 | ("train", "train.tgz", "https://cloud.tsinghua.edu.cn/f/e0dc4368342948c5bb2a/?dl=1"), 37 | ("test", "test.tgz", "https://cloud.tsinghua.edu.cn/f/59393a55c818429fb8d1/?dl=1"), 38 | ] 39 | image_list = { 40 | "train": "image_list/train_100.txt", 41 | "train100": "image_list/train_100.txt", 42 | "train50": "image_list/train_50.txt", 43 | "train30": "image_list/train_30.txt", 44 | "train15": "image_list/train_15.txt", 45 | "test": "image_list/test.txt", 46 | "test100": "image_list/test.txt", 47 | } 48 | CLASSES =['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 49 | 'boat', 'traffic_light', 'fire_hydrant', 'stop_sign', 'bench', 'bird', 'cat', 'dog', 50 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 51 | 'handbag', 'tie', 'suitcase', 'skis', 'kite', 'baseball_bat', 'skateboard', 'surfboard', 52 | 'tennis_racket', 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 53 | 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot_dog', 'pizza', 'donut', 'cake', 54 | 'chair', 'couch', 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop', 55 | 'remote', 'keyboard', 'cell_phone', 'microwave', 'oven', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'teddy_bear'] 56 | 57 | def __init__(self, root: str, split: str, sample_rate: Optional[int] =100, download: Optional[bool] = False, **kwargs): 58 | 59 | if split == 'train': 60 | list_name = 'train' + str(sample_rate) 61 | assert list_name in self.image_list 62 | data_list_file = os.path.join(root, self.image_list[list_name]) 63 | else: 64 | data_list_file = os.path.join(root, self.image_list['test']) 65 | 66 | if download: 67 | list(map(lambda args: download_data(root, *args), self.download_list)) 68 | else: 69 | list(map(lambda file_name, _: check_exits(root, file_name), self.download_list)) 70 | 71 | super(COCO70, self).__init__(root, COCO70.CLASSES, data_list_file=data_list_file, **kwargs) 72 | -------------------------------------------------------------------------------- /dalib/translation/cyclegan/loss.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Modified from https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix 3 | # ------------------------------------------------------------------------------ 4 | import torch.nn as nn 5 | import torch 6 | 7 | 8 | class LeastSquaresGenerativeAdversarialLoss(nn.Module): 9 | """ 10 | Loss for `Least Squares Generative Adversarial Network (LSGAN) `_ 11 | 12 | Args: 13 | reduction (str, optional): Specifies the reduction to apply to the output: 14 | ``'none'`` | ``'mean'`` | ``'sum'``. 
``'none'``: no reduction will be applied, 15 | ``'mean'``: the sum of the output will be divided by the number of 16 | elements in the output, ``'sum'``: the output will be summed. Default: ``'mean'`` 17 | 18 | Inputs: 19 | - prediction (tensor): unnormalized discriminator predictions 20 | - real (bool): if the ground truth label is for real images or fake images. Default: true 21 | 22 | .. warning:: 23 | Do not use sigmoid as the last layer of Discriminator. 24 | 25 | """ 26 | def __init__(self, reduction='mean'): 27 | super(LeastSquaresGenerativeAdversarialLoss, self).__init__() 28 | self.mse_loss = nn.MSELoss(reduction=reduction) 29 | 30 | def forward(self, prediction, real=True): 31 | if real: 32 | label = torch.ones_like(prediction) 33 | else: 34 | label = torch.zeros_like(prediction) 35 | return self.mse_loss(prediction, label) 36 | 37 | 38 | class VanillaGenerativeAdversarialLoss(nn.Module): 39 | """ 40 | Loss for `Vanilla Generative Adversarial Network `_ 41 | 42 | Args: 43 | reduction (str, optional): Specifies the reduction to apply to the output: 44 | ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, 45 | ``'mean'``: the sum of the output will be divided by the number of 46 | elements in the output, ``'sum'``: the output will be summed. Default: ``'mean'`` 47 | 48 | Inputs: 49 | - prediction (tensor): unnormalized discriminator predictions 50 | - real (bool): if the ground truth label is for real images or fake images. Default: true 51 | 52 | .. warning:: 53 | Do not use sigmoid as the last layer of Discriminator. 54 | 55 | """ 56 | def __init__(self, reduction='mean'): 57 | super(VanillaGenerativeAdversarialLoss, self).__init__() 58 | self.bce_loss = nn.BCEWithLogitsLoss(reduction=reduction) 59 | 60 | def forward(self, prediction, real=True): 61 | if real: 62 | label = torch.ones_like(prediction) 63 | else: 64 | label = torch.zeros_like(prediction) 65 | return self.bce_loss(prediction, label) 66 | 67 | 68 | class WassersteinGenerativeAdversarialLoss(nn.Module): 69 | """ 70 | Loss for `Wasserstein Generative Adversarial Network `_ 71 | 72 | Args: 73 | reduction (str, optional): Specifies the reduction to apply to the output: 74 | ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, 75 | ``'mean'``: the sum of the output will be divided by the number of 76 | elements in the output, ``'sum'``: the output will be summed. Default: ``'mean'`` 77 | 78 | Inputs: 79 | - prediction (tensor): unnormalized discriminator predictions 80 | - real (bool): if the ground truth label is for real images or fake images. Default: true 81 | 82 | .. warning:: 83 | Do not use sigmoid as the last layer of Discriminator. 84 | 85 | """ 86 | def __init__(self, reduction='mean'): 87 | super(WassersteinGenerativeAdversarialLoss, self).__init__() 88 | self.mse_loss = nn.MSELoss(reduction=reduction) 89 | 90 | def forward(self, prediction, real=True): 91 | if real: 92 | return -prediction.mean() 93 | else: 94 | return prediction.mean() -------------------------------------------------------------------------------- /common/vision/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import numpy as np 3 | import torch 4 | from torchvision.transforms import Normalize 5 | 6 | 7 | class ResizeImage(object): 8 | """Resize the input PIL Image to the given size. 9 | 10 | Args: 11 | size (sequence or int): Desired output size. 
If size is a sequence like 12 | (h, w), output size will be matched to this. If size is an int, 13 | output size will be (size, size) 14 | """ 15 | def __init__(self, size): 16 | if isinstance(size, int): 17 | self.size = (int(size), int(size)) 18 | else: 19 | self.size = size 20 | 21 | def __call__(self, img): 22 | th, tw = self.size 23 | return img.resize((th, tw)) 24 | 25 | 26 | class MultipleApply: 27 | """Apply a list of transformations to an image and get multiple transformed images. 28 | 29 | Args: 30 | transforms (list or tuple): list of transformations 31 | 32 | Example: 33 | 34 | >>> transform1 = T.Compose([ 35 | ... ResizeImage(256), 36 | ... T.RandomCrop(224) 37 | ... ]) 38 | >>> transform2 = T.Compose([ 39 | ... ResizeImage(256), 40 | ... T.RandomCrop(224), 41 | ... ]) 42 | >>> multiply_transform = MultipleApply([transform1, transform2]) 43 | """ 44 | def __init__(self, transforms): 45 | self.transforms = transforms 46 | 47 | def __call__(self, image): 48 | return [t(image) for t in self.transforms] 49 | 50 | 51 | class Denormalize(Normalize): 52 | """DeNormalize a tensor image with mean and standard deviation. 53 | Given mean: ``(mean[1],...,mean[n])`` and std: ``(std[1],..,std[n])`` for ``n`` 54 | channels, this transform will denormalize each channel of the input 55 | ``torch.*Tensor`` i.e., 56 | ``output[channel] = input[channel] * std[channel] + mean[channel]`` 57 | 58 | .. note:: 59 | This transform acts out of place, i.e., it does not mutate the input tensor. 60 | 61 | Args: 62 | mean (sequence): Sequence of means for each channel. 63 | std (sequence): Sequence of standard deviations for each channel. 64 | 65 | """ 66 | def __init__(self, mean, std): 67 | mean = np.array(mean) 68 | std = np.array(std) 69 | super().__init__((-mean / std).tolist(), (1 / std).tolist()) 70 | 71 | 72 | class NormalizeAndTranspose: 73 | """ 74 | First, normalize a tensor image with mean and standard deviation. 75 | Then, convert the shape (H x W x C) to shape (C x H x W). 76 | """ 77 | def __init__(self, mean=(104.00698793, 116.66876762, 122.67891434)): 78 | self.mean = np.array(mean, dtype=np.float32) 79 | 80 | def __call__(self, image): 81 | if isinstance(image, Image.Image): 82 | image = np.asarray(image, np.float32) 83 | # change to BGR 84 | image = image[:, :, ::-1] 85 | # normalize 86 | image -= self.mean 87 | image = image.transpose((2, 0, 1)).copy() 88 | elif isinstance(image, torch.Tensor): 89 | # change to BGR 90 | image = image[:, :, [2, 1, 0]] 91 | # normalize 92 | image -= torch.from_numpy(self.mean).to(image.device) 93 | image = image.permute((2, 0, 1)) 94 | else: 95 | raise NotImplementedError(type(image)) 96 | return image 97 | 98 | 99 | class DeNormalizeAndTranspose: 100 | """ 101 | First, convert a tensor image from the shape (C x H x W ) to shape (H x W x C). 102 | Then, denormalize it with mean and standard deviation. 
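    A minimal usage sketch (``img`` is a hypothetical ``numpy.float32`` array in :math:`(C, H, W)` BGR layout,
    e.g. the output of :class:`NormalizeAndTranspose`):

    >>> denorm = DeNormalizeAndTranspose()
    >>> restored = denorm(img)  # (H, W, C), RGB, with the channel means added back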
103 | """ 104 | def __init__(self, mean=(104.00698793, 116.66876762, 122.67891434)): 105 | self.mean = np.array(mean, dtype=np.float32) 106 | 107 | def __call__(self, image): 108 | image = image.transpose((1, 2, 0)) 109 | # denormalize 110 | image += self.mean 111 | # change to RGB 112 | image = image[:, :, ::-1] 113 | return image 114 | 115 | -------------------------------------------------------------------------------- /dalib/adaptation/iwan.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List, Dict 2 | import torch 3 | import torch.nn as nn 4 | 5 | from common.modules.classifier import Classifier as ClassifierBase 6 | 7 | 8 | class ImportanceWeightModule(object): 9 | r""" 10 | Calculating class weight based on the output of discriminator. 11 | Introduced by `Importance Weighted Adversarial Nets for Partial Domain Adaptation (CVPR 2018) `_ 12 | 13 | Args: 14 | discriminator (torch.nn.Module): A domain discriminator object, which predicts the domains of features. 15 | Its input shape is :math:`(N, F)` and output shape is :math:`(N, 1)` 16 | partial_classes_index (list[int], optional): The index of partial classes. Note that this parameter is \ 17 | just for debugging, since in real-world dataset, we have no access to the index of partial classes. \ 18 | Default: None. 19 | 20 | Examples:: 21 | 22 | >>> domain_discriminator = DomainDiscriminator(1024, 1024) 23 | >>> importance_weight_module = ImportanceWeightModule(domain_discriminator) 24 | >>> num_iterations = 10000 25 | >>> for _ in range(num_iterations): 26 | >>> # feature from source domain 27 | >>> f_s = torch.randn(32, 1024) 28 | >>> # importance weights for source instance 29 | >>> w_s = importance_weight_module.get_importance_weight(f_s) 30 | """ 31 | 32 | def __init__(self, discriminator: nn.Module, partial_classes_index: Optional[List[int]] = None): 33 | self.discriminator = discriminator 34 | self.partial_classes_index = partial_classes_index 35 | 36 | def get_importance_weight(self, feature): 37 | """ 38 | Get importance weights for each instance. 39 | 40 | Args: 41 | feature (tensor): feature from source domain, in shape :math:`(N, F)` 42 | 43 | Returns: 44 | instance weight in shape :math:`(N, 1)` 45 | """ 46 | weight = 1. - self.discriminator(feature) 47 | weight = weight / weight.mean() 48 | weight = weight.detach() 49 | return weight 50 | 51 | def get_partial_classes_weight(self, weights: torch.Tensor, labels: torch.Tensor): 52 | """ 53 | Get class weight averaged on the partial classes and non-partial classes respectively. 54 | 55 | Args: 56 | weights (tensor): instance weight in shape :math:`(N, 1)` 57 | labels (tensor): ground truth labels in shape :math:`(N, 1)` 58 | 59 | .. warning:: 60 | This function is just for debugging, since in real-world dataset, we have no access to the index of \ 61 | partial classes and this function will throw an error when `partial_classes_index` is None. 62 | """ 63 | assert self.partial_classes_index is not None 64 | 65 | weights = weights.squeeze() 66 | is_partial = torch.Tensor([label in self.partial_classes_index for label in labels]).to(weights.device) 67 | if is_partial.sum() > 0: 68 | partial_classes_weight = (weights * is_partial).sum() / is_partial.sum() 69 | else: 70 | partial_classes_weight = 0 71 | 72 | not_partial = 1. 
- is_partial 73 | if not_partial.sum() > 0: 74 | not_partial_classes_weight = (weights * not_partial).sum() / not_partial.sum() 75 | else: 76 | not_partial_classes_weight = 0 77 | return partial_classes_weight, not_partial_classes_weight 78 | 79 | 80 | class ImageClassifier(ClassifierBase): 81 | r"""The Image Classifier for `Importance Weighted Adversarial Nets for Partial Domain Adaptation `_ 82 | """ 83 | 84 | def __init__(self, backbone: nn.Module, num_classes: int, bottleneck_dim: Optional[int] = 256, **kwargs): 85 | bottleneck = nn.Sequential( 86 | nn.AdaptiveAvgPool2d(output_size=(1, 1)), 87 | nn.Flatten(), 88 | nn.Linear(backbone.out_features, bottleneck_dim), 89 | nn.BatchNorm1d(bottleneck_dim), 90 | nn.ReLU() 91 | ) 92 | super(ImageClassifier, self).__init__(backbone, num_classes, bottleneck, bottleneck_dim, **kwargs) 93 | -------------------------------------------------------------------------------- /icon/icon_utils.py: -------------------------------------------------------------------------------- 1 | from tensorboardX import SummaryWriter 2 | import os 3 | import torch 4 | import torch.nn as nn 5 | import torchvision.transforms as T 6 | from common.vision.transforms import ResizeImage 7 | import torch.nn.functional as F 8 | from icon.cluster import PairEnum 9 | from icon.randaugment import rand_augment_transform 10 | 11 | rgb_mean = (0.485, 0.456, 0.406) 12 | ra_params = dict(translate_const=int(224 * 0.45), img_mean=tuple([min(255, round(255 * x)) for x in rgb_mean]),) 13 | 14 | 15 | class Visualizer(): 16 | def __init__(self, root_dir, exp_name): 17 | if not os.path.exists(root_dir): 18 | os.makedirs(root_dir) 19 | log_dir = os.path.join(root_dir, exp_name) 20 | self.writer = SummaryWriter(log_dir) 21 | self.step = 0 22 | 23 | def plot_items(self, items): 24 | for name, value in items.items(): 25 | self.writer.add_scalar(name, value, self.step) 26 | 27 | def tick(self): 28 | self.step += 1 29 | 30 | 31 | class TwoViewsTrainTransform(object): 32 | def __init__(self, center_crop): 33 | normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 34 | crop = T.CenterCrop(224) if center_crop else T.RandomResizedCrop(224) 35 | self.weak = T.Compose([ 36 | ResizeImage(256), 37 | crop, 38 | T.RandomHorizontalFlip(), 39 | T.ToTensor(), 40 | normalize 41 | ]) 42 | self.strong = T.Compose([ 43 | ResizeImage(256), 44 | crop, 45 | T.RandomHorizontalFlip(), 46 | T.RandomApply([ 47 | T.ColorJitter(0.4, 0.4, 0.4, 0.0) 48 | ], p=1.0), 49 | rand_augment_transform('rand-n{}-m{}-mstd0.5'.format(2, 10), ra_params), 50 | T.ToTensor(), 51 | normalize, 52 | ]) 53 | 54 | def __call__(self, x): 55 | weak = self.weak(x) 56 | strong = self.strong(x) 57 | return weak, strong 58 | 59 | 60 | sim_list = [] 61 | def get_ulb_sim_matrix(mode, sim_matrix_ulb, cluster_preds_t, update_list=True): 62 | if mode == 'stats': 63 | return sim_matrix_ulb, 0, 0 64 | elif mode == 'argmax': 65 | y_c_t = cluster_preds_t.argmax(dim=1).contiguous().view(-1, 1) 66 | sim_matrix_ulb_full = torch.eq(y_c_t, y_c_t.T).float().to(cluster_preds_t.device) 67 | sim_matrix_ulb_full = (sim_matrix_ulb_full - 0.5) * 2 68 | sim_matrix_ulb_full = sim_matrix_ulb_full.flatten() 69 | return sim_matrix_ulb_full 70 | else: 71 | if mode == 'sim': 72 | feat_row, feat_col = PairEnum(F.normalize(cluster_preds_t, dim=1)) 73 | elif mode == 'prob': 74 | feat_row, feat_col = PairEnum(F.softmax(cluster_preds_t, dim=1)) 75 | tmp_distance_ori = torch.bmm( 76 | feat_row.view(feat_row.size(0), 1, -1), 77 | feat_col.view(feat_row.size(0), 
-1, 1) 78 | ) 79 | sim_threshold = 0.92 80 | sim_ratio = 0.5 / 12 81 | diff_ratio = 5.5 / 12 82 | similarity = tmp_distance_ori.squeeze() 83 | if update_list: 84 | global sim_list 85 | sim_list.append(similarity) 86 | if len(sim_list) > 30: 87 | sim_list = sim_list[1:] 88 | sim_all = torch.cat(sim_list, dim=0) 89 | sim_all_sorted, _ = torch.sort(sim_all) 90 | 91 | n_diff = min(len(sim_all) * diff_ratio, len(sim_all)-1) 92 | n_sim = min(len(sim_all) * sim_ratio, len(sim_all)) 93 | 94 | low_threshold = sim_all_sorted[int(n_diff)] 95 | high_threshold = max(sim_threshold, sim_all_sorted[-int(n_sim)]) 96 | 97 | sim_matrix_ulb = torch.zeros_like(similarity).float() 98 | 99 | if high_threshold != low_threshold: 100 | sim_matrix_ulb[similarity >= high_threshold] = 1.0 101 | sim_matrix_ulb[similarity <= low_threshold] = -1.0 102 | else: 103 | sim_matrix_ulb[similarity > high_threshold] = 1.0 104 | sim_matrix_ulb[similarity < low_threshold] = -1.0 105 | return sim_matrix_ulb, low_threshold, high_threshold -------------------------------------------------------------------------------- /common/vision/datasets/aircrafts.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional 3 | from .imagelist import ImageList 4 | from ._util import download as download_data, check_exits 5 | 6 | 7 | class Aircraft(ImageList): 8 | """`FVGC-Aircraft `_ Dataset. 9 | 10 | Args: 11 | root (str): Root directory of dataset 12 | split (str, optional): The dataset split, supports ``train``, or ``test``. 13 | sample_rate (int): The sampling rates to sample random ``training`` images for each category. 14 | Choices include 100, 50, 30, 15. Default: 100. 15 | download (bool, optional): If true, downloads the dataset from the internet and puts it \ 16 | in root directory. If dataset is already downloaded, it is not downloaded again. 17 | transform (callable, optional): A function/transform that takes in an PIL image and returns a \ 18 | transformed version. E.g, :class:`torchvision.transforms.RandomCrop`. 19 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 20 | 21 | .. note:: In `root`, there will exist following files after downloading. 
22 | :: 23 | train/ 24 | test/ 25 | image_list/ 26 | train_100.txt 27 | train_50.txt 28 | train_30.txt 29 | train_15.txt 30 | test.txt 31 | """ 32 | download_list = [ 33 | ("image_list", "image_list.zip", "https://cloud.tsinghua.edu.cn/f/04356d49d0054092b07e/?dl=1"), 34 | ("train", "train.tgz", "https://cloud.tsinghua.edu.cn/f/9fed22eba03046d69012/?dl=1"), 35 | ("test", "test.tgz", "https://cloud.tsinghua.edu.cn/f/8d5e4c1b031a4a608c68/?dl=1"), 36 | ] 37 | image_list = { 38 | "train": "image_list/train_100.txt", 39 | "train100": "image_list/train_100.txt", 40 | "train50": "image_list/train_50.txt", 41 | "train30": "image_list/train_30.txt", 42 | "train15": "image_list/train_15.txt", 43 | "test": "image_list/test.txt", 44 | "test100": "image_list/test.txt", 45 | } 46 | CLASSES = ['707-320', '727-200', '737-200', '737-300', '737-400', '737-500', '737-600', '737-700', '737-800', '737-900', '747-100', 47 | '747-200', '747-300', '747-400', '757-200', '757-300', '767-200', '767-300', '767-400', '777-200', '777-300', 'A300B4', 48 | 'A310', 'A318', 'A319', 'A320', 'A321', 'A330-200', 'A330-300', 'A340-200', 'A340-300', 'A340-500', 'A340-600', 'A380', 49 | 'ATR-42', 'ATR-72', 'An-12', 'BAE 146-200', 'BAE 146-300', 'BAE-125', 'Beechcraft 1900', 'Boeing 717', 'C-130', 'C-47', 50 | 'CRJ-200', 'CRJ-700', 'CRJ-900', 'Cessna 172', 'Cessna 208', 'Cessna 525', 'Cessna 560', 'Challenger 600', 'DC-10', 51 | 'DC-3', 'DC-6', 'DC-8', 'DC-9-30', 'DH-82', 'DHC-1', 'DHC-6', 'DHC-8-100', 'DHC-8-300', 'DR-400', 'Dornier 328', 'E-170', 52 | 'E-190', 'E-195', 'EMB-120', 'ERJ 135', 'ERJ 145', 'Embraer Legacy 600', 'Eurofighter Typhoon', 'F-16A-B', 'F-A-18', 53 | 'Falcon 2000', 'Falcon 900', 'Fokker 100', 'Fokker 50', 'Fokker 70', 'Global Express', 'Gulfstream IV', 'Gulfstream V', 54 | 'Hawk T1', 'Il-76', 'L-1011', 'MD-11', 'MD-80', 'MD-87', 'MD-90', 'Metroliner', 'Model B200', 'PA-28', 'SR-20', 55 | 'Saab 2000', 'Saab 340', 'Spitfire', 'Tornado', 'Tu-134', 'Tu-154', 'Yak-42'] 56 | 57 | def __init__(self, root: str, split: str, sample_rate: Optional[int]=100, download: Optional[bool] = False, **kwargs): 58 | 59 | if split == 'train': 60 | list_name = 'train' + str(sample_rate) 61 | assert list_name in self.image_list 62 | data_list_file = os.path.join(root, self.image_list[list_name]) 63 | else: 64 | data_list_file = os.path.join(root, self.image_list['test']) 65 | 66 | if download: 67 | list(map(lambda args: download_data(root, *args), self.download_list)) 68 | else: 69 | list(map(lambda file_name, _: check_exits(root, file_name), self.download_list)) 70 | 71 | super(Aircraft, self).__init__(root, Aircraft.CLASSES, data_list_file=data_list_file, **kwargs) -------------------------------------------------------------------------------- /common/vision/models/keypoint_detection/loss.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Modified from https://github.com/microsoft/human-pose-estimation.pytorch 3 | # ------------------------------------------------------------------------------ 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class JointsMSELoss(nn.Module): 9 | """ 10 | Typical MSE loss for keypoint detection. 11 | 12 | Args: 13 | reduction (str, optional): Specifies the reduction to apply to the output: 14 | ``'none'`` | ``'mean'``. 
``'none'``: no reduction will be applied, 15 | ``'mean'``: the sum of the output will be divided by the number of 16 | elements in the output. Default: ``'mean'`` 17 | 18 | Inputs: 19 | - output (tensor): heatmap predictions 20 | - target (tensor): heatmap labels 21 | - target_weight (tensor): whether the keypoint is visible. All keypoint is visible if None. Default: None. 22 | 23 | Shape: 24 | - output: :math:`(minibatch, K, H, W)` where K means the number of keypoints, 25 | H and W is the height and width of the heatmap respectively. 26 | - target: :math:`(minibatch, K, H, W)`. 27 | - target_weight: :math:`(minibatch, K)`. 28 | - Output: scalar by default. If :attr:`reduction` is ``'none'``, then :math:`(minibatch, K)`. 29 | 30 | """ 31 | def __init__(self, reduction='mean'): 32 | super(JointsMSELoss, self).__init__() 33 | self.criterion = nn.MSELoss(reduction='none') 34 | self.reduction = reduction 35 | 36 | def forward(self, output, target, target_weight=None): 37 | B, K, _, _ = output.shape 38 | heatmaps_pred = output.reshape((B, K, -1)) 39 | heatmaps_gt = target.reshape((B, K, -1)) 40 | loss = self.criterion(heatmaps_pred, heatmaps_gt) * 0.5 41 | if target_weight is not None: 42 | loss = loss * target_weight.view((B, K, 1)) 43 | if self.reduction == 'mean': 44 | return loss.mean() 45 | elif self.reduction == 'none': 46 | return loss.mean(dim=-1) 47 | 48 | 49 | class JointsKLLoss(nn.Module): 50 | """ 51 | KL Divergence for keypoint detection proposed by 52 | `Regressive Domain Adaptation for Unsupervised Keypoint Detection `_. 53 | 54 | Args: 55 | reduction (str, optional): Specifies the reduction to apply to the output: 56 | ``'none'`` | ``'mean'``. ``'none'``: no reduction will be applied, 57 | ``'mean'``: the sum of the output will be divided by the number of 58 | elements in the output. Default: ``'mean'`` 59 | 60 | Inputs: 61 | - output (tensor): heatmap predictions 62 | - target (tensor): heatmap labels 63 | - target_weight (tensor): whether the keypoint is visible. All keypoint is visible if None. Default: None. 64 | 65 | Shape: 66 | - output: :math:`(minibatch, K, H, W)` where K means the number of keypoints, 67 | H and W is the height and width of the heatmap respectively. 68 | - target: :math:`(minibatch, K, H, W)`. 69 | - target_weight: :math:`(minibatch, K)`. 70 | - Output: scalar by default. If :attr:`reduction` is ``'none'``, then :math:`(minibatch, K)`. 
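    A minimal usage sketch (``B``, ``K``, ``H``, ``W`` are hypothetical sizes; the heatmap targets should be
    non-negative, since they are renormalized into distributions internally):

    >>> B, K, H, W = 4, 16, 64, 64
    >>> criterion = JointsKLLoss()
    >>> output = torch.randn(B, K, H, W)        # raw heatmap predictions
    >>> target = torch.rand(B, K, H, W)         # non-negative heatmap labels
    >>> target_weight = torch.ones(B, K)        # every keypoint visible
    >>> loss = criterion(output, target, target_weight)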
71 | 72 | """ 73 | def __init__(self, reduction='mean', epsilon=0.): 74 | super(JointsKLLoss, self).__init__() 75 | self.criterion = nn.KLDivLoss(reduction='none') 76 | self.reduction = reduction 77 | self.epsilon = epsilon 78 | 79 | def forward(self, output, target, target_weight=None): 80 | B, K, _, _ = output.shape 81 | heatmaps_pred = output.reshape((B, K, -1)) 82 | heatmaps_pred = F.log_softmax(heatmaps_pred, dim=-1) 83 | heatmaps_gt = target.reshape((B, K, -1)) 84 | heatmaps_gt = heatmaps_gt + self.epsilon 85 | heatmaps_gt = heatmaps_gt / heatmaps_gt.sum(dim=-1, keepdims=True) 86 | loss = self.criterion(heatmaps_pred, heatmaps_gt).sum(dim=-1) 87 | if target_weight is not None: 88 | loss = loss * target_weight.view((B, K)) 89 | if self.reduction == 'mean': 90 | return loss.mean() 91 | elif self.reduction == 'none': 92 | return loss.mean(dim=-1) 93 | -------------------------------------------------------------------------------- /common/utils/metric/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import prettytable 3 | 4 | __all__ = ['keypoint_detection'] 5 | 6 | def binary_accuracy(output: torch.Tensor, target: torch.Tensor) -> float: 7 | """Computes the accuracy for binary classification""" 8 | with torch.no_grad(): 9 | batch_size = target.size(0) 10 | pred = (output >= 0.5).float().t().view(-1) 11 | correct = pred.eq(target.view(-1)).float().sum() 12 | correct.mul_(100. / batch_size) 13 | return correct 14 | 15 | 16 | def accuracy(output, target, topk=(1,)): 17 | r""" 18 | Computes the accuracy over the k top predictions for the specified values of k 19 | 20 | Args: 21 | output (tensor): Classification outputs, :math:`(N, C)` where `C = number of classes` 22 | target (tensor): :math:`(N)` where each value is :math:`0 \leq \text{targets}[i] \leq C-1` 23 | topk (sequence[int]): A list of top-N number. 24 | 25 | Returns: 26 | Top-N accuracies (N :math:`\in` topK). 27 | """ 28 | with torch.no_grad(): 29 | maxk = max(topk) 30 | batch_size = target.size(0) 31 | 32 | _, pred = output.topk(maxk, 1, True, True) 33 | pred = pred.t() 34 | correct = pred.eq(target[None]) 35 | 36 | res = [] 37 | for k in topk: 38 | correct_k = correct[:k].flatten().sum(dtype=torch.float32) 39 | res.append(correct_k * (100.0 / batch_size)) 40 | return res 41 | 42 | 43 | class ConfusionMatrix(object): 44 | def __init__(self, num_classes): 45 | self.num_classes = num_classes 46 | self.mat = None 47 | 48 | def update(self, target, output): 49 | """ 50 | Update confusion matrix. 51 | 52 | Args: 53 | target: ground truth 54 | output: predictions of models 55 | 56 | Shape: 57 | - target: :math:`(minibatch, C)` where C means the number of classes. 58 | - output: :math:`(minibatch, C)` where C means the number of classes. 
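        A minimal sketch (``target`` and ``output`` here are flattened class-index tensors, which is the form
        the implementation below consumes):

        >>> cm = ConfusionMatrix(num_classes=3)
        >>> target = torch.tensor([0, 1, 2, 2])   # ground-truth class indices
        >>> output = torch.tensor([0, 2, 2, 2])   # predicted class indices
        >>> cm.update(target, output)
        >>> acc_global, acc, iu = cm.compute()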
59 | """ 60 | n = self.num_classes 61 | if self.mat is None: 62 | self.mat = torch.zeros((n, n), dtype=torch.int64, device=target.device) 63 | with torch.no_grad(): 64 | k = (target >= 0) & (target < n) 65 | inds = n * target[k].to(torch.int64) + output[k] 66 | self.mat += torch.bincount(inds, minlength=n**2).reshape(n, n) 67 | 68 | def reset(self): 69 | self.mat.zero_() 70 | 71 | def compute(self): 72 | """compute global accuracy, per-class accuracy and per-class IoU""" 73 | h = self.mat.float() 74 | acc_global = torch.diag(h).sum() / h.sum() 75 | acc = torch.diag(h) / h.sum(1) 76 | iu = torch.diag(h) / (h.sum(1) + h.sum(0) - torch.diag(h)) 77 | return acc_global, acc, iu 78 | 79 | # def reduce_from_all_processes(self): 80 | # if not torch.distributed.is_available(): 81 | # return 82 | # if not torch.distributed.is_initialized(): 83 | # return 84 | # torch.distributed.barrier() 85 | # torch.distributed.all_reduce(self.mat) 86 | 87 | def __str__(self): 88 | acc_global, acc, iu = self.compute() 89 | return ( 90 | 'global correct: {:.1f}\n' 91 | 'average row correct: {}\n' 92 | 'IoU: {}\n' 93 | 'mean IoU: {:.1f}').format( 94 | acc_global.item() * 100, 95 | ['{:.1f}'.format(i) for i in (acc * 100).tolist()], 96 | ['{:.1f}'.format(i) for i in (iu * 100).tolist()], 97 | iu.mean().item() * 100) 98 | 99 | def format(self, classes: list): 100 | """Get the accuracy and IoU for each class in the table format""" 101 | acc_global, acc, iu = self.compute() 102 | 103 | table = prettytable.PrettyTable(["class", "acc", "iou"]) 104 | for i, class_name, per_acc, per_iu in zip(range(len(classes)), classes, (acc * 100).tolist(), (iu * 100).tolist()): 105 | table.add_row([class_name, per_acc, per_iu]) 106 | 107 | return 'global correct: {:.1f}\nmean correct:{:.1f}\nmean IoU: {:.1f}\n{}'.format( 108 | acc_global.item() * 100, acc.mean().item() * 100, iu.mean().item() * 100, table.get_string()) 109 | 110 | -------------------------------------------------------------------------------- /common/vision/datasets/stanford_cars.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional 3 | from .imagelist import ImageList 4 | from ._util import download as download_data, check_exits 5 | 6 | 7 | class StanfordCars(ImageList): 8 | """`The Stanford Cars `_ Dataset. 9 | 10 | Args: 11 | root (str): Root directory of dataset 12 | split (str, optional): The dataset split, supports ``train``, or ``test``. 13 | sample_rate (int): The sampling rates to sample random ``training`` images for each category. 14 | Choices include 100, 50, 30, 15. Default: 100. 15 | download (bool, optional): If true, downloads the dataset from the internet and puts it \ 16 | in root directory. If dataset is already downloaded, it is not downloaded again. 17 | transform (callable, optional): A function/transform that takes in an PIL image and returns a \ 18 | transformed version. E.g, :class:`torchvision.transforms.RandomCrop`. 19 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 20 | 21 | .. note:: In `root`, there will exist following files after downloading. 
22 | :: 23 | train/ 24 | test/ 25 | image_list/ 26 | train_100.txt 27 | train_50.txt 28 | train_30.txt 29 | train_15.txt 30 | test.txt 31 | """ 32 | download_list = [ 33 | ("image_list", "image_list.zip", "https://cloud.tsinghua.edu.cn/f/d95c188cc49c404aba70/?dl=1"), 34 | ("train", "train.tgz", "https://cloud.tsinghua.edu.cn/f/d5ab63c391a949509db0/?dl=1"), 35 | ("test", "test.tgz", "https://cloud.tsinghua.edu.cn/f/04e6fd5222a84d0a8ff5/?dl=1"), 36 | ] 37 | image_list = { 38 | "train": "image_list/train_100.txt", 39 | "train100": "image_list/train_100.txt", 40 | "train50": "image_list/train_50.txt", 41 | "train30": "image_list/train_30.txt", 42 | "train15": "image_list/train_15.txt", 43 | "test": "image_list/test.txt", 44 | "test100": "image_list/test.txt", 45 | } 46 | CLASSES = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', 47 | '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', 48 | '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', 49 | '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', 50 | '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', 51 | '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', 52 | '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', 53 | '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', 54 | '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', 55 | '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196'] 56 | 57 | def __init__(self, root: str, split: str, sample_rate: Optional[int] =100, download: Optional[bool] = False, **kwargs): 58 | 59 | if split == 'train': 60 | list_name = 'train' + str(sample_rate) 61 | assert list_name in self.image_list 62 | data_list_file = os.path.join(root, self.image_list[list_name]) 63 | else: 64 | data_list_file = os.path.join(root, self.image_list['test']) 65 | 66 | if download: 67 | list(map(lambda args: download_data(root, *args), self.download_list)) 68 | else: 69 | list(map(lambda file_name, _: check_exits(root, file_name), self.download_list)) 70 | 71 | super(StanfordCars, self).__init__(root, StanfordCars.CLASSES, data_list_file=data_list_file, **kwargs) 72 | -------------------------------------------------------------------------------- /common/vision/datasets/openset/__init__.py: -------------------------------------------------------------------------------- 1 | from ..imagelist import ImageList 2 | from ..office31 import Office31 3 | from ..officehome import OfficeHome 4 | from ..visda2017 import VisDA2017 5 | 6 | from typing import Optional, ClassVar, Sequence 7 | from copy import deepcopy 8 | 9 | 10 | __all__ = ['Office31', 'OfficeHome', "VisDA2017"] 11 | 12 | 13 | def open_set(dataset_class: ClassVar, public_classes: Sequence[str], 14 | private_classes: Optional[Sequence[str]] = ()) -> ClassVar: 15 | """ 16 | Convert a dataset into its open-set version. 
17 | 18 | In other words, those samples which doesn't belong to `private_classes` will be marked as "unknown". 19 | 20 | Be aware that `open_set` will change the label number of each category. 21 | 22 | Args: 23 | dataset_class (class): Dataset class. Only subclass of ``ImageList`` can be open-set. 24 | public_classes (sequence[str]): A sequence of which categories need to be kept in the open-set dataset.\ 25 | Each element of `public_classes` must belong to the `classes` list of `dataset_class`. 26 | private_classes (sequence[str], optional): A sequence of which categories need to be marked as "unknown" \ 27 | in the open-set dataset. Each element of `private_classes` must belong to the `classes` list of \ 28 | `dataset_class`. Default: (). 29 | 30 | Examples:: 31 | 32 | >>> public_classes = ['back_pack', 'bike', 'calculator', 'headphones', 'keyboard'] 33 | >>> private_classes = ['laptop_computer', 'monitor', 'mouse', 'mug', 'projector'] 34 | >>> # create a open-set dataset class which has classes 35 | >>> # 'back_pack', 'bike', 'calculator', 'headphones', 'keyboard' and 'unknown'. 36 | >>> OpenSetOffice31 = open_set(Office31, public_classes, private_classes) 37 | >>> # create an instance of the open-set dataset 38 | >>> dataset = OpenSetDataset(root="data/office31", task="A") 39 | 40 | """ 41 | if not (issubclass(dataset_class, ImageList)): 42 | raise Exception("Only subclass of ImageList can be openset") 43 | 44 | class OpenSetDataset(dataset_class): 45 | def __init__(self, **kwargs): 46 | super(OpenSetDataset, self).__init__(**kwargs) 47 | samples = [] 48 | all_classes = list(deepcopy(public_classes)) + ["unknown"] 49 | for (path, label) in self.samples: 50 | class_name = self.classes[label] 51 | if class_name in public_classes: 52 | samples.append((path, all_classes.index(class_name))) 53 | elif class_name in private_classes: 54 | samples.append((path, all_classes.index("unknown"))) 55 | self.samples = samples 56 | self.classes = all_classes 57 | self.class_to_idx = {cls: idx 58 | for idx, cls in enumerate(self.classes)} 59 | 60 | return OpenSetDataset 61 | 62 | 63 | def default_open_set(dataset_class: ClassVar, source: bool) -> ClassVar: 64 | """ 65 | Default open-set used in some paper. 66 | 67 | Args: 68 | dataset_class (class): Dataset class. Currently, dataset_class must be one of 69 | :class:`~common.vision.datasets.office31.Office31`, :class:`~common.vision.datasets.officehome.OfficeHome`, 70 | :class:`~common.vision.datasets.visda2017.VisDA2017`, 71 | source (bool): Whether the dataset is used for source domain or not. 
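    A minimal sketch, mirroring the :func:`open_set` example above:

    >>> # Office-31: the first 20 classes stay shared, the remaining 11 become "unknown" on the target side
    >>> OpenSetOffice31 = default_open_set(Office31, source=False)
    >>> dataset = OpenSetOffice31(root="data/office31", task="A")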
72 | """ 73 | if dataset_class == Office31: 74 | public_classes = Office31.CLASSES[:20] 75 | if source: 76 | private_classes = () 77 | else: 78 | private_classes = Office31.CLASSES[20:] 79 | elif dataset_class == OfficeHome: 80 | public_classes = sorted(OfficeHome.CLASSES)[:25] 81 | if source: 82 | private_classes = () 83 | else: 84 | private_classes = sorted(OfficeHome.CLASSES)[25:] 85 | elif dataset_class == VisDA2017: 86 | public_classes = ('bicycle', 'bus', 'car', 'motorcycle', 'train', 'truck') 87 | if source: 88 | private_classes = () 89 | else: 90 | private_classes = ('aeroplane', 'horse', 'knife', 'person', 'plant', 'skateboard') 91 | else: 92 | raise NotImplementedError("Unknown openset domain adaptation dataset: {}".format(dataset_class.__name__)) 93 | return open_set(dataset_class, public_classes, private_classes) 94 | 95 | -------------------------------------------------------------------------------- /icon/cluster.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import math 4 | import torch.nn.functional as F 5 | import numpy as np 6 | 7 | 8 | def PairEnum(x,mask=None): 9 | # Enumerate all pairs of feature in x 10 | assert x.ndimension() == 2, 'Input dimension must be 2' 11 | x1 = x.repeat(x.size(0), 1) 12 | x2 = x.repeat(1, x.size(0)).view(-1, x.size(1)) 13 | if mask is not None: 14 | xmask = mask.view(-1,1).repeat(1,x.size(1)) 15 | #dim 0: #sample, dim 1:#feature 16 | x1 = x1[xmask].view(-1,x.size(1)) 17 | x2 = x2[xmask].view(-1,x.size(1)) 18 | return x1,x2 19 | 20 | class BCE(nn.Module): 21 | eps = 1e-7 # Avoid calculating log(0). Use the small value of float16. 22 | def forward(self, prob1, prob2, simi): 23 | # simi: 1->similar; -1->dissimilar; 0->unknown(ignore) 24 | assert len(prob1)==len(prob2)==len(simi), 'Wrong input size:{0},{1},{2}'.format(str(len(prob1)),str(len(prob2)),str(len(simi))) 25 | P = prob1.mul_(prob2) 26 | P = P.sum(1) 27 | P.mul_(simi).add_(simi.eq(-1).type_as(P)) 28 | neglogP = -P.add_(BCE.eps).log_() 29 | return neglogP.mean() 30 | 31 | 32 | class ClusterLoss(): 33 | def __init__(self, device, num_classes, bce_type, cosine_threshold, topk): 34 | # super(NCLMemory, self).__init__() 35 | self.device = device 36 | self.num_classes = num_classes 37 | self.bce_type = bce_type 38 | self.costhre = cosine_threshold 39 | self.topk = topk 40 | self.bce = BCE() 41 | 42 | def compute_losses(self, inputs): 43 | bce_loss = 0.0 44 | device = self.device 45 | feat, output2 = inputs["x1"], inputs["preds1_u"] 46 | output2_bar = inputs["preds2_u"] 47 | label = inputs["labels"] 48 | 49 | num_s = (label < self.num_classes).sum() 50 | labels_s = label[:num_s] 51 | mask_lb = label < self.num_classes # masked away label samples. 
only use unlabel samples for clustering 52 | 53 | prob2, prob2_bar = F.softmax(output2, dim=1), F.softmax(output2_bar, dim=1) 54 | 55 | rank_feat = (feat[~mask_lb]).detach() 56 | if self.bce_type == 'cos': 57 | # default: cosine similarity with threshold 58 | feat_row, feat_col = PairEnum(F.normalize(rank_feat, dim=1)) 59 | tmp_distance_ori = torch.bmm( 60 | feat_row.view(feat_row.size(0), 1, -1), 61 | feat_col.view(feat_row.size(0), -1, 1) 62 | ) 63 | tmp_distance_ori = tmp_distance_ori.squeeze() 64 | target_ulb = torch.zeros_like(tmp_distance_ori).float() - 1 65 | target_ulb[tmp_distance_ori > self.costhre] = 1 66 | elif self.bce_type == 'RK': 67 | # top-k rank statics 68 | rank_idx = torch.argsort(rank_feat, dim=1, descending=True) 69 | rank_idx1, rank_idx2 = PairEnum(rank_idx) 70 | rank_idx1, rank_idx2 = rank_idx1[:, :self.topk], rank_idx2[:, :self.topk] 71 | rank_idx1, _ = torch.sort(rank_idx1, dim=1) 72 | rank_idx2, _ = torch.sort(rank_idx2, dim=1) 73 | rank_diff = rank_idx1 - rank_idx2 74 | rank_diff = torch.sum(torch.abs(rank_diff), dim=1) 75 | target_ulb = torch.ones_like(rank_diff).float().to(device) 76 | target_ulb[rank_diff > 0] = -1 77 | 78 | prob1_ulb, _ = PairEnum(prob2[~mask_lb]) 79 | _, prob2_ulb = PairEnum(prob2_bar[~mask_lb]) 80 | 81 | bce_loss = self.bce(prob1_ulb, prob2_ulb, target_ulb) 82 | return bce_loss, target_ulb 83 | 84 | class Normalize(nn.Module): 85 | def __init__(self, power=2): 86 | super(Normalize, self).__init__() 87 | self.power = power 88 | 89 | def forward(self, x): 90 | norm = x.pow(self.power).sum(1, keepdim=True).pow(1. / self.power) 91 | out = x.div(norm) 92 | return out 93 | 94 | def reduce_dimension(features, mode, dim): 95 | if mode == 'pca': 96 | from sklearn.decomposition import PCA 97 | pca = PCA(n_components=dim) 98 | transformed_features = pca.fit_transform(features) 99 | fit_score = pca.explained_variance_ratio_.sum() 100 | elif mode == 'umap': 101 | import umap 102 | fit = umap.UMAP(n_components=dim) 103 | transformed_features = fit.fit_transform(features) 104 | fit_score = 0.0 105 | return transformed_features, fit_score -------------------------------------------------------------------------------- /dalib/adaptation/dann.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from ..modules.grl import WarmStartGradientReverseLayer 7 | from common.modules.classifier import Classifier as ClassifierBase 8 | from common.utils.metric import binary_accuracy 9 | 10 | __all__ = ['DomainAdversarialLoss'] 11 | 12 | 13 | class DomainAdversarialLoss(nn.Module): 14 | """ 15 | The Domain Adversarial Loss proposed in 16 | `Domain-Adversarial Training of Neural Networks (ICML 2015) `_ 17 | 18 | Domain adversarial loss measures the domain discrepancy through training a domain discriminator. 19 | Given domain discriminator :math:`D`, feature representation :math:`f`, the definition of DANN loss is 20 | 21 | .. math:: 22 | loss(\mathcal{D}_s, \mathcal{D}_t) = \mathbb{E}_{x_i^s \sim \mathcal{D}_s} log[D(f_i^s)] 23 | + \mathbb{E}_{x_j^t \sim \mathcal{D}_t} log[1-D(f_j^t)]. 24 | 25 | Args: 26 | domain_discriminator (torch.nn.Module): A domain discriminator object, which predicts the domains of features. Its input shape is (N, F) and output shape is (N, 1) 27 | reduction (str, optional): Specifies the reduction to apply to the output: 28 | ``'none'`` | ``'mean'`` | ``'sum'``. 
``'none'``: no reduction will be applied, 29 | ``'mean'``: the sum of the output will be divided by the number of 30 | elements in the output, ``'sum'``: the output will be summed. Default: ``'mean'`` 31 | grl (WarmStartGradientReverseLayer, optional): Default: None. 32 | 33 | Inputs: 34 | - f_s (tensor): feature representations on source domain, :math:`f^s` 35 | - f_t (tensor): feature representations on target domain, :math:`f^t` 36 | - w_s (tensor, optional): a rescaling weight given to each instance from source domain. 37 | - w_t (tensor, optional): a rescaling weight given to each instance from target domain. 38 | 39 | Shape: 40 | - f_s, f_t: :math:`(N, F)` where F means the dimension of input features. 41 | - Outputs: scalar by default. If :attr:`reduction` is ``'none'``, then :math:`(N, )`. 42 | 43 | Examples:: 44 | 45 | >>> from dalib.modules.domain_discriminator import DomainDiscriminator 46 | >>> discriminator = DomainDiscriminator(in_feature=1024, hidden_size=1024) 47 | >>> loss = DomainAdversarialLoss(discriminator, reduction='mean') 48 | >>> # features from source domain and target domain 49 | >>> f_s, f_t = torch.randn(20, 1024), torch.randn(20, 1024) 50 | >>> # If you want to assign different weights to each instance, you should pass in w_s and w_t 51 | >>> w_s, w_t = torch.randn(20), torch.randn(20) 52 | >>> output = loss(f_s, f_t, w_s, w_t) 53 | """ 54 | 55 | def __init__(self, domain_discriminator: nn.Module, reduction: Optional[str] = 'mean', 56 | grl: Optional = None): 57 | super(DomainAdversarialLoss, self).__init__() 58 | self.grl = WarmStartGradientReverseLayer(alpha=1., lo=0., hi=1., max_iters=1000, auto_step=True) if grl is None else grl 59 | self.domain_discriminator = domain_discriminator 60 | self.bce = lambda input, target, weight: \ 61 | F.binary_cross_entropy(input, target, weight=weight, reduction=reduction) 62 | self.domain_discriminator_accuracy = None 63 | 64 | def forward(self, f_s: torch.Tensor, f_t: torch.Tensor, 65 | w_s: Optional[torch.Tensor] = None, w_t: Optional[torch.Tensor] = None) -> torch.Tensor: 66 | f = self.grl(torch.cat((f_s, f_t), dim=0)) 67 | d = self.domain_discriminator(f) 68 | d_s, d_t = d.chunk(2, dim=0) 69 | d_label_s = torch.ones((f_s.size(0), 1)).to(f_s.device) 70 | d_label_t = torch.zeros((f_t.size(0), 1)).to(f_t.device) 71 | self.domain_discriminator_accuracy = 0.5 * (binary_accuracy(d_s, d_label_s) + binary_accuracy(d_t, d_label_t)) 72 | 73 | if w_s is None: 74 | w_s = torch.ones_like(d_label_s) 75 | if w_t is None: 76 | w_t = torch.ones_like(d_label_t) 77 | return 0.5 * (self.bce(d_s, d_label_s, w_s.view_as(d_s)) + self.bce(d_t, d_label_t, w_t.view_as(d_t))) 78 | 79 | 80 | class ImageClassifier(ClassifierBase): 81 | def __init__(self, backbone: nn.Module, num_classes: int, bottleneck_dim: Optional[int] = 256, **kwargs): 82 | bottleneck = nn.Sequential( 83 | nn.AdaptiveAvgPool2d(output_size=(1, 1)), 84 | nn.Flatten(), 85 | nn.Linear(backbone.out_features, bottleneck_dim), 86 | nn.BatchNorm1d(bottleneck_dim), 87 | nn.ReLU() 88 | ) 89 | super(ImageClassifier, self).__init__(backbone, num_classes, bottleneck, bottleneck_dim, **kwargs) 90 | -------------------------------------------------------------------------------- /dalib/adaptation/segmentation/advent.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | 7 | class Discriminator(nn.Sequential): 8 | """ 9 | Domain discriminator model from 
10 | `ADVENT: Adversarial Entropy Minimization for Domain Adaptation in Semantic Segmentation (CVPR 2019) `_ 11 | 12 | Distinguish pixel-by-pixel whether the input predictions come from the source domain or the target domain. 13 | The source domain label is 1 and the target domain label is 0. 14 | 15 | Args: 16 | num_classes (int): num of classes in the predictions 17 | ndf (int): dimension of the hidden features 18 | 19 | Shape: 20 | - Inputs: :math:`(minibatch, C, H, W)` where :math:`C` is the number of classes 21 | - Outputs: :math:`(minibatch, 1, H, W)` 22 | """ 23 | def __init__(self, num_classes, ndf=64): 24 | super(Discriminator, self).__init__( 25 | nn.Conv2d(num_classes, ndf, kernel_size=4, stride=2, padding=1), 26 | nn.LeakyReLU(negative_slope=0.2, inplace=True), 27 | nn.Conv2d(ndf, ndf * 2, kernel_size=4, stride=2, padding=1), 28 | nn.LeakyReLU(negative_slope=0.2, inplace=True), 29 | nn.Conv2d(ndf * 2, ndf * 4, kernel_size=4, stride=2, padding=1), 30 | nn.LeakyReLU(negative_slope=0.2, inplace=True), 31 | nn.Conv2d(ndf * 4, ndf * 8, kernel_size=4, stride=2, padding=1), 32 | nn.LeakyReLU(negative_slope=0.2, inplace=True), 33 | nn.Conv2d(ndf * 8, 1, kernel_size=4, stride=2, padding=1), 34 | ) 35 | 36 | 37 | def prob_2_entropy(prob): 38 | """ convert probabilistic prediction maps to weighted self-information maps 39 | """ 40 | n, c, h, w = prob.size() 41 | return -torch.mul(prob, torch.log2(prob + 1e-30)) / np.log2(c) 42 | 43 | 44 | def bce_loss(y_pred, y_label): 45 | y_truth_tensor = torch.FloatTensor(y_pred.size()) 46 | y_truth_tensor.fill_(y_label) 47 | y_truth_tensor = y_truth_tensor.to(y_pred.get_device()) 48 | return F.binary_cross_entropy_with_logits(y_pred, y_truth_tensor) 49 | 50 | 51 | class DomainAdversarialEntropyLoss(nn.Module): 52 | r"""The `Domain Adversarial Entropy Loss `_ 53 | 54 | Minimizing entropy with adversarial learning through training a domain discriminator. 55 | 56 | Args: 57 | domain_discriminator (torch.nn.Module): A domain discriminator object, which predicts 58 | the domains of predictions. Its input shape is :math:`(minibatch, C, H, W)` and output shape is :math:`(minibatch, 1, H, W)` 59 | 60 | Inputs: 61 | - logits (tensor): logits output of segmentation model 62 | - domain_label (str, optional): whether the data comes from source or target. 63 | Choices: ['source', 'target']. Default: 'source' 64 | 65 | Shape: 66 | - logits: :math:`(minibatch, C, H, W)` where :math:`C` means the number of classes 67 | - Outputs: scalar. 68 | 69 | Examples:: 70 | 71 | >>> B, C, H, W = 2, 19, 512, 512 72 | >>> discriminator = Discriminator(num_classes=C) 73 | >>> dann = DomainAdversarialEntropyLoss(discriminator) 74 | >>> # logits output on source domain and target domain 75 | >>> y_s, y_t = torch.randn(B, C, H, W), torch.randn(B, C, H, W) 76 | >>> loss = 0.5 * (dann(y_s, "source") + dann(y_t, "target")) 77 | """ 78 | def __init__(self, discriminator: nn.Module): 79 | super(DomainAdversarialEntropyLoss, self).__init__() 80 | self.discriminator = discriminator 81 | 82 | def forward(self, logits, domain_label='source'): 83 | """ 84 | """ 85 | assert domain_label in ['source', 'target'] 86 | probability = F.softmax(logits, dim=1) 87 | entropy = prob_2_entropy(probability) 88 | domain_prediciton = self.discriminator(entropy) 89 | if domain_label == 'source': 90 | return bce_loss(domain_prediciton, 1) 91 | else: 92 | return bce_loss(domain_prediciton, 0) 93 | 94 | def train(self, mode=True): 95 | r"""Sets the discriminator in training mode. 
In the training mode, 96 | all the parameters in discriminator will be set requires_grad=True. 97 | 98 | Args: 99 | mode (bool): whether to set training mode (``True``) or evaluation mode (``False``). Default: ``True``. 100 | """ 101 | self.discriminator.train(mode) 102 | for param in self.discriminator.parameters(): 103 | param.requires_grad = mode 104 | return self 105 | 106 | def eval(self): 107 | r"""Sets the module in evaluation mode. In the training mode, 108 | all the parameters in discriminator will be set requires_grad=False. 109 | 110 | This is equivalent with :meth:`self.train(False) `. 111 | """ 112 | return self.train(False) 113 | -------------------------------------------------------------------------------- /common/vision/models/keypoint_detection/pose_resnet.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Modified from https://github.com/microsoft/human-pose-estimation.pytorch 3 | # ------------------------------------------------------------------------------ 4 | 5 | import torch.nn as nn 6 | from ..resnet import _resnet, Bottleneck 7 | 8 | 9 | class Upsampling(nn.Sequential): 10 | """ 11 | 3-layers deconvolution used in `Simple Baseline `_. 12 | """ 13 | def __init__(self, in_channel=2048, hidden_dims=(256, 256, 256), kernel_sizes=(4, 4, 4), bias=False): 14 | assert len(hidden_dims) == len(kernel_sizes), \ 15 | 'ERROR: len(hidden_dims) is different len(kernel_sizes)' 16 | 17 | layers = [] 18 | for hidden_dim, kernel_size in zip(hidden_dims, kernel_sizes): 19 | if kernel_size == 4: 20 | padding = 1 21 | output_padding = 0 22 | elif kernel_size == 3: 23 | padding = 1 24 | output_padding = 1 25 | elif kernel_size == 2: 26 | padding = 0 27 | output_padding = 0 28 | else: 29 | raise NotImplementedError("kernel_size is {}".format(kernel_size)) 30 | 31 | layers.append( 32 | nn.ConvTranspose2d( 33 | in_channels=in_channel, 34 | out_channels=hidden_dim, 35 | kernel_size=kernel_size, 36 | stride=2, 37 | padding=padding, 38 | output_padding=output_padding, 39 | bias=bias)) 40 | layers.append(nn.BatchNorm2d(hidden_dim)) 41 | layers.append(nn.ReLU(inplace=True)) 42 | in_channel = hidden_dim 43 | 44 | super(Upsampling, self).__init__(*layers) 45 | 46 | # init following Simple Baseline 47 | for name, m in self.named_modules(): 48 | if isinstance(m, nn.ConvTranspose2d): 49 | nn.init.normal_(m.weight, std=0.001) 50 | if bias: 51 | nn.init.constant_(m.bias, 0) 52 | elif isinstance(m, nn.BatchNorm2d): 53 | nn.init.constant_(m.weight, 1) 54 | nn.init.constant_(m.bias, 0) 55 | 56 | 57 | class PoseResNet(nn.Module): 58 | """ 59 | `Simple Baseline `_ for keypoint detection. 60 | 61 | Args: 62 | backbone (torch.nn.Module): Backbone to extract 2-d features from data 63 | upsampling (torch.nn.Module): Layer to upsample image feature to heatmap size 64 | feature_dim (int): The dimension of the features from upsampling layer. 65 | num_keypoints (int): Number of keypoints 66 | finetune (bool, optional): Whether use 10x smaller learning rate in the backbone. 
Default: False 67 | """ 68 | def __init__(self, backbone, upsampling, feature_dim, num_keypoints, finetune=False): 69 | super(PoseResNet, self).__init__() 70 | self.backbone = backbone 71 | self.upsampling = upsampling 72 | self.head = nn.Conv2d(in_channels=feature_dim, out_channels=num_keypoints, kernel_size=1, stride=1, padding=0) 73 | self.finetune = finetune 74 | for m in self.head.modules(): 75 | nn.init.normal_(m.weight, std=0.001) 76 | nn.init.constant_(m.bias, 0) 77 | 78 | def forward(self, x): 79 | x = self.backbone(x) 80 | x = self.upsampling(x) 81 | x = self.head(x) 82 | return x 83 | 84 | def get_parameters(self, lr=1.): 85 | return [ 86 | {'params': self.backbone.parameters(), 'lr': 0.1 * lr if self.finetune else lr}, 87 | {'params': self.upsampling.parameters(), 'lr': lr}, 88 | {'params': self.head.parameters(), 'lr': lr}, 89 | ] 90 | 91 | 92 | def _pose_resnet(arch, num_keypoints, block, layers, pretrained_backbone, deconv_with_bias, finetune=False, progress=True, **kwargs): 93 | backbone = _resnet(arch, block, layers, pretrained_backbone, progress, **kwargs) 94 | upsampling = Upsampling(backbone.out_features, bias=deconv_with_bias) 95 | model = PoseResNet(backbone, upsampling, 256, num_keypoints, finetune) 96 | return model 97 | 98 | 99 | def pose_resnet101(num_keypoints, pretrained_backbone=True, deconv_with_bias=False, finetune=False, progress=True, **kwargs): 100 | """Constructs a Simple Baseline model with a ResNet-101 backbone. 101 | 102 | Args: 103 | num_keypoints (int): number of keypoints 104 | pretrained_backbone (bool, optional): If True, returns a model pre-trained on ImageNet. Default: True. 105 | deconv_with_bias (bool, optional): Whether use bias in the deconvolution layer. Default: False 106 | finetune (bool, optional): Whether use 10x smaller learning rate in the backbone. Default: False 107 | progress (bool, optional): If True, displays a progress bar of the download to stderr. Default: True 108 | """ 109 | return _pose_resnet('resnet101', num_keypoints, Bottleneck, [3, 4, 23, 3], pretrained_backbone, deconv_with_bias, finetune, progress, **kwargs) -------------------------------------------------------------------------------- /dalib/translation/cyclegan/util.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import functools 3 | import random 4 | import torch 5 | from torch.nn import init 6 | 7 | 8 | class Identity(nn.Module): 9 | def forward(self, x): 10 | return x 11 | 12 | 13 | def get_norm_layer(norm_type='instance'): 14 | """Return a normalization layer 15 | 16 | Parameters: 17 | norm_type (str) -- the name of the normalization layer: batch | instance | none 18 | 19 | For BatchNorm, we use learnable affine parameters and track running statistics (mean/stddev). 20 | For InstanceNorm, we do not use learnable affine parameters. We do not track running statistics. 21 | """ 22 | if norm_type == 'batch': 23 | norm_layer = functools.partial(nn.BatchNorm2d, affine=True, track_running_stats=True) 24 | elif norm_type == 'instance': 25 | norm_layer = functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=False) 26 | elif norm_type == 'none': 27 | def norm_layer(x): return Identity() 28 | else: 29 | raise NotImplementedError('normalization layer [%s] is not found' % norm_type) 30 | return norm_layer 31 | 32 | 33 | def init_weights(net, init_type='normal', init_gain=0.02): 34 | """Initialize network weights. 
35 | 
36 | Args:
37 | net (torch.nn.Module): network to be initialized
38 | init_type (str): the name of an initialization method. Choices include: ``normal`` |
39 | ``xavier`` | ``kaiming`` | ``orthogonal``
40 | init_gain (float): scaling factor for normal, xavier and orthogonal.
41 | 
42 | 'normal' is used in the original CycleGAN paper, but xavier and kaiming might
43 | work better for some applications.
44 | """
45 | def init_func(m): # define the initialization function
46 | classname = m.__class__.__name__
47 | if hasattr(m, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1):
48 | if init_type == 'normal':
49 | init.normal_(m.weight.data, 0.0, init_gain)
50 | elif init_type == 'xavier':
51 | init.xavier_normal_(m.weight.data, gain=init_gain)
52 | elif init_type == 'kaiming':
53 | init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
54 | elif init_type == 'orthogonal':
55 | init.orthogonal_(m.weight.data, gain=init_gain)
56 | else:
57 | raise NotImplementedError('initialization method [%s] is not implemented' % init_type)
58 | if hasattr(m, 'bias') and m.bias is not None:
59 | init.constant_(m.bias.data, 0.0)
60 | elif classname.find('BatchNorm2d') != -1: # BatchNorm Layer's weight is not a matrix; only normal distribution applies.
61 | init.normal_(m.weight.data, 1.0, init_gain)
62 | init.constant_(m.bias.data, 0.0)
63 | 
64 | print('initialize network with %s' % init_type)
65 | net.apply(init_func) # apply the initialization function
66 | 
67 | 
68 | class ImagePool:
69 | """An image buffer that stores previously generated images.
70 | 
71 | This buffer enables us to update discriminators using a history of generated images
72 | rather than the ones produced by the latest generators.
73 | 
74 | Args:
75 | pool_size (int): the size of image buffer, if pool_size=0, no buffer will be created
76 | 
77 | """
78 | 
79 | def __init__(self, pool_size):
80 | self.pool_size = pool_size
81 | if self.pool_size > 0: # create an empty pool
82 | self.num_imgs = 0
83 | self.images = []
84 | 
85 | def query(self, images):
86 | """Return an image from the pool.
87 | 
88 | Args:
89 | images (torch.Tensor): the latest generated images from the generator
90 | 
91 | Returns:
92 | For each input image: with probability 50%, the buffer returns the image itself;
93 | with probability 50%, the buffer returns an image previously stored in the buffer
94 | and inserts the current image into the buffer.
95 | 
96 | """
97 | if self.pool_size == 0: # if the buffer size is 0, do nothing
98 | return images
99 | return_images = []
100 | for image in images:
101 | image = torch.unsqueeze(image.data, 0)
102 | if self.num_imgs < self.pool_size: # if the buffer is not full; keep inserting current images to the buffer
103 | self.num_imgs = self.num_imgs + 1
104 | self.images.append(image)
105 | return_images.append(image)
106 | else:
107 | p = random.uniform(0, 1)
108 | if p > 0.5: # by 50% chance, the buffer will return a previously stored image, and insert the current image into the buffer
109 | random_id = random.randint(0, self.pool_size - 1) # randint is inclusive
110 | tmp = self.images[random_id].clone()
111 | self.images[random_id] = image
112 | return_images.append(tmp)
113 | else: # by another 50% chance, the buffer will return the current image
114 | return_images.append(image)
115 | return_images = torch.cat(return_images, 0) # collect all the images and return
116 | return return_images
117 | 
118 | 
119 | def set_requires_grad(net, requires_grad=False):
120 | """
121 | Set requires_grad=False for all the parameters of the network to avoid unnecessary computations
122 | """
123 | for param in net.parameters():
124 | param.requires_grad = requires_grad
125 | 
--------------------------------------------------------------------------------
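A brief usage sketch for the helpers above. It is illustrative only: the tiny generator/discriminator pair and the tensor shapes are placeholders rather than the networks defined elsewhere in this package.

>>> norm_layer = get_norm_layer('instance')
>>> net_g = nn.Sequential(nn.Conv2d(3, 3, kernel_size=3, padding=1), norm_layer(3), nn.Tanh())
>>> net_d = nn.Sequential(nn.Conv2d(3, 1, kernel_size=4, stride=2, padding=1))
>>> init_weights(net_g, init_type='normal', init_gain=0.02)
>>> init_weights(net_d, init_type='normal', init_gain=0.02)
>>> pool = ImagePool(pool_size=50)
>>> fake_b = net_g(torch.randn(1, 3, 256, 256))
>>> d_input = pool.query(fake_b.detach())   # mix current and historical fakes for the discriminator update
>>> set_requires_grad(net_d, False)         # freeze the discriminator while the generator is updated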
/common/vision/datasets/keypoint_detection/util.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | 
4 | 
5 | def generate_target(joints, joints_vis, heatmap_size, sigma, image_size):
6 | """Generate heatmaps for joints.
7 | 
8 | Args:
9 | joints: (K, 2) joint coordinates in the image
10 | joints_vis: (K, 1) joint visibility
11 | heatmap_size: W, H of the heatmap
12 | sigma: standard deviation of the Gaussian peak drawn for each joint
13 | image_size: W, H of the image
14 | 
15 | Returns:
16 | target (K, H, W) heatmaps and target_weight (K, 1), where the weight is set to 0 for joints whose Gaussian falls outside the heatmap
17 | """
18 | num_joints = joints.shape[0]
19 | target_weight = np.ones((num_joints, 1), dtype=np.float32)
20 | target_weight[:, 0] = joints_vis[:, 0]
21 | 
22 | target = np.zeros((num_joints,
23 | heatmap_size[1],
24 | heatmap_size[0]),
25 | dtype=np.float32)
26 | 
27 | tmp_size = sigma * 3
28 | image_size = np.array(image_size)
29 | heatmap_size = np.array(heatmap_size)
30 | 
31 | for joint_id in range(num_joints):
32 | feat_stride = image_size / heatmap_size
33 | mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5)
34 | mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5)
35 | # Check that any part of the gaussian is in-bounds
36 | ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
37 | br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
38 | if mu_x >= heatmap_size[0] or mu_y >= heatmap_size[1] \
39 | or mu_x < 0 or mu_y < 0:
40 | # If not, mark the joint as not visible and skip it
41 | target_weight[joint_id] = 0
42 | continue
43 | 
44 | # Generate gaussian
45 | size = 2 * tmp_size + 1
46 | x = np.arange(0, size, 1, np.float32)
47 | y = x[:, np.newaxis]
48 | x0 = y0 = size // 2
49 | # The gaussian is not normalized, we want the center value to equal 1
50 | g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
51 | 
52 | # Usable gaussian range
53 | g_x = max(0, -ul[0]), min(br[0], heatmap_size[0]) - ul[0]
54 | g_y = max(0, -ul[1]), min(br[1], heatmap_size[1]) - ul[1]
55 | # Image range
56 | img_x = max(0, ul[0]), min(br[0], heatmap_size[0])
57 | img_y = max(0, ul[1]), min(br[1], heatmap_size[1])
58 | 
59 | v = target_weight[joint_id]
60 | if v > 0.5:
61 | target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \
62 | g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
63 | 
64 | return target, target_weight
65 | 
66 | 
67 | def keypoint2d_to_3d(keypoint2d: np.ndarray,
intrinsic_matrix: np.ndarray, Zc: np.ndarray): 68 | """Convert 2D keypoints to 3D keypoints""" 69 | uv1 = np.concatenate([np.copy(keypoint2d), np.ones((keypoint2d.shape[0], 1))], axis=1).T * Zc # 3 x NUM_KEYPOINTS 70 | xyz = np.matmul(np.linalg.inv(intrinsic_matrix), uv1).T # NUM_KEYPOINTS x 3 71 | return xyz 72 | 73 | 74 | def keypoint3d_to_2d(keypoint3d: np.ndarray, intrinsic_matrix: np.ndarray): 75 | """Convert 3D keypoints to 2D keypoints""" 76 | keypoint2d = np.matmul(intrinsic_matrix, keypoint3d.T).T # NUM_KEYPOINTS x 3 77 | keypoint2d = keypoint2d[:, :2] / keypoint2d[:, 2:3] # NUM_KEYPOINTS x 2 78 | return keypoint2d 79 | 80 | 81 | def scale_box(box, image_width, image_height, scale): 82 | """ 83 | Change `box` to a square box. 84 | The side with of the square box will be `scale` * max(w, h) 85 | where w and h is the width and height of the origin box 86 | """ 87 | left, upper, right, lower = box 88 | center_x, center_y = (left + right) / 2, (upper + lower) / 2 89 | w, h = right - left, lower - upper 90 | side_with = min(round(scale * max(w, h)), min(image_width, image_height)) 91 | left = round(center_x - side_with / 2) 92 | right = left + side_with - 1 93 | upper = round(center_y - side_with / 2) 94 | lower = upper + side_with - 1 95 | if left < 0: 96 | left = 0 97 | right = side_with - 1 98 | if right >= image_width: 99 | right = image_width - 1 100 | left = image_width - side_with 101 | if upper < 0: 102 | upper = 0 103 | lower = side_with -1 104 | if lower >= image_height: 105 | lower = image_height - 1 106 | upper = image_height - side_with 107 | return left, upper, right, lower 108 | 109 | 110 | def get_bounding_box(keypoint2d: np.array): 111 | """Get the bounding box for keypoints""" 112 | left = np.min(keypoint2d[:, 0]) 113 | right = np.max(keypoint2d[:, 0]) 114 | upper = np.min(keypoint2d[:, 1]) 115 | lower = np.max(keypoint2d[:, 1]) 116 | return left, upper, right, lower 117 | 118 | 119 | def visualize_heatmap(image, heatmaps, filename): 120 | image = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR).copy() 121 | H, W = heatmaps.shape[1], heatmaps.shape[2] 122 | resized_image = cv2.resize(image, (int(W), int(H))) 123 | heatmaps = heatmaps.mul(255).clamp(0, 255).byte().cpu().numpy() 124 | for k in range(heatmaps.shape[0]): 125 | heatmap = heatmaps[k] 126 | colored_heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET) 127 | masked_image = colored_heatmap * 0.7 + resized_image * 0.3 128 | cv2.imwrite(filename.format(k), masked_image) 129 | 130 | 131 | def area(left, upper, right, lower): 132 | return max(right - left + 1, 0) * max(lower - upper + 1, 0) 133 | 134 | 135 | def intersection(box_a, box_b): 136 | left_a, upper_a, right_a, lower_a = box_a 137 | left_b, upper_b, right_b, lower_b = box_b 138 | return max(left_a, left_b), max(upper_a, upper_b), min(right_a, right_b), min(lower_a, lower_b) 139 | -------------------------------------------------------------------------------- /common/vision/datasets/segmentation/cityscapes.py: -------------------------------------------------------------------------------- 1 | import os 2 | from .segmentation_list import SegmentationList 3 | from .._util import download as download_data 4 | 5 | 6 | class Cityscapes(SegmentationList): 7 | """`Cityscapes `_ is a real-world semantic segmentation dataset collected 8 | in driving scenarios. 9 | 10 | Args: 11 | root (str): Root directory of dataset 12 | split (str, optional): The dataset split, supports ``train``, or ``val``. 13 | data_folder (str, optional): Sub-directory of the image. 
Default: 'leftImg8bit'. 14 | label_folder (str, optional): Sub-directory of the label. Default: 'gtFine'. 15 | mean (seq[float]): mean BGR value. Normalize the image if not None. Default: None. 16 | transforms (callable, optional): A function/transform that takes in (PIL image, label) pair \ 17 | and returns a transformed version. E.g, :class:`~common.vision.transforms.segmentation.Resize`. 18 | 19 | .. note:: You need to download Cityscapes manually. 20 | Ensure that there exist following files in the `root` directory before you using this class. 21 | :: 22 | leftImg8bit/ 23 | train/ 24 | val/ 25 | test/ 26 | gtFine/ 27 | train/ 28 | val/ 29 | test/ 30 | """ 31 | 32 | CLASSES = ['road', 'sidewalk', 'building', 'wall', 'fence', 'pole', 'traffic light', 'traffic sign', 33 | 'vegetation', 'terrain', 'sky', 'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', 34 | 'bicycle'] 35 | 36 | ID_TO_TRAIN_ID = { 37 | 7: 0, 8: 1, 11: 2, 12: 3, 13: 4, 17: 5, 38 | 19: 6, 20: 7, 21: 8, 22: 9, 23: 10, 24: 11, 25: 12, 39 | 26: 13, 27: 14, 28: 15, 31: 16, 32: 17, 33: 18 40 | } 41 | TRAIN_ID_TO_COLOR = [(128, 64, 128), (244, 35, 232), (70, 70, 70), (102, 102, 156), 42 | (190, 153, 153), (153, 153, 153), (250, 170, 30), (220, 220, 0), 43 | (107, 142, 35), (152, 251, 152), (70, 130, 180), (220, 20, 60), 44 | (255, 0, 0), (0, 0, 142), (0, 0, 70), (0, 60, 100), (0, 80, 100), 45 | (0, 0, 230), (119, 11, 32), [0, 0, 0]] 46 | download_list = [ 47 | ("image_list", "image_list.zip", "https://cloud.tsinghua.edu.cn/f/08745e798b16483db4bf/?dl=1"), 48 | ] 49 | EVALUATE_CLASSES = CLASSES 50 | 51 | def __init__(self, root, split='train', data_folder='leftImg8bit', label_folder='gtFine', **kwargs): 52 | assert split in ['train', 'val'] 53 | 54 | # download meta information from Internet 55 | list(map(lambda args: download_data(root, *args), self.download_list)) 56 | data_list_file = os.path.join(root, "image_list", "{}.txt".format(split)) 57 | self.split = split 58 | super(Cityscapes, self).__init__(root, Cityscapes.CLASSES, data_list_file, data_list_file, 59 | os.path.join(data_folder, split), os.path.join(label_folder, split), 60 | id_to_train_id=Cityscapes.ID_TO_TRAIN_ID, 61 | train_id_to_color=Cityscapes.TRAIN_ID_TO_COLOR, **kwargs) 62 | 63 | def parse_label_file(self, label_list_file): 64 | with open(label_list_file, "r") as f: 65 | label_list = [line.strip().replace("leftImg8bit", "gtFine_labelIds") for line in f.readlines()] 66 | return label_list 67 | 68 | 69 | class FoggyCityscapes(Cityscapes): 70 | """`Foggy Cityscapes `_ is a real-world semantic segmentation dataset collected 71 | in foggy driving scenarios. 72 | 73 | Args: 74 | root (str): Root directory of dataset 75 | split (str, optional): The dataset split, supports ``train``, or ``val``. 76 | data_folder (str, optional): Sub-directory of the image. Default: 'leftImg8bit'. 77 | label_folder (str, optional): Sub-directory of the label. Default: 'gtFine'. 78 | beta (float, optional): The parameter for foggy. Choices includes: 0.005, 0.01, 0.02. Default: 0.02 79 | mean (seq[float]): mean BGR value. Normalize the image if not None. Default: None. 80 | transforms (callable, optional): A function/transform that takes in (PIL image, label) pair \ 81 | and returns a transformed version. E.g, :class:`~common.vision.transforms.segmentation.Resize`. 82 | 83 | .. note:: You need to download Cityscapes manually. 84 | Ensure that there exist following files in the `root` directory before you using this class. 
85 | :: 86 | leftImg8bit_foggy/ 87 | train/ 88 | val/ 89 | test/ 90 | gtFine/ 91 | train/ 92 | val/ 93 | test/ 94 | """ 95 | def __init__(self, root, split='train', data_folder='leftImg8bit_foggy', label_folder='gtFine', beta=0.02, **kwargs): 96 | assert beta in [0.02, 0.01, 0.005] 97 | self.beta = beta 98 | super(FoggyCityscapes, self).__init__(root, split, data_folder, label_folder, **kwargs) 99 | 100 | def parse_data_file(self, file_name): 101 | """Parse file to image list 102 | 103 | Args: 104 | file_name (str): The path of data file 105 | 106 | Returns: 107 | List of image path 108 | """ 109 | with open(file_name, "r") as f: 110 | data_list = [line.strip().replace("leftImg8bit", "leftImg8bit_foggy_beta_{}".format(self.beta)) for line in f.readlines()] 111 | return data_list 112 | -------------------------------------------------------------------------------- /common/vision/datasets/keypoint_detection/hand_3d_studio.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import random 4 | from PIL import ImageFile, Image 5 | import torch 6 | import os.path as osp 7 | 8 | from .._util import download as download_data, check_exits 9 | from .keypoint_dataset import Hand21KeypointDataset 10 | from .util import * 11 | 12 | ImageFile.LOAD_TRUNCATED_IMAGES = True 13 | 14 | 15 | class Hand3DStudio(Hand21KeypointDataset): 16 | """`Hand-3d-Studio Dataset `_ 17 | 18 | Args: 19 | root (str): Root directory of dataset 20 | split (str, optional): The dataset split, supports ``train``, ``test``, or ``all``. 21 | task (str, optional): The task to create dataset. Choices include ``'noobject'``: only hands without objects, \ 22 | ``'object'``: only hands interacting with hands, and ``'all'``: all hands. Default: 'noobject'. 23 | download (bool, optional): If true, downloads the dataset from the internet and puts it \ 24 | in root directory. If dataset is already downloaded, it is not downloaded again. 25 | transforms (callable, optional): A function/transform that takes in a dict (which contains PIL image and 26 | its labels) and returns a transformed version. E.g, :class:`~common.vision.transforms.keypoint_detection.Resize`. 27 | image_size (tuple): (width, height) of the image. Default: (256, 256) 28 | heatmap_size (tuple): (width, height) of the heatmap. Default: (64, 64) 29 | sigma (int): sigma parameter when generate the heatmap. Default: 2 30 | 31 | .. note:: 32 | We found that the original H3D image is in high resolution while most part in an image is background, 33 | thus we crop the image and keep only the surrounding area of hands (1.5x bigger than hands) to speed up training. 34 | 35 | .. note:: In `root`, there will exist following files after downloading. 
36 | :: 37 | H3D_crop/ 38 | annotation.json 39 | part1/ 40 | part2/ 41 | part3/ 42 | part4/ 43 | part5/ 44 | """ 45 | def __init__(self, root, split='train', task='noobject', download=True, **kwargs): 46 | assert split in ['train', 'test', 'all'] 47 | self.split = split 48 | assert task in ['noobject', 'object', 'all'] 49 | self.task = task 50 | 51 | if download: 52 | download_data(root, "H3D_crop", "H3D_crop.tar", "https://cloud.tsinghua.edu.cn/f/d4e612e44dc04d8eb01f/?dl=1") 53 | else: 54 | check_exits(root, "H3D_crop") 55 | 56 | root = osp.join(root, "H3D_crop") 57 | # load labels 58 | annotation_file = os.path.join(root, 'annotation.json') 59 | print("loading from {}".format(annotation_file)) 60 | with open(annotation_file) as f: 61 | samples = list(json.load(f)) 62 | if task == 'noobject': 63 | samples = [sample for sample in samples if int(sample['without_object']) == 1] 64 | elif task == 'object': 65 | samples = [sample for sample in samples if int(sample['without_object']) == 0] 66 | 67 | random.seed(42) 68 | random.shuffle(samples) 69 | samples_len = len(samples) 70 | samples_split = min(int(samples_len * 0.2), 3200) 71 | if split == 'train': 72 | samples = samples[samples_split:] 73 | elif split == 'test': 74 | samples = samples[:samples_split] 75 | 76 | super(Hand3DStudio, self).__init__(root, samples, **kwargs) 77 | 78 | def __getitem__(self, index): 79 | sample = self.samples[index] 80 | image_name = sample['name'] 81 | image_path = os.path.join(self.root, image_name) 82 | image = Image.open(image_path) 83 | keypoint3d_camera = np.array(sample['keypoint3d']) # NUM_KEYPOINTS x 3 84 | keypoint2d = np.array(sample['keypoint2d']) # NUM_KEYPOINTS x 2 85 | intrinsic_matrix = np.array(sample['intrinsic_matrix']) 86 | Zc = keypoint3d_camera[:, 2] 87 | 88 | image, data = self.transforms(image, keypoint2d=keypoint2d, intrinsic_matrix=intrinsic_matrix) 89 | keypoint2d = data['keypoint2d'] 90 | intrinsic_matrix = data['intrinsic_matrix'] 91 | keypoint3d_camera = keypoint2d_to_3d(keypoint2d, intrinsic_matrix, Zc) 92 | 93 | # noramlize 2D pose: 94 | visible = np.ones((self.num_keypoints, ), dtype=np.float32) 95 | visible = visible[:, np.newaxis] 96 | # 2D heatmap 97 | target, target_weight = generate_target(keypoint2d, visible, self.heatmap_size, self.sigma, self.image_size) 98 | target = torch.from_numpy(target) 99 | target_weight = torch.from_numpy(target_weight) 100 | 101 | # normalize 3D pose: 102 | # put middle finger metacarpophalangeal (MCP) joint in the center of the coordinate system 103 | # and make distance between wrist and middle finger MCP joint to be of length 1 104 | keypoint3d_n = keypoint3d_camera - keypoint3d_camera[9:10, :] 105 | keypoint3d_n = keypoint3d_n / np.sqrt(np.sum(keypoint3d_n[0, :] ** 2)) 106 | 107 | meta = { 108 | 'image': image_name, 109 | 'keypoint2d': keypoint2d, # (NUM_KEYPOINTS x 2) 110 | 'keypoint3d': keypoint3d_n, # (NUM_KEYPOINTS x 3) 111 | } 112 | return image, target, target_weight, meta 113 | 114 | 115 | class Hand3DStudioAll(Hand3DStudio): 116 | """ 117 | `Hand-3d-Studio Dataset `_ 118 | 119 | """ 120 | def __init__(self, root, task='all', **kwargs): 121 | super(Hand3DStudioAll, self).__init__(root, task=task, **kwargs) -------------------------------------------------------------------------------- /common/utils/analysis/tsne.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import matplotlib 3 | import os 4 | matplotlib.use('Agg') 5 | from sklearn.manifold import TSNE 6 | import numpy as np 
7 | import matplotlib.pyplot as plt
8 | import matplotlib.colors as col
9 | import matplotlib.cm as cm
10 | # import umap
11 | 
12 | def visualize(source_feature: torch.Tensor, source_labels: torch.Tensor,
13 | target_feature: torch.Tensor, target_labels: torch.Tensor,
14 | filename: str, source_color='r', target_color='b'):
15 | """
16 | Visualize features from different domains using t-SNE.
17 | 
18 | Args:
19 | source_feature (tensor): features from source domain in shape :math:`(minibatch, F)`
20 | target_feature (tensor): features from target domain in shape :math:`(minibatch, F)`
21 | filename (str): the file name to save t-SNE
22 | source_color (str): the color of the source features. Default: 'r'
23 | target_color (str): the color of the target features. Default: 'b'
24 | 
25 | """
26 | source_feature = source_feature.numpy()
27 | target_feature = target_feature.numpy()
28 | features = np.concatenate([source_feature, target_feature], axis=0)
29 | 
30 | # map features to 2-d using TSNE
31 | print("Transforming features by t-SNE...")
32 | X_tsne = TSNE(n_components=2, random_state=33).fit_transform(features)
33 | # X_tsne = umap.UMAP(n_components=2, metric='euclidean', n_neighbors=15).fit_transform(features)
34 | 
35 | # domain labels, 1 represents source while 0 represents target
36 | domains = np.concatenate((np.ones(len(source_feature)), np.zeros(len(target_feature))))
37 | 
38 | # visualize using matplotlib
39 | plt.figure(figsize=(10, 10))
40 | plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=domains, cmap=col.ListedColormap([target_color, source_color]), s=2)
41 | plt.savefig(filename)
42 | 
43 | def visualize_cluster(source_features, source_labels, source_clusters,
44 | target_features, target_labels, target_clusters, file_root,
45 | num_s=None, num_t=None, umap=True, metric='euclidean'):
46 | source_features = source_features.numpy()
47 | target_features = target_features.numpy()
48 | source_labels = source_labels.numpy()
49 | target_labels = target_labels.numpy()
50 | source_clusters = source_clusters.numpy()
51 | target_clusters = target_clusters.numpy()
52 | num_classes = len(np.unique(source_labels))
53 | num_source = source_features.shape[0]
54 | num_target = target_features.shape[0]
55 | 
56 | # select features
57 | if num_s is not None and num_s < num_source:
58 | source_tsne_idx = np.random.choice(num_source, num_s, replace=False)
59 | source_features = source_features[source_tsne_idx, :]
60 | source_labels = source_labels[source_tsne_idx]
61 | source_clusters = source_clusters[source_tsne_idx]
62 | num_source = num_s
63 | if num_t is not None and num_t < num_target:
64 | target_tsne_idx = np.random.choice(num_target, num_t, replace=False)
65 | target_features = target_features[target_tsne_idx, :]
66 | target_labels = target_labels[target_tsne_idx]
67 | target_clusters = target_clusters[target_tsne_idx]
68 | num_target = num_t
69 | features = np.concatenate([source_features, target_features], axis=0)
70 | 
71 | # map features to 2-d using TSNE
72 | if umap:
73 | print("Transforming features by umap...")
74 | assert False  # umap support is disabled here; install umap-learn and un-comment the import above to enable it
75 | # X_tsne = umap.UMAP(n_components=2, metric='euclidean', n_neighbors=15).fit_transform(features)
76 | else:
77 | print("Transforming features by tsne...")
78 | X_tsne = TSNE(
79 | n_components=2, random_state=33, metric=metric
80 | ).fit_transform(features)
81 | 
82 | source_tsne = X_tsne[:num_source, :]
83 | target_tsne = X_tsne[num_source:, :]
84 | 
85 | # draw
86 | plt.figure(figsize=(15, 15))
87 | colors = cm.rainbow(np.linspace(0, 1, num_classes))
88 | for i
in range(num_classes): 89 | # draw the anchor class with color 90 | source_mask = (source_labels == i) 91 | target_mask = (target_labels == i) 92 | source_mask_neg = ~source_mask 93 | target_mask_neg = ~target_mask 94 | 95 | # draw other class as grey 96 | plt.scatter( 97 | source_tsne[source_mask_neg][:, 0], source_tsne[source_mask_neg][:, 1], 98 | c="grey", s=7, marker='o' 99 | ) 100 | plt.scatter( 101 | target_tsne[target_mask_neg][:, 0], target_tsne[target_mask_neg][:, 1], 102 | c="grey", s=12, marker='x' 103 | ) 104 | 105 | # draw anchor class as color 106 | for j in range(num_classes): 107 | source_cluster_mask = (source_clusters == j) 108 | target_cluster_mask = (target_clusters == j) 109 | source_combined_mask = source_mask & source_cluster_mask 110 | target_combined_mask = target_mask & target_cluster_mask 111 | plt.scatter( 112 | source_tsne[source_combined_mask][:, 0], source_tsne[source_combined_mask][:, 1], 113 | color=colors[j], s=14, marker='o', alpha=0.7 114 | ) 115 | plt.scatter( 116 | target_tsne[target_combined_mask][:, 0], target_tsne[target_combined_mask][:, 1], 117 | color=colors[j], s=21, marker='x', alpha=0.7 118 | ) 119 | 120 | plt.savefig(os.path.join(file_root, "class_%d.png" % i)) 121 | plt.clf() 122 | 123 | # Draw source vs target domain 124 | plt.scatter(source_tsne[:, 0], source_tsne[:, 1], color='red', s=2) 125 | plt.scatter(target_tsne[:, 0], target_tsne[:, 1], color='blue', s=2) 126 | plt.savefig(os.path.join(file_root, "overall.png")) -------------------------------------------------------------------------------- /dalib/adaptation/jan.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Sequence 2 | import torch 3 | import torch.nn as nn 4 | 5 | from common.modules.classifier import Classifier as ClassifierBase 6 | from ..modules.grl import GradientReverseLayer 7 | from ..modules.kernels import GaussianKernel 8 | from .dan import _update_index_matrix 9 | 10 | 11 | __all__ = ['JointMultipleKernelMaximumMeanDiscrepancy', 'ImageClassifier'] 12 | 13 | 14 | 15 | class JointMultipleKernelMaximumMeanDiscrepancy(nn.Module): 16 | r"""The Joint Multiple Kernel Maximum Mean Discrepancy (JMMD) used in 17 | `Deep Transfer Learning with Joint Adaptation Networks (ICML 2017) `_ 18 | 19 | Given source domain :math:`\mathcal{D}_s` of :math:`n_s` labeled points and target domain :math:`\mathcal{D}_t` 20 | of :math:`n_t` unlabeled points drawn i.i.d. from P and Q respectively, the deep networks will generate 21 | activations in layers :math:`\mathcal{L}` as :math:`\{(z_i^{s1}, ..., z_i^{s|\mathcal{L}|})\}_{i=1}^{n_s}` and 22 | :math:`\{(z_i^{t1}, ..., z_i^{t|\mathcal{L}|})\}_{i=1}^{n_t}`. The empirical estimate of 23 | :math:`\hat{D}_{\mathcal{L}}(P, Q)` is computed as the squared distance between the empirical kernel mean 24 | embeddings as 25 | 26 | .. math:: 27 | \hat{D}_{\mathcal{L}}(P, Q) &= 28 | \dfrac{1}{n_s^2} \sum_{i=1}^{n_s}\sum_{j=1}^{n_s} \prod_{l\in\mathcal{L}} k^l(z_i^{sl}, z_j^{sl}) \\ 29 | &+ \dfrac{1}{n_t^2} \sum_{i=1}^{n_t}\sum_{j=1}^{n_t} \prod_{l\in\mathcal{L}} k^l(z_i^{tl}, z_j^{tl}) \\ 30 | &- \dfrac{2}{n_s n_t} \sum_{i=1}^{n_s}\sum_{j=1}^{n_t} \prod_{l\in\mathcal{L}} k^l(z_i^{sl}, z_j^{tl}). \\ 31 | 32 | Args: 33 | kernels (tuple(tuple(torch.nn.Module))): kernel functions, where `kernels[r]` corresponds to kernel :math:`k^{\mathcal{L}[r]}`. 34 | linear (bool): whether use the linear version of JAN. Default: False 35 | thetas (list(Theta): use adversarial version JAN if not None. 
Default: None 36 | 37 | Inputs: 38 | - z_s (tuple(tensor)): multiple layers' activations from the source domain, :math:`z^s` 39 | - z_t (tuple(tensor)): multiple layers' activations from the target domain, :math:`z^t` 40 | 41 | Shape: 42 | - :math:`z^{sl}` and :math:`z^{tl}`: :math:`(minibatch, *)` where * means any dimension 43 | - Outputs: scalar 44 | 45 | .. note:: 46 | Activations :math:`z^{sl}` and :math:`z^{tl}` must have the same shape. 47 | 48 | .. note:: 49 | The kernel values will add up when there are multiple kernels for a certain layer. 50 | 51 | Examples:: 52 | 53 | >>> feature_dim = 1024 54 | >>> batch_size = 10 55 | >>> layer1_kernels = (GaussianKernel(alpha=0.5), GaussianKernel(1.), GaussianKernel(2.)) 56 | >>> layer2_kernels = (GaussianKernel(1.), ) 57 | >>> loss = JointMultipleKernelMaximumMeanDiscrepancy((layer1_kernels, layer2_kernels)) 58 | >>> # layer1 features from source domain and target domain 59 | >>> z1_s, z1_t = torch.randn(batch_size, feature_dim), torch.randn(batch_size, feature_dim) 60 | >>> # layer2 features from source domain and target domain 61 | >>> z2_s, z2_t = torch.randn(batch_size, feature_dim), torch.randn(batch_size, feature_dim) 62 | >>> output = loss((z1_s, z2_s), (z1_t, z2_t)) 63 | """ 64 | 65 | def __init__(self, kernels: Sequence[Sequence[nn.Module]], linear: Optional[bool] = True, thetas: Sequence[nn.Module] = None): 66 | super(JointMultipleKernelMaximumMeanDiscrepancy, self).__init__() 67 | self.kernels = kernels 68 | self.index_matrix = None 69 | self.linear = linear 70 | if thetas: 71 | self.thetas = thetas 72 | else: 73 | self.thetas = [nn.Identity() for _ in kernels] 74 | 75 | def forward(self, z_s: torch.Tensor, z_t: torch.Tensor) -> torch.Tensor: 76 | batch_size = int(z_s[0].size(0)) 77 | self.index_matrix = _update_index_matrix(batch_size, self.index_matrix, self.linear).to(z_s[0].device) 78 | 79 | kernel_matrix = torch.ones_like(self.index_matrix) 80 | for layer_z_s, layer_z_t, layer_kernels, theta in zip(z_s, z_t, self.kernels, self.thetas): 81 | layer_features = torch.cat([layer_z_s, layer_z_t], dim=0) 82 | layer_features = theta(layer_features) 83 | kernel_matrix *= sum( 84 | [kernel(layer_features) for kernel in layer_kernels]) # Add up the matrix of each kernel 85 | 86 | # Add 2 / (n-1) to make up for the value on the diagonal 87 | # to ensure loss is positive in the non-linear version 88 | loss = (kernel_matrix * self.index_matrix).sum() + 2. 
/ float(batch_size - 1)
89 | return loss
90 | 
91 | 
92 | class Theta(nn.Module):
93 | """
94 | Maximize the loss with respect to :math:`\theta`,
95 | minimize the loss with respect to the features.
96 | """
97 | def __init__(self, dim: int):
98 | super(Theta, self).__init__()
99 | self.grl1 = GradientReverseLayer()
100 | self.grl2 = GradientReverseLayer()
101 | self.layer1 = nn.Linear(dim, dim)
102 | nn.init.eye_(self.layer1.weight)
103 | nn.init.zeros_(self.layer1.bias)
104 | 
105 | def forward(self, features: torch.Tensor) -> torch.Tensor:
106 | features = self.grl1(features)
107 | return self.grl2(self.layer1(features))  # layer1 gradients are reversed once (maximized); the input features pass through two reversals and are thus minimized as usual
108 | 
109 | 
110 | class ImageClassifier(ClassifierBase):
111 | def __init__(self, backbone: nn.Module, num_classes: int, bottleneck_dim: Optional[int] = 256, **kwargs):
112 | bottleneck = nn.Sequential(
113 | nn.AdaptiveAvgPool2d(output_size=(1, 1)),
114 | nn.Flatten(),
115 | nn.Linear(backbone.out_features, bottleneck_dim),
116 | nn.BatchNorm1d(bottleneck_dim),
117 | nn.ReLU(),
118 | nn.Dropout(0.5)
119 | )
120 | super(ImageClassifier, self).__init__(backbone, num_classes, bottleneck, bottleneck_dim, **kwargs)
--------------------------------------------------------------------------------
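A short sketch of the adversarial variant wired together, following the Examples above. It is illustrative only: the feature dimensions, batch size and kernel choices are arbitrary, and the surrounding training loop (including adding each ``Theta``'s parameters to the optimizer) is omitted.

>>> layer1_kernels = (GaussianKernel(alpha=0.5), GaussianKernel(1.), GaussianKernel(2.))
>>> layer2_kernels = (GaussianKernel(1.),)
>>> thetas = [Theta(dim) for dim in (1024, 31)]   # one Theta per adapted layer
>>> jmmd = JointMultipleKernelMaximumMeanDiscrepancy(
...     (layer1_kernels, layer2_kernels), linear=False, thetas=thetas)
>>> z1_s, z1_t = torch.randn(10, 1024), torch.randn(10, 1024)
>>> z2_s, z2_t = torch.randn(10, 31), torch.randn(10, 31)
>>> loss = jmmd((z1_s, z2_s), (z1_t, z2_t))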
/common/vision/datasets/keypoint_detection/surreal.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | from PIL import ImageFile
4 | import torch
5 | from ...transforms.keypoint_detection import *
6 | from .util import *
7 | from .._util import download as download_data, check_exits
8 | from .keypoint_dataset import Body16KeypointDataset
9 | 
10 | ImageFile.LOAD_TRUNCATED_IMAGES = True
11 | 
12 | 
13 | class SURREAL(Body16KeypointDataset):
14 | """`Surreal Dataset `_
15 | 
16 | Args:
17 | root (str): Root directory of dataset
18 | split (str, optional): The dataset split, supports ``train``, ``test``, or ``val``.
19 | Default: ``train``.
20 | task (str, optional): Placeholder.
21 | download (bool, optional): If true, downloads the dataset from the internet and puts it \
22 | in root directory. If dataset is already downloaded, it is not downloaded again.
23 | transforms (callable, optional): A function/transform that takes in a dict (which contains PIL image and
24 | its labels) and returns a transformed version. E.g, :class:`~common.vision.transforms.keypoint_detection.Resize`.
25 | image_size (tuple): (width, height) of the image. Default: (256, 256)
26 | heatmap_size (tuple): (width, height) of the heatmap. Default: (64, 64)
27 | sigma (int): sigma parameter when generate the heatmap. Default: 2
28 | 
29 | .. note::
30 | We found that the original Surreal image is in high resolution while most part in an image is background,
31 | thus we crop the image and keep only the surrounding area of the person (1.5x bigger than the person) to speed up training.
32 | 
33 | .. note:: In `root`, the following files will exist after downloading.
34 | ::
35 | train/
36 | test/
37 | val/
38 | """
39 | def __init__(self, root, split='train', task='all', download=True, **kwargs):
40 | assert split in ['train', 'test', 'val']
41 | self.split = split
42 | 
43 | if download:
44 | download_data(root, "train/run0", "train0.tgz", "https://cloud.tsinghua.edu.cn/f/b13604f06ff1445c830a/?dl=1")
45 | download_data(root, "train/run1", "train1.tgz", "https://cloud.tsinghua.edu.cn/f/919aefe2de3541c3b940/?dl=1")
46 | download_data(root, "train/run2", "train2.tgz", "https://cloud.tsinghua.edu.cn/f/34864760ad4945b9bcd6/?dl=1")  # run2 goes to its own sub-directory
47 | download_data(root, "val", "val.tgz", "https://cloud.tsinghua.edu.cn/f/16b20f2e76684f848dc1/?dl=1")
48 | download_data(root, "test", "test.tgz", "https://cloud.tsinghua.edu.cn/f/36c72d86e43540e0a913/?dl=1")
49 | else:
50 | check_exits(root, "train/run0")
51 | check_exits(root, "train/run1")
52 | check_exits(root, "train/run2")
53 | check_exits(root, "val")
54 | check_exits(root, "test")
55 | 
56 | all_samples = []
57 | for part in [0, 1, 2]:
58 | annotation_file = os.path.join(root, split, 'run{}.json'.format(part))
59 | print("loading", annotation_file)
60 | with open(annotation_file) as f:
61 | samples = json.load(f)
62 | for sample in samples:
63 | sample["image_path"] = os.path.join(root, self.split, 'run{}'.format(part), sample['name'])
64 | all_samples.extend(samples)
65 | 
66 | random.seed(42)
67 | random.shuffle(all_samples)
68 | samples_len = len(all_samples)
69 | samples_split = min(int(samples_len * 0.2), 3200)
70 | if self.split == 'train':
71 | all_samples = all_samples[samples_split:]
72 | elif self.split == 'test':
73 | all_samples = all_samples[:samples_split]
74 | self.joints_index = (7, 4, 1, 2, 5, 8, 0, 9, 12, 15, 20, 18, 13, 14, 19, 21)
75 | 
76 | super(SURREAL, self).__init__(root, all_samples, **kwargs)
77 | 
78 | def __getitem__(self, index):
79 | sample = self.samples[index]
80 | image_name = sample['name']
81 | 
82 | image_path = sample['image_path']
83 | image = Image.open(image_path)
84 | keypoint3d_camera = np.array(sample['keypoint3d'])[self.joints_index, :] # NUM_KEYPOINTS x 3
85 | keypoint2d = np.array(sample['keypoint2d'])[self.joints_index, :] # NUM_KEYPOINTS x 2
86 | intrinsic_matrix = np.array(sample['intrinsic_matrix'])
87 | Zc = keypoint3d_camera[:, 2]
88 | 
89 | image, data = self.transforms(image, keypoint2d=keypoint2d, intrinsic_matrix=intrinsic_matrix)
90 | keypoint2d = data['keypoint2d']
91 | intrinsic_matrix = data['intrinsic_matrix']
92 | keypoint3d_camera = keypoint2d_to_3d(keypoint2d, intrinsic_matrix, Zc)
93 | 
94 | # normalize 2D pose:
95 | visible = np.array([1.]
* 16, dtype=np.float32)
96 | visible = visible[:, np.newaxis]
97 | 
98 | # 2D heatmap
99 | target, target_weight = generate_target(keypoint2d, visible, self.heatmap_size, self.sigma, self.image_size)
100 | target = torch.from_numpy(target)
101 | target_weight = torch.from_numpy(target_weight)
102 | 
103 | # normalize 3D pose:
104 | # center the pose at the reference joint (index 9 after re-ordering by self.joints_index)
105 | # and scale it so that the bone between joint 0 and the reference joint has unit length
106 | keypoint3d_n = keypoint3d_camera - keypoint3d_camera[9:10, :]
107 | keypoint3d_n = keypoint3d_n / np.sqrt(np.sum(keypoint3d_n[0, :] ** 2))
108 | 
109 | meta = {
110 | 'image': image_name,
111 | 'keypoint2d': keypoint2d, # (NUM_KEYPOINTS x 2)
112 | 'keypoint3d': keypoint3d_n, # (NUM_KEYPOINTS x 3)
113 | }
114 | return image, target, target_weight, meta
115 | 
116 | def __len__(self):
117 | return len(self.samples)
118 | 
--------------------------------------------------------------------------------
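To see how the keypoint-detection pieces fit together, here is a minimal, hypothetical training-step sketch. It is not a script from this repository: the weighted MSE stands in for the package's own keypoint loss, the import assumes the repository root is on ``PYTHONPATH``, and the batch is random data standing in for ``(image, target, target_weight, meta)`` as produced by a dataset such as ``SURREAL``.

import torch
import torch.nn as nn

from common.vision.models.keypoint_detection.pose_resnet import pose_resnet101

# 16 body keypoints, matching Body16KeypointDataset; skip the ImageNet download for this sketch
model = pose_resnet101(num_keypoints=16, pretrained_backbone=False, finetune=True)
optimizer = torch.optim.Adam(model.get_parameters(lr=1e-3))
criterion = nn.MSELoss(reduction='none')

images = torch.randn(4, 3, 256, 256)      # batch of input images
target = torch.rand(4, 16, 64, 64)        # Gaussian heatmaps, as produced by generate_target
target_weight = torch.ones(4, 16, 1)      # per-joint visibility weights

heatmaps = model(images)                  # (4, 16, 64, 64)
per_joint = criterion(heatmaps, target).mean(dim=(2, 3))   # (4, 16)
loss = (per_joint * target_weight.squeeze(-1)).mean()      # down-weight invisible joints
loss.backward()
optimizer.step()
optimizer.zero_grad()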