├── dalib ├── adaptation │ ├── segmentation │ │ ├── __init__.py │ │ ├── fda.py │ │ └── advent.py │ ├── keypoint_detection │ │ └── __init__.py │ ├── __init__.py │ ├── mcd.py │ ├── osbp.py │ ├── mcc.py │ ├── iwan.py │ ├── dann.py │ └── jan.py ├── __init__.py ├── translation │ ├── __init__.py │ ├── cyclegan │ │ ├── __init__.py │ │ ├── transform.py │ │ ├── loss.py │ │ └── util.py │ └── cycada.py └── modules │ ├── __init__.py │ ├── entropy.py │ ├── domain_discriminator.py │ ├── kernels.py │ ├── gl.py │ └── grl.py ├── common ├── __init__.py ├── vision │ ├── __init__.py │ ├── models │ │ ├── __init__.py │ │ ├── segmentation │ │ │ └── __init__.py │ │ ├── keypoint_detection │ │ │ ├── __init__.py │ │ │ ├── loss.py │ │ │ └── pose_resnet.py │ │ └── digits.py │ ├── datasets │ │ ├── regression │ │ │ ├── __init__.py │ │ │ ├── mpi3d.py │ │ │ ├── image_regression.py │ │ │ └── dsprites.py │ │ ├── keypoint_detection │ │ │ ├── __init__.py │ │ │ ├── lsp.py │ │ │ ├── util.py │ │ │ ├── hand_3d_studio.py │ │ │ └── surreal.py │ │ ├── segmentation │ │ │ ├── __init__.py │ │ │ ├── gta5.py │ │ │ ├── synthia.py │ │ │ └── cityscapes.py │ │ ├── __init__.py │ │ ├── _util.py │ │ ├── visda2017.py │ │ ├── office31.py │ │ ├── officecaltech.py │ │ ├── partial │ │ │ └── __init__.py │ │ ├── officehome.py │ │ ├── oxfordpet.py │ │ ├── imagelist.py │ │ ├── coco70.py │ │ ├── aircrafts.py │ │ ├── stanford_cars.py │ │ └── openset │ │ │ └── __init__.py │ └── transforms │ │ └── __init__.py ├── modules │ ├── __init__.py │ ├── regressor.py │ └── classifier.py ├── utils │ ├── __init__.py │ ├── analysis │ │ ├── __init__.py │ │ ├── a_distance.py │ │ └── tsne.py │ ├── data.py │ ├── meter.py │ ├── metric │ │ ├── keypoint_detection.py │ │ └── __init__.py │ └── logger.py └── loss │ └── __init__.py ├── .gitignore ├── LICENSE ├── icon ├── entropy.py ├── transform.py ├── uda_backbone.py ├── icon_utils.py └── cluster.py ├── README.md ├── sam.py └── validate.py /dalib/adaptation/segmentation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dalib/adaptation/keypoint_detection/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /common/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['modules', 'utils', 'vision'] 2 | -------------------------------------------------------------------------------- /dalib/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['adaptation', 'modules', 'translation'] 2 | -------------------------------------------------------------------------------- /common/vision/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['datasets', 'models', 'transforms'] 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .idea 3 | .vscode 4 | build 5 | data 6 | logs 7 | dist 8 | venv 9 | exp* -------------------------------------------------------------------------------- /common/vision/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import * 2 | 3 | __all__ = ['resnet', 'digits'] 4 | 
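The tree above shows that `dalib` (adaptation algorithms), `common` (datasets, backbones, generic heads and utilities) and `icon` (the ICON-specific modules) are ordinary Python packages. Below is a minimal sketch of how they can be imported together in a training script. It is illustrative only, not a file from the repository; the data root, transform, batch size and feature dimension are placeholder assumptions.

import torchvision.transforms as T
from torch.utils.data import DataLoader

from common.vision.datasets import Office31                        # dataset wrappers re-exported in datasets/__init__.py
from common.utils.data import ForeverDataIterator                  # never-ending data iterator from common/utils/data.py
from dalib.modules.domain_discriminator import DomainDiscriminator

# Placeholder preprocessing and paths; the repository's own transforms live in icon/transform.py.
transform = T.Compose([T.Resize(256), T.CenterCrop(224), T.ToTensor()])
source = Office31(root="data/office31", task="A", download=True, transform=transform)
target = Office31(root="data/office31", task="W", download=True, transform=transform)
source_iter = ForeverDataIterator(DataLoader(source, batch_size=32, shuffle=True, drop_last=True))
target_iter = ForeverDataIterator(DataLoader(target, batch_size=32, shuffle=True, drop_last=True))
discriminator = DomainDiscriminator(in_feature=2048, hidden_size=1024)  # 2048 assumes a ResNet-50 feature dimension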
-------------------------------------------------------------------------------- /common/vision/models/segmentation/__init__.py: -------------------------------------------------------------------------------- 1 | from .deeplabv2 import * 2 | 3 | __all__ = ['deeplabv2'] -------------------------------------------------------------------------------- /dalib/translation/__init__.py: -------------------------------------------------------------------------------- 1 | from . import fourier_transform 2 | 3 | __all__ = ['fourier_transform'] -------------------------------------------------------------------------------- /common/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .classifier import * 2 | from .regressor import * 3 | 4 | __all__ = ['classifier', 'regressor'] -------------------------------------------------------------------------------- /common/vision/models/keypoint_detection/__init__.py: -------------------------------------------------------------------------------- 1 | from .pose_resnet import * 2 | from . import loss 3 | 4 | __all__ = ['pose_resnet'] -------------------------------------------------------------------------------- /common/vision/datasets/regression/__init__.py: -------------------------------------------------------------------------------- 1 | from .image_regression import ImageRegression 2 | from .dsprites import DSprites 3 | from .mpi3d import MPI3D -------------------------------------------------------------------------------- /common/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .logger import CompleteLogger 2 | from .meter import * 3 | from .data import ForeverDataIterator 4 | 5 | __all__ = ['metric', 'analysis', 'meter', 'data', 'logger'] -------------------------------------------------------------------------------- /dalib/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .grl import * 2 | from .domain_discriminator import * 3 | from .kernels import * 4 | from .entropy import * 5 | 6 | __all__ = ['grl', 'kernels', 'domain_discriminator', 'entropy'] 7 | -------------------------------------------------------------------------------- /dalib/translation/cyclegan/__init__.py: -------------------------------------------------------------------------------- 1 | from . import discriminator 2 | from . import generator 3 | from . import loss 4 | from . 
import transform 5 | 6 | from .discriminator import * 7 | from .generator import * 8 | from .loss import * 9 | from .transform import * 10 | -------------------------------------------------------------------------------- /common/vision/datasets/keypoint_detection/__init__.py: -------------------------------------------------------------------------------- 1 | from .rendered_hand_pose import RenderedHandPose 2 | from .hand_3d_studio import Hand3DStudio, Hand3DStudioAll 3 | from .freihand import FreiHand 4 | 5 | from .surreal import SURREAL 6 | from .lsp import LSP 7 | from .human36m import Human36M 8 | 9 | -------------------------------------------------------------------------------- /common/vision/datasets/segmentation/__init__.py: -------------------------------------------------------------------------------- 1 | from .segmentation_list import SegmentationList 2 | from .cityscapes import Cityscapes, FoggyCityscapes 3 | from .gta5 import GTA5 4 | from .synthia import Synthia 5 | 6 | __all__ = ["SegmentationList", "Cityscapes", "GTA5", "Synthia", "FoggyCityscapes"] 7 | -------------------------------------------------------------------------------- /dalib/adaptation/__init__.py: -------------------------------------------------------------------------------- 1 | from . import cdan 2 | from . import dann 3 | from . import mdd 4 | from . import dan 5 | from . import jan 6 | from . import mcd 7 | from . import mcc 8 | from . import pada 9 | from . import osbp 10 | from . import iwan 11 | 12 | __all__ = ["cdan", "dann", "mdd", "dan", "jan", "mcd", "mcc", "pada", "osbp", "iwan"] 13 | -------------------------------------------------------------------------------- /common/vision/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .imagelist import ImageList 2 | from .office31 import Office31 3 | from .officehome import OfficeHome 4 | from .visda2017 import VisDA2017 5 | from .officecaltech import OfficeCaltech 6 | from .domainnet import DomainNet 7 | from .aircrafts import Aircraft 8 | from .cub200 import CUB200 9 | from .stanford_cars import StanfordCars 10 | from .stanford_dogs import StanfordDogs 11 | from .coco70 import COCO70 12 | from .oxfordpet import OxfordIIITPet 13 | 14 | __all__ = ['ImageList', 'Office31', 'OfficeHome', "VisDA2017", "OfficeCaltech", "DomainNet", 15 | "Aircraft", "cub200", "StanfordCars", "StanfordDogs", "COCO70", "OxfordIIITPet"] 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Liuhong99 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /dalib/modules/entropy.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def entropy(predictions: torch.Tensor, reduction='none') -> torch.Tensor: 5 | r"""Entropy of prediction. 6 | The definition is: 7 | 8 | .. math:: 9 | entropy(p) = - \sum_{c=1}^C p_c \log p_c 10 | 11 | where C is number of classes. 12 | 13 | Args: 14 | predictions (tensor): Classifier predictions. Expected to contain raw, normalized scores for each class 15 | reduction (str, optional): Specifies the reduction to apply to the output: 16 | ``'none'`` | ``'mean'``. ``'none'``: no reduction will be applied, 17 | ``'mean'``: the sum of the output will be divided by the number of 18 | elements in the output. Default: ``'mean'`` 19 | 20 | Shape: 21 | - predictions: :math:`(minibatch, C)` where C means the number of classes. 22 | - Output: :math:`(minibatch, )` by default. If :attr:`reduction` is ``'mean'``, then scalar. 23 | """ 24 | epsilon = 1e-5 25 | H = -predictions * torch.log(predictions + epsilon) 26 | H = H.sum(dim=1) 27 | if reduction == 'mean': 28 | return H.mean() 29 | else: 30 | return H 31 | -------------------------------------------------------------------------------- /icon/entropy.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | def entropy(predictions: torch.Tensor, reduction='none') -> torch.Tensor: 7 | 8 | epsilon = 1e-5 9 | H = -predictions * torch.log(predictions + epsilon) 10 | H = H.sum(dim=1) 11 | if reduction == 'mean': 12 | return H.mean() 13 | else: 14 | return H 15 | 16 | class TsallisEntropy(nn.Module): 17 | 18 | def __init__(self, temperature: float, alpha: float): 19 | super(TsallisEntropy, self).__init__() 20 | self.temperature = temperature 21 | self.alpha = alpha 22 | 23 | def forward(self, logits: torch.Tensor) -> torch.Tensor: 24 | N, C = logits.shape 25 | 26 | pred = F.softmax(logits / self.temperature, dim=1) 27 | entropy_weight = entropy(pred).detach() 28 | entropy_weight = 1 + torch.exp(-entropy_weight) 29 | entropy_weight = (N * entropy_weight / torch.sum(entropy_weight)).unsqueeze(dim=1) 30 | 31 | sum_dim = torch.sum(pred * entropy_weight, dim = 0).unsqueeze(dim=0) 32 | 33 | return 1 / (self.alpha - 1) * torch.sum((1 / torch.mean(sum_dim) - torch.sum(pred ** self.alpha / sum_dim * entropy_weight, dim = -1))) -------------------------------------------------------------------------------- /common/loss/__init__.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | class KnowledgeDistillationLoss(nn.Module): 6 | """Knowledge Distillation Loss. 7 | 8 | Args: 9 | T (double): Temperature. Default: 1. 10 | reduction (str, optional): Specifies the reduction to apply to the output: 11 | ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, 12 | ``'mean'``: the sum of the output will be divided by the number of 13 | elements in the output, ``'sum'``: the output will be summed. 
Default: ``'batchmean'`` 14 | 15 | Inputs: 16 | - y_student (tensor): logits output of the student 17 | - y_teacher (tensor): logits output of the teacher 18 | 19 | Shape: 20 | - y_student: (minibatch, `num_classes`) 21 | - y_teacher: (minibatch, `num_classes`) 22 | 23 | """ 24 | def __init__(self, T=1., reduction='batchmean'): 25 | super(KnowledgeDistillationLoss, self).__init__() 26 | self.T = T 27 | self.kl = nn.KLDivLoss(reduction=reduction) 28 | 29 | def forward(self, y_student, y_teacher): 30 | """""" 31 | return self.kl(F.log_softmax(y_student / self.T, dim=-1), F.softmax(y_teacher / self.T, dim=-1)) 32 | -------------------------------------------------------------------------------- /dalib/adaptation/segmentation/fda.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | import math 3 | 4 | 5 | def robust_entropy(y, ita=1.5, num_classes=19, reduction='mean'): 6 | """ Robust entropy proposed in `FDA: Fourier Domain Adaptation for Semantic Segmentation (CVPR 2020) `_ 7 | 8 | Args: 9 | y (tensor): logits output of segmentation model in shape of :math:`(N, C, H, W)` 10 | ita (float, optional): parameters for robust entropy. Default: 1.5 11 | num_classes (int, optional): number of classes. Default: 19 12 | reduction (string, optional): Specifies the reduction to apply to the output: 13 | ``'none'`` | ``'mean'``. ``'none'``: no reduction will be applied, 14 | ``'mean'``: the sum of the output will be divided by the number of 15 | elements in the output. Default: ``'mean'`` 16 | 17 | Returns: 18 | Scalar by default. If :attr:`reduction` is ``'none'``, then :math:`(N, )`. 19 | 20 | """ 21 | P = F.softmax(y, dim=1) 22 | logP = F.log_softmax(y, dim=1) 23 | PlogP = P * logP 24 | ent = -1.0 * PlogP.sum(dim=1) 25 | ent = ent / math.log(num_classes) 26 | 27 | # compute robust entropy 28 | ent = ent ** 2.0 + 1e-8 29 | ent = ent ** ita 30 | 31 | if reduction == 'mean': 32 | return ent.mean() 33 | else: 34 | return ent 35 | -------------------------------------------------------------------------------- /dalib/translation/cyclegan/transform.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision.transforms as T 4 | 5 | from common.vision.transforms import Denormalize 6 | 7 | 8 | class Translation(nn.Module): 9 | """ 10 | Image Translation Transform Module 11 | 12 | Args: 13 | generator (torch.nn.Module): An image generator, e.g. :meth:`~dalib.translation.cyclegan.resnet_9_generator` 14 | device (torch.device): device to put the generator. 
Default: 'cpu' 15 | mean (tuple): the normalized mean for image 16 | std (tuple): the normalized std for image 17 | Input: 18 | - image (PIL.Image): raw image in shape H x W x C 19 | 20 | Output: 21 | raw image in shape H x W x 3 22 | 23 | """ 24 | def __init__(self, generator, device=torch.device("cpu"), mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)): 25 | super(Translation, self).__init__() 26 | self.generator = generator.to(device) 27 | self.device = device 28 | self.pre_process = T.Compose([ 29 | T.ToTensor(), 30 | T.Normalize(mean, std) 31 | ]) 32 | self.post_process = T.Compose([ 33 | Denormalize(mean, std), 34 | T.ToPILImage() 35 | ]) 36 | 37 | def forward(self, image): 38 | image = self.pre_process(image.copy()) # C x H x W 39 | image = image.to(self.device) 40 | generated_image = self.generator(image.unsqueeze(dim=0)).squeeze(dim=0).cpu() 41 | return self.post_process(generated_image) 42 | -------------------------------------------------------------------------------- /icon/transform.py: -------------------------------------------------------------------------------- 1 | 2 | from icon.randaugment import rand_augment_transform 3 | from common.vision.transforms import ResizeImage 4 | import torchvision.transforms as T 5 | 6 | rgb_mean = (0.485, 0.456, 0.406) 7 | ra_params = dict(translate_const=int(224 * 0.45), img_mean=tuple([min(255, round(255 * x)) for x in rgb_mean]),) 8 | 9 | 10 | class TransformFixMatch(object): 11 | def __init__(self): 12 | normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 13 | self.weak = T.Compose([ 14 | ResizeImage(256), 15 | T.CenterCrop(224), 16 | T.RandomHorizontalFlip(), 17 | T.ToTensor(), 18 | normalize 19 | ]) 20 | self.strong = T.Compose([ 21 | ResizeImage(256), 22 | T.CenterCrop(224), 23 | T.RandomHorizontalFlip(), 24 | T.RandomApply([ 25 | T.ColorJitter(0.4, 0.4, 0.4, 0.0) 26 | ], p=1.0), 27 | rand_augment_transform('rand-n{}-m{}-mstd0.5'.format(2, 10), ra_params), 28 | T.ToTensor(), 29 | normalize, 30 | ]) 31 | 32 | def __call__(self, x): 33 | weak = self.weak(x) 34 | strong = self.strong(x) 35 | return weak, strong 36 | 37 | 38 | def get_val_trainsform(): 39 | normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 40 | return T.Compose([ 41 | ResizeImage(256), 42 | T.CenterCrop(224), 43 | T.ToTensor(), 44 | normalize 45 | ]) -------------------------------------------------------------------------------- /common/utils/analysis/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import DataLoader 3 | import torch.nn as nn 4 | import tqdm 5 | 6 | 7 | def collect_feature(data_loader: DataLoader, feature_extractor: nn.Module, 8 | device: torch.device, max_num_features=None) -> torch.Tensor: 9 | """ 10 | Fetch data from `data_loader`, and then use `feature_extractor` to collect features 11 | 12 | Args: 13 | data_loader (torch.utils.data.DataLoader): Data loader. 14 | feature_extractor (torch.nn.Module): A feature extractor. 15 | device (torch.device) 16 | max_num_features (int): The max number of features to return 17 | 18 | Returns: 19 | Features in shape (min(len(data_loader), max_num_features), :math:`|\mathcal{F}|`). 
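Note (ICON variant): `data_loader` is expected to yield batches of the form ((images, images2), target, _), i.e. two augmented views per sample plus an ignored third field, and the function returns a tuple (features of the first view, features of the second view, labels) rather than a single tensor. `max_num_features` caps the number of batches iterated rather than the number of feature vectors.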
20 | """ 21 | feature_extractor.eval() 22 | all_features = [] 23 | all_features2 = [] 24 | all_labels = [] 25 | with torch.no_grad(): 26 | for i, ((images, images2), target, _) in enumerate(tqdm.tqdm(data_loader)): 27 | images = images.to(device) 28 | images2 = images2.to(device) 29 | feature = feature_extractor(images).cpu() 30 | feature2 = feature_extractor(images2).cpu() 31 | all_features.append(feature) 32 | all_features2.append(feature2) 33 | all_labels.append(target) 34 | if max_num_features is not None and i >= max_num_features: 35 | break 36 | return torch.cat(all_features, dim=0), torch.cat(all_features2, dim=0), torch.cat(all_labels, dim=0) -------------------------------------------------------------------------------- /common/vision/datasets/segmentation/gta5.py: -------------------------------------------------------------------------------- 1 | import os 2 | from .segmentation_list import SegmentationList 3 | from .cityscapes import Cityscapes 4 | from .._util import download as download_data 5 | 6 | 7 | class GTA5(SegmentationList): 8 | """`GTA5 `_ 9 | 10 | Args: 11 | root (str): Root directory of dataset 12 | split (str, optional): The dataset split, supports ``train``. 13 | data_folder (str, optional): Sub-directory of the image. Default: 'images'. 14 | label_folder (str, optional): Sub-directory of the label. Default: 'labels'. 15 | mean (seq[float]): mean BGR value. Normalize the image if not None. Default: None. 16 | transforms (callable, optional): A function/transform that takes in (PIL image, label) pair \ 17 | and returns a transformed version. E.g, :class:`~common.vision.transforms.segmentation.Resize`. 18 | 19 | .. note:: You need to download GTA5 manually. 20 | Ensure that there exist following directories in the `root` directory before you using this class. 21 | :: 22 | images/ 23 | labels/ 24 | """ 25 | download_list = [ 26 | ("image_list", "image_list.zip", "https://cloud.tsinghua.edu.cn/f/c77ff6fc4eea435791f4/?dl=1"), 27 | ] 28 | 29 | def __init__(self, root, split='train', data_folder='images', label_folder='labels', **kwargs): 30 | assert split in ['train'] 31 | # download meta information from Internet 32 | list(map(lambda args: download_data(root, *args), self.download_list)) 33 | data_list_file = os.path.join(root, "image_list", "{}.txt".format(split)) 34 | self.split = split 35 | super(GTA5, self).__init__(root, Cityscapes.CLASSES, data_list_file, data_list_file, data_folder, label_folder, 36 | id_to_train_id=Cityscapes.ID_TO_TRAIN_ID, train_id_to_color=Cityscapes.TRAIN_ID_TO_COLOR, **kwargs) -------------------------------------------------------------------------------- /dalib/modules/domain_discriminator.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict 2 | import torch.nn as nn 3 | 4 | __all__ = ['DomainDiscriminator'] 5 | 6 | 7 | class DomainDiscriminator(nn.Sequential): 8 | r"""Domain discriminator model from 9 | `"Domain-Adversarial Training of Neural Networks" (ICML 2015) `_ 10 | 11 | Distinguish whether the input features come from the source domain or the target domain. 12 | The source domain label is 1 and the target domain label is 0. 13 | 14 | Args: 15 | in_feature (int): dimension of the input feature 16 | hidden_size (int): dimension of the hidden features 17 | batch_norm (bool): whether use :class:`~torch.nn.BatchNorm1d`. 18 | Use :class:`~torch.nn.Dropout` if ``batch_norm`` is False. Default: True. 
19 | 20 | Shape: 21 | - Inputs: (minibatch, `in_feature`) 22 | - Outputs: :math:`(minibatch, 1)` 23 | """ 24 | 25 | def __init__(self, in_feature: int, hidden_size: int, batch_norm=True): 26 | if batch_norm: 27 | super(DomainDiscriminator, self).__init__( 28 | nn.Linear(in_feature, hidden_size), 29 | nn.BatchNorm1d(hidden_size), 30 | nn.ReLU(), 31 | nn.Linear(hidden_size, hidden_size), 32 | nn.BatchNorm1d(hidden_size), 33 | nn.ReLU(), 34 | nn.Linear(hidden_size, 1), 35 | nn.Sigmoid() 36 | ) 37 | else: 38 | super(DomainDiscriminator, self).__init__( 39 | nn.Linear(in_feature, hidden_size), 40 | nn.ReLU(inplace=True), 41 | nn.Dropout(0.5), 42 | nn.Linear(hidden_size, hidden_size), 43 | nn.ReLU(inplace=True), 44 | nn.Dropout(0.5), 45 | nn.Linear(hidden_size, 1), 46 | nn.Sigmoid() 47 | ) 48 | 49 | def get_parameters(self) -> List[Dict]: 50 | return [{"params": self.parameters(), "lr": 1.}] 51 | 52 | 53 | -------------------------------------------------------------------------------- /common/vision/datasets/_util.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List 3 | from torchvision.datasets.utils import download_and_extract_archive 4 | 5 | 6 | def download(root: str, file_name: str, archive_name: str, url_link: str): 7 | """ 8 | Download file from internet url link. 9 | 10 | Args: 11 | root (str) The directory to put downloaded files. 12 | file_name: (str) The name of the unzipped file. 13 | archive_name: (str) The name of archive(zipped file) downloaded. 14 | url_link: (str) The url link to download data. 15 | 16 | .. note:: 17 | If `file_name` already exists under path `root`, then it is not downloaded again. 18 | Else `archive_name` will be downloaded from `url_link` and extracted to `file_name`. 19 | """ 20 | if not os.path.exists(os.path.join(root, file_name)): 21 | print("Downloading {}".format(file_name)) 22 | # if os.path.exists(os.path.join(root, archive_name)): 23 | # os.remove(os.path.join(root, archive_name)) 24 | try: 25 | download_and_extract_archive(url_link, download_root=root, filename=archive_name, remove_finished=False) 26 | except Exception: 27 | print("Fail to download {} from url link {}".format(archive_name, url_link)) 28 | print('Please check you internet connection or ' 29 | "reinstall DALIB by 'pip install --upgrade dalib'") 30 | exit(0) 31 | 32 | 33 | def check_exits(root: str, file_name: str): 34 | """Check whether `file_name` exists under directory `root`. """ 35 | if not os.path.exists(os.path.join(root, file_name)): 36 | print("Dataset directory {} not found under {}".format(file_name, root)) 37 | exit(-1) 38 | 39 | 40 | def read_list_from_file(file_name: str) -> List[str]: 41 | """Read data from file and convert each line into an element in the list""" 42 | result = [] 43 | with open(file_name, "r") as f: 44 | for line in f.readlines(): 45 | result.append(line.strip()) 46 | return result 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ICON 2 | Code release for "Make the U in UDA Matter: Invariant Consistency Learning for Unsupervised Domain Adaptation" (NeurIPS 2023). Paper is available [here](https://arxiv.org/pdf/2309.12742.pdf). 
3 | 4 | ## Prerequisites 5 | - torch>=1.7.0 6 | - torchvision 7 | - qpsolvers 8 | - numpy 9 | - prettytable 10 | - tqdm 11 | - scikit-learn 12 | - webcolors 13 | - matplotlib 14 | 15 | 16 | ## Training 17 | 18 | Replace {data_dir} with the dataset directory. Missing datasets will be downloaded automatically. Replace {log_dir} with the logging directory (for storing model checkpoints, TensorBoard logs and console logs). For Office-Home, the source (-s) and target (-t) domains take values from {'Ar', 'Cl', 'Rw', 'Pr'}. 19 | 20 | VisDA-2017 21 | ``` 22 | CUDA_VISIBLE_DEVICES=0 python run_icon.py {data_dir} -d VisDA2017 -s Synthetic -t Real -a resnet50 --epochs 50 --lr 0.002 --per-class-eval --temperature 3.0 --center-crop --w-transfer 0.08 --w-st 1.0 --threshold 0.97 --log-root {log_dir} --batch-size 28 --optim sgd --con-start-epoch 5 --con-mode sim --w-inv 0.25 --inv-start-epoch 5 --back-cluster-start-epoch 9 --topk 3 --dim-reduction umap --reduced-dim 50 --eqinv --exp-name visda_reproduce --seed 0 23 | ``` 24 | 25 | Office-Home 26 | ``` 27 | CUDA_VISIBLE_DEVICES=0 python run_icon.py {data_dir} -d OfficeHome -s Ar -t Cl -a resnet50 --epochs 50 --lr 0.005 --temperature 2.5 --bottleneck-dim 2048 --w-transfer 0.015 --w-st 0.5 --threshold 0.97 --log-root {log_dir} --batch-size 28 --con-start-epoch 0 --con-mode stats --back-cluster-start-epoch 0 --topk 5 --seed 0 --w-inv 0.1 --inv-start-epoch 10 --exp-name Ar2Cl --optim sam 28 | ``` 29 | 30 | ## Acknowledgement 31 | This code is implemented based on [CST](https://github.com/Liuhong99/CST), and it is our pleasure to acknowledge their contributions. 32 | 33 | 34 | ## Citation 35 | If you use this code for your research, please consider citing: 36 | ``` 37 | @article{yue2023make, 38 | title={Make the U in UDA Matter: Invariant Consistency Learning for Unsupervised Domain Adaptation}, 39 | author={Yue, Zhongqi and Sun, Qianru and Zhang, Hanwang}, 40 | journal={Advances in neural information processing systems}, 41 | year={2023} 42 | } 43 | ``` 44 | 45 | ## Contact 46 | If you have any problems with our code, feel free to contact 47 | - yuez0003@ntu.edu.sg -------------------------------------------------------------------------------- /sam.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class SAM(torch.optim.Optimizer): 5 | def __init__(self, params, base_optimizer, rho=0.05, adaptive=False, **kwargs): 6 | assert rho >= 0.0, f"Invalid rho, should be non-negative: {rho}" 7 | 8 | defaults = dict(rho=rho, adaptive=adaptive, **kwargs) 9 | super(SAM, self).__init__(params, defaults) 10 | 11 | self.base_optimizer = base_optimizer(self.param_groups, **kwargs) 12 | self.param_groups = self.base_optimizer.param_groups 13 | 14 | @torch.no_grad() 15 | def first_step(self, zero_grad=False): 16 | grad_norm = self._grad_norm() 17 | for group in self.param_groups: 18 | scale = group["rho"] / (grad_norm + 1e-12) 19 | 20 | for p in group["params"]: 21 | if p.grad is None: continue 22 | e_w = (torch.pow(p, 2) if group["adaptive"] else 1.0) * p.grad * scale.to(p) 23 | p.add_(e_w) # climb to the local maximum "w + e(w)" 24 | self.state[p]["e_w"] = e_w 25 | 26 | if zero_grad: self.zero_grad() 27 | 28 | @torch.no_grad() 29 | def second_step(self, zero_grad=False): 30 | for group in self.param_groups: 31 | for p in group["params"]: 32 | if p.grad is None: continue 33 | p.sub_(self.state[p]["e_w"]) # get back to "w" from "w + e(w)" 34 | 35 | self.base_optimizer.step() # do the actual "sharpness-aware" update
36 | 37 | if zero_grad: self.zero_grad() 38 | 39 | @torch.no_grad() 40 | def step(self, closure=None): 41 | assert closure is not None, "Sharpness Aware Minimization requires closure, but it was not provided" 42 | closure = torch.enable_grad()(closure) # the closure should do a full forward-backward pass 43 | 44 | self.first_step(zero_grad=True) 45 | closure() 46 | self.second_step() 47 | 48 | def _grad_norm(self): 49 | shared_device = self.param_groups[0]["params"][0].device # put everything on the same device, in case of model parallelism 50 | norm = torch.norm( 51 | torch.stack([ 52 | ((torch.abs(p) if group["adaptive"] else 1.0) * p.grad).norm(p=2).to(shared_device) 53 | for group in self.param_groups for p in group["params"] 54 | if p.grad is not None 55 | ]), 56 | p=2 57 | ) 58 | return norm 59 | -------------------------------------------------------------------------------- /common/vision/datasets/segmentation/synthia.py: -------------------------------------------------------------------------------- 1 | import os 2 | from .segmentation_list import SegmentationList 3 | from .cityscapes import Cityscapes 4 | from .._util import download as download_data 5 | 6 | 7 | class Synthia(SegmentationList): 8 | """`SYNTHIA `_ 9 | 10 | Args: 11 | root (str): Root directory of dataset 12 | split (str, optional): The dataset split, supports ``train``. 13 | data_folder (str, optional): Sub-directory of the image. Default: 'RGB'. 14 | label_folder (str, optional): Sub-directory of the label. Default: 'synthia_mapped_to_cityscapes'. 15 | mean (seq[float]): mean BGR value. Normalize the image if not None. Default: None. 16 | transforms (callable, optional): A function/transform that takes in (PIL image, label) pair \ 17 | and returns a transformed version. E.g, :class:`~common.vision.transforms.segmentation.Resize`. 18 | 19 | .. note:: You need to download SYNTHIA manually. 20 | Ensure that the following directories exist in the `root` directory before using this class.
21 | :: 22 | RGB/ 23 | synthia_mapped_to_cityscapes/ 24 | """ 25 | ID_TO_TRAIN_ID = { 26 | 3: 0, 4: 1, 2: 2, 21: 3, 5: 4, 7: 5, 27 | 15: 6, 9: 7, 6: 8, 16: 9, 1: 10, 10: 11, 17: 12, 28 | 8: 13, 18: 14, 19: 15, 20: 16, 12: 17, 11: 18 29 | } 30 | download_list = [ 31 | ("image_list", "image_list.zip", "https://cloud.tsinghua.edu.cn/f/15c4d0f8e62e45d9a6b7/?dl=1"), 32 | ] 33 | 34 | def __init__(self, root, split='train', data_folder='RGB', label_folder='synthia_mapped_to_cityscapes', **kwargs): 35 | assert split in ['train'] 36 | # download meta information from Internet 37 | list(map(lambda args: download_data(root, *args), self.download_list)) 38 | data_list_file = os.path.join(root, "image_list", "{}.txt".format(split)) 39 | super(Synthia, self).__init__(root, Cityscapes.CLASSES, data_list_file, data_list_file, data_folder, 40 | label_folder, id_to_train_id=Synthia.ID_TO_TRAIN_ID, 41 | train_id_to_color=Cityscapes.TRAIN_ID_TO_COLOR, **kwargs) 42 | 43 | @property 44 | def evaluate_classes(self): 45 | return [ 46 | 'road', 'sidewalk', 'building', 'traffic light', 'traffic sign', 47 | 'vegetation', 'sky', 'person', 'rider', 'car', 'bus', 'motorcycle', 'bicycle' 48 | ] 49 | -------------------------------------------------------------------------------- /dalib/modules/kernels.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | import torch 3 | import torch.nn as nn 4 | 5 | 6 | __all__ = ['GaussianKernel'] 7 | 8 | 9 | class GaussianKernel(nn.Module): 10 | r"""Gaussian Kernel Matrix 11 | 12 | Gaussian Kernel k is defined by 13 | 14 | .. math:: 15 | k(x_1, x_2) = \exp \left( - \dfrac{\| x_1 - x_2 \|^2}{2\sigma^2} \right) 16 | 17 | where :math:`x_1, x_2 \in R^d` are 1-d tensors. 18 | 19 | Gaussian Kernel Matrix K is defined on input group :math:`X=(x_1, x_2, ..., x_m),` 20 | 21 | .. math:: 22 | K(X)_{i,j} = k(x_i, x_j) 23 | 24 | Also by default, during training this layer keeps running estimates of the 25 | mean of L2 distances, which are then used to set hyperparameter :math:`\sigma`. 26 | Mathematically, the estimation is :math:`\sigma^2 = \dfrac{\alpha}{n^2}\sum_{i,j} \| x_i - x_j \|^2`. 27 | If :attr:`track_running_stats` is set to ``False``, this layer then does not 28 | keep running estimates, and use a fixed :math:`\sigma` instead. 29 | 30 | Args: 31 | sigma (float, optional): bandwidth :math:`\sigma`. Default: None 32 | track_running_stats (bool, optional): If ``True``, this module tracks the running mean of :math:`\sigma^2`. 33 | Otherwise, it won't track such statistics and always uses fix :math:`\sigma^2`. Default: ``True`` 34 | alpha (float, optional): :math:`\alpha` which decides the magnitude of :math:`\sigma^2` when track_running_stats is set to ``True`` 35 | 36 | Inputs: 37 | - X (tensor): input group :math:`X` 38 | 39 | Shape: 40 | - Inputs: :math:`(minibatch, F)` where F means the dimension of input features. 
41 | - Outputs: :math:`(minibatch, minibatch)` 42 | """ 43 | 44 | def __init__(self, sigma: Optional[float] = None, track_running_stats: Optional[bool] = True, 45 | alpha: Optional[float] = 1.): 46 | super(GaussianKernel, self).__init__() 47 | assert track_running_stats or sigma is not None 48 | self.sigma_square = torch.tensor(sigma * sigma) if sigma is not None else None 49 | self.track_running_stats = track_running_stats 50 | self.alpha = alpha 51 | 52 | def forward(self, X: torch.Tensor) -> torch.Tensor: 53 | l2_distance_square = ((X.unsqueeze(0) - X.unsqueeze(1)) ** 2).sum(2) 54 | 55 | if self.track_running_stats: 56 | self.sigma_square = self.alpha * torch.mean(l2_distance_square.detach()) 57 | 58 | return torch.exp(-l2_distance_square / (2 * self.sigma_square)) -------------------------------------------------------------------------------- /common/vision/datasets/visda2017.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional 3 | from .imagelist import ImageList 4 | from ._util import download as download_data, check_exits 5 | 6 | 7 | class VisDA2017(ImageList): 8 | """`VisDA-2017 `_ Dataset 9 | 10 | Args: 11 | root (str): Root directory of dataset 12 | task (str): The task (domain) to create dataset. Choices include ``'Synthetic'``: synthetic images and \ 13 | ``'Real'``: real-world images. 14 | download (bool, optional): If true, downloads the dataset from the internet and puts it \ 15 | in root directory. If dataset is already downloaded, it is not downloaded again. 16 | transform (callable, optional): A function/transform that takes in an PIL image and returns a \ 17 | transformed version. E.g, ``transforms.RandomCrop``. 18 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 19 | 20 | .. note:: In `root`, there will exist following files after downloading. 21 | :: 22 | train/ 23 | aeroplance/ 24 | *.png 25 | ... 
26 | validation/ 27 | image_list/ 28 | train.txt 29 | validation.txt 30 | """ 31 | download_list = [ 32 | ("image_list", "image_list.zip", "https://cloud.tsinghua.edu.cn/f/b25b2b990e8f42e691f0/?dl=1"), 33 | ("train", "train.tar", "http://csr.bu.edu/ftp/visda17/clf/train.tar"), 34 | ("validation", "validation.tar", "http://csr.bu.edu/ftp/visda17/clf/validation.tar") 35 | ] 36 | image_list = { 37 | "Synthetic": "image_list/train.txt", 38 | "Real": "image_list/validation.txt" 39 | } 40 | CLASSES = ['aeroplane', 'bicycle', 'bus', 'car', 'horse', 'knife', 41 | 'motorcycle', 'person', 'plant', 'skateboard', 'train', 'truck'] 42 | 43 | def __init__(self, root: str, task: str, download: Optional[bool] = False, **kwargs): 44 | assert task in self.image_list 45 | data_list_file = os.path.join(root, self.image_list[task]) 46 | 47 | if download: 48 | list(map(lambda args: download_data(root, *args), self.download_list)) 49 | else: 50 | list(map(lambda file_name, _: check_exits(root, file_name), self.download_list)) 51 | 52 | super(VisDA2017, self).__init__(root, VisDA2017.CLASSES, data_list_file=data_list_file, **kwargs) 53 | 54 | @classmethod 55 | def domains(cls): 56 | return list(cls.image_list.keys()) -------------------------------------------------------------------------------- /dalib/translation/cycada.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch import Tensor 3 | 4 | 5 | class SemanticConsistency(nn.Module): 6 | """ 7 | Semantic consistency loss is introduced by 8 | `CyCADA: Cycle-Consistent Adversarial Domain Adaptation (ICML 2018) `_ 9 | 10 | This helps to prevent label flipping during image translation. 11 | 12 | Args: 13 | ignore_index (tuple, optional): Specifies target values that are ignored 14 | and do not contribute to the input gradient. When :attr:`size_average` is 15 | ``True``, the loss is averaged over non-ignored targets. Default: (). 16 | reduction (string, optional): Specifies the reduction to apply to the output: 17 | ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will 18 | be applied, ``'mean'``: the weighted mean of the output is taken, 19 | ``'sum'``: the output will be summed. Note: :attr:`size_average` 20 | and :attr:`reduce` are in the process of being deprecated, and in 21 | the meantime, specifying either of those two args will override 22 | :attr:`reduction`. Default: ``'mean'`` 23 | 24 | Shape: 25 | - Input: :math:`(N, C)` where `C = number of classes`, or 26 | :math:`(N, C, d_1, d_2, ..., d_K)` with :math:`K \geq 1` 27 | in the case of `K`-dimensional loss. 28 | - Target: :math:`(N)` where each value is :math:`0 \leq \text{targets}[i] \leq C-1`, or 29 | :math:`(N, d_1, d_2, ..., d_K)` with :math:`K \geq 1` in the case of 30 | K-dimensional loss. 31 | - Output: scalar. 32 | If :attr:`reduction` is ``'none'``, then the same size as the target: 33 | :math:`(N)`, or 34 | :math:`(N, d_1, d_2, ..., d_K)` with :math:`K \geq 1` in the case 35 | of K-dimensional loss. 
36 | 37 | Examples:: 38 | 39 | >>> loss = SemanticConsistency() 40 | >>> input = torch.randn(3, 5, requires_grad=True) 41 | >>> target = torch.empty(3, dtype=torch.long).random_(5) 42 | >>> output = loss(input, target) 43 | >>> output.backward() 44 | """ 45 | def __init__(self, ignore_index=(), reduction='mean'): 46 | super(SemanticConsistency, self).__init__() 47 | self.ignore_index = ignore_index 48 | self.loss = nn.CrossEntropyLoss(ignore_index=-1, reduction=reduction) 49 | 50 | def forward(self, input: Tensor, target: Tensor) -> Tensor: 51 | for class_idx in self.ignore_index: 52 | target[target == class_idx] = -1 53 | return self.loss(input, target) 54 | -------------------------------------------------------------------------------- /dalib/modules/gl.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Any, Tuple 2 | import numpy as np 3 | import torch.nn as nn 4 | from torch.autograd import Function 5 | import torch 6 | 7 | 8 | class GradientFunction(Function): 9 | 10 | @staticmethod 11 | def forward(ctx: Any, input: torch.Tensor, coeff: Optional[float] = 1.) -> torch.Tensor: 12 | ctx.coeff = coeff 13 | output = input * 1.0 14 | return output 15 | 16 | @staticmethod 17 | def backward(ctx: Any, grad_output: torch.Tensor) -> Tuple[torch.Tensor, Any]: 18 | return grad_output * ctx.coeff, None 19 | 20 | 21 | class WarmStartGradientLayer(nn.Module): 22 | """Warm Start Gradient Layer :math:`\mathcal{R}(x)` with warm start 23 | 24 | The forward and backward behaviours are: 25 | 26 | .. math:: 27 | \mathcal{R}(x) = x, 28 | 29 | \dfrac{ d\mathcal{R}} {dx} = \lambda I. 30 | 31 | :math:`\lambda` is initiated at :math:`lo` and is gradually changed to :math:`hi` using the following schedule: 32 | 33 | .. math:: 34 | \lambda = \dfrac{2(hi-lo)}{1+\exp(- α \dfrac{i}{N})} - (hi-lo) + lo 35 | 36 | where :math:`i` is the iteration step. 37 | 38 | Parameters: 39 | - **alpha** (float, optional): :math:`α`. Default: 1.0 40 | - **lo** (float, optional): Initial value of :math:`\lambda`. Default: 0.0 41 | - **hi** (float, optional): Final value of :math:`\lambda`. Default: 1.0 42 | - **max_iters** (int, optional): :math:`N`. Default: 1000 43 | - **auto_step** (bool, optional): If True, increase :math:`i` each time `forward` is called. 44 | Otherwise use function `step` to increase :math:`i`. 
Default: False 45 | """ 46 | 47 | def __init__(self, alpha: Optional[float] = 1.0, lo: Optional[float] = 0.0, hi: Optional[float] = 1., 48 | max_iters: Optional[int] = 1000., auto_step: Optional[bool] = False): 49 | super(WarmStartGradientLayer, self).__init__() 50 | self.alpha = alpha 51 | self.lo = lo 52 | self.hi = hi 53 | self.iter_num = 0 54 | self.max_iters = max_iters 55 | self.auto_step = auto_step 56 | 57 | def forward(self, input: torch.Tensor) -> torch.Tensor: 58 | """""" 59 | coeff = np.float( 60 | 2.0 * (self.hi - self.lo) / (1.0 + np.exp(-self.alpha * self.iter_num / self.max_iters)) 61 | - (self.hi - self.lo) + self.lo 62 | ) 63 | if self.auto_step: 64 | self.step() 65 | return GradientFunction.apply(input, coeff) 66 | 67 | def step(self): 68 | """Increase iteration number :math:`i` by 1""" 69 | self.iter_num += 1 70 | -------------------------------------------------------------------------------- /common/utils/data.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | from torch.utils.data import DataLoader, Dataset 3 | from typing import TypeVar, Iterable 4 | 5 | 6 | T_co = TypeVar('T_co', covariant=True) 7 | T = TypeVar('T') 8 | 9 | 10 | def send_to_device(tensor, device): 11 | """ 12 | Recursively sends the elements in a nested list/tuple/dictionary of tensors to a given device. 13 | 14 | Args: 15 | tensor (nested list/tuple/dictionary of :obj:`torch.Tensor`): 16 | The data to send to a given device. 17 | device (:obj:`torch.device`): 18 | The device to send the data to 19 | 20 | Returns: 21 | The same data structure as :obj:`tensor` with all tensors sent to the proper device. 22 | """ 23 | if isinstance(tensor, (list, tuple)): 24 | return type(tensor)(send_to_device(t, device) for t in tensor) 25 | elif isinstance(tensor, dict): 26 | return type(tensor)({k: send_to_device(v, device) for k, v in tensor.items()}) 27 | elif not hasattr(tensor, "to"): 28 | return tensor 29 | return tensor.to(device) 30 | 31 | 32 | class ForeverDataIterator: 33 | r"""A data iterator that will never stop producing data""" 34 | def __init__(self, data_loader: DataLoader, device=None): 35 | self.data_loader = data_loader 36 | self.iter = iter(self.data_loader) 37 | self.device = device 38 | 39 | def __next__(self): 40 | try: 41 | data = next(self.iter) 42 | if self.device is not None: 43 | data = send_to_device(data, self.device) 44 | except StopIteration: 45 | self.iter = iter(self.data_loader) 46 | data = next(self.iter) 47 | if self.device is not None: 48 | data = send_to_device(data, self.device) 49 | return data 50 | 51 | def __len__(self): 52 | return len(self.data_loader) 53 | 54 | 55 | class CombineDataset(Dataset): 56 | r"""Dataset as a combination of multiple datasets. 57 | 58 | The element of each dataset must be a list, and the i-th element of the combined dataset 59 | is a list splicing of the i-th element of each sub dataset. 60 | The length of the combined dataset is the minimum of the lengths of all sub datasets. 
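For example, if one sub dataset yields items of the form [image, label] and a second yields [index], item i of the combined dataset is the spliced list [image, label, index].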
61 | 62 | Arguments: 63 | datasets (sequence): List of datasets to be concatenated 64 | """ 65 | def __init__(self, datasets: Iterable[Dataset]) -> None: 66 | super(CombineDataset, self).__init__() 67 | # Cannot verify that datasets is Sized 68 | assert len(datasets) > 0, 'datasets should not be an empty iterable' # type: ignore 69 | self.datasets = list(datasets) 70 | 71 | def __len__(self): 72 | return min([len(d) for d in self.datasets]) 73 | 74 | def __getitem__(self, idx): 75 | return list(itertools.chain(*[d[idx] for d in self.datasets])) 76 | 77 | -------------------------------------------------------------------------------- /common/utils/meter.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List 2 | 3 | 4 | class AverageMeter(object): 5 | r"""Computes and stores the average and current value. 6 | 7 | Examples:: 8 | 9 | >>> # Initialize a meter to record loss 10 | >>> losses = AverageMeter() 11 | >>> # Update meter after every minibatch update 12 | >>> losses.update(loss_value, batch_size) 13 | """ 14 | def __init__(self, name: str, fmt: Optional[str] = ':f'): 15 | self.name = name 16 | self.fmt = fmt 17 | self.reset() 18 | 19 | def reset(self): 20 | self.val = 0 21 | self.avg = 0 22 | self.sum = 0 23 | self.count = 0 24 | 25 | def update(self, val, n=1): 26 | self.val = val 27 | self.sum += val * n 28 | self.count += n 29 | if self.count > 0: 30 | self.avg = self.sum / self.count 31 | 32 | def __str__(self): 33 | fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' 34 | return fmtstr.format(**self.__dict__) 35 | 36 | 37 | class AverageMeterDict(object): 38 | def __init__(self, names: List, fmt: Optional[str] = ':f'): 39 | self.dict = { 40 | name: AverageMeter(name, fmt) for name in names 41 | } 42 | 43 | def reset(self): 44 | for meter in self.dict.values(): 45 | meter.reset() 46 | 47 | def update(self, accuracies, n=1): 48 | for name, acc in accuracies.items(): 49 | self.dict[name].update(acc, n) 50 | 51 | def average(self): 52 | return { 53 | name: meter.avg for name, meter in self.dict.items() 54 | } 55 | 56 | def __getitem__(self, item): 57 | return self.dict[item] 58 | 59 | 60 | class Meter(object): 61 | """Computes and stores the current value.""" 62 | def __init__(self, name: str, fmt: Optional[str] = ':f'): 63 | self.name = name 64 | self.fmt = fmt 65 | self.reset() 66 | 67 | def reset(self): 68 | self.val = 0 69 | 70 | def update(self, val): 71 | self.val = val 72 | 73 | def __str__(self): 74 | fmtstr = '{name} {val' + self.fmt + '}' 75 | return fmtstr.format(**self.__dict__) 76 | 77 | 78 | class ProgressMeter(object): 79 | def __init__(self, num_batches, meters, prefix=""): 80 | self.batch_fmtstr = self._get_batch_fmtstr(num_batches) 81 | self.meters = meters 82 | self.prefix = prefix 83 | 84 | def display(self, batch): 85 | entries = [self.prefix + self.batch_fmtstr.format(batch)] 86 | entries += [str(meter) for meter in self.meters] 87 | print('\t'.join(entries)) 88 | 89 | def _get_batch_fmtstr(self, num_batches): 90 | num_digits = len(str(num_batches // 1)) 91 | fmt = '{:' + str(num_digits) + 'd}' 92 | return '[' + fmt + '/' + fmt.format(num_batches) + ']' 93 | 94 | 95 | -------------------------------------------------------------------------------- /dalib/modules/grl.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Any, Tuple 2 | import numpy as np 3 | import torch.nn as nn 4 | from torch.autograd import Function 5 | 
import torch 6 | 7 | 8 | class GradientReverseFunction(Function): 9 | 10 | @staticmethod 11 | def forward(ctx: Any, input: torch.Tensor, coeff: Optional[float] = 1.) -> torch.Tensor: 12 | ctx.coeff = coeff 13 | output = input * 1.0 14 | return output 15 | 16 | @staticmethod 17 | def backward(ctx: Any, grad_output: torch.Tensor) -> Tuple[torch.Tensor, Any]: 18 | return grad_output.neg() * ctx.coeff, None 19 | 20 | 21 | class GradientReverseLayer(nn.Module): 22 | def __init__(self): 23 | super(GradientReverseLayer, self).__init__() 24 | 25 | def forward(self, *input): 26 | return GradientReverseFunction.apply(*input) 27 | 28 | 29 | class WarmStartGradientReverseLayer(nn.Module): 30 | """Gradient Reverse Layer :math:`\mathcal{R}(x)` with warm start 31 | 32 | The forward and backward behaviours are: 33 | 34 | .. math:: 35 | \mathcal{R}(x) = x, 36 | 37 | \dfrac{ d\mathcal{R}} {dx} = - \lambda I. 38 | 39 | :math:`\lambda` is initiated at :math:`lo` and is gradually changed to :math:`hi` using the following schedule: 40 | 41 | .. math:: 42 | \lambda = \dfrac{2(hi-lo)}{1+\exp(- α \dfrac{i}{N})} - (hi-lo) + lo 43 | 44 | where :math:`i` is the iteration step. 45 | 46 | Args: 47 | alpha (float, optional): :math:`α`. Default: 1.0 48 | lo (float, optional): Initial value of :math:`\lambda`. Default: 0.0 49 | hi (float, optional): Final value of :math:`\lambda`. Default: 1.0 50 | max_iters (int, optional): :math:`N`. Default: 1000 51 | auto_step (bool, optional): If True, increase :math:`i` each time `forward` is called. 52 | Otherwise use function `step` to increase :math:`i`. Default: False 53 | """ 54 | 55 | def __init__(self, alpha: Optional[float] = 1.0, lo: Optional[float] = 0.0, hi: Optional[float] = 1., 56 | max_iters: Optional[int] = 1000., auto_step: Optional[bool] = False): 57 | super(WarmStartGradientReverseLayer, self).__init__() 58 | self.alpha = alpha 59 | self.lo = lo 60 | self.hi = hi 61 | self.iter_num = 0 62 | self.max_iters = max_iters 63 | self.auto_step = auto_step 64 | 65 | def forward(self, input: torch.Tensor) -> torch.Tensor: 66 | """""" 67 | coeff = np.float( 68 | 2.0 * (self.hi - self.lo) / (1.0 + np.exp(-self.alpha * self.iter_num / self.max_iters)) 69 | - (self.hi - self.lo) + self.lo 70 | ) 71 | if self.auto_step: 72 | self.step() 73 | return GradientReverseFunction.apply(input, coeff) 74 | 75 | def step(self): 76 | """Increase iteration number :math:`i` by 1""" 77 | self.iter_num += 1 78 | -------------------------------------------------------------------------------- /dalib/adaptation/mcd.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | import torch.nn as nn 3 | import torch 4 | 5 | 6 | def classifier_discrepancy(predictions1: torch.Tensor, predictions2: torch.Tensor) -> torch.Tensor: 7 | r"""The `Classifier Discrepancy` in 8 | `Maximum Classifier Discrepancy for Unsupervised Domain Adaptation (CVPR 2018) `_. 9 | 10 | The classfier discrepancy between predictions :math:`p_1` and :math:`p_2` can be described as: 11 | 12 | .. math:: 13 | d(p_1, p_2) = \dfrac{1}{K} \sum_{k=1}^K | p_{1k} - p_{2k} |, 14 | 15 | where K is number of classes. 16 | 17 | Args: 18 | predictions1 (torch.Tensor): Classifier predictions :math:`p_1`. 
Expected to contain raw, normalized scores for each class 19 | predictions2 (torch.Tensor): Classifier predictions :math:`p_2` 20 | """ 21 | return torch.mean(torch.abs(predictions1 - predictions2)) 22 | 23 | 24 | def entropy(predictions: torch.Tensor) -> torch.Tensor: 25 | r"""Entropy of N predictions :math:`(p_1, p_2, ..., p_N)`. 26 | The definition is: 27 | 28 | .. math:: 29 | d(p_1, p_2, ..., p_N) = -\dfrac{1}{K} \sum_{k=1}^K \log \left( \dfrac{1}{N} \sum_{i=1}^N p_{ik} \right) 30 | 31 | where K is number of classes. 32 | 33 | .. note:: 34 | This entropy function is specifically used in MCD and different from the usual :meth:`~dalib.modules.entropy.entropy` function. 35 | 36 | Args: 37 | predictions (torch.Tensor): Classifier predictions. Expected to contain raw, normalized scores for each class 38 | """ 39 | return -torch.mean(torch.log(torch.mean(predictions, 0) + 1e-6)) 40 | 41 | 42 | class ImageClassifierHead(nn.Module): 43 | r"""Classifier Head for MCD. 44 | 45 | Args: 46 | in_features (int): Dimension of input features 47 | num_classes (int): Number of classes 48 | bottleneck_dim (int, optional): Feature dimension of the bottleneck layer. Default: 1024 49 | 50 | Shape: 51 | - Inputs: :math:`(minibatch, F)` where F = `in_features`. 52 | - Output: :math:`(minibatch, C)` where C = `num_classes`. 53 | """ 54 | 55 | def __init__(self, in_features: int, num_classes: int, bottleneck_dim: Optional[int] = 1024): 56 | super(ImageClassifierHead, self).__init__() 57 | self.num_classes = num_classes 58 | self.head = nn.Sequential( 59 | nn.AdaptiveAvgPool2d(output_size=(1, 1)), 60 | nn.Flatten(), 61 | nn.Dropout(0.5), 62 | nn.Linear(in_features, bottleneck_dim), 63 | nn.BatchNorm1d(bottleneck_dim), 64 | nn.ReLU(), 65 | nn.Dropout(0.5), 66 | nn.Linear(bottleneck_dim, bottleneck_dim), 67 | nn.BatchNorm1d(bottleneck_dim), 68 | nn.ReLU(), 69 | nn.Linear(bottleneck_dim, num_classes) 70 | ) 71 | 72 | def forward(self, inputs: torch.Tensor) -> torch.Tensor: 73 | return self.head(inputs) -------------------------------------------------------------------------------- /dalib/adaptation/osbp.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from common.modules.classifier import Classifier as ClassifierBase 7 | from ..modules.grl import GradientReverseLayer 8 | 9 | 10 | class UnknownClassBinaryCrossEntropy(nn.Module): 11 | r""" 12 | Binary cross entropy loss to make a boundary for unknown samples, proposed by 13 | `Open Set Domain Adaptation by Backpropagation (ECCV 2018) `_. 14 | 15 | Given a sample on target domain :math:`x_t` and its classifcation outputs :math:`y`, the binary cross entropy 16 | loss is defined as 17 | 18 | .. math:: 19 | L_{adv}(x_t) = -t log(p(y=C+1|x_t)) - (1-t)log(1-p(y=C+1|x_t)) 20 | 21 | where t is a hyper-parameter and C is the number of known classes. 22 | 23 | Args: 24 | t (float): Predefined hyper-parameter. Default: 0.5 25 | 26 | Inputs: 27 | - y (tensor): classification outputs (before softmax). 28 | 29 | Shape: 30 | - y: :math:`(minibatch, C+1)` where C is the number of known classes. 
31 | - Outputs: scalar 32 | 33 | """ 34 | def __init__(self, t: Optional[float]=0.5): 35 | super(UnknownClassBinaryCrossEntropy, self).__init__() 36 | self.t = t 37 | 38 | def forward(self, y): 39 | # y : N x (C+1) 40 | softmax_output = F.softmax(y, dim=1) 41 | unknown_class_prob = softmax_output[:, -1].contiguous().view(-1, 1) 42 | known_class_prob = 1. - unknown_class_prob 43 | 44 | unknown_target = torch.ones((y.size(0), 1)).to(y.device) * self.t 45 | known_target = 1. - unknown_target 46 | return - torch.mean(unknown_target * torch.log(unknown_class_prob + 1e-6)) \ 47 | - torch.mean(known_target * torch.log(known_class_prob + 1e-6)) 48 | 49 | 50 | class ImageClassifier(ClassifierBase): 51 | def __init__(self, backbone: nn.Module, num_classes: int, bottleneck_dim: Optional[int] = 256, **kwargs): 52 | bottleneck = nn.Sequential( 53 | nn.AdaptiveAvgPool2d(output_size=(1, 1)), 54 | nn.Flatten(), 55 | nn.Linear(backbone.out_features, bottleneck_dim), 56 | nn.BatchNorm1d(bottleneck_dim), 57 | nn.ReLU(), 58 | nn.Dropout(), 59 | nn.Linear(bottleneck_dim, bottleneck_dim), 60 | nn.BatchNorm1d(bottleneck_dim), 61 | nn.ReLU(), 62 | nn.Dropout() 63 | ) 64 | super(ImageClassifier, self).__init__(backbone, num_classes, bottleneck, bottleneck_dim, **kwargs) 65 | self.grl = GradientReverseLayer() 66 | 67 | def forward(self, x: torch.Tensor, grad_reverse: Optional[bool] = False): 68 | features = self.backbone(x) 69 | features = self.bottleneck(features) 70 | if grad_reverse: 71 | features = self.grl(features) 72 | outputs = self.head(features) 73 | return outputs, features 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /common/vision/datasets/office31.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | import os 3 | from .imagelist import ImageList 4 | from ._util import download as download_data, check_exits 5 | 6 | 7 | class Office31(ImageList): 8 | """Office31 Dataset. 9 | 10 | Args: 11 | root (str): Root directory of dataset 12 | task (str): The task (domain) to create dataset. Choices include ``'A'``: amazon, \ 13 | ``'D'``: dslr and ``'W'``: webcam. 14 | download (bool, optional): If true, downloads the dataset from the internet and puts it \ 15 | in root directory. If dataset is already downloaded, it is not downloaded again. 16 | transform (callable, optional): A function/transform that takes in an PIL image and returns a \ 17 | transformed version. E.g, :class:`torchvision.transforms.RandomCrop`. 18 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 19 | 20 | .. note:: In `root`, there will exist following files after downloading. 21 | :: 22 | amazon/ 23 | images/ 24 | backpack/ 25 | *.jpg 26 | ... 
27 | dslr/ 28 | webcam/ 29 | image_list/ 30 | amazon.txt 31 | dslr.txt 32 | webcam.txt 33 | """ 34 | download_list = [ 35 | ("image_list", "image_list.zip", "https://cloud.tsinghua.edu.cn/f/d9bca681c71249f19da2/?dl=1"), 36 | ("amazon", "amazon.tgz", "https://cloud.tsinghua.edu.cn/f/edc8d1bba1c740dc821c/?dl=1"), 37 | ("dslr", "dslr.tgz", "https://cloud.tsinghua.edu.cn/f/ca6df562b7e64850ad7f/?dl=1"), 38 | ("webcam", "webcam.tgz", "https://cloud.tsinghua.edu.cn/f/82b24ed2e08f4a3c8888/?dl=1"), 39 | ] 40 | image_list = { 41 | "A": "image_list/amazon.txt", 42 | "D": "image_list/dslr.txt", 43 | "W": "image_list/webcam.txt" 44 | } 45 | CLASSES = ['back_pack', 'bike', 'bike_helmet', 'bookcase', 'bottle', 'calculator', 'desk_chair', 'desk_lamp', 46 | 'desktop_computer', 'file_cabinet', 'headphones', 'keyboard', 'laptop_computer', 'letter_tray', 47 | 'mobile_phone', 'monitor', 'mouse', 'mug', 'paper_notebook', 'pen', 'phone', 'printer', 'projector', 48 | 'punchers', 'ring_binder', 'ruler', 'scissors', 'speaker', 'stapler', 'tape_dispenser', 'trash_can'] 49 | 50 | def __init__(self, root: str, task: str, download: Optional[bool] = True, **kwargs): 51 | assert task in self.image_list 52 | data_list_file = os.path.join(root, self.image_list[task]) 53 | 54 | if download: 55 | list(map(lambda args: download_data(root, *args), self.download_list)) 56 | else: 57 | list(map(lambda file_name, _: check_exits(root, file_name), self.download_list)) 58 | 59 | super(Office31, self).__init__(root, Office31.CLASSES, data_list_file=data_list_file, **kwargs) 60 | 61 | @classmethod 62 | def domains(cls): 63 | return list(cls.image_list.keys()) -------------------------------------------------------------------------------- /common/vision/datasets/officecaltech.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional 3 | from torchvision.datasets.folder import DatasetFolder, IMG_EXTENSIONS, default_loader 4 | from torchvision.datasets.utils import download_and_extract_archive 5 | from ._util import check_exits 6 | 7 | 8 | class OfficeCaltech(DatasetFolder): 9 | """Office+Caltech Dataset. 10 | 11 | Args: 12 | root (str): Root directory of dataset 13 | task (str): The task (domain) to create dataset. Choices include ``'A'``: amazon, \ 14 | ``'D'``: dslr, ``'W'``:webcam and ``'C'``: caltech. 15 | download (bool, optional): If true, downloads the dataset from the internet and puts it \ 16 | in root directory. If dataset is already downloaded, it is not downloaded again. 17 | transform (callable, optional): A function/transform that takes in an PIL image and returns a \ 18 | transformed version. E.g, :class:`torchvision.transforms.RandomCrop`. 19 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 20 | 21 | .. note:: In `root`, there will exist following files after downloading. 22 | :: 23 | amazon/ 24 | images/ 25 | backpack/ 26 | *.jpg 27 | ... 
28 | dslr/
29 | webcam/
30 | caltech/
31 | image_list/
32 | amazon.txt
33 | dslr.txt
34 | webcam.txt
35 | caltech.txt
36 | """
37 | directories = {
38 | "A": "amazon",
39 | "D": "dslr",
40 | "W": "webcam",
41 | "C": "caltech"
42 | }
43 | CLASSES = ['back_pack', 'bike', 'calculator', 'headphones', 'keyboard',
44 | 'laptop_computer', 'monitor', 'mouse', 'mug', 'projector']
45 | 
46 | def __init__(self, root: str, task: str, download: Optional[bool] = False, **kwargs):
47 | if download:
48 | for dir in self.directories.values():
49 | if not os.path.exists(os.path.join(root, dir)):
50 | download_and_extract_archive(url="https://cloud.tsinghua.edu.cn/f/e93f2e07d93243d6b57e/?dl=1",
51 | download_root=os.path.join(root, 'download'),
52 | filename="officecaltech.tgz", remove_finished=False, extract_root=root)
53 | break
54 | else:
55 | list(map(lambda dir, _: check_exits(root, dir), self.directories.values()))
56 | 
57 | super(OfficeCaltech, self).__init__(
58 | os.path.join(root, self.directories[task]), default_loader, extensions=IMG_EXTENSIONS, **kwargs)
59 | self.classes = OfficeCaltech.CLASSES
60 | # map each class name to its index in CLASSES
61 | self.class_to_idx = {cls: idx
62 | for idx, cls in enumerate(self.classes)}
63 | 
64 | @property
65 | def num_classes(self):
66 | """Number of classes"""
67 | return len(self.classes)
68 | 
69 | @classmethod
70 | def domains(cls):
71 | return list(cls.directories.keys())
-------------------------------------------------------------------------------- /validate.py: --------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from common.utils.meter import AverageMeter, ProgressMeter
4 | import argparse
5 | from torch.utils.data import DataLoader
6 | from icon.uda_backbone import ImageClassifier
7 | from common.utils.metric import accuracy, ConfusionMatrix
8 | import time
9 | import torch.nn.functional as F
10 | 
11 | 
12 | def validate_model(val_loader: DataLoader, source_loader: DataLoader, model: ImageClassifier, args: argparse.Namespace, device, identifier="default"):
13 | batch_time = AverageMeter('Time', ':6.3f')
14 | losses = AverageMeter('Loss', ':.4e')
15 | top1 = AverageMeter('Acc@1', ':6.2f')
16 | top5 = AverageMeter('Acc@5', ':6.2f')
17 | progress = ProgressMeter(
18 | len(val_loader),
19 | [batch_time, losses, top1, top5],
20 | prefix='Test: ')
21 | 
22 | # switch to evaluate mode
23 | model.eval()
24 | if args.per_class_eval:
25 | classes = val_loader.dataset.classes
26 | confmat = ConfusionMatrix(len(classes))
27 | else:
28 | confmat = None
29 | 
30 | labels = []
31 | clusters = []
32 | pseudo_labels = []
33 | features = []
34 | with torch.no_grad():
35 | end = time.time()
36 | for i, (images, target, _) in enumerate(val_loader):
37 | images = images.to(device)
38 | target = target.to(device)
39 | 
40 | # compute output
41 | # output, output_alt, _, _, _, _ = model(images)
42 | o = model(images)
43 | output = o["y"]
44 | output_cluster = o["y_cluster_u"]
45 | features_batch = o["bottleneck_feature"]
46 | loss = F.cross_entropy(output, target)
47 | _, pseudo_clusters = torch.max(F.softmax(output_cluster, dim=-1), dim=-1)
48 | _, pseudo_labels_batch = torch.max(F.softmax(output, dim=-1), dim=-1)
49 | 
50 | labels.append(target.cpu())
51 | clusters.append(pseudo_clusters.cpu())
52 | pseudo_labels.append(pseudo_labels_batch.cpu())
53 | features.append(features_batch.cpu())
54 | 
55 | # measure accuracy and record loss
56 | acc1, acc5 = accuracy(output, target, topk=(1, 5))
57 | if confmat:
58 | confmat.update(target, output.argmax(1))
59 |
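# note: AverageMeter.update(value, n) weights each batch by its size, so losses.avg,
# top1.avg and top5.avg below are dataset-level means rather than means of per-batch statistics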
losses.update(loss.item(), images.size(0)) 60 | top1.update(acc1.item(), images.size(0)) 61 | top5.update(acc5.item(), images.size(0)) 62 | 63 | # measure elapsed time 64 | batch_time.update(time.time() - end) 65 | end = time.time() 66 | 67 | if i % args.print_freq == 0: 68 | progress.display(i) 69 | 70 | print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}' 71 | .format(top1=top1, top5=top5)) 72 | if confmat: 73 | _, acc, _ = confmat.compute() 74 | avg_return = acc.mean().item() * 100 75 | print(confmat.format(classes)) 76 | else: 77 | avg_return = top1.avg 78 | 79 | labels = torch.cat(labels, dim=0) 80 | clusters = torch.cat(clusters, dim=0) 81 | pseudo_labels = torch.cat(pseudo_labels, dim=0) 82 | features = torch.cat(features, dim=0) 83 | return avg_return -------------------------------------------------------------------------------- /common/utils/metric/keypoint_detection.py: -------------------------------------------------------------------------------- 1 | # TODO: add documentation 2 | import numpy as np 3 | 4 | 5 | def get_max_preds(batch_heatmaps): 6 | ''' 7 | get predictions from score maps 8 | heatmaps: numpy.ndarray([batch_size, num_joints, height, width]) 9 | ''' 10 | assert isinstance(batch_heatmaps, np.ndarray), \ 11 | 'batch_heatmaps should be numpy.ndarray' 12 | assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim' 13 | 14 | batch_size = batch_heatmaps.shape[0] 15 | num_joints = batch_heatmaps.shape[1] 16 | width = batch_heatmaps.shape[3] 17 | heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1)) 18 | idx = np.argmax(heatmaps_reshaped, 2) 19 | maxvals = np.amax(heatmaps_reshaped, 2) 20 | 21 | maxvals = maxvals.reshape((batch_size, num_joints, 1)) 22 | idx = idx.reshape((batch_size, num_joints, 1)) 23 | 24 | preds = np.tile(idx, (1, 1, 2)).astype(np.float32) 25 | 26 | preds[:, :, 0] = (preds[:, :, 0]) % width 27 | preds[:, :, 1] = np.floor((preds[:, :, 1]) / width) 28 | 29 | pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2)) 30 | pred_mask = pred_mask.astype(np.float32) 31 | 32 | preds *= pred_mask 33 | return preds, maxvals 34 | 35 | 36 | def calc_dists(preds, target, normalize): 37 | preds = preds.astype(np.float32) 38 | target = target.astype(np.float32) 39 | dists = np.zeros((preds.shape[1], preds.shape[0])) 40 | for n in range(preds.shape[0]): 41 | for c in range(preds.shape[1]): 42 | if target[n, c, 0] > 1 and target[n, c, 1] > 1: 43 | normed_preds = preds[n, c, :] / normalize[n] 44 | normed_targets = target[n, c, :] / normalize[n] 45 | dists[c, n] = np.linalg.norm(normed_preds - normed_targets) 46 | else: 47 | dists[c, n] = -1 48 | return dists 49 | 50 | 51 | def dist_acc(dists, thr=0.5): 52 | ''' Return percentage below threshold while ignoring values with a -1 ''' 53 | dist_cal = np.not_equal(dists, -1) 54 | num_dist_cal = dist_cal.sum() 55 | if num_dist_cal > 0: 56 | return np.less(dists[dist_cal], thr).sum() * 1.0 / num_dist_cal 57 | else: 58 | return -1 59 | 60 | 61 | def accuracy(output, target, hm_type='gaussian', thr=0.5): 62 | ''' 63 | Calculate accuracy according to PCK, 64 | but uses ground truth heatmap rather than x,y locations 65 | First value to be returned is average accuracy across 'idxs', 66 | followed by individual accuracies 67 | ''' 68 | idx = list(range(output.shape[1])) 69 | norm = 1.0 70 | if hm_type == 'gaussian': 71 | pred, _ = get_max_preds(output) 72 | target, _ = get_max_preds(target) 73 | h = output.shape[2] 74 | w = output.shape[3] 75 | norm = np.ones((pred.shape[0], 2)) * np.array([h, w]) / 10 76 | 
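# PCK-style normalization: each joint's error is divided by one tenth of the heatmap
# height/width, so with the default thr=0.5 a keypoint counts as correct when it lies
# within roughly 5% of the heatmap size from the ground-truth location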
dists = calc_dists(pred, target, norm) 77 | 78 | acc = np.zeros(len(idx)) 79 | avg_acc = 0 80 | cnt = 0 81 | 82 | for i in range(len(idx)): 83 | acc[i] = dist_acc(dists[idx[i]], thr) 84 | if acc[i] >= 0: 85 | avg_acc = avg_acc + acc[i] 86 | cnt += 1 87 | 88 | avg_acc = avg_acc / cnt if cnt != 0 else 0 89 | 90 | return acc, avg_acc, cnt, pred 91 | -------------------------------------------------------------------------------- /common/utils/analysis/a_distance.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import TensorDataset 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.utils.data import DataLoader 6 | from torch.optim import SGD 7 | from ..meter import AverageMeter 8 | from ..metric import binary_accuracy 9 | 10 | 11 | class ANet(nn.Module): 12 | def __init__(self, in_feature): 13 | super(ANet, self).__init__() 14 | self.layer = nn.Linear(in_feature, 1) 15 | self.sigmoid = nn.Sigmoid() 16 | 17 | def forward(self, x): 18 | x = self.layer(x) 19 | x = self.sigmoid(x) 20 | return x 21 | 22 | 23 | def calculate(source_feature: torch.Tensor, target_feature: torch.Tensor, 24 | device, progress=True, training_epochs=10): 25 | """ 26 | Calculate the :math:`\mathcal{A}`-distance, which is a measure for distribution discrepancy. 27 | 28 | The definition is :math:`dist_\mathcal{A} = 2 (1-2\epsilon)`, where :math:`\epsilon` is the 29 | test error of a classifier trained to discriminate the source from the target. 30 | 31 | Args: 32 | source_feature (tensor): features from source domain in shape :math:`(minibatch, F)` 33 | target_feature (tensor): features from target domain in shape :math:`(minibatch, F)` 34 | device (torch.device) 35 | progress (bool): if True, displays a the progress of training A-Net 36 | training_epochs (int): the number of epochs when training the classifier 37 | 38 | Returns: 39 | :math:`\mathcal{A}`-distance 40 | """ 41 | source_label = torch.ones((source_feature.shape[0], 1)) 42 | target_label = torch.zeros((target_feature.shape[0], 1)) 43 | feature = torch.cat([source_feature, target_feature], dim=0) 44 | label = torch.cat([source_label, target_label], dim=0) 45 | 46 | dataset = TensorDataset(feature, label) 47 | length = len(dataset) 48 | train_size = int(0.8 * length) 49 | val_size = length - train_size 50 | train_set, val_set = torch.utils.data.random_split(dataset, [train_size, val_size]) 51 | train_loader = DataLoader(train_set, batch_size=2, shuffle=True) 52 | val_loader = DataLoader(val_set, batch_size=8, shuffle=False) 53 | 54 | anet = ANet(feature.shape[1]).to(device) 55 | optimizer = SGD(anet.parameters(), lr=0.01) 56 | a_distance = 2.0 57 | for epoch in range(training_epochs): 58 | anet.train() 59 | for (x, label) in train_loader: 60 | x = x.to(device) 61 | label = label.to(device) 62 | anet.zero_grad() 63 | y = anet(x) 64 | loss = F.binary_cross_entropy(y, label) 65 | loss.backward() 66 | optimizer.step() 67 | 68 | anet.eval() 69 | meter = AverageMeter("accuracy", ":4.2f") 70 | with torch.no_grad(): 71 | for (x, label) in val_loader: 72 | x = x.to(device) 73 | label = label.to(device) 74 | y = anet(x) 75 | acc = binary_accuracy(y, label) 76 | meter.update(acc, x.shape[0]) 77 | error = 1 - meter.avg / 100 78 | a_distance = 2 * (1 - 2 * error) 79 | if progress: 80 | print("epoch {} accuracy: {} A-dist: {}".format(epoch, meter.avg, a_distance)) 81 | 82 | return a_distance 83 | 84 | 
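# Minimal usage sketch for `calculate`, assuming the project root is on PYTHONPATH;
# the feature tensors below are random placeholders standing in for features extracted
# from source/target data by a trained backbone.
#
#   import torch
#   from common.utils.analysis.a_distance import calculate
#
#   source_feature = torch.randn(200, 256)   # (num_source_samples, feature_dim)
#   target_feature = torch.randn(200, 256)   # (num_target_samples, feature_dim)
#   device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#   a_dist = calculate(source_feature, target_feature, device, progress=False, training_epochs=5)
#   # values close to 2 indicate easily separable (dissimilar) domains; values near 0 indicate similar domains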
-------------------------------------------------------------------------------- /common/vision/datasets/regression/mpi3d.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Sequence 2 | import os 3 | from .._util import download as download_data, check_exits 4 | from .image_regression import ImageRegression 5 | 6 | 7 | class MPI3D(ImageRegression): 8 | """`MPI3D `_ Dataset. 9 | 10 | Args: 11 | root (str): Root directory of dataset 12 | task (str): The task (domain) to create dataset. Choices include ``'C'``: Color, \ 13 | ``'N'``: Noisy and ``'S'``: Scream. 14 | split (str, optional): The dataset split, supports ``train``, or ``test``. 15 | factors (sequence[str]): Factors selected. Default: ('horizontal axis', 'vertical axis'). 16 | download (bool, optional): If true, downloads the dataset from the internet and puts it \ 17 | in root directory. If dataset is already downloaded, it is not downloaded again. 18 | transform (callable, optional): A function/transform that takes in an PIL image and returns a \ 19 | transformed version. E.g, :class:`torchvision.transforms.RandomCrop`. 20 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 21 | 22 | .. note:: In `root`, there will exist following files after downloading. 23 | :: 24 | real/ 25 | ... 26 | realistic/ 27 | toy/ 28 | image_list/ 29 | real_train.txt 30 | realistic_train.txt 31 | toy_train.txt 32 | real_test.txt 33 | realistic_test.txt 34 | toy_test.txt 35 | """ 36 | download_list = [ 37 | ("image_list", "image_list.zip", "https://cloud.tsinghua.edu.cn/f/f0ff24df967b42479d9e/?dl=1"), 38 | ("real", "real.tgz", "https://cloud.tsinghua.edu.cn/f/04c1318555fc4283862b/?dl=1"), 39 | ("realistic", "realistic.tgz", "https://cloud.tsinghua.edu.cn/f/2c0f7dacc73148cea593/?dl=1"), 40 | ("toy", "toy.tgz", "https://cloud.tsinghua.edu.cn/f/6327912a50374e20af95/?dl=1"), 41 | ] 42 | image_list = { 43 | "RL": "real", 44 | "RC": "realistic", 45 | "T": "toy" 46 | } 47 | FACTORS = ('horizontal axis', 'vertical axis') 48 | 49 | def __init__(self, root: str, task: str, split: Optional[str] = 'train', 50 | factors: Sequence[str] = ('horizontal axis', 'vertical axis'), 51 | download: Optional[bool] = True, target_transform=None, **kwargs): 52 | assert task in self.image_list 53 | assert split in ['train', 'test'] 54 | for factor in factors: 55 | assert factor in self.FACTORS 56 | 57 | factor_index = [self.FACTORS.index(factor) for factor in factors] 58 | 59 | if target_transform is None: 60 | target_transform = lambda x: x[list(factor_index)] / 40. 61 | else: 62 | target_transform = lambda x: target_transform(x[list(factor_index)]) / 40. 
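# the two pose factors ('horizontal axis', 'vertical axis') in MPI3D each take 40 discrete
# values (0-39), so dividing by 40. rescales the regression targets into [0, 1), matching
# the Sigmoid output range of the default Regressor head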
63 | 64 | data_list_file = os.path.join(root, "image_list", "{}_{}.txt".format(self.image_list[task], split)) 65 | 66 | if download: 67 | list(map(lambda args: download_data(root, *args), self.download_list)) 68 | else: 69 | list(map(lambda file_name, _: check_exits(root, file_name), self.download_list)) 70 | 71 | super(MPI3D, self).__init__(root, factors, data_list_file=data_list_file, target_transform=target_transform, **kwargs) 72 | 73 | -------------------------------------------------------------------------------- /common/vision/datasets/regression/image_regression.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional, Callable, Tuple, Any, List, Sequence 3 | import torchvision.datasets as datasets 4 | from torchvision.datasets.folder import default_loader 5 | import numpy as np 6 | 7 | 8 | class ImageRegression(datasets.VisionDataset): 9 | """A generic Dataset class for domain adaptation in image regression 10 | 11 | Args: 12 | root (str): Root directory of dataset 13 | factors (sequence[str]): Factors selected. Default: ('scale', 'position x', 'position y'). 14 | data_list_file (str): File to read the image list from. 15 | transform (callable, optional): A function/transform that takes in an PIL image and returns a \ 16 | transformed version. E.g, :class:`torchvision.transforms.RandomCrop`. 17 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 18 | 19 | .. note:: 20 | In `data_list_file`, each line has `1+len(factors)` values in the following format. 21 | :: 22 | source_dir/dog_xxx.png x11, x12, ... 23 | source_dir/cat_123.png x21, x22, ... 24 | target_dir/dog_xxy.png x31, x32, ... 25 | target_dir/cat_nsdf3.png x41, x42, ... 26 | 27 | The first value is the relative path of an image, and the rest values are the ground truth of the corresponding factors. 28 | If your data_list_file has different formats, please over-ride :meth:`ImageRegression.parse_data_file`. 29 | """ 30 | def __init__(self, root: str, factors: Sequence[str], data_list_file: str, 31 | transform: Optional[Callable] = None, target_transform: Optional[Callable] = None): 32 | super().__init__(root, transform=transform, target_transform=target_transform) 33 | self.samples = self.parse_data_file(data_list_file) 34 | self.factors = factors 35 | self.loader = default_loader 36 | self.data_list_file = data_list_file 37 | 38 | def __getitem__(self, index: int) -> Tuple[Any, Tuple[float]]: 39 | """ 40 | Args: 41 | index (int): Index 42 | 43 | Returns: 44 | (image, target) where target is a numpy float array. 
45 | """ 46 | path, target = self.samples[index] 47 | img = self.loader(path) 48 | if self.transform is not None: 49 | img = self.transform(img) 50 | if self.target_transform is not None and target is not None: 51 | target = self.target_transform(target) 52 | return img, target 53 | 54 | def __len__(self) -> int: 55 | return len(self.samples) 56 | 57 | def parse_data_file(self, file_name: str) -> List[Tuple[str, Any]]: 58 | """Parse file to data list 59 | 60 | Args: 61 | file_name (str): The path of data file 62 | 63 | Returns: 64 | List of (image path, (factors)) tuples 65 | """ 66 | with open(file_name, "r") as f: 67 | data_list = [] 68 | for line in f.readlines(): 69 | data = line.split() 70 | path = str(data[0]) 71 | target = np.array([float(d) for d in data[1:]], dtype=np.float) 72 | if not os.path.isabs(path): 73 | path = os.path.join(self.root, path) 74 | data_list.append((path, target)) 75 | return data_list 76 | 77 | @property 78 | def num_factors(self) -> int: 79 | return len(self.factors) -------------------------------------------------------------------------------- /common/vision/datasets/regression/dsprites.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Sequence 2 | import os 3 | from .._util import download as download_data, check_exits 4 | from .image_regression import ImageRegression 5 | 6 | 7 | class DSprites(ImageRegression): 8 | """`DSprites `_ Dataset. 9 | 10 | Args: 11 | root (str): Root directory of dataset 12 | task (str): The task (domain) to create dataset. Choices include ``'C'``: Color, \ 13 | ``'N'``: Noisy and ``'S'``: Scream. 14 | split (str, optional): The dataset split, supports ``train``, or ``test``. 15 | factors (sequence[str]): Factors selected. Default: ('scale', 'position x', 'position y'). 16 | download (bool, optional): If true, downloads the dataset from the internet and puts it \ 17 | in root directory. If dataset is already downloaded, it is not downloaded again. 18 | transform (callable, optional): A function/transform that takes in an PIL image and returns a \ 19 | transformed version. E.g, :class:`torchvision.transforms.RandomCrop`. 20 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 21 | 22 | .. note:: In `root`, there will exist following files after downloading. 23 | :: 24 | color/ 25 | ... 
26 | noisy/ 27 | scream/ 28 | image_list/ 29 | color_train.txt 30 | noisy_train.txt 31 | scream_train.txt 32 | color_test.txt 33 | noisy_test.txt 34 | scream_test.txt 35 | """ 36 | download_list = [ 37 | ("image_list", "image_list.zip", "https://cloud.tsinghua.edu.cn/f/fbbb6b1a43034712b34d/?dl=1"), 38 | ("color", "color.tgz", "https://cloud.tsinghua.edu.cn/f/9ce9f2abc61f49ed995a/?dl=1"), 39 | ("noisy", "noisy.tgz", "https://cloud.tsinghua.edu.cn/f/674435c8cb914ca0ad10/?dl=1"), 40 | ("scream", "scream.tgz", "https://cloud.tsinghua.edu.cn/f/0613675916ac4c3bb6bd/?dl=1"), 41 | ] 42 | image_list = { 43 | "C": "color", 44 | "N": "noisy", 45 | "S": "scream" 46 | } 47 | FACTORS = ('none', 'shape', 'scale', 'orientation', 'position x', 'position y') 48 | 49 | def __init__(self, root: str, task: str, split: Optional[str] = 'train', 50 | factors: Sequence[str] = ('scale', 'position x', 'position y'), 51 | download: Optional[bool] = True, target_transform=None, **kwargs): 52 | assert task in self.image_list 53 | assert split in ['train', 'test'] 54 | for factor in factors: 55 | assert factor in self.FACTORS 56 | 57 | factor_index = [self.FACTORS.index(factor) for factor in factors] 58 | 59 | if target_transform is None: 60 | target_transform = lambda x: x[list(factor_index)] 61 | else: 62 | target_transform = lambda x: target_transform(x[list(factor_index)]) 63 | 64 | data_list_file = os.path.join(root, "image_list", "{}_{}.txt".format(self.image_list[task], split)) 65 | 66 | if download: 67 | list(map(lambda args: download_data(root, *args), self.download_list)) 68 | else: 69 | list(map(lambda file_name, _: check_exits(root, file_name), self.download_list)) 70 | 71 | super(DSprites, self).__init__(root, factors, data_list_file=data_list_file, target_transform=target_transform, **kwargs) 72 | 73 | -------------------------------------------------------------------------------- /common/vision/datasets/partial/__init__.py: -------------------------------------------------------------------------------- 1 | from ..imagelist import ImageList 2 | from ..office31 import Office31 3 | from ..officehome import OfficeHome 4 | from ..visda2017 import VisDA2017 5 | from ..officecaltech import OfficeCaltech 6 | from .imagenet_caltech import ImageNetCaltech 7 | from .caltech_imagenet import CaltechImageNet 8 | from common.vision.datasets.partial.imagenet_caltech import ImageNetCaltech 9 | from typing import Sequence, ClassVar 10 | 11 | 12 | __all__ = ['Office31', 'OfficeHome', "VisDA2017", "CaltechImageNet", "ImageNetCaltech"] 13 | 14 | 15 | def partial(dataset_class: ClassVar, partial_classes: Sequence[str]) -> ClassVar: 16 | """ 17 | Convert a dataset into its partial version. 18 | 19 | In other words, those samples which doesn't belong to `partial_classes` will be discarded. 20 | Yet `partial` will not change the label space of `dataset_class`. 21 | 22 | Args: 23 | dataset_class (class): Dataset class. Only subclass of ``ImageList`` can be partial. 24 | partial_classes (sequence[str]): A sequence of which categories need to be kept in the partial dataset.\ 25 | Each element of `partial_classes` must belong to the `classes` list of `dataset_class`. 
26 | 27 | Examples:: 28 | 29 | >>> partial_classes = ['back_pack', 'bike', 'calculator', 'headphones', 'keyboard'] 30 | >>> # create a partial dataset class 31 | >>> PartialOffice31 = partial(Office31, partial_classes) 32 | >>> # create an instance of the partial dataset 33 | >>> dataset = PartialDataset(root="data/office31", task="A") 34 | 35 | """ 36 | if not (issubclass(dataset_class, ImageList)): 37 | raise Exception("Only subclass of ImageList can be partial") 38 | 39 | class PartialDataset(dataset_class): 40 | def __init__(self, **kwargs): 41 | super(PartialDataset, self).__init__(**kwargs) 42 | assert all([c in self.classes for c in partial_classes]) 43 | samples = [] 44 | for (path, label) in self.samples: 45 | class_name = self.classes[label] 46 | if class_name in partial_classes: 47 | samples.append((path, label)) 48 | self.samples = samples 49 | self.partial_classes = partial_classes 50 | self.partial_classes_idx = [self.class_to_idx[c] for c in partial_classes] 51 | 52 | return PartialDataset 53 | 54 | 55 | def default_partial(dataset_class: ClassVar) -> ClassVar: 56 | """ 57 | Default partial used in some paper. 58 | 59 | Args: 60 | dataset_class (class): Dataset class. Currently, dataset_class must be one of 61 | :class:`~common.vision.datasets.office31.Office31`, :class:`~common.vision.datasets.officehome.OfficeHome`, 62 | :class:`~common.vision.datasets.visda2017.VisDA2017`, 63 | :class:`~common.vision.datasets.partial.imagenet_caltech.ImageNetCaltech` 64 | and :class:`~common.vision.datasets.partial.caltech_imagenet.CaltechImageNet`. 65 | """ 66 | if dataset_class == Office31: 67 | kept_classes = OfficeCaltech.CLASSES 68 | elif dataset_class == OfficeHome: 69 | kept_classes = sorted(OfficeHome.CLASSES)[:25] 70 | elif dataset_class == VisDA2017: 71 | kept_classes = sorted(VisDA2017.CLASSES)[:6] 72 | elif dataset_class in [ImageNetCaltech, CaltechImageNet]: 73 | kept_classes = dataset_class.CLASSES 74 | else: 75 | raise NotImplementedError("Unknown partial domain adaptation dataset: {}".format(dataset_class.__name__)) 76 | return partial(dataset_class, kept_classes) -------------------------------------------------------------------------------- /icon/uda_backbone.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Optional, List, Dict 2 | import torch.nn as nn 3 | import torch 4 | from torch.nn.utils.weight_norm import WeightNorm 5 | from dalib.modules.grl import WarmStartGradientReverseLayer 6 | import torch.nn.functional as F 7 | 8 | def shift_log(x: torch.Tensor, offset: Optional[float] = 1e-6) -> torch.Tensor: 9 | return torch.log(torch.clamp(x + offset, max=1.)) 10 | 11 | class ImageClassifier(nn.Module): 12 | def __init__(self, backbone: nn.Module, num_classes: int, bottleneck: Optional[nn.Module] = None, 13 | bottleneck_dim: Optional[int] = -1, head: Optional[nn.Module] = None, finetune=True): 14 | super(ImageClassifier, self).__init__() 15 | self.backbone = nn.Sequential(backbone,nn.AdaptiveAvgPool2d(output_size=(1, 1)), 16 | nn.Flatten()) 17 | bottleneck = nn.Sequential( 18 | nn.Linear(backbone.out_features, bottleneck_dim), 19 | nn.BatchNorm1d(bottleneck_dim), 20 | nn.ReLU() 21 | ) 22 | self.num_classes = num_classes 23 | if bottleneck is None: 24 | self.bottleneck = nn.Sequential( 25 | ) 26 | self._features_dim = backbone.out_features 27 | else: 28 | self.bottleneck = bottleneck 29 | assert bottleneck_dim > 0 30 | self._features_dim = bottleneck_dim 31 | # cls head, eqinv head, cluster head 32 | if 
head is None: 33 | self.head = nn.Linear(self._features_dim, num_classes) 34 | else: 35 | self.head = head 36 | self.eqinv_head = nn.Linear(bottleneck_dim, num_classes) 37 | self.cluster_head = nn.Linear(bottleneck_dim, num_classes) 38 | self.finetune = finetune 39 | 40 | @property 41 | def features_dim(self) -> int: 42 | """The dimension of features before the final `head` layer""" 43 | return self._features_dim 44 | 45 | def forward(self, x: torch.Tensor, freeze_feature=False) -> Tuple[torch.Tensor, torch.Tensor]: 46 | """""" 47 | if freeze_feature: 48 | with torch.no_grad(): 49 | f = self.backbone(x) 50 | else: 51 | f = self.backbone(x) 52 | f1 = self.bottleneck(f) 53 | predictions = self.head(f1) 54 | preds_nograd = self.head(f1.detach()) 55 | eqinv_preds = self.eqinv_head(f1) 56 | eqinv_preds_nograd = self.eqinv_head(f1.detach()) 57 | outputs = { 58 | "y": predictions, 59 | "y_cluster_all": eqinv_preds, 60 | "feature": f, 61 | "bottleneck_feature": f1, 62 | "y_nograd": preds_nograd, 63 | "y_cluster_all_nograd": eqinv_preds_nograd 64 | } 65 | outputs["y_cluster_u"] = self.cluster_head(f1) 66 | outputs["y_cluster_u_nograd"] = self.cluster_head(f1.detach()) 67 | return outputs 68 | 69 | def get_parameters(self, base_lr=1.0) -> List[Dict]: 70 | """A parameter list which decides optimization hyper-parameters, 71 | such as the relative learning rate of each layer 72 | """ 73 | params = [ 74 | {"params": self.backbone.parameters(), "lr": 0.1 * base_lr if self.finetune else 1.0 * base_lr}, 75 | {"params": self.bottleneck.parameters(), "lr": 1.0 * base_lr}, 76 | {"params": self.head.parameters(), "lr": 1.0 * base_lr}, 77 | {"params": self.eqinv_head.parameters(), "lr": 1.0 * base_lr}, 78 | {"params": self.cluster_head.parameters(), "lr": 1.0 * base_lr}, 79 | ] 80 | return params -------------------------------------------------------------------------------- /common/vision/models/digits.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | class LeNet: 4 | def __init__(self, num_classes=10): 5 | self.num_classes = num_classes 6 | self.bottleneck_dim = 50 * 4 * 4 7 | 8 | def backbone(self): 9 | return nn.Sequential( 10 | nn.Conv2d(1, 20, kernel_size=5), 11 | nn.MaxPool2d(2), 12 | nn.ReLU(), 13 | nn.Conv2d(20, 50, kernel_size=5), 14 | nn.Dropout2d(p=0.5), 15 | nn.MaxPool2d(2), 16 | nn.ReLU(), 17 | ) 18 | 19 | def bottleneck(self): 20 | return nn.Flatten(start_dim=1) 21 | 22 | def head(self): 23 | return nn.Sequential( 24 | nn.Linear(self.bottleneck_dim, 500), 25 | nn.ReLU(), 26 | nn.Dropout(p=0.5), 27 | nn.Linear(500, self.num_classes) 28 | ) 29 | 30 | def complete(self): 31 | return nn.Sequential( 32 | self.backbone(), 33 | self.bottleneck(), 34 | self.head() 35 | ) 36 | 37 | 38 | class DTN: 39 | def __init__(self, num_classes=10): 40 | self.num_classes = num_classes 41 | self.bottleneck_dim = 256 * 4 * 4 42 | 43 | def backbone(self): 44 | return nn.Sequential( 45 | nn.Conv2d(3, 64, kernel_size=5, stride=2, padding=2), 46 | nn.BatchNorm2d(64), 47 | nn.Dropout2d(0.1), 48 | nn.ReLU(), 49 | nn.Conv2d(64, 128, kernel_size=5, stride=2, padding=2), 50 | nn.BatchNorm2d(128), 51 | nn.Dropout2d(0.3), 52 | nn.ReLU(), 53 | nn.Conv2d(128, 256, kernel_size=5, stride=2, padding=2), 54 | nn.BatchNorm2d(256), 55 | nn.Dropout2d(0.5), 56 | nn.ReLU(), 57 | ) 58 | 59 | def bottleneck(self): 60 | return nn.Flatten(start_dim=1) 61 | 62 | def head(self): 63 | return nn.Sequential( 64 | nn.Linear(self.bottleneck_dim, 512), 65 | nn.BatchNorm1d(512), 66 | 
nn.ReLU(), 67 | nn.Dropout(), 68 | nn.Linear(512, self.num_classes) 69 | ) 70 | 71 | def complete(self): 72 | return nn.Sequential( 73 | self.backbone(), 74 | self.bottleneck(), 75 | self.head() 76 | ) 77 | 78 | 79 | def lenet(**kwargs): 80 | """LeNet model from 81 | `"Gradient-based learning applied to document recognition" `_ 82 | 83 | Args: 84 | num_classes (int): number of classes. Default: 10 85 | 86 | .. note:: 87 | The input image size must be 28 x 28. 88 | 89 | Examples:: 90 | >>> # Get the whole LeNet model 91 | >>> model = lenet().complete() 92 | >>> # Or combine it by yourself 93 | >>> model = nn.Sequential(lenet().backbone(), lenet().bottleneck(), lenet().head()) 94 | """ 95 | return LeNet(**kwargs) 96 | 97 | 98 | def dtn(**kwargs): 99 | """ DTN model 100 | 101 | Args: 102 | num_classes (int): number of classes. Default: 10 103 | 104 | .. note:: 105 | The input image size must be 32 x 32. 106 | 107 | Examples:: 108 | >>> # Get the whole DTN model 109 | >>> model = dtn().complete() 110 | >>> # Or combine it by yourself 111 | >>> model = nn.Sequential(dtn().backbone(), dtn().bottleneck(), dtn().head()) 112 | """ 113 | return DTN(**kwargs) -------------------------------------------------------------------------------- /common/vision/datasets/officehome.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional 3 | from .imagelist import ImageList 4 | from ._util import download as download_data, check_exits 5 | 6 | 7 | class OfficeHome(ImageList): 8 | """`OfficeHome `_ Dataset. 9 | 10 | Args: 11 | root (str): Root directory of dataset 12 | task (str): The task (domain) to create dataset. Choices include ``'Ar'``: Art, \ 13 | ``'Cl'``: Clipart, ``'Pr'``: Product and ``'Rw'``: Real_World. 14 | download (bool, optional): If true, downloads the dataset from the internet and puts it \ 15 | in root directory. If dataset is already downloaded, it is not downloaded again. 16 | transform (callable, optional): A function/transform that takes in an PIL image and returns a \ 17 | transformed version. E.g, :class:`torchvision.transforms.RandomCrop`. 18 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 19 | 20 | .. note:: In `root`, there will exist following files after downloading. 21 | :: 22 | Art/ 23 | Alarm_Clock/*.jpg 24 | ... 
25 | Clipart/ 26 | Product/ 27 | Real_World/ 28 | image_list/ 29 | Art.txt 30 | Clipart.txt 31 | Product.txt 32 | Real_World.txt 33 | """ 34 | download_list = [ 35 | ("image_list", "image_list.zip", "https://cloud.tsinghua.edu.cn/f/ca3a3b6a8d554905b4cd/?dl=1"), 36 | ("Art", "Art.tgz", "https://cloud.tsinghua.edu.cn/f/4691878067d04755beab/?dl=1"), 37 | ("Clipart", "Clipart.tgz", "https://cloud.tsinghua.edu.cn/f/0d41e7da4558408ea5aa/?dl=1"), 38 | ("Product", "Product.tgz", "https://cloud.tsinghua.edu.cn/f/76186deacd7c4fa0a679/?dl=1"), 39 | ("Real_World", "Real_World.tgz", "https://cloud.tsinghua.edu.cn/f/dee961894cc64b1da1d7/?dl=1") 40 | ] 41 | image_list = { 42 | "Ar": "image_list/Art.txt", 43 | "Cl": "image_list/Clipart.txt", 44 | "Pr": "image_list/Product.txt", 45 | "Rw": "image_list/Real_World.txt", 46 | } 47 | CLASSES = ['Drill', 'Exit_Sign', 'Bottle', 'Glasses', 'Computer', 'File_Cabinet', 'Shelf', 'Toys', 'Sink', 48 | 'Laptop', 'Kettle', 'Folder', 'Keyboard', 'Flipflops', 'Pencil', 'Bed', 'Hammer', 'ToothBrush', 'Couch', 49 | 'Bike', 'Postit_Notes', 'Mug', 'Webcam', 'Desk_Lamp', 'Telephone', 'Helmet', 'Mouse', 'Pen', 'Monitor', 50 | 'Mop', 'Sneakers', 'Notebook', 'Backpack', 'Alarm_Clock', 'Push_Pin', 'Paper_Clip', 'Batteries', 'Radio', 51 | 'Fan', 'Ruler', 'Pan', 'Screwdriver', 'Trash_Can', 'Printer', 'Speaker', 'Eraser', 'Bucket', 'Chair', 52 | 'Calendar', 'Calculator', 'Flowers', 'Lamp_Shade', 'Spoon', 'Candles', 'Clipboards', 'Scissors', 'TV', 53 | 'Curtains', 'Fork', 'Soda', 'Table', 'Knives', 'Oven', 'Refrigerator', 'Marker'] 54 | 55 | def __init__(self, root: str, task: str, download: Optional[bool] = False, **kwargs): 56 | assert task in self.image_list 57 | data_list_file = os.path.join(root, self.image_list[task]) 58 | 59 | if download: 60 | list(map(lambda args: download_data(root, *args), self.download_list)) 61 | else: 62 | list(map(lambda file_name, _: check_exits(root, file_name), self.download_list)) 63 | 64 | super(OfficeHome, self).__init__(root, OfficeHome.CLASSES, data_list_file=data_list_file, **kwargs) 65 | 66 | @classmethod 67 | def domains(cls): 68 | return list(cls.image_list.keys()) -------------------------------------------------------------------------------- /common/vision/datasets/oxfordpet.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional 3 | from .imagelist import ImageList 4 | from ._util import download as download_data, check_exits 5 | 6 | 7 | class OxfordIIITPet(ImageList): 8 | """`The Oxford-IIIT Pet `_ Dataset. 9 | 10 | Args: 11 | root (str): Root directory of dataset 12 | split (str, optional): The dataset split, supports ``train``, or ``test``. 13 | sample_rate (int): The sampling rates to sample random ``training`` images for each category. 14 | Choices include 100, 50, 30, 15. Default: 100. 15 | download (bool, optional): If true, downloads the dataset from the internet and puts it \ 16 | in root directory. If dataset is already downloaded, it is not downloaded again. 17 | transform (callable, optional): A function/transform that takes in an PIL image and returns a \ 18 | transformed version. E.g, :class:`torchvision.transforms.RandomCrop`. 19 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 20 | 21 | .. note:: In `root`, there will exist following files after downloading. 
22 | :: 23 | train/ 24 | test/ 25 | image_list/ 26 | train_100.txt 27 | train_50.txt 28 | train_30.txt 29 | train_15.txt 30 | test.txt 31 | """ 32 | download_list = [ 33 | ("image_list", "image_list.zip", "https://cloud.tsinghua.edu.cn/f/738d75de56844bd0951b/?dl=1"), 34 | ("train", "train.tgz", "https://cloud.tsinghua.edu.cn/f/f40cf9f8c9ac4b04ba9b/?dl=1"), 35 | ("test", "test.tgz", "https://cloud.tsinghua.edu.cn/f/72b4bff8b5c84f4ba240/?dl=1"), 36 | ] 37 | image_list = { 38 | "train": "image_list/train_100.txt", 39 | "train100": "image_list/train_100.txt", 40 | "train50": "image_list/train_50.txt", 41 | "train30": "image_list/train_30.txt", 42 | "train15": "image_list/train_15.txt", 43 | "test": "image_list/test.txt", 44 | "test100": "image_list/test.txt", 45 | } 46 | CLASSES = ['Abyssinian', 'american_bulldog', 'american_pit_bull_terrier', 'basset_hound', 'beagle', 'Bengal', 47 | 'Birman', 'Bombay', 'boxer', 'British_Shorthair', 'chihuahua', 'Egyptian_Mau', 'english_cocker_spaniel', 48 | 'english_setter', 'german_shorthaired', 'great_pyrenees', 'havanese', 'japanese_chin', 'keeshond', 'leonberger', 49 | 'Maine_Coon', 'miniature_pinscher', 'newfoundland', 'Persian', 'pomeranian', 'pug', 'Ragdoll', 50 | 'Russian_Blue', 'saint_bernard', 'samoyed', 'scottish_terrier', 'shiba_inu', 'Siamese', 'Sphynx', 51 | 'staffordshire_bull_terrier', 'wheaten_terrier', 'yorkshire_terrier'] 52 | 53 | def __init__(self, root: str, split: str, sample_rate: Optional[int] =100, download: Optional[bool] = False, **kwargs): 54 | 55 | if split == 'train': 56 | list_name = 'train' + str(sample_rate) 57 | assert list_name in self.image_list 58 | data_list_file = os.path.join(root, self.image_list[list_name]) 59 | else: 60 | data_list_file = os.path.join(root, self.image_list['test']) 61 | 62 | if download: 63 | list(map(lambda args: download_data(root, *args), self.download_list)) 64 | else: 65 | list(map(lambda file_name, _: check_exits(root, file_name), self.download_list)) 66 | 67 | super(OxfordIIITPet, self).__init__(root, OxfordIIITPet.CLASSES, data_list_file=data_list_file, **kwargs) 68 | -------------------------------------------------------------------------------- /common/modules/regressor.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Optional, List, Dict 2 | import torch.nn as nn 3 | import torch 4 | 5 | __all__ = ['Regressor'] 6 | 7 | 8 | class Regressor(nn.Module): 9 | """A generic Regressor class for domain adaptation. 10 | 11 | Args: 12 | backbone (torch.nn.Module): Any backbone to extract 2-d features from data 13 | num_factors (int): Number of factors 14 | bottleneck (torch.nn.Module, optional): Any bottleneck layer. Use no bottleneck by default 15 | bottleneck_dim (int, optional): Feature dimension of the bottleneck layer. Default: -1 16 | head (torch.nn.Module, optional): Any classifier head. Use `nn.Linear` by default 17 | finetune (bool): Whether finetune the classifier or train from scratch. Default: True 18 | 19 | .. note:: 20 | The learning rate of this regressor is set 10 times to that of the feature extractor for better accuracy 21 | by default. If you have other optimization strategies, please over-ride :meth:`~Regressor.get_parameters`. 
22 | 23 | Inputs: 24 | - x (tensor): input data fed to `backbone` 25 | 26 | Outputs: 27 | - predictions: regressor's predictions 28 | - features: features after `bottleneck` layer and before `head` layer 29 | 30 | Shape: 31 | - Inputs: (minibatch, *) where * means, any number of additional dimensions 32 | - predictions: (minibatch, `num_factors`) 33 | - features: (minibatch, `features_dim`) 34 | 35 | """ 36 | 37 | def __init__(self, backbone: nn.Module, num_factors: int, bottleneck: Optional[nn.Module] = None, 38 | bottleneck_dim=-1, head: Optional[nn.Module] = None, finetune=True): 39 | super(Regressor, self).__init__() 40 | self.backbone = backbone 41 | self.num_factors = num_factors 42 | if bottleneck is None: 43 | feature_dim = backbone.out_features 44 | self.bottleneck = nn.Sequential( 45 | nn.Conv2d(feature_dim, feature_dim, kernel_size=3, stride=1, padding=1), 46 | nn.BatchNorm2d(feature_dim, feature_dim), 47 | nn.ReLU(), 48 | nn.AdaptiveAvgPool2d(output_size=(1, 1)), 49 | nn.Flatten() 50 | ) 51 | self._features_dim = feature_dim 52 | else: 53 | self.bottleneck = bottleneck 54 | assert bottleneck_dim > 0 55 | self._features_dim = bottleneck_dim 56 | 57 | if head is None: 58 | self.head = nn.Sequential( 59 | nn.Linear(self._features_dim, num_factors), 60 | nn.Sigmoid() 61 | ) 62 | else: 63 | self.head = head 64 | self.finetune = finetune 65 | 66 | @property 67 | def features_dim(self) -> int: 68 | """The dimension of features before the final `head` layer""" 69 | return self._features_dim 70 | 71 | def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: 72 | """""" 73 | f = self.backbone(x) 74 | f = self.bottleneck(f) 75 | predictions = self.head(f) 76 | return predictions, f 77 | 78 | def get_parameters(self, base_lr=1.0) -> List[Dict]: 79 | """A parameter list which decides optimization hyper-parameters, 80 | such as the relative learning rate of each layer 81 | """ 82 | params = [ 83 | {"params": self.backbone.parameters(), "lr": 0.1 * base_lr if self.finetune else 1.0 * base_lr}, 84 | {"params": self.bottleneck.parameters(), "lr": 1.0 * base_lr}, 85 | {"params": self.head.parameters(), "lr": 1.0 * base_lr}, 86 | ] 87 | 88 | return params 89 | 90 | 91 | -------------------------------------------------------------------------------- /common/utils/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import time 4 | 5 | class TextLogger(object): 6 | """Writes stream output to external text file. 7 | 8 | Args: 9 | filename (str): the file to write stream output 10 | stream: the stream to read from. Default: sys.stdout 11 | """ 12 | def __init__(self, filename, stream=sys.stdout): 13 | self.terminal = stream 14 | self.log = open(filename, 'a') 15 | self.file_close = False 16 | 17 | def write(self, message): 18 | self.terminal.write(message) 19 | if not self.file_close: 20 | self.log.write(message) 21 | self.flush() 22 | 23 | def flush(self): 24 | self.terminal.flush() 25 | if not self.file_close: 26 | self.log.flush() 27 | 28 | def close(self): 29 | # self.terminal.close() 30 | self.file_close = True 31 | self.log.close() 32 | 33 | def close_terminal(self): 34 | self.terminal.close() 35 | 36 | 37 | class CompleteLogger: 38 | """ 39 | A useful logger that 40 | 41 | - writes outputs to files and displays them on the console at the same time. 42 | - manages the directory of checkpoints and debugging images. 
43 | 44 | Args: 45 | root (str): the root directory of logger 46 | phase (str): the phase of training. 47 | 48 | """ 49 | 50 | def __init__(self, root, phase='train'): 51 | self.root = root 52 | self.phase = phase 53 | self.visualize_directory = os.path.join(self.root, "visualize") 54 | self.checkpoint_directory = os.path.join(self.root, "checkpoints") 55 | self.epoch = 0 56 | 57 | os.makedirs(self.root, exist_ok=True) 58 | os.makedirs(self.visualize_directory, exist_ok=True) 59 | os.makedirs(self.checkpoint_directory, exist_ok=True) 60 | 61 | # redirect std out 62 | now = time.strftime("%Y-%m-%d-%H_%M_%S", time.localtime(time.time())) 63 | log_filename = os.path.join(self.root, "{}-{}.txt".format(phase, now)) 64 | if os.path.exists(log_filename): 65 | os.remove(log_filename) 66 | self.logger = TextLogger(log_filename) 67 | sys.stdout = self.logger 68 | sys.stderr = self.logger 69 | if phase != 'train': 70 | self.set_epoch(phase) 71 | 72 | def set_epoch(self, epoch): 73 | """Set the epoch number. Please use it during training.""" 74 | os.makedirs(os.path.join(self.visualize_directory, str(epoch)), exist_ok=True) 75 | self.epoch = epoch 76 | 77 | def _get_phase_or_epoch(self): 78 | if self.phase == 'train': 79 | return str(self.epoch) 80 | else: 81 | return self.phase 82 | 83 | def get_image_path(self, filename: str): 84 | """ 85 | Get the full image path for a specific filename 86 | """ 87 | return os.path.join(self.visualize_directory, self._get_phase_or_epoch(), filename) 88 | 89 | def get_checkpoint_path(self, name=None): 90 | """ 91 | Get the full checkpoint path. 92 | 93 | Args: 94 | name (optional): the filename (without file extension) to save checkpoint. 95 | If None, when the phase is ``train``, checkpoint will be saved to ``{epoch}.pth``. 96 | Otherwise, will be saved to ``{phase}.pth``. 97 | 98 | """ 99 | if name is None: 100 | name = self._get_phase_or_epoch() 101 | name = str(name) 102 | return os.path.join(self.checkpoint_directory, name + ".pth") 103 | 104 | def close(self): 105 | self.logger.close() 106 | -------------------------------------------------------------------------------- /common/vision/datasets/keypoint_detection/lsp.py: -------------------------------------------------------------------------------- 1 | import scipy.io as scio 2 | import os 3 | 4 | from PIL import ImageFile 5 | import torch 6 | from .keypoint_dataset import Body16KeypointDataset 7 | from ...transforms.keypoint_detection import * 8 | from .util import * 9 | from .._util import download as download_data, check_exits 10 | 11 | 12 | ImageFile.LOAD_TRUNCATED_IMAGES = True 13 | 14 | 15 | class LSP(Body16KeypointDataset): 16 | """`Leeds Sports Pose Dataset `_ 17 | 18 | Args: 19 | root (str): Root directory of dataset 20 | split (str, optional): PlaceHolder. 21 | task (str, optional): Placeholder. 22 | download (bool, optional): If true, downloads the dataset from the internet and puts it \ 23 | in root directory. If dataset is already downloaded, it is not downloaded again. 24 | transforms (callable, optional): PlaceHolder. 25 | heatmap_size (tuple): (width, height) of the heatmap. Default: (64, 64) 26 | sigma (int): sigma parameter when generate the heatmap. Default: 2 27 | 28 | .. note:: In `root`, there will exist following files after downloading. 29 | :: 30 | lsp/ 31 | images/ 32 | joints.mat 33 | 34 | .. note:: 35 | LSP is only used for target domain. Due to the small dataset size, the whole dataset is used 36 | no matter what ``split`` is. Also, the transform is fixed. 
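    Examples::

        >>> # a minimal construction sketch; "data/lsp" is a placeholder root path
        >>> dataset = LSP(root="data/lsp", split='train', download=True)
        >>> image, target, target_weight, meta = dataset[0]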
37 | """ 38 | def __init__(self, root, split='train', task='all', download=True, image_size=(256, 256), transforms=None, **kwargs): 39 | if download: 40 | download_data(root, "images", "lsp_dataset.zip", 41 | "https://cloud.tsinghua.edu.cn/f/46ea73c89abc46bfb125/?dl=1") 42 | else: 43 | check_exits(root, "lsp") 44 | 45 | assert split in ['train', 'test', 'all'] 46 | self.split = split 47 | 48 | samples = [] 49 | annotations = scio.loadmat(os.path.join(root, "joints.mat"))['joints'].transpose((2, 1, 0)) 50 | for i in range(0, 2000): 51 | image = "im{0:04d}.jpg".format(i+1) 52 | annotation = annotations[i] 53 | samples.append((image, annotation)) 54 | 55 | self.joints_index = (0, 1, 2, 3, 4, 5, 13, 13, 12, 13, 6, 7, 8, 9, 10, 11) 56 | self.visible = np.array([1.] * 6 + [0, 0] + [1.] * 8, dtype=np.float32) 57 | normalize = Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 58 | transforms = Compose([ 59 | ResizePad(image_size[0]), 60 | ToTensor(), 61 | normalize 62 | ]) 63 | super(LSP, self).__init__(root, samples, transforms=transforms, image_size=image_size, **kwargs) 64 | 65 | def __getitem__(self, index): 66 | sample = self.samples[index] 67 | image_name = sample[0] 68 | image = Image.open(os.path.join(self.root, "images", image_name)) 69 | keypoint2d = sample[1][self.joints_index, :2] 70 | image, data = self.transforms(image, keypoint2d=keypoint2d) 71 | keypoint2d = data['keypoint2d'] 72 | visible = self.visible * (1-sample[1][self.joints_index, 2]) 73 | visible = visible[:, np.newaxis] 74 | 75 | # 2D heatmap 76 | target, target_weight = generate_target(keypoint2d, visible, self.heatmap_size, self.sigma, self.image_size) 77 | target = torch.from_numpy(target) 78 | target_weight = torch.from_numpy(target_weight) 79 | 80 | meta = { 81 | 'image': image_name, 82 | 'keypoint2d': keypoint2d, # (NUM_KEYPOINTS x 2) 83 | 'keypoint3d': np.zeros((self.num_keypoints, 3)).astype(keypoint2d.dtype), # (NUM_KEYPOINTS x 3) 84 | } 85 | return image, target, target_weight, meta 86 | -------------------------------------------------------------------------------- /common/modules/classifier.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Optional, List, Dict 2 | import torch.nn as nn 3 | import torch 4 | 5 | __all__ = ['Classifier'] 6 | 7 | 8 | class Classifier(nn.Module): 9 | """A generic Classifier class for domain adaptation. 10 | 11 | Args: 12 | backbone (torch.nn.Module): Any backbone to extract 2-d features from data 13 | num_classes (int): Number of classes 14 | bottleneck (torch.nn.Module, optional): Any bottleneck layer. Use no bottleneck by default 15 | bottleneck_dim (int, optional): Feature dimension of the bottleneck layer. Default: -1 16 | head (torch.nn.Module, optional): Any classifier head. Use :class:`torch.nn.Linear` by default 17 | finetune (bool): Whether finetune the classifier or train from scratch. Default: True 18 | 19 | .. note:: 20 | Different classifiers are used in different domain adaptation algorithms to achieve better accuracy 21 | respectively, and we provide a suggested `Classifier` for different algorithms. 22 | Remember they are not the core of algorithms. You can implement your own `Classifier` and combine it with 23 | the domain adaptation algorithm in this algorithm library. 24 | 25 | .. note:: 26 | The learning rate of this classifier is set 10 times to that of the feature extractor for better accuracy 27 | by default. 
If you have other optimization strategies, please over-ride :meth:`~Classifier.get_parameters`. 28 | 29 | Inputs: 30 | - x (tensor): input data fed to `backbone` 31 | 32 | Outputs: 33 | - predictions: classifier's predictions 34 | - features: features after `bottleneck` layer and before `head` layer 35 | 36 | Shape: 37 | - Inputs: (minibatch, *) where * means, any number of additional dimensions 38 | - predictions: (minibatch, `num_classes`) 39 | - features: (minibatch, `features_dim`) 40 | 41 | """ 42 | 43 | def __init__(self, backbone: nn.Module, num_classes: int, bottleneck: Optional[nn.Module] = None, 44 | bottleneck_dim: Optional[int] = -1, head: Optional[nn.Module] = None, finetune=True): 45 | super(Classifier, self).__init__() 46 | self.backbone = backbone 47 | self.num_classes = num_classes 48 | if bottleneck is None: 49 | self.bottleneck = nn.Sequential( 50 | nn.AdaptiveAvgPool2d(output_size=(1, 1)), 51 | nn.Flatten() 52 | ) 53 | self._features_dim = backbone.out_features 54 | else: 55 | self.bottleneck = bottleneck 56 | assert bottleneck_dim > 0 57 | self._features_dim = bottleneck_dim 58 | 59 | if head is None: 60 | self.head = nn.Linear(self._features_dim, num_classes) 61 | else: 62 | self.head = head 63 | self.finetune = finetune 64 | 65 | @property 66 | def features_dim(self) -> int: 67 | """The dimension of features before the final `head` layer""" 68 | return self._features_dim 69 | 70 | def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: 71 | """""" 72 | f = self.backbone(x) 73 | f = self.bottleneck(f) 74 | predictions = self.head(f) 75 | return predictions, f 76 | 77 | def get_parameters(self, base_lr=1.0) -> List[Dict]: 78 | """A parameter list which decides optimization hyper-parameters, 79 | such as the relative learning rate of each layer 80 | """ 81 | params = [ 82 | {"params": self.backbone.parameters(), "lr": 0.1 * base_lr if self.finetune else 1.0 * base_lr}, 83 | {"params": self.bottleneck.parameters(), "lr": 1.0 * base_lr}, 84 | {"params": self.head.parameters(), "lr": 1.0 * base_lr}, 85 | ] 86 | 87 | return params 88 | -------------------------------------------------------------------------------- /dalib/adaptation/mcc.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from common.modules.classifier import Classifier as ClassifierBase 7 | from ..modules.entropy import entropy 8 | 9 | 10 | __all__ = ['MinimumClassConfusionLoss', 'ImageClassifier'] 11 | 12 | 13 | class MinimumClassConfusionLoss(nn.Module): 14 | r""" 15 | Minimum Class Confusion loss minimizes the class confusion in the target predictions. 16 | 17 | You can see more details in `Minimum Class Confusion for Versatile Domain Adaptation (ECCV 2020) `_ 18 | 19 | Args: 20 | temperature (float) : The temperature for rescaling, the prediction will shrink to vanilla softmax if 21 | temperature is 1.0. 22 | 23 | .. note:: 24 | Make sure that temperature is larger than 0. 25 | 26 | Inputs: g_t 27 | - g_t (tensor): unnormalized classifier predictions on target domain, :math:`g^t` 28 | 29 | Shape: 30 | - g_t: :math:`(minibatch, C)` where C means the number of classes. 31 | - Output: scalar. 
32 | 33 | Examples:: 34 | >>> temperature = 2.0 35 | >>> loss = MinimumClassConfusionLoss(temperature) 36 | >>> # logits output from target domain 37 | >>> g_t = torch.randn(batch_size, num_classes) 38 | >>> output = loss(g_t) 39 | 40 | MCC can also serve as a regularizer for existing methods. 41 | Examples:: 42 | >>> from dalib.modules.domain_discriminator import DomainDiscriminator 43 | >>> num_classes = 2 44 | >>> feature_dim = 1024 45 | >>> batch_size = 10 46 | >>> temperature = 2.0 47 | >>> discriminator = DomainDiscriminator(in_feature=feature_dim, hidden_size=1024) 48 | >>> cdan_loss = ConditionalDomainAdversarialLoss(discriminator, reduction='mean') 49 | >>> mcc_loss = MinimumClassConfusionLoss(temperature) 50 | >>> # features from source domain and target domain 51 | >>> f_s, f_t = torch.randn(batch_size, feature_dim), torch.randn(batch_size, feature_dim) 52 | >>> # logits output from source domain adn target domain 53 | >>> g_s, g_t = torch.randn(batch_size, num_classes), torch.randn(batch_size, num_classes) 54 | >>> total_loss = cdan_loss(g_s, f_s, g_t, f_t) + mcc_loss(g_t) 55 | """ 56 | 57 | def __init__(self, temperature: float): 58 | super(MinimumClassConfusionLoss, self).__init__() 59 | self.temperature = temperature 60 | 61 | def forward(self, logits: torch.Tensor) -> torch.Tensor: 62 | batch_size, num_classes = logits.shape 63 | predictions = F.softmax(logits / self.temperature, dim=1) # batch_size x num_classes 64 | entropy_weight = entropy(predictions).detach() 65 | entropy_weight = 1 + torch.exp(-entropy_weight) 66 | entropy_weight = (batch_size * entropy_weight / torch.sum(entropy_weight)).unsqueeze(dim=1) # batch_size x 1 67 | class_confusion_matrix = torch.mm((predictions * entropy_weight).transpose(1, 0), predictions) # num_classes x num_classes 68 | class_confusion_matrix = class_confusion_matrix / torch.sum(class_confusion_matrix, dim=1) 69 | mcc_loss = (torch.sum(class_confusion_matrix) - torch.trace(class_confusion_matrix)) / num_classes 70 | return mcc_loss 71 | 72 | 73 | class ImageClassifier(ClassifierBase): 74 | def __init__(self, backbone: nn.Module, num_classes: int, bottleneck_dim: Optional[int] = 256, **kwargs): 75 | bottleneck = nn.Sequential( 76 | nn.AdaptiveAvgPool2d(output_size=(1, 1)), 77 | nn.Flatten(), 78 | nn.Linear(backbone.out_features, bottleneck_dim), 79 | nn.BatchNorm1d(bottleneck_dim), 80 | nn.ReLU() 81 | ) 82 | super(ImageClassifier, self).__init__(backbone, num_classes, bottleneck, bottleneck_dim, **kwargs) 83 | -------------------------------------------------------------------------------- /common/vision/datasets/imagelist.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional, Callable, Tuple, Any, List 3 | import torchvision.datasets as datasets 4 | from torchvision.datasets.folder import default_loader 5 | 6 | 7 | class ImageList(datasets.VisionDataset): 8 | """A generic Dataset class for image classification 9 | 10 | Args: 11 | root (str): Root directory of dataset 12 | classes (list[str]): The names of all the classes 13 | data_list_file (str): File to read the image list from. 14 | transform (callable, optional): A function/transform that takes in an PIL image \ 15 | and returns a transformed version. E.g, :class:`torchvision.transforms.RandomCrop`. 16 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 17 | 18 | .. note:: In `data_list_file`, each line has 2 values in the following format. 
19 | :: 20 | source_dir/dog_xxx.png 0 21 | source_dir/cat_123.png 1 22 | target_dir/dog_xxy.png 0 23 | target_dir/cat_nsdf3.png 1 24 | 25 | The first value is the relative path of an image, and the second value is the label of the corresponding image. 26 | If your data_list_file has different formats, please over-ride :meth:`~ImageList.parse_data_file`. 27 | """ 28 | 29 | def __init__(self, root: str, classes: List[str], data_list_file: str, 30 | transform: Optional[Callable] = None, target_transform: Optional[Callable] = None): 31 | super().__init__(root, transform=transform, target_transform=target_transform) 32 | self.samples = self.parse_data_file(data_list_file) 33 | self.classes = classes 34 | self.class_to_idx = {cls: idx 35 | for idx, cls in enumerate(self.classes)} 36 | self.loader = default_loader 37 | self.data_list_file = data_list_file 38 | self.metadata = [{"index": i} for i in range(len(self.samples))] 39 | 40 | def __getitem__(self, index: int) -> Tuple[Any, int]: 41 | """ 42 | Args: 43 | index (int): Index 44 | return (tuple): (image, target) where target is index of the target class. 45 | """ 46 | path, target = self.samples[index] 47 | img = self.loader(path) 48 | if self.transform is not None: 49 | img = self.transform(img) 50 | if self.target_transform is not None and target is not None: 51 | target = self.target_transform(target) 52 | metadata = self.metadata[index] 53 | return img, target, metadata 54 | 55 | def __len__(self) -> int: 56 | return len(self.samples) 57 | 58 | def set_metadata(self, metadata, name): 59 | # assert isinstance(metadata, list) 60 | assert len(metadata) == len(self.samples) 61 | for i in range(len(metadata)): 62 | self.metadata[i][name] = metadata[i] 63 | 64 | def parse_data_file(self, file_name: str) -> List[Tuple[str, int]]: 65 | """Parse file to data list 66 | 67 | Args: 68 | file_name (str): The path of data file 69 | return (list): List of (image path, class_index) tuples 70 | """ 71 | with open(file_name, "r") as f: 72 | data_list = [] 73 | for line in f.readlines(): 74 | split_line = line.split() 75 | target = split_line[-1] 76 | path = ' '.join(split_line[:-1]) 77 | if not os.path.isabs(path): 78 | path = os.path.join(self.root, path) 79 | target = int(target) 80 | data_list.append((path, target)) 81 | return data_list 82 | 83 | @property 84 | def num_classes(self) -> int: 85 | """Number of classes""" 86 | return len(self.classes) 87 | 88 | @classmethod 89 | def domains(cls): 90 | """All possible domain in this dataset""" 91 | raise NotImplemented -------------------------------------------------------------------------------- /common/vision/datasets/coco70.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional 3 | from .imagelist import ImageList 4 | from ._util import download as download_data, check_exits 5 | 6 | 7 | class COCO70(ImageList): 8 | """COCO-70 dataset is a large-scale classification dataset (1000 images per class) created from 9 | `COCO `_ Dataset. 10 | It is used to explore the effect of fine-tuning with a large amount of data. 11 | 12 | Args: 13 | root (str): Root directory of dataset 14 | split (str, optional): The dataset split, supports ``train``, or ``test``. 15 | sample_rate (int): The sampling rates to sample random ``training`` images for each category. 16 | Choices include 100, 50, 30, 15. Default: 100. 17 | download (bool, optional): If true, downloads the dataset from the internet and puts it \ 18 | in root directory. 
If dataset is already downloaded, it is not downloaded again. 19 | transform (callable, optional): A function/transform that takes in an PIL image and returns a \ 20 | transformed version. E.g, :class:`torchvision.transforms.RandomCrop`. 21 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 22 | 23 | .. note:: In `root`, there will exist following files after downloading. 24 | :: 25 | train/ 26 | test/ 27 | image_list/ 28 | train_100.txt 29 | train_50.txt 30 | train_30.txt 31 | train_15.txt 32 | test.txt 33 | """ 34 | download_list = [ 35 | ("image_list", "image_list.zip", "https://cloud.tsinghua.edu.cn/f/d2ffb62fe3d140f1a73c/?dl=1"), 36 | ("train", "train.tgz", "https://cloud.tsinghua.edu.cn/f/e0dc4368342948c5bb2a/?dl=1"), 37 | ("test", "test.tgz", "https://cloud.tsinghua.edu.cn/f/59393a55c818429fb8d1/?dl=1"), 38 | ] 39 | image_list = { 40 | "train": "image_list/train_100.txt", 41 | "train100": "image_list/train_100.txt", 42 | "train50": "image_list/train_50.txt", 43 | "train30": "image_list/train_30.txt", 44 | "train15": "image_list/train_15.txt", 45 | "test": "image_list/test.txt", 46 | "test100": "image_list/test.txt", 47 | } 48 | CLASSES =['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 49 | 'boat', 'traffic_light', 'fire_hydrant', 'stop_sign', 'bench', 'bird', 'cat', 'dog', 50 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 51 | 'handbag', 'tie', 'suitcase', 'skis', 'kite', 'baseball_bat', 'skateboard', 'surfboard', 52 | 'tennis_racket', 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 53 | 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot_dog', 'pizza', 'donut', 'cake', 54 | 'chair', 'couch', 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop', 55 | 'remote', 'keyboard', 'cell_phone', 'microwave', 'oven', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'teddy_bear'] 56 | 57 | def __init__(self, root: str, split: str, sample_rate: Optional[int] =100, download: Optional[bool] = False, **kwargs): 58 | 59 | if split == 'train': 60 | list_name = 'train' + str(sample_rate) 61 | assert list_name in self.image_list 62 | data_list_file = os.path.join(root, self.image_list[list_name]) 63 | else: 64 | data_list_file = os.path.join(root, self.image_list['test']) 65 | 66 | if download: 67 | list(map(lambda args: download_data(root, *args), self.download_list)) 68 | else: 69 | list(map(lambda file_name, _: check_exits(root, file_name), self.download_list)) 70 | 71 | super(COCO70, self).__init__(root, COCO70.CLASSES, data_list_file=data_list_file, **kwargs) 72 | -------------------------------------------------------------------------------- /dalib/translation/cyclegan/loss.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Modified from https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix 3 | # ------------------------------------------------------------------------------ 4 | import torch.nn as nn 5 | import torch 6 | 7 | 8 | class LeastSquaresGenerativeAdversarialLoss(nn.Module): 9 | """ 10 | Loss for `Least Squares Generative Adversarial Network (LSGAN) `_ 11 | 12 | Args: 13 | reduction (str, optional): Specifies the reduction to apply to the output: 14 | ``'none'`` | ``'mean'`` | ``'sum'``. 
``'none'``: no reduction will be applied, 15 | ``'mean'``: the sum of the output will be divided by the number of 16 | elements in the output, ``'sum'``: the output will be summed. Default: ``'mean'`` 17 | 18 | Inputs: 19 | - prediction (tensor): unnormalized discriminator predictions 20 | - real (bool): if the ground truth label is for real images or fake images. Default: true 21 | 22 | .. warning:: 23 | Do not use sigmoid as the last layer of Discriminator. 24 | 25 | """ 26 | def __init__(self, reduction='mean'): 27 | super(LeastSquaresGenerativeAdversarialLoss, self).__init__() 28 | self.mse_loss = nn.MSELoss(reduction=reduction) 29 | 30 | def forward(self, prediction, real=True): 31 | if real: 32 | label = torch.ones_like(prediction) 33 | else: 34 | label = torch.zeros_like(prediction) 35 | return self.mse_loss(prediction, label) 36 | 37 | 38 | class VanillaGenerativeAdversarialLoss(nn.Module): 39 | """ 40 | Loss for `Vanilla Generative Adversarial Network `_ 41 | 42 | Args: 43 | reduction (str, optional): Specifies the reduction to apply to the output: 44 | ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, 45 | ``'mean'``: the sum of the output will be divided by the number of 46 | elements in the output, ``'sum'``: the output will be summed. Default: ``'mean'`` 47 | 48 | Inputs: 49 | - prediction (tensor): unnormalized discriminator predictions 50 | - real (bool): if the ground truth label is for real images or fake images. Default: true 51 | 52 | .. warning:: 53 | Do not use sigmoid as the last layer of Discriminator. 54 | 55 | """ 56 | def __init__(self, reduction='mean'): 57 | super(VanillaGenerativeAdversarialLoss, self).__init__() 58 | self.bce_loss = nn.BCEWithLogitsLoss(reduction=reduction) 59 | 60 | def forward(self, prediction, real=True): 61 | if real: 62 | label = torch.ones_like(prediction) 63 | else: 64 | label = torch.zeros_like(prediction) 65 | return self.bce_loss(prediction, label) 66 | 67 | 68 | class WassersteinGenerativeAdversarialLoss(nn.Module): 69 | """ 70 | Loss for `Wasserstein Generative Adversarial Network `_ 71 | 72 | Args: 73 | reduction (str, optional): Specifies the reduction to apply to the output: 74 | ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, 75 | ``'mean'``: the sum of the output will be divided by the number of 76 | elements in the output, ``'sum'``: the output will be summed. Default: ``'mean'`` 77 | 78 | Inputs: 79 | - prediction (tensor): unnormalized discriminator predictions 80 | - real (bool): if the ground truth label is for real images or fake images. Default: true 81 | 82 | .. warning:: 83 | Do not use sigmoid as the last layer of Discriminator. 84 | 85 | """ 86 | def __init__(self, reduction='mean'): 87 | super(WassersteinGenerativeAdversarialLoss, self).__init__() 88 | self.mse_loss = nn.MSELoss(reduction=reduction) 89 | 90 | def forward(self, prediction, real=True): 91 | if real: 92 | return -prediction.mean() 93 | else: 94 | return prediction.mean() -------------------------------------------------------------------------------- /common/vision/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import numpy as np 3 | import torch 4 | from torchvision.transforms import Normalize 5 | 6 | 7 | class ResizeImage(object): 8 | """Resize the input PIL Image to the given size. 9 | 10 | Args: 11 | size (sequence or int): Desired output size. 
If size is a sequence like 12 | (h, w), output size will be matched to this. If size is an int, 13 | output size will be (size, size) 14 | """ 15 | def __init__(self, size): 16 | if isinstance(size, int): 17 | self.size = (int(size), int(size)) 18 | else: 19 | self.size = size 20 | 21 | def __call__(self, img): 22 | th, tw = self.size 23 | return img.resize((th, tw)) 24 | 25 | 26 | class MultipleApply: 27 | """Apply a list of transformations to an image and get multiple transformed images. 28 | 29 | Args: 30 | transforms (list or tuple): list of transformations 31 | 32 | Example: 33 | 34 | >>> transform1 = T.Compose([ 35 | ... ResizeImage(256), 36 | ... T.RandomCrop(224) 37 | ... ]) 38 | >>> transform2 = T.Compose([ 39 | ... ResizeImage(256), 40 | ... T.RandomCrop(224), 41 | ... ]) 42 | >>> multiply_transform = MultipleApply([transform1, transform2]) 43 | """ 44 | def __init__(self, transforms): 45 | self.transforms = transforms 46 | 47 | def __call__(self, image): 48 | return [t(image) for t in self.transforms] 49 | 50 | 51 | class Denormalize(Normalize): 52 | """DeNormalize a tensor image with mean and standard deviation. 53 | Given mean: ``(mean[1],...,mean[n])`` and std: ``(std[1],..,std[n])`` for ``n`` 54 | channels, this transform will denormalize each channel of the input 55 | ``torch.*Tensor`` i.e., 56 | ``output[channel] = input[channel] * std[channel] + mean[channel]`` 57 | 58 | .. note:: 59 | This transform acts out of place, i.e., it does not mutate the input tensor. 60 | 61 | Args: 62 | mean (sequence): Sequence of means for each channel. 63 | std (sequence): Sequence of standard deviations for each channel. 64 | 65 | """ 66 | def __init__(self, mean, std): 67 | mean = np.array(mean) 68 | std = np.array(std) 69 | super().__init__((-mean / std).tolist(), (1 / std).tolist()) 70 | 71 | 72 | class NormalizeAndTranspose: 73 | """ 74 | First, normalize a tensor image with mean and standard deviation. 75 | Then, convert the shape (H x W x C) to shape (C x H x W). 76 | """ 77 | def __init__(self, mean=(104.00698793, 116.66876762, 122.67891434)): 78 | self.mean = np.array(mean, dtype=np.float32) 79 | 80 | def __call__(self, image): 81 | if isinstance(image, Image.Image): 82 | image = np.asarray(image, np.float32) 83 | # change to BGR 84 | image = image[:, :, ::-1] 85 | # normalize 86 | image -= self.mean 87 | image = image.transpose((2, 0, 1)).copy() 88 | elif isinstance(image, torch.Tensor): 89 | # change to BGR 90 | image = image[:, :, [2, 1, 0]] 91 | # normalize 92 | image -= torch.from_numpy(self.mean).to(image.device) 93 | image = image.permute((2, 0, 1)) 94 | else: 95 | raise NotImplementedError(type(image)) 96 | return image 97 | 98 | 99 | class DeNormalizeAndTranspose: 100 | """ 101 | First, convert a tensor image from the shape (C x H x W ) to shape (H x W x C). 102 | Then, denormalize it with mean and standard deviation. 
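    A minimal usage sketch (``img`` is a hypothetical ``numpy.float32`` array in :math:`(C, H, W)` BGR layout,
    e.g. the output of :class:`NormalizeAndTranspose`):

    >>> denorm = DeNormalizeAndTranspose()
    >>> restored = denorm(img)  # (H, W, C), RGB, with the channel means added back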
103 | """ 104 | def __init__(self, mean=(104.00698793, 116.66876762, 122.67891434)): 105 | self.mean = np.array(mean, dtype=np.float32) 106 | 107 | def __call__(self, image): 108 | image = image.transpose((1, 2, 0)) 109 | # denormalize 110 | image += self.mean 111 | # change to RGB 112 | image = image[:, :, ::-1] 113 | return image 114 | 115 | -------------------------------------------------------------------------------- /dalib/adaptation/iwan.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List, Dict 2 | import torch 3 | import torch.nn as nn 4 | 5 | from common.modules.classifier import Classifier as ClassifierBase 6 | 7 | 8 | class ImportanceWeightModule(object): 9 | r""" 10 | Calculating class weight based on the output of discriminator. 11 | Introduced by `Importance Weighted Adversarial Nets for Partial Domain Adaptation (CVPR 2018) `_ 12 | 13 | Args: 14 | discriminator (torch.nn.Module): A domain discriminator object, which predicts the domains of features. 15 | Its input shape is :math:`(N, F)` and output shape is :math:`(N, 1)` 16 | partial_classes_index (list[int], optional): The index of partial classes. Note that this parameter is \ 17 | just for debugging, since in real-world dataset, we have no access to the index of partial classes. \ 18 | Default: None. 19 | 20 | Examples:: 21 | 22 | >>> domain_discriminator = DomainDiscriminator(1024, 1024) 23 | >>> importance_weight_module = ImportanceWeightModule(domain_discriminator) 24 | >>> num_iterations = 10000 25 | >>> for _ in range(num_iterations): 26 | >>> # feature from source domain 27 | >>> f_s = torch.randn(32, 1024) 28 | >>> # importance weights for source instance 29 | >>> w_s = importance_weight_module.get_importance_weight(f_s) 30 | """ 31 | 32 | def __init__(self, discriminator: nn.Module, partial_classes_index: Optional[List[int]] = None): 33 | self.discriminator = discriminator 34 | self.partial_classes_index = partial_classes_index 35 | 36 | def get_importance_weight(self, feature): 37 | """ 38 | Get importance weights for each instance. 39 | 40 | Args: 41 | feature (tensor): feature from source domain, in shape :math:`(N, F)` 42 | 43 | Returns: 44 | instance weight in shape :math:`(N, 1)` 45 | """ 46 | weight = 1. - self.discriminator(feature) 47 | weight = weight / weight.mean() 48 | weight = weight.detach() 49 | return weight 50 | 51 | def get_partial_classes_weight(self, weights: torch.Tensor, labels: torch.Tensor): 52 | """ 53 | Get class weight averaged on the partial classes and non-partial classes respectively. 54 | 55 | Args: 56 | weights (tensor): instance weight in shape :math:`(N, 1)` 57 | labels (tensor): ground truth labels in shape :math:`(N, 1)` 58 | 59 | .. warning:: 60 | This function is just for debugging, since in real-world dataset, we have no access to the index of \ 61 | partial classes and this function will throw an error when `partial_classes_index` is None. 62 | """ 63 | assert self.partial_classes_index is not None 64 | 65 | weights = weights.squeeze() 66 | is_partial = torch.Tensor([label in self.partial_classes_index for label in labels]).to(weights.device) 67 | if is_partial.sum() > 0: 68 | partial_classes_weight = (weights * is_partial).sum() / is_partial.sum() 69 | else: 70 | partial_classes_weight = 0 71 | 72 | not_partial = 1. 
- is_partial 73 | if not_partial.sum() > 0: 74 | not_partial_classes_weight = (weights * not_partial).sum() / not_partial.sum() 75 | else: 76 | not_partial_classes_weight = 0 77 | return partial_classes_weight, not_partial_classes_weight 78 | 79 | 80 | class ImageClassifier(ClassifierBase): 81 | r"""The Image Classifier for `Importance Weighted Adversarial Nets for Partial Domain Adaptation `_ 82 | """ 83 | 84 | def __init__(self, backbone: nn.Module, num_classes: int, bottleneck_dim: Optional[int] = 256, **kwargs): 85 | bottleneck = nn.Sequential( 86 | nn.AdaptiveAvgPool2d(output_size=(1, 1)), 87 | nn.Flatten(), 88 | nn.Linear(backbone.out_features, bottleneck_dim), 89 | nn.BatchNorm1d(bottleneck_dim), 90 | nn.ReLU() 91 | ) 92 | super(ImageClassifier, self).__init__(backbone, num_classes, bottleneck, bottleneck_dim, **kwargs) 93 | -------------------------------------------------------------------------------- /icon/icon_utils.py: -------------------------------------------------------------------------------- 1 | from tensorboardX import SummaryWriter 2 | import os 3 | import torch 4 | import torch.nn as nn 5 | import torchvision.transforms as T 6 | from common.vision.transforms import ResizeImage 7 | import torch.nn.functional as F 8 | from icon.cluster import PairEnum 9 | from icon.randaugment import rand_augment_transform 10 | 11 | rgb_mean = (0.485, 0.456, 0.406) 12 | ra_params = dict(translate_const=int(224 * 0.45), img_mean=tuple([min(255, round(255 * x)) for x in rgb_mean]),) 13 | 14 | 15 | class Visualizer(): 16 | def __init__(self, root_dir, exp_name): 17 | if not os.path.exists(root_dir): 18 | os.makedirs(root_dir) 19 | log_dir = os.path.join(root_dir, exp_name) 20 | self.writer = SummaryWriter(log_dir) 21 | self.step = 0 22 | 23 | def plot_items(self, items): 24 | for name, value in items.items(): 25 | self.writer.add_scalar(name, value, self.step) 26 | 27 | def tick(self): 28 | self.step += 1 29 | 30 | 31 | class TwoViewsTrainTransform(object): 32 | def __init__(self, center_crop): 33 | normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 34 | crop = T.CenterCrop(224) if center_crop else T.RandomResizedCrop(224) 35 | self.weak = T.Compose([ 36 | ResizeImage(256), 37 | crop, 38 | T.RandomHorizontalFlip(), 39 | T.ToTensor(), 40 | normalize 41 | ]) 42 | self.strong = T.Compose([ 43 | ResizeImage(256), 44 | crop, 45 | T.RandomHorizontalFlip(), 46 | T.RandomApply([ 47 | T.ColorJitter(0.4, 0.4, 0.4, 0.0) 48 | ], p=1.0), 49 | rand_augment_transform('rand-n{}-m{}-mstd0.5'.format(2, 10), ra_params), 50 | T.ToTensor(), 51 | normalize, 52 | ]) 53 | 54 | def __call__(self, x): 55 | weak = self.weak(x) 56 | strong = self.strong(x) 57 | return weak, strong 58 | 59 | 60 | sim_list = [] 61 | def get_ulb_sim_matrix(mode, sim_matrix_ulb, cluster_preds_t, update_list=True): 62 | if mode == 'stats': 63 | return sim_matrix_ulb, 0, 0 64 | elif mode == 'argmax': 65 | y_c_t = cluster_preds_t.argmax(dim=1).contiguous().view(-1, 1) 66 | sim_matrix_ulb_full = torch.eq(y_c_t, y_c_t.T).float().to(cluster_preds_t.device) 67 | sim_matrix_ulb_full = (sim_matrix_ulb_full - 0.5) * 2 68 | sim_matrix_ulb_full = sim_matrix_ulb_full.flatten() 69 | return sim_matrix_ulb_full 70 | else: 71 | if mode == 'sim': 72 | feat_row, feat_col = PairEnum(F.normalize(cluster_preds_t, dim=1)) 73 | elif mode == 'prob': 74 | feat_row, feat_col = PairEnum(F.softmax(cluster_preds_t, dim=1)) 75 | tmp_distance_ori = torch.bmm( 76 | feat_row.view(feat_row.size(0), 1, -1), 77 | feat_col.view(feat_row.size(0), 
-1, 1) 78 | ) 79 | sim_threshold = 0.92 80 | sim_ratio = 0.5 / 12 81 | diff_ratio = 5.5 / 12 82 | similarity = tmp_distance_ori.squeeze() 83 | if update_list: 84 | global sim_list 85 | sim_list.append(similarity) 86 | if len(sim_list) > 30: 87 | sim_list = sim_list[1:] 88 | sim_all = torch.cat(sim_list, dim=0) 89 | sim_all_sorted, _ = torch.sort(sim_all) 90 | 91 | n_diff = min(len(sim_all) * diff_ratio, len(sim_all)-1) 92 | n_sim = min(len(sim_all) * sim_ratio, len(sim_all)) 93 | 94 | low_threshold = sim_all_sorted[int(n_diff)] 95 | high_threshold = max(sim_threshold, sim_all_sorted[-int(n_sim)]) 96 | 97 | sim_matrix_ulb = torch.zeros_like(similarity).float() 98 | 99 | if high_threshold != low_threshold: 100 | sim_matrix_ulb[similarity >= high_threshold] = 1.0 101 | sim_matrix_ulb[similarity <= low_threshold] = -1.0 102 | else: 103 | sim_matrix_ulb[similarity > high_threshold] = 1.0 104 | sim_matrix_ulb[similarity < low_threshold] = -1.0 105 | return sim_matrix_ulb, low_threshold, high_threshold -------------------------------------------------------------------------------- /common/vision/datasets/aircrafts.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional 3 | from .imagelist import ImageList 4 | from ._util import download as download_data, check_exits 5 | 6 | 7 | class Aircraft(ImageList): 8 | """`FVGC-Aircraft `_ Dataset. 9 | 10 | Args: 11 | root (str): Root directory of dataset 12 | split (str, optional): The dataset split, supports ``train``, or ``test``. 13 | sample_rate (int): The sampling rates to sample random ``training`` images for each category. 14 | Choices include 100, 50, 30, 15. Default: 100. 15 | download (bool, optional): If true, downloads the dataset from the internet and puts it \ 16 | in root directory. If dataset is already downloaded, it is not downloaded again. 17 | transform (callable, optional): A function/transform that takes in an PIL image and returns a \ 18 | transformed version. E.g, :class:`torchvision.transforms.RandomCrop`. 19 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 20 | 21 | .. note:: In `root`, there will exist following files after downloading. 
22 | :: 23 | train/ 24 | test/ 25 | image_list/ 26 | train_100.txt 27 | train_50.txt 28 | train_30.txt 29 | train_15.txt 30 | test.txt 31 | """ 32 | download_list = [ 33 | ("image_list", "image_list.zip", "https://cloud.tsinghua.edu.cn/f/04356d49d0054092b07e/?dl=1"), 34 | ("train", "train.tgz", "https://cloud.tsinghua.edu.cn/f/9fed22eba03046d69012/?dl=1"), 35 | ("test", "test.tgz", "https://cloud.tsinghua.edu.cn/f/8d5e4c1b031a4a608c68/?dl=1"), 36 | ] 37 | image_list = { 38 | "train": "image_list/train_100.txt", 39 | "train100": "image_list/train_100.txt", 40 | "train50": "image_list/train_50.txt", 41 | "train30": "image_list/train_30.txt", 42 | "train15": "image_list/train_15.txt", 43 | "test": "image_list/test.txt", 44 | "test100": "image_list/test.txt", 45 | } 46 | CLASSES = ['707-320', '727-200', '737-200', '737-300', '737-400', '737-500', '737-600', '737-700', '737-800', '737-900', '747-100', 47 | '747-200', '747-300', '747-400', '757-200', '757-300', '767-200', '767-300', '767-400', '777-200', '777-300', 'A300B4', 48 | 'A310', 'A318', 'A319', 'A320', 'A321', 'A330-200', 'A330-300', 'A340-200', 'A340-300', 'A340-500', 'A340-600', 'A380', 49 | 'ATR-42', 'ATR-72', 'An-12', 'BAE 146-200', 'BAE 146-300', 'BAE-125', 'Beechcraft 1900', 'Boeing 717', 'C-130', 'C-47', 50 | 'CRJ-200', 'CRJ-700', 'CRJ-900', 'Cessna 172', 'Cessna 208', 'Cessna 525', 'Cessna 560', 'Challenger 600', 'DC-10', 51 | 'DC-3', 'DC-6', 'DC-8', 'DC-9-30', 'DH-82', 'DHC-1', 'DHC-6', 'DHC-8-100', 'DHC-8-300', 'DR-400', 'Dornier 328', 'E-170', 52 | 'E-190', 'E-195', 'EMB-120', 'ERJ 135', 'ERJ 145', 'Embraer Legacy 600', 'Eurofighter Typhoon', 'F-16A-B', 'F-A-18', 53 | 'Falcon 2000', 'Falcon 900', 'Fokker 100', 'Fokker 50', 'Fokker 70', 'Global Express', 'Gulfstream IV', 'Gulfstream V', 54 | 'Hawk T1', 'Il-76', 'L-1011', 'MD-11', 'MD-80', 'MD-87', 'MD-90', 'Metroliner', 'Model B200', 'PA-28', 'SR-20', 55 | 'Saab 2000', 'Saab 340', 'Spitfire', 'Tornado', 'Tu-134', 'Tu-154', 'Yak-42'] 56 | 57 | def __init__(self, root: str, split: str, sample_rate: Optional[int]=100, download: Optional[bool] = False, **kwargs): 58 | 59 | if split == 'train': 60 | list_name = 'train' + str(sample_rate) 61 | assert list_name in self.image_list 62 | data_list_file = os.path.join(root, self.image_list[list_name]) 63 | else: 64 | data_list_file = os.path.join(root, self.image_list['test']) 65 | 66 | if download: 67 | list(map(lambda args: download_data(root, *args), self.download_list)) 68 | else: 69 | list(map(lambda file_name, _: check_exits(root, file_name), self.download_list)) 70 | 71 | super(Aircraft, self).__init__(root, Aircraft.CLASSES, data_list_file=data_list_file, **kwargs) -------------------------------------------------------------------------------- /common/vision/models/keypoint_detection/loss.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Modified from https://github.com/microsoft/human-pose-estimation.pytorch 3 | # ------------------------------------------------------------------------------ 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class JointsMSELoss(nn.Module): 9 | """ 10 | Typical MSE loss for keypoint detection. 11 | 12 | Args: 13 | reduction (str, optional): Specifies the reduction to apply to the output: 14 | ``'none'`` | ``'mean'``. 
``'none'``: no reduction will be applied, 15 | ``'mean'``: the sum of the output will be divided by the number of 16 | elements in the output. Default: ``'mean'`` 17 | 18 | Inputs: 19 | - output (tensor): heatmap predictions 20 | - target (tensor): heatmap labels 21 | - target_weight (tensor): whether the keypoint is visible. All keypoint is visible if None. Default: None. 22 | 23 | Shape: 24 | - output: :math:`(minibatch, K, H, W)` where K means the number of keypoints, 25 | H and W is the height and width of the heatmap respectively. 26 | - target: :math:`(minibatch, K, H, W)`. 27 | - target_weight: :math:`(minibatch, K)`. 28 | - Output: scalar by default. If :attr:`reduction` is ``'none'``, then :math:`(minibatch, K)`. 29 | 30 | """ 31 | def __init__(self, reduction='mean'): 32 | super(JointsMSELoss, self).__init__() 33 | self.criterion = nn.MSELoss(reduction='none') 34 | self.reduction = reduction 35 | 36 | def forward(self, output, target, target_weight=None): 37 | B, K, _, _ = output.shape 38 | heatmaps_pred = output.reshape((B, K, -1)) 39 | heatmaps_gt = target.reshape((B, K, -1)) 40 | loss = self.criterion(heatmaps_pred, heatmaps_gt) * 0.5 41 | if target_weight is not None: 42 | loss = loss * target_weight.view((B, K, 1)) 43 | if self.reduction == 'mean': 44 | return loss.mean() 45 | elif self.reduction == 'none': 46 | return loss.mean(dim=-1) 47 | 48 | 49 | class JointsKLLoss(nn.Module): 50 | """ 51 | KL Divergence for keypoint detection proposed by 52 | `Regressive Domain Adaptation for Unsupervised Keypoint Detection `_. 53 | 54 | Args: 55 | reduction (str, optional): Specifies the reduction to apply to the output: 56 | ``'none'`` | ``'mean'``. ``'none'``: no reduction will be applied, 57 | ``'mean'``: the sum of the output will be divided by the number of 58 | elements in the output. Default: ``'mean'`` 59 | 60 | Inputs: 61 | - output (tensor): heatmap predictions 62 | - target (tensor): heatmap labels 63 | - target_weight (tensor): whether the keypoint is visible. All keypoint is visible if None. Default: None. 64 | 65 | Shape: 66 | - output: :math:`(minibatch, K, H, W)` where K means the number of keypoints, 67 | H and W is the height and width of the heatmap respectively. 68 | - target: :math:`(minibatch, K, H, W)`. 69 | - target_weight: :math:`(minibatch, K)`. 70 | - Output: scalar by default. If :attr:`reduction` is ``'none'``, then :math:`(minibatch, K)`. 
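    A minimal usage sketch (``B``, ``K``, ``H``, ``W`` are hypothetical sizes; the heatmap targets should be
    non-negative, since they are renormalized into distributions internally):

    >>> B, K, H, W = 4, 16, 64, 64
    >>> criterion = JointsKLLoss()
    >>> output = torch.randn(B, K, H, W)        # raw heatmap predictions
    >>> target = torch.rand(B, K, H, W)         # non-negative heatmap labels
    >>> target_weight = torch.ones(B, K)        # every keypoint visible
    >>> loss = criterion(output, target, target_weight)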
71 | 72 | """ 73 | def __init__(self, reduction='mean', epsilon=0.): 74 | super(JointsKLLoss, self).__init__() 75 | self.criterion = nn.KLDivLoss(reduction='none') 76 | self.reduction = reduction 77 | self.epsilon = epsilon 78 | 79 | def forward(self, output, target, target_weight=None): 80 | B, K, _, _ = output.shape 81 | heatmaps_pred = output.reshape((B, K, -1)) 82 | heatmaps_pred = F.log_softmax(heatmaps_pred, dim=-1) 83 | heatmaps_gt = target.reshape((B, K, -1)) 84 | heatmaps_gt = heatmaps_gt + self.epsilon 85 | heatmaps_gt = heatmaps_gt / heatmaps_gt.sum(dim=-1, keepdims=True) 86 | loss = self.criterion(heatmaps_pred, heatmaps_gt).sum(dim=-1) 87 | if target_weight is not None: 88 | loss = loss * target_weight.view((B, K)) 89 | if self.reduction == 'mean': 90 | return loss.mean() 91 | elif self.reduction == 'none': 92 | return loss.mean(dim=-1) 93 | -------------------------------------------------------------------------------- /common/utils/metric/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import prettytable 3 | 4 | __all__ = ['keypoint_detection'] 5 | 6 | def binary_accuracy(output: torch.Tensor, target: torch.Tensor) -> float: 7 | """Computes the accuracy for binary classification""" 8 | with torch.no_grad(): 9 | batch_size = target.size(0) 10 | pred = (output >= 0.5).float().t().view(-1) 11 | correct = pred.eq(target.view(-1)).float().sum() 12 | correct.mul_(100. / batch_size) 13 | return correct 14 | 15 | 16 | def accuracy(output, target, topk=(1,)): 17 | r""" 18 | Computes the accuracy over the k top predictions for the specified values of k 19 | 20 | Args: 21 | output (tensor): Classification outputs, :math:`(N, C)` where `C = number of classes` 22 | target (tensor): :math:`(N)` where each value is :math:`0 \leq \text{targets}[i] \leq C-1` 23 | topk (sequence[int]): A list of top-N number. 24 | 25 | Returns: 26 | Top-N accuracies (N :math:`\in` topK). 27 | """ 28 | with torch.no_grad(): 29 | maxk = max(topk) 30 | batch_size = target.size(0) 31 | 32 | _, pred = output.topk(maxk, 1, True, True) 33 | pred = pred.t() 34 | correct = pred.eq(target[None]) 35 | 36 | res = [] 37 | for k in topk: 38 | correct_k = correct[:k].flatten().sum(dtype=torch.float32) 39 | res.append(correct_k * (100.0 / batch_size)) 40 | return res 41 | 42 | 43 | class ConfusionMatrix(object): 44 | def __init__(self, num_classes): 45 | self.num_classes = num_classes 46 | self.mat = None 47 | 48 | def update(self, target, output): 49 | """ 50 | Update confusion matrix. 51 | 52 | Args: 53 | target: ground truth 54 | output: predictions of models 55 | 56 | Shape: 57 | - target: :math:`(minibatch, C)` where C means the number of classes. 58 | - output: :math:`(minibatch, C)` where C means the number of classes. 
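        A minimal sketch (``target`` and ``output`` here are flattened class-index tensors, which is the form
        the implementation below consumes):

        >>> cm = ConfusionMatrix(num_classes=3)
        >>> target = torch.tensor([0, 1, 2, 2])   # ground-truth class indices
        >>> output = torch.tensor([0, 2, 2, 2])   # predicted class indices
        >>> cm.update(target, output)
        >>> acc_global, acc, iu = cm.compute()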
59 | """ 60 | n = self.num_classes 61 | if self.mat is None: 62 | self.mat = torch.zeros((n, n), dtype=torch.int64, device=target.device) 63 | with torch.no_grad(): 64 | k = (target >= 0) & (target < n) 65 | inds = n * target[k].to(torch.int64) + output[k] 66 | self.mat += torch.bincount(inds, minlength=n**2).reshape(n, n) 67 | 68 | def reset(self): 69 | self.mat.zero_() 70 | 71 | def compute(self): 72 | """compute global accuracy, per-class accuracy and per-class IoU""" 73 | h = self.mat.float() 74 | acc_global = torch.diag(h).sum() / h.sum() 75 | acc = torch.diag(h) / h.sum(1) 76 | iu = torch.diag(h) / (h.sum(1) + h.sum(0) - torch.diag(h)) 77 | return acc_global, acc, iu 78 | 79 | # def reduce_from_all_processes(self): 80 | # if not torch.distributed.is_available(): 81 | # return 82 | # if not torch.distributed.is_initialized(): 83 | # return 84 | # torch.distributed.barrier() 85 | # torch.distributed.all_reduce(self.mat) 86 | 87 | def __str__(self): 88 | acc_global, acc, iu = self.compute() 89 | return ( 90 | 'global correct: {:.1f}\n' 91 | 'average row correct: {}\n' 92 | 'IoU: {}\n' 93 | 'mean IoU: {:.1f}').format( 94 | acc_global.item() * 100, 95 | ['{:.1f}'.format(i) for i in (acc * 100).tolist()], 96 | ['{:.1f}'.format(i) for i in (iu * 100).tolist()], 97 | iu.mean().item() * 100) 98 | 99 | def format(self, classes: list): 100 | """Get the accuracy and IoU for each class in the table format""" 101 | acc_global, acc, iu = self.compute() 102 | 103 | table = prettytable.PrettyTable(["class", "acc", "iou"]) 104 | for i, class_name, per_acc, per_iu in zip(range(len(classes)), classes, (acc * 100).tolist(), (iu * 100).tolist()): 105 | table.add_row([class_name, per_acc, per_iu]) 106 | 107 | return 'global correct: {:.1f}\nmean correct:{:.1f}\nmean IoU: {:.1f}\n{}'.format( 108 | acc_global.item() * 100, acc.mean().item() * 100, iu.mean().item() * 100, table.get_string()) 109 | 110 | -------------------------------------------------------------------------------- /common/vision/datasets/stanford_cars.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional 3 | from .imagelist import ImageList 4 | from ._util import download as download_data, check_exits 5 | 6 | 7 | class StanfordCars(ImageList): 8 | """`The Stanford Cars `_ Dataset. 9 | 10 | Args: 11 | root (str): Root directory of dataset 12 | split (str, optional): The dataset split, supports ``train``, or ``test``. 13 | sample_rate (int): The sampling rates to sample random ``training`` images for each category. 14 | Choices include 100, 50, 30, 15. Default: 100. 15 | download (bool, optional): If true, downloads the dataset from the internet and puts it \ 16 | in root directory. If dataset is already downloaded, it is not downloaded again. 17 | transform (callable, optional): A function/transform that takes in an PIL image and returns a \ 18 | transformed version. E.g, :class:`torchvision.transforms.RandomCrop`. 19 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 20 | 21 | .. note:: In `root`, there will exist following files after downloading. 
22 | :: 23 | train/ 24 | test/ 25 | image_list/ 26 | train_100.txt 27 | train_50.txt 28 | train_30.txt 29 | train_15.txt 30 | test.txt 31 | """ 32 | download_list = [ 33 | ("image_list", "image_list.zip", "https://cloud.tsinghua.edu.cn/f/d95c188cc49c404aba70/?dl=1"), 34 | ("train", "train.tgz", "https://cloud.tsinghua.edu.cn/f/d5ab63c391a949509db0/?dl=1"), 35 | ("test", "test.tgz", "https://cloud.tsinghua.edu.cn/f/04e6fd5222a84d0a8ff5/?dl=1"), 36 | ] 37 | image_list = { 38 | "train": "image_list/train_100.txt", 39 | "train100": "image_list/train_100.txt", 40 | "train50": "image_list/train_50.txt", 41 | "train30": "image_list/train_30.txt", 42 | "train15": "image_list/train_15.txt", 43 | "test": "image_list/test.txt", 44 | "test100": "image_list/test.txt", 45 | } 46 | CLASSES = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', 47 | '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', 48 | '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', 49 | '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', 50 | '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', 51 | '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', 52 | '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', 53 | '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', 54 | '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', 55 | '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196'] 56 | 57 | def __init__(self, root: str, split: str, sample_rate: Optional[int] =100, download: Optional[bool] = False, **kwargs): 58 | 59 | if split == 'train': 60 | list_name = 'train' + str(sample_rate) 61 | assert list_name in self.image_list 62 | data_list_file = os.path.join(root, self.image_list[list_name]) 63 | else: 64 | data_list_file = os.path.join(root, self.image_list['test']) 65 | 66 | if download: 67 | list(map(lambda args: download_data(root, *args), self.download_list)) 68 | else: 69 | list(map(lambda file_name, _: check_exits(root, file_name), self.download_list)) 70 | 71 | super(StanfordCars, self).__init__(root, StanfordCars.CLASSES, data_list_file=data_list_file, **kwargs) 72 | -------------------------------------------------------------------------------- /common/vision/datasets/openset/__init__.py: -------------------------------------------------------------------------------- 1 | from ..imagelist import ImageList 2 | from ..office31 import Office31 3 | from ..officehome import OfficeHome 4 | from ..visda2017 import VisDA2017 5 | 6 | from typing import Optional, ClassVar, Sequence 7 | from copy import deepcopy 8 | 9 | 10 | __all__ = ['Office31', 'OfficeHome', "VisDA2017"] 11 | 12 | 13 | def open_set(dataset_class: ClassVar, public_classes: Sequence[str], 14 | private_classes: Optional[Sequence[str]] = ()) -> ClassVar: 15 | """ 16 | Convert a dataset into its open-set version. 
17 | 18 | In other words, those samples which doesn't belong to `private_classes` will be marked as "unknown". 19 | 20 | Be aware that `open_set` will change the label number of each category. 21 | 22 | Args: 23 | dataset_class (class): Dataset class. Only subclass of ``ImageList`` can be open-set. 24 | public_classes (sequence[str]): A sequence of which categories need to be kept in the open-set dataset.\ 25 | Each element of `public_classes` must belong to the `classes` list of `dataset_class`. 26 | private_classes (sequence[str], optional): A sequence of which categories need to be marked as "unknown" \ 27 | in the open-set dataset. Each element of `private_classes` must belong to the `classes` list of \ 28 | `dataset_class`. Default: (). 29 | 30 | Examples:: 31 | 32 | >>> public_classes = ['back_pack', 'bike', 'calculator', 'headphones', 'keyboard'] 33 | >>> private_classes = ['laptop_computer', 'monitor', 'mouse', 'mug', 'projector'] 34 | >>> # create a open-set dataset class which has classes 35 | >>> # 'back_pack', 'bike', 'calculator', 'headphones', 'keyboard' and 'unknown'. 36 | >>> OpenSetOffice31 = open_set(Office31, public_classes, private_classes) 37 | >>> # create an instance of the open-set dataset 38 | >>> dataset = OpenSetDataset(root="data/office31", task="A") 39 | 40 | """ 41 | if not (issubclass(dataset_class, ImageList)): 42 | raise Exception("Only subclass of ImageList can be openset") 43 | 44 | class OpenSetDataset(dataset_class): 45 | def __init__(self, **kwargs): 46 | super(OpenSetDataset, self).__init__(**kwargs) 47 | samples = [] 48 | all_classes = list(deepcopy(public_classes)) + ["unknown"] 49 | for (path, label) in self.samples: 50 | class_name = self.classes[label] 51 | if class_name in public_classes: 52 | samples.append((path, all_classes.index(class_name))) 53 | elif class_name in private_classes: 54 | samples.append((path, all_classes.index("unknown"))) 55 | self.samples = samples 56 | self.classes = all_classes 57 | self.class_to_idx = {cls: idx 58 | for idx, cls in enumerate(self.classes)} 59 | 60 | return OpenSetDataset 61 | 62 | 63 | def default_open_set(dataset_class: ClassVar, source: bool) -> ClassVar: 64 | """ 65 | Default open-set used in some paper. 66 | 67 | Args: 68 | dataset_class (class): Dataset class. Currently, dataset_class must be one of 69 | :class:`~common.vision.datasets.office31.Office31`, :class:`~common.vision.datasets.officehome.OfficeHome`, 70 | :class:`~common.vision.datasets.visda2017.VisDA2017`, 71 | source (bool): Whether the dataset is used for source domain or not. 
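    A minimal sketch, mirroring the :func:`open_set` example above:

    >>> # Office-31: the first 20 classes stay shared, the remaining 11 become "unknown" on the target side
    >>> OpenSetOffice31 = default_open_set(Office31, source=False)
    >>> dataset = OpenSetOffice31(root="data/office31", task="A")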
72 | """ 73 | if dataset_class == Office31: 74 | public_classes = Office31.CLASSES[:20] 75 | if source: 76 | private_classes = () 77 | else: 78 | private_classes = Office31.CLASSES[20:] 79 | elif dataset_class == OfficeHome: 80 | public_classes = sorted(OfficeHome.CLASSES)[:25] 81 | if source: 82 | private_classes = () 83 | else: 84 | private_classes = sorted(OfficeHome.CLASSES)[25:] 85 | elif dataset_class == VisDA2017: 86 | public_classes = ('bicycle', 'bus', 'car', 'motorcycle', 'train', 'truck') 87 | if source: 88 | private_classes = () 89 | else: 90 | private_classes = ('aeroplane', 'horse', 'knife', 'person', 'plant', 'skateboard') 91 | else: 92 | raise NotImplementedError("Unknown openset domain adaptation dataset: {}".format(dataset_class.__name__)) 93 | return open_set(dataset_class, public_classes, private_classes) 94 | 95 | -------------------------------------------------------------------------------- /icon/cluster.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import math 4 | import torch.nn.functional as F 5 | import numpy as np 6 | 7 | 8 | def PairEnum(x,mask=None): 9 | # Enumerate all pairs of feature in x 10 | assert x.ndimension() == 2, 'Input dimension must be 2' 11 | x1 = x.repeat(x.size(0), 1) 12 | x2 = x.repeat(1, x.size(0)).view(-1, x.size(1)) 13 | if mask is not None: 14 | xmask = mask.view(-1,1).repeat(1,x.size(1)) 15 | #dim 0: #sample, dim 1:#feature 16 | x1 = x1[xmask].view(-1,x.size(1)) 17 | x2 = x2[xmask].view(-1,x.size(1)) 18 | return x1,x2 19 | 20 | class BCE(nn.Module): 21 | eps = 1e-7 # Avoid calculating log(0). Use the small value of float16. 22 | def forward(self, prob1, prob2, simi): 23 | # simi: 1->similar; -1->dissimilar; 0->unknown(ignore) 24 | assert len(prob1)==len(prob2)==len(simi), 'Wrong input size:{0},{1},{2}'.format(str(len(prob1)),str(len(prob2)),str(len(simi))) 25 | P = prob1.mul_(prob2) 26 | P = P.sum(1) 27 | P.mul_(simi).add_(simi.eq(-1).type_as(P)) 28 | neglogP = -P.add_(BCE.eps).log_() 29 | return neglogP.mean() 30 | 31 | 32 | class ClusterLoss(): 33 | def __init__(self, device, num_classes, bce_type, cosine_threshold, topk): 34 | # super(NCLMemory, self).__init__() 35 | self.device = device 36 | self.num_classes = num_classes 37 | self.bce_type = bce_type 38 | self.costhre = cosine_threshold 39 | self.topk = topk 40 | self.bce = BCE() 41 | 42 | def compute_losses(self, inputs): 43 | bce_loss = 0.0 44 | device = self.device 45 | feat, output2 = inputs["x1"], inputs["preds1_u"] 46 | output2_bar = inputs["preds2_u"] 47 | label = inputs["labels"] 48 | 49 | num_s = (label < self.num_classes).sum() 50 | labels_s = label[:num_s] 51 | mask_lb = label < self.num_classes # masked away label samples. 
only use unlabel samples for clustering 52 | 53 | prob2, prob2_bar = F.softmax(output2, dim=1), F.softmax(output2_bar, dim=1) 54 | 55 | rank_feat = (feat[~mask_lb]).detach() 56 | if self.bce_type == 'cos': 57 | # default: cosine similarity with threshold 58 | feat_row, feat_col = PairEnum(F.normalize(rank_feat, dim=1)) 59 | tmp_distance_ori = torch.bmm( 60 | feat_row.view(feat_row.size(0), 1, -1), 61 | feat_col.view(feat_row.size(0), -1, 1) 62 | ) 63 | tmp_distance_ori = tmp_distance_ori.squeeze() 64 | target_ulb = torch.zeros_like(tmp_distance_ori).float() - 1 65 | target_ulb[tmp_distance_ori > self.costhre] = 1 66 | elif self.bce_type == 'RK': 67 | # top-k rank statics 68 | rank_idx = torch.argsort(rank_feat, dim=1, descending=True) 69 | rank_idx1, rank_idx2 = PairEnum(rank_idx) 70 | rank_idx1, rank_idx2 = rank_idx1[:, :self.topk], rank_idx2[:, :self.topk] 71 | rank_idx1, _ = torch.sort(rank_idx1, dim=1) 72 | rank_idx2, _ = torch.sort(rank_idx2, dim=1) 73 | rank_diff = rank_idx1 - rank_idx2 74 | rank_diff = torch.sum(torch.abs(rank_diff), dim=1) 75 | target_ulb = torch.ones_like(rank_diff).float().to(device) 76 | target_ulb[rank_diff > 0] = -1 77 | 78 | prob1_ulb, _ = PairEnum(prob2[~mask_lb]) 79 | _, prob2_ulb = PairEnum(prob2_bar[~mask_lb]) 80 | 81 | bce_loss = self.bce(prob1_ulb, prob2_ulb, target_ulb) 82 | return bce_loss, target_ulb 83 | 84 | class Normalize(nn.Module): 85 | def __init__(self, power=2): 86 | super(Normalize, self).__init__() 87 | self.power = power 88 | 89 | def forward(self, x): 90 | norm = x.pow(self.power).sum(1, keepdim=True).pow(1. / self.power) 91 | out = x.div(norm) 92 | return out 93 | 94 | def reduce_dimension(features, mode, dim): 95 | if mode == 'pca': 96 | from sklearn.decomposition import PCA 97 | pca = PCA(n_components=dim) 98 | transformed_features = pca.fit_transform(features) 99 | fit_score = pca.explained_variance_ratio_.sum() 100 | elif mode == 'umap': 101 | import umap 102 | fit = umap.UMAP(n_components=dim) 103 | transformed_features = fit.fit_transform(features) 104 | fit_score = 0.0 105 | return transformed_features, fit_score -------------------------------------------------------------------------------- /dalib/adaptation/dann.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from ..modules.grl import WarmStartGradientReverseLayer 7 | from common.modules.classifier import Classifier as ClassifierBase 8 | from common.utils.metric import binary_accuracy 9 | 10 | __all__ = ['DomainAdversarialLoss'] 11 | 12 | 13 | class DomainAdversarialLoss(nn.Module): 14 | """ 15 | The Domain Adversarial Loss proposed in 16 | `Domain-Adversarial Training of Neural Networks (ICML 2015) `_ 17 | 18 | Domain adversarial loss measures the domain discrepancy through training a domain discriminator. 19 | Given domain discriminator :math:`D`, feature representation :math:`f`, the definition of DANN loss is 20 | 21 | .. math:: 22 | loss(\mathcal{D}_s, \mathcal{D}_t) = \mathbb{E}_{x_i^s \sim \mathcal{D}_s} log[D(f_i^s)] 23 | + \mathbb{E}_{x_j^t \sim \mathcal{D}_t} log[1-D(f_j^t)]. 24 | 25 | Args: 26 | domain_discriminator (torch.nn.Module): A domain discriminator object, which predicts the domains of features. Its input shape is (N, F) and output shape is (N, 1) 27 | reduction (str, optional): Specifies the reduction to apply to the output: 28 | ``'none'`` | ``'mean'`` | ``'sum'``. 
``'none'``: no reduction will be applied, 29 | ``'mean'``: the sum of the output will be divided by the number of 30 | elements in the output, ``'sum'``: the output will be summed. Default: ``'mean'`` 31 | grl (WarmStartGradientReverseLayer, optional): Default: None. 32 | 33 | Inputs: 34 | - f_s (tensor): feature representations on source domain, :math:`f^s` 35 | - f_t (tensor): feature representations on target domain, :math:`f^t` 36 | - w_s (tensor, optional): a rescaling weight given to each instance from source domain. 37 | - w_t (tensor, optional): a rescaling weight given to each instance from target domain. 38 | 39 | Shape: 40 | - f_s, f_t: :math:`(N, F)` where F means the dimension of input features. 41 | - Outputs: scalar by default. If :attr:`reduction` is ``'none'``, then :math:`(N, )`. 42 | 43 | Examples:: 44 | 45 | >>> from dalib.modules.domain_discriminator import DomainDiscriminator 46 | >>> discriminator = DomainDiscriminator(in_feature=1024, hidden_size=1024) 47 | >>> loss = DomainAdversarialLoss(discriminator, reduction='mean') 48 | >>> # features from source domain and target domain 49 | >>> f_s, f_t = torch.randn(20, 1024), torch.randn(20, 1024) 50 | >>> # If you want to assign different weights to each instance, you should pass in w_s and w_t 51 | >>> w_s, w_t = torch.randn(20), torch.randn(20) 52 | >>> output = loss(f_s, f_t, w_s, w_t) 53 | """ 54 | 55 | def __init__(self, domain_discriminator: nn.Module, reduction: Optional[str] = 'mean', 56 | grl: Optional = None): 57 | super(DomainAdversarialLoss, self).__init__() 58 | self.grl = WarmStartGradientReverseLayer(alpha=1., lo=0., hi=1., max_iters=1000, auto_step=True) if grl is None else grl 59 | self.domain_discriminator = domain_discriminator 60 | self.bce = lambda input, target, weight: \ 61 | F.binary_cross_entropy(input, target, weight=weight, reduction=reduction) 62 | self.domain_discriminator_accuracy = None 63 | 64 | def forward(self, f_s: torch.Tensor, f_t: torch.Tensor, 65 | w_s: Optional[torch.Tensor] = None, w_t: Optional[torch.Tensor] = None) -> torch.Tensor: 66 | f = self.grl(torch.cat((f_s, f_t), dim=0)) 67 | d = self.domain_discriminator(f) 68 | d_s, d_t = d.chunk(2, dim=0) 69 | d_label_s = torch.ones((f_s.size(0), 1)).to(f_s.device) 70 | d_label_t = torch.zeros((f_t.size(0), 1)).to(f_t.device) 71 | self.domain_discriminator_accuracy = 0.5 * (binary_accuracy(d_s, d_label_s) + binary_accuracy(d_t, d_label_t)) 72 | 73 | if w_s is None: 74 | w_s = torch.ones_like(d_label_s) 75 | if w_t is None: 76 | w_t = torch.ones_like(d_label_t) 77 | return 0.5 * (self.bce(d_s, d_label_s, w_s.view_as(d_s)) + self.bce(d_t, d_label_t, w_t.view_as(d_t))) 78 | 79 | 80 | class ImageClassifier(ClassifierBase): 81 | def __init__(self, backbone: nn.Module, num_classes: int, bottleneck_dim: Optional[int] = 256, **kwargs): 82 | bottleneck = nn.Sequential( 83 | nn.AdaptiveAvgPool2d(output_size=(1, 1)), 84 | nn.Flatten(), 85 | nn.Linear(backbone.out_features, bottleneck_dim), 86 | nn.BatchNorm1d(bottleneck_dim), 87 | nn.ReLU() 88 | ) 89 | super(ImageClassifier, self).__init__(backbone, num_classes, bottleneck, bottleneck_dim, **kwargs) 90 | -------------------------------------------------------------------------------- /dalib/adaptation/segmentation/advent.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | 7 | class Discriminator(nn.Sequential): 8 | """ 9 | Domain discriminator model from 
10 | `ADVENT: Adversarial Entropy Minimization for Domain Adaptation in Semantic Segmentation (CVPR 2019) `_ 11 | 12 | Distinguish pixel-by-pixel whether the input predictions come from the source domain or the target domain. 13 | The source domain label is 1 and the target domain label is 0. 14 | 15 | Args: 16 | num_classes (int): num of classes in the predictions 17 | ndf (int): dimension of the hidden features 18 | 19 | Shape: 20 | - Inputs: :math:`(minibatch, C, H, W)` where :math:`C` is the number of classes 21 | - Outputs: :math:`(minibatch, 1, H, W)` 22 | """ 23 | def __init__(self, num_classes, ndf=64): 24 | super(Discriminator, self).__init__( 25 | nn.Conv2d(num_classes, ndf, kernel_size=4, stride=2, padding=1), 26 | nn.LeakyReLU(negative_slope=0.2, inplace=True), 27 | nn.Conv2d(ndf, ndf * 2, kernel_size=4, stride=2, padding=1), 28 | nn.LeakyReLU(negative_slope=0.2, inplace=True), 29 | nn.Conv2d(ndf * 2, ndf * 4, kernel_size=4, stride=2, padding=1), 30 | nn.LeakyReLU(negative_slope=0.2, inplace=True), 31 | nn.Conv2d(ndf * 4, ndf * 8, kernel_size=4, stride=2, padding=1), 32 | nn.LeakyReLU(negative_slope=0.2, inplace=True), 33 | nn.Conv2d(ndf * 8, 1, kernel_size=4, stride=2, padding=1), 34 | ) 35 | 36 | 37 | def prob_2_entropy(prob): 38 | """ convert probabilistic prediction maps to weighted self-information maps 39 | """ 40 | n, c, h, w = prob.size() 41 | return -torch.mul(prob, torch.log2(prob + 1e-30)) / np.log2(c) 42 | 43 | 44 | def bce_loss(y_pred, y_label): 45 | y_truth_tensor = torch.FloatTensor(y_pred.size()) 46 | y_truth_tensor.fill_(y_label) 47 | y_truth_tensor = y_truth_tensor.to(y_pred.get_device()) 48 | return F.binary_cross_entropy_with_logits(y_pred, y_truth_tensor) 49 | 50 | 51 | class DomainAdversarialEntropyLoss(nn.Module): 52 | r"""The `Domain Adversarial Entropy Loss `_ 53 | 54 | Minimizing entropy with adversarial learning through training a domain discriminator. 55 | 56 | Args: 57 | domain_discriminator (torch.nn.Module): A domain discriminator object, which predicts 58 | the domains of predictions. Its input shape is :math:`(minibatch, C, H, W)` and output shape is :math:`(minibatch, 1, H, W)` 59 | 60 | Inputs: 61 | - logits (tensor): logits output of segmentation model 62 | - domain_label (str, optional): whether the data comes from source or target. 63 | Choices: ['source', 'target']. Default: 'source' 64 | 65 | Shape: 66 | - logits: :math:`(minibatch, C, H, W)` where :math:`C` means the number of classes 67 | - Outputs: scalar. 68 | 69 | Examples:: 70 | 71 | >>> B, C, H, W = 2, 19, 512, 512 72 | >>> discriminator = Discriminator(num_classes=C) 73 | >>> dann = DomainAdversarialEntropyLoss(discriminator) 74 | >>> # logits output on source domain and target domain 75 | >>> y_s, y_t = torch.randn(B, C, H, W), torch.randn(B, C, H, W) 76 | >>> loss = 0.5 * (dann(y_s, "source") + dann(y_t, "target")) 77 | """ 78 | def __init__(self, discriminator: nn.Module): 79 | super(DomainAdversarialEntropyLoss, self).__init__() 80 | self.discriminator = discriminator 81 | 82 | def forward(self, logits, domain_label='source'): 83 | """ 84 | """ 85 | assert domain_label in ['source', 'target'] 86 | probability = F.softmax(logits, dim=1) 87 | entropy = prob_2_entropy(probability) 88 | domain_prediciton = self.discriminator(entropy) 89 | if domain_label == 'source': 90 | return bce_loss(domain_prediciton, 1) 91 | else: 92 | return bce_loss(domain_prediciton, 0) 93 | 94 | def train(self, mode=True): 95 | r"""Sets the discriminator in training mode. 
In the training mode, 96 | all the parameters in discriminator will be set requires_grad=True. 97 | 98 | Args: 99 | mode (bool): whether to set training mode (``True``) or evaluation mode (``False``). Default: ``True``. 100 | """ 101 | self.discriminator.train(mode) 102 | for param in self.discriminator.parameters(): 103 | param.requires_grad = mode 104 | return self 105 | 106 | def eval(self): 107 | r"""Sets the module in evaluation mode. In the training mode, 108 | all the parameters in discriminator will be set requires_grad=False. 109 | 110 | This is equivalent with :meth:`self.train(False) `. 111 | """ 112 | return self.train(False) 113 | -------------------------------------------------------------------------------- /common/vision/models/keypoint_detection/pose_resnet.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Modified from https://github.com/microsoft/human-pose-estimation.pytorch 3 | # ------------------------------------------------------------------------------ 4 | 5 | import torch.nn as nn 6 | from ..resnet import _resnet, Bottleneck 7 | 8 | 9 | class Upsampling(nn.Sequential): 10 | """ 11 | 3-layers deconvolution used in `Simple Baseline `_. 12 | """ 13 | def __init__(self, in_channel=2048, hidden_dims=(256, 256, 256), kernel_sizes=(4, 4, 4), bias=False): 14 | assert len(hidden_dims) == len(kernel_sizes), \ 15 | 'ERROR: len(hidden_dims) is different len(kernel_sizes)' 16 | 17 | layers = [] 18 | for hidden_dim, kernel_size in zip(hidden_dims, kernel_sizes): 19 | if kernel_size == 4: 20 | padding = 1 21 | output_padding = 0 22 | elif kernel_size == 3: 23 | padding = 1 24 | output_padding = 1 25 | elif kernel_size == 2: 26 | padding = 0 27 | output_padding = 0 28 | else: 29 | raise NotImplementedError("kernel_size is {}".format(kernel_size)) 30 | 31 | layers.append( 32 | nn.ConvTranspose2d( 33 | in_channels=in_channel, 34 | out_channels=hidden_dim, 35 | kernel_size=kernel_size, 36 | stride=2, 37 | padding=padding, 38 | output_padding=output_padding, 39 | bias=bias)) 40 | layers.append(nn.BatchNorm2d(hidden_dim)) 41 | layers.append(nn.ReLU(inplace=True)) 42 | in_channel = hidden_dim 43 | 44 | super(Upsampling, self).__init__(*layers) 45 | 46 | # init following Simple Baseline 47 | for name, m in self.named_modules(): 48 | if isinstance(m, nn.ConvTranspose2d): 49 | nn.init.normal_(m.weight, std=0.001) 50 | if bias: 51 | nn.init.constant_(m.bias, 0) 52 | elif isinstance(m, nn.BatchNorm2d): 53 | nn.init.constant_(m.weight, 1) 54 | nn.init.constant_(m.bias, 0) 55 | 56 | 57 | class PoseResNet(nn.Module): 58 | """ 59 | `Simple Baseline `_ for keypoint detection. 60 | 61 | Args: 62 | backbone (torch.nn.Module): Backbone to extract 2-d features from data 63 | upsampling (torch.nn.Module): Layer to upsample image feature to heatmap size 64 | feature_dim (int): The dimension of the features from upsampling layer. 65 | num_keypoints (int): Number of keypoints 66 | finetune (bool, optional): Whether use 10x smaller learning rate in the backbone. 
Default: False 67 | """ 68 | def __init__(self, backbone, upsampling, feature_dim, num_keypoints, finetune=False): 69 | super(PoseResNet, self).__init__() 70 | self.backbone = backbone 71 | self.upsampling = upsampling 72 | self.head = nn.Conv2d(in_channels=feature_dim, out_channels=num_keypoints, kernel_size=1, stride=1, padding=0) 73 | self.finetune = finetune 74 | for m in self.head.modules(): 75 | nn.init.normal_(m.weight, std=0.001) 76 | nn.init.constant_(m.bias, 0) 77 | 78 | def forward(self, x): 79 | x = self.backbone(x) 80 | x = self.upsampling(x) 81 | x = self.head(x) 82 | return x 83 | 84 | def get_parameters(self, lr=1.): 85 | return [ 86 | {'params': self.backbone.parameters(), 'lr': 0.1 * lr if self.finetune else lr}, 87 | {'params': self.upsampling.parameters(), 'lr': lr}, 88 | {'params': self.head.parameters(), 'lr': lr}, 89 | ] 90 | 91 | 92 | def _pose_resnet(arch, num_keypoints, block, layers, pretrained_backbone, deconv_with_bias, finetune=False, progress=True, **kwargs): 93 | backbone = _resnet(arch, block, layers, pretrained_backbone, progress, **kwargs) 94 | upsampling = Upsampling(backbone.out_features, bias=deconv_with_bias) 95 | model = PoseResNet(backbone, upsampling, 256, num_keypoints, finetune) 96 | return model 97 | 98 | 99 | def pose_resnet101(num_keypoints, pretrained_backbone=True, deconv_with_bias=False, finetune=False, progress=True, **kwargs): 100 | """Constructs a Simple Baseline model with a ResNet-101 backbone. 101 | 102 | Args: 103 | num_keypoints (int): number of keypoints 104 | pretrained_backbone (bool, optional): If True, returns a model pre-trained on ImageNet. Default: True. 105 | deconv_with_bias (bool, optional): Whether use bias in the deconvolution layer. Default: False 106 | finetune (bool, optional): Whether use 10x smaller learning rate in the backbone. Default: False 107 | progress (bool, optional): If True, displays a progress bar of the download to stderr. Default: True 108 | """ 109 | return _pose_resnet('resnet101', num_keypoints, Bottleneck, [3, 4, 23, 3], pretrained_backbone, deconv_with_bias, finetune, progress, **kwargs) -------------------------------------------------------------------------------- /dalib/translation/cyclegan/util.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import functools 3 | import random 4 | import torch 5 | from torch.nn import init 6 | 7 | 8 | class Identity(nn.Module): 9 | def forward(self, x): 10 | return x 11 | 12 | 13 | def get_norm_layer(norm_type='instance'): 14 | """Return a normalization layer 15 | 16 | Parameters: 17 | norm_type (str) -- the name of the normalization layer: batch | instance | none 18 | 19 | For BatchNorm, we use learnable affine parameters and track running statistics (mean/stddev). 20 | For InstanceNorm, we do not use learnable affine parameters. We do not track running statistics. 21 | """ 22 | if norm_type == 'batch': 23 | norm_layer = functools.partial(nn.BatchNorm2d, affine=True, track_running_stats=True) 24 | elif norm_type == 'instance': 25 | norm_layer = functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=False) 26 | elif norm_type == 'none': 27 | def norm_layer(x): return Identity() 28 | else: 29 | raise NotImplementedError('normalization layer [%s] is not found' % norm_type) 30 | return norm_layer 31 | 32 | 33 | def init_weights(net, init_type='normal', init_gain=0.02): 34 | """Initialize network weights. 
35 | 
36 | Args:
37 | net (torch.nn.Module): network to be initialized
38 | init_type (str): the name of an initialization method. Choices include: ``normal`` |
39 | ``xavier`` | ``kaiming`` | ``orthogonal``
40 | init_gain (float): scaling factor for normal, xavier and orthogonal.
41 | 
42 | 'normal' is used in the original CycleGAN paper, but xavier and kaiming might
43 | work better for some applications.
44 | """
45 | def init_func(m): # define the initialization function
46 | classname = m.__class__.__name__
47 | if hasattr(m, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1):
48 | if init_type == 'normal':
49 | init.normal_(m.weight.data, 0.0, init_gain)
50 | elif init_type == 'xavier':
51 | init.xavier_normal_(m.weight.data, gain=init_gain)
52 | elif init_type == 'kaiming':
53 | init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
54 | elif init_type == 'orthogonal':
55 | init.orthogonal_(m.weight.data, gain=init_gain)
56 | else:
57 | raise NotImplementedError('initialization method [%s] is not implemented' % init_type)
58 | if hasattr(m, 'bias') and m.bias is not None:
59 | init.constant_(m.bias.data, 0.0)
60 | elif classname.find('BatchNorm2d') != -1: # BatchNorm Layer's weight is not a matrix; only normal distribution applies.
61 | init.normal_(m.weight.data, 1.0, init_gain)
62 | init.constant_(m.bias.data, 0.0)
63 | 
64 | print('initialize network with %s' % init_type)
65 | net.apply(init_func) # apply the initialization function
66 | 
67 | 
68 | class ImagePool:
69 | """An image buffer that stores previously generated images.
70 | 
71 | This buffer enables us to update discriminators using a history of generated images
72 | rather than the ones produced by the latest generators.
73 | 
74 | Args:
75 | pool_size (int): the size of image buffer, if pool_size=0, no buffer will be created
76 | 
77 | """
78 | 
79 | def __init__(self, pool_size):
80 | self.pool_size = pool_size
81 | if self.pool_size > 0: # create an empty pool
82 | self.num_imgs = 0
83 | self.images = []
84 | 
85 | def query(self, images):
86 | """Return an image from the pool.
87 | 
88 | Args:
89 | images (torch.Tensor): the latest generated images from the generator
90 | 
91 | Returns:
92 | For each input image: with probability 50%, the buffer returns the image itself;
93 | with probability 50%, the buffer returns an image previously stored in the buffer
94 | and inserts the current image into the buffer.
95 | 
96 | """
97 | if self.pool_size == 0: # if the buffer size is 0, do nothing
98 | return images
99 | return_images = []
100 | for image in images:
101 | image = torch.unsqueeze(image.data, 0)
102 | if self.num_imgs < self.pool_size: # if the buffer is not full; keep inserting current images to the buffer
103 | self.num_imgs = self.num_imgs + 1
104 | self.images.append(image)
105 | return_images.append(image)
106 | else:
107 | p = random.uniform(0, 1)
108 | if p > 0.5: # by 50% chance, the buffer will return a previously stored image, and insert the current image into the buffer
109 | random_id = random.randint(0, self.pool_size - 1) # randint is inclusive
110 | tmp = self.images[random_id].clone()
111 | self.images[random_id] = image
112 | return_images.append(tmp)
113 | else: # by another 50% chance, the buffer will return the current image
114 | return_images.append(image)
115 | return_images = torch.cat(return_images, 0) # collect all the images and return
116 | return return_images
117 | 
118 | 
119 | def set_requires_grad(net, requires_grad=False):
120 | """
121 | Set requires_grad=False for all the parameters of the network to avoid unnecessary computations
122 | """
123 | for param in net.parameters():
124 | param.requires_grad = requires_grad
125 | 
--------------------------------------------------------------------------------
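A brief usage sketch for the helpers above. It is illustrative only: the tiny generator/discriminator pair and the tensor shapes are placeholders rather than the networks defined elsewhere in this package.

>>> norm_layer = get_norm_layer('instance')
>>> net_g = nn.Sequential(nn.Conv2d(3, 3, kernel_size=3, padding=1), norm_layer(3), nn.Tanh())
>>> net_d = nn.Sequential(nn.Conv2d(3, 1, kernel_size=4, stride=2, padding=1))
>>> init_weights(net_g, init_type='normal', init_gain=0.02)
>>> init_weights(net_d, init_type='normal', init_gain=0.02)
>>> pool = ImagePool(pool_size=50)
>>> fake_b = net_g(torch.randn(1, 3, 256, 256))
>>> d_input = pool.query(fake_b.detach())   # mix current and historical fakes for the discriminator update
>>> set_requires_grad(net_d, False)         # freeze the discriminator while the generator is updated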
/common/vision/datasets/keypoint_detection/util.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | 
4 | 
5 | def generate_target(joints, joints_vis, heatmap_size, sigma, image_size):
6 | """Generate heatmaps for joints.
7 | 
8 | Args:
9 | joints: (K, 2) joint coordinates in the image
10 | joints_vis: (K, 1) joint visibility
11 | heatmap_size: W, H of the heatmap
12 | sigma: standard deviation of the Gaussian peak drawn for each joint
13 | image_size: W, H of the image
14 | 
15 | Returns:
16 | target (K, H, W) heatmaps and target_weight (K, 1), where the weight is set to 0 for joints whose Gaussian falls outside the heatmap
17 | """
18 | num_joints = joints.shape[0]
19 | target_weight = np.ones((num_joints, 1), dtype=np.float32)
20 | target_weight[:, 0] = joints_vis[:, 0]
21 | 
22 | target = np.zeros((num_joints,
23 | heatmap_size[1],
24 | heatmap_size[0]),
25 | dtype=np.float32)
26 | 
27 | tmp_size = sigma * 3
28 | image_size = np.array(image_size)
29 | heatmap_size = np.array(heatmap_size)
30 | 
31 | for joint_id in range(num_joints):
32 | feat_stride = image_size / heatmap_size
33 | mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5)
34 | mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5)
35 | # Check that any part of the gaussian is in-bounds
36 | ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
37 | br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
38 | if mu_x >= heatmap_size[0] or mu_y >= heatmap_size[1] \
39 | or mu_x < 0 or mu_y < 0:
40 | # If not, mark the joint as not visible and skip it
41 | target_weight[joint_id] = 0
42 | continue
43 | 
44 | # Generate gaussian
45 | size = 2 * tmp_size + 1
46 | x = np.arange(0, size, 1, np.float32)
47 | y = x[:, np.newaxis]
48 | x0 = y0 = size // 2
49 | # The gaussian is not normalized, we want the center value to equal 1
50 | g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
51 | 
52 | # Usable gaussian range
53 | g_x = max(0, -ul[0]), min(br[0], heatmap_size[0]) - ul[0]
54 | g_y = max(0, -ul[1]), min(br[1], heatmap_size[1]) - ul[1]
55 | # Image range
56 | img_x = max(0, ul[0]), min(br[0], heatmap_size[0])
57 | img_y = max(0, ul[1]), min(br[1], heatmap_size[1])
58 | 
59 | v = target_weight[joint_id]
60 | if v > 0.5:
61 | target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \
62 | g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
63 | 
64 | return target, target_weight
65 | 
66 | 
67 | def keypoint2d_to_3d(keypoint2d: np.ndarray,
intrinsic_matrix: np.ndarray, Zc: np.ndarray): 68 | """Convert 2D keypoints to 3D keypoints""" 69 | uv1 = np.concatenate([np.copy(keypoint2d), np.ones((keypoint2d.shape[0], 1))], axis=1).T * Zc # 3 x NUM_KEYPOINTS 70 | xyz = np.matmul(np.linalg.inv(intrinsic_matrix), uv1).T # NUM_KEYPOINTS x 3 71 | return xyz 72 | 73 | 74 | def keypoint3d_to_2d(keypoint3d: np.ndarray, intrinsic_matrix: np.ndarray): 75 | """Convert 3D keypoints to 2D keypoints""" 76 | keypoint2d = np.matmul(intrinsic_matrix, keypoint3d.T).T # NUM_KEYPOINTS x 3 77 | keypoint2d = keypoint2d[:, :2] / keypoint2d[:, 2:3] # NUM_KEYPOINTS x 2 78 | return keypoint2d 79 | 80 | 81 | def scale_box(box, image_width, image_height, scale): 82 | """ 83 | Change `box` to a square box. 84 | The side with of the square box will be `scale` * max(w, h) 85 | where w and h is the width and height of the origin box 86 | """ 87 | left, upper, right, lower = box 88 | center_x, center_y = (left + right) / 2, (upper + lower) / 2 89 | w, h = right - left, lower - upper 90 | side_with = min(round(scale * max(w, h)), min(image_width, image_height)) 91 | left = round(center_x - side_with / 2) 92 | right = left + side_with - 1 93 | upper = round(center_y - side_with / 2) 94 | lower = upper + side_with - 1 95 | if left < 0: 96 | left = 0 97 | right = side_with - 1 98 | if right >= image_width: 99 | right = image_width - 1 100 | left = image_width - side_with 101 | if upper < 0: 102 | upper = 0 103 | lower = side_with -1 104 | if lower >= image_height: 105 | lower = image_height - 1 106 | upper = image_height - side_with 107 | return left, upper, right, lower 108 | 109 | 110 | def get_bounding_box(keypoint2d: np.array): 111 | """Get the bounding box for keypoints""" 112 | left = np.min(keypoint2d[:, 0]) 113 | right = np.max(keypoint2d[:, 0]) 114 | upper = np.min(keypoint2d[:, 1]) 115 | lower = np.max(keypoint2d[:, 1]) 116 | return left, upper, right, lower 117 | 118 | 119 | def visualize_heatmap(image, heatmaps, filename): 120 | image = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR).copy() 121 | H, W = heatmaps.shape[1], heatmaps.shape[2] 122 | resized_image = cv2.resize(image, (int(W), int(H))) 123 | heatmaps = heatmaps.mul(255).clamp(0, 255).byte().cpu().numpy() 124 | for k in range(heatmaps.shape[0]): 125 | heatmap = heatmaps[k] 126 | colored_heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET) 127 | masked_image = colored_heatmap * 0.7 + resized_image * 0.3 128 | cv2.imwrite(filename.format(k), masked_image) 129 | 130 | 131 | def area(left, upper, right, lower): 132 | return max(right - left + 1, 0) * max(lower - upper + 1, 0) 133 | 134 | 135 | def intersection(box_a, box_b): 136 | left_a, upper_a, right_a, lower_a = box_a 137 | left_b, upper_b, right_b, lower_b = box_b 138 | return max(left_a, left_b), max(upper_a, upper_b), min(right_a, right_b), min(lower_a, lower_b) 139 | -------------------------------------------------------------------------------- /common/vision/datasets/segmentation/cityscapes.py: -------------------------------------------------------------------------------- 1 | import os 2 | from .segmentation_list import SegmentationList 3 | from .._util import download as download_data 4 | 5 | 6 | class Cityscapes(SegmentationList): 7 | """`Cityscapes `_ is a real-world semantic segmentation dataset collected 8 | in driving scenarios. 9 | 10 | Args: 11 | root (str): Root directory of dataset 12 | split (str, optional): The dataset split, supports ``train``, or ``val``. 13 | data_folder (str, optional): Sub-directory of the image. 
Default: 'leftImg8bit'. 14 | label_folder (str, optional): Sub-directory of the label. Default: 'gtFine'. 15 | mean (seq[float]): mean BGR value. Normalize the image if not None. Default: None. 16 | transforms (callable, optional): A function/transform that takes in (PIL image, label) pair \ 17 | and returns a transformed version. E.g, :class:`~common.vision.transforms.segmentation.Resize`. 18 | 19 | .. note:: You need to download Cityscapes manually. 20 | Ensure that there exist following files in the `root` directory before you using this class. 21 | :: 22 | leftImg8bit/ 23 | train/ 24 | val/ 25 | test/ 26 | gtFine/ 27 | train/ 28 | val/ 29 | test/ 30 | """ 31 | 32 | CLASSES = ['road', 'sidewalk', 'building', 'wall', 'fence', 'pole', 'traffic light', 'traffic sign', 33 | 'vegetation', 'terrain', 'sky', 'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', 34 | 'bicycle'] 35 | 36 | ID_TO_TRAIN_ID = { 37 | 7: 0, 8: 1, 11: 2, 12: 3, 13: 4, 17: 5, 38 | 19: 6, 20: 7, 21: 8, 22: 9, 23: 10, 24: 11, 25: 12, 39 | 26: 13, 27: 14, 28: 15, 31: 16, 32: 17, 33: 18 40 | } 41 | TRAIN_ID_TO_COLOR = [(128, 64, 128), (244, 35, 232), (70, 70, 70), (102, 102, 156), 42 | (190, 153, 153), (153, 153, 153), (250, 170, 30), (220, 220, 0), 43 | (107, 142, 35), (152, 251, 152), (70, 130, 180), (220, 20, 60), 44 | (255, 0, 0), (0, 0, 142), (0, 0, 70), (0, 60, 100), (0, 80, 100), 45 | (0, 0, 230), (119, 11, 32), [0, 0, 0]] 46 | download_list = [ 47 | ("image_list", "image_list.zip", "https://cloud.tsinghua.edu.cn/f/08745e798b16483db4bf/?dl=1"), 48 | ] 49 | EVALUATE_CLASSES = CLASSES 50 | 51 | def __init__(self, root, split='train', data_folder='leftImg8bit', label_folder='gtFine', **kwargs): 52 | assert split in ['train', 'val'] 53 | 54 | # download meta information from Internet 55 | list(map(lambda args: download_data(root, *args), self.download_list)) 56 | data_list_file = os.path.join(root, "image_list", "{}.txt".format(split)) 57 | self.split = split 58 | super(Cityscapes, self).__init__(root, Cityscapes.CLASSES, data_list_file, data_list_file, 59 | os.path.join(data_folder, split), os.path.join(label_folder, split), 60 | id_to_train_id=Cityscapes.ID_TO_TRAIN_ID, 61 | train_id_to_color=Cityscapes.TRAIN_ID_TO_COLOR, **kwargs) 62 | 63 | def parse_label_file(self, label_list_file): 64 | with open(label_list_file, "r") as f: 65 | label_list = [line.strip().replace("leftImg8bit", "gtFine_labelIds") for line in f.readlines()] 66 | return label_list 67 | 68 | 69 | class FoggyCityscapes(Cityscapes): 70 | """`Foggy Cityscapes `_ is a real-world semantic segmentation dataset collected 71 | in foggy driving scenarios. 72 | 73 | Args: 74 | root (str): Root directory of dataset 75 | split (str, optional): The dataset split, supports ``train``, or ``val``. 76 | data_folder (str, optional): Sub-directory of the image. Default: 'leftImg8bit'. 77 | label_folder (str, optional): Sub-directory of the label. Default: 'gtFine'. 78 | beta (float, optional): The parameter for foggy. Choices includes: 0.005, 0.01, 0.02. Default: 0.02 79 | mean (seq[float]): mean BGR value. Normalize the image if not None. Default: None. 80 | transforms (callable, optional): A function/transform that takes in (PIL image, label) pair \ 81 | and returns a transformed version. E.g, :class:`~common.vision.transforms.segmentation.Resize`. 82 | 83 | .. note:: You need to download Cityscapes manually. 84 | Ensure that there exist following files in the `root` directory before you using this class. 
85 | :: 86 | leftImg8bit_foggy/ 87 | train/ 88 | val/ 89 | test/ 90 | gtFine/ 91 | train/ 92 | val/ 93 | test/ 94 | """ 95 | def __init__(self, root, split='train', data_folder='leftImg8bit_foggy', label_folder='gtFine', beta=0.02, **kwargs): 96 | assert beta in [0.02, 0.01, 0.005] 97 | self.beta = beta 98 | super(FoggyCityscapes, self).__init__(root, split, data_folder, label_folder, **kwargs) 99 | 100 | def parse_data_file(self, file_name): 101 | """Parse file to image list 102 | 103 | Args: 104 | file_name (str): The path of data file 105 | 106 | Returns: 107 | List of image path 108 | """ 109 | with open(file_name, "r") as f: 110 | data_list = [line.strip().replace("leftImg8bit", "leftImg8bit_foggy_beta_{}".format(self.beta)) for line in f.readlines()] 111 | return data_list 112 | -------------------------------------------------------------------------------- /common/vision/datasets/keypoint_detection/hand_3d_studio.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import random 4 | from PIL import ImageFile, Image 5 | import torch 6 | import os.path as osp 7 | 8 | from .._util import download as download_data, check_exits 9 | from .keypoint_dataset import Hand21KeypointDataset 10 | from .util import * 11 | 12 | ImageFile.LOAD_TRUNCATED_IMAGES = True 13 | 14 | 15 | class Hand3DStudio(Hand21KeypointDataset): 16 | """`Hand-3d-Studio Dataset `_ 17 | 18 | Args: 19 | root (str): Root directory of dataset 20 | split (str, optional): The dataset split, supports ``train``, ``test``, or ``all``. 21 | task (str, optional): The task to create dataset. Choices include ``'noobject'``: only hands without objects, \ 22 | ``'object'``: only hands interacting with hands, and ``'all'``: all hands. Default: 'noobject'. 23 | download (bool, optional): If true, downloads the dataset from the internet and puts it \ 24 | in root directory. If dataset is already downloaded, it is not downloaded again. 25 | transforms (callable, optional): A function/transform that takes in a dict (which contains PIL image and 26 | its labels) and returns a transformed version. E.g, :class:`~common.vision.transforms.keypoint_detection.Resize`. 27 | image_size (tuple): (width, height) of the image. Default: (256, 256) 28 | heatmap_size (tuple): (width, height) of the heatmap. Default: (64, 64) 29 | sigma (int): sigma parameter when generate the heatmap. Default: 2 30 | 31 | .. note:: 32 | We found that the original H3D image is in high resolution while most part in an image is background, 33 | thus we crop the image and keep only the surrounding area of hands (1.5x bigger than hands) to speed up training. 34 | 35 | .. note:: In `root`, there will exist following files after downloading. 
36 | :: 37 | H3D_crop/ 38 | annotation.json 39 | part1/ 40 | part2/ 41 | part3/ 42 | part4/ 43 | part5/ 44 | """ 45 | def __init__(self, root, split='train', task='noobject', download=True, **kwargs): 46 | assert split in ['train', 'test', 'all'] 47 | self.split = split 48 | assert task in ['noobject', 'object', 'all'] 49 | self.task = task 50 | 51 | if download: 52 | download_data(root, "H3D_crop", "H3D_crop.tar", "https://cloud.tsinghua.edu.cn/f/d4e612e44dc04d8eb01f/?dl=1") 53 | else: 54 | check_exits(root, "H3D_crop") 55 | 56 | root = osp.join(root, "H3D_crop") 57 | # load labels 58 | annotation_file = os.path.join(root, 'annotation.json') 59 | print("loading from {}".format(annotation_file)) 60 | with open(annotation_file) as f: 61 | samples = list(json.load(f)) 62 | if task == 'noobject': 63 | samples = [sample for sample in samples if int(sample['without_object']) == 1] 64 | elif task == 'object': 65 | samples = [sample for sample in samples if int(sample['without_object']) == 0] 66 | 67 | random.seed(42) 68 | random.shuffle(samples) 69 | samples_len = len(samples) 70 | samples_split = min(int(samples_len * 0.2), 3200) 71 | if split == 'train': 72 | samples = samples[samples_split:] 73 | elif split == 'test': 74 | samples = samples[:samples_split] 75 | 76 | super(Hand3DStudio, self).__init__(root, samples, **kwargs) 77 | 78 | def __getitem__(self, index): 79 | sample = self.samples[index] 80 | image_name = sample['name'] 81 | image_path = os.path.join(self.root, image_name) 82 | image = Image.open(image_path) 83 | keypoint3d_camera = np.array(sample['keypoint3d']) # NUM_KEYPOINTS x 3 84 | keypoint2d = np.array(sample['keypoint2d']) # NUM_KEYPOINTS x 2 85 | intrinsic_matrix = np.array(sample['intrinsic_matrix']) 86 | Zc = keypoint3d_camera[:, 2] 87 | 88 | image, data = self.transforms(image, keypoint2d=keypoint2d, intrinsic_matrix=intrinsic_matrix) 89 | keypoint2d = data['keypoint2d'] 90 | intrinsic_matrix = data['intrinsic_matrix'] 91 | keypoint3d_camera = keypoint2d_to_3d(keypoint2d, intrinsic_matrix, Zc) 92 | 93 | # noramlize 2D pose: 94 | visible = np.ones((self.num_keypoints, ), dtype=np.float32) 95 | visible = visible[:, np.newaxis] 96 | # 2D heatmap 97 | target, target_weight = generate_target(keypoint2d, visible, self.heatmap_size, self.sigma, self.image_size) 98 | target = torch.from_numpy(target) 99 | target_weight = torch.from_numpy(target_weight) 100 | 101 | # normalize 3D pose: 102 | # put middle finger metacarpophalangeal (MCP) joint in the center of the coordinate system 103 | # and make distance between wrist and middle finger MCP joint to be of length 1 104 | keypoint3d_n = keypoint3d_camera - keypoint3d_camera[9:10, :] 105 | keypoint3d_n = keypoint3d_n / np.sqrt(np.sum(keypoint3d_n[0, :] ** 2)) 106 | 107 | meta = { 108 | 'image': image_name, 109 | 'keypoint2d': keypoint2d, # (NUM_KEYPOINTS x 2) 110 | 'keypoint3d': keypoint3d_n, # (NUM_KEYPOINTS x 3) 111 | } 112 | return image, target, target_weight, meta 113 | 114 | 115 | class Hand3DStudioAll(Hand3DStudio): 116 | """ 117 | `Hand-3d-Studio Dataset `_ 118 | 119 | """ 120 | def __init__(self, root, task='all', **kwargs): 121 | super(Hand3DStudioAll, self).__init__(root, task=task, **kwargs) -------------------------------------------------------------------------------- /common/utils/analysis/tsne.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import matplotlib 3 | import os 4 | matplotlib.use('Agg') 5 | from sklearn.manifold import TSNE 6 | import numpy as np 
7 | import matplotlib.pyplot as plt
8 | import matplotlib.colors as col
9 | import matplotlib.cm as cm
10 | # import umap
11 | 
12 | def visualize(source_feature: torch.Tensor, source_labels: torch.Tensor,
13 | target_feature: torch.Tensor, target_labels: torch.Tensor,
14 | filename: str, source_color='r', target_color='b'):
15 | """
16 | Visualize features from different domains using t-SNE.
17 | 
18 | Args:
19 | source_feature (tensor): features from source domain in shape :math:`(minibatch, F)`
20 | target_feature (tensor): features from target domain in shape :math:`(minibatch, F)`
21 | filename (str): the file name to save t-SNE
22 | source_color (str): the color of the source features. Default: 'r'
23 | target_color (str): the color of the target features. Default: 'b'
24 | 
25 | """
26 | source_feature = source_feature.numpy()
27 | target_feature = target_feature.numpy()
28 | features = np.concatenate([source_feature, target_feature], axis=0)
29 | 
30 | # map features to 2-d using TSNE
31 | print("Transforming features by t-SNE...")
32 | X_tsne = TSNE(n_components=2, random_state=33).fit_transform(features)
33 | # X_tsne = umap.UMAP(n_components=2, metric='euclidean', n_neighbors=15).fit_transform(features)
34 | 
35 | # domain labels, 1 represents source while 0 represents target
36 | domains = np.concatenate((np.ones(len(source_feature)), np.zeros(len(target_feature))))
37 | 
38 | # visualize using matplotlib
39 | plt.figure(figsize=(10, 10))
40 | plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=domains, cmap=col.ListedColormap([target_color, source_color]), s=2)
41 | plt.savefig(filename)
42 | 
43 | def visualize_cluster(source_features, source_labels, source_clusters,
44 | target_features, target_labels, target_clusters, file_root,
45 | num_s=None, num_t=None, umap=True, metric='euclidean'):
46 | source_features = source_features.numpy()
47 | target_features = target_features.numpy()
48 | source_labels = source_labels.numpy()
49 | target_labels = target_labels.numpy()
50 | source_clusters = source_clusters.numpy()
51 | target_clusters = target_clusters.numpy()
52 | num_classes = len(np.unique(source_labels))
53 | num_source = source_features.shape[0]
54 | num_target = target_features.shape[0]
55 | 
56 | # select features
57 | if num_s is not None and num_s < num_source:
58 | source_tsne_idx = np.random.choice(num_source, num_s, replace=False)
59 | source_features = source_features[source_tsne_idx, :]
60 | source_labels = source_labels[source_tsne_idx]
61 | source_clusters = source_clusters[source_tsne_idx]
62 | num_source = num_s
63 | if num_t is not None and num_t < num_target:
64 | target_tsne_idx = np.random.choice(num_target, num_t, replace=False)
65 | target_features = target_features[target_tsne_idx, :]
66 | target_labels = target_labels[target_tsne_idx]
67 | target_clusters = target_clusters[target_tsne_idx]
68 | num_target = num_t
69 | features = np.concatenate([source_features, target_features], axis=0)
70 | 
71 | # map features to 2-d using TSNE
72 | if umap:
73 | print("Transforming features by umap...")
74 | assert False  # umap support is disabled here; install umap-learn and un-comment the import above to enable it
75 | # X_tsne = umap.UMAP(n_components=2, metric='euclidean', n_neighbors=15).fit_transform(features)
76 | else:
77 | print("Transforming features by tsne...")
78 | X_tsne = TSNE(
79 | n_components=2, random_state=33, metric=metric
80 | ).fit_transform(features)
81 | 
82 | source_tsne = X_tsne[:num_source, :]
83 | target_tsne = X_tsne[num_source:, :]
84 | 
85 | # draw
86 | plt.figure(figsize=(15, 15))
87 | colors = cm.rainbow(np.linspace(0, 1, num_classes))
88 | for i
in range(num_classes): 89 | # draw the anchor class with color 90 | source_mask = (source_labels == i) 91 | target_mask = (target_labels == i) 92 | source_mask_neg = ~source_mask 93 | target_mask_neg = ~target_mask 94 | 95 | # draw other class as grey 96 | plt.scatter( 97 | source_tsne[source_mask_neg][:, 0], source_tsne[source_mask_neg][:, 1], 98 | c="grey", s=7, marker='o' 99 | ) 100 | plt.scatter( 101 | target_tsne[target_mask_neg][:, 0], target_tsne[target_mask_neg][:, 1], 102 | c="grey", s=12, marker='x' 103 | ) 104 | 105 | # draw anchor class as color 106 | for j in range(num_classes): 107 | source_cluster_mask = (source_clusters == j) 108 | target_cluster_mask = (target_clusters == j) 109 | source_combined_mask = source_mask & source_cluster_mask 110 | target_combined_mask = target_mask & target_cluster_mask 111 | plt.scatter( 112 | source_tsne[source_combined_mask][:, 0], source_tsne[source_combined_mask][:, 1], 113 | color=colors[j], s=14, marker='o', alpha=0.7 114 | ) 115 | plt.scatter( 116 | target_tsne[target_combined_mask][:, 0], target_tsne[target_combined_mask][:, 1], 117 | color=colors[j], s=21, marker='x', alpha=0.7 118 | ) 119 | 120 | plt.savefig(os.path.join(file_root, "class_%d.png" % i)) 121 | plt.clf() 122 | 123 | # Draw source vs target domain 124 | plt.scatter(source_tsne[:, 0], source_tsne[:, 1], color='red', s=2) 125 | plt.scatter(target_tsne[:, 0], target_tsne[:, 1], color='blue', s=2) 126 | plt.savefig(os.path.join(file_root, "overall.png")) -------------------------------------------------------------------------------- /dalib/adaptation/jan.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Sequence 2 | import torch 3 | import torch.nn as nn 4 | 5 | from common.modules.classifier import Classifier as ClassifierBase 6 | from ..modules.grl import GradientReverseLayer 7 | from ..modules.kernels import GaussianKernel 8 | from .dan import _update_index_matrix 9 | 10 | 11 | __all__ = ['JointMultipleKernelMaximumMeanDiscrepancy', 'ImageClassifier'] 12 | 13 | 14 | 15 | class JointMultipleKernelMaximumMeanDiscrepancy(nn.Module): 16 | r"""The Joint Multiple Kernel Maximum Mean Discrepancy (JMMD) used in 17 | `Deep Transfer Learning with Joint Adaptation Networks (ICML 2017) `_ 18 | 19 | Given source domain :math:`\mathcal{D}_s` of :math:`n_s` labeled points and target domain :math:`\mathcal{D}_t` 20 | of :math:`n_t` unlabeled points drawn i.i.d. from P and Q respectively, the deep networks will generate 21 | activations in layers :math:`\mathcal{L}` as :math:`\{(z_i^{s1}, ..., z_i^{s|\mathcal{L}|})\}_{i=1}^{n_s}` and 22 | :math:`\{(z_i^{t1}, ..., z_i^{t|\mathcal{L}|})\}_{i=1}^{n_t}`. The empirical estimate of 23 | :math:`\hat{D}_{\mathcal{L}}(P, Q)` is computed as the squared distance between the empirical kernel mean 24 | embeddings as 25 | 26 | .. math:: 27 | \hat{D}_{\mathcal{L}}(P, Q) &= 28 | \dfrac{1}{n_s^2} \sum_{i=1}^{n_s}\sum_{j=1}^{n_s} \prod_{l\in\mathcal{L}} k^l(z_i^{sl}, z_j^{sl}) \\ 29 | &+ \dfrac{1}{n_t^2} \sum_{i=1}^{n_t}\sum_{j=1}^{n_t} \prod_{l\in\mathcal{L}} k^l(z_i^{tl}, z_j^{tl}) \\ 30 | &- \dfrac{2}{n_s n_t} \sum_{i=1}^{n_s}\sum_{j=1}^{n_t} \prod_{l\in\mathcal{L}} k^l(z_i^{sl}, z_j^{tl}). \\ 31 | 32 | Args: 33 | kernels (tuple(tuple(torch.nn.Module))): kernel functions, where `kernels[r]` corresponds to kernel :math:`k^{\mathcal{L}[r]}`. 34 | linear (bool): whether use the linear version of JAN. Default: False 35 | thetas (list(Theta): use adversarial version JAN if not None. 
Default: None 36 | 37 | Inputs: 38 | - z_s (tuple(tensor)): multiple layers' activations from the source domain, :math:`z^s` 39 | - z_t (tuple(tensor)): multiple layers' activations from the target domain, :math:`z^t` 40 | 41 | Shape: 42 | - :math:`z^{sl}` and :math:`z^{tl}`: :math:`(minibatch, *)` where * means any dimension 43 | - Outputs: scalar 44 | 45 | .. note:: 46 | Activations :math:`z^{sl}` and :math:`z^{tl}` must have the same shape. 47 | 48 | .. note:: 49 | The kernel values will add up when there are multiple kernels for a certain layer. 50 | 51 | Examples:: 52 | 53 | >>> feature_dim = 1024 54 | >>> batch_size = 10 55 | >>> layer1_kernels = (GaussianKernel(alpha=0.5), GaussianKernel(1.), GaussianKernel(2.)) 56 | >>> layer2_kernels = (GaussianKernel(1.), ) 57 | >>> loss = JointMultipleKernelMaximumMeanDiscrepancy((layer1_kernels, layer2_kernels)) 58 | >>> # layer1 features from source domain and target domain 59 | >>> z1_s, z1_t = torch.randn(batch_size, feature_dim), torch.randn(batch_size, feature_dim) 60 | >>> # layer2 features from source domain and target domain 61 | >>> z2_s, z2_t = torch.randn(batch_size, feature_dim), torch.randn(batch_size, feature_dim) 62 | >>> output = loss((z1_s, z2_s), (z1_t, z2_t)) 63 | """ 64 | 65 | def __init__(self, kernels: Sequence[Sequence[nn.Module]], linear: Optional[bool] = True, thetas: Sequence[nn.Module] = None): 66 | super(JointMultipleKernelMaximumMeanDiscrepancy, self).__init__() 67 | self.kernels = kernels 68 | self.index_matrix = None 69 | self.linear = linear 70 | if thetas: 71 | self.thetas = thetas 72 | else: 73 | self.thetas = [nn.Identity() for _ in kernels] 74 | 75 | def forward(self, z_s: torch.Tensor, z_t: torch.Tensor) -> torch.Tensor: 76 | batch_size = int(z_s[0].size(0)) 77 | self.index_matrix = _update_index_matrix(batch_size, self.index_matrix, self.linear).to(z_s[0].device) 78 | 79 | kernel_matrix = torch.ones_like(self.index_matrix) 80 | for layer_z_s, layer_z_t, layer_kernels, theta in zip(z_s, z_t, self.kernels, self.thetas): 81 | layer_features = torch.cat([layer_z_s, layer_z_t], dim=0) 82 | layer_features = theta(layer_features) 83 | kernel_matrix *= sum( 84 | [kernel(layer_features) for kernel in layer_kernels]) # Add up the matrix of each kernel 85 | 86 | # Add 2 / (n-1) to make up for the value on the diagonal 87 | # to ensure loss is positive in the non-linear version 88 | loss = (kernel_matrix * self.index_matrix).sum() + 2. 
/ float(batch_size - 1)
89 | return loss
90 | 
91 | 
92 | class Theta(nn.Module):
93 | """
94 | Maximize the loss with respect to :math:`\theta`,
95 | minimize the loss with respect to the features.
96 | """
97 | def __init__(self, dim: int):
98 | super(Theta, self).__init__()
99 | self.grl1 = GradientReverseLayer()
100 | self.grl2 = GradientReverseLayer()
101 | self.layer1 = nn.Linear(dim, dim)
102 | nn.init.eye_(self.layer1.weight)
103 | nn.init.zeros_(self.layer1.bias)
104 | 
105 | def forward(self, features: torch.Tensor) -> torch.Tensor:
106 | features = self.grl1(features)
107 | return self.grl2(self.layer1(features))  # layer1 gradients are reversed once (maximized); the input features pass through two reversals and are thus minimized as usual
108 | 
109 | 
110 | class ImageClassifier(ClassifierBase):
111 | def __init__(self, backbone: nn.Module, num_classes: int, bottleneck_dim: Optional[int] = 256, **kwargs):
112 | bottleneck = nn.Sequential(
113 | nn.AdaptiveAvgPool2d(output_size=(1, 1)),
114 | nn.Flatten(),
115 | nn.Linear(backbone.out_features, bottleneck_dim),
116 | nn.BatchNorm1d(bottleneck_dim),
117 | nn.ReLU(),
118 | nn.Dropout(0.5)
119 | )
120 | super(ImageClassifier, self).__init__(backbone, num_classes, bottleneck, bottleneck_dim, **kwargs)
--------------------------------------------------------------------------------
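A short sketch of the adversarial variant wired together, following the Examples above. It is illustrative only: the feature dimensions, batch size and kernel choices are arbitrary, and the surrounding training loop (including adding each ``Theta``'s parameters to the optimizer) is omitted.

>>> layer1_kernels = (GaussianKernel(alpha=0.5), GaussianKernel(1.), GaussianKernel(2.))
>>> layer2_kernels = (GaussianKernel(1.),)
>>> thetas = [Theta(dim) for dim in (1024, 31)]   # one Theta per adapted layer
>>> jmmd = JointMultipleKernelMaximumMeanDiscrepancy(
...     (layer1_kernels, layer2_kernels), linear=False, thetas=thetas)
>>> z1_s, z1_t = torch.randn(10, 1024), torch.randn(10, 1024)
>>> z2_s, z2_t = torch.randn(10, 31), torch.randn(10, 31)
>>> loss = jmmd((z1_s, z2_s), (z1_t, z2_t))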
/common/vision/datasets/keypoint_detection/surreal.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | from PIL import ImageFile
4 | import torch
5 | from ...transforms.keypoint_detection import *
6 | from .util import *
7 | from .._util import download as download_data, check_exits
8 | from .keypoint_dataset import Body16KeypointDataset
9 | 
10 | ImageFile.LOAD_TRUNCATED_IMAGES = True
11 | 
12 | 
13 | class SURREAL(Body16KeypointDataset):
14 | """`Surreal Dataset `_
15 | 
16 | Args:
17 | root (str): Root directory of dataset
18 | split (str, optional): The dataset split, supports ``train``, ``test``, or ``val``.
19 | Default: ``train``.
20 | task (str, optional): Placeholder.
21 | download (bool, optional): If true, downloads the dataset from the internet and puts it \
22 | in root directory. If dataset is already downloaded, it is not downloaded again.
23 | transforms (callable, optional): A function/transform that takes in a dict (which contains PIL image and
24 | its labels) and returns a transformed version. E.g, :class:`~common.vision.transforms.keypoint_detection.Resize`.
25 | image_size (tuple): (width, height) of the image. Default: (256, 256)
26 | heatmap_size (tuple): (width, height) of the heatmap. Default: (64, 64)
27 | sigma (int): sigma parameter when generate the heatmap. Default: 2
28 | 
29 | .. note::
30 | We found that the original Surreal image is in high resolution while most part in an image is background,
31 | thus we crop the image and keep only the surrounding area of the person (1.5x bigger than the person) to speed up training.
32 | 
33 | .. note:: In `root`, the following files will exist after downloading.
34 | ::
35 | train/
36 | test/
37 | val/
38 | """
39 | def __init__(self, root, split='train', task='all', download=True, **kwargs):
40 | assert split in ['train', 'test', 'val']
41 | self.split = split
42 | 
43 | if download:
44 | download_data(root, "train/run0", "train0.tgz", "https://cloud.tsinghua.edu.cn/f/b13604f06ff1445c830a/?dl=1")
45 | download_data(root, "train/run1", "train1.tgz", "https://cloud.tsinghua.edu.cn/f/919aefe2de3541c3b940/?dl=1")
46 | download_data(root, "train/run2", "train2.tgz", "https://cloud.tsinghua.edu.cn/f/34864760ad4945b9bcd6/?dl=1")  # run2 goes to its own sub-directory
47 | download_data(root, "val", "val.tgz", "https://cloud.tsinghua.edu.cn/f/16b20f2e76684f848dc1/?dl=1")
48 | download_data(root, "test", "test.tgz", "https://cloud.tsinghua.edu.cn/f/36c72d86e43540e0a913/?dl=1")
49 | else:
50 | check_exits(root, "train/run0")
51 | check_exits(root, "train/run1")
52 | check_exits(root, "train/run2")
53 | check_exits(root, "val")
54 | check_exits(root, "test")
55 | 
56 | all_samples = []
57 | for part in [0, 1, 2]:
58 | annotation_file = os.path.join(root, split, 'run{}.json'.format(part))
59 | print("loading", annotation_file)
60 | with open(annotation_file) as f:
61 | samples = json.load(f)
62 | for sample in samples:
63 | sample["image_path"] = os.path.join(root, self.split, 'run{}'.format(part), sample['name'])
64 | all_samples.extend(samples)
65 | 
66 | random.seed(42)
67 | random.shuffle(all_samples)
68 | samples_len = len(all_samples)
69 | samples_split = min(int(samples_len * 0.2), 3200)
70 | if self.split == 'train':
71 | all_samples = all_samples[samples_split:]
72 | elif self.split == 'test':
73 | all_samples = all_samples[:samples_split]
74 | self.joints_index = (7, 4, 1, 2, 5, 8, 0, 9, 12, 15, 20, 18, 13, 14, 19, 21)
75 | 
76 | super(SURREAL, self).__init__(root, all_samples, **kwargs)
77 | 
78 | def __getitem__(self, index):
79 | sample = self.samples[index]
80 | image_name = sample['name']
81 | 
82 | image_path = sample['image_path']
83 | image = Image.open(image_path)
84 | keypoint3d_camera = np.array(sample['keypoint3d'])[self.joints_index, :] # NUM_KEYPOINTS x 3
85 | keypoint2d = np.array(sample['keypoint2d'])[self.joints_index, :] # NUM_KEYPOINTS x 2
86 | intrinsic_matrix = np.array(sample['intrinsic_matrix'])
87 | Zc = keypoint3d_camera[:, 2]
88 | 
89 | image, data = self.transforms(image, keypoint2d=keypoint2d, intrinsic_matrix=intrinsic_matrix)
90 | keypoint2d = data['keypoint2d']
91 | intrinsic_matrix = data['intrinsic_matrix']
92 | keypoint3d_camera = keypoint2d_to_3d(keypoint2d, intrinsic_matrix, Zc)
93 | 
94 | # normalize 2D pose:
95 | visible = np.array([1.]
* 16, dtype=np.float32)
96 | visible = visible[:, np.newaxis]
97 | 
98 | # 2D heatmap
99 | target, target_weight = generate_target(keypoint2d, visible, self.heatmap_size, self.sigma, self.image_size)
100 | target = torch.from_numpy(target)
101 | target_weight = torch.from_numpy(target_weight)
102 | 
103 | # normalize 3D pose:
104 | # center the pose at the reference joint (index 9 after re-ordering by self.joints_index)
105 | # and scale it so that the bone between joint 0 and the reference joint has unit length
106 | keypoint3d_n = keypoint3d_camera - keypoint3d_camera[9:10, :]
107 | keypoint3d_n = keypoint3d_n / np.sqrt(np.sum(keypoint3d_n[0, :] ** 2))
108 | 
109 | meta = {
110 | 'image': image_name,
111 | 'keypoint2d': keypoint2d, # (NUM_KEYPOINTS x 2)
112 | 'keypoint3d': keypoint3d_n, # (NUM_KEYPOINTS x 3)
113 | }
114 | return image, target, target_weight, meta
115 | 
116 | def __len__(self):
117 | return len(self.samples)
118 | 
--------------------------------------------------------------------------------
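To see how the keypoint-detection pieces fit together, here is a minimal, hypothetical training-step sketch. It is not a script from this repository: the weighted MSE stands in for the package's own keypoint loss, the import assumes the repository root is on ``PYTHONPATH``, and the batch is random data standing in for ``(image, target, target_weight, meta)`` as produced by a dataset such as ``SURREAL``.

import torch
import torch.nn as nn

from common.vision.models.keypoint_detection.pose_resnet import pose_resnet101

# 16 body keypoints, matching Body16KeypointDataset; skip the ImageNet download for this sketch
model = pose_resnet101(num_keypoints=16, pretrained_backbone=False, finetune=True)
optimizer = torch.optim.Adam(model.get_parameters(lr=1e-3))
criterion = nn.MSELoss(reduction='none')

images = torch.randn(4, 3, 256, 256)      # batch of input images
target = torch.rand(4, 16, 64, 64)        # Gaussian heatmaps, as produced by generate_target
target_weight = torch.ones(4, 16, 1)      # per-joint visibility weights

heatmaps = model(images)                  # (4, 16, 64, 64)
per_joint = criterion(heatmaps, target).mean(dim=(2, 3))   # (4, 16)
loss = (per_joint * target_weight.squeeze(-1)).mean()      # down-weight invisible joints
loss.backward()
optimizer.step()
optimizer.zero_grad()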