├── README.md ├── circle_loss.py ├── example_cls.py ├── example_cls_wo_circleloss.py ├── example_compare.py ├── example_vis.py └── figures ├── compare.png └── tsne.png /README.md: -------------------------------------------------------------------------------- 1 | # circleloss.pytorch 2 | Examples of playing with Circle Loss from the paper "[Circle Loss: A Unified Perspective of Pair Similarity Optimization](https://arxiv.org/abs/2002.10857)", CVPR 2020. 3 | 4 | The implementation of Circle Loss is from [TinyZeaMays/CircleLoss](https://github.com/TinyZeaMays/CircleLoss). 5 | 6 | ## Example 1: Visualization (learned features) 7 | ``` 8 | >>> python example_vis.py 9 | ``` 10 | This will visualize the learned features using t-SNE. 11 |
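Condensed, the visualization step in `example_vis.py` projects the extracted features to 2-D with scikit-learn's t-SNE and scatters them per class. A minimal sketch, assuming `all_features` (an `(N, 32)` array of embeddings from the trained model) and `all_labels` (an `(N,)` array of digit labels) have already been collected from the test loader as the script does:
```
import matplotlib.pyplot as plt
from sklearn import manifold

# all_features: (N, 32) embeddings, all_labels: (N,) digit labels -- assumed collected beforehand
tsne = manifold.TSNE(n_components=2, init='pca', random_state=501)
embedded = tsne.fit_transform(all_features)
for digit in range(10):
    points = embedded[all_labels == digit]
    plt.scatter(points[:, 0], points[:, 1], s=50, alpha=0.6, label=str(digit))
plt.legend(loc='upper right')
plt.title('t-SNE visualization of the learned features')
plt.show()
```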
![t-SNE visualization of the learned features](figures/tsne.png) 12 | 13 |
14 | 15 | ## Example 2: Classification 16 | ### Training w/o circle loss 17 | ``` 18 | >>> python example_cls_wo_circleloss.py 19 | [1/40] Training classifier. 20 | Test set: Accuracy: 5348/10000 (53%) 21 | ... 22 | [40/40] Training classifier. 23 | Test set: Accuracy: 9863/10000 (99%) 24 | ``` 25 | This will train a simple neural network with the cross-entropy loss. 26 | ### Training w/ circle loss 27 | ``` 28 | >>> python example_cls.py 29 | [1/20] Training with Circle Loss. 30 | ... 31 | [20/20] Training with Circle Loss. 32 | [1/20] Training classifier. Test set: Accuracy: 9682/10000 (97%) 33 | ... 34 | [20/20] Training classifier. Test set: Accuracy: 9888/10000 (99%) 35 | ``` 36 | This will first train a simple neural network with Circle Loss, and then train a classifier with the cross-entropy loss on the extracted features. 37 | 38 | ## Example 3: Comparison 39 | ``` 40 | >>> python example_compare.py 41 | ``` 42 | This will predict whether two digit images show the same digit, using the decision rule sketched below. 43 | 44 |
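The decision rule in `example_compare.py` thresholds the inner product of the two L2-normalized embeddings (i.e. their cosine similarity) at 0.75. A minimal sketch, assuming `model` is the embedding network trained above and `img` is a batch holding the two digit images:
```
import torch

# img: tensor of shape (2, 1, 28, 28) holding the two digits to compare (assumed given)
with torch.no_grad():
    emb = model(img)                          # L2-normalized embeddings, shape (2, 32)
same = torch.sum(emb[0] * emb[1]) > 0.75      # cosine similarity against the script's threshold
```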
![Comparing two digits with the learned embeddings](figures/compare.png) 45 | 46 |
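## Usage
`circle_loss.py` exposes `CircleLoss` and the helper `convert_label_to_similarity`, which turns a batch of L2-normalized features and labels into within-class and between-class similarity pairs. A minimal training-step sketch (the tiny linear embedder and the random batch below are placeholders; `m=0.25` and `gamma=80` follow the example scripts):
```
import torch
from torch import nn

from circle_loss import CircleLoss, convert_label_to_similarity

# Placeholder embedder: any module producing a feature vector per sample will do.
model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 32))
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
criterion = CircleLoss(m=0.25, gamma=80)

# Dummy batch of MNIST-sized images and integer labels.
img = torch.randn(64, 1, 28, 28)
label = torch.randint(0, 10, (64,))

optimizer.zero_grad()
features = nn.functional.normalize(model(img))          # CircleLoss expects L2-normalized features
sp, sn = convert_label_to_similarity(features, label)   # within-class / between-class similarities
loss = criterion(sp, sn)
loss.backward()
optimizer.step()
```
The same pattern appears in `example_cls.py`, `example_compare.py`, and `example_vis.py`, with the small convolutional `Model` as the embedder.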
47 | 48 | ## Acknowledgements 49 | Thanks the implementation [TinyZeaMays/CircleLoss](https://github.com/TinyZeaMays/CircleLoss) and the authors of the paper. 50 | -------------------------------------------------------------------------------- /circle_loss.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import torch 4 | from torch import nn, Tensor 5 | 6 | 7 | def convert_label_to_similarity(normed_feature: Tensor, label: Tensor) -> Tuple[Tensor, Tensor]: 8 | similarity_matrix = normed_feature @ normed_feature.transpose(1, 0) 9 | label_matrix = label.unsqueeze(1) == label.unsqueeze(0) 10 | 11 | positive_matrix = label_matrix.triu(diagonal=1) 12 | negative_matrix = label_matrix.logical_not().triu(diagonal=1) 13 | 14 | similarity_matrix = similarity_matrix.view(-1) 15 | positive_matrix = positive_matrix.view(-1) 16 | negative_matrix = negative_matrix.view(-1) 17 | return similarity_matrix[positive_matrix], similarity_matrix[negative_matrix] 18 | 19 | 20 | class CircleLoss(nn.Module): 21 | def __init__(self, m: float, gamma: float) -> None: 22 | super(CircleLoss, self).__init__() 23 | self.m = m 24 | self.gamma = gamma 25 | self.soft_plus = nn.Softplus() 26 | 27 | def forward(self, sp: Tensor, sn: Tensor) -> Tensor: 28 | ap = torch.clamp_min(- sp.detach() + 1 + self.m, min=0.) 29 | an = torch.clamp_min(sn.detach() + self.m, min=0.) 30 | 31 | delta_p = 1 - self.m 32 | delta_n = self.m 33 | 34 | logit_p = - ap * (sp - delta_p) * self.gamma 35 | logit_n = an * (sn - delta_n) * self.gamma 36 | 37 | loss = self.soft_plus(torch.logsumexp(logit_n, dim=0) + torch.logsumexp(logit_p, dim=0)) 38 | 39 | return loss 40 | 41 | -------------------------------------------------------------------------------- /example_cls.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, Tensor 3 | from torch.optim import SGD 4 | from torch.utils.data import DataLoader 5 | from torchvision.datasets import MNIST 6 | from torchvision.transforms import ToTensor 7 | 8 | from circle_loss import convert_label_to_similarity, CircleLoss 9 | 10 | 11 | def get_loader(is_train: bool, batch_size: int) -> DataLoader: 12 | return DataLoader( 13 | dataset=MNIST(root="./data", train=is_train, transform=ToTensor(), download=True), 14 | batch_size=batch_size, 15 | shuffle=is_train, 16 | ) 17 | 18 | 19 | class Model(nn.Module): 20 | def __init__(self) -> None: 21 | super(Model, self).__init__() 22 | self.feature_extractor = nn.Sequential( 23 | nn.Conv2d(in_channels=1, out_channels=8, kernel_size=5), 24 | nn.MaxPool2d(kernel_size=2), 25 | nn.ReLU(), 26 | nn.Conv2d(in_channels=8, out_channels=16, kernel_size=5), 27 | nn.MaxPool2d(kernel_size=2), 28 | nn.ReLU(), 29 | nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3), 30 | nn.MaxPool2d(kernel_size=2), 31 | nn.ReLU(), 32 | ) 33 | 34 | def forward(self, inp: Tensor) -> Tensor: 35 | feature = self.feature_extractor(inp).mean(dim=[2, 3]) 36 | return nn.functional.normalize(feature) 37 | 38 | 39 | class Classifier(nn.Module): 40 | def __init__(self) -> None: 41 | super(Classifier, self).__init__() 42 | self.classifier = nn.Linear(32, 10) 43 | 44 | def forward(self, inp: Tensor) -> Tensor: 45 | return self.classifier(inp) 46 | 47 | 48 | def main() -> None: 49 | model = Model() 50 | classifier = Classifier() 51 | optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-5) 52 | optimizer_cls = SGD(classifier.parameters(), 
lr=0.001, momentum=0.9, weight_decay=1e-5) 53 | train_loader = get_loader(is_train=True, batch_size=64) 54 | val_loader = get_loader(is_train=False, batch_size=1000) 55 | criterion = CircleLoss(m=0.25, gamma=80) 56 | criterion_xe = nn.CrossEntropyLoss() 57 | 58 | for epoch in range(20): 59 | for img, label in train_loader: 60 | model.zero_grad() 61 | features = model(img) 62 | loss = criterion(*convert_label_to_similarity(features, label)) 63 | loss.backward() 64 | optimizer.step() 65 | print('[{}/{}] Training with Circle Loss.'.format(epoch + 1, 20)) 66 | 67 | for epoch in range(20): 68 | for img, label in train_loader: 69 | model.zero_grad() 70 | classifier.zero_grad() 71 | features = model(img) 72 | output = classifier(features) 73 | loss = criterion_xe(output, label) 74 | loss.backward() 75 | optimizer_cls.step() 76 | print('[{}/{}] Training classifier.'.format(epoch + 1, 20)) 77 | 78 | correct = 0 79 | for img, label in val_loader: 80 | features = model(img) 81 | output = classifier(features) 82 | pred = output.data.max(1)[1] 83 | correct += pred.eq(label.data).cpu().sum() 84 | print('Test set: Accuracy: {}/{} ({:.0f}%)'.format( 85 | correct, len(val_loader.dataset), 100. * correct / len(val_loader.dataset))) 86 | 87 | 88 | if __name__ == "__main__": 89 | main() 90 | -------------------------------------------------------------------------------- /example_cls_wo_circleloss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, Tensor 3 | from torch.optim import SGD 4 | from torch.utils.data import DataLoader 5 | from torchvision.datasets import MNIST 6 | from torchvision.transforms import ToTensor 7 | 8 | 9 | def get_loader(is_train: bool, batch_size: int) -> DataLoader: 10 | return DataLoader( 11 | dataset=MNIST(root="./data", train=is_train, transform=ToTensor(), download=True), 12 | batch_size=batch_size, 13 | shuffle=is_train, 14 | ) 15 | 16 | 17 | class Model(nn.Module): 18 | def __init__(self) -> None: 19 | super(Model, self).__init__() 20 | self.feature_extractor = nn.Sequential( 21 | nn.Conv2d(in_channels=1, out_channels=8, kernel_size=5), 22 | nn.MaxPool2d(kernel_size=2), 23 | nn.ReLU(), 24 | nn.Conv2d(in_channels=8, out_channels=16, kernel_size=5), 25 | nn.MaxPool2d(kernel_size=2), 26 | nn.ReLU(), 27 | nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3), 28 | nn.MaxPool2d(kernel_size=2), 29 | nn.ReLU(), 30 | ) 31 | self.classifier = nn.Linear(32, 10) 32 | 33 | def forward(self, inp: Tensor) -> Tensor: 34 | feature = self.feature_extractor(inp).mean(dim=[2, 3]) 35 | return self.classifier(feature) 36 | 37 | 38 | def main() -> None: 39 | model = Model() 40 | optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-5) 41 | train_loader = get_loader(is_train=True, batch_size=64) 42 | val_loader = get_loader(is_train=False, batch_size=1000) 43 | criterion_xe = nn.CrossEntropyLoss() 44 | 45 | for epoch in range(40): 46 | for img, label in train_loader: 47 | model.zero_grad() 48 | output = model(img) 49 | loss = criterion_xe(output, label) 50 | loss.backward() 51 | optimizer.step() 52 | print('[{}/{}] Training classifier.'.format(epoch + 1, 40)) 53 | 54 | correct = 0 55 | for img, label in val_loader: 56 | output = model(img) 57 | pred = output.data.max(1)[1] 58 | correct += pred.eq(label.data).cpu().sum() 59 | print('Test set: Accuracy: {}/{} ({:.0f}%)'.format( 60 | correct, len(val_loader.dataset), 100. 
* correct / len(val_loader.dataset))) 61 | 62 | 63 | if __name__ == "__main__": 64 | main() 65 | -------------------------------------------------------------------------------- /example_compare.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import matplotlib.pyplot as plt 4 | from torch import nn, Tensor 5 | from torch.optim import SGD 6 | from torch.utils.data import DataLoader 7 | from torchvision.datasets import MNIST 8 | from torchvision.transforms import ToTensor 9 | from tqdm import tqdm 10 | 11 | from circle_loss import convert_label_to_similarity, CircleLoss 12 | 13 | 14 | def get_loader(is_train: bool, batch_size: int) -> DataLoader: 15 | return DataLoader( 16 | dataset=MNIST(root="./data", train=is_train, transform=ToTensor(), download=True), 17 | batch_size=batch_size, 18 | shuffle=is_train, 19 | ) 20 | 21 | 22 | def plot(img_1, img_2, same): 23 | plt.figure(12) 24 | if not same: 25 | plt.suptitle('These two digits are different.', fontsize=20) 26 | else: 27 | plt.suptitle('These two digits are the same.', fontsize=20) 28 | plt.subplot(121) 29 | plt.imshow(img_1, cmap='Greys') 30 | plt.axis('off') 31 | 32 | plt.subplot(122) 33 | plt.imshow(img_2, cmap='Greys') 34 | plt.axis('off') 35 | plt.show() 36 | 37 | 38 | class Model(nn.Module): 39 | def __init__(self) -> None: 40 | super(Model, self).__init__() 41 | self.feature_extractor = nn.Sequential( 42 | nn.Conv2d(in_channels=1, out_channels=8, kernel_size=5), 43 | nn.MaxPool2d(kernel_size=2), 44 | nn.ReLU(), 45 | nn.Conv2d(in_channels=8, out_channels=16, kernel_size=5), 46 | nn.MaxPool2d(kernel_size=2), 47 | nn.ReLU(), 48 | nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3), 49 | nn.MaxPool2d(kernel_size=2), 50 | nn.ReLU(), 51 | ) 52 | 53 | def forward(self, inp: Tensor) -> Tensor: 54 | feature = self.feature_extractor(inp).mean(dim=[2, 3]) 55 | return nn.functional.normalize(feature) 56 | 57 | 58 | def main() -> None: 59 | model = Model() 60 | optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-5) 61 | train_loader = get_loader(is_train=True, batch_size=64) 62 | val_loader = get_loader(is_train=False, batch_size=2) 63 | criterion = CircleLoss(m=0.25, gamma=80) 64 | 65 | for epoch in range(20): 66 | for img, label in tqdm(train_loader): 67 | model.zero_grad() 68 | pred = model(img) 69 | loss = criterion(*convert_label_to_similarity(pred, label)) 70 | loss.backward() 71 | optimizer.step() 72 | 73 | thresh = 0.75 74 | for img, label in val_loader: 75 | pred = model(img) 76 | pred_label = torch.sum(pred[0] * pred[1]) > thresh 77 | plot(img[0, 0].data.numpy(), img[1, 0].data.numpy(), pred_label) 78 | break 79 | 80 | 81 | if __name__ == "__main__": 82 | main() 83 | -------------------------------------------------------------------------------- /example_vis.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | from torch import nn, Tensor 6 | from torch.optim import SGD 7 | from torch.utils.data import DataLoader 8 | from torchvision.datasets import MNIST 9 | from torchvision.transforms import ToTensor 10 | from sklearn import manifold 11 | from tqdm import tqdm 12 | 13 | from circle_loss import convert_label_to_similarity, CircleLoss 14 | 15 | 16 | def get_loader(is_train: bool, batch_size: int) -> DataLoader: 17 | return DataLoader( 18 | dataset=MNIST(root="./data", train=is_train, transform=ToTensor(), 
download=True), 19 | batch_size=batch_size, 20 | shuffle=is_train, 21 | ) 22 | 23 | 24 | def plot_features(features, labels, num_classes): 25 | tsne = manifold.TSNE(n_components=2, init='pca', random_state=501) 26 | features = tsne.fit_transform(features) 27 | x_min, x_max = features.min(0), features.max(0) 28 | features = (features - x_min) / (x_max - x_min) 29 | colors = ['C0', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9'] 30 | for label_idx in range(num_classes): 31 | plt.scatter( 32 | features[labels == label_idx, 0], 33 | features[labels == label_idx, 1], 34 | c=colors[label_idx], 35 | s=50, 36 | alpha=0.6 37 | ) 38 | plt.legend(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'], loc='upper right', fontsize=10) 39 | plt.title('t-SNE visualization of the learned features', fontsize=30) 40 | plt.axis('tight') 41 | plt.show() 42 | 43 | 44 | class Model(nn.Module): 45 | def __init__(self) -> None: 46 | super(Model, self).__init__() 47 | self.feature_extractor = nn.Sequential( 48 | nn.Conv2d(in_channels=1, out_channels=8, kernel_size=5), 49 | nn.MaxPool2d(kernel_size=2), 50 | nn.ReLU(), 51 | nn.Conv2d(in_channels=8, out_channels=16, kernel_size=5), 52 | nn.MaxPool2d(kernel_size=2), 53 | nn.ReLU(), 54 | nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3), 55 | nn.MaxPool2d(kernel_size=2), 56 | nn.ReLU(), 57 | ) 58 | 59 | def forward(self, inp: Tensor) -> Tensor: 60 | feature = self.feature_extractor(inp).mean(dim=[2, 3]) 61 | return nn.functional.normalize(feature) 62 | 63 | 64 | def main() -> None: 65 | model = Model() 66 | optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-5) 67 | train_loader = get_loader(is_train=True, batch_size=64) 68 | val_loader = get_loader(is_train=False, batch_size=1000) 69 | criterion = CircleLoss(m=0.25, gamma=80) 70 | 71 | for epoch in range(20): 72 | for img, label in tqdm(train_loader): 73 | model.zero_grad() 74 | pred = model(img) 75 | loss = criterion(*convert_label_to_similarity(pred, label)) 76 | loss.backward() 77 | optimizer.step() 78 | 79 | all_features = [] 80 | all_labels = [] 81 | for img, label in val_loader: 82 | pred = model(img) 83 | all_features.append(pred.data.numpy()) 84 | all_labels.append(label.data.numpy()) 85 | all_features = np.concatenate(all_features, 0) 86 | all_labels = np.concatenate(all_labels, 0) 87 | plot_features(all_features, all_labels, 10) 88 | 89 | 90 | if __name__ == "__main__": 91 | main() 92 | -------------------------------------------------------------------------------- /figures/compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhjohnchan/circleloss.pytorch/7ca202187df4626e215feadb60fda297b3c4113f/figures/compare.png -------------------------------------------------------------------------------- /figures/tsne.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhjohnchan/circleloss.pytorch/7ca202187df4626e215feadb60fda297b3c4113f/figures/tsne.png --------------------------------------------------------------------------------