├── README.md
├── circle_loss.py
├── example_cls.py
├── example_cls_wo_circleloss.py
├── example_compare.py
├── example_vis.py
└── figures
    ├── compare.png
    └── tsne.png
/README.md:
--------------------------------------------------------------------------------
# circleloss.pytorch
Examples of playing with Circle Loss from the paper "[Circle Loss: A Unified Perspective of Pair Similarity Optimization](https://arxiv.org/abs/2002.10857)", CVPR 2020.

The implementation of Circle Loss is from [TinyZeaMays/CircleLoss](https://github.com/TinyZeaMays/CircleLoss).

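A minimal, self-contained sketch of how the loss in `circle_loss.py` is called (the feature dimension, batch size, and hyper-parameter values here are arbitrary placeholders):
```
import torch
from torch import nn

from circle_loss import CircleLoss, convert_label_to_similarity

feat = nn.functional.normalize(torch.rand(256, 64, requires_grad=True))  # L2-normalized embeddings
lbl = torch.randint(high=10, size=(256,), dtype=torch.long)              # integer class labels
criterion = CircleLoss(m=0.25, gamma=80)
loss = criterion(*convert_label_to_similarity(feat, lbl))
loss.backward()
```
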
## Example 1: Visualization (learned features)
```
>>> python example_vis.py
```
This will visualize the learned features using t-SNE.

![t-SNE visualization of the learned features](figures/tsne.png)

## Example 2: Classification
### Training w/o circle loss
```
>>> python example_cls_wo_circleloss.py
[1/40] Training classifier.
Test set: Accuracy: 5348/10000 (53%)
...
[40/40] Training classifier.
Test set: Accuracy: 9863/10000 (99%)
```
This will train a simple neural network with the cross-entropy loss.
### Training w/ circle loss
```
>>> python example_cls.py
[1/20] Training with Circle Loss.
...
[20/20] Training with Circle Loss.
[1/20] Training classifier. Test set: Accuracy: 9682/10000 (97%)
...
[20/20] Training classifier. Test set: Accuracy: 9888/10000 (99%)
```
This will first train a simple neural network with the circle loss, and then train a classifier with the cross-entropy loss on the extracted features.

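Schematically, the script runs two stages (a condensed sketch of the training loops in `example_cls.py`; `model`, `classifier`, the optimizers, the losses, and `train_loader` are all defined in that script):
```
# Stage 1: learn an embedding with Circle Loss on pairwise similarities.
for img, label in train_loader:
    model.zero_grad()
    loss = criterion(*convert_label_to_similarity(model(img), label))
    loss.backward()
    optimizer.step()

# Stage 2: fit a linear classifier on the (now fixed) features with cross-entropy.
for img, label in train_loader:
    classifier.zero_grad()
    loss = criterion_xe(classifier(model(img)), label)
    loss.backward()
    optimizer_cls.step()
```
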
## Example 3: Comparison
```
>>> python example_compare.py
```
This will judge whether two digits are the same by thresholding the cosine similarity of their learned features.

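The decision itself is a similarity threshold on the normalized features (a sketch of the check in `example_compare.py`; `img` is a two-image batch from the MNIST test loader, `model` is the embedding network trained in that script, and 0.75 is the threshold hard-coded there):
```
feat = model(img)                           # two L2-normalized 32-d feature vectors
same = torch.sum(feat[0] * feat[1]) > 0.75  # cosine similarity vs. threshold
```
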
![Comparison of two test digits](figures/compare.png)

## Acknowledgements
Thanks to [TinyZeaMays/CircleLoss](https://github.com/TinyZeaMays/CircleLoss) for the implementation, and to the authors of the paper.

--------------------------------------------------------------------------------
/circle_loss.py:
--------------------------------------------------------------------------------
from typing import Tuple

import torch
from torch import nn, Tensor


def convert_label_to_similarity(normed_feature: Tensor, label: Tensor) -> Tuple[Tensor, Tensor]:
    # Pairwise cosine similarities of the L2-normalized features.
    similarity_matrix = normed_feature @ normed_feature.transpose(1, 0)
    label_matrix = label.unsqueeze(1) == label.unsqueeze(0)

    # Keep each unordered pair once (upper triangle, diagonal excluded):
    # positive pairs share a label, negative pairs do not.
    positive_matrix = label_matrix.triu(diagonal=1)
    negative_matrix = label_matrix.logical_not().triu(diagonal=1)

    similarity_matrix = similarity_matrix.view(-1)
    positive_matrix = positive_matrix.view(-1)
    negative_matrix = negative_matrix.view(-1)
    # Returns (s_p, s_n): within-class and between-class similarity scores.
    return similarity_matrix[positive_matrix], similarity_matrix[negative_matrix]


class CircleLoss(nn.Module):
    def __init__(self, m: float, gamma: float) -> None:
        super(CircleLoss, self).__init__()
        self.m = m          # relaxation margin
        self.gamma = gamma  # scale factor
        self.soft_plus = nn.Softplus()

    def forward(self, sp: Tensor, sn: Tensor) -> Tensor:
        # Self-paced weights alpha_p = [O_p - s_p]_+ and alpha_n = [s_n - O_n]_+,
        # with optima O_p = 1 + m and O_n = -m (detached, so they act as constants).
        ap = torch.clamp_min(- sp.detach() + 1 + self.m, min=0.)
        an = torch.clamp_min(sn.detach() + self.m, min=0.)

        # Decision margins Delta_p = 1 - m and Delta_n = m.
        delta_p = 1 - self.m
        delta_n = self.m

        logit_p = - ap * (sp - delta_p) * self.gamma
        logit_n = an * (sn - delta_n) * self.gamma

        # softplus(logsumexp_n + logsumexp_p) = log(1 + sum_n exp(.) * sum_p exp(.)),
        # i.e. the unified pair-similarity form of Circle Loss from the paper.
        loss = self.soft_plus(torch.logsumexp(logit_n, dim=0) + torch.logsumexp(logit_p, dim=0))

        return loss

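# Hypothetical smoke test, not part of the original module: builds random
# normalized features and labels, and checks that the loss runs end to end.
if __name__ == "__main__":
    feat = nn.functional.normalize(torch.rand(256, 64, requires_grad=True))
    lbl = torch.randint(high=10, size=(256,), dtype=torch.long)
    sp, sn = convert_label_to_similarity(feat, lbl)
    criterion = CircleLoss(m=0.25, gamma=80)  # same m/gamma as the example scripts
    loss = criterion(sp, sn)
    loss.backward()
    print(loss)
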
--------------------------------------------------------------------------------
/example_cls.py:
--------------------------------------------------------------------------------
import torch
from torch import nn, Tensor
from torch.optim import SGD
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor

from circle_loss import convert_label_to_similarity, CircleLoss


def get_loader(is_train: bool, batch_size: int) -> DataLoader:
    return DataLoader(
        dataset=MNIST(root="./data", train=is_train, transform=ToTensor(), download=True),
        batch_size=batch_size,
        shuffle=is_train,
    )


class Model(nn.Module):
    def __init__(self) -> None:
        super(Model, self).__init__()
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=8, kernel_size=5),
            nn.MaxPool2d(kernel_size=2),
            nn.ReLU(),
            nn.Conv2d(in_channels=8, out_channels=16, kernel_size=5),
            nn.MaxPool2d(kernel_size=2),
            nn.ReLU(),
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3),
            nn.MaxPool2d(kernel_size=2),
            nn.ReLU(),
        )

    def forward(self, inp: Tensor) -> Tensor:
        feature = self.feature_extractor(inp).mean(dim=[2, 3])
        return nn.functional.normalize(feature)


class Classifier(nn.Module):
    def __init__(self) -> None:
        super(Classifier, self).__init__()
        self.classifier = nn.Linear(32, 10)

    def forward(self, inp: Tensor) -> Tensor:
        return self.classifier(inp)


def main() -> None:
    model = Model()
    classifier = Classifier()
    optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-5)
    optimizer_cls = SGD(classifier.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-5)
    train_loader = get_loader(is_train=True, batch_size=64)
    val_loader = get_loader(is_train=False, batch_size=1000)
    criterion = CircleLoss(m=0.25, gamma=80)
    criterion_xe = nn.CrossEntropyLoss()

    # Stage 1: train the feature extractor with Circle Loss on pairwise similarities.
    for epoch in range(20):
        for img, label in train_loader:
            model.zero_grad()
            features = model(img)
            loss = criterion(*convert_label_to_similarity(features, label))
            loss.backward()
            optimizer.step()
        print('[{}/{}] Training with Circle Loss.'.format(epoch + 1, 20))

    # Stage 2: train the linear classifier with cross-entropy on the extracted features.
    # Only optimizer_cls is stepped, so the feature extractor stays fixed.
    for epoch in range(20):
        for img, label in train_loader:
            model.zero_grad()
            classifier.zero_grad()
            features = model(img)
            output = classifier(features)
            loss = criterion_xe(output, label)
            loss.backward()
            optimizer_cls.step()
        print('[{}/{}] Training classifier.'.format(epoch + 1, 20))

        correct = 0
        for img, label in val_loader:
            features = model(img)
            output = classifier(features)
            pred = output.data.max(1)[1]
            correct += pred.eq(label.data).cpu().sum()
        print('Test set: Accuracy: {}/{} ({:.0f}%)'.format(
            correct, len(val_loader.dataset), 100. * correct / len(val_loader.dataset)))


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/example_cls_wo_circleloss.py:
--------------------------------------------------------------------------------
import torch
from torch import nn, Tensor
from torch.optim import SGD
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor


def get_loader(is_train: bool, batch_size: int) -> DataLoader:
    return DataLoader(
        dataset=MNIST(root="./data", train=is_train, transform=ToTensor(), download=True),
        batch_size=batch_size,
        shuffle=is_train,
    )


class Model(nn.Module):
    def __init__(self) -> None:
        super(Model, self).__init__()
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=8, kernel_size=5),
            nn.MaxPool2d(kernel_size=2),
            nn.ReLU(),
            nn.Conv2d(in_channels=8, out_channels=16, kernel_size=5),
            nn.MaxPool2d(kernel_size=2),
            nn.ReLU(),
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3),
            nn.MaxPool2d(kernel_size=2),
            nn.ReLU(),
        )
        self.classifier = nn.Linear(32, 10)

    def forward(self, inp: Tensor) -> Tensor:
        feature = self.feature_extractor(inp).mean(dim=[2, 3])
        return self.classifier(feature)


def main() -> None:
    model = Model()
    optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-5)
    train_loader = get_loader(is_train=True, batch_size=64)
    val_loader = get_loader(is_train=False, batch_size=1000)
    criterion_xe = nn.CrossEntropyLoss()

    for epoch in range(40):
        for img, label in train_loader:
            model.zero_grad()
            output = model(img)
            loss = criterion_xe(output, label)
            loss.backward()
            optimizer.step()
        print('[{}/{}] Training classifier.'.format(epoch + 1, 40))

        correct = 0
        for img, label in val_loader:
            output = model(img)
            pred = output.data.max(1)[1]
            correct += pred.eq(label.data).cpu().sum()
        print('Test set: Accuracy: {}/{} ({:.0f}%)'.format(
            correct, len(val_loader.dataset), 100. * correct / len(val_loader.dataset)))


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/example_compare.py:
--------------------------------------------------------------------------------
import os
import torch
import matplotlib.pyplot as plt
from torch import nn, Tensor
from torch.optim import SGD
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from tqdm import tqdm

from circle_loss import convert_label_to_similarity, CircleLoss


def get_loader(is_train: bool, batch_size: int) -> DataLoader:
    return DataLoader(
        dataset=MNIST(root="./data", train=is_train, transform=ToTensor(), download=True),
        batch_size=batch_size,
        shuffle=is_train,
    )


def plot(img_1, img_2, same):
    plt.figure(12)
    if not same:
        plt.suptitle('These two digits are different.', fontsize=20)
    else:
        plt.suptitle('These two digits are the same.', fontsize=20)
    plt.subplot(121)
    plt.imshow(img_1, cmap='Greys')
    plt.axis('off')

    plt.subplot(122)
    plt.imshow(img_2, cmap='Greys')
    plt.axis('off')
    plt.show()


class Model(nn.Module):
    def __init__(self) -> None:
        super(Model, self).__init__()
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=8, kernel_size=5),
            nn.MaxPool2d(kernel_size=2),
            nn.ReLU(),
            nn.Conv2d(in_channels=8, out_channels=16, kernel_size=5),
            nn.MaxPool2d(kernel_size=2),
            nn.ReLU(),
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3),
            nn.MaxPool2d(kernel_size=2),
            nn.ReLU(),
        )

    def forward(self, inp: Tensor) -> Tensor:
        feature = self.feature_extractor(inp).mean(dim=[2, 3])
        return nn.functional.normalize(feature)


def main() -> None:
    model = Model()
    optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-5)
    train_loader = get_loader(is_train=True, batch_size=64)
    val_loader = get_loader(is_train=False, batch_size=2)
    criterion = CircleLoss(m=0.25, gamma=80)

    for epoch in range(20):
        for img, label in tqdm(train_loader):
            model.zero_grad()
            pred = model(img)
            loss = criterion(*convert_label_to_similarity(pred, label))
            loss.backward()
            optimizer.step()

    thresh = 0.75
    for img, label in val_loader:
        pred = model(img)
        pred_label = torch.sum(pred[0] * pred[1]) > thresh
        plot(img[0, 0].data.numpy(), img[1, 0].data.numpy(), pred_label)
        break


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/example_vis.py:
--------------------------------------------------------------------------------
import os
import torch
import numpy as np
import matplotlib.pyplot as plt
from torch import nn, Tensor
from torch.optim import SGD
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from sklearn import manifold
from tqdm import tqdm

from circle_loss import convert_label_to_similarity, CircleLoss


def get_loader(is_train: bool, batch_size: int) -> DataLoader:
    return DataLoader(
        dataset=MNIST(root="./data", train=is_train, transform=ToTensor(), download=True),
        batch_size=batch_size,
        shuffle=is_train,
    )


def plot_features(features, labels, num_classes):
    tsne = manifold.TSNE(n_components=2, init='pca', random_state=501)
    features = tsne.fit_transform(features)
    x_min, x_max = features.min(0), features.max(0)
    features = (features - x_min) / (x_max - x_min)
    colors = ['C0', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9']
    for label_idx in range(num_classes):
        plt.scatter(
            features[labels == label_idx, 0],
            features[labels == label_idx, 1],
            c=colors[label_idx],
            s=50,
            alpha=0.6
        )
    plt.legend(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'], loc='upper right', fontsize=10)
    plt.title('t-SNE visualization of the learned features', fontsize=30)
    plt.axis('tight')
    plt.show()


class Model(nn.Module):
    def __init__(self) -> None:
        super(Model, self).__init__()
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=8, kernel_size=5),
            nn.MaxPool2d(kernel_size=2),
            nn.ReLU(),
            nn.Conv2d(in_channels=8, out_channels=16, kernel_size=5),
            nn.MaxPool2d(kernel_size=2),
            nn.ReLU(),
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3),
            nn.MaxPool2d(kernel_size=2),
            nn.ReLU(),
        )

    def forward(self, inp: Tensor) -> Tensor:
        feature = self.feature_extractor(inp).mean(dim=[2, 3])
        return nn.functional.normalize(feature)


def main() -> None:
    model = Model()
    optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-5)
    train_loader = get_loader(is_train=True, batch_size=64)
    val_loader = get_loader(is_train=False, batch_size=1000)
    criterion = CircleLoss(m=0.25, gamma=80)

    for epoch in range(20):
        for img, label in tqdm(train_loader):
            model.zero_grad()
            pred = model(img)
            loss = criterion(*convert_label_to_similarity(pred, label))
            loss.backward()
            optimizer.step()

    all_features = []
    all_labels = []
    for img, label in val_loader:
        pred = model(img)
        all_features.append(pred.data.numpy())
        all_labels.append(label.data.numpy())
    all_features = np.concatenate(all_features, 0)
    all_labels = np.concatenate(all_labels, 0)
    plot_features(all_features, all_labels, 10)


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/figures/compare.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhjohnchan/circleloss.pytorch/7ca202187df4626e215feadb60fda297b3c4113f/figures/compare.png
--------------------------------------------------------------------------------
/figures/tsne.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhjohnchan/circleloss.pytorch/7ca202187df4626e215feadb60fda297b3c4113f/figures/tsne.png
--------------------------------------------------------------------------------