├── README.md
├── Supervised Learning 1.py
├── Supervised Learning 2.py
└── Unsupervised Learning.py

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Huanqiang-FF-Algorithms
A PyTorch implementation of Hinton's [Forward-Forward Algorithm](https://arxiv.org/abs/2212.13345).


# Progress
■ Supervised learning on MNIST with one group of negative samples (test accuracy: 96.6%)
■ Supervised learning on MNIST with nine groups of negative samples (test accuracy: 98.06%)
□ Unsupervised learning on MNIST (test accuracy: 95%)


Questions can be sent to duan0001@outlook.com; the code will be updated in the coming months.
--------------------------------------------------------------------------------
/Supervised Learning 1.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import tensorflow as tf  # used only to load MNIST via tf.keras.datasets
import numpy as np
from tqdm import tqdm  # progress bar for training
from torch.optim import Adam  # learning rule for weight optimization


device = "cuda:3"
print(device)

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
print(x_train.shape, x_test.shape)

def edit_data(x, y, method="edit"):
    # Write the label as a one-hot code into the first ten pixels of row 0.
    is_batch = x.ndim == 3
    if method == "edit":
        if is_batch:
            x[:, 0, :10] = 0.0
            for i in range(x.shape[0]):
                x[i, 0, y[i]] = 255
        else:
            x[0, :10] = 0.0
            x[0, y] = 255

def random_label(y):
    # Return a random label different from y (elementwise for arrays).
    if type(y) != np.ndarray:
        label = list(range(10))
        del label[y]
        return np.random.choice(label)
    else:
        label = np.copy(y)
        for i in range(y.shape[0]):
            label[i] = random_label(y[i])
        return label

pos = np.copy(x_train)
neg = np.copy(x_train)
edit_data(pos, y_train)                # positive: the true label overlaid
edit_data(neg, random_label(y_train))  # negative: a wrong label overlaid


# 33.31... / 78.57... are the MNIST pixel mean and standard deviation
pos = (pos - 33.31002426147461) / 78.56748962402344
neg = (neg - 33.31002426147461) / 78.56748962402344
pos = pos.reshape(pos.shape[0], -1)
neg = neg.reshape(neg.shape[0], -1)
x_pos = torch.tensor(pos, dtype=torch.float)
x_neg = torch.tensor(neg, dtype=torch.float)

x_train = (x_train - 33.31002426147461) / 78.56748962402344
x_train = x_train.reshape(x_train.shape[0], -1)
y_train = y_train.reshape(y_train.shape[0])
x = torch.tensor(x_train, dtype=torch.float)
y = torch.tensor(y_train, dtype=torch.float)

x_test = (x_test - 33.31002426147461) / 78.56748962402344
x_test = x_test.reshape(x_test.shape[0], -1)
y_test = y_test.reshape(y_test.shape[0])
x_te = torch.tensor(x_test, dtype=torch.float)
y_te = torch.tensor(y_test, dtype=torch.float)


x_pos, x_neg, x, y = x_pos.cuda(device), x_neg.cuda(device), x.cuda(device), y.cuda(device)
x_te, y_te = x_te.cuda(device), y_te.cuda(device)

def overlay_y_on_x(x, y):
    x_ = x.clone()
    x_[:, :10] *= 0.0
    x_[range(x.shape[0]), y] = x.max()
    return x_
# Implements Hinton's trick of embedding the label into the pixels as a one-hot code:
# clone x (a 784-dim tensor per sample), zero the first ten pixels, then write the
# maximum normalized pixel value at the position of a correct or incorrect label y.
# With the correct label the sample is positive; with a wrong label it is negative.
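# A quick sanity check of the overlay (illustrative only; demo_x and demo_y
# are made-up names, not part of the original script):
demo_x = torch.rand(2, 784)
demo_y = torch.tensor([3, 7])
demo_out = overlay_y_on_x(demo_x, demo_y)
assert demo_out[0, 3] == demo_x.max() and demo_out[1, 7] == demo_x.max()
assert demo_out[:, :10].count_nonzero() == 2  # only the label positions survive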
class Layer(nn.Linear):
    def __init__(self, in_features, out_features,
                 bias=True, device=None, dtype=None):
        super().__init__(in_features, out_features, bias, device, dtype)

        self.relu = torch.nn.ReLU()
        self.opt = Adam(self.parameters(), lr=0.06)
        self.threshold = 2.0
        self.num_epochs = 120
        # Basic hyperparameters. A threshold of 2.0 follows the common reference
        # implementations; I have not found a principled derivation for it.

    def forward(self, x):
        # Length-normalize each sample (L2 norm plus a small epsilon) so that only
        # the orientation of the previous layer's activity is passed on.
        x_direction = x / (x.norm(2, 1, keepdim=True) + 0.01)
        return self.relu(torch.mm(x_direction, self.weight.T) + self.bias.unsqueeze(0))

    def train_layer(self, x_pos, x_neg):
        # Named train_layer so it does not shadow nn.Module.train.
        for i in tqdm(range(self.num_epochs)):
            for b in range(60):  # 60 minibatches of 1,000 samples each
                g_pos = self.forward(x_pos[b*1000: (b+1)*1000]).pow(2).mean(1)
                g_neg = self.forward(x_neg[b*1000: (b+1)*1000]).pow(2).mean(1)
                # The following loss pushes pos (neg) samples to
                # values larger (smaller) than the self.threshold.
                loss = torch.log(1 + torch.exp(torch.cat([-g_pos + self.threshold, g_neg - self.threshold]))).mean()
                self.opt.zero_grad()
                # this backward just computes the derivative within the layer and
                # hence is not considered backpropagation.
                loss.backward()
                self.opt.step()
        return self.forward(x_pos).detach(), self.forward(x_neg).detach()  # new tensors, detached from the graph
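# The loss above is the softplus of the stacked margins. A self-contained
# numerical check with dummy goodness values (illustrative only; the
# underscore-prefixed names are not part of the original script):
_thr = 2.0
_g_pos = torch.tensor([2.5, 1.0])
_g_neg = torch.tensor([1.5, 3.0])
_loss_a = torch.log(1 + torch.exp(torch.cat([-_g_pos + _thr, _g_neg - _thr]))).mean()
_loss_b = torch.nn.functional.softplus(torch.cat([_thr - _g_pos, _g_neg - _thr])).mean()
assert torch.allclose(_loss_a, _loss_b)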
class Net(torch.nn.Module):

    def __init__(self, dims):
        super().__init__()
        self.layers = []
        for d in range(len(dims) - 1):
            self.layers += [Layer(dims[d], dims[d + 1]).cuda(device)]

    def predict(self, x):
        # Try every label overlay and pick the one with the highest total goodness.
        goodness_per_label = []
        for label in range(10):
            h = overlay_y_on_x(x, label)
            goodness = []
            for layer in self.layers:
                h = layer(h)
                goodness += [h.pow(2).mean(1)]
            goodness_per_label += [sum(goodness).unsqueeze(1)]
        goodness_per_label = torch.cat(goodness_per_label, 1)
        return goodness_per_label.argmax(1)

    def train_net(self, x_pos, x_neg):
        # Train greedily, layer by layer; the actual update rule lives in Layer.
        h_pos, h_neg = x_pos, x_neg
        for i, layer in enumerate(self.layers):
            print('training layer', i, '...')
            h_pos, h_neg = layer.train_layer(h_pos, h_neg)


if __name__ == "__main__":
    torch.manual_seed(123)

    net = Net([784, 2000, 2000, 2000, 2000])
    net.train_net(x_pos, x_neg)

    print('train score:', 100*net.predict(x[0: 1000]).eq(y[0: 1000]).float().mean().item(), "%")
    print('test score:', 100*net.predict(x_te[0: 2000]).eq(y_te[0: 2000]).float().mean().item(), "%")
    print('test score:', 100*net.predict(x_te[2000: 4000]).eq(y_te[2000: 4000]).float().mean().item(), "%")
    print('test score:', 100*net.predict(x_te[4000: 6000]).eq(y_te[4000: 6000]).float().mean().item(), "%")
    print('test score:', 100*net.predict(x_te[6000: 8000]).eq(y_te[6000: 8000]).float().mean().item(), "%")
    print('test score:', 100*net.predict(x_te[8000: 10000]).eq(y_te[8000: 10000]).float().mean().item(), "%")
    # Overall accuracy on the full test set.
    print('final accuracy', 100*net.predict(x_te).eq(y_te).float().mean().item(), '%')
--------------------------------------------------------------------------------
/Supervised Learning 2.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import tensorflow as tf  # used only to load MNIST via tf.keras.datasets
import numpy as np
from tqdm import tqdm  # progress bar for training
from torch.optim import Adam  # learning rule for weight optimization



device = "cuda:5"
print(device)


(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
print(x_train.shape, x_test.shape)


def edit_data(x, y, method="edit"):
    # Write the label as a one-hot code into the first ten pixels of row 0.
    is_batch = x.ndim == 3
    if method == "edit":
        if is_batch:
            x[:, 0, :10] = 0.0
            for i in range(x.shape[0]):
                x[i, 0, y[i]] = 255
        else:
            x[0, :10] = 0.0
            x[0, y] = 255

pos = np.copy(x_train)
edit_data(pos, y_train)

# Nine groups of negative samples: group k overlays the label (y + k) % 10,
# so every wrong label appears exactly once per image.
negs = []
for k in range(1, 10):
    n = np.copy(x_train)
    edit_data(n, (y_train + k) % 10)
    negs.append(n)


MEAN, STD = 33.31002426147461, 78.56748962402344  # MNIST pixel mean / std

pos = (pos - MEAN) / STD
pos = pos.reshape(pos.shape[0], -1)
x_pos = torch.tensor(pos, dtype=torch.float)

x_negs = []
for n in negs:
    n_flat = ((n - MEAN) / STD).reshape(n.shape[0], -1)
    x_negs.append(torch.tensor(n_flat, dtype=torch.float))

x_train = (x_train - MEAN) / STD
x_train = x_train.reshape(x_train.shape[0], -1)
y_train = y_train.reshape(y_train.shape[0])
x = torch.tensor(x_train, dtype=torch.float)
y = torch.tensor(y_train, dtype=torch.float)

x_test = (x_test - MEAN) / STD
x_test = x_test.reshape(x_test.shape[0], -1)
y_test = y_test.reshape(y_test.shape[0])
x_te = torch.tensor(x_test, dtype=torch.float)
y_te = torch.tensor(y_test, dtype=torch.float)

x_pos = x_pos.cuda(device)
x_negs = [xn.cuda(device) for xn in x_negs]
x, y = x.cuda(device), y.cuda(device)
x_te, y_te = x_te.cuda(device), y_te.cuda(device)
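# Quick sanity check (illustrative, can be removed): in negative group k the
# label row encodes (y + k) % 10, so no negative sample ever carries its true label.
for _k, _n in enumerate(negs, start=1):
    assert np.argmax(_n[0, 0, :10]) == (y_train[0] + _k) % 10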
def overlay_y_on_x(x, y):
    x_ = x.clone()
    x_[:, :10] *= 0.0
    x_[range(x.shape[0]), y] = x.max()
    return x_
# Implements Hinton's trick of embedding the label into the pixels as a one-hot code:
# clone x (a 784-dim tensor per sample), zero the first ten pixels, then write the
# maximum normalized pixel value at the position of a correct or incorrect label y.
# With the correct label the sample is positive; with a wrong label it is negative.

class Layer(nn.Linear):
    def __init__(self, in_features, out_features,
                 bias=True, device=None, dtype=None):
        super().__init__(in_features, out_features, bias, device, dtype)

        self.relu = torch.nn.ReLU()
        self.opt = Adam(self.parameters(), lr=0.06)
        self.threshold = 2.0
        self.num_epochs = 126
        # Basic hyperparameters. A threshold of 2.0 follows the common reference
        # implementations; I have not found a principled derivation for it.

    def forward(self, x):
        # Length-normalize each sample (L2 norm plus a small epsilon) so that only
        # the orientation of the previous layer's activity is passed on.
        x_direction = x / (x.norm(2, 1, keepdim=True) + 0.02)
        return self.relu(torch.mm(x_direction, self.weight.T) + self.bias.unsqueeze(0))

    def train_layer(self, x_pos, x_negs):
        # Named train_layer so it does not shadow nn.Module.train.
        for i in tqdm(range(self.num_epochs)):
            for b in range(200):  # 200 minibatches of 300 samples each
                sl = slice(b*300, (b+1)*300)
                g_pos = self.forward(x_pos[sl]).pow(2).mean(1)
                g_negs = [self.forward(xn[sl]).pow(2).mean(1) for xn in x_negs]
                # The following loss pushes pos (neg) samples to values larger
                # (smaller) than self.threshold; the nine negative margins are
                # averaged so they carry the same total weight as the positives.
                neg_margin = sum(g - self.threshold for g in g_negs) / 9
                loss = torch.log(1 + torch.exp(torch.cat([-g_pos + self.threshold, neg_margin]))).mean()
                self.opt.zero_grad()
                # this backward just computes the derivative within the layer and
                # hence is not considered backpropagation.
                loss.backward()
                self.opt.step()
        return self.forward(x_pos).detach(), [self.forward(xn).detach() for xn in x_negs]
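# Design note: an alternative is to concatenate all nine negative margins
# instead of averaging them, which lets negatives outnumber positives 9:1 in
# every batch. A sketch of that variant, in the notation of train_layer above
# (not used by this script):
#
#   margins = [g - self.threshold for g in g_negs]
#   loss = torch.log(1 + torch.exp(torch.cat([-g_pos + self.threshold] + margins))).mean()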
class Net(torch.nn.Module):

    def __init__(self, dims):
        super().__init__()
        self.layers = []
        for d in range(len(dims) - 1):
            self.layers += [Layer(dims[d], dims[d + 1]).cuda(device)]

    def predict(self, x):
        # Try every label overlay and pick the one with the highest total goodness.
        goodness_per_label = []
        for label in range(10):
            h = overlay_y_on_x(x, label)
            goodness = []
            for layer in self.layers:
                h = layer(h)
                goodness += [h.pow(2).mean(1)]
            goodness_per_label += [sum(goodness).unsqueeze(1)]
        goodness_per_label = torch.cat(goodness_per_label, 1)
        return goodness_per_label.argmax(1)

    def train_net(self, x_pos, x_negs):
        # Train greedily, layer by layer; the actual update rule lives in Layer.
        h_pos, h_negs = x_pos, x_negs
        for i, layer in enumerate(self.layers):
            print('training layer', i, '...')
            h_pos, h_negs = layer.train_layer(h_pos, h_negs)


if __name__ == "__main__":
    torch.manual_seed(123)

    net = Net([784, 2000, 2000, 2000, 2000])
    net.train_net(x_pos, x_negs)

    print('train score:', 100*net.predict(x[0: 10000]).eq(y[0: 10000]).float().mean().item(), "%")
    print('test score:', 100*net.predict(x_te).eq(y_te).float().mean().item(), "%")
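    # Optional per-digit breakdown (an illustrative add-on, not part of the
    # reported results): shows how accuracy is distributed across classes.
    for d in range(10):
        m = (y_te == d)
        print('digit', d, ':', 100*net.predict(x_te[m]).eq(y_te[m]).float().mean().item(), "%")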
--------------------------------------------------------------------------------
/Unsupervised Learning.py:
--------------------------------------------------------------------------------
# %%
import os
import torch
import torch.nn as nn
import torchvision
import tensorflow as tf  # used only to load MNIST via tf.keras.datasets
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm  # progress bar for training
from torch.optim import Adam  # learning rule for weight optimization


# %%
# DataLoaders for the linear-probe stage (softmax classifier on frozen FF features).
train_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST('./files/', train=True, download=True,
                               transform=torchvision.transforms.Compose([
                                   torchvision.transforms.ToTensor(),
                                   torchvision.transforms.Normalize(
                                       (0.1307,), (0.3081,))
                               ])),
    batch_size=100, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST('./files/', train=False, download=True,
                               transform=torchvision.transforms.Compose([
                                   torchvision.transforms.ToTensor(),
                                   torchvision.transforms.Normalize(
                                       (0.1307,), (0.3081,))
                               ])),
    batch_size=100, shuffle=True)


# %%
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train.shape, x_test.shape

# %%
# A hand-drawn binary mask with large digit-shaped regions, specified as
# (row, col_start, col_stop) spans. It is used to splice two images together.
spans = [(1, 13, 15), (1, 23, 25), (2, 10, 26), (3, 9, 27), (4, 8, 27),
         (5, 5, 26), (6, 4, 26), (7, 4, 18), (7, 23, 26), (8, 3, 17),
         (8, 24, 26), (9, 3, 16), (9, 24, 26), (10, 3, 16), (10, 25, 27),
         (11, 3, 15), (11, 25, 27), (12, 3, 12), (12, 24, 27), (13, 3, 9),
         (13, 24, 27), (14, 3, 8), (14, 24, 27), (15, 3, 7), (15, 24, 27),
         (16, 3, 8), (16, 24, 27), (17, 3, 9), (17, 18, 22), (17, 24, 26),
         (18, 4, 10), (18, 18, 22), (18, 23, 26), (19, 5, 10), (19, 23, 26),
         (20, 6, 10), (20, 23, 27), (21, 7, 10), (21, 23, 27), (22, 7, 10),
         (22, 23, 27), (23, 7, 9), (23, 23, 27), (24, 11, 13), (24, 23, 27),
         (25, 24, 27), (26, 25, 26)]
mask_ex = np.zeros((28, 28))
for r, c0, c1 in spans:
    mask_ex[r, c0:c1] = 1.0
plt.imshow(mask_ex, cmap='gray')

# %%
# The second mask is the exact complement of the first.
mask_ex2 = 1.0 - mask_ex
plt.imshow(mask_ex2, cmap='gray')


# %%
# Example hybrid: one digit seen through the mask, another through its complement.
a = x_train[15] * mask_ex + x_train[18] * mask_ex2
plt.imshow(a, cmap='gray')
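# %%
# Sanity check (illustrative): the two masks are exact complements, so every
# pixel of a hybrid image comes from exactly one of the two source images.
assert np.array_equal(mask_ex + mask_ex2, np.ones((28, 28)))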
# %%
# Pair every training image with a uniformly random partner image.
rnd_idx = np.random.randint(0, x_train.shape[0], size=x_train.shape[0])
x_train_rnd = x_train[rnd_idx]


# %%
# Negative data: splice each image with its random partner through the masks.
# Positive data: the unmodified images.
x_neg = x_train * mask_ex + x_train_rnd * mask_ex2
plt.imshow(x_neg[1], cmap='gray')
x_pos = x_train

# %%
device = 'cuda:2'

# %%
# 33.31... / 78.57... are the MNIST pixel mean and standard deviation
x_pos = (x_pos - 33.31002426147461) / 78.56748962402344
x_neg = (x_neg - 33.31002426147461) / 78.56748962402344
pos = x_pos.reshape(x_pos.shape[0], -1)
neg = x_neg.reshape(x_neg.shape[0], -1)
x_pos = torch.tensor(pos, dtype=torch.float)
x_neg = torch.tensor(neg, dtype=torch.float)

x_test = (x_test - 33.31002426147461) / 78.56748962402344
x_test = x_test.reshape(x_test.shape[0], -1)
y_test = y_test.reshape(y_test.shape[0])
x_te = torch.tensor(x_test, dtype=torch.float)
y_te = torch.tensor(y_test, dtype=torch.float)

x_train = (x_train - 33.31002426147461) / 78.56748962402344
x_train = x_train.reshape(x_train.shape[0], -1)
y_train = y_train.reshape(y_train.shape[0])
x = torch.tensor(x_train, dtype=torch.float)
y = torch.tensor(y_train, dtype=torch.float)

x_pos, x_neg = x_pos.cuda(device), x_neg.cuda(device)
x_te, y_te = x_te.cuda(device), y_te.cuda(device)
x, y = x.cuda(device), y.cuda(device)

print(x.shape, y.shape, x.dtype)
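# %%
# Illustrative check on the random pairing (rnd_idx from above): roughly 90%
# of hybrid negatives splice together two different digit classes.
print('cross-class hybrids:', (y_train[rnd_idx] != y_train).mean())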
# %%
class Layer(nn.Linear):
    def __init__(self, in_features, out_features,
                 bias=True, device=None, dtype=None):
        super().__init__(in_features, out_features, bias, device, dtype)
        self.relu = torch.nn.ReLU()
        self.opt = Adam(self.parameters(), lr=0.06)
        self.threshold = 2
        self.num_epochs = 100

    def forward(self, x):
        # Length-normalize each sample (L2 norm plus a small epsilon) so that only
        # the orientation of the previous layer's activity is passed on.
        x_direction = x / (x.norm(2, 1, keepdim=True) + 0.02)
        normalized_activity = self.relu(torch.mm(x_direction, self.weight.T) + self.bias.unsqueeze(0))
        return normalized_activity

    def train_layer(self, x_pos, x_neg):
        for i in tqdm(range(self.num_epochs)):
            for b in range(60):  # 60 minibatches of 1,000 samples each
                g_pos = self.forward(x_pos[b*1000: (b+1)*1000]).pow(2).mean(1)
                g_neg = self.forward(x_neg[b*1000: (b+1)*1000]).pow(2).mean(1)
                # The following loss pushes pos (neg) samples to
                # values larger (smaller) than the self.threshold.
                loss = torch.log(1 + torch.exp(torch.cat([
                    -g_pos + self.threshold,
                    g_neg - self.threshold]))).mean()
                self.opt.zero_grad()
                # this backward just computes the derivative within the layer and
                # hence is not considered backpropagation.
                loss.backward()
                self.opt.step()
        return self.forward(x_pos).detach(), self.forward(x_neg).detach()

# %%

class FFNet(torch.nn.Module):

    def __init__(self):
        super().__init__()
        self.hlayer1 = Layer(784, 2000)
        self.hlayer2 = Layer(2000, 2000)
        self.hlayer3 = Layer(2000, 2000)
        self.hlayer4 = Layer(2000, 2000)
        self.layers = nn.Sequential(self.hlayer1.cuda(device), self.hlayer2.cuda(device),
                                    self.hlayer3.cuda(device), self.hlayer4.cuda(device))

    def train_ffnet(self, x_pos, x_neg):
        # Train greedily, layer by layer; the actual update rule lives in Layer.
        h_pos, h_neg = x_pos, x_neg
        for i, layer in enumerate(self.layers):
            print('training layer', i, '...')
            h_pos, h_neg = layer.train_layer(h_pos, h_neg)


# %%
# Train the FF layers on the unsupervised positive/negative data built above.
net = FFNet()
net.train_ffnet(x_pos, x_neg)

def freeze(model):
    # Freeze all parameters so the linear probe below trains only its own weights.
    for param in model.parameters():
        param.requires_grad = False

# %%
# Inspect the trained weights (in a notebook cell, the last expression is displayed).
net.hlayer1.weight
net.hlayer2.weight
net.hlayer3.weight
net.hlayer4.weight
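# %%
# Illustrative check: count the FF parameters that will be frozen; the linear
# readout defined below (6000 -> 10) is then the only trainable part.
print('FF parameters:', sum(p.numel() for p in net.parameters()))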
# %%

h1 = net.hlayer1(x)
h2 = net.hlayer2(h1)
h3 = net.hlayer3(h2)
h4 = net.hlayer4(h3)

h1.shape, h2.shape, h3.shape, h4.shape


# %%
h5 = torch.cat((h2, h3, h4), 1)
print(h5.shape, h5, h5.size(0))
h6 = h5.view(h5.size(0), -1)
print(h6.shape, h6)

# %%
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        self.hlayer1 = net.hlayer1
        self.hlayer2 = net.hlayer2
        self.hlayer3 = net.hlayer3
        self.hlayer4 = net.hlayer4
        freeze(self)  # freeze the FF layers; the fc readout added next stays trainable
        self.fc = nn.Linear(6000, 10).cuda(device)

    def forward(self, x):
        x = torch.flatten(x, 1)
        n1 = self.hlayer1(x)
        n2 = self.hlayer2(n1)
        n3 = self.hlayer3(n2)
        n4 = self.hlayer4(n3)
        # Length-normalize each layer's activity by its own norm (the first
        # hidden layer is left out of the classifier's input).
        n2 = n2 / (n2.norm(2, 1, keepdim=True) + 0.01)
        n3 = n3 / (n3.norm(2, 1, keepdim=True) + 0.01)
        n4 = n4 / (n4.norm(2, 1, keepdim=True) + 0.01)

        n5 = torch.cat((n2, n3, n4), 1)
        n5 = n5.view(n5.size(0), -1)

        output = self.fc(n5)

        return output

network = Net()
print(network.fc)


# %%
criterion = torch.nn.CrossEntropyLoss()
# Optimize only the parameters that still require gradients (i.e., the fc layer).
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, network.parameters()), lr=0.01)

# %%
output = network(x)
output.shape

# %%
n_epochs = 1
batch_size_train = 100
batch_size_test = 1000

# %%
train_losses = []
train_counter = []
test_losses = []
test_counter = [i * len(train_loader.dataset) for i in range(n_epochs + 1)]


# %%
os.makedirs('./results', exist_ok=True)  # checkpoint directory for torch.save below

def train(epoch):
    network.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.cuda(device)
        target = target.cuda(device)
        optimizer.zero_grad()
        output = network(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()  # apply the gradient update to the fc layer
        if batch_idx % 10 == 0:
            print("batch_idx = ", batch_idx)
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

            train_losses.append(loss.item())
            train_counter.append(
                (batch_idx * batch_size_train) + ((epoch - 1) * len(train_loader.dataset)))
            torch.save(network.state_dict(), './results/model.pth')
            torch.save(optimizer.state_dict(), './results/optimizer.pth')


# %%
for i in range(1, n_epochs + 1):
    train(i)

# %%
# Inspect the FF weights again to confirm the frozen layers did not change.
net.hlayer1.weight
net.hlayer2.weight
net.hlayer3.weight
net.hlayer4.weight

# %%
def test():
    network.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data = data.cuda(device)
            target = target.cuda(device)
            output = network(data)
            test_loss += criterion(output, target).item()
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).sum()
    test_loss /= len(test_loader.dataset)
    test_losses.append(test_loss)
    print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
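# %%
# The training loop records train_losses / train_counter; a quick look at the
# optimization curve (illustrative):
plt.figure()
plt.plot(train_counter, train_losses, color='blue')
plt.xlabel('training examples seen')
plt.ylabel('cross-entropy loss')
plt.show()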
# %%
test()

--------------------------------------------------------------------------------