├── README.md
├── Supervised Learning 1.py
├── Supervised Learning 2.py
└── Unsupervised Learning.py

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Huanqiang-FF-Algorithms
A PyTorch implementation of Hinton's [Forward-Forward Algorithm](https://arxiv.org/abs/2212.13345).


# Progress
■ Supervised learning on MNIST with one group of negative samples (test accuracy: 96.6%)
■ Supervised learning on MNIST with nine groups of negative samples (test accuracy: 98.06%)
□ Unsupervised learning on MNIST (test accuracy: 95%)


Questions can be sent to duan0001@outlook.com; the code will be updated in the coming months.
--------------------------------------------------------------------------------
/Supervised Learning 1.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import tensorflow as tf  # used only to load MNIST via tf.keras.datasets
import numpy as np
from tqdm import tqdm  # progress bar for training
from torch.optim import Adam  # learning rule for weight optimization


device = "cuda:3"
print(device)

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
print(x_train.shape, x_test.shape)

def edit_data(x, y, method="edit"):
    # Write the label as a one-hot code into the first ten pixels of row 0.
    is_batch = x.ndim == 3
    if method == "edit":
        if is_batch:
            x[:, 0, :10] = 0.0
            for i in range(x.shape[0]):
                x[i, 0, y[i]] = 255
        else:
            x[0, :10] = 0.0
            x[0, y] = 255

def random_label(y):
    # Return a random label different from y (elementwise for arrays).
    if type(y) != np.ndarray:
        label = list(range(10))
        del label[y]
        return np.random.choice(label)
    else:
        label = np.copy(y)
        for i in range(y.shape[0]):
            label[i] = random_label(y[i])
        return label

pos = np.copy(x_train)
neg = np.copy(x_train)
edit_data(pos, y_train)                # positive: the true label overlaid
edit_data(neg, random_label(y_train))  # negative: a wrong label overlaid


# 33.31... / 78.57... are the MNIST pixel mean and standard deviation
pos = (pos - 33.31002426147461) / 78.56748962402344
neg = (neg - 33.31002426147461) / 78.56748962402344
pos = pos.reshape(pos.shape[0], -1)
neg = neg.reshape(neg.shape[0], -1)
x_pos = torch.tensor(pos, dtype=torch.float)
x_neg = torch.tensor(neg, dtype=torch.float)

x_train = (x_train - 33.31002426147461) / 78.56748962402344
x_train = x_train.reshape(x_train.shape[0], -1)
y_train = y_train.reshape(y_train.shape[0])
x = torch.tensor(x_train, dtype=torch.float)
y = torch.tensor(y_train, dtype=torch.float)

x_test = (x_test - 33.31002426147461) / 78.56748962402344
x_test = x_test.reshape(x_test.shape[0], -1)
y_test = y_test.reshape(y_test.shape[0])
x_te = torch.tensor(x_test, dtype=torch.float)
y_te = torch.tensor(y_test, dtype=torch.float)


x_pos, x_neg, x, y = x_pos.cuda(device), x_neg.cuda(device), x.cuda(device), y.cuda(device)
x_te, y_te = x_te.cuda(device), y_te.cuda(device)

def overlay_y_on_x(x, y):
    x_ = x.clone()
    x_[:, :10] *= 0.0
    x_[range(x.shape[0]), y] = x.max()
    return x_
# Implements Hinton's trick of embedding the label into the pixels as a one-hot code:
# clone x (a 784-dim tensor per sample), zero the first ten pixels, then write the
# maximum normalized pixel value at the position of a correct or incorrect label y.
# With the correct label the sample is positive; with a wrong label it is negative.
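# A quick sanity check of the overlay (illustrative only; demo_x and demo_y
# are made-up names, not part of the original script):
demo_x = torch.rand(2, 784)
demo_y = torch.tensor([3, 7])
demo_out = overlay_y_on_x(demo_x, demo_y)
assert demo_out[0, 3] == demo_x.max() and demo_out[1, 7] == demo_x.max()
assert demo_out[:, :10].count_nonzero() == 2  # only the label positions survive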
class Layer(nn.Linear):
    def __init__(self, in_features, out_features,
                 bias=True, device=None, dtype=None):
        super().__init__(in_features, out_features, bias, device, dtype)

        self.relu = torch.nn.ReLU()
        self.opt = Adam(self.parameters(), lr=0.06)
        self.threshold = 2.0
        self.num_epochs = 120
        # Basic hyperparameters. A threshold of 2.0 follows the common reference
        # implementations; I have not found a principled derivation for it.

    def forward(self, x):
        # Length-normalize each sample (L2 norm plus a small epsilon) so that only
        # the orientation of the previous layer's activity is passed on.
        x_direction = x / (x.norm(2, 1, keepdim=True) + 0.01)
        return self.relu(torch.mm(x_direction, self.weight.T) + self.bias.unsqueeze(0))

    def train_layer(self, x_pos, x_neg):
        # Named train_layer so it does not shadow nn.Module.train.
        for i in tqdm(range(self.num_epochs)):
            for b in range(60):  # 60 minibatches of 1,000 samples each
                g_pos = self.forward(x_pos[b*1000: (b+1)*1000]).pow(2).mean(1)
                g_neg = self.forward(x_neg[b*1000: (b+1)*1000]).pow(2).mean(1)
                # The following loss pushes pos (neg) samples to
                # values larger (smaller) than the self.threshold.
                loss = torch.log(1 + torch.exp(torch.cat([-g_pos + self.threshold, g_neg - self.threshold]))).mean()
                self.opt.zero_grad()
                # this backward just computes the derivative within the layer and
                # hence is not considered backpropagation.
                loss.backward()
                self.opt.step()
        return self.forward(x_pos).detach(), self.forward(x_neg).detach()  # new tensors, detached from the graph
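# The loss above is the softplus of the stacked margins. A self-contained
# numerical check with dummy goodness values (illustrative only; the
# underscore-prefixed names are not part of the original script):
_thr = 2.0
_g_pos = torch.tensor([2.5, 1.0])
_g_neg = torch.tensor([1.5, 3.0])
_loss_a = torch.log(1 + torch.exp(torch.cat([-_g_pos + _thr, _g_neg - _thr]))).mean()
_loss_b = torch.nn.functional.softplus(torch.cat([_thr - _g_pos, _g_neg - _thr])).mean()
assert torch.allclose(_loss_a, _loss_b)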
class Net(torch.nn.Module):

    def __init__(self, dims):
        super().__init__()
        self.layers = []
        for d in range(len(dims) - 1):
            self.layers += [Layer(dims[d], dims[d + 1]).cuda(device)]

    def predict(self, x):
        # Try every label overlay and pick the one with the highest total goodness.
        goodness_per_label = []
        for label in range(10):
            h = overlay_y_on_x(x, label)
            goodness = []
            for layer in self.layers:
                h = layer(h)
                goodness += [h.pow(2).mean(1)]
            goodness_per_label += [sum(goodness).unsqueeze(1)]
        goodness_per_label = torch.cat(goodness_per_label, 1)
        return goodness_per_label.argmax(1)

    def train_net(self, x_pos, x_neg):
        # Train greedily, layer by layer; the actual update rule lives in Layer.
        h_pos, h_neg = x_pos, x_neg
        for i, layer in enumerate(self.layers):
            print('training layer', i, '...')
            h_pos, h_neg = layer.train_layer(h_pos, h_neg)


if __name__ == "__main__":
    torch.manual_seed(123)

    net = Net([784, 2000, 2000, 2000, 2000])
    net.train_net(x_pos, x_neg)

    print('train score:', 100*net.predict(x[0: 1000]).eq(y[0: 1000]).float().mean().item(), "%")
    print('test score:', 100*net.predict(x_te[0: 2000]).eq(y_te[0: 2000]).float().mean().item(), "%")
    print('test score:', 100*net.predict(x_te[2000: 4000]).eq(y_te[2000: 4000]).float().mean().item(), "%")
    print('test score:', 100*net.predict(x_te[4000: 6000]).eq(y_te[4000: 6000]).float().mean().item(), "%")
    print('test score:', 100*net.predict(x_te[6000: 8000]).eq(y_te[6000: 8000]).float().mean().item(), "%")
    print('test score:', 100*net.predict(x_te[8000: 10000]).eq(y_te[8000: 10000]).float().mean().item(), "%")
    # Overall accuracy on the full test set.
    print('final accuracy', 100*net.predict(x_te).eq(y_te).float().mean().item(), '%')
--------------------------------------------------------------------------------
/Supervised Learning 2.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import tensorflow as tf  # used only to load MNIST via tf.keras.datasets
import numpy as np
from tqdm import tqdm  # progress bar for training
from torch.optim import Adam  # learning rule for weight optimization



device = "cuda:5"
print(device)


(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
print(x_train.shape, x_test.shape)


def edit_data(x, y, method="edit"):
    # Write the label as a one-hot code into the first ten pixels of row 0.
    is_batch = x.ndim == 3
    if method == "edit":
        if is_batch:
            x[:, 0, :10] = 0.0
            for i in range(x.shape[0]):
                x[i, 0, y[i]] = 255
        else:
            x[0, :10] = 0.0
            x[0, y] = 255

pos = np.copy(x_train)
edit_data(pos, y_train)

# Nine groups of negative samples: group k overlays the label (y + k) % 10,
# so every wrong label appears exactly once per image.
negs = []
for k in range(1, 10):
    n = np.copy(x_train)
    edit_data(n, (y_train + k) % 10)
    negs.append(n)


MEAN, STD = 33.31002426147461, 78.56748962402344  # MNIST pixel mean / std

pos = (pos - MEAN) / STD
pos = pos.reshape(pos.shape[0], -1)
x_pos = torch.tensor(pos, dtype=torch.float)

x_negs = []
for n in negs:
    n_flat = ((n - MEAN) / STD).reshape(n.shape[0], -1)
    x_negs.append(torch.tensor(n_flat, dtype=torch.float))

x_train = (x_train - MEAN) / STD
x_train = x_train.reshape(x_train.shape[0], -1)
y_train = y_train.reshape(y_train.shape[0])
x = torch.tensor(x_train, dtype=torch.float)
y = torch.tensor(y_train, dtype=torch.float)

x_test = (x_test - MEAN) / STD
x_test = x_test.reshape(x_test.shape[0], -1)
y_test = y_test.reshape(y_test.shape[0])
x_te = torch.tensor(x_test, dtype=torch.float)
y_te = torch.tensor(y_test, dtype=torch.float)

x_pos = x_pos.cuda(device)
x_negs = [xn.cuda(device) for xn in x_negs]
x, y = x.cuda(device), y.cuda(device)
x_te, y_te = x_te.cuda(device), y_te.cuda(device)
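# Quick sanity check (illustrative, can be removed): in negative group k the
# label row encodes (y + k) % 10, so no negative sample ever carries its true label.
for _k, _n in enumerate(negs, start=1):
    assert np.argmax(_n[0, 0, :10]) == (y_train[0] + _k) % 10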
def overlay_y_on_x(x, y):
    x_ = x.clone()
    x_[:, :10] *= 0.0
    x_[range(x.shape[0]), y] = x.max()
    return x_
# Implements Hinton's trick of embedding the label into the pixels as a one-hot code:
# clone x (a 784-dim tensor per sample), zero the first ten pixels, then write the
# maximum normalized pixel value at the position of a correct or incorrect label y.
# With the correct label the sample is positive; with a wrong label it is negative.

class Layer(nn.Linear):
    def __init__(self, in_features, out_features,
                 bias=True, device=None, dtype=None):
        super().__init__(in_features, out_features, bias, device, dtype)

        self.relu = torch.nn.ReLU()
        self.opt = Adam(self.parameters(), lr=0.06)
        self.threshold = 2.0
        self.num_epochs = 126
        # Basic hyperparameters. A threshold of 2.0 follows the common reference
        # implementations; I have not found a principled derivation for it.

    def forward(self, x):
        # Length-normalize each sample (L2 norm plus a small epsilon) so that only
        # the orientation of the previous layer's activity is passed on.
        x_direction = x / (x.norm(2, 1, keepdim=True) + 0.02)
        return self.relu(torch.mm(x_direction, self.weight.T) + self.bias.unsqueeze(0))

    def train_layer(self, x_pos, x_negs):
        # Named train_layer so it does not shadow nn.Module.train.
        for i in tqdm(range(self.num_epochs)):
            for b in range(200):  # 200 minibatches of 300 samples each
                sl = slice(b*300, (b+1)*300)
                g_pos = self.forward(x_pos[sl]).pow(2).mean(1)
                g_negs = [self.forward(xn[sl]).pow(2).mean(1) for xn in x_negs]
                # The following loss pushes pos (neg) samples to values larger
                # (smaller) than self.threshold; the nine negative margins are
                # averaged so they carry the same total weight as the positives.
                neg_margin = sum(g - self.threshold for g in g_negs) / 9
                loss = torch.log(1 + torch.exp(torch.cat([-g_pos + self.threshold, neg_margin]))).mean()
                self.opt.zero_grad()
                # this backward just computes the derivative within the layer and
                # hence is not considered backpropagation.
                loss.backward()
                self.opt.step()
        return self.forward(x_pos).detach(), [self.forward(xn).detach() for xn in x_negs]
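# Design note: an alternative is to concatenate all nine negative margins
# instead of averaging them, which lets negatives outnumber positives 9:1 in
# every batch. A sketch of that variant, in the notation of train_layer above
# (not used by this script):
#
#   margins = [g - self.threshold for g in g_negs]
#   loss = torch.log(1 + torch.exp(torch.cat([-g_pos + self.threshold] + margins))).mean()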
class Net(torch.nn.Module):

    def __init__(self, dims):
        super().__init__()
        self.layers = []
        for d in range(len(dims) - 1):
            self.layers += [Layer(dims[d], dims[d + 1]).cuda(device)]

    def predict(self, x):
        # Try every label overlay and pick the one with the highest total goodness.
        goodness_per_label = []
        for label in range(10):
            h = overlay_y_on_x(x, label)
            goodness = []
            for layer in self.layers:
                h = layer(h)
                goodness += [h.pow(2).mean(1)]
            goodness_per_label += [sum(goodness).unsqueeze(1)]
        goodness_per_label = torch.cat(goodness_per_label, 1)
        return goodness_per_label.argmax(1)

    def train_net(self, x_pos, x_negs):
        # Train greedily, layer by layer; the actual update rule lives in Layer.
        h_pos, h_negs = x_pos, x_negs
        for i, layer in enumerate(self.layers):
            print('training layer', i, '...')
            h_pos, h_negs = layer.train_layer(h_pos, h_negs)


if __name__ == "__main__":
    torch.manual_seed(123)

    net = Net([784, 2000, 2000, 2000, 2000])
    net.train_net(x_pos, x_negs)

    print('train score:', 100*net.predict(x[0: 10000]).eq(y[0: 10000]).float().mean().item(), "%")
    print('test score:', 100*net.predict(x_te).eq(y_te).float().mean().item(), "%")
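    # Optional per-digit breakdown (an illustrative add-on, not part of the
    # reported results): shows how accuracy is distributed across classes.
    for d in range(10):
        m = (y_te == d)
        print('digit', d, ':', 100*net.predict(x_te[m]).eq(y_te[m]).float().mean().item(), "%")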
--------------------------------------------------------------------------------
/Unsupervised Learning.py:
--------------------------------------------------------------------------------
# %%
import os
import torch
import torch.nn as nn
import torchvision
import tensorflow as tf  # used only to load MNIST via tf.keras.datasets
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm  # progress bar for training
from torch.optim import Adam  # learning rule for weight optimization


# %%
# DataLoaders for the linear-probe stage (softmax classifier on frozen FF features).
train_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST('./files/', train=True, download=True,
                               transform=torchvision.transforms.Compose([
                                   torchvision.transforms.ToTensor(),
                                   torchvision.transforms.Normalize(
                                       (0.1307,), (0.3081,))
                               ])),
    batch_size=100, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST('./files/', train=False, download=True,
                               transform=torchvision.transforms.Compose([
                                   torchvision.transforms.ToTensor(),
                                   torchvision.transforms.Normalize(
                                       (0.1307,), (0.3081,))
                               ])),
    batch_size=100, shuffle=True)


# %%
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train.shape, x_test.shape

# %%
# A hand-drawn binary mask with large digit-shaped regions, specified as
# (row, col_start, col_stop) spans. It is used to splice two images together.
spans = [(1, 13, 15), (1, 23, 25), (2, 10, 26), (3, 9, 27), (4, 8, 27),
         (5, 5, 26), (6, 4, 26), (7, 4, 18), (7, 23, 26), (8, 3, 17),
         (8, 24, 26), (9, 3, 16), (9, 24, 26), (10, 3, 16), (10, 25, 27),
         (11, 3, 15), (11, 25, 27), (12, 3, 12), (12, 24, 27), (13, 3, 9),
         (13, 24, 27), (14, 3, 8), (14, 24, 27), (15, 3, 7), (15, 24, 27),
         (16, 3, 8), (16, 24, 27), (17, 3, 9), (17, 18, 22), (17, 24, 26),
         (18, 4, 10), (18, 18, 22), (18, 23, 26), (19, 5, 10), (19, 23, 26),
         (20, 6, 10), (20, 23, 27), (21, 7, 10), (21, 23, 27), (22, 7, 10),
         (22, 23, 27), (23, 7, 9), (23, 23, 27), (24, 11, 13), (24, 23, 27),
         (25, 24, 27), (26, 25, 26)]
mask_ex = np.zeros((28, 28))
for r, c0, c1 in spans:
    mask_ex[r, c0:c1] = 1.0
plt.imshow(mask_ex, cmap='gray')

# %%
# The second mask is the exact complement of the first.
mask_ex2 = 1.0 - mask_ex
plt.imshow(mask_ex2, cmap='gray')


# %%
# Example hybrid: one digit seen through the mask, another through its complement.
a = x_train[15] * mask_ex + x_train[18] * mask_ex2
plt.imshow(a, cmap='gray')
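# %%
# Sanity check (illustrative): the two masks are exact complements, so every
# pixel of a hybrid image comes from exactly one of the two source images.
assert np.array_equal(mask_ex + mask_ex2, np.ones((28, 28)))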
# %%
# Pair every training image with a uniformly random partner image.
rnd_idx = np.random.randint(0, x_train.shape[0], size=x_train.shape[0])
x_train_rnd = x_train[rnd_idx]


# %%
# Negative data: splice each image with its random partner through the masks.
# Positive data: the unmodified images.
x_neg = x_train * mask_ex + x_train_rnd * mask_ex2
plt.imshow(x_neg[1], cmap='gray')
x_pos = x_train

# %%
device = 'cuda:2'

# %%
# 33.31... / 78.57... are the MNIST pixel mean and standard deviation
x_pos = (x_pos - 33.31002426147461) / 78.56748962402344
x_neg = (x_neg - 33.31002426147461) / 78.56748962402344
pos = x_pos.reshape(x_pos.shape[0], -1)
neg = x_neg.reshape(x_neg.shape[0], -1)
x_pos = torch.tensor(pos, dtype=torch.float)
x_neg = torch.tensor(neg, dtype=torch.float)

x_test = (x_test - 33.31002426147461) / 78.56748962402344
x_test = x_test.reshape(x_test.shape[0], -1)
y_test = y_test.reshape(y_test.shape[0])
x_te = torch.tensor(x_test, dtype=torch.float)
y_te = torch.tensor(y_test, dtype=torch.float)

x_train = (x_train - 33.31002426147461) / 78.56748962402344
x_train = x_train.reshape(x_train.shape[0], -1)
y_train = y_train.reshape(y_train.shape[0])
x = torch.tensor(x_train, dtype=torch.float)
y = torch.tensor(y_train, dtype=torch.float)

x_pos, x_neg = x_pos.cuda(device), x_neg.cuda(device)
x_te, y_te = x_te.cuda(device), y_te.cuda(device)
x, y = x.cuda(device), y.cuda(device)

print(x.shape, y.shape, x.dtype)
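# %%
# Illustrative check on the random pairing (rnd_idx from above): roughly 90%
# of hybrid negatives splice together two different digit classes.
print('cross-class hybrids:', (y_train[rnd_idx] != y_train).mean())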
# %%
class Layer(nn.Linear):
    def __init__(self, in_features, out_features,
                 bias=True, device=None, dtype=None):
        super().__init__(in_features, out_features, bias, device, dtype)
        self.relu = torch.nn.ReLU()
        self.opt = Adam(self.parameters(), lr=0.06)
        self.threshold = 2
        self.num_epochs = 100

    def forward(self, x):
        # Length-normalize each sample (L2 norm plus a small epsilon) so that only
        # the orientation of the previous layer's activity is passed on.
        x_direction = x / (x.norm(2, 1, keepdim=True) + 0.02)
        normalized_activity = self.relu(torch.mm(x_direction, self.weight.T) + self.bias.unsqueeze(0))
        return normalized_activity

    def train_layer(self, x_pos, x_neg):
        for i in tqdm(range(self.num_epochs)):
            for b in range(60):  # 60 minibatches of 1,000 samples each
                g_pos = self.forward(x_pos[b*1000: (b+1)*1000]).pow(2).mean(1)
                g_neg = self.forward(x_neg[b*1000: (b+1)*1000]).pow(2).mean(1)
                # The following loss pushes pos (neg) samples to
                # values larger (smaller) than the self.threshold.
                loss = torch.log(1 + torch.exp(torch.cat([
                    -g_pos + self.threshold,
                    g_neg - self.threshold]))).mean()
                self.opt.zero_grad()
                # this backward just computes the derivative within the layer and
                # hence is not considered backpropagation.
                loss.backward()
                self.opt.step()
        return self.forward(x_pos).detach(), self.forward(x_neg).detach()

# %%

class FFNet(torch.nn.Module):

    def __init__(self):
        super().__init__()
        self.hlayer1 = Layer(784, 2000)
        self.hlayer2 = Layer(2000, 2000)
        self.hlayer3 = Layer(2000, 2000)
        self.hlayer4 = Layer(2000, 2000)
        self.layers = nn.Sequential(self.hlayer1.cuda(device), self.hlayer2.cuda(device),
                                    self.hlayer3.cuda(device), self.hlayer4.cuda(device))

    def train_ffnet(self, x_pos, x_neg):
        # Train greedily, layer by layer; the actual update rule lives in Layer.
        h_pos, h_neg = x_pos, x_neg
        for i, layer in enumerate(self.layers):
            print('training layer', i, '...')
            h_pos, h_neg = layer.train_layer(h_pos, h_neg)


# %%
# Train the FF layers on the unsupervised positive/negative data built above.
net = FFNet()
net.train_ffnet(x_pos, x_neg)

def freeze(model):
    # Freeze all parameters so the linear probe below trains only its own weights.
    for param in model.parameters():
        param.requires_grad = False

# %%
# Inspect the trained weights (in a notebook cell, the last expression is displayed).
net.hlayer1.weight
net.hlayer2.weight
net.hlayer3.weight
net.hlayer4.weight
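# %%
# Illustrative check: count the FF parameters that will be frozen; the linear
# readout defined below (6000 -> 10) is then the only trainable part.
print('FF parameters:', sum(p.numel() for p in net.parameters()))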
# %%

h1 = net.hlayer1(x)
h2 = net.hlayer2(h1)
h3 = net.hlayer3(h2)
h4 = net.hlayer4(h3)

h1.shape, h2.shape, h3.shape, h4.shape


# %%
h5 = torch.cat((h2, h3, h4), 1)
print(h5.shape, h5, h5.size(0))
h6 = h5.view(h5.size(0), -1)
print(h6.shape, h6)

# %%
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        self.hlayer1 = net.hlayer1
        self.hlayer2 = net.hlayer2
        self.hlayer3 = net.hlayer3
        self.hlayer4 = net.hlayer4
        freeze(self)  # freeze the FF layers; the fc readout added next stays trainable
        self.fc = nn.Linear(6000, 10).cuda(device)

    def forward(self, x):
        x = torch.flatten(x, 1)
        n1 = self.hlayer1(x)
        n2 = self.hlayer2(n1)
        n3 = self.hlayer3(n2)
        n4 = self.hlayer4(n3)
        # Length-normalize each layer's activity by its own norm (the first
        # hidden layer is left out of the classifier's input).
        n2 = n2 / (n2.norm(2, 1, keepdim=True) + 0.01)
        n3 = n3 / (n3.norm(2, 1, keepdim=True) + 0.01)
        n4 = n4 / (n4.norm(2, 1, keepdim=True) + 0.01)

        n5 = torch.cat((n2, n3, n4), 1)
        n5 = n5.view(n5.size(0), -1)

        output = self.fc(n5)

        return output

network = Net()
print(network.fc)


# %%
criterion = torch.nn.CrossEntropyLoss()
# Optimize only the parameters that still require gradients (i.e., the fc layer).
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, network.parameters()), lr=0.01)

# %%
output = network(x)
output.shape

# %%
n_epochs = 1
batch_size_train = 100
batch_size_test = 1000

# %%
train_losses = []
train_counter = []
test_losses = []
test_counter = [i * len(train_loader.dataset) for i in range(n_epochs + 1)]


# %%
os.makedirs('./results', exist_ok=True)  # checkpoint directory for torch.save below

def train(epoch):
    network.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.cuda(device)
        target = target.cuda(device)
        optimizer.zero_grad()
        output = network(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()  # apply the gradient update to the fc layer
        if batch_idx % 10 == 0:
            print("batch_idx = ", batch_idx)
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

            train_losses.append(loss.item())
            train_counter.append(
                (batch_idx * batch_size_train) + ((epoch - 1) * len(train_loader.dataset)))
            torch.save(network.state_dict(), './results/model.pth')
            torch.save(optimizer.state_dict(), './results/optimizer.pth')


# %%
for i in range(1, n_epochs + 1):
    train(i)

# %%
# Inspect the FF weights again to confirm the frozen layers did not change.
net.hlayer1.weight
net.hlayer2.weight
net.hlayer3.weight
net.hlayer4.weight

# %%
def test():
    network.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data = data.cuda(device)
            target = target.cuda(device)
            output = network(data)
            test_loss += criterion(output, target).item()
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).sum()
    test_loss /= len(test_loader.dataset)
    test_losses.append(test_loss)
    print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
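# %%
# The training loop records train_losses / train_counter; a quick look at the
# optimization curve (illustrative):
plt.figure()
plt.plot(train_counter, train_losses, color='blue')
plt.xlabel('training examples seen')
plt.ylabel('cross-entropy loss')
plt.show()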
# %%
test()

--------------------------------------------------------------------------------