├── char-RNN对姓氏进行分类和生成
├── torchaudio
├── 基础
├── 强化学习(DQN)
├── 空间变换器网络
└── 迁移学习

/char-RNN对姓氏进行分类和生成:
--------------------------------------------------------------------------------
from __future__ import unicode_literals, print_function, division
from io import open
import glob
import os

def findFiles(path): return glob.glob(path)

print(findFiles('*.txt'))

import unicodedata
import string

all_letters = string.ascii_letters + " .,;'"
n_letters = len(all_letters)
print(all_letters)

def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
        and c in all_letters
    )

print(unicodeToAscii('Ślusàrski'))

# Build the per-language dictionary of surnames
category_lines = {}
all_categories = []

def readLines(filename):
    lines = open(filename, encoding='utf-8').read().strip().split('\n')
    return [unicodeToAscii(line) for line in lines]

for filename in findFiles('*.txt'):
    category = os.path.splitext(os.path.basename(filename))[0]
    all_categories.append(category)
    lines = readLines(filename)
    category_lines[category] = lines

n_categories = len(all_categories)
print(n_categories)

print(category_lines['Chinese'][:5])

import torch

# Encode a letter as its index in all_letters
def letterToIndex(letter):
    return all_letters.find(letter)

# Turn a letter into a one-hot vector, e.g. [1, 0, 0, ..., 0]
def letterToTensor(letter):
    tensor = torch.zeros(1, n_letters)
    tensor[0][letterToIndex(letter)] = 1
    return tensor

# Turn a word into a tensor of shape (len(line), 1, n_letters)
def lineToTensor(line):
    tensor = torch.zeros(len(line), 1, n_letters)
    for li, letter in enumerate(line):
        tensor[li][0][letterToIndex(letter)] = 1
    return tensor

print(letterToTensor('J'))

print(lineToTensor('Jones').size())

import torch.nn as nn

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        combined = torch.cat((input, hidden), 1)
        hidden = self.i2h(combined)
        output = self.i2o(combined)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, self.hidden_size)

n_hidden = 128
rnn = RNN(n_letters, n_hidden, n_categories)

input = letterToTensor('A')
hidden = torch.zeros(1, n_hidden)
output, next_hidden = rnn(input, hidden)
print(output)

input = lineToTensor('Albert')
hidden = torch.zeros(1, n_hidden)
output, next_hidden = rnn(input[0], hidden)
print(output)

def categoryFromOutput(output):
    top_n, top_i = output.topk(1)
    category_i = top_i[0].item()
    return all_categories[category_i], category_i

print(categoryFromOutput(output))

import random

# Draw a random training sample
def randomChoice(l):
    return l[random.randint(0, len(l) - 1)]

def randomTrainingExample():
    category = randomChoice(all_categories)
    line = randomChoice(category_lines[category])
    category_tensor = torch.tensor([all_categories.index(category)], dtype=torch.long)
    line_tensor = lineToTensor(line)
    return category, line, category_tensor, line_tensor

for i in range(10):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    print('category =', category, '\t // \t line =', line)

criterion = nn.NLLLoss()
learning_rate = 0.005
# No optimizer object is needed here: the parameters are updated by hand below.

def train(category_tensor, line_tensor):
    hidden = rnn.initHidden()
    rnn.zero_grad()

    for i in range(line_tensor.size()[0]):
        output, hidden = rnn(line_tensor[i], hidden)

    loss = criterion(output, category_tensor)
    loss.backward()
    # Plain SGD step: p -= learning_rate * p.grad
    for p in rnn.parameters():
        p.data.add_(-learning_rate, p.grad.data)

    return output, loss.item()
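The hand-rolled update above is plain SGD. A minimal sketch of the same training step driven by torch.optim.SGD instead (train_with_optimizer is an illustrative name, not part of the tutorial):

# Equivalent training step with an optimizer object (sketch)
optimizer = torch.optim.SGD(rnn.parameters(), lr=learning_rate)

def train_with_optimizer(category_tensor, line_tensor):
    hidden = rnn.initHidden()
    optimizer.zero_grad()
    for i in range(line_tensor.size()[0]):
        output, hidden = rnn(line_tensor[i], hidden)
    loss = criterion(output, category_tensor)
    loss.backward()
    optimizer.step()   # applies p -= lr * p.grad for every parameter
    return output, loss.item()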
import time
import math

n_iters = 100000
print_every = 5000
plot_every = 1000

current_loss = 0
all_losses = []

def timeSince(since):
    now = time.time()
    s = now - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

start = time.time()

for iter in range(1, n_iters + 1):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    output, loss = train(category_tensor, line_tensor)
    current_loss += loss

    if iter % print_every == 0:
        guess, guess_i = categoryFromOutput(output)
        correct = '✓' if guess == category else '✗ (%s)' % category
        print('%d %d%% (%s) %.4f %s / %s %s' % (iter, iter / n_iters * 100, timeSince(start), loss, line, guess, correct))

    if iter % plot_every == 0:
        all_losses.append(current_loss / plot_every)
        current_loss = 0

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

plt.figure()
plt.plot(all_losses)

# Evaluation
def evaluate(line_tensor):
    # Start from a fresh hidden state for each word
    hidden = rnn.initHidden()

    for i in range(line_tensor.size()[0]):
        output, hidden = rnn(line_tensor[i], hidden)

    return output

def predict(input_line, n_predictions=3):
    print('\n> %s' % input_line)
    with torch.no_grad():
        output = evaluate(lineToTensor(input_line))
        topv, topi = output.topk(n_predictions, 1, True)
        predictions = []

        for i in range(n_predictions):
            value = topv[0][i].item()
            category_index = topi[0][i].item()
            print('(%.2f) %s' % (value, all_categories[category_index]))
            predictions.append([value, all_categories[category_index]])

predict('Yu')

# Confusion matrix over random samples
confusion = torch.zeros(n_categories, n_categories)
n_confusion = 10000

for i in range(n_confusion):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    output = evaluate(line_tensor)
    guess, guess_i = categoryFromOutput(output)
    category_i = all_categories.index(category)
    confusion[category_i][guess_i] += 1

# Normalize each row so every cell is a per-category rate
for i in range(n_categories):
    confusion[i] = confusion[i] / confusion[i].sum()

fig = plt.figure()
ax = fig.add_subplot(111)
cax = ax.matshow(confusion.numpy())
fig.colorbar(cax)

ax.set_xticklabels([''] + all_categories, rotation=90)
ax.set_yticklabels([''] + all_categories)

ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

plt.show()
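For reference, PyTorch's built-in nn.RNN runs a whole sequence in one call and returns (output, h_n); there is no Keras-style return_state flag. A hedged sketch of the same classifier on top of nn.RNN (BuiltinRNN is an illustrative name):

class BuiltinRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(BuiltinRNN, self).__init__()
        self.rnn = nn.RNN(input_size, hidden_size)
        self.h2o = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, line_tensor):
        # line_tensor: (seq_len, 1, n_letters); h_n: (1, 1, hidden_size)
        _, h_n = self.rnn(line_tensor)
        return self.softmax(self.h2o(h_n[0]))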
# Generating surnames
import torch
import torch.nn as nn

all_letters = string.ascii_letters + " .,;'-"
n_letters = len(all_letters) + 1  # extra slot for the EOS (end-of-string) marker

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size

        self.i2h = nn.Linear(n_categories + input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(n_categories + input_size + hidden_size, output_size)
        self.o2o = nn.Linear(hidden_size + output_size, output_size)
        # The output is interpreted as the probability of the next letter;
        # dropout adds some randomness to the samples.
        self.dropout = nn.Dropout(0.1)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, category, input, hidden):
        input_combined = torch.cat((category, input, hidden), 1)
        hidden = self.i2h(input_combined)
        output = self.i2o(input_combined)
        output_combined = torch.cat((hidden, output), 1)
        output = self.o2o(output_combined)
        output = self.dropout(output)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, self.hidden_size)

import random

def randomChoice(l):
    return l[random.randint(0, len(l) - 1)]

def randomTrainingPair():
    category = randomChoice(all_categories)
    line = randomChoice(category_lines[category])
    return category, line

# At every time step the network predicts the next letter, so the training
# pairs are consecutive letters of the line; e.g. for "Yu" we create
# ("Y", "u") and ("u", EOS).

def categoryTensor(category):
    li = all_categories.index(category)
    tensor = torch.zeros(1, n_categories)
    tensor[0][li] = 1
    return tensor

# Input letters, e.g. "Yu" -> one-hot rows for "Y", "u"
def inputTensor(line):
    tensor = torch.zeros(len(line), 1, n_letters)
    for li in range(len(line)):
        letter = line[li]
        tensor[li][0][all_letters.find(letter)] = 1
    return tensor

# Target letters, e.g. "Yu" -> indices of "u", EOS
def targetTensor(line):
    letter_indexes = [all_letters.find(line[li]) for li in range(1, len(line))]
    letter_indexes.append(n_letters - 1)  # EOS marker
    return torch.LongTensor(letter_indexes)

# Draw a random (category, input, target) triple
def randomTrainingExample():
    category, line = randomTrainingPair()
    category_tensor = categoryTensor(category)
    input_line_tensor = inputTensor(line)
    target_line_tensor = targetTensor(line)
    return category_tensor, input_line_tensor, target_line_tensor

criterion = nn.NLLLoss()

learning_rate = 0.0005

def train(category_tensor, input_line_tensor, target_line_tensor):
    target_line_tensor.unsqueeze_(-1)
    hidden = rnn.initHidden()

    rnn.zero_grad()

    loss = 0

    for i in range(input_line_tensor.size(0)):
        output, hidden = rnn(category_tensor, input_line_tensor[i], hidden)
        l = criterion(output, target_line_tensor[i])
        loss += l

    loss.backward()

    for p in rnn.parameters():
        p.data.add_(-learning_rate, p.grad.data)

    return output, loss.item() / input_line_tensor.size(0)

rnn = RNN(n_letters, 128, n_letters)

n_iters = 100000
print_every = 5000
plot_every = 500
all_losses = []
total_loss = 0

start = time.time()

for iter in range(1, n_iters + 1):
    output, loss = train(*randomTrainingExample())
    total_loss += loss

    if iter % print_every == 0:
        print('%s (%d %d%%) %.4f' % (timeSince(start), iter, iter / n_iters * 100, loss))

    if iter % plot_every == 0:
        all_losses.append(total_loss / plot_every)
        total_loss = 0
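As in the classification half, the collected losses can be plotted (matplotlib is already imported above):

plt.figure()
plt.plot(all_losses)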
max_length = 20

def sample(category, start_letter='A'):
    with torch.no_grad():
        category_tensor = categoryTensor(category)
        input = inputTensor(start_letter)
        hidden = rnn.initHidden()

        output_name = start_letter

        for i in range(max_length):
            output, hidden = rnn(category_tensor, input[0], hidden)
            topv, topi = output.topk(1)
            topi = topi[0][0]
            if topi == n_letters - 1:  # EOS: stop sampling
                break
            else:
                letter = all_letters[topi]
                output_name += letter
                input = inputTensor(letter)

        return output_name

def samples(category, start_letters='ABC'):
    for start_letter in start_letters:
        print(sample(category, start_letter))

samples('Chinese', 'Y')
samples('Chinese', 'YYY')

--------------------------------------------------------------------------------
/torchaudio:
--------------------------------------------------------------------------------
Processing audio with torchaudio.
torchaudio also ships Kaldi-compatible feature extraction (torchaudio.compliance.kaldi).

!pip3 install torchaudio

import torch
import torchaudio
import matplotlib.pyplot as plt

filename = "steam-train-whistle-daniel_simon-converted-from-mp3.wav"
waveform, sample_rate = torchaudio.load(filename)

print("Shape of waveform: {}".format(waveform.size()))
print("Sample rate of waveform: {}".format(sample_rate))

plt.figure()
plt.plot(waveform.t().numpy())

Transforms:

Resample: resample a waveform to another sample rate.
Spectrogram: create a spectrogram from a waveform.
MelScale: turn an ordinary STFT into a Mel-frequency STFT with a conversion matrix.
AmplitudeToDB: convert a spectrogram from the power/amplitude scale to the decibel scale (20*log10 for amplitude).
MFCC: create Mel-frequency cepstral coefficients from a waveform.
MelSpectrogram: create a Mel spectrogram from a waveform using PyTorch's STFT.
MuLawEncoding: encode a waveform with mu-law companding.
MuLawDecoding: decode a mu-law encoded waveform.

# Log spectrogram
specgram = torchaudio.transforms.Spectrogram()(waveform)

print("Shape of spectrogram: {}".format(specgram.size()))

plt.figure()
plt.imshow(specgram.log2()[0,:,:].numpy(), cmap='gray')

# Log Mel spectrogram
specgram = torchaudio.transforms.MelSpectrogram()(waveform)

print("Shape of spectrogram: {}".format(specgram.size()))

plt.figure()
p = plt.imshow(specgram.log2()[0,:,:].detach().numpy(), cmap='gray')

# Resampling
new_sample_rate = sample_rate/10

channel = 0
transformed = torchaudio.transforms.Resample(sample_rate, new_sample_rate)(waveform[channel,:].view(1,-1))

print("Shape of transformed waveform: {}".format(transformed.size()))

plt.figure()
plt.plot(transformed[0,:].numpy())
plt.plot(waveform[0,:].numpy())

# Mu-law encoding of the signal.
# Mu-law is the codec (companding) algorithm standardized by the CCITT for
# pulse-code modulation in multimedia; as a companding method it improves the
# signal-to-noise ratio without adding more data.
print("Min of waveform: {}\nMax of waveform: {}\nMean of waveform: {}".format(waveform.min(), waveform.max(), waveform.mean()))

def normalize(tensor):
    tensor_minusmean = tensor - tensor.mean()
    return tensor_minusmean/tensor_minusmean.abs().max()

transformed = torchaudio.transforms.MuLawEncoding()(waveform)

print("Shape of transformed waveform: {}".format(transformed.size()))

plt.figure()
plt.plot(transformed[0,:].numpy())
plt.plot(waveform[0,:].numpy())

reconstructed = torchaudio.transforms.MuLawDecoding()(transformed)

print("Shape of recovered waveform: {}".format(reconstructed.size()))

plt.figure()
plt.plot(reconstructed[0,:].numpy())
plt.plot(waveform[0,:].numpy())
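The transform list above also mentions MFCC, which is not demonstrated; a minimal sketch on the same waveform:

# Mel-frequency cepstral coefficients (sketch)
mfcc = torchaudio.transforms.MFCC(sample_rate=sample_rate)(waveform)

print("Shape of MFCC: {}".format(mfcc.size()))

plt.figure()
plt.imshow(mfcc[0,:,:].detach().numpy(), cmap='gray')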
--------------------------------------------------------------------------------
/基础:
--------------------------------------------------------------------------------
PyTorch is the NumPy of the neural-network world.
Installation:
https://pytorch.org/

pip3 install torch torchvision

# Tensors
import torch
import numpy as np

np_data = np.arange(6).reshape((2, 3))
torch_data = torch.from_numpy(np_data)
tensor2array = torch_data.numpy()
print(
    '\nnumpy array:', np_data,
    '\ntorch tensor:', torch_data,
    '\ntensor to array:', tensor2array,
)

dtype — eight data types:
16-bit floating point       torch.float16 or torch.half
32-bit floating point       torch.float32 or torch.float
64-bit floating point       torch.float64 or torch.double

8-bit integer (unsigned)    torch.uint8
8-bit integer (signed)      torch.int8
16-bit integer (signed)     torch.int16 or torch.short
32-bit integer (signed)     torch.int32 or torch.int
64-bit integer (signed)     torch.int64 or torch.long

# abs — absolute value
data = [-1, -2, 1, 2]
tensor = torch.FloatTensor(data)
print(
    '\nabs',
    '\nnumpy: ', np.abs(data),
    '\ntorch: ', torch.abs(tensor)
)

# sin — trigonometric sine
print(
    '\nsin',
    '\nnumpy: ', np.sin(data),
    '\ntorch: ', torch.sin(tensor)
)

# mean
print(
    '\nmean',
    '\nnumpy: ', np.mean(data),
    '\ntorch: ', torch.mean(tensor)
)

data = [[1,2], [3,4]]
tensor = torch.FloatTensor(data)
# the correct way to do matrix multiplication
print(
    '\nmatrix multiplication (matmul)',
    '\nnumpy: ', np.matmul(data, data),
    '\ntorch: ', torch.mm(tensor, tensor)
)


# Variables (in torch >= 0.4 plain Tensors track gradients themselves)
import torch
from torch.autograd import Variable

tensor = torch.FloatTensor([[1,2],[3,4]])

variable = Variable(tensor, requires_grad=True)

print(tensor)

print(variable)

t_out = torch.mean(tensor*tensor)
v_out = torch.mean(variable*variable)
print(t_out)
print(v_out)

v_out.backward()  # backpropagate the error from v_out

print(variable.grad)          # gradient of the initial Variable

print(variable)               # Variable form

print(variable.data)          # tensor form

print(variable.data.numpy())  # numpy form

# Datasets
# Built-in:
torchvision.datasets
MNIST
COCO (Captioning and Detection)
LSUN Classification
ImageFolder
Imagenet-12
CIFAR10 and CIFAR100
STL10

datasets.MNIST(root, train=True, transform=None, target_transform=None, download=False)

# Custom datasets subclass torch.utils.data.Dataset and implement two methods:
class MyDataset(Dataset):
    def __len__(self): ...           # so that len(dataset) works
    def __getitem__(self, idx): ...  # so that dataset[i] works

# Iteration
for i in range(len(dataset)):
    dataset[i]

torch.utils.data.DataLoader
dataloader = DataLoader(dataset, batch_size=4,
                        shuffle=True, num_workers=4)
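A runnable end-to-end sketch of the two-method protocol above (SquaresDataset is an illustrative name):

import torch
from torch.utils.data import Dataset, DataLoader

class SquaresDataset(Dataset):
    def __init__(self, n):
        self.xs = torch.arange(n, dtype=torch.float32)

    def __len__(self):
        return len(self.xs)                       # makes len(dataset) work

    def __getitem__(self, idx):
        return self.xs[idx], self.xs[idx] ** 2    # makes dataset[i] work

loader = DataLoader(SquaresDataset(100), batch_size=4, shuffle=True)
for x, y in loader:
    pass   # each x, y is a batch of 4 samples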
# Layers
import torch.nn as nn
import torch.nn.functional as F

# Linear layer
class torch.nn.Linear(in_features, out_features, bias=True)
torch.nn.functional.linear(input, weight, bias=None)

# Convolution layers
class torch.nn.Conv1d(in_channels, out_channels, kernel_size,
                      stride=1, padding=0, dilation=1, groups=1, bias=True)
    dilation: controls the spacing between kernel points (dilated / atrous convolution)
    groups: controls the connections between inputs and outputs
    # Shape and parameter-count examples (input, kernel -> output, weight count):
    # 28*28*2 input, 3*3 kernel with 64 filters -> 26*26*64 output, 2*3*3*64 weights
    # 28*28*1 input, 3*3 kernel with 32 filters -> 26*26*32 output, 1*3*3*32 weights
torch.nn.functional.conv1d(input, weight, bias=None,
                           stride=1, padding=0, dilation=1, groups=1)
class torch.nn.Conv2d
    # 1d: 100*1*batchsize -> kernel 3*1*64 -> 100*64*batchsize
    # 2d: 224*224*3*batchsize -> kernel 3*3*3*64 -> 224*224*64*batchsize
    # 3d kernels look like 3*3*3*64
class torch.nn.Conv3d

class torch.nn.ConvTranspose1d(in_channels, out_channels,
                               kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True)
torch.nn.functional.conv_transpose1d(input, weight, bias=None,
                                     stride=1, padding=0, output_padding=0, groups=1)
class torch.nn.ConvTranspose2d
class torch.nn.ConvTranspose3d

# Pooling layers
class torch.nn.MaxPool1d(kernel_size, stride=None,
                         padding=0, dilation=1, return_indices=False, ceil_mode=False)
    return_indices - if True, also return the indices of the max values
    ceil_mode - if True, use ceil instead of the default floor when computing the output size
torch.nn.functional.max_pool1d(input, kernel_size, stride=None, padding=0,
                               dilation=1, ceil_mode=False, return_indices=False)
class torch.nn.MaxPool2d
class torch.nn.MaxPool3d
class torch.nn.MaxUnpool1d(kernel_size, stride=None, padding=0)
torch.nn.functional.max_unpool1d(input, indices,
                                 kernel_size, stride=None, padding=0, output_size=None)
2d
3d

class torch.nn.AvgPool1d(kernel_size, stride=None, padding=0,
                         ceil_mode=False, count_include_pad=True)
torch.nn.functional.avg_pool1d(input, kernel_size, stride=None, padding=0,
                               ceil_mode=False, count_include_pad=True)
2d
3d

class torch.nn.FractionalMaxPool2d(kernel_size, output_size=None, output_ratio=None,
                                   return_indices=False, _random_samples=None)
# 2D power-average (Lp) pooling over the input signal
class torch.nn.LPPool2d(norm_type, kernel_size, stride=None, ceil_mode=False)
torch.nn.functional.lp_pool2d(input, norm_type, kernel_size, stride=None, ceil_mode=False)

class torch.nn.AdaptiveMaxPool1d(output_size, return_indices=False)
torch.nn.functional.adaptive_max_pool1d(input, output_size, return_indices=False)
2d

class torch.nn.AdaptiveAvgPool1d(output_size)
torch.nn.functional.adaptive_avg_pool1d(input, output_size)
2d

# Activation functions
class torch.nn.ReLU(inplace=False)
torch.nn.functional.relu(input, inplace=False)
class torch.nn.ReLU6(inplace=False)
torch.nn.functional.relu6(input, inplace=False)
class torch.nn.ELU(alpha=1.0, inplace=False)
torch.nn.functional.elu(input, alpha=1.0, inplace=False)
class torch.nn.PReLU(num_parameters=1, init=0.25)
torch.nn.functional.prelu(input, weight)
class torch.nn.LeakyReLU(negative_slope=0.01, inplace=False)
torch.nn.functional.leaky_relu(input, negative_slope=0.01, inplace=False)
class torch.nn.Threshold(threshold, value, inplace=False)
torch.nn.functional.threshold(input, threshold, value, inplace=False)
class torch.nn.Tanh()
torch.nn.functional.tanh(input)
class torch.nn.Hardtanh(min_value=-1, max_value=1, inplace=False)
torch.nn.functional.hardtanh(input, min_val=-1.0, max_val=1.0, inplace=False)
class torch.nn.Sigmoid()
torch.nn.functional.sigmoid(input)
class torch.nn.LogSigmoid()
torch.nn.functional.logsigmoid(input)
class torch.nn.Softplus(beta=1, threshold=20)
torch.nn.functional.softplus(input, beta=1, threshold=20)
class torch.nn.Softshrink(lambd=0.5)
torch.nn.functional.softshrink(input, lambd=0.5)
class torch.nn.Softmin()
torch.nn.functional.softmin(input)
class torch.nn.Softmax()
torch.nn.functional.softmax(input)
class torch.nn.LogSoftmax()
torch.nn.functional.log_softmax(input)

class torch.nn.BatchNorm1d(num_features, eps=1e-05, momentum=0.1, affine=True)
torch.nn.functional.batch_norm(input, running_mean, running_var,
                               weight=None, bias=None, training=False, momentum=0.1, eps=1e-05)
2d
3d

# RNN
class torch.nn.RNN(*args, **kwargs)
    input_size – number of features of the input x.
    hidden_size – number of features of the hidden state.
    num_layers – number of stacked RNN layers.
    nonlinearity – whether to use tanh or relu; default is tanh.
    bias – if False, the layer does not use the bias weights b_ih and b_hh; default is True.
    dropout – if non-zero, adds a dropout layer on the outputs of every layer except the last.
    bidirectional – if True, becomes a bidirectional RNN; default is False.
class torch.nn.LSTM(*args, **kwargs)
class torch.nn.GRU()
# Single RNN cells
class torch.nn.RNNCell(input_size, hidden_size, bias=True, nonlinearity='tanh')
class torch.nn.LSTMCell(input_size, hidden_size, bias=True)
class torch.nn.GRUCell(input_size, hidden_size, bias=True)
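A quick shape check for the recurrent layers (values are illustrative):

lstm = nn.LSTM(input_size=10, hidden_size=20, num_layers=2)
x = torch.randn(5, 3, 10)        # (seq_len, batch, input_size)
out, (h_n, c_n) = lstm(x)
print(out.shape)                 # torch.Size([5, 3, 20])
print(h_n.shape, c_n.shape)      # torch.Size([2, 3, 20]) for both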
# Dropout
class torch.nn.Dropout(p=0.5, inplace=False)
    p - probability of zeroing an element. Default: 0.5
torch.nn.functional.dropout(input, p=0.5, training=False, inplace=False)

# Embedding layer
class torch.nn.Embedding(num_embeddings, embedding_dim,
                         padding_idx=None, max_norm=None, norm_type=2, scale_grad_by_freq=False, sparse=False)

# Loss functions
class torch.nn.L1Loss(size_average=True)
torch.nn.functional.l1_loss(input, target, size_average=True)
class torch.nn.SmoothL1Loss(size_average=True)
torch.nn.functional.smooth_l1_loss(input, target, size_average=True)
class torch.nn.MSELoss(size_average=True)
torch.nn.functional.mse_loss(input, target, size_average=True)
class torch.nn.CrossEntropyLoss(weight=None, size_average=True)
torch.nn.functional.cross_entropy(input, target, weight=None, size_average=True)
class torch.nn.BCELoss(weight=None, size_average=True)
torch.nn.functional.binary_cross_entropy(input, target, weight=None, size_average=True)
# Negative log-likelihood loss
class torch.nn.NLLLoss(weight=None, size_average=True)
torch.nn.functional.nll_loss(input, target, weight=None, size_average=True)
class torch.nn.NLLLoss2d(weight=None, size_average=True)
torch.nn.functional.nll_loss2d(input, target, weight=None, size_average=True)
class torch.nn.KLDivLoss(weight=None, size_average=True)
torch.nn.functional.kl_div(input, target, size_average=True)

class torch.nn.HingeEmbeddingLoss(size_average=True)
class torch.nn.CosineEmbeddingLoss(margin=0, size_average=True)

# Padding
torch.nn.functional.pad(input, pad, mode='constant', value=0)

import torch.nn as nn
import torch.nn.functional as F
# MaxUnpool demo: unpool scatters the pooled values back to the recorded indices
pool = nn.MaxPool2d(2, stride=2, return_indices=True)
unpool = nn.MaxUnpool2d(2, stride=2)
input = Variable(torch.Tensor([[[[ 1,  2,  3,  4],
                                 [ 5,  6,  7,  8],
                                 [ 9, 10, 11, 12],
                                 [13, 14, 15, 16]]]]))
output, indices = pool(input)
print(output, indices)
unpool(output, indices)

# Optimizers
torch.optim
class torch.optim.Optimizer(params, defaults)
    step()
    zero_grad()

class torch.optim.Adadelta(params, lr=1.0, rho=0.9, eps=1e-06, weight_decay=0)
class torch.optim.Adagrad(params, lr=0.01, lr_decay=0, weight_decay=0)
class torch.optim.Adam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
class torch.optim.Adamax(params, lr=0.002, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
class torch.optim.ASGD(params, lr=0.01, lambd=0.0001, alpha=0.75, t0=1000000.0, weight_decay=0)
class torch.optim.LBFGS(params, lr=1, max_iter=20, max_eval=None, tolerance_grad=1e-05, tolerance_change=1e-09, history_size=100, line_search_fn=None)
class torch.optim.RMSprop(params, lr=0.01, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False)
class torch.optim.Rprop(params, lr=0.01, etas=(0.5, 1.2), step_sizes=(1e-06, 50))
class torch.optim.SGD(params, lr=<required>, momentum=0, dampening=0, weight_decay=0, nesterov=False)
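The step()/zero_grad() protocol in a typical iteration (sketch; net, loss_fn, x and y are placeholders):

optimizer = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

optimizer.zero_grad()          # clear the old gradients
loss = loss_fn(net(x), y)      # forward pass
loss.backward()                # compute gradients
optimizer.step()               # update the parameters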
# Models — two ways to define them
# Linear model

net = torch.nn.Sequential(
    torch.nn.Linear(1, 10),
    torch.nn.ReLU(),
    torch.nn.Linear(10, 1)
)

class LinearNet(torch.nn.Module):
    def __init__(self, n_feature, n_hidden, n_output):
        super(LinearNet, self).__init__()
        self.hidden = torch.nn.Linear(n_feature, n_hidden)
        self.predict = torch.nn.Linear(n_hidden, n_output)

    def forward(self, x):
        x = F.relu(self.hidden(x))
        x = self.predict(x)
        return x

net1 = LinearNet(1, 10, 1)

# Convolutional model
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=16,
                kernel_size=5,
                stride=1,
                padding=2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        # two 2x2 poolings shrink 28x28 inputs to 7x7, hence 32 * 7 * 7
        self.out = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)
        output = self.out(x)
        return output
cnn = CNN()
print(cnn)

# RNN
class RNN(nn.Module):
    def __init__(self):
        super(RNN, self).__init__()

        self.rnn = nn.LSTM(
            input_size=28,
            hidden_size=64,
            num_layers=1,
            batch_first=True,
        )

        self.out = nn.Linear(64, 10)

    def forward(self, x):
        # x shape     (batch, time_step, input_size)
        # r_out shape (batch, time_step, output_size)
        # h_n shape   (n_layers, batch, hidden_size)
        # h_c shape   (n_layers, batch, hidden_size)
        r_out, (h_n, h_c) = self.rnn(x, None)
        # take the output at the last time step;
        # here r_out[:, -1, :] equals h_n
        out = self.out(r_out[:, -1, :])
        return out

rnn = RNN()
print(rnn)

# GPU acceleration
# Move every tensor and model to the GPU with .cuda():
x = x.cuda()
y = y.cuda()
cnn = CNN()
cnn.cuda()
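The later notes (空间变换器网络, 迁移学习) use the device-agnostic idiom instead of calling .cuda() everywhere; a short sketch with the CNN above:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
cnn = CNN().to(device)   # works on both GPU-equipped and CPU-only machines
# inputs must be moved the same way: x = x.to(device)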
# Saving
torch.save(net, 'net.pkl')                      # save the whole network
torch.save(net.state_dict(), 'net_params.pkl')  # save only the parameters
# Loading
net = torch.load('net.pkl')
net3 = torch.nn.Sequential(  # rebuild the same architecture before loading the params
    torch.nn.Linear(1, 10),
    torch.nn.ReLU(),
    torch.nn.Linear(10, 1)
)
net3.load_state_dict(torch.load('net_params.pkl'))

# MNIST
import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision
import matplotlib.pyplot as plt

# Hyper parameters
EPOCH = 10
BATCH_SIZE = 50
LR = 0.001              # learning rate
DOWNLOAD_MNIST = True   # set to False once the MNIST data has been downloaded

train_data = torchvision.datasets.MNIST(
    root='./mnist/',    # where to save / load the data
    train=True,         # this is training data
    transform=torchvision.transforms.ToTensor(),
    download=DOWNLOAD_MNIST  # download only if not already present
)

test_data = torchvision.datasets.MNIST(root='./mnist/', train=False)

train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

test_x = torch.unsqueeze(test_data.test_data, dim=1).type(torch.FloatTensor)/255.
test_y = test_data.test_labels

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=16,
                kernel_size=5,
                stride=1,
                padding=2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.out = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)
        output = self.out(x)
        return output

cnn = CNN()
optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)   # optimize all cnn parameters
loss_func = nn.CrossEntropyLoss()                       # the target label is not one-hotted

# training and testing
for epoch in range(EPOCH):
    for step, (b_x, b_y) in enumerate(train_loader):    # batch data; x is normalized when iterating train_loader
        output = cnn(b_x)               # cnn output
        loss = loss_func(output, b_y)   # cross entropy loss
        optimizer.zero_grad()           # clear gradients for this training step
        loss.backward()                 # backpropagation, compute gradients
        optimizer.step()                # apply gradients

    test_output = cnn(test_x)
    pred_y = torch.max(test_output, 1)[1].data.numpy()
    accuracy = float((pred_y == test_y.data.numpy()).astype(int).sum()) / float(test_y.size(0))
    print('Epoch: ', epoch, '| train loss: %.4f' % loss.data.numpy(), '| test accuracy: %.2f' % accuracy)
# GPU version
test_x = torch.unsqueeze(test_data.test_data, dim=1).type(torch.FloatTensor).cuda()/255.
test_y = test_data.test_labels.cuda()
cnn.cuda()
optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)   # optimize all cnn parameters
loss_func = nn.CrossEntropyLoss()                       # the target label is not one-hotted

for epoch in range(EPOCH):
    for step, (b_x, b_y) in enumerate(train_loader):    # batch data; x is normalized when iterating train_loader
        b_x = b_x.cuda()
        b_y = b_y.cuda()
        output = cnn(b_x)               # cnn output
        loss = loss_func(output, b_y)   # cross entropy loss
        optimizer.zero_grad()           # clear gradients for this training step
        loss.backward()                 # backpropagation, compute gradients
        optimizer.step()                # apply gradients

    test_output = cnn(test_x)
    pred_y = torch.max(test_output, 1)[1].cuda().data
    accuracy = torch.sum(pred_y == test_y).cuda().type(torch.FloatTensor) / test_y.size(0)
    print('Epoch: ', epoch, '| train loss: %.4f' % loss.data, '| test accuracy: %.2f' % accuracy)

--------------------------------------------------------------------------------
/强化学习(DQN):
--------------------------------------------------------------------------------
OpenAI Gym provides the reinforcement-learning environments.

!pip3 install gym

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import gym

# Hyper parameters
BATCH_SIZE = 32
LR = 0.01
EPSILON = 0.9             # epsilon-greedy threshold
GAMMA = 0.9               # reward discount factor
TARGET_REPLACE_ITER = 10  # how often to refresh the target network
MEMORY_CAPACITY = 2000    # replay memory size
env = gym.make('CartPole-v0')  # the pole-balancing game
env = env.unwrapped
N_ACTIONS = env.action_space.n             # number of actions the cart can take
N_STATES = env.observation_space.shape[0]  # number of observed state variables

class Net(nn.Module):
    def __init__(self, ):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(N_STATES, 10)
        self.fc1.weight.data.normal_(0, 0.1)
        self.out = nn.Linear(10, N_ACTIONS)
        self.out.weight.data.normal_(0, 0.1)

    def forward(self, x):
        x = self.fc1(x)
        x = F.relu(x)
        actions_value = self.out(x)
        return actions_value

class DQN(object):
    def __init__(self):
        self.eval_net, self.target_net = Net(), Net()

        self.learn_step_counter = 0   # counts steps until the next target update
        self.memory_counter = 0       # counts stored transitions
        self.memory = np.zeros((MEMORY_CAPACITY, N_STATES * 2 + 2))  # initialize replay memory
        self.optimizer = torch.optim.Adam(self.eval_net.parameters(), lr=LR)
        self.loss_func = nn.MSELoss()

    def choose_action(self, x):
        # epsilon-greedy policy
        x = torch.unsqueeze(torch.FloatTensor(x), 0)
        # only a single sample is fed in here
        if np.random.uniform() < EPSILON:   # exploit: pick the best action
            actions_value = self.eval_net.forward(x)
            action = torch.max(actions_value, 1)[1].data.numpy()[0]  # return the argmax
        else:                               # explore: pick a random action
            action = np.random.randint(0, N_ACTIONS)
        return action

    def store_transition(self, s, a, r, s_):
        transition = np.hstack((s, [a, r], s_))
        # once the memory is full, overwrite the oldest data
        index = self.memory_counter % MEMORY_CAPACITY
        self.memory[index, :] = transition
        self.memory_counter += 1

    def learn(self):
        # refresh the target-network parameters
        if self.learn_step_counter % TARGET_REPLACE_ITER == 0:
            self.target_net.load_state_dict(self.eval_net.state_dict())
        self.learn_step_counter += 1

        # sample a batch from the replay memory
        sample_index = np.random.choice(MEMORY_CAPACITY, BATCH_SIZE)
        b_memory = self.memory[sample_index, :]
        b_s = torch.FloatTensor(b_memory[:, :N_STATES])
        b_a = torch.LongTensor(b_memory[:, N_STATES:N_STATES+1].astype(int))
        b_r = torch.FloatTensor(b_memory[:, N_STATES+1:N_STATES+2])
        b_s_ = torch.FloatTensor(b_memory[:, -N_STATES:])

        # pick the q values of the actions b_a that were actually taken
        # (q_eval originally holds the values of all actions)
        q_eval = self.eval_net(b_s).gather(1, b_a)   # shape (batch, 1)
        q_next = self.target_net(b_s_).detach()      # detached: no gradients flow into the target net
        q_target = b_r + GAMMA * q_next.max(1)[0].view(BATCH_SIZE, 1)  # shape (batch, 1)
        loss = self.loss_func(q_eval, q_target)
        print(loss)

        # compute gradients and update the eval net
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

dqn = DQN()
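learn() implements the Q-learning (TD) target, q_target = r + GAMMA * max_a' Q_target(s', a'); note that this code does not zero the bootstrap term on terminal transitions. A one-transition illustration (values are made up):

s_prime = torch.FloatTensor(np.random.rand(1, N_STATES))
q_next_demo = dqn.target_net(s_prime).detach()
td_target = 1.0 + GAMMA * q_next_demo.max(1)[0].item()   # reward r = 1.0 is illustrative
print(td_target)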
for i_episode in range(400):
    # reset the environment
    s = env.reset()
    while True:
        a = dqn.choose_action(s)

        # take the action, get the environment feedback
        s_, r, done, info = env.step(a)

        # reshape the reward so the DQN learns faster
        x, x_dot, theta, theta_dot = s_
        r1 = (env.x_threshold - abs(x)) / env.x_threshold - 0.8
        r2 = (env.theta_threshold_radians - abs(theta)) / env.theta_threshold_radians - 0.5
        r = r1 + r2

        # store the transition
        dqn.store_transition(s, a, r, s_)

        if dqn.memory_counter > MEMORY_CAPACITY:
            dqn.learn()   # start learning once the memory is full

        if done:          # episode over, start the next one
            break

        s = s_

--------------------------------------------------------------------------------
/空间变换器网络:
--------------------------------------------------------------------------------
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np

# The spatial transformer network (STN) is a visual attention mechanism
# that lets a neural network learn how to apply spatial transformations
# to its input image.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Training dataset
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST(root='.', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])), batch_size=64, shuffle=True, num_workers=4)
# Test dataset
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST(root='.', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])), batch_size=64, shuffle=True, num_workers=4)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

        # Localization network
        self.localization = nn.Sequential(
            nn.Conv2d(1, 8, kernel_size=7),
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(True),
            nn.Conv2d(8, 10, kernel_size=5),
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(True)
        )

        # Regressor for the 3 * 2 affine matrix
        self.fc_loc = nn.Sequential(
            nn.Linear(10 * 3 * 3, 32),
            nn.ReLU(True),
            nn.Linear(32, 3 * 2)
        )

        # Initialize the weights/bias with the identity transformation
        self.fc_loc[2].weight.data.zero_()
        self.fc_loc[2].bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float))

    # The spatial transformer forward pass
    def stn(self, x):
        xs = self.localization(x)
        xs = xs.view(-1, 10 * 3 * 3)
        theta = self.fc_loc(xs)
        theta = theta.view(-1, 2, 3)

        grid = F.affine_grid(theta, x.size())
        x = F.grid_sample(x, grid)

        return x

    # Forward pass
    def forward(self, x):
        x = self.stn(x)
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
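stn() builds on F.affine_grid and F.grid_sample; since fc_loc is initialized to the identity transform, the module starts out as a no-op. A sanity-check sketch:

theta_id = torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float).view(1, 2, 3)
img = torch.randn(1, 1, 28, 28)
grid = F.affine_grid(theta_id, img.size())
print(torch.allclose(F.grid_sample(img, grid), img, atol=1e-4))  # ~True up to interpolation error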
model = Net().to(device)

optimizer = optim.SGD(model.parameters(), lr=0.01)

def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 500 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

def test():
    with torch.no_grad():
        model.eval()
        test_loss = 0
        correct = 0
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)

            test_loss += F.nll_loss(output, target, size_average=False).item()
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()

        test_loss /= len(test_loader.dataset)
        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'
              .format(test_loss, correct, len(test_loader.dataset),
                      100. * correct / len(test_loader.dataset)))


def convert_image_np(inp):
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    return inp

def visualize_stn():
    with torch.no_grad():
        data = next(iter(test_loader))[0].to(device)

        input_tensor = data.cpu()
        transformed_input_tensor = model.stn(data).cpu()

        in_grid = convert_image_np(
            torchvision.utils.make_grid(input_tensor))

        out_grid = convert_image_np(
            torchvision.utils.make_grid(transformed_input_tensor))

        f, axarr = plt.subplots(1, 2)
        axarr[0].imshow(in_grid)
        axarr[0].set_title('Dataset Images')

        axarr[1].imshow(out_grid)
        axarr[1].set_title('Transformed Images')

for epoch in range(20):
    train(epoch)
    test()

visualize_stn()

plt.ioff()
plt.show()

--------------------------------------------------------------------------------
/迁移学习:
--------------------------------------------------------------------------------
# Data augmentation (ToTensor must come before Normalize, which expects a tensor)
im_aug = torchvision.transforms.Compose([
    torchvision.transforms.Resize(100),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.RandomCrop(50),
    torchvision.transforms.ColorJitter(brightness=0.5, contrast=0.5, hue=0.5),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

train_set = CIFAR10('./data', train=True, transform=im_aug)
# The transform is applied lazily, every time a sample is read from the dataset.

# Transfer learning
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
# MNIST baseline (same CNN as in the 基础 notes)
import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision
import matplotlib.pyplot as plt

# Hyper parameters
EPOCH = 10
BATCH_SIZE = 50
LR = 0.001              # learning rate
DOWNLOAD_MNIST = True   # set to False once the MNIST data has been downloaded

train_data = torchvision.datasets.MNIST(
    root='./mnist/',    # where to save / load the data
    train=True,         # this is training data
    transform=torchvision.transforms.ToTensor(),
    download=DOWNLOAD_MNIST  # download only if not already present
)

print(train_data)
test_data = torchvision.datasets.MNIST(root='./mnist/', train=False)

train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

test_x = torch.unsqueeze(test_data.test_data, dim=1).type(torch.FloatTensor)/255.
test_y = test_data.test_labels

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=16,
                kernel_size=5,
                stride=1,
                padding=2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.out = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)
        output = self.out(x)
        return output

cnn = CNN()
optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)   # optimize all cnn parameters
loss_func = nn.CrossEntropyLoss()                       # the target label is not one-hotted

# training and testing
for epoch in range(EPOCH):
    for step, (b_x, b_y) in enumerate(train_loader):    # batch data; x is normalized when iterating train_loader
        output = cnn(b_x)               # cnn output
        loss = loss_func(output, b_y)   # cross entropy loss
        optimizer.zero_grad()           # clear gradients for this training step
        loss.backward()                 # backpropagation, compute gradients
        optimizer.step()                # apply gradients

    test_output = cnn(test_x)
    pred_y = torch.max(test_output, 1)[1].data.numpy()
    accuracy = float((pred_y == test_y.data.numpy()).astype(int).sum()) / float(test_y.size(0))
    print('Epoch: ', epoch, '| train loss: %.4f' % loss.data.numpy(), '| test accuracy: %.2f' % accuracy)

# Data augmentation and normalization
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

data_dir = 'hymenoptera_data'
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
                                              shuffle=True, num_workers=4)
               for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
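datasets.ImageFolder infers the class labels from the directory layout; the hymenoptera_data folder is expected to look like:

hymenoptera_data/
    train/
        ants/xxx.jpg ...
        bees/xxx.jpg ...
    val/
        ants/xxx.jpg ...
        bees/xxx.jpg ...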
def imshow(inp, title=None):
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)

inputs, classes = next(iter(dataloaders['train']))

out = torchvision.utils.make_grid(inputs)

imshow(out, title=[class_names[x] for x in classes])

def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            # Iterate over the data
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # gradients are tracked only in the training phase
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

def visualize_model(model, num_images=6):
    was_training = model.training
    model.eval()
    images_so_far = 0
    fig = plt.figure()

    with torch.no_grad():
        for i, (inputs, labels) in enumerate(dataloaders['val']):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            for j in range(inputs.size()[0]):
                images_so_far += 1
                ax = plt.subplot(num_images//2, 2, images_so_far)
                ax.axis('off')
                ax.set_title('predicted: {}'.format(class_names[preds[j]]))
                imshow(inputs.cpu().data[j])

                if images_so_far == num_images:
                    model.train(mode=was_training)
                    return
        model.train(mode=was_training)

# Finetuning: update all parameters, starting from the pretrained weights
model_ft = models.resnet18(pretrained=True)
print(model_ft)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 2)  # replace the 1000-class head with a 2-class one

print(model_ft)
model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=25)

# Freeze every layer except the last one
model_conv = torchvision.models.resnet18(pretrained=True)
for param in model_conv.parameters():
    param.requires_grad = False

num_ftrs = model_conv.fc.in_features
model_conv.fc = nn.Linear(num_ftrs, 2)  # newly constructed modules have requires_grad=True

model_conv = model_conv.to(device)

criterion = nn.CrossEntropyLoss()

# only the parameters of the final layer are optimized
optimizer_conv = optim.SGD(model_conv.fc.parameters(), lr=0.001, momentum=0.9)

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)

model_conv = train_model(model_conv, criterion, optimizer_conv,
                         exp_lr_scheduler, num_epochs=20)
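The two strategies differ in how many parameters actually receive gradients; a small helper to check (count_trainable is an illustrative name):

def count_trainable(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(count_trainable(model_ft))    # every parameter of resnet18 is updated
print(count_trainable(model_conv))  # only the replaced fc layer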
# Alternatively, stack a new 2-class head on top of the existing 1000-class output
model_conv = torchvision.models.resnet18(pretrained=True)
model = torch.nn.Sequential(
    model_conv,
    torch.nn.Linear(1000, 2)
)
print(model)
--------------------------------------------------------------------------------