├── char-RNN对姓氏进行分类和生成
├── torchaudio
├── 基础
├── 强化学习(DQN)
├── 空间变换器网络
└── 迁移学习

/char-RNN对姓氏进行分类和生成:
--------------------------------------------------------------------------------
from __future__ import unicode_literals, print_function, division
from io import open
import glob
import os

def findFiles(path): return glob.glob(path)

print(findFiles('*.txt'))

import unicodedata
import string

all_letters = string.ascii_letters + " .,;'"
n_letters = len(all_letters)
print(all_letters)

def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
        and c in all_letters
    )

print(unicodeToAscii('Ślusàrski'))

# Build the per-language dictionary of surnames
category_lines = {}
all_categories = []

def readLines(filename):
    lines = open(filename, encoding='utf-8').read().strip().split('\n')
    return [unicodeToAscii(line) for line in lines]

for filename in findFiles('*.txt'):
    category = os.path.splitext(os.path.basename(filename))[0]
    all_categories.append(category)
    lines = readLines(filename)
    category_lines[category] = lines

n_categories = len(all_categories)
print(n_categories)

print(category_lines['Chinese'][:5])

import torch

# Encode a letter as its index in all_letters
def letterToIndex(letter):
    return all_letters.find(letter)

# Turn a letter into a one-hot vector, e.g. [1, 0, 0, ..., 0]
def letterToTensor(letter):
    tensor = torch.zeros(1, n_letters)
    tensor[0][letterToIndex(letter)] = 1
    return tensor

# Turn a word into a tensor of shape (len(line), 1, n_letters)
def lineToTensor(line):
    tensor = torch.zeros(len(line), 1, n_letters)
    for li, letter in enumerate(line):
        tensor[li][0][letterToIndex(letter)] = 1
    return tensor

print(letterToTensor('J'))

print(lineToTensor('Jones').size())

import torch.nn as nn

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        combined = torch.cat((input, hidden), 1)
        hidden = self.i2h(combined)
        output = self.i2o(combined)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, self.hidden_size)

n_hidden = 128
rnn = RNN(n_letters, n_hidden, n_categories)

input = letterToTensor('A')
hidden = torch.zeros(1, n_hidden)
output, next_hidden = rnn(input, hidden)
print(output)

input = lineToTensor('Albert')
hidden = torch.zeros(1, n_hidden)
output, next_hidden = rnn(input[0], hidden)
print(output)

def categoryFromOutput(output):
    top_n, top_i = output.topk(1)
    category_i = top_i[0].item()
    return all_categories[category_i], category_i

print(categoryFromOutput(output))

import random

# Draw a random training sample
def randomChoice(l):
    return l[random.randint(0, len(l) - 1)]

def randomTrainingExample():
    category = randomChoice(all_categories)
    line = randomChoice(category_lines[category])
    category_tensor = torch.tensor([all_categories.index(category)], dtype=torch.long)
    line_tensor = lineToTensor(line)
    return category, line, category_tensor, line_tensor

for i in range(10):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    print('category =', category, '\t // \t line =', line)

criterion = nn.NLLLoss()
learning_rate = 0.005
# No optimizer object is needed here: the parameters are updated by hand below.

def train(category_tensor, line_tensor):
    hidden = rnn.initHidden()
    rnn.zero_grad()

    for i in range(line_tensor.size()[0]):
        output, hidden = rnn(line_tensor[i], hidden)

    loss = criterion(output, category_tensor)
    loss.backward()
    # Plain SGD step: p -= learning_rate * p.grad
    for p in rnn.parameters():
        p.data.add_(-learning_rate, p.grad.data)

    return output, loss.item()
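The hand-rolled update above is plain SGD. A minimal sketch of the same training step driven by torch.optim.SGD instead (train_with_optimizer is an illustrative name, not part of the tutorial):

# Equivalent training step with an optimizer object (sketch)
optimizer = torch.optim.SGD(rnn.parameters(), lr=learning_rate)

def train_with_optimizer(category_tensor, line_tensor):
    hidden = rnn.initHidden()
    optimizer.zero_grad()
    for i in range(line_tensor.size()[0]):
        output, hidden = rnn(line_tensor[i], hidden)
    loss = criterion(output, category_tensor)
    loss.backward()
    optimizer.step()   # applies p -= lr * p.grad for every parameter
    return output, loss.item()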
import time
import math

n_iters = 100000
print_every = 5000
plot_every = 1000

current_loss = 0
all_losses = []

def timeSince(since):
    now = time.time()
    s = now - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

start = time.time()

for iter in range(1, n_iters + 1):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    output, loss = train(category_tensor, line_tensor)
    current_loss += loss

    if iter % print_every == 0:
        guess, guess_i = categoryFromOutput(output)
        correct = '✓' if guess == category else '✗ (%s)' % category
        print('%d %d%% (%s) %.4f %s / %s %s' % (iter, iter / n_iters * 100, timeSince(start), loss, line, guess, correct))

    if iter % plot_every == 0:
        all_losses.append(current_loss / plot_every)
        current_loss = 0

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

plt.figure()
plt.plot(all_losses)

# Evaluation
def evaluate(line_tensor):
    # Start from a fresh hidden state for each word
    hidden = rnn.initHidden()

    for i in range(line_tensor.size()[0]):
        output, hidden = rnn(line_tensor[i], hidden)

    return output

def predict(input_line, n_predictions=3):
    print('\n> %s' % input_line)
    with torch.no_grad():
        output = evaluate(lineToTensor(input_line))
        topv, topi = output.topk(n_predictions, 1, True)
        predictions = []

        for i in range(n_predictions):
            value = topv[0][i].item()
            category_index = topi[0][i].item()
            print('(%.2f) %s' % (value, all_categories[category_index]))
            predictions.append([value, all_categories[category_index]])

predict('Yu')

# Confusion matrix over random samples
confusion = torch.zeros(n_categories, n_categories)
n_confusion = 10000

for i in range(n_confusion):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    output = evaluate(line_tensor)
    guess, guess_i = categoryFromOutput(output)
    category_i = all_categories.index(category)
    confusion[category_i][guess_i] += 1

# Normalize each row so every cell is a per-category rate
for i in range(n_categories):
    confusion[i] = confusion[i] / confusion[i].sum()

fig = plt.figure()
ax = fig.add_subplot(111)
cax = ax.matshow(confusion.numpy())
fig.colorbar(cax)

ax.set_xticklabels([''] + all_categories, rotation=90)
ax.set_yticklabels([''] + all_categories)

ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

plt.show()
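For reference, PyTorch's built-in nn.RNN runs a whole sequence in one call and returns (output, h_n); there is no Keras-style return_state flag. A hedged sketch of the same classifier on top of nn.RNN (BuiltinRNN is an illustrative name):

class BuiltinRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(BuiltinRNN, self).__init__()
        self.rnn = nn.RNN(input_size, hidden_size)
        self.h2o = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, line_tensor):
        # line_tensor: (seq_len, 1, n_letters); h_n: (1, 1, hidden_size)
        _, h_n = self.rnn(line_tensor)
        return self.softmax(self.h2o(h_n[0]))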
# Generating surnames
import torch
import torch.nn as nn

all_letters = string.ascii_letters + " .,;'-"
n_letters = len(all_letters) + 1  # extra slot for the EOS (end-of-string) marker

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size

        self.i2h = nn.Linear(n_categories + input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(n_categories + input_size + hidden_size, output_size)
        self.o2o = nn.Linear(hidden_size + output_size, output_size)
        # The output is interpreted as the probability of the next letter;
        # dropout adds some randomness to the samples.
        self.dropout = nn.Dropout(0.1)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, category, input, hidden):
        input_combined = torch.cat((category, input, hidden), 1)
        hidden = self.i2h(input_combined)
        output = self.i2o(input_combined)
        output_combined = torch.cat((hidden, output), 1)
        output = self.o2o(output_combined)
        output = self.dropout(output)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, self.hidden_size)

import random

def randomChoice(l):
    return l[random.randint(0, len(l) - 1)]

def randomTrainingPair():
    category = randomChoice(all_categories)
    line = randomChoice(category_lines[category])
    return category, line

# At every time step the network predicts the next letter, so the training
# pairs are consecutive letters of the line; e.g. for "Yu" we create
# ("Y", "u") and ("u", EOS).

def categoryTensor(category):
    li = all_categories.index(category)
    tensor = torch.zeros(1, n_categories)
    tensor[0][li] = 1
    return tensor

# Input letters, e.g. "Yu" -> one-hot rows for "Y", "u"
def inputTensor(line):
    tensor = torch.zeros(len(line), 1, n_letters)
    for li in range(len(line)):
        letter = line[li]
        tensor[li][0][all_letters.find(letter)] = 1
    return tensor

# Target letters, e.g. "Yu" -> indices of "u", EOS
def targetTensor(line):
    letter_indexes = [all_letters.find(line[li]) for li in range(1, len(line))]
    letter_indexes.append(n_letters - 1)  # EOS marker
    return torch.LongTensor(letter_indexes)

# Draw a random (category, input, target) triple
def randomTrainingExample():
    category, line = randomTrainingPair()
    category_tensor = categoryTensor(category)
    input_line_tensor = inputTensor(line)
    target_line_tensor = targetTensor(line)
    return category_tensor, input_line_tensor, target_line_tensor

criterion = nn.NLLLoss()

learning_rate = 0.0005

def train(category_tensor, input_line_tensor, target_line_tensor):
    target_line_tensor.unsqueeze_(-1)
    hidden = rnn.initHidden()

    rnn.zero_grad()

    loss = 0

    for i in range(input_line_tensor.size(0)):
        output, hidden = rnn(category_tensor, input_line_tensor[i], hidden)
        l = criterion(output, target_line_tensor[i])
        loss += l

    loss.backward()

    for p in rnn.parameters():
        p.data.add_(-learning_rate, p.grad.data)

    return output, loss.item() / input_line_tensor.size(0)

rnn = RNN(n_letters, 128, n_letters)

n_iters = 100000
print_every = 5000
plot_every = 500
all_losses = []
total_loss = 0

start = time.time()

for iter in range(1, n_iters + 1):
    output, loss = train(*randomTrainingExample())
    total_loss += loss

    if iter % print_every == 0:
        print('%s (%d %d%%) %.4f' % (timeSince(start), iter, iter / n_iters * 100, loss))

    if iter % plot_every == 0:
        all_losses.append(total_loss / plot_every)
        total_loss = 0
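As in the classification half, the collected losses can be plotted (matplotlib is already imported above):

plt.figure()
plt.plot(all_losses)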
max_length = 20

def sample(category, start_letter='A'):
    with torch.no_grad():
        category_tensor = categoryTensor(category)
        input = inputTensor(start_letter)
        hidden = rnn.initHidden()

        output_name = start_letter

        for i in range(max_length):
            output, hidden = rnn(category_tensor, input[0], hidden)
            topv, topi = output.topk(1)
            topi = topi[0][0]
            if topi == n_letters - 1:  # EOS: stop sampling
                break
            else:
                letter = all_letters[topi]
                output_name += letter
                input = inputTensor(letter)

        return output_name

def samples(category, start_letters='ABC'):
    for start_letter in start_letters:
        print(sample(category, start_letter))

samples('Chinese', 'Y')
samples('Chinese', 'YYY')

--------------------------------------------------------------------------------
/torchaudio:
--------------------------------------------------------------------------------
Processing audio with torchaudio.
torchaudio also ships Kaldi-compatible feature extraction (torchaudio.compliance.kaldi).

!pip3 install torchaudio

import torch
import torchaudio
import matplotlib.pyplot as plt

filename = "steam-train-whistle-daniel_simon-converted-from-mp3.wav"
waveform, sample_rate = torchaudio.load(filename)

print("Shape of waveform: {}".format(waveform.size()))
print("Sample rate of waveform: {}".format(sample_rate))

plt.figure()
plt.plot(waveform.t().numpy())

Transforms:

Resample: resample a waveform to another sample rate.
Spectrogram: create a spectrogram from a waveform.
MelScale: turn an ordinary STFT into a Mel-frequency STFT with a conversion matrix.
AmplitudeToDB: convert a spectrogram from the power/amplitude scale to the decibel scale (20*log10 for amplitude).
MFCC: create Mel-frequency cepstral coefficients from a waveform.
MelSpectrogram: create a Mel spectrogram from a waveform using PyTorch's STFT.
MuLawEncoding: encode a waveform with mu-law companding.
MuLawDecoding: decode a mu-law encoded waveform.

# Log spectrogram
specgram = torchaudio.transforms.Spectrogram()(waveform)

print("Shape of spectrogram: {}".format(specgram.size()))

plt.figure()
plt.imshow(specgram.log2()[0,:,:].numpy(), cmap='gray')

# Log Mel spectrogram
specgram = torchaudio.transforms.MelSpectrogram()(waveform)

print("Shape of spectrogram: {}".format(specgram.size()))

plt.figure()
p = plt.imshow(specgram.log2()[0,:,:].detach().numpy(), cmap='gray')

# Resampling
new_sample_rate = sample_rate/10

channel = 0
transformed = torchaudio.transforms.Resample(sample_rate, new_sample_rate)(waveform[channel,:].view(1,-1))

print("Shape of transformed waveform: {}".format(transformed.size()))

plt.figure()
plt.plot(transformed[0,:].numpy())
plt.plot(waveform[0,:].numpy())

# Mu-law encoding of the signal.
# Mu-law is the codec (companding) algorithm standardized by the CCITT for
# pulse-code modulation in multimedia; as a companding method it improves the
# signal-to-noise ratio without adding more data.
print("Min of waveform: {}\nMax of waveform: {}\nMean of waveform: {}".format(waveform.min(), waveform.max(), waveform.mean()))

def normalize(tensor):
    tensor_minusmean = tensor - tensor.mean()
    return tensor_minusmean/tensor_minusmean.abs().max()

transformed = torchaudio.transforms.MuLawEncoding()(waveform)

print("Shape of transformed waveform: {}".format(transformed.size()))

plt.figure()
plt.plot(transformed[0,:].numpy())
plt.plot(waveform[0,:].numpy())

reconstructed = torchaudio.transforms.MuLawDecoding()(transformed)

print("Shape of recovered waveform: {}".format(reconstructed.size()))

plt.figure()
plt.plot(reconstructed[0,:].numpy())
plt.plot(waveform[0,:].numpy())
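The transform list above also mentions MFCC, which is not demonstrated; a minimal sketch on the same waveform:

# Mel-frequency cepstral coefficients (sketch)
mfcc = torchaudio.transforms.MFCC(sample_rate=sample_rate)(waveform)

print("Shape of MFCC: {}".format(mfcc.size()))

plt.figure()
plt.imshow(mfcc[0,:,:].detach().numpy(), cmap='gray')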
--------------------------------------------------------------------------------
/基础:
--------------------------------------------------------------------------------
PyTorch is the NumPy of the neural-network world.
Installation:
https://pytorch.org/

pip3 install torch torchvision

# Tensors
import torch
import numpy as np

np_data = np.arange(6).reshape((2, 3))
torch_data = torch.from_numpy(np_data)
tensor2array = torch_data.numpy()
print(
    '\nnumpy array:', np_data,
    '\ntorch tensor:', torch_data,
    '\ntensor to array:', tensor2array,
)

dtype — eight data types:
16-bit floating point       torch.float16 or torch.half
32-bit floating point       torch.float32 or torch.float
64-bit floating point       torch.float64 or torch.double

8-bit integer (unsigned)    torch.uint8
8-bit integer (signed)      torch.int8
16-bit integer (signed)     torch.int16 or torch.short
32-bit integer (signed)     torch.int32 or torch.int
64-bit integer (signed)     torch.int64 or torch.long

# abs — absolute value
data = [-1, -2, 1, 2]
tensor = torch.FloatTensor(data)
print(
    '\nabs',
    '\nnumpy: ', np.abs(data),
    '\ntorch: ', torch.abs(tensor)
)

# sin — trigonometric sine
print(
    '\nsin',
    '\nnumpy: ', np.sin(data),
    '\ntorch: ', torch.sin(tensor)
)

# mean
print(
    '\nmean',
    '\nnumpy: ', np.mean(data),
    '\ntorch: ', torch.mean(tensor)
)

data = [[1,2], [3,4]]
tensor = torch.FloatTensor(data)
# the correct way to do matrix multiplication
print(
    '\nmatrix multiplication (matmul)',
    '\nnumpy: ', np.matmul(data, data),
    '\ntorch: ', torch.mm(tensor, tensor)
)


# Variables (in torch >= 0.4 plain Tensors track gradients themselves)
import torch
from torch.autograd import Variable

tensor = torch.FloatTensor([[1,2],[3,4]])

variable = Variable(tensor, requires_grad=True)

print(tensor)

print(variable)

t_out = torch.mean(tensor*tensor)
v_out = torch.mean(variable*variable)
print(t_out)
print(v_out)

v_out.backward()  # backpropagate the error from v_out

print(variable.grad)          # gradient of the initial Variable

print(variable)               # Variable form

print(variable.data)          # tensor form

print(variable.data.numpy())  # numpy form

# Datasets
# Built-in:
torchvision.datasets
MNIST
COCO (Captioning and Detection)
LSUN Classification
ImageFolder
Imagenet-12
CIFAR10 and CIFAR100
STL10

datasets.MNIST(root, train=True, transform=None, target_transform=None, download=False)

# Custom datasets subclass torch.utils.data.Dataset and implement two methods:
class MyDataset(Dataset):
    def __len__(self): ...           # so that len(dataset) works
    def __getitem__(self, idx): ...  # so that dataset[i] works

# Iteration
for i in range(len(dataset)):
    dataset[i]

torch.utils.data.DataLoader
dataloader = DataLoader(dataset, batch_size=4,
                        shuffle=True, num_workers=4)
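A runnable end-to-end sketch of the two-method protocol above (SquaresDataset is an illustrative name):

import torch
from torch.utils.data import Dataset, DataLoader

class SquaresDataset(Dataset):
    def __init__(self, n):
        self.xs = torch.arange(n, dtype=torch.float32)

    def __len__(self):
        return len(self.xs)                       # makes len(dataset) work

    def __getitem__(self, idx):
        return self.xs[idx], self.xs[idx] ** 2    # makes dataset[i] work

loader = DataLoader(SquaresDataset(100), batch_size=4, shuffle=True)
for x, y in loader:
    pass   # each x, y is a batch of 4 samples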
# Layers
import torch.nn as nn
import torch.nn.functional as F

# Linear layer
class torch.nn.Linear(in_features, out_features, bias=True)
torch.nn.functional.linear(input, weight, bias=None)

# Convolution layers
class torch.nn.Conv1d(in_channels, out_channels, kernel_size,
                      stride=1, padding=0, dilation=1, groups=1, bias=True)
    dilation: controls the spacing between kernel points (dilated / atrous convolution)
    groups: controls the connections between inputs and outputs
    # Shape and parameter-count examples (input, kernel -> output, weight count):
    # 28*28*2 input, 3*3 kernel with 64 filters -> 26*26*64 output, 2*3*3*64 weights
    # 28*28*1 input, 3*3 kernel with 32 filters -> 26*26*32 output, 1*3*3*32 weights
torch.nn.functional.conv1d(input, weight, bias=None,
                           stride=1, padding=0, dilation=1, groups=1)
class torch.nn.Conv2d
    # 1d: 100*1*batchsize -> kernel 3*1*64 -> 100*64*batchsize
    # 2d: 224*224*3*batchsize -> kernel 3*3*3*64 -> 224*224*64*batchsize
    # 3d kernels look like 3*3*3*64
class torch.nn.Conv3d

class torch.nn.ConvTranspose1d(in_channels, out_channels,
                               kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True)
torch.nn.functional.conv_transpose1d(input, weight, bias=None,
                                     stride=1, padding=0, output_padding=0, groups=1)
class torch.nn.ConvTranspose2d
class torch.nn.ConvTranspose3d

# Pooling layers
class torch.nn.MaxPool1d(kernel_size, stride=None,
                         padding=0, dilation=1, return_indices=False, ceil_mode=False)
    return_indices - if True, also return the indices of the max values
    ceil_mode - if True, use ceil instead of the default floor when computing the output size
torch.nn.functional.max_pool1d(input, kernel_size, stride=None, padding=0,
                               dilation=1, ceil_mode=False, return_indices=False)
class torch.nn.MaxPool2d
class torch.nn.MaxPool3d
class torch.nn.MaxUnpool1d(kernel_size, stride=None, padding=0)
torch.nn.functional.max_unpool1d(input, indices,
                                 kernel_size, stride=None, padding=0, output_size=None)
2d
3d

class torch.nn.AvgPool1d(kernel_size, stride=None, padding=0,
                         ceil_mode=False, count_include_pad=True)
torch.nn.functional.avg_pool1d(input, kernel_size, stride=None, padding=0,
                               ceil_mode=False, count_include_pad=True)
2d
3d

class torch.nn.FractionalMaxPool2d(kernel_size, output_size=None, output_ratio=None,
                                   return_indices=False, _random_samples=None)
# 2D power-average (Lp) pooling over the input signal
class torch.nn.LPPool2d(norm_type, kernel_size, stride=None, ceil_mode=False)
torch.nn.functional.lp_pool2d(input, norm_type, kernel_size, stride=None, ceil_mode=False)

class torch.nn.AdaptiveMaxPool1d(output_size, return_indices=False)
torch.nn.functional.adaptive_max_pool1d(input, output_size, return_indices=False)
2d

class torch.nn.AdaptiveAvgPool1d(output_size)
torch.nn.functional.adaptive_avg_pool1d(input, output_size)
2d

# Activation functions
class torch.nn.ReLU(inplace=False)
torch.nn.functional.relu(input, inplace=False)
class torch.nn.ReLU6(inplace=False)
torch.nn.functional.relu6(input, inplace=False)
class torch.nn.ELU(alpha=1.0, inplace=False)
torch.nn.functional.elu(input, alpha=1.0, inplace=False)
class torch.nn.PReLU(num_parameters=1, init=0.25)
torch.nn.functional.prelu(input, weight)
class torch.nn.LeakyReLU(negative_slope=0.01, inplace=False)
torch.nn.functional.leaky_relu(input, negative_slope=0.01, inplace=False)
class torch.nn.Threshold(threshold, value, inplace=False)
torch.nn.functional.threshold(input, threshold, value, inplace=False)
class torch.nn.Tanh()
torch.nn.functional.tanh(input)
class torch.nn.Hardtanh(min_value=-1, max_value=1, inplace=False)
torch.nn.functional.hardtanh(input, min_val=-1.0, max_val=1.0, inplace=False)
class torch.nn.Sigmoid()
torch.nn.functional.sigmoid(input)
class torch.nn.LogSigmoid()
torch.nn.functional.logsigmoid(input)
class torch.nn.Softplus(beta=1, threshold=20)
torch.nn.functional.softplus(input, beta=1, threshold=20)
class torch.nn.Softshrink(lambd=0.5)
torch.nn.functional.softshrink(input, lambd=0.5)
class torch.nn.Softmin()
torch.nn.functional.softmin(input)
class torch.nn.Softmax()
torch.nn.functional.softmax(input)
class torch.nn.LogSoftmax()
torch.nn.functional.log_softmax(input)

class torch.nn.BatchNorm1d(num_features, eps=1e-05, momentum=0.1, affine=True)
torch.nn.functional.batch_norm(input, running_mean, running_var,
                               weight=None, bias=None, training=False, momentum=0.1, eps=1e-05)
2d
3d

# RNN
class torch.nn.RNN(*args, **kwargs)
    input_size – number of features of the input x.
    hidden_size – number of features of the hidden state.
    num_layers – number of stacked RNN layers.
    nonlinearity – whether to use tanh or relu; default is tanh.
    bias – if False, the layer does not use the bias weights b_ih and b_hh; default is True.
    dropout – if non-zero, adds a dropout layer on the outputs of every layer except the last.
    bidirectional – if True, becomes a bidirectional RNN; default is False.
class torch.nn.LSTM(*args, **kwargs)
class torch.nn.GRU()
# Single RNN cells
class torch.nn.RNNCell(input_size, hidden_size, bias=True, nonlinearity='tanh')
class torch.nn.LSTMCell(input_size, hidden_size, bias=True)
class torch.nn.GRUCell(input_size, hidden_size, bias=True)
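A quick shape check for the recurrent layers (values are illustrative):

lstm = nn.LSTM(input_size=10, hidden_size=20, num_layers=2)
x = torch.randn(5, 3, 10)        # (seq_len, batch, input_size)
out, (h_n, c_n) = lstm(x)
print(out.shape)                 # torch.Size([5, 3, 20])
print(h_n.shape, c_n.shape)      # torch.Size([2, 3, 20]) for both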
# Dropout
class torch.nn.Dropout(p=0.5, inplace=False)
    p - probability of zeroing an element. Default: 0.5
torch.nn.functional.dropout(input, p=0.5, training=False, inplace=False)

# Embedding layer
class torch.nn.Embedding(num_embeddings, embedding_dim,
                         padding_idx=None, max_norm=None, norm_type=2, scale_grad_by_freq=False, sparse=False)

# Loss functions
class torch.nn.L1Loss(size_average=True)
torch.nn.functional.l1_loss(input, target, size_average=True)
class torch.nn.SmoothL1Loss(size_average=True)
torch.nn.functional.smooth_l1_loss(input, target, size_average=True)
class torch.nn.MSELoss(size_average=True)
torch.nn.functional.mse_loss(input, target, size_average=True)
class torch.nn.CrossEntropyLoss(weight=None, size_average=True)
torch.nn.functional.cross_entropy(input, target, weight=None, size_average=True)
class torch.nn.BCELoss(weight=None, size_average=True)
torch.nn.functional.binary_cross_entropy(input, target, weight=None, size_average=True)
# Negative log-likelihood loss
class torch.nn.NLLLoss(weight=None, size_average=True)
torch.nn.functional.nll_loss(input, target, weight=None, size_average=True)
class torch.nn.NLLLoss2d(weight=None, size_average=True)
torch.nn.functional.nll_loss2d(input, target, weight=None, size_average=True)
class torch.nn.KLDivLoss(weight=None, size_average=True)
torch.nn.functional.kl_div(input, target, size_average=True)

class torch.nn.HingeEmbeddingLoss(size_average=True)
class torch.nn.CosineEmbeddingLoss(margin=0, size_average=True)

# Padding
torch.nn.functional.pad(input, pad, mode='constant', value=0)

import torch.nn as nn
import torch.nn.functional as F
# MaxUnpool demo: unpool scatters the pooled values back to the recorded indices
pool = nn.MaxPool2d(2, stride=2, return_indices=True)
unpool = nn.MaxUnpool2d(2, stride=2)
input = Variable(torch.Tensor([[[[ 1,  2,  3,  4],
                                 [ 5,  6,  7,  8],
                                 [ 9, 10, 11, 12],
                                 [13, 14, 15, 16]]]]))
output, indices = pool(input)
print(output, indices)
unpool(output, indices)

# Optimizers
torch.optim
class torch.optim.Optimizer(params, defaults)
    step()
    zero_grad()

class torch.optim.Adadelta(params, lr=1.0, rho=0.9, eps=1e-06, weight_decay=0)
class torch.optim.Adagrad(params, lr=0.01, lr_decay=0, weight_decay=0)
class torch.optim.Adam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
class torch.optim.Adamax(params, lr=0.002, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
class torch.optim.ASGD(params, lr=0.01, lambd=0.0001, alpha=0.75, t0=1000000.0, weight_decay=0)
class torch.optim.LBFGS(params, lr=1, max_iter=20, max_eval=None, tolerance_grad=1e-05, tolerance_change=1e-09, history_size=100, line_search_fn=None)
class torch.optim.RMSprop(params, lr=0.01, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False)
class torch.optim.Rprop(params, lr=0.01, etas=(0.5, 1.2), step_sizes=(1e-06, 50))
class torch.optim.SGD(params, lr=<required>, momentum=0, dampening=0, weight_decay=0, nesterov=False)
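The step()/zero_grad() protocol in a typical iteration (sketch; net, loss_fn, x and y are placeholders):

optimizer = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

optimizer.zero_grad()          # clear the old gradients
loss = loss_fn(net(x), y)      # forward pass
loss.backward()                # compute gradients
optimizer.step()               # update the parameters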
# Models — two ways to define them
# Linear model

net = torch.nn.Sequential(
    torch.nn.Linear(1, 10),
    torch.nn.ReLU(),
    torch.nn.Linear(10, 1)
)

class LinearNet(torch.nn.Module):
    def __init__(self, n_feature, n_hidden, n_output):
        super(LinearNet, self).__init__()
        self.hidden = torch.nn.Linear(n_feature, n_hidden)
        self.predict = torch.nn.Linear(n_hidden, n_output)

    def forward(self, x):
        x = F.relu(self.hidden(x))
        x = self.predict(x)
        return x

net1 = LinearNet(1, 10, 1)

# Convolutional model
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=16,
                kernel_size=5,
                stride=1,
                padding=2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        # two 2x2 poolings shrink 28x28 inputs to 7x7, hence 32 * 7 * 7
        self.out = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)
        output = self.out(x)
        return output
cnn = CNN()
print(cnn)

# RNN
class RNN(nn.Module):
    def __init__(self):
        super(RNN, self).__init__()

        self.rnn = nn.LSTM(
            input_size=28,
            hidden_size=64,
            num_layers=1,
            batch_first=True,
        )

        self.out = nn.Linear(64, 10)

    def forward(self, x):
        # x shape     (batch, time_step, input_size)
        # r_out shape (batch, time_step, output_size)
        # h_n shape   (n_layers, batch, hidden_size)
        # h_c shape   (n_layers, batch, hidden_size)
        r_out, (h_n, h_c) = self.rnn(x, None)
        # take the output at the last time step;
        # here r_out[:, -1, :] equals h_n
        out = self.out(r_out[:, -1, :])
        return out

rnn = RNN()
print(rnn)

# GPU acceleration
# Move every tensor and model to the GPU with .cuda():
x = x.cuda()
y = y.cuda()
cnn = CNN()
cnn.cuda()
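The later notes (空间变换器网络, 迁移学习) use the device-agnostic idiom instead of calling .cuda() everywhere; a short sketch with the CNN above:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
cnn = CNN().to(device)   # works on both GPU-equipped and CPU-only machines
# inputs must be moved the same way: x = x.to(device)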
# Saving
torch.save(net, 'net.pkl')                      # save the whole network
torch.save(net.state_dict(), 'net_params.pkl')  # save only the parameters
# Loading
net = torch.load('net.pkl')
net3 = torch.nn.Sequential(  # rebuild the same architecture before loading the params
    torch.nn.Linear(1, 10),
    torch.nn.ReLU(),
    torch.nn.Linear(10, 1)
)
net3.load_state_dict(torch.load('net_params.pkl'))

# MNIST
import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision
import matplotlib.pyplot as plt

# Hyper parameters
EPOCH = 10
BATCH_SIZE = 50
LR = 0.001              # learning rate
DOWNLOAD_MNIST = True   # set to False once the MNIST data has been downloaded

train_data = torchvision.datasets.MNIST(
    root='./mnist/',    # where to save / load the data
    train=True,         # this is training data
    transform=torchvision.transforms.ToTensor(),
    download=DOWNLOAD_MNIST  # download only if not already present
)

test_data = torchvision.datasets.MNIST(root='./mnist/', train=False)

train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

test_x = torch.unsqueeze(test_data.test_data, dim=1).type(torch.FloatTensor)/255.
test_y = test_data.test_labels

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=16,
                kernel_size=5,
                stride=1,
                padding=2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.out = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)
        output = self.out(x)
        return output

cnn = CNN()
optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)   # optimize all cnn parameters
loss_func = nn.CrossEntropyLoss()                       # the target label is not one-hotted

# training and testing
for epoch in range(EPOCH):
    for step, (b_x, b_y) in enumerate(train_loader):    # batch data; x is normalized when iterating train_loader
        output = cnn(b_x)               # cnn output
        loss = loss_func(output, b_y)   # cross entropy loss
        optimizer.zero_grad()           # clear gradients for this training step
        loss.backward()                 # backpropagation, compute gradients
        optimizer.step()                # apply gradients

    test_output = cnn(test_x)
    pred_y = torch.max(test_output, 1)[1].data.numpy()
    accuracy = float((pred_y == test_y.data.numpy()).astype(int).sum()) / float(test_y.size(0))
    print('Epoch: ', epoch, '| train loss: %.4f' % loss.data.numpy(), '| test accuracy: %.2f' % accuracy)
# GPU version
test_x = torch.unsqueeze(test_data.test_data, dim=1).type(torch.FloatTensor).cuda()/255.
test_y = test_data.test_labels.cuda()
cnn.cuda()
optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)   # optimize all cnn parameters
loss_func = nn.CrossEntropyLoss()                       # the target label is not one-hotted

for epoch in range(EPOCH):
    for step, (b_x, b_y) in enumerate(train_loader):    # batch data; x is normalized when iterating train_loader
        b_x = b_x.cuda()
        b_y = b_y.cuda()
        output = cnn(b_x)               # cnn output
        loss = loss_func(output, b_y)   # cross entropy loss
        optimizer.zero_grad()           # clear gradients for this training step
        loss.backward()                 # backpropagation, compute gradients
        optimizer.step()                # apply gradients

    test_output = cnn(test_x)
    pred_y = torch.max(test_output, 1)[1].cuda().data
    accuracy = torch.sum(pred_y == test_y).cuda().type(torch.FloatTensor) / test_y.size(0)
    print('Epoch: ', epoch, '| train loss: %.4f' % loss.data, '| test accuracy: %.2f' % accuracy)

--------------------------------------------------------------------------------
/强化学习(DQN):
--------------------------------------------------------------------------------
OpenAI Gym provides the reinforcement-learning environments.

!pip3 install gym

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import gym

# Hyper parameters
BATCH_SIZE = 32
LR = 0.01
EPSILON = 0.9             # epsilon-greedy threshold
GAMMA = 0.9               # reward discount factor
TARGET_REPLACE_ITER = 10  # how often to refresh the target network
MEMORY_CAPACITY = 2000    # replay memory size
env = gym.make('CartPole-v0')  # the pole-balancing game
env = env.unwrapped
N_ACTIONS = env.action_space.n             # number of actions the cart can take
N_STATES = env.observation_space.shape[0]  # number of observed state variables

class Net(nn.Module):
    def __init__(self, ):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(N_STATES, 10)
        self.fc1.weight.data.normal_(0, 0.1)
        self.out = nn.Linear(10, N_ACTIONS)
        self.out.weight.data.normal_(0, 0.1)

    def forward(self, x):
        x = self.fc1(x)
        x = F.relu(x)
        actions_value = self.out(x)
        return actions_value

class DQN(object):
    def __init__(self):
        self.eval_net, self.target_net = Net(), Net()

        self.learn_step_counter = 0   # counts steps until the next target update
        self.memory_counter = 0       # counts stored transitions
        self.memory = np.zeros((MEMORY_CAPACITY, N_STATES * 2 + 2))  # initialize replay memory
        self.optimizer = torch.optim.Adam(self.eval_net.parameters(), lr=LR)
        self.loss_func = nn.MSELoss()

    def choose_action(self, x):
        # epsilon-greedy policy
        x = torch.unsqueeze(torch.FloatTensor(x), 0)
        # only a single sample is fed in here
        if np.random.uniform() < EPSILON:   # exploit: pick the best action
            actions_value = self.eval_net.forward(x)
            action = torch.max(actions_value, 1)[1].data.numpy()[0]  # return the argmax
        else:                               # explore: pick a random action
            action = np.random.randint(0, N_ACTIONS)
        return action

    def store_transition(self, s, a, r, s_):
        transition = np.hstack((s, [a, r], s_))
        # once the memory is full, overwrite the oldest data
        index = self.memory_counter % MEMORY_CAPACITY
        self.memory[index, :] = transition
        self.memory_counter += 1

    def learn(self):
        # refresh the target-network parameters
        if self.learn_step_counter % TARGET_REPLACE_ITER == 0:
            self.target_net.load_state_dict(self.eval_net.state_dict())
        self.learn_step_counter += 1

        # sample a batch from the replay memory
        sample_index = np.random.choice(MEMORY_CAPACITY, BATCH_SIZE)
        b_memory = self.memory[sample_index, :]
        b_s = torch.FloatTensor(b_memory[:, :N_STATES])
        b_a = torch.LongTensor(b_memory[:, N_STATES:N_STATES+1].astype(int))
        b_r = torch.FloatTensor(b_memory[:, N_STATES+1:N_STATES+2])
        b_s_ = torch.FloatTensor(b_memory[:, -N_STATES:])

        # pick the q values of the actions b_a that were actually taken
        # (q_eval originally holds the values of all actions)
        q_eval = self.eval_net(b_s).gather(1, b_a)   # shape (batch, 1)
        q_next = self.target_net(b_s_).detach()      # detached: no gradients flow into the target net
        q_target = b_r + GAMMA * q_next.max(1)[0].view(BATCH_SIZE, 1)  # shape (batch, 1)
        loss = self.loss_func(q_eval, q_target)
        print(loss)

        # compute gradients and update the eval net
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

dqn = DQN()
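learn() implements the Q-learning (TD) target, q_target = r + GAMMA * max_a' Q_target(s', a'); note that this code does not zero the bootstrap term on terminal transitions. A one-transition illustration (values are made up):

s_prime = torch.FloatTensor(np.random.rand(1, N_STATES))
q_next_demo = dqn.target_net(s_prime).detach()
td_target = 1.0 + GAMMA * q_next_demo.max(1)[0].item()   # reward r = 1.0 is illustrative
print(td_target)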
for i_episode in range(400):
    # reset the environment
    s = env.reset()
    while True:
        a = dqn.choose_action(s)

        # take the action, get the environment feedback
        s_, r, done, info = env.step(a)

        # reshape the reward so the DQN learns faster
        x, x_dot, theta, theta_dot = s_
        r1 = (env.x_threshold - abs(x)) / env.x_threshold - 0.8
        r2 = (env.theta_threshold_radians - abs(theta)) / env.theta_threshold_radians - 0.5
        r = r1 + r2

        # store the transition
        dqn.store_transition(s, a, r, s_)

        if dqn.memory_counter > MEMORY_CAPACITY:
            dqn.learn()   # start learning once the memory is full

        if done:          # episode over, start the next one
            break

        s = s_

--------------------------------------------------------------------------------
/空间变换器网络:
--------------------------------------------------------------------------------
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np

# The spatial transformer network (STN) is a visual attention mechanism
# that lets a neural network learn how to apply spatial transformations
# to its input image.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Training dataset
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST(root='.', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])), batch_size=64, shuffle=True, num_workers=4)
# Test dataset
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST(root='.', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])), batch_size=64, shuffle=True, num_workers=4)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

        # Localization network
        self.localization = nn.Sequential(
            nn.Conv2d(1, 8, kernel_size=7),
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(True),
            nn.Conv2d(8, 10, kernel_size=5),
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(True)
        )

        # Regressor for the 3 * 2 affine matrix
        self.fc_loc = nn.Sequential(
            nn.Linear(10 * 3 * 3, 32),
            nn.ReLU(True),
            nn.Linear(32, 3 * 2)
        )

        # Initialize the weights/bias with the identity transformation
        self.fc_loc[2].weight.data.zero_()
        self.fc_loc[2].bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float))

    # The spatial transformer forward pass
    def stn(self, x):
        xs = self.localization(x)
        xs = xs.view(-1, 10 * 3 * 3)
        theta = self.fc_loc(xs)
        theta = theta.view(-1, 2, 3)

        grid = F.affine_grid(theta, x.size())
        x = F.grid_sample(x, grid)

        return x

    # Forward pass
    def forward(self, x):
        x = self.stn(x)
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
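stn() builds on F.affine_grid and F.grid_sample; since fc_loc is initialized to the identity transform, the module starts out as a no-op. A sanity-check sketch:

theta_id = torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float).view(1, 2, 3)
img = torch.randn(1, 1, 28, 28)
grid = F.affine_grid(theta_id, img.size())
print(torch.allclose(F.grid_sample(img, grid), img, atol=1e-4))  # ~True up to interpolation error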
model = Net().to(device)

optimizer = optim.SGD(model.parameters(), lr=0.01)

def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 500 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

def test():
    with torch.no_grad():
        model.eval()
        test_loss = 0
        correct = 0
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)

            test_loss += F.nll_loss(output, target, size_average=False).item()
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()

        test_loss /= len(test_loader.dataset)
        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'
              .format(test_loss, correct, len(test_loader.dataset),
                      100. * correct / len(test_loader.dataset)))


def convert_image_np(inp):
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    return inp

def visualize_stn():
    with torch.no_grad():
        data = next(iter(test_loader))[0].to(device)

        input_tensor = data.cpu()
        transformed_input_tensor = model.stn(data).cpu()

        in_grid = convert_image_np(
            torchvision.utils.make_grid(input_tensor))

        out_grid = convert_image_np(
            torchvision.utils.make_grid(transformed_input_tensor))

        f, axarr = plt.subplots(1, 2)
        axarr[0].imshow(in_grid)
        axarr[0].set_title('Dataset Images')

        axarr[1].imshow(out_grid)
        axarr[1].set_title('Transformed Images')

for epoch in range(20):
    train(epoch)
    test()

visualize_stn()

plt.ioff()
plt.show()

--------------------------------------------------------------------------------
/迁移学习:
--------------------------------------------------------------------------------
# Data augmentation (ToTensor must come before Normalize, which expects a tensor)
im_aug = torchvision.transforms.Compose([
    torchvision.transforms.Resize(100),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.RandomCrop(50),
    torchvision.transforms.ColorJitter(brightness=0.5, contrast=0.5, hue=0.5),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

train_set = CIFAR10('./data', train=True, transform=im_aug)
# The transform is applied lazily, every time a sample is read from the dataset.

# Transfer learning
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
# MNIST baseline (same CNN as in the 基础 notes)
import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision
import matplotlib.pyplot as plt

# Hyper parameters
EPOCH = 10
BATCH_SIZE = 50
LR = 0.001              # learning rate
DOWNLOAD_MNIST = True   # set to False once the MNIST data has been downloaded

train_data = torchvision.datasets.MNIST(
    root='./mnist/',    # where to save / load the data
    train=True,         # this is training data
    transform=torchvision.transforms.ToTensor(),
    download=DOWNLOAD_MNIST  # download only if not already present
)

print(train_data)
test_data = torchvision.datasets.MNIST(root='./mnist/', train=False)

train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

test_x = torch.unsqueeze(test_data.test_data, dim=1).type(torch.FloatTensor)/255.
test_y = test_data.test_labels

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=16,
                kernel_size=5,
                stride=1,
                padding=2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.out = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)
        output = self.out(x)
        return output

cnn = CNN()
optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)   # optimize all cnn parameters
loss_func = nn.CrossEntropyLoss()                       # the target label is not one-hotted

# training and testing
for epoch in range(EPOCH):
    for step, (b_x, b_y) in enumerate(train_loader):    # batch data; x is normalized when iterating train_loader
        output = cnn(b_x)               # cnn output
        loss = loss_func(output, b_y)   # cross entropy loss
        optimizer.zero_grad()           # clear gradients for this training step
        loss.backward()                 # backpropagation, compute gradients
        optimizer.step()                # apply gradients

    test_output = cnn(test_x)
    pred_y = torch.max(test_output, 1)[1].data.numpy()
    accuracy = float((pred_y == test_y.data.numpy()).astype(int).sum()) / float(test_y.size(0))
    print('Epoch: ', epoch, '| train loss: %.4f' % loss.data.numpy(), '| test accuracy: %.2f' % accuracy)

# Data augmentation and normalization
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

data_dir = 'hymenoptera_data'
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
                                              shuffle=True, num_workers=4)
               for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
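datasets.ImageFolder infers the class labels from the directory layout; the hymenoptera_data folder is expected to look like:

hymenoptera_data/
    train/
        ants/xxx.jpg ...
        bees/xxx.jpg ...
    val/
        ants/xxx.jpg ...
        bees/xxx.jpg ...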
def imshow(inp, title=None):
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)

inputs, classes = next(iter(dataloaders['train']))

out = torchvision.utils.make_grid(inputs)

imshow(out, title=[class_names[x] for x in classes])

def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            # Iterate over the data
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # gradients are tracked only in the training phase
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

def visualize_model(model, num_images=6):
    was_training = model.training
    model.eval()
    images_so_far = 0
    fig = plt.figure()

    with torch.no_grad():
        for i, (inputs, labels) in enumerate(dataloaders['val']):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            for j in range(inputs.size()[0]):
                images_so_far += 1
                ax = plt.subplot(num_images//2, 2, images_so_far)
                ax.axis('off')
                ax.set_title('predicted: {}'.format(class_names[preds[j]]))
                imshow(inputs.cpu().data[j])

                if images_so_far == num_images:
                    model.train(mode=was_training)
                    return
        model.train(mode=was_training)

# Finetuning: update all parameters, starting from the pretrained weights
model_ft = models.resnet18(pretrained=True)
print(model_ft)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 2)  # replace the 1000-class head with a 2-class one

print(model_ft)
model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=25)

# Freeze every layer except the last one
model_conv = torchvision.models.resnet18(pretrained=True)
for param in model_conv.parameters():
    param.requires_grad = False

num_ftrs = model_conv.fc.in_features
model_conv.fc = nn.Linear(num_ftrs, 2)  # newly constructed modules have requires_grad=True

model_conv = model_conv.to(device)

criterion = nn.CrossEntropyLoss()

# only the parameters of the final layer are optimized
optimizer_conv = optim.SGD(model_conv.fc.parameters(), lr=0.001, momentum=0.9)

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)

model_conv = train_model(model_conv, criterion, optimizer_conv,
                         exp_lr_scheduler, num_epochs=20)
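The two strategies differ in how many parameters actually receive gradients; a small helper to check (count_trainable is an illustrative name):

def count_trainable(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(count_trainable(model_ft))    # every parameter of resnet18 is updated
print(count_trainable(model_conv))  # only the replaced fc layer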
# Alternatively, stack a new 2-class head on top of the existing 1000-class output
model_conv = torchvision.models.resnet18(pretrained=True)
model = torch.nn.Sequential(
    model_conv,
    torch.nn.Linear(1000, 2)
)
print(model)
--------------------------------------------------------------------------------