├── .gitignore ├── .travis.yml ├── 01_basics.py ├── 02_manual_gradient.py ├── 03_auto_gradient.py ├── 05_linear_regression.py ├── 06_logistic_regression.py ├── 07_diabets_logistic.py ├── 08_1_dataset_loader.py ├── 08_2_dataset_loade_logistic.py ├── 09_01_softmax_loss.py ├── 09_2_softmax_mnist.py ├── 10_1_cnn_mnist.py ├── 11_1_toy_inception_mnist.py ├── 12_1_rnn_basics.py ├── 12_2_hello_rnn.py ├── 12_3_hello_rnn_seq.py ├── 12_4_hello_rnn_emb.py ├── 13_1_rnn_classification_basics.py ├── 13_2_rnn_classification.py ├── 13_3_char_rnn.py ├── 13_4_pack_pad.py ├── 14_1_seq2seq.py ├── 14_2_seq2seq_att.py ├── README.md ├── data ├── diabetes.csv.gz ├── names_test.csv.gz ├── names_train.csv.gz └── shakespeare.txt.gz ├── name_dataset.py ├── requirements.txt ├── seq2seq_models.py ├── slides ├── Lecture 01: Overview.pdf ├── Lecture 02: Linear Model.pdf ├── Lecture 03: Gradient Descent.pdf ├── Lecture 05: Linear regression in PyTorch way.pdf ├── Lecture 06: Logistic Regression.pdf ├── Lecture 07: Wide & Deep.pdf ├── Lecture 08: DataLoader.pdf ├── Lecture 09: Softmax Classifier.pdf └── P-Epilogue: What's the next?.pdf └── text_loader.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | *.key 3 | .nsml* 4 | *.pt 5 | X* 6 | mnist_data 7 | ppts 8 | .ipynb_checkpoints 9 | client_secret.json 10 | __pycache__/ 11 | .py* 12 | tmp 13 | template.pdf 14 | *.ipynb 15 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # code below is taken from https://github.com/fchollet/keras/blob/master/.travis.yml 2 | sudo: required 3 | dist: trusty 4 | language: python 5 | python: # Only two versions for now 6 | - "2.7" 7 | - "3.6" 8 | # command to install dependencies 9 | install: "pip install -r requirements.txt" 10 | 11 | script: 12 | - python -m compileall . 
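# Smoke test: the next step runs every numbered example script with python,
# one file per invocation (-n 1), up to three processes in parallel (-P 3).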
13 | - ls ??_*.py|xargs -n 1 -P 3 python 14 | -------------------------------------------------------------------------------- /01_basics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | x_data = [1.0, 2.0, 3.0] 5 | y_data = [2.0, 4.0, 6.0] 6 | 7 | 8 | # our model for the forward pass 9 | def forward(x): 10 | return x * w 11 | 12 | 13 | # Loss function 14 | def loss(x, y): 15 | y_pred = forward(x) 16 | return (y_pred - y) * (y_pred - y) 17 | 18 | # List of weights/Mean square Error (Mse) for each input 19 | w_list = [] 20 | mse_list = [] 21 | 22 | for w in np.arange(0.0, 4.1, 0.1): 23 | # Print the weights and initialize the lost 24 | print("w=", w) 25 | l_sum = 0 26 | 27 | for x_val, y_val in zip(x_data, y_data): 28 | # For each input and output, calculate y_hat 29 | # Compute the total loss and add to the total error 30 | y_pred_val = forward(x_val) 31 | l = loss(x_val, y_val) 32 | l_sum += l 33 | print("\t", x_val, y_val, y_pred_val, l) 34 | # Now compute the Mean squared error (mse) of each 35 | # Aggregate the weight/mse from this run 36 | print("MSE=", l_sum / len(x_data)) 37 | w_list.append(w) 38 | mse_list.append(l_sum / len(x_data)) 39 | 40 | # Plot it all 41 | plt.plot(w_list, mse_list) 42 | plt.ylabel('Loss') 43 | plt.xlabel('w') 44 | plt.show() 45 | -------------------------------------------------------------------------------- /02_manual_gradient.py: -------------------------------------------------------------------------------- 1 | # Training Data 2 | x_data = [1.0, 2.0, 3.0] 3 | y_data = [2.0, 4.0, 6.0] 4 | 5 | w = 1.0 # a random guess: random value 6 | 7 | 8 | # our model forward pass 9 | def forward(x): 10 | return x * w 11 | 12 | 13 | # Loss function 14 | def loss(x, y): 15 | y_pred = forward(x) 16 | return (y_pred - y) * (y_pred - y) 17 | 18 | 19 | # compute gradient 20 | def gradient(x, y): # d_loss/d_w 21 | return 2 * x * (x * w - y) 22 | 23 | 24 | # Before training 25 | print("Prediction (before training)", 4, forward(4)) 26 | 27 | # Training loop 28 | for epoch in range(10): 29 | for x_val, y_val in zip(x_data, y_data): 30 | # Compute derivative w.r.t to the learned weights 31 | # Update the weights 32 | # Compute the loss and print progress 33 | grad = gradient(x_val, y_val) 34 | w = w - 0.01 * grad 35 | print("\tgrad: ", x_val, y_val, round(grad, 2)) 36 | l = loss(x_val, y_val) 37 | print("progress:", epoch, "w=", round(w, 2), "loss=", round(l, 2)) 38 | 39 | # After training 40 | print("Predicted score (after training)", "4 hours of studying: ", forward(4)) 41 | -------------------------------------------------------------------------------- /03_auto_gradient.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pdb 3 | 4 | x_data = [1.0, 2.0, 3.0] 5 | y_data = [2.0, 4.0, 6.0] 6 | w = torch.tensor([1.0], requires_grad=True) 7 | 8 | # our model forward pass 9 | def forward(x): 10 | return x * w 11 | 12 | # Loss function 13 | def loss(y_pred, y_val): 14 | return (y_pred - y_val) ** 2 15 | 16 | # Before training 17 | print("Prediction (before training)", 4, forward(4).item()) 18 | 19 | # Training loop 20 | for epoch in range(10): 21 | for x_val, y_val in zip(x_data, y_data): 22 | y_pred = forward(x_val) # 1) Forward pass 23 | l = loss(y_pred, y_val) # 2) Compute loss 24 | l.backward() # 3) Back propagation to update weights 25 | print("\tgrad: ", x_val, y_val, w.grad.item()) 26 | w.data = w.data - 0.01 * 
w.grad.item() 27 | 28 | # Manually zero the gradients after updating weights 29 | w.grad.data.zero_() 30 | 31 | print(f"Epoch: {epoch} | Loss: {l.item()}") 32 | 33 | # After training 34 | print("Prediction (after training)", 4, forward(4).item()) 35 | -------------------------------------------------------------------------------- /05_linear_regression.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch 3 | from torch import tensor 4 | 5 | x_data = tensor([[1.0], [2.0], [3.0]]) 6 | y_data = tensor([[2.0], [4.0], [6.0]]) 7 | 8 | 9 | class Model(nn.Module): 10 | def __init__(self): 11 | """ 12 | In the constructor we instantiate two nn.Linear module 13 | """ 14 | super(Model, self).__init__() 15 | self.linear = torch.nn.Linear(1, 1) # One in and one out 16 | 17 | def forward(self, x): 18 | """ 19 | In the forward function we accept a Variable of input data and we must return 20 | a Variable of output data. We can use Modules defined in the constructor as 21 | well as arbitrary operators on Variables. 22 | """ 23 | y_pred = self.linear(x) 24 | return y_pred 25 | 26 | 27 | # our model 28 | model = Model() 29 | 30 | # Construct our loss function and an Optimizer. The call to model.parameters() 31 | # in the SGD constructor will contain the learnable parameters of the two 32 | # nn.Linear modules which are members of the model. 33 | criterion = torch.nn.MSELoss(reduction='sum') 34 | optimizer = torch.optim.SGD(model.parameters(), lr=0.01) 35 | 36 | # Training loop 37 | for epoch in range(500): 38 | # 1) Forward pass: Compute predicted y by passing x to the model 39 | y_pred = model(x_data) 40 | 41 | # 2) Compute and print loss 42 | loss = criterion(y_pred, y_data) 43 | print(f'Epoch: {epoch} | Loss: {loss.item()} ') 44 | 45 | # Zero gradients, perform a backward pass, and update the weights. 46 | optimizer.zero_grad() 47 | loss.backward() 48 | optimizer.step() 49 | 50 | 51 | # After training 52 | hour_var = tensor([[4.0]]) 53 | y_pred = model(hour_var) 54 | print("Prediction (after training)", 4, model(hour_var).data[0][0].item()) 55 | -------------------------------------------------------------------------------- /06_logistic_regression.py: -------------------------------------------------------------------------------- 1 | from torch import tensor 2 | from torch import nn 3 | from torch import sigmoid 4 | import torch.nn.functional as F 5 | import torch.optim as optim 6 | 7 | # Training data and ground truth 8 | x_data = tensor([[1.0], [2.0], [3.0], [4.0]]) 9 | y_data = tensor([[0.], [0.], [1.], [1.]]) 10 | 11 | 12 | class Model(nn.Module): 13 | def __init__(self): 14 | """ 15 | In the constructor we instantiate nn.Linear module 16 | """ 17 | super(Model, self).__init__() 18 | self.linear = nn.Linear(1, 1) # One in and one out 19 | 20 | def forward(self, x): 21 | """ 22 | In the forward function we accept a Variable of input data and we must return 23 | a Variable of output data. 24 | """ 25 | y_pred = sigmoid(self.linear(x)) 26 | return y_pred 27 | 28 | 29 | # our model 30 | model = Model() 31 | 32 | # Construct our loss function and an Optimizer. The call to model.parameters() 33 | # in the SGD constructor will contain the learnable parameters of the two 34 | # nn.Linear modules which are members of the model. 
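# BCELoss expects probabilities in [0, 1], which is why forward() applies
# sigmoid to the linear output; reduction='mean' averages the per-sample
# binary cross-entropy -(y*log(p) + (1-y)*log(1-p)) over the batch.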
35 | criterion = nn.BCELoss(reduction='mean') 36 | optimizer = optim.SGD(model.parameters(), lr=0.01) 37 | 38 | # Training loop 39 | for epoch in range(1000): 40 | # Forward pass: Compute predicted y by passing x to the model 41 | y_pred = model(x_data) 42 | 43 | # Compute and print loss 44 | loss = criterion(y_pred, y_data) 45 | print(f'Epoch {epoch + 1}/1000 | Loss: {loss.item():.4f}') 46 | 47 | # Zero gradients, perform a backward pass, and update the weights. 48 | optimizer.zero_grad() 49 | loss.backward() 50 | optimizer.step() 51 | 52 | # After training 53 | print(f'\nLet\'s predict the hours need to score above 50%\n{"=" * 50}') 54 | hour_var = model(tensor([[1.0]])) 55 | print(f'Prediction after 1 hour of training: {hour_var.item():.4f} | Above 50%: {hour_var.item() > 0.5}') 56 | hour_var = model(tensor([[7.0]])) 57 | print(f'Prediction after 7 hours of training: {hour_var.item():.4f} | Above 50%: { hour_var.item() > 0.5}') 58 | -------------------------------------------------------------------------------- /07_diabets_logistic.py: -------------------------------------------------------------------------------- 1 | from torch import nn, optim, from_numpy 2 | import numpy as np 3 | 4 | xy = np.loadtxt('./data/diabetes.csv.gz', delimiter=',', dtype=np.float32) 5 | x_data = from_numpy(xy[:, 0:-1]) 6 | y_data = from_numpy(xy[:, [-1]]) 7 | print(f'X\'s shape: {x_data.shape} | Y\'s shape: {y_data.shape}') 8 | 9 | 10 | class Model(nn.Module): 11 | def __init__(self): 12 | """ 13 | In the constructor we instantiate two nn.Linear module 14 | """ 15 | super(Model, self).__init__() 16 | self.l1 = nn.Linear(8, 6) 17 | self.l2 = nn.Linear(6, 4) 18 | self.l3 = nn.Linear(4, 1) 19 | 20 | self.sigmoid = nn.Sigmoid() 21 | 22 | def forward(self, x): 23 | """ 24 | In the forward function we accept a Variable of input data and we must return 25 | a Variable of output data. We can use Modules defined in the constructor as 26 | well as arbitrary operators on Variables. 27 | """ 28 | out1 = self.sigmoid(self.l1(x)) 29 | out2 = self.sigmoid(self.l2(out1)) 30 | y_pred = self.sigmoid(self.l3(out2)) 31 | return y_pred 32 | 33 | 34 | # our model 35 | model = Model() 36 | 37 | 38 | # Construct our loss function and an Optimizer. The call to model.parameters() 39 | # in the SGD constructor will contain the learnable parameters of the two 40 | # nn.Linear modules which are members of the model. 41 | criterion = nn.BCELoss(reduction='mean') 42 | optimizer = optim.SGD(model.parameters(), lr=0.1) 43 | 44 | # Training loop 45 | for epoch in range(100): 46 | # Forward pass: Compute predicted y by passing x to the model 47 | y_pred = model(x_data) 48 | 49 | # Compute and print loss 50 | loss = criterion(y_pred, y_data) 51 | print(f'Epoch: {epoch + 1}/100 | Loss: {loss.item():.4f}') 52 | 53 | # Zero gradients, perform a backward pass, and update the weights. 
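# PyTorch accumulates gradients in each parameter's .grad on every call to
# backward(), so they must be cleared here before the next backward pass.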
54 | optimizer.zero_grad() 55 | loss.backward() 56 | optimizer.step() 57 | -------------------------------------------------------------------------------- /08_1_dataset_loader.py: -------------------------------------------------------------------------------- 1 | # References 2 | # https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/01-basics/pytorch_basics/main.py 3 | # http://pytorch.org/tutorials/beginner/data_loading_tutorial.html#dataset-class 4 | from torch.utils.data import Dataset, DataLoader 5 | from torch import from_numpy, tensor 6 | import numpy as np 7 | 8 | class DiabetesDataset(Dataset): 9 | """ Diabetes dataset.""" 10 | 11 | # Initialize your data, download, etc. 12 | def __init__(self): 13 | xy = np.loadtxt('./data/diabetes.csv.gz', 14 | delimiter=',', dtype=np.float32) 15 | self.len = xy.shape[0] 16 | self.x_data = from_numpy(xy[:, 0:-1]) 17 | self.y_data = from_numpy(xy[:, [-1]]) 18 | 19 | def __getitem__(self, index): 20 | return self.x_data[index], self.y_data[index] 21 | 22 | def __len__(self): 23 | return self.len 24 | 25 | 26 | dataset = DiabetesDataset() 27 | train_loader = DataLoader(dataset=dataset, 28 | batch_size=32, 29 | shuffle=True, 30 | num_workers=2) 31 | 32 | for epoch in range(2): 33 | for i, data in enumerate(train_loader, 0): 34 | # get the inputs 35 | inputs, labels = data 36 | 37 | # wrap them in Variable 38 | inputs, labels = tensor(inputs), tensor(labels) 39 | 40 | # Run your training process 41 | print(f'Epoch: {i} | Inputs {inputs.data} | Labels {labels.data}') 42 | -------------------------------------------------------------------------------- /08_2_dataset_loade_logistic.py: -------------------------------------------------------------------------------- 1 | # References 2 | # https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/01-basics/pytorch_basics/main.py 3 | # http://pytorch.org/tutorials/beginner/data_loading_tutorial.html#dataset-class 4 | from torch.utils.data import Dataset, DataLoader 5 | from torch import nn, from_numpy, optim 6 | import numpy as np 7 | 8 | 9 | class DiabetesDataset(Dataset): 10 | """ Diabetes dataset.""" 11 | # Initialize your data, download, etc. 12 | def __init__(self): 13 | xy = np.loadtxt('./data/diabetes.csv.gz', 14 | delimiter=',', dtype=np.float32) 15 | self.len = xy.shape[0] 16 | self.x_data = from_numpy(xy[:, 0:-1]) 17 | self.y_data = from_numpy(xy[:, [-1]]) 18 | 19 | def __getitem__(self, index): 20 | return self.x_data[index], self.y_data[index] 21 | 22 | def __len__(self): 23 | return self.len 24 | 25 | 26 | dataset = DiabetesDataset() 27 | train_loader = DataLoader(dataset=dataset, 28 | batch_size=32, 29 | shuffle=True, 30 | num_workers=2) 31 | 32 | 33 | class Model(nn.Module): 34 | 35 | def __init__(self): 36 | """ 37 | In the constructor we instantiate two nn.Linear module 38 | """ 39 | super(Model, self).__init__() 40 | self.l1 = nn.Linear(8, 6) 41 | self.l2 = nn.Linear(6, 4) 42 | self.l3 = nn.Linear(4, 1) 43 | 44 | self.sigmoid = nn.Sigmoid() 45 | 46 | def forward(self, x): 47 | """ 48 | In the forward function we accept a Variable of input data and we must return 49 | a Variable of output data. We can use Modules defined in the constructor as 50 | well as arbitrary operators on Variables. 51 | """ 52 | out1 = self.sigmoid(self.l1(x)) 53 | out2 = self.sigmoid(self.l2(out1)) 54 | y_pred = self.sigmoid(self.l3(out2)) 55 | return y_pred 56 | 57 | 58 | # our model 59 | model = Model() 60 | 61 | # Construct our loss function and an Optimizer. 
The call to model.parameters() 62 | # in the SGD constructor will contain the learnable parameters of the two 63 | # nn.Linear modules which are members of the model. 64 | criterion = nn.BCELoss(reduction='sum') 65 | optimizer = optim.SGD(model.parameters(), lr=0.1) 66 | 67 | # Training loop 68 | for epoch in range(2): 69 | for i, data in enumerate(train_loader, 0): 70 | # get the inputs 71 | inputs, labels = data 72 | 73 | # Forward pass: Compute predicted y by passing x to the model 74 | y_pred = model(inputs) 75 | 76 | # Compute and print loss 77 | loss = criterion(y_pred, labels) 78 | print(f'Epoch {epoch + 1} | Batch: {i+1} | Loss: {loss.item():.4f}') 79 | 80 | # Zero gradients, perform a backward pass, and update the weights. 81 | optimizer.zero_grad() 82 | loss.backward() 83 | optimizer.step() 84 | -------------------------------------------------------------------------------- /09_01_softmax_loss.py: -------------------------------------------------------------------------------- 1 | from torch import nn, tensor, max 2 | import numpy as np 3 | 4 | # Cross entropy example 5 | # One hot 6 | # 0: 1 0 0 7 | # 1: 0 1 0 8 | # 2: 0 0 1 9 | Y = np.array([1, 0, 0]) 10 | Y_pred1 = np.array([0.7, 0.2, 0.1]) 11 | Y_pred2 = np.array([0.1, 0.3, 0.6]) 12 | print(f'Loss1: {np.sum(-Y * np.log(Y_pred1)):.4f}') 13 | print(f'Loss2: {np.sum(-Y * np.log(Y_pred2)):.4f}') 14 | 15 | # Softmax + CrossEntropy (logSoftmax + NLLLoss) 16 | loss = nn.CrossEntropyLoss() 17 | 18 | # target is of size nBatch 19 | # each element in target has to have 0 <= value < nClasses (0-2) 20 | # Input is class, not one-hot 21 | Y = tensor([0], requires_grad=False) 22 | 23 | # input is of size nBatch x nClasses = 1 x 4 24 | # Y_pred are logits (not softmax) 25 | Y_pred1 = tensor([[2.0, 1.0, 0.1]]) 26 | Y_pred2 = tensor([[0.5, 2.0, 0.3]]) 27 | 28 | l1 = loss(Y_pred1, Y) 29 | l2 = loss(Y_pred2, Y) 30 | 31 | print(f'PyTorch Loss1: {l1.item():.4f} \nPyTorch Loss2: {l2.item():.4f}') 32 | print(f'Y_pred1: {max(Y_pred1.data, 1)[1].item()}') 33 | print(f'Y_pred2: {max(Y_pred2.data, 1)[1].item()}') 34 | 35 | # target is of size nBatch 36 | # each element in target has to have 0 <= value < nClasses (0-2) 37 | # Input is class, not one-hot 38 | Y = tensor([2, 0, 1], requires_grad=False) 39 | 40 | # input is of size nBatch x nClasses = 2 x 4 41 | # Y_pred are logits (not softmax) 42 | Y_pred1 = tensor([[0.1, 0.2, 0.9], 43 | [1.1, 0.1, 0.2], 44 | [0.2, 2.1, 0.1]]) 45 | 46 | Y_pred2 = tensor([[0.8, 0.2, 0.3], 47 | [0.2, 0.3, 0.5], 48 | [0.2, 0.2, 0.5]]) 49 | 50 | l1 = loss(Y_pred1, Y) 51 | l2 = loss(Y_pred2, Y) 52 | print(f'Batch Loss1: {l1.item():.4f} \nBatch Loss2: {l2.data:.4f}') 53 | -------------------------------------------------------------------------------- /09_2_softmax_mnist.py: -------------------------------------------------------------------------------- 1 | # https://github.com/pytorch/examples/blob/master/mnist/main.py 2 | from __future__ import print_function 3 | from torch import nn, optim, cuda 4 | from torch.utils import data 5 | from torchvision import datasets, transforms 6 | import torch.nn.functional as F 7 | import time 8 | 9 | # Training settings 10 | batch_size = 64 11 | device = 'cuda' if cuda.is_available() else 'cpu' 12 | print(f'Training MNIST Model on {device}\n{"=" * 44}') 13 | 14 | # MNIST Dataset 15 | train_dataset = datasets.MNIST(root='./mnist_data/', 16 | train=True, 17 | transform=transforms.ToTensor(), 18 | download=True) 19 | 20 | test_dataset = datasets.MNIST(root='./mnist_data/', 21 | train=False, 
22 | transform=transforms.ToTensor()) 23 | 24 | # Data Loader (Input Pipeline) 25 | train_loader = data.DataLoader(dataset=train_dataset, 26 | batch_size=batch_size, 27 | shuffle=True) 28 | 29 | test_loader = data.DataLoader(dataset=test_dataset, 30 | batch_size=batch_size, 31 | shuffle=False) 32 | 33 | 34 | class Net(nn.Module): 35 | 36 | def __init__(self): 37 | super(Net, self).__init__() 38 | self.l1 = nn.Linear(784, 520) 39 | self.l2 = nn.Linear(520, 320) 40 | self.l3 = nn.Linear(320, 240) 41 | self.l4 = nn.Linear(240, 120) 42 | self.l5 = nn.Linear(120, 10) 43 | 44 | def forward(self, x): 45 | x = x.view(-1, 784) # Flatten the data (n, 1, 28, 28)-> (n, 784) 46 | x = F.relu(self.l1(x)) 47 | x = F.relu(self.l2(x)) 48 | x = F.relu(self.l3(x)) 49 | x = F.relu(self.l4(x)) 50 | return self.l5(x) 51 | 52 | 53 | model = Net() 54 | model.to(device) 55 | criterion = nn.CrossEntropyLoss() 56 | optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5) 57 | 58 | 59 | def train(epoch): 60 | model.train() 61 | for batch_idx, (data, target) in enumerate(train_loader): 62 | data, target = data.to(device), target.to(device) 63 | optimizer.zero_grad() 64 | output = model(data) 65 | loss = criterion(output, target) 66 | loss.backward() 67 | optimizer.step() 68 | if batch_idx % 10 == 0: 69 | print('Train Epoch: {} | Batch Status: {}/{} ({:.0f}%) | Loss: {:.6f}'.format( 70 | epoch, batch_idx * len(data), len(train_loader.dataset), 71 | 100. * batch_idx / len(train_loader), loss.item())) 72 | 73 | 74 | def test(): 75 | model.eval() 76 | test_loss = 0 77 | correct = 0 78 | for data, target in test_loader: 79 | data, target = data.to(device), target.to(device) 80 | output = model(data) 81 | # sum up batch loss 82 | test_loss += criterion(output, target).item() 83 | # get the index of the max 84 | pred = output.data.max(1, keepdim=True)[1] 85 | correct += pred.eq(target.data.view_as(pred)).cpu().sum() 86 | 87 | test_loss /= len(test_loader.dataset) 88 | print(f'===========================\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ' 89 | f'({100. 
* correct / len(test_loader.dataset):.0f}%)') 90 | 91 | 92 | if __name__ == '__main__': 93 | since = time.time() 94 | for epoch in range(1, 10): 95 | epoch_start = time.time() 96 | train(epoch) 97 | m, s = divmod(time.time() - epoch_start, 60) 98 | print(f'Training time: {m:.0f}m {s:.0f}s') 99 | test() 100 | m, s = divmod(time.time() - epoch_start, 60) 101 | print(f'Testing time: {m:.0f}m {s:.0f}s') 102 | 103 | m, s = divmod(time.time() - since, 60) 104 | print(f'Total Time: {m:.0f}m {s:.0f}s\nModel was trained on {device}!') 105 | 106 | -------------------------------------------------------------------------------- /10_1_cnn_mnist.py: -------------------------------------------------------------------------------- 1 | # https://github.com/pytorch/examples/blob/master/mnist/main.py 2 | from __future__ import print_function 3 | import argparse 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import torch.optim as optim 8 | from torchvision import datasets, transforms 9 | from torch.autograd import Variable 10 | 11 | # Training settings 12 | batch_size = 64 13 | 14 | # MNIST Dataset 15 | train_dataset = datasets.MNIST(root='./data/', 16 | train=True, 17 | transform=transforms.ToTensor(), 18 | download=True) 19 | 20 | test_dataset = datasets.MNIST(root='./data/', 21 | train=False, 22 | transform=transforms.ToTensor()) 23 | 24 | # Data Loader (Input Pipeline) 25 | train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 26 | batch_size=batch_size, 27 | shuffle=True) 28 | 29 | test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 30 | batch_size=batch_size, 31 | shuffle=False) 32 | 33 | 34 | class Net(nn.Module): 35 | 36 | def __init__(self): 37 | super(Net, self).__init__() 38 | self.conv1 = nn.Conv2d(1, 10, kernel_size=5) 39 | self.conv2 = nn.Conv2d(10, 20, kernel_size=5) 40 | self.mp = nn.MaxPool2d(2) 41 | self.fc = nn.Linear(320, 10) 42 | 43 | def forward(self, x): 44 | in_size = x.size(0) 45 | x = F.relu(self.mp(self.conv1(x))) 46 | x = F.relu(self.mp(self.conv2(x))) 47 | x = x.view(in_size, -1) # flatten the tensor 48 | x = self.fc(x) 49 | return F.log_softmax(x) 50 | 51 | 52 | model = Net() 53 | 54 | optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5) 55 | 56 | 57 | def train(epoch): 58 | model.train() 59 | for batch_idx, (data, target) in enumerate(train_loader): 60 | data, target = Variable(data), Variable(target) 61 | optimizer.zero_grad() 62 | output = model(data) 63 | loss = F.nll_loss(output, target) 64 | loss.backward() 65 | optimizer.step() 66 | if batch_idx % 10 == 0: 67 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 68 | epoch, batch_idx * len(data), len(train_loader.dataset), 69 | 100. * batch_idx / len(train_loader), loss.item())) 70 | 71 | 72 | def test(): 73 | model.eval() 74 | test_loss = 0 75 | correct = 0 76 | for data, target in test_loader: 77 | data, target = Variable(data, volatile=True), Variable(target) 78 | output = model(data) 79 | # sum up batch loss 80 | test_loss += F.nll_loss(output, target, size_average=False).data 81 | # get the index of the max log-probability 82 | pred = output.data.max(1, keepdim=True)[1] 83 | correct += pred.eq(target.data.view_as(pred)).cpu().sum() 84 | 85 | test_loss /= len(test_loader.dataset) 86 | print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( 87 | test_loss, correct, len(test_loader.dataset), 88 | 100. 
* correct / len(test_loader.dataset))) 89 | 90 | 91 | for epoch in range(1, 10): 92 | train(epoch) 93 | test() 94 | -------------------------------------------------------------------------------- /11_1_toy_inception_mnist.py: -------------------------------------------------------------------------------- 1 | # https://github.com/pytorch/examples/blob/master/mnist/main.py 2 | from __future__ import print_function 3 | import argparse 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import torch.optim as optim 8 | from torchvision import datasets, transforms 9 | from torch.autograd import Variable 10 | 11 | # Training settings 12 | batch_size = 64 13 | 14 | # MNIST Dataset 15 | train_dataset = datasets.MNIST(root='./data/', 16 | train=True, 17 | transform=transforms.ToTensor(), 18 | download=True) 19 | 20 | test_dataset = datasets.MNIST(root='./data/', 21 | train=False, 22 | transform=transforms.ToTensor()) 23 | 24 | # Data Loader (Input Pipeline) 25 | train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 26 | batch_size=batch_size, 27 | shuffle=True) 28 | 29 | test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 30 | batch_size=batch_size, 31 | shuffle=False) 32 | 33 | 34 | class InceptionA(nn.Module): 35 | 36 | def __init__(self, in_channels): 37 | super(InceptionA, self).__init__() 38 | self.branch1x1 = nn.Conv2d(in_channels, 16, kernel_size=1) 39 | 40 | self.branch5x5_1 = nn.Conv2d(in_channels, 16, kernel_size=1) 41 | self.branch5x5_2 = nn.Conv2d(16, 24, kernel_size=5, padding=2) 42 | 43 | self.branch3x3dbl_1 = nn.Conv2d(in_channels, 16, kernel_size=1) 44 | self.branch3x3dbl_2 = nn.Conv2d(16, 24, kernel_size=3, padding=1) 45 | self.branch3x3dbl_3 = nn.Conv2d(24, 24, kernel_size=3, padding=1) 46 | 47 | self.branch_pool = nn.Conv2d(in_channels, 24, kernel_size=1) 48 | 49 | def forward(self, x): 50 | branch1x1 = self.branch1x1(x) 51 | 52 | branch5x5 = self.branch5x5_1(x) 53 | branch5x5 = self.branch5x5_2(branch5x5) 54 | 55 | branch3x3dbl = self.branch3x3dbl_1(x) 56 | branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) 57 | branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) 58 | 59 | branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1) 60 | branch_pool = self.branch_pool(branch_pool) 61 | 62 | outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool] 63 | return torch.cat(outputs, 1) 64 | 65 | 66 | class Net(nn.Module): 67 | 68 | def __init__(self): 69 | super(Net, self).__init__() 70 | self.conv1 = nn.Conv2d(1, 10, kernel_size=5) 71 | self.conv2 = nn.Conv2d(88, 20, kernel_size=5) 72 | 73 | self.incept1 = InceptionA(in_channels=10) 74 | self.incept2 = InceptionA(in_channels=20) 75 | 76 | self.mp = nn.MaxPool2d(2) 77 | self.fc = nn.Linear(1408, 10) 78 | 79 | def forward(self, x): 80 | in_size = x.size(0) 81 | x = F.relu(self.mp(self.conv1(x))) 82 | x = self.incept1(x) 83 | x = F.relu(self.mp(self.conv2(x))) 84 | x = self.incept2(x) 85 | x = x.view(in_size, -1) # flatten the tensor 86 | x = self.fc(x) 87 | return F.log_softmax(x) 88 | 89 | 90 | model = Net() 91 | 92 | optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5) 93 | 94 | 95 | def train(epoch): 96 | model.train() 97 | for batch_idx, (data, target) in enumerate(train_loader): 98 | data, target = Variable(data), Variable(target) 99 | optimizer.zero_grad() 100 | output = model(data) 101 | loss = F.nll_loss(output, target) 102 | loss.backward() 103 | optimizer.step() 104 | if batch_idx % 10 == 0: 105 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: 
{:.6f}'.format( 106 | epoch, batch_idx * len(data), len(train_loader.dataset), 107 | 100. * batch_idx / len(train_loader), loss.data[0])) 108 | 109 | 110 | def test(): 111 | model.eval() 112 | test_loss = 0 113 | correct = 0 114 | for data, target in test_loader: 115 | data, target = Variable(data, volatile=True), Variable(target) 116 | output = model(data) 117 | # sum up batch loss 118 | test_loss += F.nll_loss(output, target, size_average=False).data[0] 119 | # get the index of the max log-probability 120 | pred = output.data.max(1, keepdim=True)[1] 121 | correct += pred.eq(target.data.view_as(pred)).cpu().sum() 122 | 123 | test_loss /= len(test_loader.dataset) 124 | print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( 125 | test_loss, correct, len(test_loader.dataset), 126 | 100. * correct / len(test_loader.dataset))) 127 | 128 | 129 | for epoch in range(1, 10): 130 | train(epoch) 131 | test() 132 | -------------------------------------------------------------------------------- /12_1_rnn_basics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | 5 | # One hot encoding for each char in 'hello' 6 | h = [1, 0, 0, 0] 7 | e = [0, 1, 0, 0] 8 | l = [0, 0, 1, 0] 9 | o = [0, 0, 0, 1] 10 | 11 | # One cell RNN input_dim (4) -> output_dim (2). sequence: 5 12 | cell = nn.RNN(input_size=4, hidden_size=2, batch_first=True) 13 | 14 | # (num_layers * num_directions, batch, hidden_size) whether batch_first=True or False 15 | hidden = Variable(torch.randn(1, 1, 2)) 16 | 17 | # Propagate input through RNN 18 | # Input: (batch, seq_len, input_size) when batch_first=True 19 | inputs = Variable(torch.Tensor([h, e, l, l, o])) 20 | for one in inputs: 21 | one = one.view(1, 1, -1) 22 | # Input: (batch, seq_len, input_size) when batch_first=True 23 | out, hidden = cell(one, hidden) 24 | print("one input size", one.size(), "out size", out.size()) 25 | 26 | # We can do the whole at once 27 | # Propagate input through RNN 28 | # Input: (batch, seq_len, input_size) when batch_first=True 29 | inputs = inputs.view(1, 5, -1) 30 | out, hidden = cell(inputs, hidden) 31 | print("sequence input size", inputs.size(), "out size", out.size()) 32 | 33 | 34 | # hidden : (num_layers * num_directions, batch, hidden_size) whether batch_first=True or False 35 | hidden = Variable(torch.randn(1, 3, 2)) 36 | 37 | # One cell RNN input_dim (4) -> output_dim (2). sequence: 5, batch 3 38 | # 3 batches 'hello', 'eolll', 'lleel' 39 | # rank = (3, 5, 4) 40 | inputs = Variable(torch.Tensor([[h, e, l, l, o], 41 | [e, o, l, l, l], 42 | [l, l, e, e, l]])) 43 | 44 | # Propagate input through RNN 45 | # Input: (batch, seq_len, input_size) when batch_first=True 46 | # B x S x I 47 | out, hidden = cell(inputs, hidden) 48 | print("batch input size", inputs.size(), "out size", out.size()) 49 | 50 | 51 | # One cell RNN input_dim (4) -> output_dim (2) 52 | cell = nn.RNN(input_size=4, hidden_size=2) 53 | 54 | # The given dimensions dim0 and dim1 are swapped. 
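# This cell was built without batch_first=True, so it expects
# (seq_len, batch, input_size); transposing the (3, 5, 4) batch gives
# (5, 3, 4), and the output below becomes (5, 3, 2) with hidden (1, 3, 2).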
55 | inputs = inputs.transpose(dim0=0, dim1=1) 56 | # Propagate input through RNN 57 | # Input: (seq_len, batch_size, input_size) when batch_first=False (default) 58 | # S x B x I 59 | out, hidden = cell(inputs, hidden) 60 | print("batch input size", inputs.size(), "out size", out.size()) 61 | -------------------------------------------------------------------------------- /12_2_hello_rnn.py: -------------------------------------------------------------------------------- 1 | # Lab 12 RNN 2 | import sys 3 | import torch 4 | import torch.nn as nn 5 | from torch.autograd import Variable 6 | 7 | torch.manual_seed(777) # reproducibility 8 | # 0 1 2 3 4 9 | idx2char = ['h', 'i', 'e', 'l', 'o'] 10 | 11 | # Teach hihell -> ihello 12 | x_data = [0, 1, 0, 2, 3, 3] # hihell 13 | one_hot_lookup = [[1, 0, 0, 0, 0], # 0 14 | [0, 1, 0, 0, 0], # 1 15 | [0, 0, 1, 0, 0], # 2 16 | [0, 0, 0, 1, 0], # 3 17 | [0, 0, 0, 0, 1]] # 4 18 | 19 | y_data = [1, 0, 2, 3, 3, 4] # ihello 20 | x_one_hot = [one_hot_lookup[x] for x in x_data] 21 | 22 | # As we have one batch of samples, we will change them to variables only once 23 | inputs = Variable(torch.Tensor(x_one_hot)) 24 | labels = Variable(torch.LongTensor(y_data)) 25 | 26 | num_classes = 5 27 | input_size = 5 # one-hot size 28 | hidden_size = 5 # output from the RNN. 5 to directly predict one-hot 29 | batch_size = 1 # one sentence 30 | sequence_length = 1 # One by one 31 | num_layers = 1 # one-layer rnn 32 | 33 | 34 | class Model(nn.Module): 35 | 36 | def __init__(self): 37 | super(Model, self).__init__() 38 | self.rnn = nn.RNN(input_size=input_size, 39 | hidden_size=hidden_size, batch_first=True) 40 | 41 | def forward(self, hidden, x): 42 | # Reshape input (batch first) 43 | x = x.view(batch_size, sequence_length, input_size) 44 | 45 | # Propagate input through RNN 46 | # Input: (batch, seq_len, input_size) 47 | # hidden: (num_layers * num_directions, batch, hidden_size) 48 | out, hidden = self.rnn(x, hidden) 49 | return hidden, out.view(-1, num_classes) 50 | 51 | def init_hidden(self): 52 | # Initialize hidden and cell states 53 | # (num_layers * num_directions, batch, hidden_size) 54 | return Variable(torch.zeros(num_layers, batch_size, hidden_size)) 55 | 56 | 57 | # Instantiate RNN model 58 | model = Model() 59 | print(model) 60 | 61 | # Set loss and optimizer function 62 | # CrossEntropyLoss = LogSoftmax + NLLLoss 63 | criterion = nn.CrossEntropyLoss() 64 | optimizer = torch.optim.Adam(model.parameters(), lr=0.1) 65 | 66 | # Train the model 67 | for epoch in range(100): 68 | optimizer.zero_grad() 69 | loss = 0 70 | hidden = model.init_hidden() 71 | 72 | sys.stdout.write("predicted string: ") 73 | for input, label in zip(inputs, labels): 74 | # print(input.size(), label.size()) 75 | hidden, output = model(hidden, input) 76 | val, idx = output.max(1) 77 | sys.stdout.write(idx2char[idx.data[0]]) 78 | loss += criterion(output, torch.LongTensor([label])) 79 | 80 | print(", epoch: %d, loss: %1.3f" % (epoch + 1, loss)) 81 | 82 | loss.backward() 83 | optimizer.step() 84 | 85 | print("Learning finished!") 86 | -------------------------------------------------------------------------------- /12_3_hello_rnn_seq.py: -------------------------------------------------------------------------------- 1 | # Lab 12 RNN 2 | import torch 3 | import torch.nn as nn 4 | from torch.autograd import Variable 5 | 6 | torch.manual_seed(777) # reproducibility 7 | 8 | 9 | idx2char = ['h', 'i', 'e', 'l', 'o'] 10 | 11 | # Teach hihell -> ihello 12 | x_data = [[0, 1, 0, 2, 3, 3]] # hihell 13 | 
x_one_hot = [[[1, 0, 0, 0, 0], # h 0 14 | [0, 1, 0, 0, 0], # i 1 15 | [1, 0, 0, 0, 0], # h 0 16 | [0, 0, 1, 0, 0], # e 2 17 | [0, 0, 0, 1, 0], # l 3 18 | [0, 0, 0, 1, 0]]] # l 3 19 | 20 | y_data = [1, 0, 2, 3, 3, 4] # ihello 21 | 22 | # As we have one batch of samples, we will change them to variables only once 23 | inputs = Variable(torch.Tensor(x_one_hot)) 24 | labels = Variable(torch.LongTensor(y_data)) 25 | 26 | num_classes = 5 27 | input_size = 5 # one-hot size 28 | hidden_size = 5 # output from the LSTM. 5 to directly predict one-hot 29 | batch_size = 1 # one sentence 30 | sequence_length = 6 # |ihello| == 6 31 | num_layers = 1 # one-layer rnn 32 | 33 | 34 | class RNN(nn.Module): 35 | 36 | def __init__(self, num_classes, input_size, hidden_size, num_layers): 37 | super(RNN, self).__init__() 38 | 39 | self.num_classes = num_classes 40 | self.num_layers = num_layers 41 | self.input_size = input_size 42 | self.hidden_size = hidden_size 43 | self.sequence_length = sequence_length 44 | 45 | self.rnn = nn.RNN(input_size=5, hidden_size=5, batch_first=True) 46 | 47 | def forward(self, x): 48 | # Initialize hidden and cell states 49 | # (num_layers * num_directions, batch, hidden_size) for batch_first=True 50 | h_0 = Variable(torch.zeros( 51 | self.num_layers, x.size(0), self.hidden_size)) 52 | 53 | # Reshape input 54 | x.view(x.size(0), self.sequence_length, self.input_size) 55 | 56 | # Propagate input through RNN 57 | # Input: (batch, seq_len, input_size) 58 | # h_0: (num_layers * num_directions, batch, hidden_size) 59 | 60 | out, _ = self.rnn(x, h_0) 61 | return out.view(-1, num_classes) 62 | 63 | 64 | # Instantiate RNN model 65 | rnn = RNN(num_classes, input_size, hidden_size, num_layers) 66 | print(rnn) 67 | 68 | # Set loss and optimizer function 69 | # CrossEntropyLoss = LogSoftmax + NLLLoss 70 | criterion = torch.nn.CrossEntropyLoss() 71 | optimizer = torch.optim.Adam(rnn.parameters(), lr=0.1) 72 | 73 | # Train the model 74 | for epoch in range(100): 75 | outputs = rnn(inputs) 76 | optimizer.zero_grad() 77 | loss = criterion(outputs, labels) 78 | loss.backward() 79 | optimizer.step() 80 | _, idx = outputs.max(1) 81 | idx = idx.data.numpy() 82 | result_str = [idx2char[c] for c in idx.squeeze()] 83 | print("epoch: %d, loss: %1.3f" % (epoch + 1, loss.data[0])) 84 | print("Predicted string: ", ''.join(result_str)) 85 | 86 | print("Learning finished!") 87 | -------------------------------------------------------------------------------- /12_4_hello_rnn_emb.py: -------------------------------------------------------------------------------- 1 | # Lab 12 RNN 2 | import torch 3 | import torch.nn as nn 4 | from torch.autograd import Variable 5 | 6 | torch.manual_seed(777) # reproducibility 7 | 8 | 9 | idx2char = ['h', 'i', 'e', 'l', 'o'] 10 | 11 | # Teach hihell -> ihello 12 | x_data = [[0, 1, 0, 2, 3, 3]] # hihell 13 | y_data = [1, 0, 2, 3, 3, 4] # ihello 14 | 15 | # As we have one batch of samples, we will change them to variables only once 16 | inputs = Variable(torch.LongTensor(x_data)) 17 | labels = Variable(torch.LongTensor(y_data)) 18 | 19 | num_classes = 5 20 | input_size = 5 21 | embedding_size = 10 # embedding size 22 | hidden_size = 5 # output from the LSTM. 
5 to directly predict one-hot 23 | batch_size = 1 # one sentence 24 | sequence_length = 6 # |ihello| == 6 25 | num_layers = 1 # one-layer rnn 26 | 27 | 28 | class Model(nn.Module): 29 | 30 | def __init__(self, num_layers, hidden_size): 31 | super(Model, self).__init__() 32 | self.num_layers = num_layers 33 | self.hidden_size = hidden_size 34 | self.embedding = nn.Embedding(input_size, embedding_size) 35 | self.rnn = nn.RNN(input_size=embedding_size, 36 | hidden_size=5, batch_first=True) 37 | self.fc = nn.Linear(hidden_size, num_classes) 38 | 39 | def forward(self, x): 40 | # Initialize hidden and cell states 41 | # (num_layers * num_directions, batch, hidden_size) 42 | h_0 = Variable(torch.zeros( 43 | self.num_layers, x.size(0), self.hidden_size)) 44 | 45 | emb = self.embedding(x) 46 | emb = emb.view(batch_size, sequence_length, -1) 47 | 48 | # Propagate embedding through RNN 49 | # Input: (batch, seq_len, embedding_size) 50 | # h_0: (num_layers * num_directions, batch, hidden_size) 51 | out, _ = self.rnn(emb, h_0) 52 | return self.fc(out.view(-1, num_classes)) 53 | 54 | 55 | # Instantiate RNN model 56 | model = Model(num_layers, hidden_size) 57 | print(model) 58 | 59 | # Set loss and optimizer function 60 | # CrossEntropyLoss = LogSoftmax + NLLLoss 61 | criterion = torch.nn.CrossEntropyLoss() 62 | optimizer = torch.optim.Adam(model.parameters(), lr=0.1) 63 | 64 | # Train the model 65 | for epoch in range(100): 66 | outputs = model(inputs) 67 | optimizer.zero_grad() 68 | loss = criterion(outputs, labels) 69 | loss.backward() 70 | optimizer.step() 71 | _, idx = outputs.max(1) 72 | idx = idx.data.numpy() 73 | result_str = [idx2char[c] for c in idx.squeeze()] 74 | print("epoch: %d, loss: %1.3f" % (epoch + 1, loss.item())) 75 | print("Predicted string: ", ''.join(result_str)) 76 | 77 | print("Learning finished!") 78 | -------------------------------------------------------------------------------- /13_1_rnn_classification_basics.py: -------------------------------------------------------------------------------- 1 | # Original code is from https://github.com/spro/practical-pytorch 2 | import time 3 | import math 4 | import torch 5 | import torch.nn as nn 6 | from torch.autograd import Variable 7 | from torch.utils.data import DataLoader 8 | 9 | from name_dataset import NameDataset 10 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence 11 | 12 | # Parameters and DataLoaders 13 | HIDDEN_SIZE = 100 14 | N_CHARS = 128 # ASCII 15 | N_CLASSES = 18 16 | 17 | 18 | class RNNClassifier(nn.Module): 19 | 20 | def __init__(self, input_size, hidden_size, output_size, n_layers=1): 21 | super(RNNClassifier, self).__init__() 22 | self.hidden_size = hidden_size 23 | self.n_layers = n_layers 24 | 25 | self.embedding = nn.Embedding(input_size, hidden_size) 26 | self.gru = nn.GRU(hidden_size, hidden_size, n_layers) 27 | self.fc = nn.Linear(hidden_size, output_size) 28 | 29 | def forward(self, input): 30 | # Note: we run this all at once (over the whole input sequence) 31 | 32 | # input = B x S . 
size(0) = B 33 | batch_size = input.size(0) 34 | 35 | # input: B x S -- (transpose) --> S x B 36 | input = input.t() 37 | 38 | # Embedding S x B -> S x B x I (embedding size) 39 | print(" input", input.size()) 40 | embedded = self.embedding(input) 41 | print(" embedding", embedded.size()) 42 | 43 | # Make a hidden 44 | hidden = self._init_hidden(batch_size) 45 | 46 | output, hidden = self.gru(embedded, hidden) 47 | print(" gru hidden output", hidden.size()) 48 | # Use the last layer output as FC's input 49 | # No need to unpack, since we are going to use hidden 50 | fc_output = self.fc(hidden) 51 | print(" fc output", fc_output.size()) 52 | return fc_output 53 | 54 | def _init_hidden(self, batch_size): 55 | hidden = torch.zeros(self.n_layers, batch_size, self.hidden_size) 56 | return Variable(hidden) 57 | 58 | # Help functions 59 | 60 | 61 | def str2ascii_arr(msg): 62 | arr = [ord(c) for c in msg] 63 | return arr, len(arr) 64 | 65 | # pad sequences and sort the tensor 66 | def pad_sequences(vectorized_seqs, seq_lengths): 67 | seq_tensor = torch.zeros((len(vectorized_seqs), seq_lengths.max())).long() 68 | for idx, (seq, seq_len) in enumerate(zip(vectorized_seqs, seq_lengths)): 69 | seq_tensor[idx, :seq_len] = torch.LongTensor(seq) 70 | return seq_tensor 71 | 72 | # Create necessary variables, lengths, and target 73 | def make_variables(names): 74 | sequence_and_length = [str2ascii_arr(name) for name in names] 75 | vectorized_seqs = [sl[0] for sl in sequence_and_length] 76 | seq_lengths = torch.LongTensor([sl[1] for sl in sequence_and_length]) 77 | return pad_sequences(vectorized_seqs, seq_lengths) 78 | 79 | 80 | if __name__ == '__main__': 81 | names = ['adylov', 'solan', 'hard', 'san'] 82 | classifier = RNNClassifier(N_CHARS, HIDDEN_SIZE, N_CLASSES) 83 | 84 | for name in names: 85 | arr, _ = str2ascii_arr(name) 86 | inp = Variable(torch.LongTensor([arr])) 87 | out = classifier(inp) 88 | print("in", inp.size(), "out", out.size()) 89 | 90 | 91 | inputs = make_variables(names) 92 | out = classifier(inputs) 93 | print("batch in", inputs.size(), "batch out", out.size()) 94 | 95 | 96 | -------------------------------------------------------------------------------- /13_2_rnn_classification.py: -------------------------------------------------------------------------------- 1 | # Original code is from https://github.com/spro/practical-pytorch 2 | import time 3 | import math 4 | import torch 5 | import torch.nn as nn 6 | from torch.autograd import Variable 7 | from torch.utils.data import DataLoader 8 | 9 | from name_dataset import NameDataset 10 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence 11 | 12 | # Parameters and DataLoaders 13 | HIDDEN_SIZE = 100 14 | N_LAYERS = 2 15 | BATCH_SIZE = 256 16 | N_EPOCHS = 100 17 | 18 | test_dataset = NameDataset(is_train_set=False) 19 | test_loader = DataLoader(dataset=test_dataset, 20 | batch_size=BATCH_SIZE, shuffle=True) 21 | 22 | 23 | train_dataset = NameDataset(is_train_set=True) 24 | train_loader = DataLoader(dataset=train_dataset, 25 | batch_size=BATCH_SIZE, shuffle=True) 26 | 27 | N_COUNTRIES = len(train_dataset.get_countries()) 28 | print(N_COUNTRIES, "countries") 29 | N_CHARS = 128 # ASCII 30 | 31 | 32 | # Some utility functions 33 | def time_since(since): 34 | s = time.time() - since 35 | m = math.floor(s / 60) 36 | s -= m * 60 37 | return '%dm %ds' % (m, s) 38 | 39 | 40 | def create_variable(tensor): 41 | # Do cuda() before wrapping with variable 42 | if torch.cuda.is_available(): 43 | return Variable(tensor.cuda()) 44 | 
else: 45 | return Variable(tensor) 46 | 47 | 48 | # pad sequences and sort the tensor 49 | def pad_sequences(vectorized_seqs, seq_lengths, countries): 50 | seq_tensor = torch.zeros((len(vectorized_seqs), seq_lengths.max())).long() 51 | for idx, (seq, seq_len) in enumerate(zip(vectorized_seqs, seq_lengths)): 52 | seq_tensor[idx, :seq_len] = torch.LongTensor(seq) 53 | 54 | # Sort tensors by their length 55 | seq_lengths, perm_idx = seq_lengths.sort(0, descending=True) 56 | seq_tensor = seq_tensor[perm_idx] 57 | 58 | # Also sort the target (countries) in the same order 59 | target = countries2tensor(countries) 60 | if len(countries): 61 | target = target[perm_idx] 62 | 63 | # Return variables 64 | # DataParallel requires everything to be a Variable 65 | return create_variable(seq_tensor), \ 66 | create_variable(seq_lengths), \ 67 | create_variable(target) 68 | 69 | 70 | # Create necessary variables, lengths, and target 71 | def make_variables(names, countries): 72 | sequence_and_length = [str2ascii_arr(name) for name in names] 73 | vectorized_seqs = [sl[0] for sl in sequence_and_length] 74 | seq_lengths = torch.LongTensor([sl[1] for sl in sequence_and_length]) 75 | return pad_sequences(vectorized_seqs, seq_lengths, countries) 76 | 77 | 78 | def str2ascii_arr(msg): 79 | arr = [ord(c) for c in msg] 80 | return arr, len(arr) 81 | 82 | 83 | def countries2tensor(countries): 84 | country_ids = [train_dataset.get_country_id( 85 | country) for country in countries] 86 | return torch.LongTensor(country_ids) 87 | 88 | 89 | class RNNClassifier(nn.Module): 90 | # Our model 91 | 92 | def __init__(self, input_size, hidden_size, output_size, n_layers=1, bidirectional=True): 93 | super(RNNClassifier, self).__init__() 94 | self.hidden_size = hidden_size 95 | self.n_layers = n_layers 96 | self.n_directions = int(bidirectional) + 1 97 | 98 | self.embedding = nn.Embedding(input_size, hidden_size) 99 | self.gru = nn.GRU(hidden_size, hidden_size, n_layers, 100 | bidirectional=bidirectional) 101 | self.fc = nn.Linear(hidden_size, output_size) 102 | 103 | def forward(self, input, seq_lengths): 104 | # Note: we run this all at once (over the whole input sequence) 105 | # input shape: B x S (input size) 106 | # transpose to make S(sequence) x B (batch) 107 | input = input.t() 108 | batch_size = input.size(1) 109 | 110 | # Make a hidden 111 | hidden = self._init_hidden(batch_size) 112 | 113 | # Embedding S x B -> S x B x I (embedding size) 114 | embedded = self.embedding(input) 115 | 116 | # Pack them up nicely 117 | gru_input = pack_padded_sequence( 118 | embedded, seq_lengths.data.cpu().numpy()) 119 | 120 | # To compact weights again call flatten_parameters(). 
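# flatten_parameters() compacts the GRU weights into a single contiguous
# chunk for cuDNN; calling it in forward() avoids the non-contiguous-weights
# warning when the model has been replicated by nn.DataParallel (as done in
# the main block below).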
121 | self.gru.flatten_parameters() 122 | output, hidden = self.gru(gru_input, hidden) 123 | 124 | # Use the last layer output as FC's input 125 | # No need to unpack, since we are going to use hidden 126 | fc_output = self.fc(hidden[-1]) 127 | return fc_output 128 | 129 | def _init_hidden(self, batch_size): 130 | hidden = torch.zeros(self.n_layers * self.n_directions, 131 | batch_size, self.hidden_size) 132 | return create_variable(hidden) 133 | 134 | 135 | # Train cycle 136 | def train(): 137 | total_loss = 0 138 | 139 | for i, (names, countries) in enumerate(train_loader, 1): 140 | input, seq_lengths, target = make_variables(names, countries) 141 | output = classifier(input, seq_lengths) 142 | 143 | loss = criterion(output, target) 144 | total_loss += loss.data[0] 145 | 146 | classifier.zero_grad() 147 | loss.backward() 148 | optimizer.step() 149 | 150 | if i % 10 == 0: 151 | print('[{}] Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.2f}'.format( 152 | time_since(start), epoch, i * 153 | len(names), len(train_loader.dataset), 154 | 100. * i * len(names) / len(train_loader.dataset), 155 | total_loss / i * len(names))) 156 | 157 | return total_loss 158 | 159 | 160 | # Testing cycle 161 | def test(name=None): 162 | # Predict for a given name 163 | if name: 164 | input, seq_lengths, target = make_variables([name], []) 165 | output = classifier(input, seq_lengths) 166 | pred = output.data.max(1, keepdim=True)[1] 167 | country_id = pred.cpu().numpy()[0][0] 168 | print(name, "is", train_dataset.get_country(country_id)) 169 | return 170 | 171 | print("evaluating trained model ...") 172 | correct = 0 173 | train_data_size = len(test_loader.dataset) 174 | 175 | for names, countries in test_loader: 176 | input, seq_lengths, target = make_variables(names, countries) 177 | output = classifier(input, seq_lengths) 178 | pred = output.data.max(1, keepdim=True)[1] 179 | correct += pred.eq(target.data.view_as(pred)).cpu().sum() 180 | 181 | print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format( 182 | correct, train_data_size, 100. * correct / train_data_size)) 183 | 184 | 185 | if __name__ == '__main__': 186 | 187 | classifier = RNNClassifier(N_CHARS, HIDDEN_SIZE, N_COUNTRIES, N_LAYERS) 188 | if torch.cuda.device_count() > 1: 189 | print("Let's use", torch.cuda.device_count(), "GPUs!") 190 | # dim = 0 [33, xxx] -> [11, ...], [11, ...], [11, ...] on 3 GPUs 191 | classifier = nn.DataParallel(classifier) 192 | 193 | if torch.cuda.is_available(): 194 | classifier.cuda() 195 | 196 | optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001) 197 | criterion = nn.CrossEntropyLoss() 198 | 199 | start = time.time() 200 | print("Training for %d epochs..." 
% N_EPOCHS) 201 | for epoch in range(1, N_EPOCHS + 1): 202 | # Train cycle 203 | train() 204 | 205 | # Testing 206 | test() 207 | 208 | # Testing several samples 209 | test("Sung") 210 | test("Jungwoo") 211 | test("Soojin") 212 | test("Nako") 213 | -------------------------------------------------------------------------------- /13_3_char_rnn.py: -------------------------------------------------------------------------------- 1 | # https://github.com/spro/practical-pytorch 2 | import torch 3 | import torch.nn as nn 4 | from torch.autograd import Variable 5 | from torch.utils.data import DataLoader 6 | 7 | from text_loader import TextDataset 8 | 9 | hidden_size = 100 10 | n_layers = 3 11 | batch_size = 1 12 | n_epochs = 100 13 | n_characters = 128 # ASCII 14 | 15 | 16 | class RNN(nn.Module): 17 | 18 | def __init__(self, input_size, hidden_size, output_size, n_layers=1): 19 | super(RNN, self).__init__() 20 | self.input_size = input_size 21 | self.hidden_size = hidden_size 22 | self.output_size = output_size 23 | self.n_layers = n_layers 24 | 25 | self.embedding = nn.Embedding(input_size, hidden_size) 26 | self.gru = nn.GRU(hidden_size, hidden_size, n_layers) 27 | self.linear = nn.Linear(hidden_size, output_size) 28 | 29 | # This runs this one step at a time 30 | # It's extremely slow, and please do not use in practice. 31 | # We need to use (1) batch and (2) data parallelism 32 | def forward(self, input, hidden): 33 | embed = self.embedding(input.view(1, -1)) # S(=1) x I 34 | embed = embed.view(1, 1, -1) # S(=1) x B(=1) x I (embedding size) 35 | output, hidden = self.gru(embed, hidden) 36 | output = self.linear(output.view(1, -1)) # S(=1) x I 37 | return output, hidden 38 | 39 | def init_hidden(self): 40 | if torch.cuda.is_available(): 41 | hidden = torch.zeros(self.n_layers, 1, self.hidden_size).cuda() 42 | else: 43 | hidden = torch.zeros(self.n_layers, 1, self.hidden_size) 44 | 45 | return Variable(hidden) 46 | 47 | 48 | def str2tensor(string): 49 | tensor = [ord(c) for c in string] 50 | tensor = torch.LongTensor(tensor) 51 | 52 | if torch.cuda.is_available(): 53 | tensor = tensor.cuda() 54 | 55 | return Variable(tensor) 56 | 57 | 58 | def generate(decoder, prime_str='A', predict_len=100, temperature=0.8): 59 | hidden = decoder.init_hidden() 60 | prime_input = str2tensor(prime_str) 61 | predicted = prime_str 62 | 63 | # Use priming string to "build up" hidden state 64 | for p in range(len(prime_str) - 1): 65 | _, hidden = decoder(prime_input[p], hidden) 66 | 67 | inp = prime_input[-1] 68 | 69 | for p in range(predict_len): 70 | output, hidden = decoder(inp, hidden) 71 | 72 | # Sample from the network as a multinomial distribution 73 | output_dist = output.data.view(-1).div(temperature).exp() 74 | top_i = torch.multinomial(output_dist, 1)[0] 75 | 76 | # Add predicted character to string and use as next input 77 | predicted_char = chr(top_i) 78 | predicted += predicted_char 79 | inp = str2tensor(predicted_char) 80 | 81 | return predicted 82 | 83 | # Train for a given src and target 84 | # It feeds single string to demonstrate seq2seq 85 | # It's extremely slow, and we need to use (1) batch and (2) data parallelism 86 | # http://pytorch.org/tutorials/beginner/former_torchies/parallelism_tutorial.html. 
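# Two training variants follow: train_teacher_forching feeds the ground-truth
# previous character to the decoder at every step (teacher forcing), while
# train feeds back the decoder's own argmax prediction (free running).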
87 | 88 | 89 | def train_teacher_forching(line): 90 | input = str2tensor(line[:-1]) 91 | target = str2tensor(line[1:]) 92 | 93 | hidden = decoder.init_hidden() 94 | loss = 0 95 | 96 | for c in range(len(input)): 97 | output, hidden = decoder(input[c], hidden) 98 | loss += criterion(output, target[c]) 99 | 100 | decoder.zero_grad() 101 | loss.backward() 102 | decoder_optimizer.step() 103 | 104 | return loss.data[0] / len(input) 105 | 106 | 107 | def train(line): 108 | input = str2tensor(line[:-1]) 109 | target = str2tensor(line[1:]) 110 | 111 | hidden = decoder.init_hidden() 112 | decoder_in = input[0] 113 | loss = 0 114 | 115 | for c in range(len(input)): 116 | output, hidden = decoder(decoder_in, hidden) 117 | loss += criterion(output, target[c]) 118 | decoder_in = output.max(1)[1] 119 | 120 | decoder.zero_grad() 121 | loss.backward() 122 | decoder_optimizer.step() 123 | 124 | return loss.data[0] / len(input) 125 | 126 | if __name__ == '__main__': 127 | 128 | decoder = RNN(n_characters, hidden_size, n_characters, n_layers) 129 | if torch.cuda.is_available(): 130 | decoder.cuda() 131 | 132 | decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=0.001) 133 | criterion = nn.CrossEntropyLoss() 134 | 135 | train_loader = DataLoader(dataset=TextDataset(), 136 | batch_size=batch_size, 137 | shuffle=True) 138 | 139 | print("Training for %d epochs..." % n_epochs) 140 | for epoch in range(1, n_epochs + 1): 141 | for i, (lines, _) in enumerate(train_loader): 142 | loss = train(lines[0]) # Batch size is 1 143 | 144 | if i % 100 == 0: 145 | print('[(%d %d%%) loss: %.4f]' % 146 | (epoch, epoch / n_epochs * 100, loss)) 147 | print(generate(decoder, 'Wh', 100), '\n') 148 | -------------------------------------------------------------------------------- /13_4_pack_pad.py: -------------------------------------------------------------------------------- 1 | # Original source from 2 | # https://gist.github.com/Tushar-N/dfca335e370a2bc3bc79876e6270099e 3 | # torch 4 | import torch 5 | import torch.nn as nn 6 | from torch.autograd import Variable 7 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence 8 | import torch.nn.functional as F 9 | import numpy as np 10 | import itertools 11 | 12 | 13 | def flatten(l): 14 | return list(itertools.chain.from_iterable(l)) 15 | 16 | seqs = ['ghatmasala', 'nicela', 'chutpakodas'] 17 | 18 | # make idx 0 19 | vocab = [''] + sorted(list(set(flatten(seqs)))) 20 | 21 | # make model 22 | embedding_size = 3 23 | embed = nn.Embedding(len(vocab), embedding_size) 24 | lstm = nn.LSTM(embedding_size, 5) 25 | 26 | vectorized_seqs = [[vocab.index(tok) for tok in seq]for seq in seqs] 27 | print("vectorized_seqs", vectorized_seqs) 28 | 29 | print([x for x in map(len, vectorized_seqs)]) 30 | # get the length of each seq in your batch 31 | seq_lengths = torch.LongTensor([x for x in map(len, vectorized_seqs)]) 32 | 33 | # dump padding everywhere, and place seqs on the left. 34 | # NOTE: you only need a tensor as big as your longest sequence 35 | seq_tensor = Variable(torch.zeros( 36 | (len(vectorized_seqs), seq_lengths.max()))).long() 37 | for idx, (seq, seqlen) in enumerate(zip(vectorized_seqs, seq_lengths)): 38 | seq_tensor[idx, :seqlen] = torch.LongTensor(seq) 39 | 40 | print("seq_tensor", seq_tensor) 41 | 42 | # SORT YOUR TENSORS BY LENGTH! 
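# pack_padded_sequence (used below) expects the batch ordered from longest to
# shortest sequence unless enforce_sorted=False is passed (newer PyTorch),
# hence the descending sort and the reordering of seq_tensor by perm_idx.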
43 | seq_lengths, perm_idx = seq_lengths.sort(0, descending=True) 44 | seq_tensor = seq_tensor[perm_idx] 45 | 46 | print("seq_tensor after sorting", seq_tensor) 47 | 48 | # utils.rnn lets you give (B,L,D) tensors where B is the batch size, L is the maxlength, if you use batch_first=True 49 | # Otherwise, give (L,B,D) tensors 50 | seq_tensor = seq_tensor.transpose(0, 1) # (B,L,D) -> (L,B,D) 51 | print("seq_tensor after transposing", seq_tensor.size(), seq_tensor.data) 52 | 53 | # embed your sequences 54 | embeded_seq_tensor = embed(seq_tensor) 55 | print("seq_tensor after embeding", embeded_seq_tensor.size(), seq_tensor.data) 56 | 57 | # pack them up nicely 58 | packed_input = pack_padded_sequence( 59 | embeded_seq_tensor, seq_lengths.cpu().numpy()) 60 | 61 | # throw them through your LSTM (remember to give batch_first=True here if 62 | # you packed with it) 63 | packed_output, (ht, ct) = lstm(packed_input) 64 | 65 | # unpack your output if required 66 | output, _ = pad_packed_sequence(packed_output) 67 | print("Lstm output", output.size(), output.data) 68 | 69 | # Or if you just want the final hidden state? 70 | print("Last output", ht[-1].size(), ht[-1].data) 71 | -------------------------------------------------------------------------------- /14_1_seq2seq.py: -------------------------------------------------------------------------------- 1 | # https://github.com/spro/practical-pytorch/blob/master/seq2seq-translation/seq2seq-translation.ipynb 2 | import torch 3 | import torch.nn as nn 4 | from torch.utils.data import DataLoader 5 | from text_loader import TextDataset 6 | import seq2seq_models as sm 7 | from seq2seq_models import str2tensor, EOS_token, SOS_token 8 | 9 | HIDDEN_SIZE = 100 10 | N_LAYERS = 1 11 | BATCH_SIZE = 1 12 | N_EPOCH = 100 13 | N_CHARS = 128 # ASCII 14 | 15 | 16 | # Simple test to show how our network works 17 | def test(): 18 | encoder_hidden = encoder.init_hidden() 19 | word_input = str2tensor('hello') 20 | encoder_outputs, encoder_hidden = encoder(word_input, encoder_hidden) 21 | print(encoder_outputs) 22 | 23 | decoder_hidden = encoder_hidden 24 | 25 | word_target = str2tensor('pytorch') 26 | for c in range(len(word_target)): 27 | decoder_output, decoder_hidden = decoder( 28 | word_target[c], decoder_hidden) 29 | print(decoder_output.size(), decoder_hidden.size()) 30 | 31 | 32 | # Train for a given src and target 33 | # To demonstrate seq2seq, We don't handle batch in the code, 34 | # and our encoder runs this one step at a time 35 | # It's extremely slow, and please do not use in practice. 36 | # We need to use (1) batch and (2) data parallelism 37 | # http://pytorch.org/tutorials/beginner/former_torchies/parallelism_tutorial.html. 
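# train() below uses teacher forcing: at step c the decoder receives the
# previous ground-truth target character (SOS on the first step), and the
# cross-entropy loss is summed over the target including its EOS token.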
38 | def train(src, target): 39 | src_var = str2tensor(src) 40 | target_var = str2tensor(target, eos=True) # Add the EOS token 41 | 42 | encoder_hidden = encoder.init_hidden() 43 | encoder_outputs, encoder_hidden = encoder(src_var, encoder_hidden) 44 | 45 | hidden = encoder_hidden 46 | loss = 0 47 | 48 | for c in range(len(target_var)): 49 | # At the first step, we feed SOS 50 | # Afterwards, we use teacher forcing (feed the previous target character) 51 | token = target_var[c - 1] if c else str2tensor(SOS_token) 52 | output, hidden = decoder(token, hidden) 53 | loss += criterion(output, target_var[c]) 54 | 55 | encoder.zero_grad() 56 | decoder.zero_grad() 57 | loss.backward() 58 | optimizer.step() 59 | 60 | return loss.data[0] / len(target_var) 61 | 62 | 63 | # Translate the given input 64 | def translate(enc_input='thisissungkim.iloveyou.', predict_len=100, temperature=0.9): 65 | input_var = str2tensor(enc_input) 66 | encoder_hidden = encoder.init_hidden() 67 | encoder_outputs, encoder_hidden = encoder(input_var, encoder_hidden) 68 | 69 | hidden = encoder_hidden 70 | 71 | predicted = '' 72 | dec_input = str2tensor(SOS_token) 73 | for c in range(predict_len): 74 | output, hidden = decoder(dec_input, hidden) 75 | 76 | # Sample from the network as a multinomial distribution 77 | output_dist = output.data.view(-1).div(temperature).exp() 78 | top_i = torch.multinomial(output_dist, 1)[0] 79 | 80 | # Stop at the EOS 81 | if top_i == EOS_token: 82 | break 83 | 84 | predicted_char = chr(top_i) 85 | predicted += predicted_char 86 | 87 | dec_input = str2tensor(predicted_char) 88 | 89 | return enc_input, predicted 90 | 91 | 92 | encoder = sm.EncoderRNN(N_CHARS, HIDDEN_SIZE, N_LAYERS) 93 | decoder = sm.DecoderRNN(HIDDEN_SIZE, N_CHARS, N_LAYERS) 94 | 95 | if torch.cuda.is_available(): 96 | decoder.cuda() 97 | encoder.cuda() 98 | print(encoder, decoder) 99 | test() 100 | 101 | params = list(encoder.parameters()) + list(decoder.parameters()) 102 | optimizer = torch.optim.Adam(params, lr=0.001) 103 | criterion = nn.CrossEntropyLoss() 104 | 105 | 106 | train_loader = DataLoader(dataset=TextDataset(), 107 | batch_size=BATCH_SIZE, 108 | shuffle=True, 109 | num_workers=2) 110 | 111 | print("Training for %d epochs..."
% N_EPOCH) 112 | for epoch in range(1, N_EPOCH + 1): 113 | # Get srcs and targets from data loader 114 | for i, (srcs, targets) in enumerate(train_loader): 115 | train_loss = train(srcs[0], targets[0]) # Batch is 1 116 | 117 | if i % 100 == 0: 118 | print('[(%d %d%%) %.4f]' % 119 | (epoch, epoch / N_EPOCH * 100, train_loss)) 120 | print(translate(srcs[0]), '\n') 121 | print(translate(), '\n') 122 | -------------------------------------------------------------------------------- /14_2_seq2seq_att.py: -------------------------------------------------------------------------------- 1 | # Original code from 2 | # https://github.com/spro/practical-pytorch/blob/master/seq2seq-translation/seq2seq-translation.ipynb 3 | 4 | #import matplotlib.pyplot as plt 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | from torch.utils.data import DataLoader 10 | from text_loader import TextDataset 11 | import seq2seq_models as sm 12 | from seq2seq_models import cuda_variable, str2tensor, EOS_token, SOS_token 13 | 14 | 15 | N_LAYERS = 1 16 | BATCH_SIZE = 1 17 | N_EPOCH = 100 18 | N_CHARS = 128 # ASCII 19 | HIDDEN_SIZE = N_CHARS 20 | 21 | 22 | # Simple test to show how our network works 23 | def test(): 24 | encoder_test = sm.EncoderRNN(10, 10, 2) 25 | decoder_test = sm.AttnDecoderRNN(10, 10, 2) 26 | 27 | if torch.cuda.is_available(): 28 | encoder_test.cuda() 29 | decoder_test.cuda() 30 | 31 | encoder_hidden = encoder_test.init_hidden() 32 | word_input = cuda_variable(torch.LongTensor([1, 2, 3])) 33 | encoder_outputs, encoder_hidden = encoder_test(word_input, encoder_hidden) 34 | print(encoder_outputs.size()) 35 | 36 | word_target = cuda_variable(torch.LongTensor([1, 2, 3])) 37 | decoder_attns = torch.zeros(1, 3, 3) 38 | decoder_hidden = encoder_hidden 39 | 40 | for c in range(len(word_target)): 41 | decoder_output, decoder_hidden, decoder_attn = \ 42 | decoder_test(word_target[c], 43 | decoder_hidden, encoder_outputs) 44 | print(decoder_output.size(), decoder_hidden.size(), decoder_attn.size()) 45 | decoder_attns[0, c] = decoder_attn.squeeze(0).cpu().data 46 | 47 | 48 | # Train for a given src and target 49 | # To demonstrate seq2seq, we don't handle batching in this code, 50 | # and the decoder runs one character at a time 51 | # It's extremely slow, so please do not use it in practice. 52 | # We would need to use (1) batching and (2) data parallelism 53 | # http://pytorch.org/tutorials/beginner/former_torchies/parallelism_tutorial.html 54 | def train(src, target): 55 | loss = 0 56 | 57 | src_var = str2tensor(src) 58 | target_var = str2tensor(target, eos=True) # Add the EOS token 59 | 60 | encoder_hidden = encoder.init_hidden() 61 | encoder_outputs, encoder_hidden = encoder(src_var, encoder_hidden) 62 | 63 | hidden = encoder_hidden 64 | 65 | for c in range(len(target_var)): 66 | # At the first step we feed SOS; afterwards we use teacher forcing.
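# (Teacher forcing: during training the decoder is fed the ground-truth previous character target_var[c - 1] rather than its own prediction; at inference time, translate() below feeds back the sampled character instead.)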
67 | token = target_var[c - 1] if c else str2tensor(SOS_token) 68 | output, hidden, attention = decoder(token, hidden, encoder_outputs) 69 | loss += criterion(output, target_var[c]) 70 | 71 | encoder.zero_grad() 72 | decoder.zero_grad() 73 | loss.backward() 74 | optimizer.step() 75 | 76 | return loss.data[0] / len(target_var) 77 | 78 | 79 | # Translate the given input 80 | def translate(enc_input='thisissungkim.iloveyou.', predict_len=100, temperature=0.9): 81 | input_var = str2tensor(enc_input) 82 | encoder_hidden = encoder.init_hidden() 83 | encoder_outputs, encoder_hidden = encoder(input_var, encoder_hidden) 84 | 85 | hidden = encoder_hidden 86 | 87 | predicted = '' 88 | dec_input = str2tensor(SOS_token) 89 | attentions = [] 90 | for c in range(predict_len): 91 | output, hidden, attention = decoder(dec_input, hidden, encoder_outputs) 92 | # Sample from the network as a multinomial distribution 93 | output_dist = output.data.view(-1).div(temperature).exp() 94 | top_i = torch.multinomial(output_dist, 1)[0] 95 | attentions.append(attention.view(-1).data.cpu().numpy().tolist()) 96 | 97 | # Stop at the EOS 98 | if top_i == EOS_token: 99 | break 100 | 101 | predicted_char = chr(top_i) 102 | predicted += predicted_char 103 | 104 | dec_input = str2tensor(predicted_char) 105 | 106 | return predicted, attentions 107 | 108 | 109 | if __name__ == '__main__': 110 | encoder = sm.EncoderRNN(N_CHARS, HIDDEN_SIZE, N_LAYERS) 111 | decoder = sm.AttnDecoderRNN(HIDDEN_SIZE, N_CHARS, N_LAYERS) 112 | 113 | if torch.cuda.is_available(): 114 | decoder.cuda() 115 | encoder.cuda() 116 | print(encoder, decoder) 117 | # test() 118 | 119 | params = list(encoder.parameters()) + list(decoder.parameters()) 120 | optimizer = torch.optim.Adam(params, lr=0.001) 121 | criterion = nn.CrossEntropyLoss() 122 | 123 | train_loader = DataLoader(dataset=TextDataset(), 124 | batch_size=BATCH_SIZE, 125 | shuffle=True, 126 | num_workers=2) 127 | 128 | print("Training for %d epochs..." % N_EPOCH) 129 | for epoch in range(1, N_EPOCH + 1): 130 | # Get srcs and targets from data loader 131 | for i, (srcs, targets) in enumerate(train_loader): 132 | train_loss = train(srcs[0], targets[0]) 133 | 134 | if i % 1000 == 0: 135 | print('[(%d/%d %d%%) %.4f]' % 136 | (epoch, N_EPOCH, i * len(srcs) * 100 / len(train_loader), train_loss)) 137 | output, _ = translate(srcs[0]) 138 | print(srcs[0], output, '\n') 139 | 140 | output, attentions = translate() 141 | print('thisissungkim.iloveyou.', output, '\n') 142 | 143 | # plt.matshow(attentions) 144 | # plt.show() 145 | # print(attentions) 146 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/hunkim/PythonZeroToAll.svg?branch=master)](https://travis-ci.org/hunkim/PythonZeroToAll) 2 | 3 | # PyTorchZeroToAll 4 | Quick 3~4 day lecture materials for HKUST students. 5 | 6 | ## Video Lectures: (RNN TBA) 7 | * [Youtube](http://bit.ly/PyTorchVideo) 8 | * [Bilibili](https://www.bilibili.com/video/av15823922/) 9 | 10 | ## Slides 11 | * [Lecture Slides @GoogleDrive](http://bit.ly/PyTorchZeroAll) 12 | 13 | If you cannot access Google Drive for some reason, please check out the PDF files in the slides directory. However, the slides on Google Drive are always the most up to date. We really appreciate your comments.
14 | 15 | ## Previous Lectures 16 | * cf., http://bit.ly/TF_HKUST (3 day crash course using TensorFlow) 17 | -------------------------------------------------------------------------------- /data/diabetes.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hunkim/PyTorchZeroToAll/e5a260fac0b4ff1e2ec434fc6f0d962ac97fc289/data/diabetes.csv.gz -------------------------------------------------------------------------------- /data/names_test.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hunkim/PyTorchZeroToAll/e5a260fac0b4ff1e2ec434fc6f0d962ac97fc289/data/names_test.csv.gz -------------------------------------------------------------------------------- /data/names_train.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hunkim/PyTorchZeroToAll/e5a260fac0b4ff1e2ec434fc6f0d962ac97fc289/data/names_train.csv.gz -------------------------------------------------------------------------------- /data/shakespeare.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hunkim/PyTorchZeroToAll/e5a260fac0b4ff1e2ec434fc6f0d962ac97fc289/data/shakespeare.txt.gz -------------------------------------------------------------------------------- /name_dataset.py: -------------------------------------------------------------------------------- 1 | # References 2 | # https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/01-basics/pytorch_basics/main.py 3 | # http://pytorch.org/tutorials/beginner/data_loading_tutorial.html#dataset-class 4 | import torch 5 | import numpy as np 6 | from torch.autograd import Variable 7 | from torch.utils.data import Dataset, DataLoader 8 | import csv 9 | import gzip 10 | 11 | 12 | class NameDataset(Dataset): 13 | """ Name/country dataset.""" 14 | 15 | # Initialize your data, download, etc.
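# Each row of the gzipped CSV holds one (name, country) pair; names_train.csv.gz is read when is_train_set=True, otherwise names_test.csv.gz.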
16 | def __init__(self, is_train_set=False): 17 | filename = './data/names_train.csv.gz' if is_train_set else './data/names_test.csv.gz' 18 | with gzip.open(filename, "rt") as f: 19 | reader = csv.reader(f) 20 | rows = list(reader) 21 | 22 | self.names = [row[0] for row in rows] 23 | self.countries = [row[1] for row in rows] 24 | self.len = len(self.countries) 25 | 26 | self.country_list = list(sorted(set(self.countries))) 27 | 28 | def __getitem__(self, index): 29 | return self.names[index], self.countries[index] 30 | 31 | def __len__(self): 32 | return self.len 33 | 34 | def get_countries(self): 35 | return self.country_list 36 | 37 | def get_country(self, id): 38 | return self.country_list[id] 39 | 40 | def get_country_id(self, country): 41 | return self.country_list.index(country) 42 | 43 | # Test the loader 44 | if __name__ == "__main__": 45 | dataset = NameDataset(False) 46 | print(dataset.get_countries()) 47 | print(dataset.get_country(3)) 48 | print(dataset.get_country_id('Korean')) 49 | 50 | train_loader = DataLoader(dataset=dataset, 51 | batch_size=10, 52 | shuffle=True) 53 | 54 | print(len(train_loader.dataset)) 55 | for epoch in range(2): 56 | for i, (names, countries) in enumerate(train_loader): 57 | # Run your training process 58 | print(epoch, i, "names", names, "countries", countries) 59 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | #nonsml: digitalgenius/ubuntu-pytorch 2 | #varunagrawal/pytorch 3 | httplib2==0.18.0 4 | matplotlib==2.0.0 5 | numpy==1.13.3 6 | torch 7 | torchvision==0.1.9 8 | Unidecode==0.04.21 9 | -------------------------------------------------------------------------------- /seq2seq_models.py: -------------------------------------------------------------------------------- 1 | # Original code from 2 | # https://github.com/spro/practical-pytorch/blob/master/seq2seq-translation/seq2seq-translation.ipynb 3 | import torch 4 | import torch.nn as nn 5 | from torch.autograd import Variable 6 | import torch.nn.functional as F 7 | 8 | MAX_LENGTH = 100 9 | 10 | SOS_token = chr(0) 11 | EOS_token = 1 12 | 13 | # Helper function to create a Variable based on 14 | # CUDA availability 15 | 16 | 17 | def cuda_variable(tensor): 18 | # Do cuda() before wrapping with variable 19 | if torch.cuda.is_available(): 20 | return Variable(tensor.cuda()) 21 | else: 22 | return Variable(tensor) 23 | 24 | 25 | # String to char tensor 26 | def str2tensor(msg, eos=False): 27 | tensor = [ord(c) for c in msg] 28 | if eos: 29 | tensor.append(EOS_token) 30 | 31 | return cuda_variable(torch.LongTensor(tensor)) 32 | 33 | 34 | # To demonstrate seq2seq, we don't handle batching in this code, 35 | # and the decoder runs one character at a time 36 | # It's extremely slow, so please do not use it in practice. 37 | # We would need to use (1) batching and (2) data parallelism 38 | # http://pytorch.org/tutorials/beginner/former_torchies/parallelism_tutorial.html
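# For reference, shapes as used by the models below (batch size fixed to 1):
#   str2tensor('hi', eos=True)  -> Variable wrapping LongTensor([104, 105, 1])   (ord('h') = 104, ord('i') = 105, EOS_token = 1)
#   EncoderRNN.forward          -> output (S, 1, hidden_size), hidden (n_layers, 1, hidden_size) for an input of S characters
#   DecoderRNN / AttnDecoderRNN -> output (1, output_size), hidden (n_layers, 1, hidden_size) per step; AttnDecoderRNN also returns attention weights of shape (1, 1, S)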
39 | 40 | class EncoderRNN(nn.Module): 41 | 42 | def __init__(self, input_size, hidden_size, n_layers=1): 43 | self.hidden_size = hidden_size 44 | self.n_layers = n_layers 45 | 46 | super(EncoderRNN, self).__init__() 47 | 48 | self.embedding = nn.Embedding(input_size, hidden_size) 49 | self.gru = nn.GRU(hidden_size, hidden_size, n_layers) 50 | 51 | def forward(self, word_inputs, hidden): 52 | # Note: we run this all at once (over the whole input sequence) 53 | seq_len = len(word_inputs) 54 | # input shape: S x B (=1) x I (input size) 55 | embedded = self.embedding(word_inputs).view(seq_len, 1, -1) 56 | output, hidden = self.gru(embedded, hidden) 57 | return output, hidden 58 | 59 | def init_hidden(self): 60 | # (num_layers * num_directions, batch, hidden_size) 61 | return cuda_variable(torch.zeros(self.n_layers, 1, self.hidden_size)) 62 | 63 | 64 | class DecoderRNN(nn.Module): 65 | 66 | def __init__(self, hidden_size, output_size, n_layers=1): 67 | super(DecoderRNN, self).__init__() 68 | 69 | self.embedding = nn.Embedding(output_size, hidden_size) 70 | self.gru = nn.GRU(hidden_size, hidden_size, n_layers) 71 | self.out = nn.Linear(hidden_size, output_size) 72 | 73 | def forward(self, input, hidden): 74 | # input shape: S(=1) x B (=1) x I (input size) 75 | # Note: we run this one step at a time. (Sequence size = 1) 76 | output = self.embedding(input).view(1, 1, -1) 77 | output, hidden = self.gru(output, hidden) 78 | output = self.out(output[0]) 79 | # No need softmax, since we are using CrossEntropyLoss 80 | return output, hidden 81 | 82 | def init_hidden(self): 83 | # (num_layers * num_directions, batch, hidden_size) 84 | return cuda_variable(torch.zeros(self.n_layers, 1, self.hidden_size)) 85 | 86 | 87 | class AttnDecoderRNN(nn.Module): 88 | 89 | def __init__(self, hidden_size, output_size, n_layers=1, dropout_p=0.1): 90 | super(AttnDecoderRNN, self).__init__() 91 | 92 | # Linear for attention 93 | self.attn = nn.Linear(hidden_size, hidden_size) 94 | 95 | # Define layers 96 | self.embedding = nn.Embedding(output_size, hidden_size) 97 | self.gru = nn.GRU(hidden_size, hidden_size, 98 | n_layers, dropout=dropout_p) 99 | self.out = nn.Linear(hidden_size * 2, output_size) 100 | 101 | def forward(self, word_input, last_hidden, encoder_hiddens): 102 | # Note: we run this one step (S=1) at a time 103 | # Get the embedding of the current input word (last output word) 104 | rnn_input = self.embedding(word_input).view(1, 1, -1) # S=1 x B x I 105 | rnn_output, hidden = self.gru(rnn_input, last_hidden) 106 | 107 | # Calculate attention from current RNN state and all encoder outputs; 108 | # apply to encoder outputs 109 | attn_weights = self.get_att_weight( 110 | rnn_output.squeeze(0), encoder_hiddens) 111 | context = attn_weights.bmm( 112 | encoder_hiddens.transpose(0, 1)) # B x S(=1) x I 113 | 114 | # Final output layer (next word prediction) using the RNN hidden state 115 | # and context vector 116 | rnn_output = rnn_output.squeeze(0) # S(=1) x B x I -> B x I 117 | context = context.squeeze(1) # B x S(=1) x I -> B x I 118 | output = self.out(torch.cat((rnn_output, context), 1)) 119 | 120 | # Return final output, hidden state, and attention weights (for 121 | # visualization) 122 | return output, hidden, attn_weights 123 | 124 | def get_att_weight(self, hidden, encoder_hiddens): 125 | seq_len = len(encoder_hiddens) 126 | 127 | # Create variable to store attention energies 128 | attn_scores = cuda_variable(torch.zeros(seq_len)) # B x 1 x S 129 | 130 | # Calculate energies for each encoder hidden 131 | 
for i in range(seq_len): 132 | attn_scores[i] = self.get_att_score(hidden, encoder_hiddens[i]) 133 | 134 | # Normalize scores to weights in range 0 to 1, 135 | # resize to 1 x 1 x seq_len 136 | # print("att_scores", attn_scores.size()) 137 | return F.softmax(attn_scores).view(1, 1, -1) 138 | 139 | # score = h^T W h^e = h dot (W h^e) 140 | # TODO: We need to implement different score models 141 | def get_att_score(self, hidden, encoder_hidden): 142 | score = self.attn(encoder_hidden) 143 | return torch.dot(hidden.view(-1), score.view(-1)) 144 | -------------------------------------------------------------------------------- /slides/Lecture 01: Overview.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hunkim/PyTorchZeroToAll/e5a260fac0b4ff1e2ec434fc6f0d962ac97fc289/slides/Lecture 01: Overview.pdf -------------------------------------------------------------------------------- /slides/Lecture 02: Linear Model.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hunkim/PyTorchZeroToAll/e5a260fac0b4ff1e2ec434fc6f0d962ac97fc289/slides/Lecture 02: Linear Model.pdf -------------------------------------------------------------------------------- /slides/Lecture 03: Gradient Descent.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hunkim/PyTorchZeroToAll/e5a260fac0b4ff1e2ec434fc6f0d962ac97fc289/slides/Lecture 03: Gradient Descent.pdf -------------------------------------------------------------------------------- /slides/Lecture 05: Linear regression in PyTorch way.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hunkim/PyTorchZeroToAll/e5a260fac0b4ff1e2ec434fc6f0d962ac97fc289/slides/Lecture 05: Linear regression in PyTorch way.pdf -------------------------------------------------------------------------------- /slides/Lecture 06: Logistic Regression.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hunkim/PyTorchZeroToAll/e5a260fac0b4ff1e2ec434fc6f0d962ac97fc289/slides/Lecture 06: Logistic Regression.pdf -------------------------------------------------------------------------------- /slides/Lecture 07: Wide & Deep.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hunkim/PyTorchZeroToAll/e5a260fac0b4ff1e2ec434fc6f0d962ac97fc289/slides/Lecture 07: Wide & Deep.pdf -------------------------------------------------------------------------------- /slides/Lecture 08: DataLoader.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hunkim/PyTorchZeroToAll/e5a260fac0b4ff1e2ec434fc6f0d962ac97fc289/slides/Lecture 08: DataLoader.pdf -------------------------------------------------------------------------------- /slides/Lecture 09: Softmax Classifier.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hunkim/PyTorchZeroToAll/e5a260fac0b4ff1e2ec434fc6f0d962ac97fc289/slides/Lecture 09: Softmax Classifier.pdf -------------------------------------------------------------------------------- /slides/P-Epilogue: What's the next?.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hunkim/PyTorchZeroToAll/e5a260fac0b4ff1e2ec434fc6f0d962ac97fc289/slides/P-Epilogue: What's the next?.pdf -------------------------------------------------------------------------------- /text_loader.py: -------------------------------------------------------------------------------- 1 | # References 2 | # https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/01-basics/pytorch_basics/main.py 3 | # http://pytorch.org/tutorials/beginner/data_loading_tutorial.html#dataset-class 4 | import gzip 5 | from torch.utils.data import Dataset, DataLoader 6 | 7 | 8 | class TextDataset(Dataset): 9 | # Initialize your data, download, etc. 10 | 11 | def __init__(self, filename="./data/shakespeare.txt.gz"): 12 | self.len = 0 13 | with gzip.open(filename, 'rt') as f: 14 | self.targetLines = [x.strip() for x in f if x.strip()] 15 | self.srcLines = [x.lower().replace(' ', '') 16 | for x in self.targetLines] 17 | self.len = len(self.srcLines) 18 | 19 | def __getitem__(self, index): 20 | return self.srcLines[index], self.targetLines[index] 21 | 22 | def __len__(self): 23 | return self.len 24 | 25 | 26 | # Test the loader 27 | if __name__ == "__main__": 28 | dataset = TextDataset() 29 | train_loader = DataLoader(dataset=dataset, 30 | batch_size=3, 31 | shuffle=True, 32 | num_workers=2) 33 | 34 | for i, (src, target) in enumerate(train_loader): 35 | print(i, "data", src) 36 | --------------------------------------------------------------------------------