├── .gitignore ├── .travis.yml ├── 01_basics.py ├── 02_manual_gradient.py ├── 03_auto_gradient.py ├── 05_linear_regression.py ├── 06_logistic_regression.py ├── 07_diabets_logistic.py ├── 08_1_dataset_loader.py ├── 08_2_dataset_loade_logistic.py ├── 09_01_softmax_loss.py ├── 09_2_softmax_mnist.py ├── 10_1_cnn_mnist.py ├── 11_1_toy_inception_mnist.py ├── 12_1_rnn_basics.py ├── 12_2_hello_rnn.py ├── 12_3_hello_rnn_seq.py ├── 12_4_hello_rnn_emb.py ├── 13_1_rnn_classification_basics.py ├── 13_2_rnn_classification.py ├── 13_3_char_rnn.py ├── 13_4_pack_pad.py ├── 14_1_seq2seq.py ├── 14_2_seq2seq_att.py ├── README.md ├── data ├── diabetes.csv.gz ├── names_test.csv.gz ├── names_train.csv.gz └── shakespeare.txt.gz ├── name_dataset.py ├── requirements.txt ├── seq2seq_models.py ├── slides ├── Lecture 01: Overview.pdf ├── Lecture 02: Linear Model.pdf ├── Lecture 03: Gradient Descent.pdf ├── Lecture 05: Linear regression in PyTorch way.pdf ├── Lecture 06: Logistic Regression.pdf ├── Lecture 07: Wide & Deep.pdf ├── Lecture 08: DataLoader.pdf ├── Lecture 09: Softmax Classifier.pdf └── P-Epilogue: What's the next?.pdf └── text_loader.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | *.key 3 | .nsml* 4 | *.pt 5 | X* 6 | mnist_data 7 | ppts 8 | .ipynb_checkpoints 9 | client_secret.json 10 | __pycache__/ 11 | .py* 12 | tmp 13 | template.pdf 14 | *.ipynb 15 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # code below is taken from https://github.com/fchollet/keras/blob/master/.travis.yml 2 | sudo: required 3 | dist: trusty 4 | language: python 5 | python: # Only two versions for now 6 | - "2.7" 7 | - "3.6" 8 | # command to install dependencies 9 | install: "pip install -r requirements.txt" 10 | 11 | script: 12 | - python -m compileall . 
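# Smoke test: the next step runs every numbered example script with python,
# one file per invocation (-n 1), up to three processes in parallel (-P 3).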
13 | - ls ??_*.py|xargs -n 1 -P 3 python 14 | -------------------------------------------------------------------------------- /01_basics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | x_data = [1.0, 2.0, 3.0] 5 | y_data = [2.0, 4.0, 6.0] 6 | 7 | 8 | # our model for the forward pass 9 | def forward(x): 10 | return x * w 11 | 12 | 13 | # Loss function 14 | def loss(x, y): 15 | y_pred = forward(x) 16 | return (y_pred - y) * (y_pred - y) 17 | 18 | # List of weights/Mean square Error (Mse) for each input 19 | w_list = [] 20 | mse_list = [] 21 | 22 | for w in np.arange(0.0, 4.1, 0.1): 23 | # Print the weights and initialize the lost 24 | print("w=", w) 25 | l_sum = 0 26 | 27 | for x_val, y_val in zip(x_data, y_data): 28 | # For each input and output, calculate y_hat 29 | # Compute the total loss and add to the total error 30 | y_pred_val = forward(x_val) 31 | l = loss(x_val, y_val) 32 | l_sum += l 33 | print("\t", x_val, y_val, y_pred_val, l) 34 | # Now compute the Mean squared error (mse) of each 35 | # Aggregate the weight/mse from this run 36 | print("MSE=", l_sum / len(x_data)) 37 | w_list.append(w) 38 | mse_list.append(l_sum / len(x_data)) 39 | 40 | # Plot it all 41 | plt.plot(w_list, mse_list) 42 | plt.ylabel('Loss') 43 | plt.xlabel('w') 44 | plt.show() 45 | -------------------------------------------------------------------------------- /02_manual_gradient.py: -------------------------------------------------------------------------------- 1 | # Training Data 2 | x_data = [1.0, 2.0, 3.0] 3 | y_data = [2.0, 4.0, 6.0] 4 | 5 | w = 1.0 # a random guess: random value 6 | 7 | 8 | # our model forward pass 9 | def forward(x): 10 | return x * w 11 | 12 | 13 | # Loss function 14 | def loss(x, y): 15 | y_pred = forward(x) 16 | return (y_pred - y) * (y_pred - y) 17 | 18 | 19 | # compute gradient 20 | def gradient(x, y): # d_loss/d_w 21 | return 2 * x * (x * w - y) 22 | 23 | 24 | # Before training 25 | print("Prediction (before training)", 4, forward(4)) 26 | 27 | # Training loop 28 | for epoch in range(10): 29 | for x_val, y_val in zip(x_data, y_data): 30 | # Compute derivative w.r.t to the learned weights 31 | # Update the weights 32 | # Compute the loss and print progress 33 | grad = gradient(x_val, y_val) 34 | w = w - 0.01 * grad 35 | print("\tgrad: ", x_val, y_val, round(grad, 2)) 36 | l = loss(x_val, y_val) 37 | print("progress:", epoch, "w=", round(w, 2), "loss=", round(l, 2)) 38 | 39 | # After training 40 | print("Predicted score (after training)", "4 hours of studying: ", forward(4)) 41 | -------------------------------------------------------------------------------- /03_auto_gradient.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pdb 3 | 4 | x_data = [1.0, 2.0, 3.0] 5 | y_data = [2.0, 4.0, 6.0] 6 | w = torch.tensor([1.0], requires_grad=True) 7 | 8 | # our model forward pass 9 | def forward(x): 10 | return x * w 11 | 12 | # Loss function 13 | def loss(y_pred, y_val): 14 | return (y_pred - y_val) ** 2 15 | 16 | # Before training 17 | print("Prediction (before training)", 4, forward(4).item()) 18 | 19 | # Training loop 20 | for epoch in range(10): 21 | for x_val, y_val in zip(x_data, y_data): 22 | y_pred = forward(x_val) # 1) Forward pass 23 | l = loss(y_pred, y_val) # 2) Compute loss 24 | l.backward() # 3) Back propagation to update weights 25 | print("\tgrad: ", x_val, y_val, w.grad.item()) 26 | w.data = w.data - 0.01 * 
w.grad.item() 27 | 28 | # Manually zero the gradients after updating weights 29 | w.grad.data.zero_() 30 | 31 | print(f"Epoch: {epoch} | Loss: {l.item()}") 32 | 33 | # After training 34 | print("Prediction (after training)", 4, forward(4).item()) 35 | -------------------------------------------------------------------------------- /05_linear_regression.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch 3 | from torch import tensor 4 | 5 | x_data = tensor([[1.0], [2.0], [3.0]]) 6 | y_data = tensor([[2.0], [4.0], [6.0]]) 7 | 8 | 9 | class Model(nn.Module): 10 | def __init__(self): 11 | """ 12 | In the constructor we instantiate two nn.Linear module 13 | """ 14 | super(Model, self).__init__() 15 | self.linear = torch.nn.Linear(1, 1) # One in and one out 16 | 17 | def forward(self, x): 18 | """ 19 | In the forward function we accept a Variable of input data and we must return 20 | a Variable of output data. We can use Modules defined in the constructor as 21 | well as arbitrary operators on Variables. 22 | """ 23 | y_pred = self.linear(x) 24 | return y_pred 25 | 26 | 27 | # our model 28 | model = Model() 29 | 30 | # Construct our loss function and an Optimizer. The call to model.parameters() 31 | # in the SGD constructor will contain the learnable parameters of the two 32 | # nn.Linear modules which are members of the model. 33 | criterion = torch.nn.MSELoss(reduction='sum') 34 | optimizer = torch.optim.SGD(model.parameters(), lr=0.01) 35 | 36 | # Training loop 37 | for epoch in range(500): 38 | # 1) Forward pass: Compute predicted y by passing x to the model 39 | y_pred = model(x_data) 40 | 41 | # 2) Compute and print loss 42 | loss = criterion(y_pred, y_data) 43 | print(f'Epoch: {epoch} | Loss: {loss.item()} ') 44 | 45 | # Zero gradients, perform a backward pass, and update the weights. 46 | optimizer.zero_grad() 47 | loss.backward() 48 | optimizer.step() 49 | 50 | 51 | # After training 52 | hour_var = tensor([[4.0]]) 53 | y_pred = model(hour_var) 54 | print("Prediction (after training)", 4, model(hour_var).data[0][0].item()) 55 | -------------------------------------------------------------------------------- /06_logistic_regression.py: -------------------------------------------------------------------------------- 1 | from torch import tensor 2 | from torch import nn 3 | from torch import sigmoid 4 | import torch.nn.functional as F 5 | import torch.optim as optim 6 | 7 | # Training data and ground truth 8 | x_data = tensor([[1.0], [2.0], [3.0], [4.0]]) 9 | y_data = tensor([[0.], [0.], [1.], [1.]]) 10 | 11 | 12 | class Model(nn.Module): 13 | def __init__(self): 14 | """ 15 | In the constructor we instantiate nn.Linear module 16 | """ 17 | super(Model, self).__init__() 18 | self.linear = nn.Linear(1, 1) # One in and one out 19 | 20 | def forward(self, x): 21 | """ 22 | In the forward function we accept a Variable of input data and we must return 23 | a Variable of output data. 24 | """ 25 | y_pred = sigmoid(self.linear(x)) 26 | return y_pred 27 | 28 | 29 | # our model 30 | model = Model() 31 | 32 | # Construct our loss function and an Optimizer. The call to model.parameters() 33 | # in the SGD constructor will contain the learnable parameters of the two 34 | # nn.Linear modules which are members of the model. 
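# BCELoss expects probabilities in [0, 1], which is why forward() applies
# sigmoid to the linear output; reduction='mean' averages the per-sample
# binary cross-entropy -(y*log(p) + (1-y)*log(1-p)) over the batch.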
35 | criterion = nn.BCELoss(reduction='mean') 36 | optimizer = optim.SGD(model.parameters(), lr=0.01) 37 | 38 | # Training loop 39 | for epoch in range(1000): 40 | # Forward pass: Compute predicted y by passing x to the model 41 | y_pred = model(x_data) 42 | 43 | # Compute and print loss 44 | loss = criterion(y_pred, y_data) 45 | print(f'Epoch {epoch + 1}/1000 | Loss: {loss.item():.4f}') 46 | 47 | # Zero gradients, perform a backward pass, and update the weights. 48 | optimizer.zero_grad() 49 | loss.backward() 50 | optimizer.step() 51 | 52 | # After training 53 | print(f'\nLet\'s predict the hours need to score above 50%\n{"=" * 50}') 54 | hour_var = model(tensor([[1.0]])) 55 | print(f'Prediction after 1 hour of training: {hour_var.item():.4f} | Above 50%: {hour_var.item() > 0.5}') 56 | hour_var = model(tensor([[7.0]])) 57 | print(f'Prediction after 7 hours of training: {hour_var.item():.4f} | Above 50%: { hour_var.item() > 0.5}') 58 | -------------------------------------------------------------------------------- /07_diabets_logistic.py: -------------------------------------------------------------------------------- 1 | from torch import nn, optim, from_numpy 2 | import numpy as np 3 | 4 | xy = np.loadtxt('./data/diabetes.csv.gz', delimiter=',', dtype=np.float32) 5 | x_data = from_numpy(xy[:, 0:-1]) 6 | y_data = from_numpy(xy[:, [-1]]) 7 | print(f'X\'s shape: {x_data.shape} | Y\'s shape: {y_data.shape}') 8 | 9 | 10 | class Model(nn.Module): 11 | def __init__(self): 12 | """ 13 | In the constructor we instantiate two nn.Linear module 14 | """ 15 | super(Model, self).__init__() 16 | self.l1 = nn.Linear(8, 6) 17 | self.l2 = nn.Linear(6, 4) 18 | self.l3 = nn.Linear(4, 1) 19 | 20 | self.sigmoid = nn.Sigmoid() 21 | 22 | def forward(self, x): 23 | """ 24 | In the forward function we accept a Variable of input data and we must return 25 | a Variable of output data. We can use Modules defined in the constructor as 26 | well as arbitrary operators on Variables. 27 | """ 28 | out1 = self.sigmoid(self.l1(x)) 29 | out2 = self.sigmoid(self.l2(out1)) 30 | y_pred = self.sigmoid(self.l3(out2)) 31 | return y_pred 32 | 33 | 34 | # our model 35 | model = Model() 36 | 37 | 38 | # Construct our loss function and an Optimizer. The call to model.parameters() 39 | # in the SGD constructor will contain the learnable parameters of the two 40 | # nn.Linear modules which are members of the model. 41 | criterion = nn.BCELoss(reduction='mean') 42 | optimizer = optim.SGD(model.parameters(), lr=0.1) 43 | 44 | # Training loop 45 | for epoch in range(100): 46 | # Forward pass: Compute predicted y by passing x to the model 47 | y_pred = model(x_data) 48 | 49 | # Compute and print loss 50 | loss = criterion(y_pred, y_data) 51 | print(f'Epoch: {epoch + 1}/100 | Loss: {loss.item():.4f}') 52 | 53 | # Zero gradients, perform a backward pass, and update the weights. 
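# PyTorch accumulates gradients in each parameter's .grad on every call to
# backward(), so they must be cleared here before the next backward pass.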
54 | optimizer.zero_grad() 55 | loss.backward() 56 | optimizer.step() 57 | -------------------------------------------------------------------------------- /08_1_dataset_loader.py: -------------------------------------------------------------------------------- 1 | # References 2 | # https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/01-basics/pytorch_basics/main.py 3 | # http://pytorch.org/tutorials/beginner/data_loading_tutorial.html#dataset-class 4 | from torch.utils.data import Dataset, DataLoader 5 | from torch import from_numpy, tensor 6 | import numpy as np 7 | 8 | class DiabetesDataset(Dataset): 9 | """ Diabetes dataset.""" 10 | 11 | # Initialize your data, download, etc. 12 | def __init__(self): 13 | xy = np.loadtxt('./data/diabetes.csv.gz', 14 | delimiter=',', dtype=np.float32) 15 | self.len = xy.shape[0] 16 | self.x_data = from_numpy(xy[:, 0:-1]) 17 | self.y_data = from_numpy(xy[:, [-1]]) 18 | 19 | def __getitem__(self, index): 20 | return self.x_data[index], self.y_data[index] 21 | 22 | def __len__(self): 23 | return self.len 24 | 25 | 26 | dataset = DiabetesDataset() 27 | train_loader = DataLoader(dataset=dataset, 28 | batch_size=32, 29 | shuffle=True, 30 | num_workers=2) 31 | 32 | for epoch in range(2): 33 | for i, data in enumerate(train_loader, 0): 34 | # get the inputs 35 | inputs, labels = data 36 | 37 | # wrap them in Variable 38 | inputs, labels = tensor(inputs), tensor(labels) 39 | 40 | # Run your training process 41 | print(f'Epoch: {i} | Inputs {inputs.data} | Labels {labels.data}') 42 | -------------------------------------------------------------------------------- /08_2_dataset_loade_logistic.py: -------------------------------------------------------------------------------- 1 | # References 2 | # https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/01-basics/pytorch_basics/main.py 3 | # http://pytorch.org/tutorials/beginner/data_loading_tutorial.html#dataset-class 4 | from torch.utils.data import Dataset, DataLoader 5 | from torch import nn, from_numpy, optim 6 | import numpy as np 7 | 8 | 9 | class DiabetesDataset(Dataset): 10 | """ Diabetes dataset.""" 11 | # Initialize your data, download, etc. 12 | def __init__(self): 13 | xy = np.loadtxt('./data/diabetes.csv.gz', 14 | delimiter=',', dtype=np.float32) 15 | self.len = xy.shape[0] 16 | self.x_data = from_numpy(xy[:, 0:-1]) 17 | self.y_data = from_numpy(xy[:, [-1]]) 18 | 19 | def __getitem__(self, index): 20 | return self.x_data[index], self.y_data[index] 21 | 22 | def __len__(self): 23 | return self.len 24 | 25 | 26 | dataset = DiabetesDataset() 27 | train_loader = DataLoader(dataset=dataset, 28 | batch_size=32, 29 | shuffle=True, 30 | num_workers=2) 31 | 32 | 33 | class Model(nn.Module): 34 | 35 | def __init__(self): 36 | """ 37 | In the constructor we instantiate two nn.Linear module 38 | """ 39 | super(Model, self).__init__() 40 | self.l1 = nn.Linear(8, 6) 41 | self.l2 = nn.Linear(6, 4) 42 | self.l3 = nn.Linear(4, 1) 43 | 44 | self.sigmoid = nn.Sigmoid() 45 | 46 | def forward(self, x): 47 | """ 48 | In the forward function we accept a Variable of input data and we must return 49 | a Variable of output data. We can use Modules defined in the constructor as 50 | well as arbitrary operators on Variables. 51 | """ 52 | out1 = self.sigmoid(self.l1(x)) 53 | out2 = self.sigmoid(self.l2(out1)) 54 | y_pred = self.sigmoid(self.l3(out2)) 55 | return y_pred 56 | 57 | 58 | # our model 59 | model = Model() 60 | 61 | # Construct our loss function and an Optimizer. 
The call to model.parameters() 62 | # in the SGD constructor will contain the learnable parameters of the two 63 | # nn.Linear modules which are members of the model. 64 | criterion = nn.BCELoss(reduction='sum') 65 | optimizer = optim.SGD(model.parameters(), lr=0.1) 66 | 67 | # Training loop 68 | for epoch in range(2): 69 | for i, data in enumerate(train_loader, 0): 70 | # get the inputs 71 | inputs, labels = data 72 | 73 | # Forward pass: Compute predicted y by passing x to the model 74 | y_pred = model(inputs) 75 | 76 | # Compute and print loss 77 | loss = criterion(y_pred, labels) 78 | print(f'Epoch {epoch + 1} | Batch: {i+1} | Loss: {loss.item():.4f}') 79 | 80 | # Zero gradients, perform a backward pass, and update the weights. 81 | optimizer.zero_grad() 82 | loss.backward() 83 | optimizer.step() 84 | -------------------------------------------------------------------------------- /09_01_softmax_loss.py: -------------------------------------------------------------------------------- 1 | from torch import nn, tensor, max 2 | import numpy as np 3 | 4 | # Cross entropy example 5 | # One hot 6 | # 0: 1 0 0 7 | # 1: 0 1 0 8 | # 2: 0 0 1 9 | Y = np.array([1, 0, 0]) 10 | Y_pred1 = np.array([0.7, 0.2, 0.1]) 11 | Y_pred2 = np.array([0.1, 0.3, 0.6]) 12 | print(f'Loss1: {np.sum(-Y * np.log(Y_pred1)):.4f}') 13 | print(f'Loss2: {np.sum(-Y * np.log(Y_pred2)):.4f}') 14 | 15 | # Softmax + CrossEntropy (logSoftmax + NLLLoss) 16 | loss = nn.CrossEntropyLoss() 17 | 18 | # target is of size nBatch 19 | # each element in target has to have 0 <= value < nClasses (0-2) 20 | # Input is class, not one-hot 21 | Y = tensor([0], requires_grad=False) 22 | 23 | # input is of size nBatch x nClasses = 1 x 4 24 | # Y_pred are logits (not softmax) 25 | Y_pred1 = tensor([[2.0, 1.0, 0.1]]) 26 | Y_pred2 = tensor([[0.5, 2.0, 0.3]]) 27 | 28 | l1 = loss(Y_pred1, Y) 29 | l2 = loss(Y_pred2, Y) 30 | 31 | print(f'PyTorch Loss1: {l1.item():.4f} \nPyTorch Loss2: {l2.item():.4f}') 32 | print(f'Y_pred1: {max(Y_pred1.data, 1)[1].item()}') 33 | print(f'Y_pred2: {max(Y_pred2.data, 1)[1].item()}') 34 | 35 | # target is of size nBatch 36 | # each element in target has to have 0 <= value < nClasses (0-2) 37 | # Input is class, not one-hot 38 | Y = tensor([2, 0, 1], requires_grad=False) 39 | 40 | # input is of size nBatch x nClasses = 2 x 4 41 | # Y_pred are logits (not softmax) 42 | Y_pred1 = tensor([[0.1, 0.2, 0.9], 43 | [1.1, 0.1, 0.2], 44 | [0.2, 2.1, 0.1]]) 45 | 46 | Y_pred2 = tensor([[0.8, 0.2, 0.3], 47 | [0.2, 0.3, 0.5], 48 | [0.2, 0.2, 0.5]]) 49 | 50 | l1 = loss(Y_pred1, Y) 51 | l2 = loss(Y_pred2, Y) 52 | print(f'Batch Loss1: {l1.item():.4f} \nBatch Loss2: {l2.data:.4f}') 53 | -------------------------------------------------------------------------------- /09_2_softmax_mnist.py: -------------------------------------------------------------------------------- 1 | # https://github.com/pytorch/examples/blob/master/mnist/main.py 2 | from __future__ import print_function 3 | from torch import nn, optim, cuda 4 | from torch.utils import data 5 | from torchvision import datasets, transforms 6 | import torch.nn.functional as F 7 | import time 8 | 9 | # Training settings 10 | batch_size = 64 11 | device = 'cuda' if cuda.is_available() else 'cpu' 12 | print(f'Training MNIST Model on {device}\n{"=" * 44}') 13 | 14 | # MNIST Dataset 15 | train_dataset = datasets.MNIST(root='./mnist_data/', 16 | train=True, 17 | transform=transforms.ToTensor(), 18 | download=True) 19 | 20 | test_dataset = datasets.MNIST(root='./mnist_data/', 21 | train=False, 
22 | transform=transforms.ToTensor()) 23 | 24 | # Data Loader (Input Pipeline) 25 | train_loader = data.DataLoader(dataset=train_dataset, 26 | batch_size=batch_size, 27 | shuffle=True) 28 | 29 | test_loader = data.DataLoader(dataset=test_dataset, 30 | batch_size=batch_size, 31 | shuffle=False) 32 | 33 | 34 | class Net(nn.Module): 35 | 36 | def __init__(self): 37 | super(Net, self).__init__() 38 | self.l1 = nn.Linear(784, 520) 39 | self.l2 = nn.Linear(520, 320) 40 | self.l3 = nn.Linear(320, 240) 41 | self.l4 = nn.Linear(240, 120) 42 | self.l5 = nn.Linear(120, 10) 43 | 44 | def forward(self, x): 45 | x = x.view(-1, 784) # Flatten the data (n, 1, 28, 28)-> (n, 784) 46 | x = F.relu(self.l1(x)) 47 | x = F.relu(self.l2(x)) 48 | x = F.relu(self.l3(x)) 49 | x = F.relu(self.l4(x)) 50 | return self.l5(x) 51 | 52 | 53 | model = Net() 54 | model.to(device) 55 | criterion = nn.CrossEntropyLoss() 56 | optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5) 57 | 58 | 59 | def train(epoch): 60 | model.train() 61 | for batch_idx, (data, target) in enumerate(train_loader): 62 | data, target = data.to(device), target.to(device) 63 | optimizer.zero_grad() 64 | output = model(data) 65 | loss = criterion(output, target) 66 | loss.backward() 67 | optimizer.step() 68 | if batch_idx % 10 == 0: 69 | print('Train Epoch: {} | Batch Status: {}/{} ({:.0f}%) | Loss: {:.6f}'.format( 70 | epoch, batch_idx * len(data), len(train_loader.dataset), 71 | 100. * batch_idx / len(train_loader), loss.item())) 72 | 73 | 74 | def test(): 75 | model.eval() 76 | test_loss = 0 77 | correct = 0 78 | for data, target in test_loader: 79 | data, target = data.to(device), target.to(device) 80 | output = model(data) 81 | # sum up batch loss 82 | test_loss += criterion(output, target).item() 83 | # get the index of the max 84 | pred = output.data.max(1, keepdim=True)[1] 85 | correct += pred.eq(target.data.view_as(pred)).cpu().sum() 86 | 87 | test_loss /= len(test_loader.dataset) 88 | print(f'===========================\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ' 89 | f'({100. 
* correct / len(test_loader.dataset):.0f}%)') 90 | 91 | 92 | if __name__ == '__main__': 93 | since = time.time() 94 | for epoch in range(1, 10): 95 | epoch_start = time.time() 96 | train(epoch) 97 | m, s = divmod(time.time() - epoch_start, 60) 98 | print(f'Training time: {m:.0f}m {s:.0f}s') 99 | test() 100 | m, s = divmod(time.time() - epoch_start, 60) 101 | print(f'Testing time: {m:.0f}m {s:.0f}s') 102 | 103 | m, s = divmod(time.time() - since, 60) 104 | print(f'Total Time: {m:.0f}m {s:.0f}s\nModel was trained on {device}!') 105 | 106 | -------------------------------------------------------------------------------- /10_1_cnn_mnist.py: -------------------------------------------------------------------------------- 1 | # https://github.com/pytorch/examples/blob/master/mnist/main.py 2 | from __future__ import print_function 3 | import argparse 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import torch.optim as optim 8 | from torchvision import datasets, transforms 9 | from torch.autograd import Variable 10 | 11 | # Training settings 12 | batch_size = 64 13 | 14 | # MNIST Dataset 15 | train_dataset = datasets.MNIST(root='./data/', 16 | train=True, 17 | transform=transforms.ToTensor(), 18 | download=True) 19 | 20 | test_dataset = datasets.MNIST(root='./data/', 21 | train=False, 22 | transform=transforms.ToTensor()) 23 | 24 | # Data Loader (Input Pipeline) 25 | train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 26 | batch_size=batch_size, 27 | shuffle=True) 28 | 29 | test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 30 | batch_size=batch_size, 31 | shuffle=False) 32 | 33 | 34 | class Net(nn.Module): 35 | 36 | def __init__(self): 37 | super(Net, self).__init__() 38 | self.conv1 = nn.Conv2d(1, 10, kernel_size=5) 39 | self.conv2 = nn.Conv2d(10, 20, kernel_size=5) 40 | self.mp = nn.MaxPool2d(2) 41 | self.fc = nn.Linear(320, 10) 42 | 43 | def forward(self, x): 44 | in_size = x.size(0) 45 | x = F.relu(self.mp(self.conv1(x))) 46 | x = F.relu(self.mp(self.conv2(x))) 47 | x = x.view(in_size, -1) # flatten the tensor 48 | x = self.fc(x) 49 | return F.log_softmax(x) 50 | 51 | 52 | model = Net() 53 | 54 | optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5) 55 | 56 | 57 | def train(epoch): 58 | model.train() 59 | for batch_idx, (data, target) in enumerate(train_loader): 60 | data, target = Variable(data), Variable(target) 61 | optimizer.zero_grad() 62 | output = model(data) 63 | loss = F.nll_loss(output, target) 64 | loss.backward() 65 | optimizer.step() 66 | if batch_idx % 10 == 0: 67 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 68 | epoch, batch_idx * len(data), len(train_loader.dataset), 69 | 100. * batch_idx / len(train_loader), loss.item())) 70 | 71 | 72 | def test(): 73 | model.eval() 74 | test_loss = 0 75 | correct = 0 76 | for data, target in test_loader: 77 | data, target = Variable(data, volatile=True), Variable(target) 78 | output = model(data) 79 | # sum up batch loss 80 | test_loss += F.nll_loss(output, target, size_average=False).data 81 | # get the index of the max log-probability 82 | pred = output.data.max(1, keepdim=True)[1] 83 | correct += pred.eq(target.data.view_as(pred)).cpu().sum() 84 | 85 | test_loss /= len(test_loader.dataset) 86 | print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( 87 | test_loss, correct, len(test_loader.dataset), 88 | 100. 
* correct / len(test_loader.dataset))) 89 | 90 | 91 | for epoch in range(1, 10): 92 | train(epoch) 93 | test() 94 | -------------------------------------------------------------------------------- /11_1_toy_inception_mnist.py: -------------------------------------------------------------------------------- 1 | # https://github.com/pytorch/examples/blob/master/mnist/main.py 2 | from __future__ import print_function 3 | import argparse 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import torch.optim as optim 8 | from torchvision import datasets, transforms 9 | from torch.autograd import Variable 10 | 11 | # Training settings 12 | batch_size = 64 13 | 14 | # MNIST Dataset 15 | train_dataset = datasets.MNIST(root='./data/', 16 | train=True, 17 | transform=transforms.ToTensor(), 18 | download=True) 19 | 20 | test_dataset = datasets.MNIST(root='./data/', 21 | train=False, 22 | transform=transforms.ToTensor()) 23 | 24 | # Data Loader (Input Pipeline) 25 | train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 26 | batch_size=batch_size, 27 | shuffle=True) 28 | 29 | test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 30 | batch_size=batch_size, 31 | shuffle=False) 32 | 33 | 34 | class InceptionA(nn.Module): 35 | 36 | def __init__(self, in_channels): 37 | super(InceptionA, self).__init__() 38 | self.branch1x1 = nn.Conv2d(in_channels, 16, kernel_size=1) 39 | 40 | self.branch5x5_1 = nn.Conv2d(in_channels, 16, kernel_size=1) 41 | self.branch5x5_2 = nn.Conv2d(16, 24, kernel_size=5, padding=2) 42 | 43 | self.branch3x3dbl_1 = nn.Conv2d(in_channels, 16, kernel_size=1) 44 | self.branch3x3dbl_2 = nn.Conv2d(16, 24, kernel_size=3, padding=1) 45 | self.branch3x3dbl_3 = nn.Conv2d(24, 24, kernel_size=3, padding=1) 46 | 47 | self.branch_pool = nn.Conv2d(in_channels, 24, kernel_size=1) 48 | 49 | def forward(self, x): 50 | branch1x1 = self.branch1x1(x) 51 | 52 | branch5x5 = self.branch5x5_1(x) 53 | branch5x5 = self.branch5x5_2(branch5x5) 54 | 55 | branch3x3dbl = self.branch3x3dbl_1(x) 56 | branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) 57 | branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) 58 | 59 | branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1) 60 | branch_pool = self.branch_pool(branch_pool) 61 | 62 | outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool] 63 | return torch.cat(outputs, 1) 64 | 65 | 66 | class Net(nn.Module): 67 | 68 | def __init__(self): 69 | super(Net, self).__init__() 70 | self.conv1 = nn.Conv2d(1, 10, kernel_size=5) 71 | self.conv2 = nn.Conv2d(88, 20, kernel_size=5) 72 | 73 | self.incept1 = InceptionA(in_channels=10) 74 | self.incept2 = InceptionA(in_channels=20) 75 | 76 | self.mp = nn.MaxPool2d(2) 77 | self.fc = nn.Linear(1408, 10) 78 | 79 | def forward(self, x): 80 | in_size = x.size(0) 81 | x = F.relu(self.mp(self.conv1(x))) 82 | x = self.incept1(x) 83 | x = F.relu(self.mp(self.conv2(x))) 84 | x = self.incept2(x) 85 | x = x.view(in_size, -1) # flatten the tensor 86 | x = self.fc(x) 87 | return F.log_softmax(x) 88 | 89 | 90 | model = Net() 91 | 92 | optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5) 93 | 94 | 95 | def train(epoch): 96 | model.train() 97 | for batch_idx, (data, target) in enumerate(train_loader): 98 | data, target = Variable(data), Variable(target) 99 | optimizer.zero_grad() 100 | output = model(data) 101 | loss = F.nll_loss(output, target) 102 | loss.backward() 103 | optimizer.step() 104 | if batch_idx % 10 == 0: 105 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: 
{:.6f}'.format( 106 | epoch, batch_idx * len(data), len(train_loader.dataset), 107 | 100. * batch_idx / len(train_loader), loss.data[0])) 108 | 109 | 110 | def test(): 111 | model.eval() 112 | test_loss = 0 113 | correct = 0 114 | for data, target in test_loader: 115 | data, target = Variable(data, volatile=True), Variable(target) 116 | output = model(data) 117 | # sum up batch loss 118 | test_loss += F.nll_loss(output, target, size_average=False).data[0] 119 | # get the index of the max log-probability 120 | pred = output.data.max(1, keepdim=True)[1] 121 | correct += pred.eq(target.data.view_as(pred)).cpu().sum() 122 | 123 | test_loss /= len(test_loader.dataset) 124 | print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( 125 | test_loss, correct, len(test_loader.dataset), 126 | 100. * correct / len(test_loader.dataset))) 127 | 128 | 129 | for epoch in range(1, 10): 130 | train(epoch) 131 | test() 132 | -------------------------------------------------------------------------------- /12_1_rnn_basics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | 5 | # One hot encoding for each char in 'hello' 6 | h = [1, 0, 0, 0] 7 | e = [0, 1, 0, 0] 8 | l = [0, 0, 1, 0] 9 | o = [0, 0, 0, 1] 10 | 11 | # One cell RNN input_dim (4) -> output_dim (2). sequence: 5 12 | cell = nn.RNN(input_size=4, hidden_size=2, batch_first=True) 13 | 14 | # (num_layers * num_directions, batch, hidden_size) whether batch_first=True or False 15 | hidden = Variable(torch.randn(1, 1, 2)) 16 | 17 | # Propagate input through RNN 18 | # Input: (batch, seq_len, input_size) when batch_first=True 19 | inputs = Variable(torch.Tensor([h, e, l, l, o])) 20 | for one in inputs: 21 | one = one.view(1, 1, -1) 22 | # Input: (batch, seq_len, input_size) when batch_first=True 23 | out, hidden = cell(one, hidden) 24 | print("one input size", one.size(), "out size", out.size()) 25 | 26 | # We can do the whole at once 27 | # Propagate input through RNN 28 | # Input: (batch, seq_len, input_size) when batch_first=True 29 | inputs = inputs.view(1, 5, -1) 30 | out, hidden = cell(inputs, hidden) 31 | print("sequence input size", inputs.size(), "out size", out.size()) 32 | 33 | 34 | # hidden : (num_layers * num_directions, batch, hidden_size) whether batch_first=True or False 35 | hidden = Variable(torch.randn(1, 3, 2)) 36 | 37 | # One cell RNN input_dim (4) -> output_dim (2). sequence: 5, batch 3 38 | # 3 batches 'hello', 'eolll', 'lleel' 39 | # rank = (3, 5, 4) 40 | inputs = Variable(torch.Tensor([[h, e, l, l, o], 41 | [e, o, l, l, l], 42 | [l, l, e, e, l]])) 43 | 44 | # Propagate input through RNN 45 | # Input: (batch, seq_len, input_size) when batch_first=True 46 | # B x S x I 47 | out, hidden = cell(inputs, hidden) 48 | print("batch input size", inputs.size(), "out size", out.size()) 49 | 50 | 51 | # One cell RNN input_dim (4) -> output_dim (2) 52 | cell = nn.RNN(input_size=4, hidden_size=2) 53 | 54 | # The given dimensions dim0 and dim1 are swapped. 
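# This cell was built without batch_first=True, so it expects
# (seq_len, batch, input_size); transposing the (3, 5, 4) batch gives
# (5, 3, 4), and the output below becomes (5, 3, 2) with hidden (1, 3, 2).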
55 | inputs = inputs.transpose(dim0=0, dim1=1) 56 | # Propagate input through RNN 57 | # Input: (seq_len, batch_size, input_size) when batch_first=False (default) 58 | # S x B x I 59 | out, hidden = cell(inputs, hidden) 60 | print("batch input size", inputs.size(), "out size", out.size()) 61 | -------------------------------------------------------------------------------- /12_2_hello_rnn.py: -------------------------------------------------------------------------------- 1 | # Lab 12 RNN 2 | import sys 3 | import torch 4 | import torch.nn as nn 5 | from torch.autograd import Variable 6 | 7 | torch.manual_seed(777) # reproducibility 8 | # 0 1 2 3 4 9 | idx2char = ['h', 'i', 'e', 'l', 'o'] 10 | 11 | # Teach hihell -> ihello 12 | x_data = [0, 1, 0, 2, 3, 3] # hihell 13 | one_hot_lookup = [[1, 0, 0, 0, 0], # 0 14 | [0, 1, 0, 0, 0], # 1 15 | [0, 0, 1, 0, 0], # 2 16 | [0, 0, 0, 1, 0], # 3 17 | [0, 0, 0, 0, 1]] # 4 18 | 19 | y_data = [1, 0, 2, 3, 3, 4] # ihello 20 | x_one_hot = [one_hot_lookup[x] for x in x_data] 21 | 22 | # As we have one batch of samples, we will change them to variables only once 23 | inputs = Variable(torch.Tensor(x_one_hot)) 24 | labels = Variable(torch.LongTensor(y_data)) 25 | 26 | num_classes = 5 27 | input_size = 5 # one-hot size 28 | hidden_size = 5 # output from the RNN. 5 to directly predict one-hot 29 | batch_size = 1 # one sentence 30 | sequence_length = 1 # One by one 31 | num_layers = 1 # one-layer rnn 32 | 33 | 34 | class Model(nn.Module): 35 | 36 | def __init__(self): 37 | super(Model, self).__init__() 38 | self.rnn = nn.RNN(input_size=input_size, 39 | hidden_size=hidden_size, batch_first=True) 40 | 41 | def forward(self, hidden, x): 42 | # Reshape input (batch first) 43 | x = x.view(batch_size, sequence_length, input_size) 44 | 45 | # Propagate input through RNN 46 | # Input: (batch, seq_len, input_size) 47 | # hidden: (num_layers * num_directions, batch, hidden_size) 48 | out, hidden = self.rnn(x, hidden) 49 | return hidden, out.view(-1, num_classes) 50 | 51 | def init_hidden(self): 52 | # Initialize hidden and cell states 53 | # (num_layers * num_directions, batch, hidden_size) 54 | return Variable(torch.zeros(num_layers, batch_size, hidden_size)) 55 | 56 | 57 | # Instantiate RNN model 58 | model = Model() 59 | print(model) 60 | 61 | # Set loss and optimizer function 62 | # CrossEntropyLoss = LogSoftmax + NLLLoss 63 | criterion = nn.CrossEntropyLoss() 64 | optimizer = torch.optim.Adam(model.parameters(), lr=0.1) 65 | 66 | # Train the model 67 | for epoch in range(100): 68 | optimizer.zero_grad() 69 | loss = 0 70 | hidden = model.init_hidden() 71 | 72 | sys.stdout.write("predicted string: ") 73 | for input, label in zip(inputs, labels): 74 | # print(input.size(), label.size()) 75 | hidden, output = model(hidden, input) 76 | val, idx = output.max(1) 77 | sys.stdout.write(idx2char[idx.data[0]]) 78 | loss += criterion(output, torch.LongTensor([label])) 79 | 80 | print(", epoch: %d, loss: %1.3f" % (epoch + 1, loss)) 81 | 82 | loss.backward() 83 | optimizer.step() 84 | 85 | print("Learning finished!") 86 | -------------------------------------------------------------------------------- /12_3_hello_rnn_seq.py: -------------------------------------------------------------------------------- 1 | # Lab 12 RNN 2 | import torch 3 | import torch.nn as nn 4 | from torch.autograd import Variable 5 | 6 | torch.manual_seed(777) # reproducibility 7 | 8 | 9 | idx2char = ['h', 'i', 'e', 'l', 'o'] 10 | 11 | # Teach hihell -> ihello 12 | x_data = [[0, 1, 0, 2, 3, 3]] # hihell 13 | 
x_one_hot = [[[1, 0, 0, 0, 0], # h 0 14 | [0, 1, 0, 0, 0], # i 1 15 | [1, 0, 0, 0, 0], # h 0 16 | [0, 0, 1, 0, 0], # e 2 17 | [0, 0, 0, 1, 0], # l 3 18 | [0, 0, 0, 1, 0]]] # l 3 19 | 20 | y_data = [1, 0, 2, 3, 3, 4] # ihello 21 | 22 | # As we have one batch of samples, we will change them to variables only once 23 | inputs = Variable(torch.Tensor(x_one_hot)) 24 | labels = Variable(torch.LongTensor(y_data)) 25 | 26 | num_classes = 5 27 | input_size = 5 # one-hot size 28 | hidden_size = 5 # output from the LSTM. 5 to directly predict one-hot 29 | batch_size = 1 # one sentence 30 | sequence_length = 6 # |ihello| == 6 31 | num_layers = 1 # one-layer rnn 32 | 33 | 34 | class RNN(nn.Module): 35 | 36 | def __init__(self, num_classes, input_size, hidden_size, num_layers): 37 | super(RNN, self).__init__() 38 | 39 | self.num_classes = num_classes 40 | self.num_layers = num_layers 41 | self.input_size = input_size 42 | self.hidden_size = hidden_size 43 | self.sequence_length = sequence_length 44 | 45 | self.rnn = nn.RNN(input_size=5, hidden_size=5, batch_first=True) 46 | 47 | def forward(self, x): 48 | # Initialize hidden and cell states 49 | # (num_layers * num_directions, batch, hidden_size) for batch_first=True 50 | h_0 = Variable(torch.zeros( 51 | self.num_layers, x.size(0), self.hidden_size)) 52 | 53 | # Reshape input 54 | x.view(x.size(0), self.sequence_length, self.input_size) 55 | 56 | # Propagate input through RNN 57 | # Input: (batch, seq_len, input_size) 58 | # h_0: (num_layers * num_directions, batch, hidden_size) 59 | 60 | out, _ = self.rnn(x, h_0) 61 | return out.view(-1, num_classes) 62 | 63 | 64 | # Instantiate RNN model 65 | rnn = RNN(num_classes, input_size, hidden_size, num_layers) 66 | print(rnn) 67 | 68 | # Set loss and optimizer function 69 | # CrossEntropyLoss = LogSoftmax + NLLLoss 70 | criterion = torch.nn.CrossEntropyLoss() 71 | optimizer = torch.optim.Adam(rnn.parameters(), lr=0.1) 72 | 73 | # Train the model 74 | for epoch in range(100): 75 | outputs = rnn(inputs) 76 | optimizer.zero_grad() 77 | loss = criterion(outputs, labels) 78 | loss.backward() 79 | optimizer.step() 80 | _, idx = outputs.max(1) 81 | idx = idx.data.numpy() 82 | result_str = [idx2char[c] for c in idx.squeeze()] 83 | print("epoch: %d, loss: %1.3f" % (epoch + 1, loss.data[0])) 84 | print("Predicted string: ", ''.join(result_str)) 85 | 86 | print("Learning finished!") 87 | -------------------------------------------------------------------------------- /12_4_hello_rnn_emb.py: -------------------------------------------------------------------------------- 1 | # Lab 12 RNN 2 | import torch 3 | import torch.nn as nn 4 | from torch.autograd import Variable 5 | 6 | torch.manual_seed(777) # reproducibility 7 | 8 | 9 | idx2char = ['h', 'i', 'e', 'l', 'o'] 10 | 11 | # Teach hihell -> ihello 12 | x_data = [[0, 1, 0, 2, 3, 3]] # hihell 13 | y_data = [1, 0, 2, 3, 3, 4] # ihello 14 | 15 | # As we have one batch of samples, we will change them to variables only once 16 | inputs = Variable(torch.LongTensor(x_data)) 17 | labels = Variable(torch.LongTensor(y_data)) 18 | 19 | num_classes = 5 20 | input_size = 5 21 | embedding_size = 10 # embedding size 22 | hidden_size = 5 # output from the LSTM. 
5 to directly predict one-hot 23 | batch_size = 1 # one sentence 24 | sequence_length = 6 # |ihello| == 6 25 | num_layers = 1 # one-layer rnn 26 | 27 | 28 | class Model(nn.Module): 29 | 30 | def __init__(self, num_layers, hidden_size): 31 | super(Model, self).__init__() 32 | self.num_layers = num_layers 33 | self.hidden_size = hidden_size 34 | self.embedding = nn.Embedding(input_size, embedding_size) 35 | self.rnn = nn.RNN(input_size=embedding_size, 36 | hidden_size=5, batch_first=True) 37 | self.fc = nn.Linear(hidden_size, num_classes) 38 | 39 | def forward(self, x): 40 | # Initialize hidden and cell states 41 | # (num_layers * num_directions, batch, hidden_size) 42 | h_0 = Variable(torch.zeros( 43 | self.num_layers, x.size(0), self.hidden_size)) 44 | 45 | emb = self.embedding(x) 46 | emb = emb.view(batch_size, sequence_length, -1) 47 | 48 | # Propagate embedding through RNN 49 | # Input: (batch, seq_len, embedding_size) 50 | # h_0: (num_layers * num_directions, batch, hidden_size) 51 | out, _ = self.rnn(emb, h_0) 52 | return self.fc(out.view(-1, num_classes)) 53 | 54 | 55 | # Instantiate RNN model 56 | model = Model(num_layers, hidden_size) 57 | print(model) 58 | 59 | # Set loss and optimizer function 60 | # CrossEntropyLoss = LogSoftmax + NLLLoss 61 | criterion = torch.nn.CrossEntropyLoss() 62 | optimizer = torch.optim.Adam(model.parameters(), lr=0.1) 63 | 64 | # Train the model 65 | for epoch in range(100): 66 | outputs = model(inputs) 67 | optimizer.zero_grad() 68 | loss = criterion(outputs, labels) 69 | loss.backward() 70 | optimizer.step() 71 | _, idx = outputs.max(1) 72 | idx = idx.data.numpy() 73 | result_str = [idx2char[c] for c in idx.squeeze()] 74 | print("epoch: %d, loss: %1.3f" % (epoch + 1, loss.item())) 75 | print("Predicted string: ", ''.join(result_str)) 76 | 77 | print("Learning finished!") 78 | -------------------------------------------------------------------------------- /13_1_rnn_classification_basics.py: -------------------------------------------------------------------------------- 1 | # Original code is from https://github.com/spro/practical-pytorch 2 | import time 3 | import math 4 | import torch 5 | import torch.nn as nn 6 | from torch.autograd import Variable 7 | from torch.utils.data import DataLoader 8 | 9 | from name_dataset import NameDataset 10 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence 11 | 12 | # Parameters and DataLoaders 13 | HIDDEN_SIZE = 100 14 | N_CHARS = 128 # ASCII 15 | N_CLASSES = 18 16 | 17 | 18 | class RNNClassifier(nn.Module): 19 | 20 | def __init__(self, input_size, hidden_size, output_size, n_layers=1): 21 | super(RNNClassifier, self).__init__() 22 | self.hidden_size = hidden_size 23 | self.n_layers = n_layers 24 | 25 | self.embedding = nn.Embedding(input_size, hidden_size) 26 | self.gru = nn.GRU(hidden_size, hidden_size, n_layers) 27 | self.fc = nn.Linear(hidden_size, output_size) 28 | 29 | def forward(self, input): 30 | # Note: we run this all at once (over the whole input sequence) 31 | 32 | # input = B x S . 
size(0) = B 33 | batch_size = input.size(0) 34 | 35 | # input: B x S -- (transpose) --> S x B 36 | input = input.t() 37 | 38 | # Embedding S x B -> S x B x I (embedding size) 39 | print(" input", input.size()) 40 | embedded = self.embedding(input) 41 | print(" embedding", embedded.size()) 42 | 43 | # Make a hidden 44 | hidden = self._init_hidden(batch_size) 45 | 46 | output, hidden = self.gru(embedded, hidden) 47 | print(" gru hidden output", hidden.size()) 48 | # Use the last layer output as FC's input 49 | # No need to unpack, since we are going to use hidden 50 | fc_output = self.fc(hidden) 51 | print(" fc output", fc_output.size()) 52 | return fc_output 53 | 54 | def _init_hidden(self, batch_size): 55 | hidden = torch.zeros(self.n_layers, batch_size, self.hidden_size) 56 | return Variable(hidden) 57 | 58 | # Help functions 59 | 60 | 61 | def str2ascii_arr(msg): 62 | arr = [ord(c) for c in msg] 63 | return arr, len(arr) 64 | 65 | # pad sequences and sort the tensor 66 | def pad_sequences(vectorized_seqs, seq_lengths): 67 | seq_tensor = torch.zeros((len(vectorized_seqs), seq_lengths.max())).long() 68 | for idx, (seq, seq_len) in enumerate(zip(vectorized_seqs, seq_lengths)): 69 | seq_tensor[idx, :seq_len] = torch.LongTensor(seq) 70 | return seq_tensor 71 | 72 | # Create necessary variables, lengths, and target 73 | def make_variables(names): 74 | sequence_and_length = [str2ascii_arr(name) for name in names] 75 | vectorized_seqs = [sl[0] for sl in sequence_and_length] 76 | seq_lengths = torch.LongTensor([sl[1] for sl in sequence_and_length]) 77 | return pad_sequences(vectorized_seqs, seq_lengths) 78 | 79 | 80 | if __name__ == '__main__': 81 | names = ['adylov', 'solan', 'hard', 'san'] 82 | classifier = RNNClassifier(N_CHARS, HIDDEN_SIZE, N_CLASSES) 83 | 84 | for name in names: 85 | arr, _ = str2ascii_arr(name) 86 | inp = Variable(torch.LongTensor([arr])) 87 | out = classifier(inp) 88 | print("in", inp.size(), "out", out.size()) 89 | 90 | 91 | inputs = make_variables(names) 92 | out = classifier(inputs) 93 | print("batch in", inputs.size(), "batch out", out.size()) 94 | 95 | 96 | -------------------------------------------------------------------------------- /13_2_rnn_classification.py: -------------------------------------------------------------------------------- 1 | # Original code is from https://github.com/spro/practical-pytorch 2 | import time 3 | import math 4 | import torch 5 | import torch.nn as nn 6 | from torch.autograd import Variable 7 | from torch.utils.data import DataLoader 8 | 9 | from name_dataset import NameDataset 10 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence 11 | 12 | # Parameters and DataLoaders 13 | HIDDEN_SIZE = 100 14 | N_LAYERS = 2 15 | BATCH_SIZE = 256 16 | N_EPOCHS = 100 17 | 18 | test_dataset = NameDataset(is_train_set=False) 19 | test_loader = DataLoader(dataset=test_dataset, 20 | batch_size=BATCH_SIZE, shuffle=True) 21 | 22 | 23 | train_dataset = NameDataset(is_train_set=True) 24 | train_loader = DataLoader(dataset=train_dataset, 25 | batch_size=BATCH_SIZE, shuffle=True) 26 | 27 | N_COUNTRIES = len(train_dataset.get_countries()) 28 | print(N_COUNTRIES, "countries") 29 | N_CHARS = 128 # ASCII 30 | 31 | 32 | # Some utility functions 33 | def time_since(since): 34 | s = time.time() - since 35 | m = math.floor(s / 60) 36 | s -= m * 60 37 | return '%dm %ds' % (m, s) 38 | 39 | 40 | def create_variable(tensor): 41 | # Do cuda() before wrapping with variable 42 | if torch.cuda.is_available(): 43 | return Variable(tensor.cuda()) 44 | 
else: 45 | return Variable(tensor) 46 | 47 | 48 | # pad sequences and sort the tensor 49 | def pad_sequences(vectorized_seqs, seq_lengths, countries): 50 | seq_tensor = torch.zeros((len(vectorized_seqs), seq_lengths.max())).long() 51 | for idx, (seq, seq_len) in enumerate(zip(vectorized_seqs, seq_lengths)): 52 | seq_tensor[idx, :seq_len] = torch.LongTensor(seq) 53 | 54 | # Sort tensors by their length 55 | seq_lengths, perm_idx = seq_lengths.sort(0, descending=True) 56 | seq_tensor = seq_tensor[perm_idx] 57 | 58 | # Also sort the target (countries) in the same order 59 | target = countries2tensor(countries) 60 | if len(countries): 61 | target = target[perm_idx] 62 | 63 | # Return variables 64 | # DataParallel requires everything to be a Variable 65 | return create_variable(seq_tensor), \ 66 | create_variable(seq_lengths), \ 67 | create_variable(target) 68 | 69 | 70 | # Create necessary variables, lengths, and target 71 | def make_variables(names, countries): 72 | sequence_and_length = [str2ascii_arr(name) for name in names] 73 | vectorized_seqs = [sl[0] for sl in sequence_and_length] 74 | seq_lengths = torch.LongTensor([sl[1] for sl in sequence_and_length]) 75 | return pad_sequences(vectorized_seqs, seq_lengths, countries) 76 | 77 | 78 | def str2ascii_arr(msg): 79 | arr = [ord(c) for c in msg] 80 | return arr, len(arr) 81 | 82 | 83 | def countries2tensor(countries): 84 | country_ids = [train_dataset.get_country_id( 85 | country) for country in countries] 86 | return torch.LongTensor(country_ids) 87 | 88 | 89 | class RNNClassifier(nn.Module): 90 | # Our model 91 | 92 | def __init__(self, input_size, hidden_size, output_size, n_layers=1, bidirectional=True): 93 | super(RNNClassifier, self).__init__() 94 | self.hidden_size = hidden_size 95 | self.n_layers = n_layers 96 | self.n_directions = int(bidirectional) + 1 97 | 98 | self.embedding = nn.Embedding(input_size, hidden_size) 99 | self.gru = nn.GRU(hidden_size, hidden_size, n_layers, 100 | bidirectional=bidirectional) 101 | self.fc = nn.Linear(hidden_size, output_size) 102 | 103 | def forward(self, input, seq_lengths): 104 | # Note: we run this all at once (over the whole input sequence) 105 | # input shape: B x S (input size) 106 | # transpose to make S(sequence) x B (batch) 107 | input = input.t() 108 | batch_size = input.size(1) 109 | 110 | # Make a hidden 111 | hidden = self._init_hidden(batch_size) 112 | 113 | # Embedding S x B -> S x B x I (embedding size) 114 | embedded = self.embedding(input) 115 | 116 | # Pack them up nicely 117 | gru_input = pack_padded_sequence( 118 | embedded, seq_lengths.data.cpu().numpy()) 119 | 120 | # To compact weights again call flatten_parameters(). 
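# flatten_parameters() compacts the GRU weights into a single contiguous
# chunk for cuDNN; calling it in forward() avoids the non-contiguous-weights
# warning when the model has been replicated by nn.DataParallel (as done in
# the main block below).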
121 | self.gru.flatten_parameters() 122 | output, hidden = self.gru(gru_input, hidden) 123 | 124 | # Use the last layer output as FC's input 125 | # No need to unpack, since we are going to use hidden 126 | fc_output = self.fc(hidden[-1]) 127 | return fc_output 128 | 129 | def _init_hidden(self, batch_size): 130 | hidden = torch.zeros(self.n_layers * self.n_directions, 131 | batch_size, self.hidden_size) 132 | return create_variable(hidden) 133 | 134 | 135 | # Train cycle 136 | def train(): 137 | total_loss = 0 138 | 139 | for i, (names, countries) in enumerate(train_loader, 1): 140 | input, seq_lengths, target = make_variables(names, countries) 141 | output = classifier(input, seq_lengths) 142 | 143 | loss = criterion(output, target) 144 | total_loss += loss.data[0] 145 | 146 | classifier.zero_grad() 147 | loss.backward() 148 | optimizer.step() 149 | 150 | if i % 10 == 0: 151 | print('[{}] Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.2f}'.format( 152 | time_since(start), epoch, i * 153 | len(names), len(train_loader.dataset), 154 | 100. * i * len(names) / len(train_loader.dataset), 155 | total_loss / i * len(names))) 156 | 157 | return total_loss 158 | 159 | 160 | # Testing cycle 161 | def test(name=None): 162 | # Predict for a given name 163 | if name: 164 | input, seq_lengths, target = make_variables([name], []) 165 | output = classifier(input, seq_lengths) 166 | pred = output.data.max(1, keepdim=True)[1] 167 | country_id = pred.cpu().numpy()[0][0] 168 | print(name, "is", train_dataset.get_country(country_id)) 169 | return 170 | 171 | print("evaluating trained model ...") 172 | correct = 0 173 | train_data_size = len(test_loader.dataset) 174 | 175 | for names, countries in test_loader: 176 | input, seq_lengths, target = make_variables(names, countries) 177 | output = classifier(input, seq_lengths) 178 | pred = output.data.max(1, keepdim=True)[1] 179 | correct += pred.eq(target.data.view_as(pred)).cpu().sum() 180 | 181 | print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format( 182 | correct, train_data_size, 100. * correct / train_data_size)) 183 | 184 | 185 | if __name__ == '__main__': 186 | 187 | classifier = RNNClassifier(N_CHARS, HIDDEN_SIZE, N_COUNTRIES, N_LAYERS) 188 | if torch.cuda.device_count() > 1: 189 | print("Let's use", torch.cuda.device_count(), "GPUs!") 190 | # dim = 0 [33, xxx] -> [11, ...], [11, ...], [11, ...] on 3 GPUs 191 | classifier = nn.DataParallel(classifier) 192 | 193 | if torch.cuda.is_available(): 194 | classifier.cuda() 195 | 196 | optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001) 197 | criterion = nn.CrossEntropyLoss() 198 | 199 | start = time.time() 200 | print("Training for %d epochs..." 
% N_EPOCHS) 201 | for epoch in range(1, N_EPOCHS + 1): 202 | # Train cycle 203 | train() 204 | 205 | # Testing 206 | test() 207 | 208 | # Testing several samples 209 | test("Sung") 210 | test("Jungwoo") 211 | test("Soojin") 212 | test("Nako") 213 | -------------------------------------------------------------------------------- /13_3_char_rnn.py: -------------------------------------------------------------------------------- 1 | # https://github.com/spro/practical-pytorch 2 | import torch 3 | import torch.nn as nn 4 | from torch.autograd import Variable 5 | from torch.utils.data import DataLoader 6 | 7 | from text_loader import TextDataset 8 | 9 | hidden_size = 100 10 | n_layers = 3 11 | batch_size = 1 12 | n_epochs = 100 13 | n_characters = 128 # ASCII 14 | 15 | 16 | class RNN(nn.Module): 17 | 18 | def __init__(self, input_size, hidden_size, output_size, n_layers=1): 19 | super(RNN, self).__init__() 20 | self.input_size = input_size 21 | self.hidden_size = hidden_size 22 | self.output_size = output_size 23 | self.n_layers = n_layers 24 | 25 | self.embedding = nn.Embedding(input_size, hidden_size) 26 | self.gru = nn.GRU(hidden_size, hidden_size, n_layers) 27 | self.linear = nn.Linear(hidden_size, output_size) 28 | 29 | # This runs this one step at a time 30 | # It's extremely slow, and please do not use in practice. 31 | # We need to use (1) batch and (2) data parallelism 32 | def forward(self, input, hidden): 33 | embed = self.embedding(input.view(1, -1)) # S(=1) x I 34 | embed = embed.view(1, 1, -1) # S(=1) x B(=1) x I (embedding size) 35 | output, hidden = self.gru(embed, hidden) 36 | output = self.linear(output.view(1, -1)) # S(=1) x I 37 | return output, hidden 38 | 39 | def init_hidden(self): 40 | if torch.cuda.is_available(): 41 | hidden = torch.zeros(self.n_layers, 1, self.hidden_size).cuda() 42 | else: 43 | hidden = torch.zeros(self.n_layers, 1, self.hidden_size) 44 | 45 | return Variable(hidden) 46 | 47 | 48 | def str2tensor(string): 49 | tensor = [ord(c) for c in string] 50 | tensor = torch.LongTensor(tensor) 51 | 52 | if torch.cuda.is_available(): 53 | tensor = tensor.cuda() 54 | 55 | return Variable(tensor) 56 | 57 | 58 | def generate(decoder, prime_str='A', predict_len=100, temperature=0.8): 59 | hidden = decoder.init_hidden() 60 | prime_input = str2tensor(prime_str) 61 | predicted = prime_str 62 | 63 | # Use priming string to "build up" hidden state 64 | for p in range(len(prime_str) - 1): 65 | _, hidden = decoder(prime_input[p], hidden) 66 | 67 | inp = prime_input[-1] 68 | 69 | for p in range(predict_len): 70 | output, hidden = decoder(inp, hidden) 71 | 72 | # Sample from the network as a multinomial distribution 73 | output_dist = output.data.view(-1).div(temperature).exp() 74 | top_i = torch.multinomial(output_dist, 1)[0] 75 | 76 | # Add predicted character to string and use as next input 77 | predicted_char = chr(top_i) 78 | predicted += predicted_char 79 | inp = str2tensor(predicted_char) 80 | 81 | return predicted 82 | 83 | # Train for a given src and target 84 | # It feeds single string to demonstrate seq2seq 85 | # It's extremely slow, and we need to use (1) batch and (2) data parallelism 86 | # http://pytorch.org/tutorials/beginner/former_torchies/parallelism_tutorial.html. 
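# Two training variants follow: train_teacher_forching feeds the ground-truth
# previous character to the decoder at every step (teacher forcing), while
# train feeds back the decoder's own argmax prediction (free running).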
87 | 88 | 89 | def train_teacher_forching(line): 90 | input = str2tensor(line[:-1]) 91 | target = str2tensor(line[1:]) 92 | 93 | hidden = decoder.init_hidden() 94 | loss = 0 95 | 96 | for c in range(len(input)): 97 | output, hidden = decoder(input[c], hidden) 98 | loss += criterion(output, target[c]) 99 | 100 | decoder.zero_grad() 101 | loss.backward() 102 | decoder_optimizer.step() 103 | 104 | return loss.data[0] / len(input) 105 | 106 | 107 | def train(line): 108 | input = str2tensor(line[:-1]) 109 | target = str2tensor(line[1:]) 110 | 111 | hidden = decoder.init_hidden() 112 | decoder_in = input[0] 113 | loss = 0 114 | 115 | for c in range(len(input)): 116 | output, hidden = decoder(decoder_in, hidden) 117 | loss += criterion(output, target[c]) 118 | decoder_in = output.max(1)[1] 119 | 120 | decoder.zero_grad() 121 | loss.backward() 122 | decoder_optimizer.step() 123 | 124 | return loss.data[0] / len(input) 125 | 126 | if __name__ == '__main__': 127 | 128 | decoder = RNN(n_characters, hidden_size, n_characters, n_layers) 129 | if torch.cuda.is_available(): 130 | decoder.cuda() 131 | 132 | decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=0.001) 133 | criterion = nn.CrossEntropyLoss() 134 | 135 | train_loader = DataLoader(dataset=TextDataset(), 136 | batch_size=batch_size, 137 | shuffle=True) 138 | 139 | print("Training for %d epochs..." % n_epochs) 140 | for epoch in range(1, n_epochs + 1): 141 | for i, (lines, _) in enumerate(train_loader): 142 | loss = train(lines[0]) # Batch size is 1 143 | 144 | if i % 100 == 0: 145 | print('[(%d %d%%) loss: %.4f]' % 146 | (epoch, epoch / n_epochs * 100, loss)) 147 | print(generate(decoder, 'Wh', 100), '\n') 148 | -------------------------------------------------------------------------------- /13_4_pack_pad.py: -------------------------------------------------------------------------------- 1 | # Original source from 2 | # https://gist.github.com/Tushar-N/dfca335e370a2bc3bc79876e6270099e 3 | # torch 4 | import torch 5 | import torch.nn as nn 6 | from torch.autograd import Variable 7 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence 8 | import torch.nn.functional as F 9 | import numpy as np 10 | import itertools 11 | 12 | 13 | def flatten(l): 14 | return list(itertools.chain.from_iterable(l)) 15 | 16 | seqs = ['ghatmasala', 'nicela', 'chutpakodas'] 17 | 18 | # make idx 0 19 | vocab = [''] + sorted(list(set(flatten(seqs)))) 20 | 21 | # make model 22 | embedding_size = 3 23 | embed = nn.Embedding(len(vocab), embedding_size) 24 | lstm = nn.LSTM(embedding_size, 5) 25 | 26 | vectorized_seqs = [[vocab.index(tok) for tok in seq]for seq in seqs] 27 | print("vectorized_seqs", vectorized_seqs) 28 | 29 | print([x for x in map(len, vectorized_seqs)]) 30 | # get the length of each seq in your batch 31 | seq_lengths = torch.LongTensor([x for x in map(len, vectorized_seqs)]) 32 | 33 | # dump padding everywhere, and place seqs on the left. 34 | # NOTE: you only need a tensor as big as your longest sequence 35 | seq_tensor = Variable(torch.zeros( 36 | (len(vectorized_seqs), seq_lengths.max()))).long() 37 | for idx, (seq, seqlen) in enumerate(zip(vectorized_seqs, seq_lengths)): 38 | seq_tensor[idx, :seqlen] = torch.LongTensor(seq) 39 | 40 | print("seq_tensor", seq_tensor) 41 | 42 | # SORT YOUR TENSORS BY LENGTH! 
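# pack_padded_sequence (used below) expects the batch ordered from longest to
# shortest sequence unless enforce_sorted=False is passed (newer PyTorch),
# hence the descending sort and the reordering of seq_tensor by perm_idx.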
43 | seq_lengths, perm_idx = seq_lengths.sort(0, descending=True) 44 | seq_tensor = seq_tensor[perm_idx] 45 | 46 | print("seq_tensor after sorting", seq_tensor) 47 | 48 | # utils.rnn lets you give (B,L,D) tensors where B is the batch size, L is the maxlength, if you use batch_first=True 49 | # Otherwise, give (L,B,D) tensors 50 | seq_tensor = seq_tensor.transpose(0, 1) # (B,L,D) -> (L,B,D) 51 | print("seq_tensor after transposing", seq_tensor.size(), seq_tensor.data) 52 | 53 | # embed your sequences 54 | embeded_seq_tensor = embed(seq_tensor) 55 | print("seq_tensor after embeding", embeded_seq_tensor.size(), seq_tensor.data) 56 | 57 | # pack them up nicely 58 | packed_input = pack_padded_sequence( 59 | embeded_seq_tensor, seq_lengths.cpu().numpy()) 60 | 61 | # throw them through your LSTM (remember to give batch_first=True here if 62 | # you packed with it) 63 | packed_output, (ht, ct) = lstm(packed_input) 64 | 65 | # unpack your output if required 66 | output, _ = pad_packed_sequence(packed_output) 67 | print("Lstm output", output.size(), output.data) 68 | 69 | # Or if you just want the final hidden state? 70 | print("Last output", ht[-1].size(), ht[-1].data) 71 | -------------------------------------------------------------------------------- /14_1_seq2seq.py: -------------------------------------------------------------------------------- 1 | # https://github.com/spro/practical-pytorch/blob/master/seq2seq-translation/seq2seq-translation.ipynb 2 | import torch 3 | import torch.nn as nn 4 | from torch.utils.data import DataLoader 5 | from text_loader import TextDataset 6 | import seq2seq_models as sm 7 | from seq2seq_models import str2tensor, EOS_token, SOS_token 8 | 9 | HIDDEN_SIZE = 100 10 | N_LAYERS = 1 11 | BATCH_SIZE = 1 12 | N_EPOCH = 100 13 | N_CHARS = 128 # ASCII 14 | 15 | 16 | # Simple test to show how our network works 17 | def test(): 18 | encoder_hidden = encoder.init_hidden() 19 | word_input = str2tensor('hello') 20 | encoder_outputs, encoder_hidden = encoder(word_input, encoder_hidden) 21 | print(encoder_outputs) 22 | 23 | decoder_hidden = encoder_hidden 24 | 25 | word_target = str2tensor('pytorch') 26 | for c in range(len(word_target)): 27 | decoder_output, decoder_hidden = decoder( 28 | word_target[c], decoder_hidden) 29 | print(decoder_output.size(), decoder_hidden.size()) 30 | 31 | 32 | # Train for a given src and target 33 | # To demonstrate seq2seq, We don't handle batch in the code, 34 | # and our encoder runs this one step at a time 35 | # It's extremely slow, and please do not use in practice. 36 | # We need to use (1) batch and (2) data parallelism 37 | # http://pytorch.org/tutorials/beginner/former_torchies/parallelism_tutorial.html. 
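# train() below uses teacher forcing: at step c the decoder receives the
# previous ground-truth target character (SOS on the first step), and the
# cross-entropy loss is summed over the target including its EOS token.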
38 | def train(src, target): 39 | src_var = str2tensor(src) 40 | target_var = str2tensor(target, eos=True) # Add the EOS token 41 | 42 | encoder_hidden = encoder.init_hidden() 43 | encoder_outputs, encoder_hidden = encoder(src_var, encoder_hidden) 44 | 45 | hidden = encoder_hidden 46 | loss = 0 47 | 48 | for c in range(len(target_var)): 49 | # At the first step, we feed SOS 50 | # Afterwards, we use teacher forcing (feed the previous target character) 51 | token = target_var[c - 1] if c else str2tensor(SOS_token) 52 | output, hidden = decoder(token, hidden) 53 | loss += criterion(output, target_var[c]) 54 | 55 | encoder.zero_grad() 56 | decoder.zero_grad() 57 | loss.backward() 58 | optimizer.step() 59 | 60 | return loss.data[0] / len(target_var) 61 | 62 | 63 | # Translate the given input 64 | def translate(enc_input='thisissungkim.iloveyou.', predict_len=100, temperature=0.9): 65 | input_var = str2tensor(enc_input) 66 | encoder_hidden = encoder.init_hidden() 67 | encoder_outputs, encoder_hidden = encoder(input_var, encoder_hidden) 68 | 69 | hidden = encoder_hidden 70 | 71 | predicted = '' 72 | dec_input = str2tensor(SOS_token) 73 | for c in range(predict_len): 74 | output, hidden = decoder(dec_input, hidden) 75 | 76 | # Sample from the network as a multinomial distribution 77 | output_dist = output.data.view(-1).div(temperature).exp() 78 | top_i = torch.multinomial(output_dist, 1)[0] 79 | 80 | # Stop at the EOS 81 | if top_i == EOS_token: 82 | break 83 | 84 | predicted_char = chr(top_i) 85 | predicted += predicted_char 86 | 87 | dec_input = str2tensor(predicted_char) 88 | 89 | return enc_input, predicted 90 | 91 | 92 | encoder = sm.EncoderRNN(N_CHARS, HIDDEN_SIZE, N_LAYERS) 93 | decoder = sm.DecoderRNN(HIDDEN_SIZE, N_CHARS, N_LAYERS) 94 | 95 | if torch.cuda.is_available(): 96 | decoder.cuda() 97 | encoder.cuda() 98 | print(encoder, decoder) 99 | test() 100 | 101 | params = list(encoder.parameters()) + list(decoder.parameters()) 102 | optimizer = torch.optim.Adam(params, lr=0.001) 103 | criterion = nn.CrossEntropyLoss() 104 | 105 | 106 | train_loader = DataLoader(dataset=TextDataset(), 107 | batch_size=BATCH_SIZE, 108 | shuffle=True, 109 | num_workers=2) 110 | 111 | print("Training for %d epochs..."
% N_EPOCH) 112 | for epoch in range(1, N_EPOCH + 1): 113 | # Get srcs and targets from data loader 114 | for i, (srcs, targets) in enumerate(train_loader): 115 | train_loss = train(srcs[0], targets[0]) # Batch is 1 116 | 117 | if i % 100 == 0: 118 | print('[(%d %d%%) %.4f]' % 119 | (epoch, epoch / N_EPOCH * 100, train_loss)) 120 | print(translate(srcs[0]), '\n') 121 | print(translate(), '\n') 122 | -------------------------------------------------------------------------------- /14_2_seq2seq_att.py: -------------------------------------------------------------------------------- 1 | # Original code from 2 | # https://github.com/spro/practical-pytorch/blob/master/seq2seq-translation/seq2seq-translation.ipynb 3 | 4 | #import matplotlib.pyplot as plt 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | from torch.utils.data import DataLoader 10 | from text_loader import TextDataset 11 | import seq2seq_models as sm 12 | from seq2seq_models import cuda_variable, str2tensor, EOS_token, SOS_token 13 | 14 | 15 | N_LAYERS = 1 16 | BATCH_SIZE = 1 17 | N_EPOCH = 100 18 | N_CHARS = 128 # ASCII 19 | HIDDEN_SIZE = N_CHARS 20 | 21 | 22 | # Simple test to show how our network works 23 | def test(): 24 | encoder_test = sm.EncoderRNN(10, 10, 2) 25 | decoder_test = sm.AttnDecoderRNN(10, 10, 2) 26 | 27 | if torch.cuda.is_available(): 28 | encoder_test.cuda() 29 | decoder_test.cuda() 30 | 31 | encoder_hidden = encoder_test.init_hidden() 32 | word_input = cuda_variable(torch.LongTensor([1, 2, 3])) 33 | encoder_outputs, encoder_hidden = encoder_test(word_input, encoder_hidden) 34 | print(encoder_outputs.size()) 35 | 36 | word_target = cuda_variable(torch.LongTensor([1, 2, 3])) 37 | decoder_attns = torch.zeros(1, 3, 3) 38 | decoder_hidden = encoder_hidden 39 | 40 | for c in range(len(word_target)): 41 | decoder_output, decoder_hidden, decoder_attn = \ 42 | decoder_test(word_target[c], 43 | decoder_hidden, encoder_outputs) 44 | print(decoder_output.size(), decoder_hidden.size(), decoder_attn.size()) 45 | decoder_attns[0, c] = decoder_attn.squeeze(0).cpu().data 46 | 47 | 48 | # Train for a given src and target 49 | # To demonstrate seq2seq, we don't handle batching in this code, 50 | # and the decoder runs one character at a time 51 | # It's extremely slow, so please do not use it in practice. 52 | # We would need to use (1) batching and (2) data parallelism 53 | # http://pytorch.org/tutorials/beginner/former_torchies/parallelism_tutorial.html 54 | def train(src, target): 55 | loss = 0 56 | 57 | src_var = str2tensor(src) 58 | target_var = str2tensor(target, eos=True) # Add the EOS token 59 | 60 | encoder_hidden = encoder.init_hidden() 61 | encoder_outputs, encoder_hidden = encoder(src_var, encoder_hidden) 62 | 63 | hidden = encoder_hidden 64 | 65 | for c in range(len(target_var)): 66 | # At the first step we feed SOS; afterwards we use teacher forcing.
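# (Teacher forcing: during training the decoder is fed the ground-truth previous character target_var[c - 1] rather than its own prediction; at inference time, translate() below feeds back the sampled character instead.)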
67 | token = target_var[c - 1] if c else str2tensor(SOS_token) 68 | output, hidden, attention = decoder(token, hidden, encoder_outputs) 69 | loss += criterion(output, target_var[c]) 70 | 71 | encoder.zero_grad() 72 | decoder.zero_grad() 73 | loss.backward() 74 | optimizer.step() 75 | 76 | return loss.data[0] / len(target_var) 77 | 78 | 79 | # Translate the given input 80 | def translate(enc_input='thisissungkim.iloveyou.', predict_len=100, temperature=0.9): 81 | input_var = str2tensor(enc_input) 82 | encoder_hidden = encoder.init_hidden() 83 | encoder_outputs, encoder_hidden = encoder(input_var, encoder_hidden) 84 | 85 | hidden = encoder_hidden 86 | 87 | predicted = '' 88 | dec_input = str2tensor(SOS_token) 89 | attentions = [] 90 | for c in range(predict_len): 91 | output, hidden, attention = decoder(dec_input, hidden, encoder_outputs) 92 | # Sample from the network as a multinomial distribution 93 | output_dist = output.data.view(-1).div(temperature).exp() 94 | top_i = torch.multinomial(output_dist, 1)[0] 95 | attentions.append(attention.view(-1).data.cpu().numpy().tolist()) 96 | 97 | # Stop at the EOS 98 | if top_i == EOS_token: 99 | break 100 | 101 | predicted_char = chr(top_i) 102 | predicted += predicted_char 103 | 104 | dec_input = str2tensor(predicted_char) 105 | 106 | return predicted, attentions 107 | 108 | 109 | if __name__ == '__main__': 110 | encoder = sm.EncoderRNN(N_CHARS, HIDDEN_SIZE, N_LAYERS) 111 | decoder = sm.AttnDecoderRNN(HIDDEN_SIZE, N_CHARS, N_LAYERS) 112 | 113 | if torch.cuda.is_available(): 114 | decoder.cuda() 115 | encoder.cuda() 116 | print(encoder, decoder) 117 | # test() 118 | 119 | params = list(encoder.parameters()) + list(decoder.parameters()) 120 | optimizer = torch.optim.Adam(params, lr=0.001) 121 | criterion = nn.CrossEntropyLoss() 122 | 123 | train_loader = DataLoader(dataset=TextDataset(), 124 | batch_size=BATCH_SIZE, 125 | shuffle=True, 126 | num_workers=2) 127 | 128 | print("Training for %d epochs..." % N_EPOCH) 129 | for epoch in range(1, N_EPOCH + 1): 130 | # Get srcs and targets from data loader 131 | for i, (srcs, targets) in enumerate(train_loader): 132 | train_loss = train(srcs[0], targets[0]) 133 | 134 | if i % 1000 == 0: 135 | print('[(%d/%d %d%%) %.4f]' % 136 | (epoch, N_EPOCH, i * len(srcs) * 100 / len(train_loader), train_loss)) 137 | output, _ = translate(srcs[0]) 138 | print(srcs[0], output, '\n') 139 | 140 | output, attentions = translate() 141 | print('thisissungkim.iloveyou.', output, '\n') 142 | 143 | # plt.matshow(attentions) 144 | # plt.show() 145 | # print(attentions) 146 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/hunkim/PythonZeroToAll.svg?branch=master)](https://travis-ci.org/hunkim/PythonZeroToAll) 2 | 3 | # PyTorchZeroToAll 4 | Quick 3~4 day lecture materials for HKUST students. 5 | 6 | ## Video Lectures: (RNN TBA) 7 | * [Youtube](http://bit.ly/PyTorchVideo) 8 | * [Bilibili](https://www.bilibili.com/video/av15823922/) 9 | 10 | ## Slides 11 | * [Lecture Slides @GoogleDrive](http://bit.ly/PyTorchZeroAll) 12 | 13 | If you cannot access Google Drive for some reason, please check out the PDF files in the slides directory. However, the slides on Google Drive are always the most up to date. We really appreciate your comments.
14 | 15 | ## Previous Lectures 16 | * cf., http://bit.ly/TF_HKUST (3 day crash course using TensorFlow) 17 | -------------------------------------------------------------------------------- /data/diabetes.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hunkim/PyTorchZeroToAll/e5a260fac0b4ff1e2ec434fc6f0d962ac97fc289/data/diabetes.csv.gz -------------------------------------------------------------------------------- /data/names_test.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hunkim/PyTorchZeroToAll/e5a260fac0b4ff1e2ec434fc6f0d962ac97fc289/data/names_test.csv.gz -------------------------------------------------------------------------------- /data/names_train.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hunkim/PyTorchZeroToAll/e5a260fac0b4ff1e2ec434fc6f0d962ac97fc289/data/names_train.csv.gz -------------------------------------------------------------------------------- /data/shakespeare.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hunkim/PyTorchZeroToAll/e5a260fac0b4ff1e2ec434fc6f0d962ac97fc289/data/shakespeare.txt.gz -------------------------------------------------------------------------------- /name_dataset.py: -------------------------------------------------------------------------------- 1 | # References 2 | # https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/01-basics/pytorch_basics/main.py 3 | # http://pytorch.org/tutorials/beginner/data_loading_tutorial.html#dataset-class 4 | import torch 5 | import numpy as np 6 | from torch.autograd import Variable 7 | from torch.utils.data import Dataset, DataLoader 8 | import csv 9 | import gzip 10 | 11 | 12 | class NameDataset(Dataset): 13 | """ Name/country dataset.""" 14 | 15 | # Initialize your data, download, etc.
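# Each row of the gzipped CSV holds one (name, country) pair; names_train.csv.gz is read when is_train_set=True, otherwise names_test.csv.gz.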
16 | def __init__(self, is_train_set=False): 17 | filename = './data/names_train.csv.gz' if is_train_set else './data/names_test.csv.gz' 18 | with gzip.open(filename, "rt") as f: 19 | reader = csv.reader(f) 20 | rows = list(reader) 21 | 22 | self.names = [row[0] for row in rows] 23 | self.countries = [row[1] for row in rows] 24 | self.len = len(self.countries) 25 | 26 | self.country_list = list(sorted(set(self.countries))) 27 | 28 | def __getitem__(self, index): 29 | return self.names[index], self.countries[index] 30 | 31 | def __len__(self): 32 | return self.len 33 | 34 | def get_countries(self): 35 | return self.country_list 36 | 37 | def get_country(self, id): 38 | return self.country_list[id] 39 | 40 | def get_country_id(self, country): 41 | return self.country_list.index(country) 42 | 43 | # Test the loader 44 | if __name__ == "__main__": 45 | dataset = NameDataset(False) 46 | print(dataset.get_countries()) 47 | print(dataset.get_country(3)) 48 | print(dataset.get_country_id('Korean')) 49 | 50 | train_loader = DataLoader(dataset=dataset, 51 | batch_size=10, 52 | shuffle=True) 53 | 54 | print(len(train_loader.dataset)) 55 | for epoch in range(2): 56 | for i, (names, countries) in enumerate(train_loader): 57 | # Run your training process 58 | print(epoch, i, "names", names, "countries", countries) 59 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | #nonsml: digitalgenius/ubuntu-pytorch 2 | #varunagrawal/pytorch 3 | httplib2==0.18.0 4 | matplotlib==2.0.0 5 | numpy==1.13.3 6 | torch 7 | torchvision==0.1.9 8 | Unidecode==0.04.21 9 | -------------------------------------------------------------------------------- /seq2seq_models.py: -------------------------------------------------------------------------------- 1 | # Original code from 2 | # https://github.com/spro/practical-pytorch/blob/master/seq2seq-translation/seq2seq-translation.ipynb 3 | import torch 4 | import torch.nn as nn 5 | from torch.autograd import Variable 6 | import torch.nn.functional as F 7 | 8 | MAX_LENGTH = 100 9 | 10 | SOS_token = chr(0) 11 | EOS_token = 1 12 | 13 | # Helper function to create a Variable based on 14 | # CUDA availability 15 | 16 | 17 | def cuda_variable(tensor): 18 | # Do cuda() before wrapping with variable 19 | if torch.cuda.is_available(): 20 | return Variable(tensor.cuda()) 21 | else: 22 | return Variable(tensor) 23 | 24 | 25 | # String to char tensor 26 | def str2tensor(msg, eos=False): 27 | tensor = [ord(c) for c in msg] 28 | if eos: 29 | tensor.append(EOS_token) 30 | 31 | return cuda_variable(torch.LongTensor(tensor)) 32 | 33 | 34 | # To demonstrate seq2seq, we don't handle batching in this code, 35 | # and the decoder runs one character at a time 36 | # It's extremely slow, so please do not use it in practice. 37 | # We would need to use (1) batching and (2) data parallelism 38 | # http://pytorch.org/tutorials/beginner/former_torchies/parallelism_tutorial.html
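# For reference, shapes as used by the models below (batch size fixed to 1):
#   str2tensor('hi', eos=True)  -> Variable wrapping LongTensor([104, 105, 1])   (ord('h') = 104, ord('i') = 105, EOS_token = 1)
#   EncoderRNN.forward          -> output (S, 1, hidden_size), hidden (n_layers, 1, hidden_size) for an input of S characters
#   DecoderRNN / AttnDecoderRNN -> output (1, output_size), hidden (n_layers, 1, hidden_size) per step; AttnDecoderRNN also returns attention weights of shape (1, 1, S)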
39 | 40 | class EncoderRNN(nn.Module): 41 | 42 | def __init__(self, input_size, hidden_size, n_layers=1): 43 | self.hidden_size = hidden_size 44 | self.n_layers = n_layers 45 | 46 | super(EncoderRNN, self).__init__() 47 | 48 | self.embedding = nn.Embedding(input_size, hidden_size) 49 | self.gru = nn.GRU(hidden_size, hidden_size, n_layers) 50 | 51 | def forward(self, word_inputs, hidden): 52 | # Note: we run this all at once (over the whole input sequence) 53 | seq_len = len(word_inputs) 54 | # input shape: S x B (=1) x I (input size) 55 | embedded = self.embedding(word_inputs).view(seq_len, 1, -1) 56 | output, hidden = self.gru(embedded, hidden) 57 | return output, hidden 58 | 59 | def init_hidden(self): 60 | # (num_layers * num_directions, batch, hidden_size) 61 | return cuda_variable(torch.zeros(self.n_layers, 1, self.hidden_size)) 62 | 63 | 64 | class DecoderRNN(nn.Module): 65 | 66 | def __init__(self, hidden_size, output_size, n_layers=1): 67 | super(DecoderRNN, self).__init__() 68 | 69 | self.embedding = nn.Embedding(output_size, hidden_size) 70 | self.gru = nn.GRU(hidden_size, hidden_size, n_layers) 71 | self.out = nn.Linear(hidden_size, output_size) 72 | 73 | def forward(self, input, hidden): 74 | # input shape: S(=1) x B (=1) x I (input size) 75 | # Note: we run this one step at a time. (Sequence size = 1) 76 | output = self.embedding(input).view(1, 1, -1) 77 | output, hidden = self.gru(output, hidden) 78 | output = self.out(output[0]) 79 | # No need softmax, since we are using CrossEntropyLoss 80 | return output, hidden 81 | 82 | def init_hidden(self): 83 | # (num_layers * num_directions, batch, hidden_size) 84 | return cuda_variable(torch.zeros(self.n_layers, 1, self.hidden_size)) 85 | 86 | 87 | class AttnDecoderRNN(nn.Module): 88 | 89 | def __init__(self, hidden_size, output_size, n_layers=1, dropout_p=0.1): 90 | super(AttnDecoderRNN, self).__init__() 91 | 92 | # Linear for attention 93 | self.attn = nn.Linear(hidden_size, hidden_size) 94 | 95 | # Define layers 96 | self.embedding = nn.Embedding(output_size, hidden_size) 97 | self.gru = nn.GRU(hidden_size, hidden_size, 98 | n_layers, dropout=dropout_p) 99 | self.out = nn.Linear(hidden_size * 2, output_size) 100 | 101 | def forward(self, word_input, last_hidden, encoder_hiddens): 102 | # Note: we run this one step (S=1) at a time 103 | # Get the embedding of the current input word (last output word) 104 | rnn_input = self.embedding(word_input).view(1, 1, -1) # S=1 x B x I 105 | rnn_output, hidden = self.gru(rnn_input, last_hidden) 106 | 107 | # Calculate attention from current RNN state and all encoder outputs; 108 | # apply to encoder outputs 109 | attn_weights = self.get_att_weight( 110 | rnn_output.squeeze(0), encoder_hiddens) 111 | context = attn_weights.bmm( 112 | encoder_hiddens.transpose(0, 1)) # B x S(=1) x I 113 | 114 | # Final output layer (next word prediction) using the RNN hidden state 115 | # and context vector 116 | rnn_output = rnn_output.squeeze(0) # S(=1) x B x I -> B x I 117 | context = context.squeeze(1) # B x S(=1) x I -> B x I 118 | output = self.out(torch.cat((rnn_output, context), 1)) 119 | 120 | # Return final output, hidden state, and attention weights (for 121 | # visualization) 122 | return output, hidden, attn_weights 123 | 124 | def get_att_weight(self, hidden, encoder_hiddens): 125 | seq_len = len(encoder_hiddens) 126 | 127 | # Create variable to store attention energies 128 | attn_scores = cuda_variable(torch.zeros(seq_len)) # B x 1 x S 129 | 130 | # Calculate energies for each encoder hidden 131 | 
for i in range(seq_len): 132 | attn_scores[i] = self.get_att_score(hidden, encoder_hiddens[i]) 133 | 134 | # Normalize scores to weights in range 0 to 1, 135 | # resize to 1 x 1 x seq_len 136 | # print("att_scores", attn_scores.size()) 137 | return F.softmax(attn_scores).view(1, 1, -1) 138 | 139 | # score = h^T W h^e = h dot (W h^e) 140 | # TODO: We need to implement different score models 141 | def get_att_score(self, hidden, encoder_hidden): 142 | score = self.attn(encoder_hidden) 143 | return torch.dot(hidden.view(-1), score.view(-1)) 144 | -------------------------------------------------------------------------------- /slides/Lecture 01: Overview.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hunkim/PyTorchZeroToAll/e5a260fac0b4ff1e2ec434fc6f0d962ac97fc289/slides/Lecture 01: Overview.pdf -------------------------------------------------------------------------------- /slides/Lecture 02: Linear Model.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hunkim/PyTorchZeroToAll/e5a260fac0b4ff1e2ec434fc6f0d962ac97fc289/slides/Lecture 02: Linear Model.pdf -------------------------------------------------------------------------------- /slides/Lecture 03: Gradient Descent.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hunkim/PyTorchZeroToAll/e5a260fac0b4ff1e2ec434fc6f0d962ac97fc289/slides/Lecture 03: Gradient Descent.pdf -------------------------------------------------------------------------------- /slides/Lecture 05: Linear regression in PyTorch way.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hunkim/PyTorchZeroToAll/e5a260fac0b4ff1e2ec434fc6f0d962ac97fc289/slides/Lecture 05: Linear regression in PyTorch way.pdf -------------------------------------------------------------------------------- /slides/Lecture 06: Logistic Regression.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hunkim/PyTorchZeroToAll/e5a260fac0b4ff1e2ec434fc6f0d962ac97fc289/slides/Lecture 06: Logistic Regression.pdf -------------------------------------------------------------------------------- /slides/Lecture 07: Wide & Deep.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hunkim/PyTorchZeroToAll/e5a260fac0b4ff1e2ec434fc6f0d962ac97fc289/slides/Lecture 07: Wide & Deep.pdf -------------------------------------------------------------------------------- /slides/Lecture 08: DataLoader.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hunkim/PyTorchZeroToAll/e5a260fac0b4ff1e2ec434fc6f0d962ac97fc289/slides/Lecture 08: DataLoader.pdf -------------------------------------------------------------------------------- /slides/Lecture 09: Softmax Classifier.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hunkim/PyTorchZeroToAll/e5a260fac0b4ff1e2ec434fc6f0d962ac97fc289/slides/Lecture 09: Softmax Classifier.pdf -------------------------------------------------------------------------------- /slides/P-Epilogue: What's the next?.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hunkim/PyTorchZeroToAll/e5a260fac0b4ff1e2ec434fc6f0d962ac97fc289/slides/P-Epilogue: What's the next?.pdf -------------------------------------------------------------------------------- /text_loader.py: -------------------------------------------------------------------------------- 1 | # References 2 | # https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/01-basics/pytorch_basics/main.py 3 | # http://pytorch.org/tutorials/beginner/data_loading_tutorial.html#dataset-class 4 | import gzip 5 | from torch.utils.data import Dataset, DataLoader 6 | 7 | 8 | class TextDataset(Dataset): 9 | # Initialize your data, download, etc. 10 | 11 | def __init__(self, filename="./data/shakespeare.txt.gz"): 12 | self.len = 0 13 | with gzip.open(filename, 'rt') as f: 14 | self.targetLines = [x.strip() for x in f if x.strip()] 15 | self.srcLines = [x.lower().replace(' ', '') 16 | for x in self.targetLines] 17 | self.len = len(self.srcLines) 18 | 19 | def __getitem__(self, index): 20 | return self.srcLines[index], self.targetLines[index] 21 | 22 | def __len__(self): 23 | return self.len 24 | 25 | 26 | # Test the loader 27 | if __name__ == "__main__": 28 | dataset = TextDataset() 29 | train_loader = DataLoader(dataset=dataset, 30 | batch_size=3, 31 | shuffle=True, 32 | num_workers=2) 33 | 34 | for i, (src, target) in enumerate(train_loader): 35 | print(i, "data", src) 36 | --------------------------------------------------------------------------------