├── .gitignore
├── datasets
│   └── README.md
├── 0_multiply.py
├── README.md
├── .travis.yml
├── 1_linear_regression.py
├── 3_neural_net.py
├── 2_logistic_regression.py
├── 4_modern_neural_net.py
├── data_util.py
├── 6_lstm.py
└── 5_convolutional_net.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.idea
*.pyc
datasets/mnist

--------------------------------------------------------------------------------
/datasets/README.md:
--------------------------------------------------------------------------------
This folder contains data sets for the tutorials.

--------------------------------------------------------------------------------
/0_multiply.py:
--------------------------------------------------------------------------------
import torch

a = torch.IntTensor([2, 3, 4])
b = torch.IntTensor([3, 4, 5])
m = a * b         # element-wise product
print(m.numpy())  # convert to the numpy array [ 6 12 20]
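# (Added cross-check, left commented out so the script's output is unchanged:
# summing the element-wise products yields the dot product of the two vectors.)
# print((a * b).sum().item())  # 38, i.e. 6 + 12 + 20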

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
[![Build Status](https://travis-ci.org/vinhkhuc/PyTorch-Mini-Tutorials.svg?branch=master)](https://travis-ci.org/vinhkhuc/PyTorch-Mini-Tutorials)

Minimal tutorials for [PyTorch](https://github.com/pytorch/pytorch) adapted
from Alec Radford's [Theano tutorials](https://github.com/Newmu/Theano-Tutorials).

1. Tensor multiplication
2. Linear Regression
3. Logistic Regression
4. Neural Network
5. Modern Neural Network
6. Convolutional Neural Network
7. Long Short-Term Memory

--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
os:
  - linux
language: python
python:
  - 3.6
cache: bundler
install:
  - pip install --no-cache-dir -q torch

# We use timeout to keep any one script from running so long that Travis
# auto-kills the whole job. When the timeout fires, the command exits with
# code 143, which we map to the success exit code 0; any other non-zero exit
# code still fails the build.
script:
  - timeout 2m python 0_multiply.py || if [ $? -eq 143 ]; then exit 0; else exit 1; fi
  - timeout 2m python 1_linear_regression.py || if [ $? -eq 143 ]; then exit 0; else exit 1; fi
  - timeout 5m python 2_logistic_regression.py || if [ $? -eq 143 ]; then exit 0; else exit 1; fi
  - timeout 5m python 3_neural_net.py || if [ $? -eq 143 ]; then exit 0; else exit 1; fi
  - timeout 5m python 4_modern_neural_net.py || if [ $? -eq 143 ]; then exit 0; else exit 1; fi
  - timeout 10m python 5_convolutional_net.py || if [ $? -eq 143 ]; then exit 0; else exit 1; fi
  - timeout 10m python 6_lstm.py || if [ $? -eq 143 ]; then exit 0; else exit 1; fi

--------------------------------------------------------------------------------
/1_linear_regression.py:
--------------------------------------------------------------------------------
import torch
from torch.autograd import Variable
from torch import optim


def build_model():
    model = torch.nn.Sequential()
    model.add_module("linear", torch.nn.Linear(1, 1, bias=False))
    return model


def train(model, loss, optimizer, x, y):
    model.train()
    x = Variable(x, requires_grad=False)
    y = Variable(y, requires_grad=False)

    # Reset gradient
    optimizer.zero_grad()

    # Forward
    fx = model.forward(x.view(len(x), 1)).squeeze()
    output = loss.forward(fx, y)

    # Backward
    output.backward()

    # Update parameters
    optimizer.step()

    return output.item()
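
# (Added sketch, for intuition only: with plain SGD and learning rate lr,
# optimizer.step() above amounts to the manual update
#     for p in model.parameters():
#         p.data -= lr * p.grad.data
# the momentum=0.9 used below additionally folds in a running average of
# past gradients.)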


def main():
    torch.manual_seed(42)
    X = torch.linspace(-1, 1, 101)
    Y = 2 * X + torch.randn(X.size()) * 0.33

    model = build_model()
    loss = torch.nn.MSELoss(reduction='mean')
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    batch_size = 10

    for i in range(100):
        cost = 0.
        num_batches = len(X) // batch_size
        for k in range(num_batches):
            start, end = k * batch_size, (k + 1) * batch_size
            cost += train(model, loss, optimizer, X[start:end], Y[start:end])
        print("Epoch = %d, cost = %s" % (i + 1, cost / num_batches))

    w = next(model.parameters()).data  # model has only one parameter
    print("w = %.2f" % w.item())       # will be approximately 2


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/3_neural_net.py:
--------------------------------------------------------------------------------
import numpy as np

import torch
from torch.autograd import Variable
from torch import optim

from data_util import load_mnist


def build_model(input_dim, output_dim):
    model = torch.nn.Sequential()
    model.add_module("linear_1", torch.nn.Linear(input_dim, 512, bias=False))
    model.add_module("sigmoid_1", torch.nn.Sigmoid())
    model.add_module("linear_2", torch.nn.Linear(512, output_dim, bias=False))
    return model


def train(model, loss, optimizer, x_val, y_val):
    model.train()
    x = Variable(x_val, requires_grad=False)
    y = Variable(y_val, requires_grad=False)

    # Reset gradient
    optimizer.zero_grad()

    # Forward
    fx = model.forward(x)
    output = loss.forward(fx, y)

    # Backward
    output.backward()

    # Update parameters
    optimizer.step()

    return output.item()


def predict(model, x_val):
    model.eval()
    x = Variable(x_val, requires_grad=False)
    output = model.forward(x)
    return output.data.numpy().argmax(axis=1)


def main():
    torch.manual_seed(42)
    trX, teX, trY, teY = load_mnist(onehot=False)
    trX = torch.from_numpy(trX).float()
    teX = torch.from_numpy(teX).float()
    trY = torch.from_numpy(trY).long()

    n_examples, n_features = trX.size()
    n_classes = 10
    model = build_model(n_features, n_classes)
    loss = torch.nn.CrossEntropyLoss(reduction='mean')
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    batch_size = 100

    for i in range(100):
        cost = 0.
        num_batches = n_examples // batch_size
        for k in range(num_batches):
            start, end = k * batch_size, (k + 1) * batch_size
            cost += train(model, loss, optimizer, trX[start:end], trY[start:end])
        predY = predict(model, teX)
        print("Epoch %d, cost = %f, acc = %.2f%%"
              % (i + 1, cost / num_batches, 100. * np.mean(predY == teY)))


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/2_logistic_regression.py:
--------------------------------------------------------------------------------
import numpy as np

import torch
from torch.autograd import Variable
from torch import optim

from data_util import load_mnist


def build_model(input_dim, output_dim):
    # We don't need a softmax layer here since CrossEntropyLoss already
    # applies log-softmax internally.
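    # (Added sketch of that equivalence: for logits z and integer targets t,
    #  nn.CrossEntropyLoss()(z, t) matches
    #  nn.NLLLoss()(nn.LogSoftmax(dim=1)(z), t).)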
    model = torch.nn.Sequential()
    model.add_module("linear",
                     torch.nn.Linear(input_dim, output_dim, bias=False))
    return model


def train(model, loss, optimizer, x_val, y_val):
    model.train()
    x = Variable(x_val, requires_grad=False)
    y = Variable(y_val, requires_grad=False)

    # Reset gradient
    optimizer.zero_grad()

    # Forward
    fx = model.forward(x)
    output = loss.forward(fx, y)

    # Backward
    output.backward()

    # Update parameters
    optimizer.step()

    return output.item()


def predict(model, x_val):
    model.eval()
    x = Variable(x_val, requires_grad=False)
    output = model.forward(x)
    return output.data.numpy().argmax(axis=1)


def main():
    torch.manual_seed(42)
    trX, teX, trY, teY = load_mnist(onehot=False)
    trX = torch.from_numpy(trX).float()
    teX = torch.from_numpy(teX).float()
    trY = torch.from_numpy(trY).long()

    n_examples, n_features = trX.size()
    n_classes = 10
    model = build_model(n_features, n_classes)
    loss = torch.nn.CrossEntropyLoss(reduction='mean')
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    batch_size = 100

    for i in range(100):
        cost = 0.
        num_batches = n_examples // batch_size
        for k in range(num_batches):
            start, end = k * batch_size, (k + 1) * batch_size
            cost += train(model, loss, optimizer,
                          trX[start:end], trY[start:end])
        predY = predict(model, teX)
        print("Epoch %d, cost = %f, acc = %.2f%%"
              % (i + 1, cost / num_batches, 100. * np.mean(predY == teY)))


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/4_modern_neural_net.py:
--------------------------------------------------------------------------------
import numpy as np

import torch
from torch.autograd import Variable
from torch import optim

from data_util import load_mnist


def build_model(input_dim, output_dim):
    model = torch.nn.Sequential()
    model.add_module("linear_1", torch.nn.Linear(input_dim, 512, bias=False))
    model.add_module("relu_1", torch.nn.ReLU())
    model.add_module("dropout_1", torch.nn.Dropout(0.2))
    model.add_module("linear_2", torch.nn.Linear(512, 512, bias=False))
    model.add_module("relu_2", torch.nn.ReLU())
    model.add_module("dropout_2", torch.nn.Dropout(0.2))
    model.add_module("linear_3", torch.nn.Linear(512, output_dim, bias=False))
    return model
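
# (Added note: torch.nn.Dropout implements "inverted" dropout. During
# training each activation is zeroed with probability p=0.2 and the
# survivors are scaled by 1/(1-p); model.eval() disables it, so no
# rescaling is needed at test time.)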


def train(model, loss, optimizer, x_val, y_val):
    model.train()
    x = Variable(x_val, requires_grad=False)
    y = Variable(y_val, requires_grad=False)

    # Reset gradient
    optimizer.zero_grad()

    # Forward
    fx = model.forward(x)
    output = loss.forward(fx, y)

    # Backward
    output.backward()

    # Update parameters
    optimizer.step()

    return output.item()


def predict(model, x_val):
    model.eval()
    x = Variable(x_val, requires_grad=False)
    output = model.forward(x)
    return output.data.numpy().argmax(axis=1)


def main():
    torch.manual_seed(42)
    trX, teX, trY, teY = load_mnist(onehot=False)
    trX = torch.from_numpy(trX).float()
    teX = torch.from_numpy(teX).float()
    trY = torch.from_numpy(trY).long()

    n_examples, n_features = trX.size()
    n_classes = 10
    model = build_model(n_features, n_classes)
    loss = torch.nn.CrossEntropyLoss(reduction='mean')
    optimizer = optim.Adam(model.parameters())
    batch_size = 100

    for i in range(100):
        cost = 0.
        num_batches = n_examples // batch_size
        for k in range(num_batches):
            start, end = k * batch_size, (k + 1) * batch_size
            cost += train(model, loss, optimizer, trX[start:end], trY[start:end])
        predY = predict(model, teX)
        print("Epoch %d, cost = %f, acc = %.2f%%"
              % (i + 1, cost / num_batches, 100. * np.mean(predY == teY)))


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/data_util.py:
--------------------------------------------------------------------------------
import gzip
import os
import urllib.request as request
from os import path

import numpy as np

DATASET_DIR = 'datasets/'

MNIST_FILES = ["train-images-idx3-ubyte.gz", "train-labels-idx1-ubyte.gz",
               "t10k-images-idx3-ubyte.gz", "t10k-labels-idx1-ubyte.gz"]


def download_file(url, local_path):
    dir_path = path.dirname(local_path)
    if not path.exists(dir_path):
        print("Creating the directory '%s' ..." % dir_path)
        os.makedirs(dir_path)

    print("Downloading from '%s' ..." % url)
    request.urlretrieve(url, local_path)


def download_mnist(local_path):
    url_root = "http://yann.lecun.com/exdb/mnist/"
    for f_name in MNIST_FILES:
        f_path = os.path.join(local_path, f_name)
        if not path.exists(f_path):
            download_file(url_root + f_name, f_path)


def one_hot(x, n):
    if type(x) == list:
        x = np.array(x)
    x = x.flatten()
    o_h = np.zeros((len(x), n))
    o_h[np.arange(len(x)), x] = 1
    return o_h
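
# (Added usage sketch: one_hot([1, 3], 5) returns
#  [[0., 1., 0., 0., 0.],
#   [0., 0., 0., 1., 0.]])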


def load_mnist(ntrain=60000, ntest=10000, onehot=True):
    data_dir = os.path.join(DATASET_DIR, 'mnist/')
    if not path.exists(data_dir):
        download_mnist(data_dir)
    else:
        # Check that all files are present; re-download otherwise
        checks = [path.exists(os.path.join(data_dir, f)) for f in MNIST_FILES]
        if not np.all(checks):
            download_mnist(data_dir)

    with gzip.open(os.path.join(data_dir, 'train-images-idx3-ubyte.gz')) as fd:
        buf = fd.read()
        loaded = np.frombuffer(buf, dtype=np.uint8)
        # Skip the 16-byte IDX image header
        trX = loaded[16:].reshape((60000, 28 * 28)).astype(float)

    with gzip.open(os.path.join(data_dir, 'train-labels-idx1-ubyte.gz')) as fd:
        buf = fd.read()
        loaded = np.frombuffer(buf, dtype=np.uint8)
        # Skip the 8-byte IDX label header
        trY = loaded[8:].reshape((60000))

    with gzip.open(os.path.join(data_dir, 't10k-images-idx3-ubyte.gz')) as fd:
        buf = fd.read()
        loaded = np.frombuffer(buf, dtype=np.uint8)
        teX = loaded[16:].reshape((10000, 28 * 28)).astype(float)

    with gzip.open(os.path.join(data_dir, 't10k-labels-idx1-ubyte.gz')) as fd:
        buf = fd.read()
        loaded = np.frombuffer(buf, dtype=np.uint8)
        teY = loaded[8:].reshape((10000))

    # Scale pixel values into [0, 1]
    trX /= 255.
    teX /= 255.

    trX = trX[:ntrain]
    trY = trY[:ntrain]

    teX = teX[:ntest]
    teY = teY[:ntest]

    if onehot:
        trY = one_hot(trY, 10)
        teY = one_hot(teY, 10)
    else:
        trY = np.asarray(trY)
        teY = np.asarray(teY)

    return trX, teX, trY, teY

--------------------------------------------------------------------------------
/6_lstm.py:
--------------------------------------------------------------------------------
import numpy as np

import torch
from torch.autograd import Variable
from torch import optim, nn

from data_util import load_mnist


class LSTMNet(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(LSTMNet, self).__init__()
        self.hidden_dim = hidden_dim
        self.lstm = nn.LSTM(input_dim, hidden_dim)
        self.linear = nn.Linear(hidden_dim, output_dim, bias=False)

    def forward(self, x):
        batch_size = x.size()[1]
        h0 = Variable(torch.zeros([1, batch_size, self.hidden_dim]), requires_grad=False)
        c0 = Variable(torch.zeros([1, batch_size, self.hidden_dim]), requires_grad=False)
        fx, _ = self.lstm.forward(x, (h0, c0))
        return self.linear.forward(fx[-1])  # classify from the last time step


def train(model, loss, optimizer, x_val, y_val):
    model.train()
    x = Variable(x_val, requires_grad=False)
    y = Variable(y_val, requires_grad=False)

    # Reset gradient
    optimizer.zero_grad()

    # Forward
    fx = model.forward(x)
    output = loss.forward(fx, y)

    # Backward
    output.backward()

    # Update parameters
    optimizer.step()

    return output.item()


def predict(model, x_val):
    model.eval()
    x = Variable(x_val, requires_grad=False)
    output = model.forward(x)
    return output.data.numpy().argmax(axis=1)


def main():
    torch.manual_seed(42)
    trX, teX, trY, teY = load_mnist(onehot=False)

    train_size = len(trY)
    n_classes = 10
    seq_length = 28
    input_dim = 28
    hidden_dim = 128
    batch_size = 100
    epochs = 20

    trX = trX.reshape(-1, seq_length, input_dim)
    teX = teX.reshape(-1, seq_length, input_dim)

    # Convert to the shape (seq_length, num_samples, input_dim)
    trX = np.swapaxes(trX, 0, 1)
    teX = np.swapaxes(teX, 0, 1)
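
    # (Added note: nn.LSTM expects input of shape (seq_len, batch, input_size)
    # by default, so each 28x28 image is fed as a sequence of 28 rows.
    # Constructing the LSTM with batch_first=True would accept
    # (batch, seq_len, input_size) directly and make this swap unnecessary.)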

    trX = torch.from_numpy(trX).float()
    teX = torch.from_numpy(teX).float()
    trY = torch.from_numpy(trY).long()

    model = LSTMNet(input_dim, hidden_dim, n_classes)
    loss = torch.nn.CrossEntropyLoss(reduction='mean')
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

    for i in range(epochs):
        cost = 0.
        num_batches = train_size // batch_size
        for k in range(num_batches):
            start, end = k * batch_size, (k + 1) * batch_size
            cost += train(model, loss, optimizer, trX[:, start:end, :], trY[start:end])
        predY = predict(model, teX)
        print("Epoch %d, cost = %f, acc = %.2f%%" %
              (i + 1, cost / num_batches, 100. * np.mean(predY == teY)))


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/5_convolutional_net.py:
--------------------------------------------------------------------------------
import numpy as np

import torch
from torch.autograd import Variable
from torch import optim

from data_util import load_mnist


# We need to create two sequential sub-models here since this version of
# PyTorch doesn't have an nn.View() module to flatten between the conv and
# fully-connected parts.
class ConvNet(torch.nn.Module):
    def __init__(self, output_dim):
        super(ConvNet, self).__init__()

        self.conv = torch.nn.Sequential()
        self.conv.add_module("conv_1", torch.nn.Conv2d(1, 10, kernel_size=5))
        self.conv.add_module("maxpool_1", torch.nn.MaxPool2d(kernel_size=2))
        self.conv.add_module("relu_1", torch.nn.ReLU())
        self.conv.add_module("conv_2", torch.nn.Conv2d(10, 20, kernel_size=5))
        self.conv.add_module("dropout_2", torch.nn.Dropout())
        self.conv.add_module("maxpool_2", torch.nn.MaxPool2d(kernel_size=2))
        self.conv.add_module("relu_2", torch.nn.ReLU())

        self.fc = torch.nn.Sequential()
        self.fc.add_module("fc1", torch.nn.Linear(320, 50))
        self.fc.add_module("relu_3", torch.nn.ReLU())
        self.fc.add_module("dropout_3", torch.nn.Dropout())
        self.fc.add_module("fc2", torch.nn.Linear(50, output_dim))

    def forward(self, x):
        x = self.conv.forward(x)
        # Flatten: 28x28 -> conv5 -> 24x24 -> pool2 -> 12x12 -> conv5 -> 8x8
        # -> pool2 -> 4x4, so 20 channels * 4 * 4 = 320 features per example.
        x = x.view(-1, 320)
        return self.fc.forward(x)


def train(model, loss, optimizer, x_val, y_val):
    model.train()
    x = Variable(x_val, requires_grad=False)
    y = Variable(y_val, requires_grad=False)

    # Reset gradient
    optimizer.zero_grad()

    # Forward
    fx = model.forward(x)
    output = loss.forward(fx, y)

    # Backward
    output.backward()

    # Update parameters
    optimizer.step()

    return output.item()


def predict(model, x_val):
    model.eval()
    x = Variable(x_val, requires_grad=False)
    output = model.forward(x)
    return output.data.numpy().argmax(axis=1)


def main():
    torch.manual_seed(42)
    trX, teX, trY, teY = load_mnist(onehot=False)
    trX = trX.reshape(-1, 1, 28, 28)
    teX = teX.reshape(-1, 1, 28, 28)

    trX = torch.from_numpy(trX).float()
    teX = torch.from_numpy(teX).float()
    trY = torch.from_numpy(trY).long()

    n_examples = len(trX)
    n_classes = 10
    model = ConvNet(output_dim=n_classes)
    loss = torch.nn.CrossEntropyLoss(reduction='mean')
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    batch_size = 100

    for i in range(20):
        cost = 0.
        num_batches = n_examples // batch_size
        for k in range(num_batches):
            start, end = k * batch_size, (k + 1) * batch_size
            cost += train(model, loss, optimizer, trX[start:end], trY[start:end])
        predY = predict(model, teX)
        print("Epoch %d, cost = %f, acc = %.2f%%"
              % (i + 1, cost / num_batches, 100. * np.mean(predY == teY)))


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------