├── .gitignore
├── .travis.yml
├── 0_multiply.py
├── 1_linear_regression.py
├── 2_logistic_regression.py
├── 3_neural_net.py
├── 4_modern_neural_net.py
├── 5_convolutional_net.py
├── 6_lstm.py
├── README.md
├── data_util.py
└── datasets
    └── README.md
/.gitignore:
--------------------------------------------------------------------------------
.idea
*.pyc
datasets/mnist
--------------------------------------------------------------------------------
/datasets/README.md:
--------------------------------------------------------------------------------
This folder contains data sets for the tutorials.
--------------------------------------------------------------------------------
/0_multiply.py:
--------------------------------------------------------------------------------
import torch

a = torch.IntTensor([2, 3, 4])
b = torch.IntTensor([3, 4, 5])
m = a * b  # element-wise product
print(m.numpy())  # convert to a NumPy array: [ 6 12 20]

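# Illustrative note (not part of the original script): on modern PyTorch the
# torch.tensor constructor is the idiomatic way to build these tensors, and
# it infers an integer dtype from the Python ints:
#
#     a = torch.tensor([2, 3, 4])
#     b = torch.tensor([3, 4, 5])
#     print((a * b).numpy())  # [ 6 12 20]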
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
[![Build Status](https://travis-ci.org/vinhkhuc/PyTorch-Mini-Tutorials.svg?branch=master)](https://travis-ci.org/vinhkhuc/PyTorch-Mini-Tutorials)

Minimal tutorials for [PyTorch](https://github.com/pytorch/pytorch) adapted
from Alec Radford's [Theano tutorials](https://github.com/Newmu/Theano-Tutorials).

0. [Tensor multiplication](0_multiply.py)
1. [Linear Regression](1_linear_regression.py)
2. [Logistic Regression](2_logistic_regression.py)
3. [Neural Network](3_neural_net.py)
4. [Modern Neural Network](4_modern_neural_net.py)
5. [Convolutional Neural Network](5_convolutional_net.py)
6. [Long Short-Term Memory](6_lstm.py)
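Tutorials 1-6 share the same training skeleton: build a model, then loop over
mini-batches with a small `train` helper that zeroes gradients, runs the
forward pass, backpropagates, and steps the optimizer. A condensed sketch of
that pattern (illustrative only; the layer sizes are placeholders, assuming a
recent PyTorch):

```python
import torch
from torch import optim

model = torch.nn.Linear(784, 10)  # stand-in for any of the tutorials' models
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

def train(x, y):
    optimizer.zero_grad()          # reset accumulated gradients
    output = loss_fn(model(x), y)  # forward pass
    output.backward()              # backpropagate
    optimizer.step()               # update parameters
    return output.item()
```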
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
os:
- linux
language: python
python:
- 3.6
cache: pip
install:
- pip install --no-cache-dir -q torch

# Use timeout to keep each tutorial from running long enough to be auto-killed
# by Travis. GNU timeout exits with code 124 when the limit is hit, so we treat
# that exit code (as well as 0) as success and let any other failure propagate.
script:
- timeout 2m python 0_multiply.py || [ $? -eq 124 ]
- timeout 2m python 1_linear_regression.py || [ $? -eq 124 ]
- timeout 5m python 2_logistic_regression.py || [ $? -eq 124 ]
- timeout 5m python 3_neural_net.py || [ $? -eq 124 ]
- timeout 5m python 4_modern_neural_net.py || [ $? -eq 124 ]
- timeout 10m python 5_convolutional_net.py || [ $? -eq 124 ]
- timeout 10m python 6_lstm.py || [ $? -eq 124 ]
--------------------------------------------------------------------------------
/1_linear_regression.py:
--------------------------------------------------------------------------------
import torch
from torch.autograd import Variable
from torch import optim


def build_model():
    model = torch.nn.Sequential()
    model.add_module("linear", torch.nn.Linear(1, 1, bias=False))
    return model


def train(model, loss, optimizer, x, y):
    model.train()
    x = Variable(x, requires_grad=False)
    y = Variable(y, requires_grad=False)

    # Reset gradient
    optimizer.zero_grad()

    # Forward
    fx = model.forward(x.view(len(x), 1)).squeeze()
    output = loss.forward(fx, y)

    # Backward
    output.backward()

    # Update parameters
    optimizer.step()

    return output.item()


def main():
    torch.manual_seed(42)
    X = torch.linspace(-1, 1, 101)
    Y = 2 * X + torch.randn(X.size()) * 0.33

    model = build_model()
    loss = torch.nn.MSELoss(reduction='mean')
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    batch_size = 10

    for i in range(100):
        cost = 0.
        num_batches = len(X) // batch_size
        for k in range(num_batches):
            start, end = k * batch_size, (k + 1) * batch_size
            cost += train(model, loss, optimizer, X[start:end], Y[start:end])
        print("Epoch = %d, cost = %s" % (i + 1, cost / num_batches))

    w = next(model.parameters()).data  # the model has only one parameter
    print("w = %.2f" % w.item())  # will be approximately 2

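# Illustrative addition (not part of the original tutorial): for this 1-D,
# bias-free model the least-squares slope has a closed form, so the SGD fit
# above can be sanity-checked against it.
def closed_form_slope(x, y):
    # argmin_w ||w*x - y||^2 has the solution w = <x, y> / <x, x>
    return float((x * y).sum() / (x * x).sum())
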
if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/3_neural_net.py:
--------------------------------------------------------------------------------
import numpy as np

import torch
from torch.autograd import Variable
from torch import optim

from data_util import load_mnist


def build_model(input_dim, output_dim):
    model = torch.nn.Sequential()
    model.add_module("linear_1", torch.nn.Linear(input_dim, 512, bias=False))
    model.add_module("sigmoid_1", torch.nn.Sigmoid())
    model.add_module("linear_2", torch.nn.Linear(512, output_dim, bias=False))
    return model

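# Illustrative equivalent (not in the original file): the same network can be
# built in one call using the OrderedDict form of nn.Sequential:
#
#     from collections import OrderedDict
#     model = torch.nn.Sequential(OrderedDict([
#         ("linear_1", torch.nn.Linear(input_dim, 512, bias=False)),
#         ("sigmoid_1", torch.nn.Sigmoid()),
#         ("linear_2", torch.nn.Linear(512, output_dim, bias=False)),
#     ]))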

def train(model, loss, optimizer, x_val, y_val):
    model.train()
    x = Variable(x_val, requires_grad=False)
    y = Variable(y_val, requires_grad=False)

    # Reset gradient
    optimizer.zero_grad()

    # Forward
    fx = model.forward(x)
    output = loss.forward(fx, y)

    # Backward
    output.backward()

    # Update parameters
    optimizer.step()

    return output.item()


def predict(model, x_val):
    model.eval()
    x = Variable(x_val, requires_grad=False)
    output = model.forward(x)
    return output.data.numpy().argmax(axis=1)


def main():
    torch.manual_seed(42)
    trX, teX, trY, teY = load_mnist(onehot=False)
    trX = torch.from_numpy(trX).float()
    teX = torch.from_numpy(teX).float()
    trY = torch.from_numpy(trY).long()

    n_examples, n_features = trX.size()
    n_classes = 10
    model = build_model(n_features, n_classes)
    loss = torch.nn.CrossEntropyLoss(reduction='mean')
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    batch_size = 100

    for i in range(100):
        cost = 0.
        num_batches = n_examples // batch_size
        for k in range(num_batches):
            start, end = k * batch_size, (k + 1) * batch_size
            cost += train(model, loss, optimizer, trX[start:end], trY[start:end])
        predY = predict(model, teX)
        print("Epoch %d, cost = %f, acc = %.2f%%"
              % (i + 1, cost / num_batches, 100. * np.mean(predY == teY)))


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/2_logistic_regression.py:
--------------------------------------------------------------------------------
import numpy as np

import torch
from torch.autograd import Variable
from torch import optim

from data_util import load_mnist


def build_model(input_dim, output_dim):
    # No softmax layer is needed here: CrossEntropyLoss applies log-softmax
    # internally, so the model outputs raw logits.
    model = torch.nn.Sequential()
    model.add_module("linear",
                     torch.nn.Linear(input_dim, output_dim, bias=False))
    return model

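# Illustrative note (not in the original file): CrossEntropyLoss combines
# LogSoftmax with NLLLoss, i.e. for logits z and integer targets t,
#
#     torch.nn.CrossEntropyLoss()(z, t)
#
# gives the same value as
#
#     torch.nn.NLLLoss()(torch.nn.LogSoftmax(dim=1)(z), t)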

def train(model, loss, optimizer, x_val, y_val):
    model.train()
    x = Variable(x_val, requires_grad=False)
    y = Variable(y_val, requires_grad=False)

    # Reset gradient
    optimizer.zero_grad()

    # Forward
    fx = model.forward(x)
    output = loss.forward(fx, y)

    # Backward
    output.backward()

    # Update parameters
    optimizer.step()

    return output.item()


def predict(model, x_val):
    model.eval()
    x = Variable(x_val, requires_grad=False)
    output = model.forward(x)
    return output.data.numpy().argmax(axis=1)


def main():
    torch.manual_seed(42)
    trX, teX, trY, teY = load_mnist(onehot=False)
    trX = torch.from_numpy(trX).float()
    teX = torch.from_numpy(teX).float()
    trY = torch.from_numpy(trY).long()

    n_examples, n_features = trX.size()
    n_classes = 10
    model = build_model(n_features, n_classes)
    loss = torch.nn.CrossEntropyLoss(reduction='mean')
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    batch_size = 100

    for i in range(100):
        cost = 0.
        num_batches = n_examples // batch_size
        for k in range(num_batches):
            start, end = k * batch_size, (k + 1) * batch_size
            cost += train(model, loss, optimizer,
                          trX[start:end], trY[start:end])
        predY = predict(model, teX)
        print("Epoch %d, cost = %f, acc = %.2f%%"
              % (i + 1, cost / num_batches, 100. * np.mean(predY == teY)))


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/4_modern_neural_net.py:
--------------------------------------------------------------------------------
import numpy as np

import torch
from torch.autograd import Variable
from torch import optim

from data_util import load_mnist


def build_model(input_dim, output_dim):
    model = torch.nn.Sequential()
    model.add_module("linear_1", torch.nn.Linear(input_dim, 512, bias=False))
    model.add_module("relu_1", torch.nn.ReLU())
    model.add_module("dropout_1", torch.nn.Dropout(0.2))
    model.add_module("linear_2", torch.nn.Linear(512, 512, bias=False))
    model.add_module("relu_2", torch.nn.ReLU())
    model.add_module("dropout_2", torch.nn.Dropout(0.2))
    model.add_module("linear_3", torch.nn.Linear(512, output_dim, bias=False))
    return model

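# Illustrative note (not in the original file): Dropout is only active in
# training mode, which is why train() below calls model.train() while
# predict() calls model.eval(). For example:
#
#     drop = torch.nn.Dropout(0.2)
#     drop.train(); drop(torch.ones(5))  # ~20% of entries zeroed, rest scaled by 1/0.8
#     drop.eval();  drop(torch.ones(5))  # identity: all ones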

def train(model, loss, optimizer, x_val, y_val):
    model.train()
    x = Variable(x_val, requires_grad=False)
    y = Variable(y_val, requires_grad=False)

    # Reset gradient
    optimizer.zero_grad()

    # Forward
    fx = model.forward(x)
    output = loss.forward(fx, y)

    # Backward
    output.backward()

    # Update parameters
    optimizer.step()

    return output.item()


def predict(model, x_val):
    model.eval()
    x = Variable(x_val, requires_grad=False)
    output = model.forward(x)
    return output.data.numpy().argmax(axis=1)


def main():
    torch.manual_seed(42)
    trX, teX, trY, teY = load_mnist(onehot=False)
    trX = torch.from_numpy(trX).float()
    teX = torch.from_numpy(teX).float()
    trY = torch.from_numpy(trY).long()

    n_examples, n_features = trX.size()
    n_classes = 10
    model = build_model(n_features, n_classes)
    loss = torch.nn.CrossEntropyLoss(reduction='mean')
    optimizer = optim.Adam(model.parameters())
    batch_size = 100

    for i in range(100):
        cost = 0.
        num_batches = n_examples // batch_size
        for k in range(num_batches):
            start, end = k * batch_size, (k + 1) * batch_size
            cost += train(model, loss, optimizer, trX[start:end], trY[start:end])
        predY = predict(model, teX)
        print("Epoch %d, cost = %f, acc = %.2f%%"
              % (i + 1, cost / num_batches, 100. * np.mean(predY == teY)))


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/data_util.py:
--------------------------------------------------------------------------------
import gzip
import os
import urllib.request as request
from os import path

import numpy as np

DATASET_DIR = 'datasets/'

MNIST_FILES = ["train-images-idx3-ubyte.gz", "train-labels-idx1-ubyte.gz",
               "t10k-images-idx3-ubyte.gz", "t10k-labels-idx1-ubyte.gz"]


def download_file(url, local_path):
    dir_path = path.dirname(local_path)
    if not path.exists(dir_path):
        print("Creating the directory '%s' ..." % dir_path)
        os.makedirs(dir_path)

    print("Downloading from '%s' ..." % url)
    request.urlretrieve(url, local_path)


def download_mnist(local_path):
    url_root = "http://yann.lecun.com/exdb/mnist/"
    for f_name in MNIST_FILES:
        f_path = os.path.join(local_path, f_name)
        if not path.exists(f_path):
            download_file(url_root + f_name, f_path)


def one_hot(x, n):
    if isinstance(x, list):
        x = np.array(x)
    x = x.flatten()
    o_h = np.zeros((len(x), n))
    o_h[np.arange(len(x)), x] = 1
    return o_h

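# Illustrative usage (not in the original file):
#
#     one_hot([0, 2], 3)  ->  [[1., 0., 0.],
#                              [0., 0., 1.]]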

def load_mnist(ntrain=60000, ntest=10000, onehot=True):
    data_dir = os.path.join(DATASET_DIR, 'mnist/')
    if not path.exists(data_dir):
        download_mnist(data_dir)
    else:
        # check that all files are present
        checks = [path.exists(os.path.join(data_dir, f)) for f in MNIST_FILES]
        if not np.all(checks):
            download_mnist(data_dir)

    with gzip.open(os.path.join(data_dir, 'train-images-idx3-ubyte.gz')) as fd:
        buf = fd.read()
        loaded = np.frombuffer(buf, dtype=np.uint8)
        # skip the 16-byte IDX header (magic number, count, rows, cols)
        trX = loaded[16:].reshape((60000, 28 * 28)).astype(float)

    with gzip.open(os.path.join(data_dir, 'train-labels-idx1-ubyte.gz')) as fd:
        buf = fd.read()
        loaded = np.frombuffer(buf, dtype=np.uint8)
        # skip the 8-byte IDX header (magic number, count)
        trY = loaded[8:].reshape(60000)

    with gzip.open(os.path.join(data_dir, 't10k-images-idx3-ubyte.gz')) as fd:
        buf = fd.read()
        loaded = np.frombuffer(buf, dtype=np.uint8)
        teX = loaded[16:].reshape((10000, 28 * 28)).astype(float)

    with gzip.open(os.path.join(data_dir, 't10k-labels-idx1-ubyte.gz')) as fd:
        buf = fd.read()
        loaded = np.frombuffer(buf, dtype=np.uint8)
        teY = loaded[8:].reshape(10000)

    # scale pixel values to [0, 1]
    trX /= 255.
    teX /= 255.

    trX = trX[:ntrain]
    trY = trY[:ntrain]

    teX = teX[:ntest]
    teY = teY[:ntest]

    if onehot:
        trY = one_hot(trY, 10)
        teY = one_hot(teY, 10)
    else:
        trY = np.asarray(trY)
        teY = np.asarray(teY)

    return trX, teX, trY, teY
--------------------------------------------------------------------------------
/6_lstm.py:
--------------------------------------------------------------------------------
import numpy as np

import torch
from torch.autograd import Variable
from torch import optim, nn

from data_util import load_mnist


class LSTMNet(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(LSTMNet, self).__init__()
        self.hidden_dim = hidden_dim
        self.lstm = nn.LSTM(input_dim, hidden_dim)
        self.linear = nn.Linear(hidden_dim, output_dim, bias=False)

    def forward(self, x):
        batch_size = x.size()[1]
        # initial hidden and cell states for the single LSTM layer
        h0 = Variable(torch.zeros([1, batch_size, self.hidden_dim]), requires_grad=False)
        c0 = Variable(torch.zeros([1, batch_size, self.hidden_dim]), requires_grad=False)
        fx, _ = self.lstm.forward(x, (h0, c0))
        # classify from the hidden state after the last time step
        return self.linear.forward(fx[-1])

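# Illustrative note (not in the original file): nn.LSTM defaults to
# batch_first=False, so inputs have shape (seq_len, batch, input_dim). Each
# 28x28 MNIST image is fed as a sequence of its 28 rows. A quick shape check:
#
#     net = LSTMNet(28, 128, 10)
#     out = net(torch.zeros(28, 5, 28))  # seq_len=28, batch=5, input_dim=28
#     assert out.shape == (5, 10)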

def train(model, loss, optimizer, x_val, y_val):
    model.train()
    x = Variable(x_val, requires_grad=False)
    y = Variable(y_val, requires_grad=False)

    # Reset gradient
    optimizer.zero_grad()

    # Forward
    fx = model.forward(x)
    output = loss.forward(fx, y)

    # Backward
    output.backward()

    # Update parameters
    optimizer.step()

    return output.item()


def predict(model, x_val):
    model.eval()
    x = Variable(x_val, requires_grad=False)
    output = model.forward(x)
    return output.data.numpy().argmax(axis=1)


def main():
    torch.manual_seed(42)
    trX, teX, trY, teY = load_mnist(onehot=False)

    train_size = len(trY)
    n_classes = 10
    seq_length = 28
    input_dim = 28
    hidden_dim = 128
    batch_size = 100
    epochs = 20

    trX = trX.reshape(-1, seq_length, input_dim)
    teX = teX.reshape(-1, seq_length, input_dim)

    # Convert to the shape (seq_length, num_samples, input_dim)
    trX = np.swapaxes(trX, 0, 1)
    teX = np.swapaxes(teX, 0, 1)

    trX = torch.from_numpy(trX).float()
    teX = torch.from_numpy(teX).float()
    trY = torch.from_numpy(trY).long()

    model = LSTMNet(input_dim, hidden_dim, n_classes)
    loss = torch.nn.CrossEntropyLoss(reduction='mean')
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

    for i in range(epochs):
        cost = 0.
        num_batches = train_size // batch_size
        for k in range(num_batches):
            start, end = k * batch_size, (k + 1) * batch_size
            cost += train(model, loss, optimizer, trX[:, start:end, :], trY[start:end])
        predY = predict(model, teX)
        print("Epoch %d, cost = %f, acc = %.2f%%" %
              (i + 1, cost / num_batches, 100. * np.mean(predY == teY)))


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/5_convolutional_net.py:
--------------------------------------------------------------------------------
import numpy as np

import torch
from torch.autograd import Variable
from torch import optim

from data_util import load_mnist


# Two Sequential blocks are used here since torch.nn has no View module;
# the flattening between them is done with Tensor.view() in forward().
class ConvNet(torch.nn.Module):
    def __init__(self, output_dim):
        super(ConvNet, self).__init__()

        self.conv = torch.nn.Sequential()
        self.conv.add_module("conv_1", torch.nn.Conv2d(1, 10, kernel_size=5))
        self.conv.add_module("maxpool_1", torch.nn.MaxPool2d(kernel_size=2))
        self.conv.add_module("relu_1", torch.nn.ReLU())
        self.conv.add_module("conv_2", torch.nn.Conv2d(10, 20, kernel_size=5))
        self.conv.add_module("dropout_2", torch.nn.Dropout())
        self.conv.add_module("maxpool_2", torch.nn.MaxPool2d(kernel_size=2))
        self.conv.add_module("relu_2", torch.nn.ReLU())

        self.fc = torch.nn.Sequential()
        self.fc.add_module("fc1", torch.nn.Linear(320, 50))
        self.fc.add_module("relu_3", torch.nn.ReLU())
        self.fc.add_module("dropout_3", torch.nn.Dropout())
        self.fc.add_module("fc2", torch.nn.Linear(50, output_dim))

    def forward(self, x):
        x = self.conv.forward(x)
        x = x.view(-1, 320)
        return self.fc.forward(x)

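# Illustrative shape walk (not in the original file), showing where the 320
# in fc1 comes from:
#
#     input 1x28x28 -> conv_1 (k=5) -> 10x24x24 -> maxpool_1 -> 10x12x12
#                   -> conv_2 (k=5) -> 20x8x8   -> maxpool_2 -> 20x4x4
#
# and 20 * 4 * 4 = 320 features per image after flattening.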

def train(model, loss, optimizer, x_val, y_val):
    model.train()
    x = Variable(x_val, requires_grad=False)
    y = Variable(y_val, requires_grad=False)

    # Reset gradient
    optimizer.zero_grad()

    # Forward
    fx = model.forward(x)
    output = loss.forward(fx, y)

    # Backward
    output.backward()

    # Update parameters
    optimizer.step()

    return output.item()


def predict(model, x_val):
    model.eval()
    x = Variable(x_val, requires_grad=False)
    output = model.forward(x)
    return output.data.numpy().argmax(axis=1)


def main():
    torch.manual_seed(42)
    trX, teX, trY, teY = load_mnist(onehot=False)
    trX = trX.reshape(-1, 1, 28, 28)
    teX = teX.reshape(-1, 1, 28, 28)

    trX = torch.from_numpy(trX).float()
    teX = torch.from_numpy(teX).float()
    trY = torch.from_numpy(trY).long()

    n_examples = len(trX)
    n_classes = 10
    model = ConvNet(output_dim=n_classes)
    loss = torch.nn.CrossEntropyLoss(reduction='mean')
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    batch_size = 100

    for i in range(20):
        cost = 0.
        num_batches = n_examples // batch_size
        for k in range(num_batches):
            start, end = k * batch_size, (k + 1) * batch_size
            cost += train(model, loss, optimizer, trX[start:end], trY[start:end])
        predY = predict(model, teX)
        print("Epoch %d, cost = %f, acc = %.2f%%"
              % (i + 1, cost / num_batches, 100. * np.mean(predY == teY)))


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------