├── .gitignore
├── README.md
├── data
│   └── .keep
├── models
│   ├── conditional_gan_mnist.py
│   ├── dqn_cartpole.py
│   ├── encoder_decoder_attention.py
│   ├── encoder_decoder_lstm.py
│   ├── gan_fashion_mnist.py
│   ├── layers
│   │   ├── Attention.py
│   │   ├── DotProductAttention.py
│   │   ├── Flatten.py
│   │   ├── GlobalAvgPool2d.py
│   │   ├── MultiHeadAttention.py
│   │   ├── PositionalEncoding.py
│   │   ├── ScaledDotProductAttention.py
│   │   └── __init__.py
│   ├── lenet_mnist.py
│   ├── logistic_regression_mnist.py
│   ├── mlp_mnist.py
│   ├── resnet34_fashion_mnist.py
│   ├── resnet50_fashion_mnist.py
│   ├── transformer.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── datasets
│   │   │   ├── __init__.py
│   │   │   └── small_parallel_enja.py
│   │   └── preprocessing
│   │       ├── __init__.py
│   │       └── sequence
│   │           ├── __init__.py
│   │           ├── pad_sequences.py
│   │           └── sort.py
│   └── vae_fashion_mnist.py
└── output
    └── .keep
/.gitignore: -------------------------------------------------------------------------------- 1 | data/* 2 | !data/.keep 3 | output/* 4 | !output/.keep 5 | 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # pyenv 81 | .python-version 82 | 83 | # celery beat schedule file 84 | celerybeat-schedule 85 | 86 | # SageMath parsed files 87 | *.sage.py 88 | 89 | # Environments 90 | .env 91 | .venv 92 | env/ 93 | venv/ 94 | ENV/ 95 | env.bak/ 96 | venv.bak/ 97 | 98 | # Spyder project settings 99 | .spyderproject 100 | .spyproject 101 | 102 | # Rope project settings 103 | .ropeproject 104 | 105 | # mkdocs documentation 106 | /site 107 | 108 | # mypy 109 | .mypy_cache/ 110 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Learning with PyTorch 1.X 2 | 3 | Implementations of neural network models with torch (>=1.0) 4 | 5 | See also implementations with TensorFlow 2.0 [here](https://github.com/yusugomori/deeplearning-tf2).
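Each model is implemented as a self-contained script under `models/`. A typical run looks like the sketch below (it assumes you launch from inside `models/` so that the bundled `layers` and `utils` packages resolve; the MNIST-style datasets are downloaded into `data/` on first use):

```shell
$ cd models
$ python mlp_mnist.py
```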
6 | 7 | ## Requirements 8 | 9 | * PyTorch >= 1.0 10 | 11 | ```shell 12 | $ pip install torch torchvision 13 | ``` 14 | 15 | ## Models 16 | 17 | * Logistic Regression 18 | * MLP 19 | * LeNet 20 | * ResNet (ResNet34, ResNet50) 21 | * Encoder-Decoder (LSTM) 22 | * Encoder-Decoder (Attention) 23 | * Transformer 24 | * Deep Q-Network 25 | * Variational Autoencoder 26 | * Generative Adversarial Network 27 | * Conditional GAN 28 | 29 | ``` 30 | models/ 31 | ├── conditional_gan_mnist.py 32 | ├── dqn_cartpole.py 33 | ├── encoder_decoder_attention.py 34 | ├── encoder_decoder_lstm.py 35 | ├── gan_fashion_mnist.py 36 | ├── lenet_mnist.py 37 | ├── logistic_regression_mnist.py 38 | ├── mlp_mnist.py 39 | ├── resnet34_fashion_mnist.py 40 | ├── resnet50_fashion_mnist.py 41 | ├── transformer.py 42 | ├── vae_fashion_mnist.py 43 | │ 44 | └── layers/ 45 |    ├── Attention.py 46 |    ├── DotProductAttention.py 47 |    ├── Flatten.py 48 |    ├── GlobalAvgPool2d.py 49 |    ├── MultiHeadAttention.py 50 |    ├── PositionalEncoding.py 51 |    └── ScaledDotProductAttention.py 52 | ``` 53 | -------------------------------------------------------------------------------- /data/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yusugomori/deeplearning-pytorch/25eeab2eea574f7c57d5ee9cd5e14d64dc2d1d56/data/.keep -------------------------------------------------------------------------------- /models/conditional_gan_mnist.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.optim as optimizers 6 | import torch.nn.functional as F 7 | from torch.utils.data import Dataset, DataLoader 8 | import torchvision 9 | import torchvision.transforms as transforms 10 | import matplotlib 11 | # matplotlib.use('Agg') 12 | import matplotlib.pyplot as plt 13 | 14 | 15 | class CGAN(nn.Module): 16 | ''' 17 | Simple Conditional GAN 18 | ''' 19 | def __init__(self, device='cpu'): 20 | super().__init__() 21 | self.device = device 22 | self.G = Generator(device=device) 23 | self.D = Discriminator(device=device) 24 | 25 | def forward(self, x, cond): 26 | x = self.G(x, cond) 27 | y = self.D(x, cond) 28 | 29 | return y 30 | 31 | 32 | class Discriminator(nn.Module): 33 | def __init__(self, device='cpu'): 34 | super().__init__() 35 | self.device = device 36 | self.reshape = lambda x: torch.ones(10, 28, 28).to(device) * x  # broadcast one-hot cond (batch, 10, 1, 1) to (batch, 10, 28, 28) 37 | self.conv1 = nn.Conv2d(1+10, 128, 38 | kernel_size=(3, 3), 39 | stride=(2, 2), 40 | padding=1) 41 | self.relu1 = nn.LeakyReLU(0.2) 42 | self.conv2 = nn.Conv2d(128, 256, 43 | kernel_size=(3, 3), 44 | stride=(2, 2), 45 | padding=1) 46 | self.bn2 = nn.BatchNorm2d(256) 47 | self.relu2 = nn.LeakyReLU(0.2) 48 | self.fc = nn.Linear(256*7*7, 1024) 49 | self.bn3 = nn.BatchNorm1d(1024) 50 | self.relu3 = nn.LeakyReLU(0.2) 51 | self.out = nn.Linear(1024, 1) 52 | 53 | for l in [self.conv1, self.conv2, self.fc, self.out]: 54 | nn.init.xavier_normal_(l.weight) 55 | 56 | def forward(self, x, cond): 57 | cond = cond.view(-1, 10, 1, 1) 58 | cond = self.reshape(cond) 59 | x = torch.cat((x, cond), dim=1) 60 | h = self.conv1(x) 61 | h = self.relu1(h) 62 | h = self.conv2(h) 63 | h = self.bn2(h) 64 | h = self.relu2(h) 65 | h = h.view(-1, 256*7*7) 66 | h = self.fc(h) 67 | h = self.bn3(h) 68 | h = self.relu3(h) 69 | h = self.out(h) 70 | y = torch.sigmoid(h) 71 | 72 | return y 73 | 74 | 75 | class Generator(nn.Module): 76 | def __init__(self, 77 |
input_dim=100, 78 | device='cpu'): 79 | super().__init__() 80 | self.device = device 81 | self.linear = nn.Linear(input_dim+10, 256*14*14) 82 | self.bn1 = nn.BatchNorm1d(256*14*14) 83 | self.relu1 = nn.ReLU() 84 | self.conv1 = nn.Conv2d(256, 128, 85 | kernel_size=(3, 3), 86 | padding=1) 87 | self.bn2 = nn.BatchNorm2d(128) 88 | self.relu2 = nn.ReLU() 89 | self.conv2 = nn.Conv2d(128, 64, 90 | kernel_size=(3, 3), 91 | padding=1) 92 | self.bn3 = nn.BatchNorm2d(64) 93 | self.relu3 = nn.ReLU() 94 | self.conv3 = nn.Conv2d(64, 1, 95 | kernel_size=(1, 1)) 96 | 97 | for l in [self.conv1, self.conv2, self.conv3]: 98 | nn.init.xavier_normal_(l.weight) 99 | 100 | def forward(self, x, cond): 101 | x = torch.cat((x, cond), dim=-1) 102 | h = self.linear(x) 103 | h = self.bn1(h) 104 | h = self.relu1(h) 105 | h = h.view(-1, 256, 14, 14) 106 | h = nn.functional.interpolate(h, size=(28, 28)) 107 | h = self.conv1(h) 108 | h = self.bn2(h) 109 | h = self.relu2(h) 110 | h = self.conv2(h) 111 | h = self.bn3(h) 112 | h = self.relu3(h) 113 | h = self.conv3(h) 114 | y = torch.sigmoid(h) 115 | 116 | return y 117 | 118 | 119 | if __name__ == '__main__': 120 | np.random.seed(1234) 121 | torch.manual_seed(1234) 122 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 123 | 124 | def compute_loss(label, pred): 125 | return criterion(pred, label) 126 | 127 | def train_step(x, t): 128 | batch_size = x.size(0) 129 | model.D.train() 130 | model.G.train() 131 | 132 | # train D 133 | # real images 134 | cond = torch.eye(10)[t.long()].float().to(device) 135 | preds = model.D(x, cond).squeeze() # preds with true images 136 | label = torch.ones(batch_size).float().to(device) 137 | loss_D_real = compute_loss(label, preds) 138 | # fake images 139 | noise = gen_noise(batch_size) 140 | cond = gen_cond(batch_size).detach() 141 | z = model.G(noise, cond).detach() 142 | preds = model.D(z, cond).squeeze() # preds with fake images 143 | label = torch.zeros(batch_size).float().to(device) 144 | loss_D_fake = compute_loss(label, preds) 145 | 146 | loss_D = loss_D_real + loss_D_fake 147 | optimizer_D.zero_grad() 148 | loss_D.backward() 149 | optimizer_D.step() 150 | 151 | # train G 152 | noise = gen_noise(batch_size) 153 | cond = gen_cond(batch_size) 154 | z = model.G(noise, cond) 155 | preds = model.D(z, cond).squeeze() # preds with fake images 156 | label = torch.ones(batch_size).float().to(device) # label as true 157 | loss_G = compute_loss(label, preds) 158 | optimizer_G.zero_grad() 159 | loss_G.backward() 160 | optimizer_G.step() 161 | 162 | return loss_D, loss_G 163 | 164 | def generate(cond): 165 | model.eval() 166 | batch_size = cond.size(0) 167 | noise = gen_noise(batch_size) 168 | gen = model.G(noise, cond) 169 | 170 | return gen 171 | 172 | def gen_noise(batch_size): 173 | return torch.empty(batch_size, 100).uniform_(0, 1).to(device) 174 | 175 | def gen_cond(batch_size, one_hot=True): 176 | cond = torch.randint(0, 10, (batch_size,)).long() 177 | if not one_hot: 178 | return cond.to(device) 179 | return torch.eye(10)[cond].float().to(device) 180 | 181 | ''' 182 | Load data 183 | ''' 184 | root = os.path.join(os.path.dirname(__file__), 185 | '..', 'data', 'mnist') 186 | transform = transforms.Compose([transforms.ToTensor()]) 187 | mnist_train = \ 188 | torchvision.datasets.MNIST(root=root, 189 | download=True, 190 | train=True, 191 | transform=transform) 192 | train_dataloader = DataLoader(mnist_train, 193 | batch_size=100, 194 | shuffle=True) 195 | 196 | ''' 197 | Build model 198 | ''' 199 | model = 
CGAN(device=device).to(device) 200 | criterion = nn.BCELoss() 201 | optimizer_D = optimizers.Adam(model.D.parameters(), lr=0.0002) 202 | optimizer_G = optimizers.Adam(model.G.parameters(), lr=0.0002) 203 | 204 | ''' 205 | Train model 206 | ''' 207 | epochs = 1000 208 | out_path = os.path.join(os.path.dirname(__file__), 209 | '..', 'output') 210 | 211 | for epoch in range(epochs): 212 | train_loss_D = 0. 213 | train_loss_G = 0. 214 | test_loss = 0. 215 | 216 | for (x, t) in train_dataloader: 217 | x = x.to(device) 218 | loss_D, loss_G = train_step(x, t) 219 | 220 | train_loss_D += loss_D.item() 221 | train_loss_G += loss_G.item() 222 | 223 | train_loss_D /= len(train_dataloader) 224 | train_loss_G /= len(train_dataloader) 225 | 226 | print('Epoch: {}, D Cost: {:.3f}, G Cost: {:.3f}'.format( 227 | epoch+1, 228 | train_loss_D, 229 | train_loss_G 230 | )) 231 | 232 | if epoch % 5 == 4 or epoch == epochs - 1: 233 | cond = torch.eye(10)[torch.arange(10).long()].float().to(device) 234 | images = generate(cond) 235 | images = images.squeeze().detach().cpu().numpy() 236 | plt.figure(figsize=(5, 2)) 237 | for i, image in enumerate(images): 238 | plt.subplot(2, 5, i+1) 239 | plt.imshow(image, cmap='binary') 240 | plt.axis('off') 241 | plt.tight_layout() 242 | # plt.show() 243 | template = '{}/conditional_gan_mnist_epoch_{:0>4}.png' 244 | plt.savefig(template.format(out_path, epoch+1), dpi=300) 245 | -------------------------------------------------------------------------------- /models/dqn_cartpole.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from collections import deque 3 | import numpy as np 4 | import gym 5 | import torch 6 | import torch.nn as nn 7 | import torch.optim as optimizers 8 | 9 | 10 | class DQN(nn.Module): 11 | ''' 12 | Simple Deep Q-Network for CartPole 13 | ''' 14 | def __init__(self, 15 | device='cpu'): 16 | super().__init__() 17 | self.device = device 18 | 19 | self.original = Network().to(device) 20 | self.target = Network().to(device) 21 | 22 | def forward(self, x): 23 | return self.original(x) 24 | 25 | def q_original(self, x): 26 | return self.forward(x) 27 | 28 | def q_target(self, x): 29 | return self.target(x) 30 | 31 | def copy_original(self): 32 | self.target = copy.deepcopy(self.original) 33 | 34 | 35 | class Network(nn.Module): 36 | def __init__(self, 37 | device='cpu'): 38 | super().__init__() 39 | self.device = device 40 | 41 | self.l1 = nn.Linear(4, 16) 42 | self.l2 = nn.Linear(16, 32) 43 | self.l3 = nn.Linear(32, 16) 44 | self.l4 = nn.Linear(16, 2) 45 | 46 | def forward(self, x): 47 | x = self.l1(x) 48 | x = torch.relu(x) 49 | x = self.l2(x) 50 | x = torch.relu(x) 51 | x = self.l3(x) 52 | x = torch.relu(x) 53 | y = self.l4(x) 54 | 55 | return y 56 | 57 | 58 | class ReplayMemory(object): 59 | def __init__(self, 60 | memory_size=50000, 61 | device='cpu'): 62 | self.device = device 63 | self.memory_size = memory_size 64 | self.memories = deque([], maxlen=memory_size) 65 | 66 | def append(self, memory): 67 | self.memories.append(memory) 68 | 69 | def sample(self, batch_size=128): 70 | indices = \ 71 | np.random.permutation(range(len(self.memories)))[:batch_size]\ 72 | .tolist() 73 | 74 | state = np.array([self.memories[i].state for i in indices]) 75 | action = np.array([self.memories[i].action for i in indices]) 76 | next_state = \ 77 | np.array([self.memories[i].next_state for i in indices]) 78 | reward = np.array([self.memories[i].reward for i in indices]) 79 | terminal = np.array([self.memories[i].terminal 
for i in indices]) 80 | 81 | return Memory( 82 | torch.Tensor(state).to(self.device), 83 | torch.Tensor(action).to(self.device), 84 | torch.Tensor(next_state).to(self.device), 85 | torch.Tensor(reward).to(self.device), 86 | torch.Tensor(terminal).to(self.device), 87 | ) 88 | 89 | 90 | class Memory(object): 91 | def __init__(self, 92 | state, 93 | action, 94 | next_state, 95 | reward, 96 | terminal): 97 | self.state = state 98 | self.action = action 99 | self.next_state = next_state 100 | self.reward = reward 101 | self.terminal = terminal 102 | 103 | 104 | class Epsilon(object): 105 | def __init__(self, 106 | init=1.0, 107 | end=0.1, 108 | steps=10000): 109 | self.init = init 110 | self.end = end 111 | self.steps = steps 112 | 113 | def __call__(self, step): 114 | return max(self.end,  # anneal linearly, never below the floor `end` 115 | self.init + (self.end - self.init) / self.steps * step) 116 | 117 | 118 | if __name__ == '__main__': 119 | np.random.seed(1234) 120 | torch.manual_seed(1234) 121 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 122 | 123 | def compute_loss(label, pred): 124 | return criterion(pred, label) 125 | 126 | def train_step(state, action, t): 127 | model.train() 128 | q_original = model(state) 129 | action = torch.eye(2)[action.long()].to(device) # one-hot 130 | q = torch.sum(q_original * action, dim=1) 131 | loss = compute_loss(t, q) 132 | optimizer.zero_grad() 133 | loss.backward() 134 | optimizer.step() 135 | 136 | return loss 137 | 138 | ''' 139 | Load env 140 | ''' 141 | env = gym.make('CartPole-v0') 142 | 143 | ''' 144 | Build model 145 | ''' 146 | model = DQN(device=device) 147 | criterion = nn.MSELoss() 148 | optimizer = optimizers.Adam(model.parameters()) 149 | 150 | ''' 151 | Build ReplayMemory 152 | ''' 153 | initial_memory_size = 500 154 | replay_memory = ReplayMemory(device=device) 155 | 156 | step = 0 157 | while True: 158 | state = env.reset() 159 | terminal = False 160 | 161 | while not terminal: 162 | action = env.action_space.sample() 163 | next_state, reward, terminal, _ = env.step(action) 164 | memory = Memory(state, action, next_state, reward, int(terminal)) 165 | replay_memory.append(memory) 166 | state = next_state 167 | step += 1 168 | 169 | if step >= initial_memory_size: 170 | break 171 | 172 | ''' 173 | Train model 174 | ''' 175 | n_episodes = 300 176 | gamma = 0.99 177 | step = 0 178 | copy_original_every = 1000 179 | eps = Epsilon() 180 | 181 | model.copy_original() 182 | for episode in range(n_episodes): 183 | state = env.reset() 184 | terminal = False 185 | 186 | rewards = 0.
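# A note on the inner loop below: act epsilon-greedily on the online network
# (q_original), store the transition in replay memory, sample a minibatch,
# and regress q_original toward the bootstrapped target
#     t = r + (1 - terminal) * gamma * max_a' q_target(s', a'),
# where q_target is the copy periodically refreshed by copy_original().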
187 | q_max = [] 188 | while not terminal: 189 | s = torch.Tensor(state[None]).to(device) 190 | q = model.q_original(s) 191 | q_max.append(q.max().data.cpu().numpy()) 192 | 193 | # epsilon-greedy 194 | if np.random.random() < eps(step): 195 | action = env.action_space.sample() 196 | else: 197 | action = torch.argmax(q).data.cpu().numpy() 198 | 199 | next_state, reward, terminal, _ = env.step(action) 200 | rewards += reward 201 | 202 | memory = Memory(state, action, next_state, reward, int(terminal)) 203 | replay_memory.append(memory) 204 | 205 | sample = replay_memory.sample() 206 | q_target = model.q_target(sample.next_state).detach()  # stop gradients flowing into the target network 207 | 208 | t = sample.reward \ 209 | + (1 - sample.terminal) * gamma * q_target.max(-1)[0] 210 | 211 | train_step(sample.state, sample.action, t) 212 | 213 | state = next_state 214 | env.render() 215 | 216 | if (step + 1) % copy_original_every == 0: 217 | model.copy_original() 218 | 219 | step += 1 220 | 221 | template = 'Episode: {}, Reward: {}, Qmax: {:.3f}' 222 | print(template.format( 223 | episode+1, 224 | rewards, 225 | np.mean(q_max) 226 | )) 227 | 228 | env.close() 229 | -------------------------------------------------------------------------------- /models/encoder_decoder_attention.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.optim as optimizers 6 | # from torch.utils.data import Dataset, DataLoader 7 | from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence 8 | from utils.datasets.small_parallel_enja import load_small_parallel_enja 9 | from utils.preprocessing.sequence import pad_sequences, sort 10 | from sklearn.utils import shuffle 11 | from layers import Attention 12 | 13 | 14 | class EncoderDecoder(nn.Module): 15 | def __init__(self, 16 | input_dim, 17 | hidden_dim, 18 | output_dim, 19 | bos_value=1, 20 | max_len=20, 21 | device='cpu'): 22 | super().__init__() 23 | self.device = device 24 | self.encoder = Encoder(input_dim, hidden_dim, device=device) 25 | self.decoder = Decoder(hidden_dim, output_dim, device=device) 26 | 27 | self._BOS = bos_value 28 | self._max_len = max_len 29 | self.output_dim = output_dim 30 | 31 | def forward(self, source, target=None, use_teacher_forcing=False): 32 | batch_size = source.size()[1] 33 | if target is not None: 34 | len_target_sequences = target.size()[0] 35 | else: 36 | len_target_sequences = self._max_len 37 | 38 | hs, states = self.encoder(source) 39 | 40 | y = torch.ones((1, batch_size), 41 | dtype=torch.long, 42 | device=device) * self._BOS 43 | output = torch.zeros((len_target_sequences, 44 | batch_size, 45 | self.output_dim), 46 | device=device) 47 | 48 | for t in range(len_target_sequences): 49 | out, states = self.decoder(y, hs, states, source=source) 50 | output[t] = out 51 | 52 | if use_teacher_forcing and target is not None: 53 | y = target[t].unsqueeze(0) 54 | else: 55 | y = out.max(-1)[1] 56 | 57 | return output 58 | 59 | 60 | class Encoder(nn.Module): 61 | def __init__(self, 62 | input_dim, 63 | hidden_dim, 64 | device='cpu'): 65 | super().__init__() 66 | self.device = device 67 | self.embedding = nn.Embedding(input_dim, hidden_dim, padding_idx=0) 68 | self.lstm = nn.LSTM(hidden_dim, hidden_dim) 69 | 70 | def forward(self, x): 71 | len_source_sequences = (x.t() > 0).sum(dim=-1) 72 | x = self.embedding(x) 73 | pack = pack_padded_sequence(x, len_source_sequences) 74 | y, states = self.lstm(pack) 75 | y, _ = pad_packed_sequence(y) 76 | 77 | return
y, states 78 | 79 | 80 | class Decoder(nn.Module): 81 | def __init__(self, 82 | hidden_dim, 83 | output_dim, 84 | device='cpu'): 85 | super().__init__() 86 | self.device = device 87 | self.embedding = nn.Embedding(output_dim, hidden_dim, padding_idx=0) 88 | self.lstm = nn.LSTM(hidden_dim, hidden_dim) 89 | self.attn = Attention(hidden_dim, hidden_dim, device=device) 90 | self.out = nn.Linear(hidden_dim, output_dim) 91 | 92 | def forward(self, x, hs, states, source=None): 93 | x = self.embedding(x) 94 | x, states = self.lstm(x, states) 95 | x = self.attn(x, hs, source=source) 96 | y = self.out(x) 97 | # y = torch.log_softmax(x, dim=-1) 98 | 99 | return y, states 100 | 101 | 102 | if __name__ == '__main__': 103 | np.random.seed(1234) 104 | torch.manual_seed(1234) 105 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 106 | 107 | def compute_loss(label, pred): 108 | return criterion(pred, label) 109 | 110 | def train_step(x, t, 111 | teacher_forcing_rate=0.5, 112 | pad_value=0): 113 | use_teacher_forcing = (random.random() < teacher_forcing_rate) 114 | model.train() 115 | preds = model(x, t, use_teacher_forcing=use_teacher_forcing) 116 | loss = compute_loss(t.contiguous().view(-1), 117 | preds.contiguous().view(-1, preds.size(-1))) 118 | 119 | optimizer.zero_grad() 120 | loss.backward() 121 | optimizer.step() 122 | 123 | return loss, preds 124 | 125 | def valid_step(x, t): 126 | model.eval() 127 | preds = model(x, t, use_teacher_forcing=False) 128 | loss = compute_loss(t.contiguous().view(-1), 129 | preds.contiguous().view(-1, preds.size(-1))) 130 | 131 | return loss, preds 132 | 133 | def test_step(x): 134 | model.eval() 135 | preds = model(x) 136 | return preds 137 | 138 | def ids_to_sentence(ids, i2w): 139 | return [i2w[id] for id in ids] 140 | 141 | ''' 142 | Load data 143 | ''' 144 | class ParallelDataLoader(object): 145 | def __init__(self, dataset, 146 | batch_size=128, 147 | shuffle=False, 148 | random_state=None): 149 | if type(dataset) is not tuple: 150 | raise ValueError('argument `dataset` must be tuple,' 151 | ' not {}.'.format(type(dataset))) 152 | self.dataset = list(zip(dataset[0], dataset[1])) 153 | self.batch_size = batch_size 154 | self.shuffle = shuffle 155 | if random_state is None: 156 | random_state = np.random.RandomState(1234) 157 | self.random_state = random_state 158 | self._idx = 0 159 | 160 | def __len__(self): 161 | return len(self.dataset) 162 | 163 | def __iter__(self): 164 | return self 165 | 166 | def __next__(self): 167 | if self._idx >= len(self.dataset): 168 | self._reorder() 169 | raise StopIteration() 170 | 171 | x, y = zip(*self.dataset[self._idx:(self._idx + self.batch_size)]) 172 | x, y = sort(x, y, order='descend') 173 | x = pad_sequences(x, padding='post') 174 | y = pad_sequences(y, padding='post') 175 | 176 | x = torch.LongTensor(x).t() 177 | y = torch.LongTensor(y).t() 178 | 179 | self._idx += self.batch_size 180 | 181 | return x, y 182 | 183 | def _reorder(self): 184 | if self.shuffle: 185 | self.dataset = shuffle(self.dataset, 186 | random_state=self.random_state) 187 | self._idx = 0 188 | 189 | (x_train, y_train), \ 190 | (x_test, y_test), \ 191 | (num_x, num_y), \ 192 | (w2i_x, w2i_y), (i2w_x, i2w_y) = \ 193 | load_small_parallel_enja(to_ja=True, add_bos=False) 194 | 195 | train_dataloader = ParallelDataLoader((x_train, y_train), 196 | shuffle=True) 197 | valid_dataloader = ParallelDataLoader((x_test, y_test)) 198 | test_dataloader = ParallelDataLoader((x_test, y_test), 199 | batch_size=1, 200 | shuffle=True) 201 | 202 | 
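# A note on ParallelDataLoader above: every batch is sorted by descending
# source length and post-padded with zeros, which is the layout that
# pack_padded_sequence in the Encoder expects; the transpose yields
# tensors of shape (sequence, batch).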
''' 203 | Build model 204 | ''' 205 | input_dim = num_x 206 | hidden_dim = 128 207 | output_dim = num_y 208 | 209 | model = EncoderDecoder(input_dim, 210 | hidden_dim, 211 | output_dim, 212 | device=device).to(device) 213 | criterion = nn.CrossEntropyLoss(reduction='sum', ignore_index=0) 214 | optimizer = optimizers.Adam(model.parameters()) 215 | 216 | ''' 217 | Train model 218 | ''' 219 | epochs = 20 220 | 221 | for epoch in range(epochs): 222 | print('-' * 20) 223 | print('Epoch: {}'.format(epoch+1)) 224 | 225 | train_loss = 0. 226 | valid_loss = 0. 227 | 228 | for (source, target) in train_dataloader: 229 | source, target = source.to(device), target.to(device) 230 | loss, _ = train_step(source, target) 231 | train_loss += loss.item() 232 | 233 | train_loss /= len(train_dataloader) 234 | 235 | for (source, target) in valid_dataloader: 236 | source, target = source.to(device), target.to(device) 237 | loss, _ = valid_step(source, target) 238 | valid_loss += loss.item() 239 | 240 | valid_loss /= len(valid_dataloader) 241 | print('Valid loss: {:.3}'.format(valid_loss)) 242 | 243 | for idx, (source, target) in enumerate(test_dataloader): 244 | source, target = source.to(device), target.to(device) 245 | out = test_step(source) 246 | out = out.max(dim=-1)[1].view(-1).tolist() 247 | out = ' '.join(ids_to_sentence(out, i2w_y)) 248 | source = ' '.join(ids_to_sentence(source.view(-1).tolist(), i2w_x)) 249 | target = ' '.join(ids_to_sentence(target.view(-1).tolist(), i2w_y)) 250 | print('>', source) 251 | print('=', target) 252 | print('<', out) 253 | print() 254 | 255 | if idx >= 10: 256 | break 257 | -------------------------------------------------------------------------------- /models/encoder_decoder_lstm.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.optim as optimizers 6 | # from torch.utils.data import Dataset, DataLoader 7 | from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence 8 | from utils.datasets.small_parallel_enja import load_small_parallel_enja 9 | from utils.preprocessing.sequence import pad_sequences, sort 10 | from sklearn.utils import shuffle 11 | 12 | 13 | class EncoderDecoder(nn.Module): 14 | def __init__(self, 15 | input_dim, 16 | hidden_dim, 17 | output_dim, 18 | bos_value=1, 19 | max_len=20, 20 | device='cpu'): 21 | super().__init__() 22 | self.device = device 23 | self.encoder = Encoder(input_dim, hidden_dim, device=device) 24 | self.decoder = Decoder(hidden_dim, output_dim, device=device) 25 | 26 | self._BOS = bos_value 27 | self._max_len = max_len 28 | self.output_dim = output_dim 29 | 30 | def forward(self, source, target=None, use_teacher_forcing=False): 31 | batch_size = source.size()[1] 32 | if target is not None: 33 | len_target_sequences = target.size()[0] 34 | else: 35 | len_target_sequences = self._max_len 36 | 37 | _, states = self.encoder(source) 38 | 39 | y = torch.ones((1, batch_size), 40 | dtype=torch.long, 41 | device=device) * self._BOS 42 | output = torch.zeros((len_target_sequences, 43 | batch_size, 44 | self.output_dim), 45 | device=device) 46 | 47 | for t in range(len_target_sequences): 48 | out, states = self.decoder(y, states) 49 | output[t] = out 50 | 51 | if use_teacher_forcing and target is not None: 52 | y = target[t].unsqueeze(0) 53 | else: 54 | y = out.max(-1)[1] 55 | 56 | return output 57 | 58 | 59 | class Encoder(nn.Module): 60 | def __init__(self, 61 | input_dim, 62 | hidden_dim, 63 | 
device='cpu'): 64 | super().__init__() 65 | self.device = device 66 | self.embedding = nn.Embedding(input_dim, hidden_dim, padding_idx=0) 67 | self.lstm = nn.LSTM(hidden_dim, hidden_dim) 68 | 69 | def forward(self, x): 70 | len_source_sequences = (x.t() > 0).sum(dim=-1) 71 | x = self.embedding(x) 72 | pack = pack_padded_sequence(x, len_source_sequences) 73 | y, states = self.lstm(pack) 74 | y, _ = pad_packed_sequence(y) 75 | 76 | return y, states 77 | 78 | 79 | class Decoder(nn.Module): 80 | def __init__(self, 81 | hidden_dim, 82 | output_dim, 83 | device='cpu'): 84 | super().__init__() 85 | self.device = device 86 | self.embedding = nn.Embedding(output_dim, hidden_dim, padding_idx=0) 87 | self.lstm = nn.LSTM(hidden_dim, hidden_dim) 88 | self.out = nn.Linear(hidden_dim, output_dim) 89 | 90 | def forward(self, x, states): 91 | x = self.embedding(x) 92 | x, states = self.lstm(x, states) 93 | y = self.out(x) 94 | # y = torch.log_softmax(x, dim=-1) 95 | 96 | return y, states 97 | 98 | 99 | if __name__ == '__main__': 100 | np.random.seed(1234) 101 | torch.manual_seed(1234) 102 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 103 | 104 | def compute_loss(label, pred): 105 | return criterion(pred, label) 106 | 107 | def train_step(x, t, 108 | teacher_forcing_rate=0.5, 109 | pad_value=0): 110 | use_teacher_forcing = (random.random() < teacher_forcing_rate) 111 | model.train() 112 | preds = model(x, t, use_teacher_forcing=use_teacher_forcing) 113 | loss = compute_loss(t.contiguous().view(-1), 114 | preds.contiguous().view(-1, preds.size(-1))) 115 | 116 | optimizer.zero_grad() 117 | loss.backward() 118 | optimizer.step() 119 | 120 | return loss, preds 121 | 122 | def valid_step(x, t): 123 | model.eval() 124 | preds = model(x, t, use_teacher_forcing=False) 125 | loss = compute_loss(t.contiguous().view(-1), 126 | preds.contiguous().view(-1, preds.size(-1))) 127 | 128 | return loss, preds 129 | 130 | def test_step(x): 131 | model.eval() 132 | preds = model(x) 133 | return preds 134 | 135 | def ids_to_sentence(ids, i2w): 136 | return [i2w[id] for id in ids] 137 | 138 | ''' 139 | Load data 140 | ''' 141 | class ParallelDataLoader(object): 142 | def __init__(self, dataset, 143 | batch_size=128, 144 | shuffle=False, 145 | random_state=None): 146 | if type(dataset) is not tuple: 147 | raise ValueError('argument `dataset` must be tuple,' 148 | ' not {}.'.format(type(dataset))) 149 | self.dataset = list(zip(dataset[0], dataset[1])) 150 | self.batch_size = batch_size 151 | self.shuffle = shuffle 152 | if random_state is None: 153 | random_state = np.random.RandomState(1234) 154 | self.random_state = random_state 155 | self._idx = 0 156 | 157 | def __len__(self): 158 | return len(self.dataset) 159 | 160 | def __iter__(self): 161 | return self 162 | 163 | def __next__(self): 164 | if self._idx >= len(self.dataset): 165 | self._reorder() 166 | raise StopIteration() 167 | 168 | x, y = zip(*self.dataset[self._idx:(self._idx + self.batch_size)]) 169 | x, y = sort(x, y, order='descend') 170 | x = pad_sequences(x, padding='post') 171 | y = pad_sequences(y, padding='post') 172 | 173 | x = torch.LongTensor(x).t() 174 | y = torch.LongTensor(y).t() 175 | 176 | self._idx += self.batch_size 177 | 178 | return x, y 179 | 180 | def _reorder(self): 181 | if self.shuffle: 182 | self.dataset = shuffle(self.dataset, 183 | random_state=self.random_state) 184 | self._idx = 0 185 | 186 | (x_train, y_train), \ 187 | (x_test, y_test), \ 188 | (num_x, num_y), \ 189 | (w2i_x, w2i_y), (i2w_x, i2w_y) = \ 190 | 
load_small_parallel_enja(to_ja=True, add_bos=False) 191 | 192 | train_dataloader = ParallelDataLoader((x_train, y_train), 193 | shuffle=True) 194 | valid_dataloader = ParallelDataLoader((x_test, y_test)) 195 | test_dataloader = ParallelDataLoader((x_test, y_test), 196 | batch_size=1, 197 | shuffle=True) 198 | 199 | ''' 200 | Build model 201 | ''' 202 | input_dim = num_x 203 | hidden_dim = 128 204 | output_dim = num_y 205 | 206 | model = EncoderDecoder(input_dim, 207 | hidden_dim, 208 | output_dim, 209 | device=device).to(device) 210 | criterion = nn.CrossEntropyLoss(reduction='sum', ignore_index=0) 211 | optimizer = optimizers.Adam(model.parameters()) 212 | 213 | ''' 214 | Train model 215 | ''' 216 | epochs = 20 217 | 218 | for epoch in range(epochs): 219 | print('-' * 20) 220 | print('Epoch: {}'.format(epoch+1)) 221 | 222 | train_loss = 0. 223 | valid_loss = 0. 224 | 225 | for (source, target) in train_dataloader: 226 | source, target = source.to(device), target.to(device) 227 | loss, _ = train_step(source, target) 228 | train_loss += loss.item() 229 | 230 | train_loss /= len(train_dataloader) 231 | 232 | for (source, target) in valid_dataloader: 233 | source, target = source.to(device), target.to(device) 234 | loss, _ = valid_step(source, target) 235 | valid_loss += loss.item() 236 | 237 | valid_loss /= len(valid_dataloader) 238 | print('Valid loss: {:.3}'.format(valid_loss)) 239 | 240 | for idx, (source, target) in enumerate(test_dataloader): 241 | source, target = source.to(device), target.to(device) 242 | out = test_step(source) 243 | out = out.max(dim=-1)[1].view(-1).tolist() 244 | out = ' '.join(ids_to_sentence(out, i2w_y)) 245 | source = ' '.join(ids_to_sentence(source.view(-1).tolist(), i2w_x)) 246 | target = ' '.join(ids_to_sentence(target.view(-1).tolist(), i2w_y)) 247 | print('>', source) 248 | print('=', target) 249 | print('<', out) 250 | print() 251 | 252 | if idx >= 10: 253 | break 254 | -------------------------------------------------------------------------------- /models/gan_fashion_mnist.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.optim as optimizers 6 | import torch.nn.functional as F 7 | from torch.utils.data import Dataset, DataLoader 8 | import torchvision 9 | import torchvision.transforms as transforms 10 | import matplotlib 11 | # matplotlib.use('Agg') 12 | import matplotlib.pyplot as plt 13 | 14 | 15 | class GAN(nn.Module): 16 | ''' 17 | Simple Generative Adversarial Network 18 | ''' 19 | def __init__(self, device='cpu'): 20 | super().__init__() 21 | self.device = device 22 | self.G = Generator(device=device) 23 | self.D = Discriminator(device=device) 24 | 25 | def forward(self, x): 26 | x = self.G(x) 27 | y = self.D(x) 28 | 29 | return y 30 | 31 | 32 | class Discriminator(nn.Module): 33 | def __init__(self, device='cpu'): 34 | super().__init__() 35 | self.device = device 36 | self.conv1 = nn.Conv2d(1, 128, 37 | kernel_size=(3, 3), 38 | stride=(2, 2), 39 | padding=1) 40 | self.relu1 = nn.LeakyReLU(0.2) 41 | self.conv2 = nn.Conv2d(128, 256, 42 | kernel_size=(3, 3), 43 | stride=(2, 2), 44 | padding=1) 45 | self.bn2 = nn.BatchNorm2d(256) 46 | self.relu2 = nn.LeakyReLU(0.2) 47 | self.fc = nn.Linear(256*7*7, 1024) 48 | self.bn3 = nn.BatchNorm1d(1024) 49 | self.relu3 = nn.LeakyReLU(0.2) 50 | self.out = nn.Linear(1024, 1) 51 | 52 | for l in [self.conv1, self.conv2, self.fc, self.out]: 53 | nn.init.xavier_uniform_(l.weight) 54 | 55 | 
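# A note on the forward pass below: the two stride-2 convolutions halve
# the 28x28 input twice (28 -> 14 -> 7), which is why the flattened
# feature vector has 256*7*7 entries.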
def forward(self, x): 56 | h = self.conv1(x) 57 | h = self.relu1(h) 58 | h = self.conv2(h) 59 | h = self.bn2(h) 60 | h = self.relu2(h) 61 | h = h.view(-1, 256*7*7) 62 | h = self.fc(h) 63 | h = self.bn3(h) 64 | h = self.relu3(h) 65 | h = self.out(h) 66 | y = torch.sigmoid(h) 67 | 68 | return y 69 | 70 | 71 | class Generator(nn.Module): 72 | def __init__(self, 73 | input_dim=100, 74 | device='cpu'): 75 | super().__init__() 76 | self.device = device 77 | self.linear = nn.Linear(input_dim, 256*14*14) 78 | self.bn1 = nn.BatchNorm1d(256*14*14) 79 | self.relu1 = nn.ReLU() 80 | self.conv1 = nn.Conv2d(256, 128, 81 | kernel_size=(3, 3), 82 | padding=1) 83 | self.bn2 = nn.BatchNorm2d(128) 84 | self.relu2 = nn.ReLU() 85 | self.conv2 = nn.Conv2d(128, 64, 86 | kernel_size=(3, 3), 87 | padding=1) 88 | self.bn3 = nn.BatchNorm2d(64) 89 | self.relu3 = nn.ReLU() 90 | self.conv3 = nn.Conv2d(64, 1, 91 | kernel_size=(1, 1)) 92 | 93 | for l in [self.conv1, self.conv2, self.conv3]: 94 | nn.init.xavier_uniform_(l.weight) 95 | 96 | def forward(self, x): 97 | h = self.linear(x) 98 | h = self.bn1(h) 99 | h = self.relu1(h) 100 | h = h.view(-1, 256, 14, 14) 101 | h = nn.functional.interpolate(h, size=(28, 28)) 102 | h = self.conv1(h) 103 | h = self.bn2(h) 104 | h = self.relu2(h) 105 | h = self.conv2(h) 106 | h = self.bn3(h) 107 | h = self.relu3(h) 108 | h = self.conv3(h) 109 | y = torch.sigmoid(h) 110 | 111 | return y 112 | 113 | 114 | if __name__ == '__main__': 115 | np.random.seed(1234) 116 | torch.manual_seed(1234) 117 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 118 | 119 | def compute_loss(label, pred): 120 | return criterion(pred, label) 121 | 122 | def train_step(x): 123 | batch_size = x.size(0) 124 | model.D.train() 125 | model.G.train() 126 | 127 | # train D 128 | # real images 129 | preds = model.D(x).squeeze() # preds with true images 130 | t = torch.ones(batch_size).float().to(device) 131 | loss_D_real = compute_loss(t, preds) 132 | # fake images 133 | noise = gen_noise(batch_size) 134 | z = model.G(noise) 135 | preds = model.D(z.detach()).squeeze() # preds with fake images 136 | t = torch.zeros(batch_size).float().to(device) 137 | loss_D_fake = compute_loss(t, preds) 138 | 139 | loss_D = loss_D_real + loss_D_fake 140 | optimizer_D.zero_grad() 141 | loss_D.backward() 142 | optimizer_D.step() 143 | 144 | # train G 145 | noise = gen_noise(batch_size) 146 | z = model.G(noise) 147 | preds = model.D(z).squeeze() # preds with fake images 148 | t = torch.ones(batch_size).float().to(device) # label as true 149 | loss_G = compute_loss(t, preds) 150 | optimizer_G.zero_grad() 151 | loss_G.backward() 152 | optimizer_G.step() 153 | 154 | return loss_D, loss_G 155 | 156 | def generate(batch_size=10): 157 | model.eval() 158 | noise = gen_noise(batch_size) 159 | gen = model.G(noise) 160 | 161 | return gen 162 | 163 | def gen_noise(batch_size): 164 | return torch.empty(batch_size, 100).uniform_(0, 1).to(device) 165 | 166 | ''' 167 | Load data 168 | ''' 169 | root = os.path.join(os.path.dirname(__file__), 170 | '..', 'data', 'fashion_mnist') 171 | transform = transforms.Compose([transforms.ToTensor()]) 172 | mnist_train = \ 173 | torchvision.datasets.FashionMNIST(root=root, 174 | download=True, 175 | train=True, 176 | transform=transform) 177 | train_dataloader = DataLoader(mnist_train, 178 | batch_size=100, 179 | shuffle=True) 180 | 181 | ''' 182 | Build model 183 | ''' 184 | model = GAN(device=device).to(device) 185 | criterion = nn.BCELoss() 186 | optimizer_D = 
optimizers.Adam(model.D.parameters(), lr=0.0002) 187 | optimizer_G = optimizers.Adam(model.G.parameters(), lr=0.0002) 188 | 189 | ''' 190 | Train model 191 | ''' 192 | epochs = 100 193 | out_path = os.path.join(os.path.dirname(__file__), 194 | '..', 'output') 195 | 196 | for epoch in range(epochs): 197 | train_loss_D = 0. 198 | train_loss_G = 0. 199 | test_loss = 0. 200 | 201 | for (x, _) in train_dataloader: 202 | x = x.to(device) 203 | loss_D, loss_G = train_step(x) 204 | 205 | train_loss_D += loss_D.item() 206 | train_loss_G += loss_G.item() 207 | 208 | train_loss_D /= len(train_dataloader) 209 | train_loss_G /= len(train_dataloader) 210 | 211 | print('Epoch: {}, D Cost: {:.3f}, G Cost: {:.3f}'.format( 212 | epoch+1, 213 | train_loss_D, 214 | train_loss_G 215 | )) 216 | 217 | if epoch % 5 == 4 or epoch == epochs - 1: 218 | images = generate(batch_size=16) 219 | images = images.squeeze().detach().cpu().numpy() 220 | plt.figure(figsize=(6, 6)) 221 | for i, image in enumerate(images): 222 | plt.subplot(4, 4, i+1) 223 | plt.imshow(image, cmap='binary') 224 | plt.axis('off') 225 | plt.tight_layout() 226 | # plt.show() 227 | template = '{}/gan_fashion_mnist_epoch_{:0>4}.png' 228 | plt.savefig(template.format(out_path, epoch+1), dpi=300) 229 | -------------------------------------------------------------------------------- /models/layers/Attention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Attention(nn.Module): 6 | ''' 7 | Reference: 8 | "Effective Approaches to Attention-based Neural Machine Translation" 9 | https://arxiv.org/abs/1508.04025 10 | ''' 11 | def __init__(self, 12 | output_dim, 13 | hidden_dim, # suppose dim(hs) = dim(ht) 14 | device='cpu'): 15 | super().__init__() 16 | self.device = device 17 | self.output_dim = output_dim 18 | self.hidden_dim = hidden_dim 19 | 20 | self.W_a = nn.Parameter(torch.Tensor(hidden_dim, 21 | hidden_dim)) 22 | 23 | self.W_c = nn.Parameter(torch.Tensor(hidden_dim + hidden_dim, 24 | output_dim)) 25 | 26 | self.b = nn.Parameter(torch.zeros(output_dim)) 27 | 28 | nn.init.xavier_normal_(self.W_a) 29 | nn.init.xavier_normal_(self.W_c) 30 | 31 | def forward(self, ht, hs, source=None, pad_value=0): 32 | ''' 33 | # Argument 34 | ht, hs: (sequence, batch, out_features) 35 | source: (sequence, batch) 36 | ''' 37 | score = torch.einsum('jik,kl->jil', (hs, self.W_a)) 38 | score = torch.einsum('jik,lik->jil', (ht, score)) 39 | 40 | score = score - torch.max(score, 41 | dim=-1, 42 | keepdim=True)[0] # softmax max trick 43 | 44 | score = torch.exp(score) 45 | if source is not None: 46 | # mask_source = (source.t() != pad_value).unsqueeze(0) 47 | # score = score * mask_source.float().to(self.device) 48 | mask_source = source.t().eq(pad_value).unsqueeze(0) 49 | score.data.masked_fill_(mask_source, 0) 50 | 51 | a = score / torch.sum(score, dim=-1, keepdim=True) 52 | c = torch.einsum('jik,kil->jil', (a, hs)) 53 | 54 | h = torch.cat((c, ht), -1) 55 | return torch.tanh(torch.einsum('jik,kl->jil', (h, self.W_c)) + self.b) 56 | -------------------------------------------------------------------------------- /models/layers/DotProductAttention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class DotProductAttention(nn.Module): 6 | def __init__(self, 7 | # d_model, 8 | device='cpu'): 9 | super().__init__() 10 | self.device = device 11 | 12 | def forward(self, q, k, v, mask=None): 13 | 
''' 14 | # Argument 15 | q, k, v: (batch, sequence, out_features) 16 | mask: (batch, sequence) 17 | ''' 18 | score = torch.einsum('ijk,ilk->ijl', (q, k)) 19 | score = score - torch.max(score, 20 | dim=-1, 21 | keepdim=True)[0] # softmax max trick 22 | 23 | score = torch.exp(score) 24 | if mask is not None: 25 | # suppose `mask` is a mask of source 26 | # in source-target-attention, source is `k` and `v` 27 | if len(mask.size()) == 2: 28 | mask = mask.unsqueeze(1).repeat(1, score.size(1), 1) 29 | # score = score * mask.float().to(self.device) 30 | score.data.masked_fill_(mask, 0) 31 | 32 | a = score / torch.sum(score, dim=-1, keepdim=True) 33 | c = torch.einsum('ijk,ikl->ijl', (a, v)) 34 | 35 | return c 36 | -------------------------------------------------------------------------------- /models/layers/Flatten.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class Flatten(nn.Module): 5 | def __init__(self, 6 | device='cpu'): 7 | super().__init__() 8 | 9 | def forward(self, x): 10 | return x.view(x.size(0), -1) 11 | -------------------------------------------------------------------------------- /models/layers/GlobalAvgPool2d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class GlobalAvgPool2d(nn.Module): 7 | def __init__(self, 8 | device='cpu'): 9 | super().__init__() 10 | 11 | def forward(self, x): 12 | return F.avg_pool2d(x, kernel_size=x.size()[2:]).view(-1, x.size(1)) 13 | -------------------------------------------------------------------------------- /models/layers/MultiHeadAttention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .ScaledDotProductAttention import ScaledDotProductAttention 4 | 5 | 6 | class MultiHeadAttention(nn.Module): 7 | def __init__(self, 8 | h, 9 | d_model, 10 | device='cpu'): 11 | super().__init__() 12 | self.h = h 13 | self.d_model = d_model 14 | self.d_k = d_k = d_model // h 15 | self.d_v = d_v = d_model // h 16 | self.device = device 17 | 18 | self.W_q = nn.Parameter(torch.Tensor(h, 19 | d_model, 20 | d_k)) 21 | 22 | self.W_k = nn.Parameter(torch.Tensor(h, 23 | d_model, 24 | d_k)) 25 | 26 | self.W_v = nn.Parameter(torch.Tensor(h, 27 | d_model, 28 | d_v)) 29 | 30 | nn.init.xavier_normal_(self.W_q) 31 | nn.init.xavier_normal_(self.W_k) 32 | nn.init.xavier_normal_(self.W_v) 33 | 34 | self.attn = ScaledDotProductAttention(d_k) 35 | self.linear = nn.Linear((h * d_v), d_model) 36 | nn.init.xavier_normal_(self.linear.weight) 37 | 38 | def forward(self, q, k, v, mask=None): 39 | ''' 40 | # Argument 41 | q, k, v: (batch, sequence, out_features) 42 | mask: (batch, sequence) 43 | ''' 44 | batch_size = q.size(0) 45 | 46 | q = torch.einsum('hijk,hkl->hijl', 47 | (q.unsqueeze(0).repeat(self.h, 1, 1, 1), 48 | self.W_q)) 49 | k = torch.einsum('hijk,hkl->hijl', 50 | (k.unsqueeze(0).repeat(self.h, 1, 1, 1), 51 | self.W_k)) 52 | v = torch.einsum('hijk,hkl->hijl', 53 | (v.unsqueeze(0).repeat(self.h, 1, 1, 1), 54 | self.W_v)) 55 | 56 | q = q.view(-1, q.size(-2), q.size(-1)) 57 | k = k.view(-1, k.size(-2), k.size(-1)) 58 | v = v.view(-1, v.size(-2), v.size(-1)) 59 | 60 | if mask is not None: 61 | multiples = [self.h] + [1] * (len(mask.size()) - 1) 62 | mask = mask.repeat(multiples) 63 | 64 | c = self.attn(q, k, v, mask=mask) 65 | c = torch.split(c, batch_size, dim=0) 66 | c = torch.cat(c, 
dim=-1) 67 | 68 | out = self.linear(c) 69 | 70 | return out 71 | -------------------------------------------------------------------------------- /models/layers/PositionalEncoding.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from torch.autograd import Variable 5 | 6 | 7 | class PositionalEncoding(nn.Module): 8 | ''' 9 | Positional encoding layer with sinusoid 10 | ''' 11 | def __init__(self, output_dim, 12 | max_len=6000, 13 | device='cpu'): 14 | super().__init__() 15 | self.output_dim = output_dim 16 | self.max_len = max_len 17 | pe = self.initializer() 18 | self.register_buffer('pe', pe) 19 | 20 | def forward(self, x, mask=None): 21 | ''' 22 | # Argument 23 | x: (batch, sequence) 24 | ''' 25 | pe = self.pe[:x.size(1), :].unsqueeze(0) 26 | return x + Variable(pe, requires_grad=False) 27 | 28 | def initializer(self): 29 | pe = \ 30 | np.array([[pos / np.power(10000, 2 * (i // 2) / self.output_dim) 31 | for i in range(self.output_dim)] 32 | for pos in range(self.max_len)]) 33 | 34 | pe[:, 0::2] = np.sin(pe[:, 0::2]) 35 | pe[:, 1::2] = np.cos(pe[:, 1::2]) 36 | 37 | return torch.from_numpy(pe).float() 38 | -------------------------------------------------------------------------------- /models/layers/ScaledDotProductAttention.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | 6 | class ScaledDotProductAttention(nn.Module): 7 | def __init__(self, 8 | d_k, 9 | device='cpu'): 10 | super().__init__() 11 | self.device = device 12 | self.scaler = np.sqrt(d_k) 13 | 14 | def forward(self, q, k, v, mask=None): 15 | ''' 16 | # Argument 17 | q, k, v: (batch, sequence, out_features) 18 | mask: (batch, sequence) 19 | ''' 20 | score = torch.einsum('ijk,ilk->ijl', (q, k)) / self.scaler 21 | score = score - torch.max(score, 22 | dim=-1, 23 | keepdim=True)[0] # softmax max trick 24 | 25 | score = torch.exp(score) 26 | if mask is not None: 27 | # suppose `mask` is a mask of source 28 | # in source-target-attention, source is `k` and `v` 29 | if len(mask.size()) == 2: 30 | mask = mask.unsqueeze(1).repeat(1, score.size(1), 1) 31 | # score = score * mask.float().to(self.device) 32 | score.data.masked_fill_(mask, 0) 33 | 34 | a = score / torch.sum(score, dim=-1, keepdim=True) 35 | c = torch.einsum('ijk,ikl->ijl', (a, v)) 36 | 37 | return c 38 | -------------------------------------------------------------------------------- /models/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .Attention import Attention 2 | from .DotProductAttention import DotProductAttention 3 | from .Flatten import Flatten 4 | from .GlobalAvgPool2d import GlobalAvgPool2d 5 | from .MultiHeadAttention import MultiHeadAttention 6 | from .PositionalEncoding import PositionalEncoding 7 | from .ScaledDotProductAttention import ScaledDotProductAttention 8 | -------------------------------------------------------------------------------- /models/lenet_mnist.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.optim as optimizers 6 | import torch.nn.functional as F 7 | from torch.utils.data import Dataset, DataLoader 8 | import torchvision 9 | import torchvision.transforms as transforms 10 | from sklearn.metrics import accuracy_score 11 | 12 | 13 | 
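# Shape bookkeeping for LeNet on 28x28 MNIST inputs: conv(5x5) -> 24x24,
# maxpool -> 12x12, conv(5x5) -> 8x8, maxpool -> 4x4; flattening the 16
# final feature maps gives the 16*4*4 = 256 inputs expected by fc1.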
class LeNet(nn.Module): 14 | def __init__(self): 15 | super().__init__() 16 | self.conv1 = nn.Conv2d(1, 6, kernel_size=(5, 5)) 17 | self.pooling1 = nn.MaxPool2d(kernel_size=(2, 2)) 18 | self.conv2 = nn.Conv2d(6, 16, kernel_size=(5, 5)) 19 | self.pooling2 = nn.MaxPool2d(kernel_size=(2, 2)) 20 | self.fc1 = nn.Linear(256, 120) 21 | self.fc2 = nn.Linear(120, 84) 22 | self.out = nn.Linear(84, 10) 23 | 24 | def forward(self, x): 25 | x = self.conv1(x) 26 | x = self.pooling1(x) 27 | x = self.conv2(x) 28 | x = self.pooling2(x) 29 | x = x.view(x.size(0), -1) 30 | x = self.fc1(x) 31 | x = torch.relu(x) 32 | x = self.fc2(x) 33 | x = torch.relu(x) 34 | x = self.out(x) 35 | y = torch.log_softmax(x, dim=-1) 36 | 37 | return y 38 | 39 | 40 | if __name__ == '__main__': 41 | np.random.seed(1234) 42 | torch.manual_seed(1234) 43 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 44 | 45 | def compute_loss(label, pred): 46 | return criterion(pred, label) 47 | 48 | def train_step(x, t): 49 | model.train() 50 | preds = model(x) 51 | loss = compute_loss(t, preds) 52 | optimizer.zero_grad() 53 | loss.backward() 54 | optimizer.step() 55 | 56 | return loss, preds 57 | 58 | def test_step(x, t): 59 | model.eval() 60 | preds = model(x) 61 | loss = compute_loss(t, preds) 62 | 63 | return loss, preds 64 | 65 | ''' 66 | Load data 67 | ''' 68 | root = os.path.join(os.path.dirname(__file__), '..', 'data', 'mnist') 69 | transform = transforms.Compose([transforms.ToTensor()]) 70 | mnist_train = \ 71 | torchvision.datasets.MNIST(root=root, 72 | download=True, 73 | train=True, 74 | transform=transform) 75 | mnist_test = \ 76 | torchvision.datasets.MNIST(root=root, 77 | download=True, 78 | train=False, 79 | transform=transform) 80 | 81 | train_dataloader = DataLoader(mnist_train, 82 | batch_size=100, 83 | shuffle=True) 84 | test_dataloader = DataLoader(mnist_test, 85 | batch_size=100, 86 | shuffle=False) 87 | 88 | ''' 89 | Build model 90 | ''' 91 | model = LeNet().to(device) 92 | criterion = nn.NLLLoss() 93 | optimizer = optimizers.Adam(model.parameters()) 94 | 95 | ''' 96 | Train model 97 | ''' 98 | epochs = 10 99 | 100 | for epoch in range(epochs): 101 | train_loss = 0. 102 | test_loss = 0. 103 | test_acc = 0. 
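# Each epoch averages the training loss over batches; every 5th epoch
# (and the last) the test loss and accuracy are also reported, averaged
# over test batches.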
104 | 105 | for (x, t) in train_dataloader: 106 | x, t = x.to(device), t.to(device) 107 | loss, _ = train_step(x, t) 108 | train_loss += loss.item() 109 | 110 | train_loss /= len(train_dataloader) 111 | 112 | if epoch % 5 == 4 or epoch == epochs - 1: 113 | for (x, t) in test_dataloader: 114 | x, t = x.to(device), t.to(device) 115 | loss, preds = test_step(x, t) 116 | test_loss += loss.item() 117 | test_acc += \ 118 | accuracy_score(t.tolist(), preds.argmax(dim=-1).tolist()) 119 | 120 | test_loss /= len(test_dataloader) 121 | test_acc /= len(test_dataloader) 122 | print('Epoch: {}, Valid Cost: {:.3f}, Valid Acc: {:.3f}'.format( 123 | epoch+1, 124 | test_loss, 125 | test_acc 126 | )) 127 | -------------------------------------------------------------------------------- /models/logistic_regression_mnist.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.optim as optimizers 6 | from torch.utils.data import Dataset, DataLoader 7 | import torchvision 8 | import torchvision.transforms as transforms 9 | from sklearn.metrics import accuracy_score 10 | 11 | 12 | class LogisticRegression(nn.Module): 13 | def __init__(self): 14 | super().__init__() 15 | self.linear = nn.Linear(784, 10) 16 | 17 | def forward(self, x): 18 | x = self.linear(x) 19 | y = torch.log_softmax(x, dim=-1) 20 | return y 21 | 22 | 23 | if __name__ == '__main__': 24 | np.random.seed(1234) 25 | torch.manual_seed(1234) 26 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 27 | 28 | def compute_loss(label, pred): 29 | return criterion(pred, label) 30 | 31 | def train_step(x, t): 32 | model.train() 33 | preds = model(x) 34 | loss = compute_loss(t, preds) 35 | optimizer.zero_grad() 36 | loss.backward() 37 | optimizer.step() 38 | 39 | return loss, preds 40 | 41 | def test_step(x, t): 42 | model.eval() 43 | preds = model(x) 44 | loss = compute_loss(t, preds) 45 | 46 | return loss, preds 47 | 48 | ''' 49 | Load data 50 | ''' 51 | root = os.path.join(os.path.dirname(__file__), '..', 'data', 'mnist') 52 | transform = transforms.Compose([transforms.ToTensor(), 53 | lambda x: x.view(-1)]) 54 | mnist_train = \ 55 | torchvision.datasets.MNIST(root=root, 56 | download=True, 57 | train=True, 58 | transform=transform) 59 | mnist_test = \ 60 | torchvision.datasets.MNIST(root=root, 61 | download=True, 62 | train=False, 63 | transform=transform) 64 | 65 | train_dataloader = DataLoader(mnist_train, 66 | batch_size=100, 67 | shuffle=True) 68 | test_dataloader = DataLoader(mnist_test, 69 | batch_size=100, 70 | shuffle=False) 71 | 72 | ''' 73 | Build model 74 | ''' 75 | model = LogisticRegression().to(device) 76 | criterion = nn.NLLLoss() 77 | optimizer = optimizers.Adam(model.parameters()) 78 | 79 | ''' 80 | Train model 81 | ''' 82 | epochs = 10 83 | 84 | for epoch in range(epochs): 85 | train_loss = 0. 86 | test_loss = 0. 87 | test_acc = 0. 
88 | 89 | for (x, t) in train_dataloader: 90 | x, t = x.to(device), t.to(device) 91 | loss, _ = train_step(x, t) 92 | train_loss += loss.item() 93 | 94 | train_loss /= len(train_dataloader) 95 | 96 | if epoch % 5 == 4 or epoch == epochs - 1: 97 | for (x, t) in test_dataloader: 98 | x, t = x.to(device), t.to(device) 99 | loss, preds = test_step(x, t) 100 | test_loss += loss.item() 101 | test_acc += \ 102 | accuracy_score(t.tolist(), preds.argmax(dim=-1).tolist()) 103 | 104 | test_loss /= len(test_dataloader) 105 | test_acc /= len(test_dataloader) 106 | print('Epoch: {}, Valid Cost: {:.3f}, Valid Acc: {:.3f}'.format( 107 | epoch+1, 108 | test_loss, 109 | test_acc 110 | )) 111 | -------------------------------------------------------------------------------- /models/mlp_mnist.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.optim as optimizers 6 | from torch.utils.data import Dataset, DataLoader 7 | import torchvision 8 | import torchvision.transforms as transforms 9 | from sklearn.metrics import accuracy_score 10 | 11 | 12 | class MLP(nn.Module): 13 | def __init__(self): 14 | super().__init__() 15 | self.l1 = nn.Linear(784, 200) 16 | self.l2 = nn.Linear(200, 10) 17 | 18 | def forward(self, x): 19 | x = self.l1(x) 20 | x = torch.relu(x) 21 | x = self.l2(x) 22 | y = torch.log_softmax(x, dim=-1) 23 | return y 24 | 25 | 26 | if __name__ == '__main__': 27 | np.random.seed(1234) 28 | torch.manual_seed(1234) 29 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 30 | 31 | def compute_loss(label, pred): 32 | return criterion(pred, label) 33 | 34 | def train_step(x, t): 35 | model.train() 36 | preds = model(x) 37 | loss = compute_loss(t, preds) 38 | optimizer.zero_grad() 39 | loss.backward() 40 | optimizer.step() 41 | 42 | return loss, preds 43 | 44 | def test_step(x, t): 45 | model.eval() 46 | preds = model(x) 47 | loss = compute_loss(t, preds) 48 | 49 | return loss, preds 50 | 51 | ''' 52 | Load data 53 | ''' 54 | root = os.path.join(os.path.dirname(__file__), '..', 'data', 'mnist') 55 | transform = transforms.Compose([transforms.ToTensor(), 56 | lambda x: x.view(-1)]) 57 | mnist_train = \ 58 | torchvision.datasets.MNIST(root=root, 59 | download=True, 60 | train=True, 61 | transform=transform) 62 | mnist_test = \ 63 | torchvision.datasets.MNIST(root=root, 64 | download=True, 65 | train=False, 66 | transform=transform) 67 | 68 | train_dataloader = DataLoader(mnist_train, 69 | batch_size=100, 70 | shuffle=True) 71 | test_dataloader = DataLoader(mnist_test, 72 | batch_size=100, 73 | shuffle=False) 74 | 75 | ''' 76 | Build model 77 | ''' 78 | model = MLP().to(device) 79 | criterion = nn.NLLLoss() 80 | optimizer = optimizers.Adam(model.parameters()) 81 | 82 | ''' 83 | Train model 84 | ''' 85 | epochs = 10 86 | 87 | for epoch in range(epochs): 88 | train_loss = 0. 89 | test_loss = 0. 90 | test_acc = 0. 
91 | 92 | for (x, t) in train_dataloader: 93 | x, t = x.to(device), t.to(device) 94 | loss, _ = train_step(x, t) 95 | train_loss += loss.item() 96 | 97 | train_loss /= len(train_dataloader) 98 | 99 | if epoch % 5 == 4 or epoch == epochs - 1: 100 | for (x, t) in test_dataloader: 101 | x, t = x.to(device), t.to(device) 102 | loss, preds = test_step(x, t) 103 | test_loss += loss.item() 104 | test_acc += \ 105 | accuracy_score(t.tolist(), preds.argmax(dim=-1).tolist()) 106 | 107 | test_loss /= len(test_dataloader) 108 | test_acc /= len(test_dataloader) 109 | print('Epoch: {}, Valid Cost: {:.3f}, Valid Acc: {:.3f}'.format( 110 | epoch+1, 111 | test_loss, 112 | test_acc 113 | )) 114 | -------------------------------------------------------------------------------- /models/resnet34_fashion_mnist.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.optim as optimizers 6 | import torch.nn.functional as F 7 | from torch.utils.data import Dataset, DataLoader 8 | import torchvision 9 | import torchvision.transforms as transforms 10 | from sklearn.metrics import accuracy_score 11 | from layers import GlobalAvgPool2d 12 | 13 | 14 | class ResNet34(nn.Module): 15 | def __init__(self, output_dim): 16 | super().__init__() 17 | self.conv1 = nn.Conv2d(1, 64, 18 | kernel_size=(7, 7), 19 | stride=(2, 2), 20 | padding=3) 21 | self.bn1 = nn.BatchNorm2d(64) 22 | self.relu1 = nn.ReLU() 23 | self.pool1 = nn.MaxPool2d(kernel_size=(3, 3), 24 | stride=(2, 2), 25 | padding=1) 26 | self.block1 = nn.ModuleList([ 27 | self._building_block(64) for _ in range(3) 28 | ]) 29 | self.conv2 = nn.Conv2d(64, 128, 30 | kernel_size=(1, 1), 31 | stride=(2, 2)) 32 | self.block2 = nn.ModuleList([ 33 | self._building_block(128) for _ in range(4) 34 | ]) 35 | self.conv3 = nn.Conv2d(128, 256, 36 | kernel_size=(1, 1), 37 | stride=(2, 2)) 38 | self.block3 = nn.ModuleList([ 39 | self._building_block(256) for _ in range(6) 40 | ]) 41 | self.conv4 = nn.Conv2d(256, 512, 42 | kernel_size=(1, 1), 43 | stride=(2, 2)) 44 | self.block4 = nn.ModuleList([ 45 | self._building_block(512) for _ in range(3) 46 | ]) 47 | self.avg_pool = GlobalAvgPool2d() 48 | self.fc = nn.Linear(512, 1000) 49 | self.out = nn.Linear(1000, output_dim) 50 | 51 | def forward(self, x): 52 | h = self.conv1(x) 53 | h = self.bn1(h) 54 | h = self.relu1(h) 55 | h = self.pool1(h) 56 | for block in self.block1: 57 | h = block(h) 58 | h = self.conv2(h) 59 | for block in self.block2: 60 | h = block(h) 61 | h = self.conv3(h) 62 | for block in self.block3: 63 | h = block(h) 64 | h = self.conv4(h) 65 | for block in self.block4: 66 | h = block(h) 67 | h = self.avg_pool(h) 68 | h = self.fc(h) 69 | h = torch.relu(h) 70 | h = self.out(h) 71 | y = torch.log_softmax(h, dim=-1) 72 | 73 | return y 74 | 75 | def _building_block(self, channel_out=64): 76 | channel_in = channel_out 77 | return Block(channel_in, channel_out) 78 | 79 | 80 | class Block(nn.Module): 81 | def __init__(self, channel_in, channel_out): 82 | super().__init__() 83 | self.conv1 = nn.Conv2d(channel_in, channel_out, 84 | kernel_size=(3, 3), 85 | padding=1) 86 | self.bn1 = nn.BatchNorm2d(channel_out) 87 | self.relu1 = nn.ReLU() 88 | self.conv2 = nn.Conv2d(channel_out, channel_out, 89 | kernel_size=(3, 3), 90 | padding=1) 91 | self.bn2 = nn.BatchNorm2d(channel_out) 92 | self.relu2 = nn.ReLU() 93 | 94 | def forward(self, x): 95 | h = self.conv1(x) 96 | h = self.bn1(h) 97 | h = self.relu1(h) 98 | h = 
self.conv2(h) 99 | h = self.bn2(h) 100 | y = self.relu2(x + h) 101 | return y 102 | 103 | 104 | if __name__ == '__main__': 105 | np.random.seed(1234) 106 | torch.manual_seed(1234) 107 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 108 | 109 | def compute_loss(label, pred): 110 | return criterion(pred, label) 111 | 112 | def train_step(x, t): 113 | model.train() 114 | preds = model(x) 115 | loss = compute_loss(t, preds) 116 | optimizer.zero_grad() 117 | loss.backward() 118 | optimizer.step() 119 | 120 | return loss, preds 121 | 122 | def test_step(x, t): 123 | model.eval() 124 | preds = model(x) 125 | loss = compute_loss(t, preds) 126 | 127 | return loss, preds 128 | 129 | ''' 130 | Load data 131 | ''' 132 | root = os.path.join(os.path.dirname(__file__), 133 | '..', 'data', 'fashion_mnist') 134 | transform = transforms.Compose([transforms.ToTensor()]) 135 | mnist_train = \ 136 | torchvision.datasets.FashionMNIST(root=root, 137 | download=True, 138 | train=True, 139 | transform=transform) 140 | mnist_test = \ 141 | torchvision.datasets.FashionMNIST(root=root, 142 | download=True, 143 | train=False, 144 | transform=transform) 145 | 146 | train_dataloader = DataLoader(mnist_train, 147 | batch_size=100, 148 | shuffle=True) 149 | test_dataloader = DataLoader(mnist_test, 150 | batch_size=100, 151 | shuffle=False) 152 | 153 | ''' 154 | Build model 155 | ''' 156 | model = ResNet34(10).to(device) 157 | criterion = nn.NLLLoss() 158 | optimizer = optimizers.Adam(model.parameters(), weight_decay=0.01) 159 | 160 | ''' 161 | Train model 162 | ''' 163 | epochs = 1 164 | 165 | for epoch in range(epochs): 166 | train_loss = 0. 167 | test_loss = 0. 168 | test_acc = 0. 169 | 170 | for (x, t) in train_dataloader: 171 | x, t = x.to(device), t.to(device) 172 | loss, _ = train_step(x, t) 173 | train_loss += loss.item() 174 | 175 | train_loss /= len(train_dataloader) 176 | 177 | for (x, t) in test_dataloader: 178 | x, t = x.to(device), t.to(device) 179 | loss, preds = test_step(x, t) 180 | test_loss += loss.item() 181 | test_acc += \ 182 | accuracy_score(t.tolist(), preds.argmax(dim=-1).tolist()) 183 | 184 | test_loss /= len(test_dataloader) 185 | test_acc /= len(test_dataloader) 186 | print('Epoch: {}, Valid Cost: {:.3f}, Valid Acc: {:.3f}'.format( 187 | epoch+1, 188 | test_loss, 189 | test_acc 190 | )) 191 | -------------------------------------------------------------------------------- /models/resnet50_fashion_mnist.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.optim as optimizers 6 | import torch.nn.functional as F 7 | from torch.utils.data import Dataset, DataLoader 8 | import torchvision 9 | import torchvision.transforms as transforms 10 | from sklearn.metrics import accuracy_score 11 | from layers import GlobalAvgPool2d 12 | 13 | 14 | class ResNet50(nn.Module): 15 | def __init__(self, output_dim): 16 | super().__init__() 17 | self.conv1 = nn.Conv2d(1, 64, 18 | kernel_size=(7, 7), 19 | stride=(2, 2), 20 | padding=3) 21 | self.bn1 = nn.BatchNorm2d(64) 22 | self.relu1 = nn.ReLU() 23 | self.pool1 = nn.MaxPool2d(kernel_size=(3, 3), 24 | stride=(2, 2), 25 | padding=1) 26 | self.block0 = self._building_block(256, channel_in=64) 27 | self.block1 = nn.ModuleList([ 28 | self._building_block(256) for _ in range(2) 29 | ]) 30 | self.conv2 = nn.Conv2d(256, 512, 31 | kernel_size=(1, 1), 32 | stride=(2, 2)) 33 | self.block2 = nn.ModuleList([ 34 | 
self._building_block(512) for _ in range(4) 35 | ]) 36 | self.conv3 = nn.Conv2d(512, 1024, 37 | kernel_size=(1, 1), 38 | stride=(2, 2)) 39 | self.block3 = nn.ModuleList([ 40 | self._building_block(1024) for _ in range(6) 41 | ]) 42 | self.conv4 = nn.Conv2d(1024, 2048, 43 | kernel_size=(1, 1), 44 | stride=(2, 2)) 45 | self.block4 = nn.ModuleList([ 46 | self._building_block(2048) for _ in range(3) 47 | ]) 48 | self.avg_pool = GlobalAvgPool2d() 49 | self.fc = nn.Linear(2048, 1000) 50 | self.out = nn.Linear(1000, output_dim) 51 | 52 | def forward(self, x): 53 | h = self.conv1(x) 54 | h = self.bn1(h) 55 | h = self.relu1(h) 56 | h = self.pool1(h) 57 | h = self.block0(h) 58 | for block in self.block1: 59 | h = block(h) 60 | h = self.conv2(h) 61 | for block in self.block2: 62 | h = block(h) 63 | h = self.conv3(h) 64 | for block in self.block3: 65 | h = block(h) 66 | h = self.conv4(h) 67 | for block in self.block4: 68 | h = block(h) 69 | h = self.avg_pool(h) 70 | h = self.fc(h) 71 | h = torch.relu(h) 72 | h = self.out(h) 73 | y = torch.log_softmax(h, dim=-1) 74 | 75 | return y 76 | 77 | def _building_block(self, 78 | channel_out=256, 79 | channel_in=None): 80 | if channel_in is None: 81 | channel_in = channel_out 82 | return Block(channel_in, channel_out) 83 | 84 | 85 | class Block(nn.Module): 86 | def __init__(self, channel_in=64, channel_out=256): 87 | super().__init__() 88 | channel = channel_out // 4 89 | self.conv1 = nn.Conv2d(channel_in, channel, 90 | kernel_size=(1, 1)) 91 | self.bn1 = nn.BatchNorm2d(channel) 92 | self.relu1 = nn.ReLU() 93 | self.conv2 = nn.Conv2d(channel, channel, 94 | kernel_size=(3, 3), 95 | padding=1) 96 | self.bn2 = nn.BatchNorm2d(channel) 97 | self.relu2 = nn.ReLU() 98 | self.conv3 = nn.Conv2d(channel, channel_out, 99 | kernel_size=(1, 1), 100 | padding=0) 101 | self.bn3 = nn.BatchNorm2d(channel_out) 102 | self.shortcut = self._shortcut(channel_in, channel_out) 103 | self.relu3 = nn.ReLU() 104 | 105 | def forward(self, x): 106 | h = self.conv1(x) 107 | h = self.bn1(h) 108 | h = self.relu1(h) 109 | h = self.conv2(h) 110 | h = self.bn2(h) 111 | h = self.relu2(h) 112 | h = self.conv3(h) 113 | h = self.bn3(h) 114 | shortcut = self.shortcut(x) 115 | y = self.relu3(h + shortcut) 116 | return y 117 | 118 | def _shortcut(self, channel_in, channel_out): 119 | if channel_in != channel_out: 120 | return self._projection(channel_in, channel_out) 121 | else: 122 | return lambda x: x 123 | 124 | def _projection(self, channel_in, channel_out): 125 | return nn.Conv2d(channel_in, channel_out, 126 | kernel_size=(1, 1), 127 | padding=0) 128 | 129 | 130 | if __name__ == '__main__': 131 | np.random.seed(1234) 132 | torch.manual_seed(1234) 133 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 134 | 135 | def compute_loss(label, pred): 136 | return criterion(pred, label) 137 | 138 | def train_step(x, t): 139 | model.train() 140 | preds = model(x) 141 | loss = compute_loss(t, preds) 142 | optimizer.zero_grad() 143 | loss.backward() 144 | optimizer.step() 145 | 146 | return loss, preds 147 | 148 | def test_step(x, t): 149 | model.eval() 150 | preds = model(x) 151 | loss = compute_loss(t, preds) 152 | 153 | return loss, preds 154 | 155 | ''' 156 | Load data 157 | ''' 158 | root = os.path.join(os.path.dirname(__file__), 159 | '..', 'data', 'fashion_mnist') 160 | transform = transforms.Compose([transforms.ToTensor()]) 161 | mnist_train = \ 162 | torchvision.datasets.FashionMNIST(root=root, 163 | download=True, 164 | train=True, 165 | transform=transform) 166 | mnist_test = \ 
167 | torchvision.datasets.FashionMNIST(root=root, 168 | download=True, 169 | train=False, 170 | transform=transform) 171 | 172 | train_dataloader = DataLoader(mnist_train, 173 | batch_size=100, 174 | shuffle=True) 175 | test_dataloader = DataLoader(mnist_test, 176 | batch_size=100, 177 | shuffle=False) 178 | 179 | ''' 180 | Build model 181 | ''' 182 | model = ResNet50(10).to(device) 183 | criterion = nn.NLLLoss() 184 | optimizer = optimizers.Adam(model.parameters(), weight_decay=0.01) 185 | 186 | ''' 187 | Train model 188 | ''' 189 | epochs = 1 190 | 191 | for epoch in range(epochs): 192 | train_loss = 0. 193 | test_loss = 0. 194 | test_acc = 0. 195 | 196 | for (x, t) in train_dataloader: 197 | x, t = x.to(device), t.to(device) 198 | loss, _ = train_step(x, t) 199 | train_loss += loss.item() 200 | 201 | train_loss /= len(train_dataloader) 202 | 203 | for (x, t) in test_dataloader: 204 | x, t = x.to(device), t.to(device) 205 | loss, preds = test_step(x, t) 206 | test_loss += loss.item() 207 | test_acc += \ 208 | accuracy_score(t.tolist(), preds.argmax(dim=-1).tolist()) 209 | 210 | test_loss /= len(test_dataloader) 211 | test_acc /= len(test_dataloader) 212 | print('Epoch: {}, Valid Cost: {:.3f}, Valid Acc: {:.3f}'.format( 213 | epoch+1, 214 | test_loss, 215 | test_acc 216 | )) 217 | -------------------------------------------------------------------------------- /models/transformer.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.optim as optimizers 6 | # from torch.utils.data import Dataset, DataLoader 7 | from utils.datasets.small_parallel_enja import load_small_parallel_enja 8 | from utils.preprocessing.sequence import pad_sequences, sort 9 | from sklearn.utils import shuffle 10 | from layers import PositionalEncoding 11 | from layers import MultiHeadAttention 12 | 13 | 14 | class Transformer(nn.Module): 15 | def __init__(self, 16 | depth_source, 17 | depth_target, 18 | N=6, 19 | h=8, 20 | d_model=512, 21 | d_ff=2048, 22 | p_dropout=0.1, 23 | max_len=20, 24 | bos_value=1, 25 | device='cpu'): 26 | super().__init__() 27 | self.device = device 28 | self.encoder = Encoder(depth_source, 29 | N=N, 30 | h=h, 31 | d_model=d_model, 32 | d_ff=d_ff, 33 | p_dropout=p_dropout, 34 | max_len=max_len, 35 | device=device) 36 | self.decoder = Decoder(depth_target, 37 | N=N, 38 | h=h, 39 | d_model=d_model, 40 | d_ff=d_ff, 41 | p_dropout=p_dropout, 42 | max_len=max_len, 43 | device=device) 44 | self.out = nn.Linear(d_model, depth_target) 45 | nn.init.xavier_normal_(self.out.weight) 46 | 47 | self._BOS = bos_value 48 | self._max_len = max_len 49 | 50 | def forward(self, source, target=None): 51 | source_mask = self.sequence_mask(source) 52 | 53 | hs = self.encoder(source, mask=source_mask) 54 | 55 | if target is not None: 56 | target = target[:, :-1] 57 | len_target_sequences = target.size(1) 58 | target_mask = self.sequence_mask(target).unsqueeze(1) 59 | subsequent_mask = self.subsequence_mask(target) 60 | target_mask = torch.gt(target_mask + subsequent_mask, 0) 61 | 62 | y = self.decoder(target, hs, 63 | mask=target_mask, 64 | source_mask=source_mask) 65 | output = self.out(y) 66 | else: 67 | batch_size = source.size(0) 68 | len_target_sequences = self._max_len 69 | 70 | output = torch.ones((batch_size, 1), 71 | dtype=torch.long, 72 | device=self.device) * self._BOS 73 | 74 | for t in range(len_target_sequences - 1): 75 | target_mask = self.subsequence_mask(output) 76 | 
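# Greedy decoding: re-run the decoder on the prefix generated so far,
# take the argmax at the final time step, and append it to `output`
# until max_len tokens have been produced.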
out = self.decoder(output, hs, 77 | mask=target_mask, 78 | source_mask=source_mask) 79 | out = self.out(out)[:, -1:, :] 80 | out = out.max(-1)[1] 81 | output = torch.cat((output, out), dim=1) 82 | 83 | return output 84 | 85 | def sequence_mask(self, x): 86 | return x.eq(0) 87 | 88 | def subsequence_mask(self, x): 89 | shape = (x.size(1), x.size(1)) 90 | mask = torch.triu(torch.ones(shape, dtype=torch.uint8), 91 | diagonal=1) 92 | return mask.unsqueeze(0).repeat(x.size(0), 1, 1).to(self.device) 93 | 94 | 95 | class Encoder(nn.Module): 96 | def __init__(self, 97 | depth_source, 98 | N=6, 99 | h=8, 100 | d_model=512, 101 | d_ff=2048, 102 | p_dropout=0.1, 103 | max_len=128, 104 | device='cpu'): 105 | super().__init__() 106 | self.device = device 107 | self.embedding = nn.Embedding(depth_source, 108 | d_model, padding_idx=0) 109 | self.pe = PositionalEncoding(d_model, max_len=max_len) 110 | self.encs = nn.ModuleList([ 111 | EncoderLayer(h=h, 112 | d_model=d_model, 113 | d_ff=d_ff, 114 | p_dropout=p_dropout, 115 | max_len=max_len, 116 | device=device) for _ in range(N)]) 117 | 118 | def forward(self, x, mask=None): 119 | x = self.embedding(x) 120 | y = self.pe(x) 121 | for enc in self.encs: 122 | y = enc(y, mask=mask) 123 | 124 | return y 125 | 126 | 127 | class EncoderLayer(nn.Module): 128 | def __init__(self, 129 | h=8, 130 | d_model=512, 131 | d_ff=2048, 132 | p_dropout=0.1, 133 | max_len=128, 134 | device='cpu'): 135 | super().__init__() 136 | self.attn = MultiHeadAttention(h, d_model) 137 | self.dropout1 = nn.Dropout(p_dropout) 138 | self.norm1 = nn.LayerNorm(d_model) 139 | self.ff = FFN(d_model, d_ff) 140 | self.dropout2 = nn.Dropout(p_dropout) 141 | self.norm2 = nn.LayerNorm(d_model) 142 | 143 | def forward(self, x, mask=None): 144 | h = self.attn(x, x, x, mask=mask) 145 | h = self.dropout1(h) 146 | h = self.norm1(x + h) 147 | y = self.ff(h) 148 | y = self.dropout2(y) 149 | y = self.norm2(h + y) 150 | 151 | return y 152 | 153 | 154 | class Decoder(nn.Module): 155 | def __init__(self, 156 | depth_target, 157 | N=6, 158 | h=8, 159 | d_model=512, 160 | d_ff=2048, 161 | p_dropout=0.1, 162 | max_len=128, 163 | device='cpu'): 164 | super().__init__() 165 | self.device = device 166 | self.embedding = nn.Embedding(depth_target, 167 | d_model, padding_idx=0) 168 | self.pe = PositionalEncoding(d_model, max_len=max_len) 169 | self.decs = nn.ModuleList([ 170 | DecoderLayer(h=h, 171 | d_model=d_model, 172 | d_ff=d_ff, 173 | p_dropout=p_dropout, 174 | max_len=max_len, 175 | device=device) for _ in range(N)]) 176 | 177 | def forward(self, x, hs, 178 | mask=None, 179 | source_mask=None): 180 | x = self.embedding(x) 181 | y = self.pe(x) 182 | 183 | for dec in self.decs: 184 | y = dec(y, hs, 185 | mask=mask, 186 | source_mask=source_mask) 187 | 188 | return y 189 | 190 | 191 | class DecoderLayer(nn.Module): 192 | def __init__(self, 193 | h=8, 194 | d_model=512, 195 | d_ff=2048, 196 | p_dropout=0.1, 197 | max_len=128, 198 | device='cpu'): 199 | super().__init__() 200 | self.self_attn = MultiHeadAttention(h, d_model) 201 | self.dropout1 = nn.Dropout(p_dropout) 202 | self.norm1 = nn.LayerNorm(d_model) 203 | self.src_tgt_attn = MultiHeadAttention(h, d_model) 204 | self.dropout2 = nn.Dropout(p_dropout) 205 | self.norm2 = nn.LayerNorm(d_model) 206 | self.ff = FFN(d_model, d_ff) 207 | self.dropout3 = nn.Dropout(p_dropout) 208 | self.norm3 = nn.LayerNorm(d_model) 209 | 210 | def forward(self, x, hs, 211 | mask=None, 212 | source_mask=None): 213 | h = self.self_attn(x, x, x, mask=mask) 214 | h = self.dropout1(h) 
215 | h = self.norm1(x + h) 216 | 217 | z = self.src_tgt_attn(h, hs, hs, 218 | mask=source_mask) 219 | z = self.dropout2(z) 220 | z = self.norm2(h + z) 221 | 222 | y = self.ff(z) 223 | y = self.dropout3(y) 224 | y = self.norm3(z + y) 225 | 226 | return y 227 | 228 | 229 | class FFN(nn.Module): 230 | ''' 231 | Position-wise Feed-Forward Networks 232 | ''' 233 | def __init__(self, d_model, d_ff, 234 | device='cpu'): 235 | super().__init__() 236 | self.l1 = nn.Linear(d_model, d_ff) 237 | self.l2 = nn.Linear(d_ff, d_model) 238 | # self.l1 = nn.Conv1d(d_model, d_ff, 1) 239 | # self.l2 = nn.Conv1d(d_ff, d_model, 1) 240 | 241 | def forward(self, x): 242 | x = self.l1(x) 243 | x = torch.relu(x) 244 | y = self.l2(x) 245 | return y 246 | 247 | 248 | if __name__ == '__main__': 249 | np.random.seed(1234) 250 | torch.manual_seed(1234) 251 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 252 | 253 | def compute_loss(label, pred): 254 | return criterion(pred, label) 255 | 256 | def train_step(x, t): 257 | model.train() 258 | preds = model(x, t) 259 | loss = compute_loss(t[:, 1:].contiguous().view(-1), 260 | preds.contiguous().view(-1, preds.size(-1))) 261 | 262 | optimizer.zero_grad() 263 | loss.backward() 264 | optimizer.step() 265 | 266 | return loss, preds 267 | 268 | def valid_step(x, t): 269 | model.eval() 270 | preds = model(x, t) 271 | loss = compute_loss(t[:, 1:].contiguous().view(-1), 272 | preds.contiguous().view(-1, preds.size(-1))) 273 | 274 | return loss, preds 275 | 276 | def test_step(x): 277 | model.eval() 278 | preds = model(x) 279 | return preds 280 | 281 | def ids_to_sentence(ids, i2w): 282 | return [i2w[id] for id in ids] 283 | 284 | ''' 285 | Load data 286 | ''' 287 | class ParallelDataLoader(object): 288 | def __init__(self, dataset, 289 | batch_size=128, 290 | shuffle=False, 291 | random_state=None): 292 | if type(dataset) is not tuple: 293 | raise ValueError('argument `dataset` must be tuple,' 294 | ' not {}.'.format(type(dataset))) 295 | self.dataset = list(zip(dataset[0], dataset[1])) 296 | self.batch_size = batch_size 297 | self.shuffle = shuffle 298 | if random_state is None: 299 | random_state = np.random.RandomState(1234) 300 | self.random_state = random_state 301 | self._idx = 0 302 | 303 | def __len__(self): 304 | return len(self.dataset) 305 | 306 | def __iter__(self): 307 | return self 308 | 309 | def __next__(self): 310 | if self._idx >= len(self.dataset): 311 | self._reorder() 312 | raise StopIteration() 313 | 314 | x, y = zip(*self.dataset[self._idx:(self._idx + self.batch_size)]) 315 | x, y = sort(x, y, order='descend') 316 | x = pad_sequences(x, padding='post') 317 | y = pad_sequences(y, padding='post') 318 | 319 | x = torch.LongTensor(x) # not use .t() 320 | y = torch.LongTensor(y) # not use .t() 321 | 322 | self._idx += self.batch_size 323 | 324 | return x, y 325 | 326 | def _reorder(self): 327 | if self.shuffle: 328 | self.dataset = shuffle(self.dataset, 329 | random_state=self.random_state) 330 | self._idx = 0 331 | 332 | (x_train, y_train), \ 333 | (x_test, y_test), \ 334 | (num_x, num_y), \ 335 | (w2i_x, w2i_y), (i2w_x, i2w_y) = \ 336 | load_small_parallel_enja(to_ja=True) 337 | 338 | train_dataloader = ParallelDataLoader((x_train, y_train), 339 | shuffle=True) 340 | valid_dataloader = ParallelDataLoader((x_test, y_test)) 341 | test_dataloader = ParallelDataLoader((x_test, y_test), 342 | batch_size=1, 343 | shuffle=True) 344 | 345 | ''' 346 | Build model 347 | ''' 348 | model = Transformer(num_x, 349 | num_y, 350 | N=3, 351 | h=4, 352 | 
d_model=128, 353 | d_ff=256, 354 | max_len=20, 355 | device=device).to(device) 356 | criterion = nn.CrossEntropyLoss(reduction='sum', ignore_index=0) 357 | optimizer = optimizers.Adam(model.parameters()) 358 | 359 | ''' 360 | Train model 361 | ''' 362 | epochs = 20 363 | 364 | for epoch in range(epochs): 365 | print('-' * 20) 366 | print('Epoch: {}'.format(epoch+1)) 367 | 368 | train_loss = 0. 369 | valid_loss = 0. 370 | 371 | for idx, (source, target) in enumerate(train_dataloader): 372 | source, target = source.to(device), target.to(device) 373 | loss, _ = train_step(source, target) 374 | train_loss += loss.item() 375 | 376 | train_loss /= len(train_dataloader) 377 | 378 | for (source, target) in valid_dataloader: 379 | source, target = source.to(device), target.to(device) 380 | loss, _ = valid_step(source, target) 381 | valid_loss += loss.item() 382 | 383 | valid_loss /= len(valid_dataloader) 384 | print('Valid loss: {:.3}'.format(valid_loss)) 385 | 386 | for idx, (source, target) in enumerate(test_dataloader): 387 | source, target = source.to(device), target.to(device) 388 | out = test_step(source) 389 | out = out.view(-1).tolist() 390 | out = ' '.join(ids_to_sentence(out, i2w_y)) 391 | source = ' '.join(ids_to_sentence(source.view(-1).tolist(), i2w_x)) 392 | target = ' '.join(ids_to_sentence(target.view(-1).tolist(), i2w_y)) 393 | print('>', source) 394 | print('=', target) 395 | print('<', out) 396 | print() 397 | 398 | if idx >= 10: 399 | break 400 | -------------------------------------------------------------------------------- /models/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yusugomori/deeplearning-pytorch/25eeab2eea574f7c57d5ee9cd5e14d64dc2d1d56/models/utils/__init__.py -------------------------------------------------------------------------------- /models/utils/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yusugomori/deeplearning-pytorch/25eeab2eea574f7c57d5ee9cd5e14d64dc2d1d56/models/utils/datasets/__init__.py -------------------------------------------------------------------------------- /models/utils/datasets/small_parallel_enja.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import numpy as np 4 | 5 | 6 | ''' 7 | Download 50k En/Ja Parallel Corpus 8 | from https://github.com/odashi/small_parallel_enja 9 | and transform words to IDs. 
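IDs 0-3 are reserved for special tokens (pad=0, bos=1, eos=2, oov=3 by
default); ordinary vocabulary words are indexed from `index_from` (=4).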
10 | 11 | 12 | Original Source from: 13 | 14 | https://github.com/yusugomori/tftf/blob/master/tftf/datasets/small_parallel_enja.py 15 | ''' 16 | 17 | 18 | def load_small_parallel_enja(path=None, 19 | to_ja=True, 20 | pad_value=0, 21 | start_char=1, 22 | end_char=2, 23 | oov_char=3, 24 | index_from=4, 25 | pad='<pad>', 26 | bos='<s>', 27 | eos='</s>', 28 | oov='<unk>', 29 | add_bos=True, 30 | add_eos=True): 31 | url_base = 'https://raw.githubusercontent.com/' \ 32 | 'odashi/small_parallel_enja/master/' 33 | 34 | path = path or 'small_parallel_enja' 35 | dir_path = os.path.join(os.path.expanduser('~'), 36 | '.tftf', 'datasets', path) 37 | if not os.path.exists(dir_path): 38 | os.makedirs(dir_path) 39 | 40 | f_ja = ['train.ja', 'test.ja'] 41 | f_en = ['train.en', 'test.en'] 42 | 43 | for f in (f_ja + f_en): 44 | f_path = os.path.join(dir_path, f) 45 | if not os.path.exists(f_path): 46 | url = url_base + f 47 | print('Downloading {}'.format(f)) 48 | cmd = ['curl', '-o', f_path, url] 49 | subprocess.call(cmd) 50 | 51 | f_ja_train = os.path.join(dir_path, f_ja[0]) 52 | f_test_ja = os.path.join(dir_path, f_ja[1]) 53 | f_en_train = os.path.join(dir_path, f_en[0]) 54 | f_test_en = os.path.join(dir_path, f_en[1]) 55 | 56 | (ja_train, test_ja), num_words_ja, (w2i_ja, i2w_ja) = \ 57 | _build(f_ja_train, f_test_ja, 58 | pad_value, start_char, end_char, oov_char, index_from, 59 | pad, bos, eos, oov, add_bos, add_eos) 60 | (en_train, test_en), num_words_en, (w2i_en, i2w_en) = \ 61 | _build(f_en_train, f_test_en, 62 | pad_value, start_char, end_char, oov_char, index_from, 63 | pad, bos, eos, oov, add_bos, add_eos) 64 | 65 | if to_ja: 66 | x_train, x_test, num_X, w2i_X, i2w_X = \ 67 | en_train, test_en, num_words_en, w2i_en, i2w_en 68 | y_train, y_test, num_y, w2i_y, i2w_y = \ 69 | ja_train, test_ja, num_words_ja, w2i_ja, i2w_ja 70 | else: 71 | x_train, x_test, num_X, w2i_X, i2w_X = \ 72 | ja_train, test_ja, num_words_ja, w2i_ja, i2w_ja 73 | y_train, y_test, num_y, w2i_y, i2w_y = \ 74 | en_train, test_en, num_words_en, w2i_en, i2w_en 75 | 76 | x_train, x_test = np.array(x_train), np.array(x_test) 77 | y_train, y_test = np.array(y_train), np.array(y_test) 78 | 79 | return (x_train, y_train), (x_test, y_test), \ 80 | (num_X, num_y), (w2i_X, w2i_y), (i2w_X, i2w_y) 81 | 82 | 83 | def _build(f_train, f_test, 84 | pad_value=0, 85 | start_char=1, 86 | end_char=2, 87 | oov_char=3, 88 | index_from=4, 89 | pad='<pad>', 90 | bos='<s>', 91 | eos='</s>', 92 | oov='<unk>', 93 | add_bos=True, 94 | add_eos=True): 95 | 96 | builder = _Builder(pad_value=pad_value, 97 | start_char=start_char, 98 | end_char=end_char, 99 | oov_char=oov_char, 100 | index_from=index_from, 101 | pad=pad, 102 | bos=bos, 103 | eos=eos, 104 | oov=oov, 105 | add_bos=add_bos, 106 | add_eos=add_eos) 107 | builder.fit(f_train) 108 | train = builder.transform(f_train) 109 | test = builder.transform(f_test) 110 | 111 | return (train, test), builder.num_words, (builder.w2i, builder.i2w) 112 | 113 | 114 | class _Builder(object): 115 | def __init__(self, 116 | pad_value=0, 117 | start_char=1, 118 | end_char=2, 119 | oov_char=3, 120 | index_from=4, 121 | pad='<pad>', 122 | bos='<s>', 123 | eos='</s>', 124 | oov='<unk>', 125 | add_bos=True, 126 | add_eos=True): 127 | self._vocab = None 128 | self._w2i = None 129 | self._i2w = None 130 | 131 | self.pad_value = pad_value 132 | self.start_char = start_char 133 | self.end_char = end_char 134 | self.oov_char = oov_char 135 | self.index_from = index_from 136 | self.pad = pad 137 | self.bos = bos 138 | self.eos = eos 139 | self.oov = oov 140 | 141 | self.add_bos = 
add_bos 142 | self.add_eos = add_eos 143 | 144 | @property 145 | def num_words(self): 146 | return max(self._w2i.values()) + 1 147 | 148 | @property 149 | def w2i(self): 150 | ''' 151 | Dict of word to index 152 | ''' 153 | return self._w2i 154 | 155 | @property 156 | def i2w(self): 157 | ''' 158 | Dict of index to word 159 | ''' 160 | return self._i2w 161 | 162 | def fit(self, f_path): 163 | self._vocab = set() 164 | self._w2i = {} 165 | for line in open(f_path, encoding='utf-8'): 166 | _sentence = line.strip().split() 167 | self._vocab.update(_sentence) 168 | 169 | self._w2i = {w: (i + self.index_from) 170 | for i, w in enumerate(self._vocab)} 171 | if self.pad_value >= 0: 172 | self._w2i[self.pad] = self.pad_value 173 | self._w2i[self.bos] = self.start_char 174 | self._w2i[self.eos] = self.end_char 175 | self._w2i[self.oov] = self.oov_char 176 | self._i2w = {i: w for w, i in self._w2i.items()} 177 | 178 | def transform(self, f_path): 179 | if self._vocab is None or self._w2i is None: 180 | raise AttributeError('`{}.fit` must be called before `transform`.' 181 | ''.format(self.__class__.__name__)) 182 | sentences = [] 183 | for line in open(f_path, encoding='utf-8'): 184 | _sentence = line.strip().split() 185 | # _sentence = [self.bos] + _sentence + [self.eos] 186 | if self.add_bos: 187 | _sentence = [self.bos] + _sentence 188 | if self.add_eos: 189 | _sentence = _sentence + [self.eos] 190 | sentences.append(self._encode(_sentence)) 191 | return sentences 192 | 193 | def _encode(self, sentence): 194 | encoded = [] 195 | for w in sentence: 196 | if w not in self._w2i: 197 | id = self.oov_char 198 | else: 199 | id = self._w2i[w] 200 | encoded.append(id) 201 | 202 | return encoded 203 | -------------------------------------------------------------------------------- /models/utils/preprocessing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yusugomori/deeplearning-pytorch/25eeab2eea574f7c57d5ee9cd5e14d64dc2d1d56/models/utils/preprocessing/__init__.py -------------------------------------------------------------------------------- /models/utils/preprocessing/sequence/__init__.py: -------------------------------------------------------------------------------- 1 | from .pad_sequences import pad_sequences 2 | from .sort import sort 3 | -------------------------------------------------------------------------------- /models/utils/preprocessing/sequence/pad_sequences.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Original Source from: 3 | 4 | https://github.com/yusugomori/tftf/blob/master/tftf/preprocessing/sequence/pad_sequences.py 5 | ''' 6 | import numpy as np 7 | 8 | 9 | def pad_sequences(data, 10 | padding='pre', 11 | value=0): 12 | ''' 13 | # Arguments 14 | data: list of lists / np.array of lists 15 | 16 | # Returns 17 | numpy.ndarray 18 | ''' 19 | if type(data[0]) is not list: 20 | raise ValueError('`data` must be a list of lists') 21 | maxlen = len(max(data, key=len)) 22 | 23 | if padding == 'pre': 24 | data = \ 25 | [[value] * (maxlen - len(data[i])) + data[i] 26 | for i in range(len(data))] 27 | elif padding == 'post': 28 | data = \ 29 | [data[i] + [value] * (maxlen - len(data[i])) 30 | for i in range(len(data))] 31 | else: 32 | raise ValueError('`padding` must be one of \'pre\' or \'post\'') 33 | 34 | return np.array(data) 35 | -------------------------------------------------------------------------------- /models/utils/preprocessing/sequence/sort.py: 
-------------------------------------------------------------------------------- 1 | ''' 2 | Original Source from: 3 | 4 | https://github.com/yusugomori/tftf/blob/master/tftf/preprocessing/sequence/sort.py 5 | ''' 6 | 7 | 8 | def sort(data, target, 9 | order='ascend'): 10 | if order == 'ascend' or order == 'ascending': 11 | a = True 12 | elif order == 'descend' or order == 'descending': 13 | a = False 14 | else: 15 | raise ValueError('`order` must be one of \'ascend\' or \'descend\'.') 16 | 17 | lens = [len(i) for i in data] 18 | indices = sorted(range(len(lens)), 19 | key=lambda x: (2 * a - 1) * lens[x]) 20 | data = [data[i] for i in indices] 21 | target = [target[i] for i in indices] 22 | 23 | return (data, target) 24 | -------------------------------------------------------------------------------- /models/vae_fashion_mnist.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.optim as optimizers 6 | import torch.nn.functional as F 7 | from torch.utils.data import Dataset, DataLoader 8 | import torchvision 9 | import torchvision.transforms as transforms 10 | import matplotlib 11 | # matplotlib.use('Agg') 12 | import matplotlib.pyplot as plt 13 | 14 | 15 | class VAE(nn.Module): 16 | ''' 17 | Simple Variational Autoencoder 18 | ''' 19 | def __init__(self, device='cpu'): 20 | super().__init__() 21 | self.device = device 22 | self.encoder = Encoder(device=device) 23 | self.decoder = Decoder(device=device) 24 | 25 | def forward(self, x): 26 | mean, var = self.encoder(x) 27 | z = self.reparameterize(mean, var) 28 | y = self.decoder(z) 29 | 30 | return y 31 | 32 | def reparameterize(self, mean, var): 33 | eps = torch.randn(mean.size()).to(self.device) 34 | z = mean + torch.sqrt(var) * eps 35 | return z 36 | 37 | def lower_bound(self, x): 38 | mean, var = self.encoder(x) 39 | kl = - 1/2 * torch.mean(torch.sum(1 40 | + torch.log(var) 41 | - mean**2 42 | - var, dim=1)) 43 | z = self.reparameterize(mean, var) 44 | y = self.decoder(z) 45 | 46 | reconst = torch.mean(torch.sum(x * torch.log(y) 47 | + (1 - x) * torch.log(1 - y), dim=1)) 48 | 49 | return reconst - kl 50 | 51 | 52 | class Encoder(nn.Module): 53 | def __init__(self, device='cpu'): 54 | super().__init__() 55 | self.device = device 56 | self.l1 = nn.Linear(784, 200) 57 | self.l2 = nn.Linear(200, 200) 58 | self.l_mean = nn.Linear(200, 10) 59 | self.l_var = nn.Linear(200, 10) 60 | 61 | def forward(self, x): 62 | h = self.l1(x) 63 | h = torch.relu(h) 64 | h = self.l2(h) 65 | h = torch.relu(h) 66 | 67 | mean = self.l_mean(h) 68 | var = F.softplus(self.l_var(h)) 69 | 70 | return mean, var 71 | 72 | 73 | class Decoder(nn.Module): 74 | def __init__(self, device='cpu'): 75 | super().__init__() 76 | self.device = device 77 | self.l1 = nn.Linear(10, 200) 78 | self.l2 = nn.Linear(200, 200) 79 | self.out = nn.Linear(200, 784) 80 | 81 | def forward(self, x): 82 | h = self.l1(x) 83 | h = torch.relu(h) 84 | h = self.l2(h) 85 | h = torch.relu(h) 86 | h = self.out(h) 87 | y = torch.sigmoid(h) 88 | 89 | return y 90 | 91 | 92 | if __name__ == '__main__': 93 | np.random.seed(1234) 94 | torch.manual_seed(1234) 95 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 96 | 97 | def compute_loss(x): 98 | return -1 * criterion(x) 99 | 100 | def train_step(x): 101 | model.train() 102 | loss = compute_loss(x) 103 | 104 | optimizer.zero_grad() 105 | loss.backward() 106 | optimizer.step() 107 | 108 | return loss 109 | 110 | def 
generate(batch_size=10): 111 | model.eval() 112 | z = gen_noise(batch_size) 113 | gen = model.decoder(z) 114 | gen = gen.view(-1, 28, 28) 115 | 116 | return gen 117 | 118 | def gen_noise(batch_size): 119 | return torch.empty(batch_size, 10).normal_().to(device) 120 | 121 | ''' 122 | Load data 123 | ''' 124 | root = os.path.join(os.path.dirname(__file__), 125 | '..', 'data', 'fashion_mnist') 126 | transform = transforms.Compose([transforms.ToTensor(), 127 | lambda x: x.view(-1)]) 128 | mnist_train = \ 129 | torchvision.datasets.FashionMNIST(root=root, 130 | download=True, 131 | train=True, 132 | transform=transform) 133 | train_dataloader = DataLoader(mnist_train, 134 | batch_size=100, 135 | shuffle=True) 136 | 137 | ''' 138 | Build model 139 | ''' 140 | model = VAE(device=device).to(device) 141 | criterion = model.lower_bound 142 | optimizer = optimizers.Adam(model.parameters()) 143 | 144 | ''' 145 | Train model 146 | ''' 147 | epochs = 10 148 | out_path = os.path.join(os.path.dirname(__file__), 149 | '..', 'output') 150 | 151 | for epoch in range(epochs): 152 | train_loss = 0. 153 | 154 | for (x, _) in train_dataloader: 155 | x = x.to(device) 156 | loss = train_step(x) 157 | 158 | train_loss += loss.item() 159 | 160 | train_loss /= len(train_dataloader) 161 | 162 | print('Epoch: {}, Cost: {:.3f}'.format( 163 | epoch+1, 164 | train_loss 165 | )) 166 | 167 | if epoch % 5 == 4 or epoch == epochs - 1: 168 | images = generate(batch_size=16) 169 | images = images.squeeze().detach().cpu().numpy() 170 | plt.figure(figsize=(6, 6)) 171 | for i, image in enumerate(images): 172 | plt.subplot(4, 4, i+1) 173 | plt.imshow(image, cmap='binary') 174 | plt.axis('off') 175 | plt.tight_layout() 176 | # plt.show() 177 | template = '{}/vae_fashion_mnist_epoch_{:0>4}.png' 178 | plt.savefig(template.format(out_path, epoch+1), dpi=300) 179 | -------------------------------------------------------------------------------- /output/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yusugomori/deeplearning-pytorch/25eeab2eea574f7c57d5ee9cd5e14d64dc2d1d56/output/.keep --------------------------------------------------------------------------------