├── .gitignore ├── CHANGELOG.md ├── README.md └── src ├── 01_autoencoder_fashion_mnist.py ├── 02_vae_fashion_mnist.py ├── 03_gan_fashion_mnist.py └── data └── .keep /.gitignore: -------------------------------------------------------------------------------- 1 | src/data/* 2 | !src/data/.keep 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # SageMath parsed files 85 | *.sage.py 86 | 87 | # Environments 88 | .env 89 | .venv 90 | env/ 91 | venv/ 92 | ENV/ 93 | env.bak/ 94 | venv.bak/ 95 | 96 | # Spyder project settings 97 | .spyderproject 98 | .spyproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | # mkdocs documentation 104 | /site 105 | 106 | # mypy 107 | .mypy_cache/ 108 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # 書籍バージョン履歴 2 | 3 | **ver. 1.3.1 (2019.04.28)** 4 | 5 | * p20 `mnist_test` を読み込むべきところが `mnist_train` となっていた部分を修正しました。それに伴い、実験結果の画像を差し替えています。 6 | * p58 「識別器 G」と記述されていた部分を修正しました。 7 | 8 |
9 | 10 | 11 | **ver. 1.3.0 (2019.04.24)** 12 | 13 | * p9 式(1.9) KLダイバージェンスの添字の誤りを修正しました。 14 | 15 |
16 | 17 | **ver. 1.2.0 (2019.04.19)** 18 | 19 | * p30 (L5) 生成モデルの数式の誤植を修正しました。 20 | 21 |
22 | 23 | **ver. 1.1.0 (2019.04.18)** 24 | 25 | * p50 「負の交差エントロピー誤差」の部分を削除しました。 26 | * p52 式(3.14) の添字の誤りを修正しました。 27 | 28 |
29 | 30 | **ver. 1.0.1 (2019.04.16)** 31 | 32 | * 3章のセクションレベルを調整しました。本文の修正はありません。 33 | 34 |
35 | 36 | **ver. 1.0.0 (2019.04.16)** 37 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 詳説ディープラーニング(生成モデル編) 2 | 3 | ディープラーニング電子書籍「詳説ディープラーニング(生成モデル編)」の中で紹介しているコード集です。書籍は[noteのページ](https://note.mu/yusugomori/n/n945f51cabc03)もしくは[amazon](https://amzn.to/2Dryztq)から購入ができます。 4 | 5 | ※書籍の修正履歴は[こちら](https://github.com/yusugomori/dl-book-generative/blob/master/CHANGELOG.md)にまとめています。 6 | -------------------------------------------------------------------------------- /src/01_autoencoder_fashion_mnist.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.optim as optimizers 6 | from torch.utils.data import Dataset, DataLoader 7 | import torchvision 8 | import torchvision.transforms as transforms 9 | import matplotlib 10 | # matplotlib.use('Agg') 11 | import matplotlib.pyplot as plt 12 | 13 | 14 | class Autoencoder(nn.Module): 15 | def __init__(self, device='cpu'): 16 | super().__init__() 17 | self.device = device 18 | self.l1 = nn.Linear(784, 200) 19 | self.l2 = nn.Linear(200, 784) 20 | 21 | def forward(self, x): 22 | # encode 23 | h = self.l1(x) 24 | h = torch.relu(h) 25 | 26 | # decode 27 | h = self.l2(h) 28 | y = torch.sigmoid(h) 29 | 30 | return y 31 | 32 | 33 | if __name__ == '__main__': 34 | np.random.seed(1234) 35 | torch.manual_seed(1234) 36 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 37 | 38 | ''' 39 | 1. Load data 40 | ''' 41 | root = os.path.join(os.path.dirname(__file__), 42 | '.', 'data', 'fashion_mnist') 43 | transform = transforms.Compose([transforms.ToTensor(), 44 | lambda x: x.view(-1)]) 45 | mnist_train = \ 46 | torchvision.datasets.FashionMNIST(root=root, 47 | download=True, 48 | train=True, 49 | transform=transform) 50 | mnist_test = \ 51 | torchvision.datasets.FashionMNIST(root=root, 52 | download=True, 53 | train=False, 54 | transform=transform) 55 | train_dataloader = DataLoader(mnist_train, 56 | batch_size=100, 57 | shuffle=True) 58 | test_dataloader = DataLoader(mnist_test, 59 | batch_size=1, 60 | shuffle=False) 61 | 62 | ''' 63 | 2. Build model 64 | ''' 65 | model = Autoencoder(device=device).to(device) 66 | 67 | ''' 68 | 3. Train model 69 | ''' 70 | criterion = nn.BCELoss() 71 | optimizer = optimizers.Adam(model.parameters()) 72 | 73 | def compute_loss(x, preds): 74 | return criterion(preds, x) 75 | 76 | def train_step(x): 77 | model.train() 78 | preds = model(x) 79 | loss = compute_loss(x, preds) 80 | 81 | optimizer.zero_grad() 82 | loss.backward() 83 | optimizer.step() 84 | 85 | return loss 86 | 87 | epochs = 10 88 | 89 | for epoch in range(epochs): 90 | train_loss = 0. 91 | 92 | for (x, _) in train_dataloader: 93 | x = x.to(device) 94 | loss = train_step(x) 95 | 96 | train_loss += loss.item() 97 | 98 | train_loss /= len(train_dataloader) 99 | 100 | print('Epoch: {}, Cost: {:.3f}'.format( 101 | epoch+1, 102 | train_loss 103 | )) 104 | 105 | ''' 106 | 4. Test model 107 | ''' 108 | x, _ = next(iter(test_dataloader)) 109 | noise = torch.bernoulli(0.8 * torch.ones(x.size())).to(device) 110 | x_noise = x * noise 111 | 112 | x_reconstructed = model(x_noise) 113 | 114 | plt.figure(figsize=(18, 6)) 115 | for i, image in enumerate([x, x_noise, x_reconstructed]): 116 | image = image.view(28, 28).detach().cpu().numpy() 117 | plt.subplot(1, 3, i+1) 118 | plt.imshow(image, cmap='binary_r') 119 | plt.axis('off') 120 | plt.show() 121 | -------------------------------------------------------------------------------- /src/02_vae_fashion_mnist.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.optim as optimizers 6 | import torch.nn.functional as F 7 | from torch.utils.data import Dataset, DataLoader 8 | import torchvision 9 | import torchvision.transforms as transforms 10 | import matplotlib 11 | # matplotlib.use('Agg') 12 | import matplotlib.pyplot as plt 13 | 14 | 15 | class VAE(nn.Module): 16 | def __init__(self, device='cpu'): 17 | super().__init__() 18 | self.device = device 19 | self.encoder = Encoder(device=device) 20 | self.decoder = Decoder(device=device) 21 | 22 | def forward(self, x): 23 | mean, var = self.encoder(x) 24 | z = self.reparameterize(mean, var) 25 | y = self.decoder(z) 26 | 27 | return y 28 | 29 | def reparameterize(self, mean, var): 30 | eps = torch.randn(mean.size()).to(self.device) 31 | z = mean + torch.sqrt(var) * eps 32 | return z 33 | 34 | def lower_bound(self, x): 35 | mean, var = self.encoder(x) 36 | kl = - 1/2 * torch.mean(torch.sum(1 37 | + torch.log(var) 38 | - mean**2 39 | - var, dim=1)) 40 | z = self.reparameterize(mean, var) 41 | y = self.decoder(z) 42 | 43 | reconst = torch.mean(torch.sum(x * torch.log(y) 44 | + (1 - x) * torch.log(1 - y), 45 | dim=1)) 46 | 47 | L = reconst - kl 48 | 49 | return L 50 | 51 | 52 | class Encoder(nn.Module): 53 | def __init__(self, device='cpu'): 54 | super().__init__() 55 | self.device = device 56 | self.l1 = nn.Linear(784, 200) 57 | self.l2 = nn.Linear(200, 200) 58 | self.l_mean = nn.Linear(200, 10) 59 | self.l_var = nn.Linear(200, 10) 60 | 61 | def forward(self, x): 62 | h = self.l1(x) 63 | h = torch.relu(h) 64 | h = self.l2(h) 65 | h = torch.relu(h) 66 | 67 | mean = self.l_mean(h) 68 | var = F.softplus(self.l_var(h)) 69 | 70 | return mean, var 71 | 72 | 73 | class Decoder(nn.Module): 74 | def __init__(self, device='cpu'): 75 | super().__init__() 76 | self.device = device 77 | self.l1 = nn.Linear(10, 200) 78 | self.l2 = nn.Linear(200, 200) 79 | self.out = nn.Linear(200, 784) 80 | 81 | def forward(self, x): 82 | h = self.l1(x) 83 | h = torch.relu(h) 84 | h = self.l2(h) 85 | h = torch.relu(h) 86 | h = self.out(h) 87 | y = torch.sigmoid(h) 88 | 89 | return y 90 | 91 | 92 | if __name__ == '__main__': 93 | np.random.seed(1234) 94 | torch.manual_seed(1234) 95 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 96 | 97 | ''' 98 | 1. Load data 99 | ''' 100 | root = os.path.join(os.path.dirname(__file__), 101 | '.', 'data', 'fashion_mnist') 102 | transform = transforms.Compose([transforms.ToTensor(), 103 | lambda x: x.view(-1)]) 104 | mnist_train = \ 105 | torchvision.datasets.FashionMNIST(root=root, 106 | download=True, 107 | train=True, 108 | transform=transform) 109 | train_dataloader = DataLoader(mnist_train, 110 | batch_size=100, 111 | shuffle=True) 112 | 113 | ''' 114 | 2. Build model 115 | ''' 116 | model = VAE(device=device).to(device) 117 | 118 | ''' 119 | 3. Train model 120 | ''' 121 | criterion = model.lower_bound 122 | optimizer = optimizers.Adam(model.parameters()) 123 | 124 | def compute_loss(x): 125 | return -1 * criterion(x) 126 | 127 | def train_step(x): 128 | model.train() 129 | loss = compute_loss(x) 130 | 131 | optimizer.zero_grad() 132 | loss.backward() 133 | optimizer.step() 134 | 135 | return loss 136 | 137 | epochs = 10 138 | 139 | for epoch in range(epochs): 140 | train_loss = 0. 141 | 142 | for (x, _) in train_dataloader: 143 | x = x.to(device) 144 | loss = train_step(x) 145 | 146 | train_loss += loss.item() 147 | 148 | train_loss /= len(train_dataloader) 149 | 150 | print('Epoch: {}, Cost: {:.3f}'.format( 151 | epoch+1, 152 | train_loss 153 | )) 154 | 155 | ''' 156 | 4. Test model 157 | ''' 158 | def gen_noise(batch_size): 159 | return torch.empty(batch_size, 10).normal_().to(device) 160 | 161 | def generate(batch_size=16): 162 | model.eval() 163 | z = gen_noise(batch_size) 164 | gen = model.decoder(z) 165 | gen = gen.view(-1, 28, 28) 166 | 167 | return gen 168 | 169 | images = generate(batch_size=16) 170 | images = images.squeeze().detach().cpu().numpy() 171 | plt.figure(figsize=(6, 6)) 172 | for i, image in enumerate(images): 173 | plt.subplot(4, 4, i+1) 174 | plt.imshow(image, cmap='binary_r') 175 | plt.axis('off') 176 | plt.tight_layout() 177 | plt.show() 178 | -------------------------------------------------------------------------------- /src/03_gan_fashion_mnist.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.optim as optimizers 6 | import torch.nn.functional as F 7 | from torch.utils.data import Dataset, DataLoader 8 | import torchvision 9 | import torchvision.transforms as transforms 10 | import matplotlib 11 | # matplotlib.use('Agg') 12 | import matplotlib.pyplot as plt 13 | 14 | 15 | class GAN(nn.Module): 16 | def __init__(self, device='cpu'): 17 | super().__init__() 18 | self.device = device 19 | self.G = Generator(device=device) 20 | self.D = Discriminator(device=device) 21 | 22 | def forward(self, x): 23 | x = self.G(x) 24 | y = self.D(x) 25 | 26 | return y 27 | 28 | 29 | class Discriminator(nn.Module): 30 | def __init__(self, device='cpu'): 31 | super().__init__() 32 | self.device = device 33 | self.conv1 = nn.Conv2d(1, 128, 34 | kernel_size=(3, 3), 35 | stride=(2, 2), 36 | padding=1) 37 | self.relu1 = nn.LeakyReLU(0.2) 38 | self.conv2 = nn.Conv2d(128, 256, 39 | kernel_size=(3, 3), 40 | stride=(2, 2), 41 | padding=1) 42 | self.bn2 = nn.BatchNorm2d(256) 43 | self.relu2 = nn.LeakyReLU(0.2) 44 | self.fc = nn.Linear(256*7*7, 1024) 45 | self.bn3 = nn.BatchNorm1d(1024) 46 | self.relu3 = nn.LeakyReLU(0.2) 47 | self.out = nn.Linear(1024, 1) 48 | 49 | def forward(self, x): 50 | h = self.conv1(x) 51 | h = self.relu1(h) 52 | h = self.conv2(h) 53 | h = self.bn2(h) 54 | h = self.relu2(h) 55 | h = h.view(-1, 256*7*7) 56 | h = self.fc(h) 57 | h = self.bn3(h) 58 | h = self.relu3(h) 59 | h = self.out(h) 60 | y = torch.sigmoid(h) 61 | 62 | return y 63 | 64 | 65 | class Generator(nn.Module): 66 | def __init__(self, 67 | input_dim=100, 68 | device='cpu'): 69 | super().__init__() 70 | self.device = device 71 | self.linear = nn.Linear(input_dim, 256*14*14) 72 | self.bn1 = nn.BatchNorm1d(256*14*14) 73 | self.relu1 = nn.ReLU() 74 | self.conv1 = nn.Conv2d(256, 128, 75 | kernel_size=(3, 3), 76 | padding=1) 77 | self.bn2 = nn.BatchNorm2d(128) 78 | self.relu2 = nn.ReLU() 79 | self.conv2 = nn.Conv2d(128, 64, 80 | kernel_size=(3, 3), 81 | padding=1) 82 | self.bn3 = nn.BatchNorm2d(64) 83 | self.relu3 = nn.ReLU() 84 | self.conv3 = nn.Conv2d(64, 1, 85 | kernel_size=(1, 1)) 86 | 87 | def forward(self, x): 88 | h = self.linear(x) 89 | h = self.bn1(h) 90 | h = self.relu1(h) 91 | h = h.view(-1, 256, 14, 14) 92 | h = F.interpolate(h, size=(28, 28)) 93 | h = self.conv1(h) 94 | h = self.bn2(h) 95 | h = self.relu2(h) 96 | h = self.conv2(h) 97 | h = self.bn3(h) 98 | h = self.relu3(h) 99 | h = self.conv3(h) 100 | y = torch.sigmoid(h) 101 | 102 | return y 103 | 104 | 105 | if __name__ == '__main__': 106 | np.random.seed(1234) 107 | torch.manual_seed(1234) 108 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 109 | 110 | ''' 111 | 1. Load data 112 | ''' 113 | root = os.path.join(os.path.dirname(__file__), 114 | '.', 'data', 'fashion_mnist') 115 | transform = transforms.Compose([transforms.ToTensor()]) 116 | mnist_train = \ 117 | torchvision.datasets.FashionMNIST(root=root, 118 | download=True, 119 | train=True, 120 | transform=transform) 121 | train_dataloader = DataLoader(mnist_train, 122 | batch_size=100, 123 | shuffle=True) 124 | 125 | ''' 126 | 2. Build model 127 | ''' 128 | model = GAN(device=device).to(device) 129 | 130 | def gen_noise(batch_size): 131 | return torch.empty(batch_size, 100).uniform_(0, 1).to(device) 132 | 133 | ''' 134 | 3. Train model 135 | ''' 136 | criterion = nn.BCELoss() 137 | optimizer_D = optimizers.Adam(model.D.parameters(), lr=0.0002) 138 | optimizer_G = optimizers.Adam(model.G.parameters(), lr=0.0002) 139 | 140 | def compute_loss(label, preds): 141 | return criterion(preds, label) 142 | 143 | def train_step(x): 144 | batch_size = x.size(0) 145 | model.D.train() 146 | model.G.train() 147 | 148 | # 3-A. train D 149 | # real images 150 | preds = model.D(x).squeeze() # preds with true images 151 | t = torch.ones(batch_size).float().to(device) 152 | loss_D_real = compute_loss(t, preds) 153 | # fake images 154 | noise = gen_noise(batch_size) 155 | gen = model.G(noise) 156 | preds = model.D(gen.detach()).squeeze() # preds with fake images 157 | t = torch.zeros(batch_size).float().to(device) 158 | loss_D_fake = compute_loss(t, preds) 159 | 160 | loss_D = loss_D_real + loss_D_fake 161 | optimizer_D.zero_grad() 162 | loss_D.backward() 163 | optimizer_D.step() 164 | 165 | # 3-B. train G 166 | noise = gen_noise(batch_size) 167 | gen = model.G(noise) 168 | preds = model.D(gen).squeeze() # preds with fake images 169 | t = torch.ones(batch_size).float().to(device) # label as true 170 | loss_G = compute_loss(t, preds) 171 | optimizer_G.zero_grad() 172 | loss_G.backward() 173 | optimizer_G.step() 174 | 175 | return loss_D, loss_G 176 | 177 | epochs = 20 178 | 179 | for epoch in range(epochs): 180 | train_loss_D = 0. 181 | train_loss_G = 0. 182 | test_loss = 0. 183 | 184 | for (x, _) in train_dataloader: 185 | x = x.to(device) 186 | loss_D, loss_G = train_step(x) 187 | 188 | train_loss_D += loss_D.item() 189 | train_loss_G += loss_G.item() 190 | 191 | train_loss_D /= len(train_dataloader) 192 | train_loss_G /= len(train_dataloader) 193 | 194 | print('Epoch: {}, D Cost: {:.3f}, G Cost: {:.3f}'.format( 195 | epoch+1, 196 | train_loss_D, 197 | train_loss_G 198 | )) 199 | 200 | ''' 201 | 4. Test model 202 | ''' 203 | def generate(batch_size=16): 204 | model.eval() 205 | noise = gen_noise(batch_size) 206 | gen = model.G(noise) 207 | 208 | return gen 209 | 210 | images = generate(batch_size=16) 211 | images = images.squeeze().detach().cpu().numpy() 212 | plt.figure(figsize=(6, 6)) 213 | for i, image in enumerate(images): 214 | plt.subplot(4, 4, i+1) 215 | plt.imshow(image, cmap='binary_r') 216 | plt.axis('off') 217 | plt.tight_layout() 218 | plt.show() 219 | -------------------------------------------------------------------------------- /src/data/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yusugomori/dl-book-generative/9eba8bba9f627d4dcbff6fe762939dfdf4a4830e/src/data/.keep --------------------------------------------------------------------------------