├── .gitignore
├── data
│   ├── cat
│   │   └── cat.jpg
│   └── dog
│       └── dog.jpg
├── 0.Imagehandler
│   ├── Imagehandler-Torch.py
│   └── Imagehandler-Keras.py
├── 1.CNN-Introduce
│   └── CNN-Torch.py
├── 2.AlexNet
│   ├── model.jpg
│   └── AlexNet-Torch.py
├── 7.ResNet
│   ├── model.jpeg
│   ├── spec.jpg
│   └── ResNet-Torch.py
├── 9.DenseNet
│   ├── spec.jpg
│   ├── model.jpg
│   └── DenseNet-Torch.py
└── README.md
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.idea
--------------------------------------------------------------------------------
/data/cat/cat.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graykode/vision-tutorial/HEAD/data/cat/cat.jpg
--------------------------------------------------------------------------------
/data/dog/dog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graykode/vision-tutorial/HEAD/data/dog/dog.jpg
--------------------------------------------------------------------------------
/2.AlexNet/model.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graykode/vision-tutorial/HEAD/2.AlexNet/model.jpg
--------------------------------------------------------------------------------
/7.ResNet/model.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graykode/vision-tutorial/HEAD/7.ResNet/model.jpeg
--------------------------------------------------------------------------------
/7.ResNet/spec.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graykode/vision-tutorial/HEAD/7.ResNet/spec.jpg
--------------------------------------------------------------------------------
/9.DenseNet/spec.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graykode/vision-tutorial/HEAD/9.DenseNet/spec.jpg
--------------------------------------------------------------------------------
/9.DenseNet/model.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graykode/vision-tutorial/HEAD/9.DenseNet/model.jpg
--------------------------------------------------------------------------------
/1.CNN-Introduce/CNN-Torch.py:
--------------------------------------------------------------------------------
'''
code by Tae Hwan Jung @graykode
'''
import numpy as np
import torch.nn as nn
import matplotlib.pyplot as plt
from torchvision import datasets, transforms

def img_show(image):
    image = image / 2 + 0.5  # rescale values for display
    if image.shape[0] == 3:  # [3, H, W] color image
        plt.imshow(np.transpose(image.numpy(), (1, 2, 0)))
    elif image.shape[0] == 1:  # [1, H, W] single feature map
        plt.imshow(image.squeeze(0))
    plt.show(block=False)

transform = transforms.Compose([
    transforms.Resize((227, 227)),
    transforms.ToTensor(),
])
dataset = datasets.ImageFolder(root='../data/', transform=transform)
cat, dog = dataset[0][0], dataset[1][0]

# What is a filter (= kernel) in a CNN?
print('original image')
img_show(cat)

print('in_channels=3, out_channels=6, kernel_size=4 Convolution')
outputs = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=4)(cat.unsqueeze(0)).data
for i in range(outputs.shape[1]):
    print(i + 1, 'channel')
    img_show(outputs[:, i, :, :])

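# (Added sketch, not in the original script) the output spatial size of a
# convolution is (size - kernel + 2*padding) // stride + 1, so the
# kernel_size=4 layer above maps the 227x227 input to 224x224 feature maps.
def conv_out_size(size, kernel, stride=1, padding=0):
    return (size - kernel + 2 * padding) // stride + 1

print('kernel_size=4            ->', conv_out_size(227, 4))             # 224
print('kernel_size=40           ->', conv_out_size(227, 40))            # 188
print('kernel_size=3, stride=4  ->', conv_out_size(227, 3, stride=4))   # 57
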
print('in_channels=3, out_channels=6, kernel_size=40 Convolution')
outputs = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=40)(cat.unsqueeze(0)).data
for i in range(outputs.shape[1]):
    print(i + 1, 'channel')
    img_show(outputs[:, i, :, :])

print('in_channels=3, out_channels=6, kernel_size=3 stride=4 Convolution')
outputs = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=4)(cat.unsqueeze(0)).data
for i in range(outputs.shape[1]):
    print(i + 1, 'channel')
    img_show(outputs[:, i, :, :])
--------------------------------------------------------------------------------
/2.AlexNet/AlexNet-Torch.py:
--------------------------------------------------------------------------------
'''
code by Tae Hwan Jung @graykode
reference : https://github.com/pytorch/vision/blob/master/torchvision/models/alexnet.py
'''
import torch
import torch.nn as nn
from modelsummary import summary

class AlexNet(nn.Module):
    def __init__(self):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4),  # 227 -> 55
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),  # 55 -> 27
            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),  # 27 -> 13
            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),  # 13 -> 6
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, 2),  # two classes: cat, dog
        )

    def forward(self, x):  # x : [1, 3, 227, 227]
        x = self.features(x)
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.classifier(x)
        return x

model = AlexNet()
print(model)
summary(model, torch.zeros((1, 3, 227, 227)))
--------------------------------------------------------------------------------
/0.Imagehandler/Imagehandler-Torch.py:
--------------------------------------------------------------------------------
# code by Tae Hwan Jung(Jeff Jung) @graykode
import numpy as np
import torch
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from torch.autograd import Variable

# image from https://github.com/ardamavi/Dog-Cat-Classifier/tree/master/Data/Train_Data
# 0 : cat, 1 : dog

def img_show(image):
    image = image / 2 + 0.5
    plt.imshow(np.transpose(image.numpy(), (1, 2, 0)))
    plt.show(block=False)

def pick_image(data, index):
    image, target = data[index]
    img_show(image)

original_image = datasets.ImageFolder(root='../data/', transform=transforms.ToTensor())
print(original_image, '\n')
pick_image(original_image, 1)

# make transformation (resizing image)
resized_transform = transforms.Compose([
    transforms.Resize((227, 227)),
    transforms.ToTensor()
])
resized_image = datasets.ImageFolder(root='../data/', transform=resized_transform)
print('resized image to 227x227x3')
pick_image(resized_image, 1)

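# (Added check, not in the original script) ImageFolder samples are
# (image_tensor, class_index) tuples, so the resize can be verified directly:
image, target = resized_image[1]
print(image.shape, '-> class', target)  # torch.Size([3, 227, 227]) -> class 1 (dog)
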
# make transformation (crop image)
cropped_transform = transforms.Compose([
    transforms.CenterCrop((10, 10)),
    transforms.ToTensor()
])
cropped_image = datasets.ImageFolder(root='../data/', transform=cropped_transform)
print('cropped image to 10x10x3')
pick_image(cropped_image, 1)

# make transformation (normalized image)
normalized_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

normalized_image = datasets.ImageFolder(root='../data/', transform=normalized_transform)
print('normalized image with mean and std (0.5, 0.5, 0.5)')
pick_image(normalized_image, 1)


# How to use DataLoader (input pipeline)
# images in the same batch must have the same height, width, and channel count
batch_size = 2
print('Data Loader')
dataloader = torch.utils.data.DataLoader(dataset=resized_image, batch_size=batch_size, shuffle=True)

count = 0
for batch_idx, (data, target) in enumerate(dataloader):
    data, target = Variable(data), Variable(target)  # Variable is a no-op wrapper in modern PyTorch
    count += batch_size
    print('batch :', batch_idx + 1, ' ', count, '/', len(original_image),
          'image:', data.shape, 'target :', target)
--------------------------------------------------------------------------------
/0.Imagehandler/Imagehandler-Keras.py:
--------------------------------------------------------------------------------
'''
code by Tae Hwan Jung(Jeff Jung) @graykode
code reference : https://jkjung-avt.github.io/keras-image-cropping/
                 https://github.com/zizhaozhang/unet-tensorflow-keras/blob/master/loader.py
'''
import numpy as np
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator

# image from https://github.com/ardamavi/Dog-Cat-Classifier/tree/master/Data/Train_Data
# 0 : cat, 1 : dog

def img_show(image):
    image = image / 2 + 0.5
    plt.imshow(image)
    plt.show(block=False)

def random_crop(img, random_crop_size):
    # Note: image_data_format is 'channel_last'
    assert img.shape[2] == 3
    height, width = img.shape[0], img.shape[1]
    dy, dx = random_crop_size
    x = np.random.randint(0, width - dx + 1)
    y = np.random.randint(0, height - dy + 1)
    return img[y:(y+dy), x:(x+dx), :]

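# (Added sketch, not in the original script) a deterministic counterpart to
# random_crop that always crops from the centre of the image:
def center_crop(img, crop_size):
    dy, dx = crop_size
    y = (img.shape[0] - dy) // 2
    x = (img.shape[1] - dx) // 2
    return img[y:(y + dy), x:(x + dx), :]
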
def crop_generator(batches, crop_length):
    """Take as input a Keras ImageGen (Iterator) and generate random
    crops from the image batches generated by the original iterator.
    """
    while True:
        batch_x, batch_y = next(batches)
        batch_crops = np.zeros((batch_x.shape[0], crop_length, crop_length, 3))
        for i in range(batch_x.shape[0]):
            batch_crops[i] = random_crop(batch_x[i], (crop_length, crop_length))
        yield (batch_crops, batch_y)

def preprocess(img, mean, std):
    out_img = img / img.max()  # scale to [0, 1]
    out_img = (out_img - np.array(mean).reshape(1, 1, 3)) / np.array(std).reshape(1, 1, 3)
    return out_img

# make transformation (resizing image)
resized_image = ImageDataGenerator(rescale=1./255).flow_from_directory('../data', target_size=(227, 227),
                                                                       classes=['dog', 'cat'], batch_size=2)
data, target = next(resized_image)
print('resized image to 227x227x3')
img_show(data[1])

# make transformation (crop image)
cropped_image = ImageDataGenerator(rescale=1./255).flow_from_directory('../data',
                                                                       classes=['dog', 'cat'], batch_size=2)
cropped_image = crop_generator(cropped_image, 10)
data, target = next(cropped_image)
print('cropped image to 10x10x3')
img_show(data[1])

# make transformation (normalized image)
normalized_image = ImageDataGenerator(rescale=1./255).flow_from_directory('../data',
                                                                          classes=['dog', 'cat'], batch_size=2)
data, target = next(normalized_image)
print('normalized image with mean and std (0.5, 0.5, 0.5)')
nimage = preprocess(data[1], mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
img_show(nimage)
--------------------------------------------------------------------------------
/9.DenseNet/DenseNet-Torch.py:
--------------------------------------------------------------------------------
'''
code by Tae Hwan Jung @graykode
code reference : https://github.com/kuangliu/pytorch-cifar/blob/master/models/densenet.py
'''
import math
import torch
import torch.nn as nn
from modelsummary import summary

class Bottleneck(nn.Module):
    def __init__(self, in_channel, growth_rate):
        super(Bottleneck, self).__init__()
        self.composite1 = nn.Sequential(
            nn.BatchNorm2d(in_channel),
            nn.ReLU(),
            nn.Conv2d(in_channel, 4 * growth_rate, kernel_size=1, bias=False)
        )
        self.composite2 = nn.Sequential(
            nn.BatchNorm2d(4 * growth_rate),
            nn.ReLU(),
            nn.Conv2d(4 * growth_rate, growth_rate, kernel_size=3, padding=1, bias=False)
        )
    def forward(self, x):  # in_channel -> growth_rate
        out = self.composite1(x)    # in_channel -> 4*growth_rate
        out = self.composite2(out)  # 4*growth_rate -> growth_rate
        out = torch.cat([out, x], 1)  # dim 1 is the channel dimension
        return out

class Transition(nn.Module):
    def __init__(self, in_channel, out_channel):
        super(Transition, self).__init__()
        self.composite = nn.Sequential(
            nn.BatchNorm2d(in_channel),
            nn.ReLU(),
            nn.Conv2d(in_channel, out_channel, kernel_size=1, bias=False),
            nn.AvgPool2d(kernel_size=2)  # halves the spatial size
        )
    def forward(self, x):
        return self.composite(x)

class DenseNet(nn.Module):
    def __init__(self, type_block, num_blocks, growth_rate=12, reduction=0.5, num_classes=2):
        super(DenseNet, self).__init__()
        self.growth_rate = growth_rate

        in_channel = 2 * growth_rate  # the initial convolution layer has 2k filters of size 7x7 with stride 2
        self.conv1 = nn.Conv2d(3, in_channel, kernel_size=7, stride=2, padding=1, bias=False)  # 112 = (227-7+2*1)/2+1
        self.max_pooling = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # padding=1 so 112 -> 56, keeping the final 7x7 average pool valid
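        # (Added note, not in the reference code) channel/size bookkeeping for a
        # 227x227 input, e.g. DenseNet121 (k=32, blocks [6,12,24,16], reduction 0.5):
        #   stem   : 2k = 64 channels, 227 -> 112 -> 56
        #   dense1 : 64  + 6*32  = 256  -> trans1 halves to 128 channels, 56 -> 28
        #   dense2 : 128 + 12*32 = 512  -> trans2 halves to 256 channels, 28 -> 14
        #   dense3 : 256 + 24*32 = 1024 -> trans3 halves to 512 channels, 14 -> 7
        #   dense4 : 512 + 16*32 = 1024 channels into the 7x7 average pool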

        self.dense1 = self.make_dense_layers(type_block, in_channel, num_blocks[0])
        in_channel += num_blocks[0] * growth_rate
        out_channel = int(math.floor(in_channel * reduction))
        self.trans1 = Transition(in_channel, out_channel)  # transition: in_channel -> out_channel
        in_channel = out_channel

        self.dense2 = self.make_dense_layers(type_block, in_channel, num_blocks[1])
        in_channel += num_blocks[1] * growth_rate
        out_channel = int(math.floor(in_channel * reduction))
        self.trans2 = Transition(in_channel, out_channel)
        in_channel = out_channel

        self.dense3 = self.make_dense_layers(type_block, in_channel, num_blocks[2])
        in_channel += num_blocks[2] * growth_rate
        out_channel = int(math.floor(in_channel * reduction))
        self.trans3 = Transition(in_channel, out_channel)
        in_channel = out_channel

        self.dense4 = self.make_dense_layers(type_block, in_channel, num_blocks[3])
        in_channel += num_blocks[3] * growth_rate

        self.classifier = nn.Sequential(
            nn.BatchNorm2d(in_channel),
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=7)
        )
        self.linear = nn.Linear(in_channel, num_classes)

    def make_dense_layers(self, type_block, in_channel, num_block):
        layers = []
        for i in range(num_block):
            layers.append(type_block(in_channel, self.growth_rate))
            in_channel += self.growth_rate
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.max_pooling(self.conv1(x))
        out = self.trans1(self.dense1(out))
        out = self.trans2(self.dense2(out))
        out = self.trans3(self.dense3(out))
        out = self.dense4(out)
        out = self.classifier(out)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

def DenseNet121():
    return DenseNet(Bottleneck, [6,12,24,16], growth_rate=32)

def DenseNet169():
    return DenseNet(Bottleneck, [6,12,32,32], growth_rate=32)

def DenseNet201():
    return DenseNet(Bottleneck, [6,12,48,32], growth_rate=32)

def DenseNet161():
    return DenseNet(Bottleneck, [6,12,36,24], growth_rate=48)

# Please select a model by index
model = [DenseNet121(), DenseNet161(), DenseNet169(), DenseNet201()][3]
print(model)
summary(model, torch.zeros((1, 3, 227, 227)))
--------------------------------------------------------------------------------
/7.ResNet/ResNet-Torch.py:
--------------------------------------------------------------------------------
'''
code by Tae Hwan Jung @graykode
reference : https://github.com/kuangliu/pytorch-cifar/blob/master/models/resnet.py
'''
import torch
import torch.nn as nn
from modelsummary import summary

class BasicBlock(nn.Module):
    expansion = 1
    def __init__(self, in_channel, out_channel, stride=1):
        super(BasicBlock, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channel),
            nn.ReLU(),
            nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(out_channel)
        )
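        # (Added note, not in the reference code) the identity path below only
        # works when the residual branch keeps both the spatial size and the
        # channel count; otherwise a 1x1 projection convolution matches the
        # shapes so the elementwise addition in forward() is valid.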
        # make shortcut
        self.residual = nn.Sequential(nn.ReLU())
        if stride != 1 or in_channel != self.expansion * out_channel:
            # ResNet34 fig.3 in the paper, dotted-line case
            self.residual = nn.Sequential(
                # output channel is expansion * current channel
                nn.Conv2d(in_channel, self.expansion * out_channel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * out_channel),
                nn.ReLU()
            )

    def forward(self, x):
        out = self.features(x)
        out += self.residual(x)  # the shortcut input is x, not out!
        return out

class Bottleneck(nn.Module):
    expansion = 4
    def __init__(self, in_channel, out_channel, stride=1):
        super(Bottleneck, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channel),
            nn.ReLU(),
            nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(out_channel),
            nn.ReLU(),
            nn.Conv2d(out_channel, self.expansion * out_channel, kernel_size=1, bias=False),
            nn.BatchNorm2d(self.expansion * out_channel)
        )
        self.residual = nn.Sequential(nn.ReLU())
        if stride != 1 or in_channel != self.expansion * out_channel:
            # ResNet34 fig.3 in the paper, dotted-line case
            self.residual = nn.Sequential(
                # output channel is expansion * current channel
                nn.Conv2d(in_channel, self.expansion * out_channel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * out_channel),
                nn.ReLU()
            )

    def forward(self, x):
        out = self.features(x)
        out += self.residual(x)  # the shortcut input is x, not out!
        return out

# output size : (width(=height) - filter_size + 2*padding)/stride + 1
class ResNet(nn.Module):
    def __init__(self, type_block, num_blocks, num_classes=2):
        super(ResNet, self).__init__()
        self.in_channel = 64
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=3, stride=2),
            self.make_layer(type_block, 64, num_blocks[0], stride=1),
            self.make_layer(type_block, 128, num_blocks[1], stride=2),
            self.make_layer(type_block, 256, num_blocks[2], stride=2),
            self.make_layer(type_block, 512, num_blocks[3], stride=2),
            nn.AvgPool2d(kernel_size=7)
        )
        self.linear = nn.Linear(512 * type_block.expansion, num_classes)

    def make_layer(self, type_block, in_channel, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)  # only the first layer's stride can differ from one
        layers = []
        for stride in strides:
            layers.append(type_block(self.in_channel, in_channel, stride))
            self.in_channel = in_channel * type_block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):  # x : [batch_size, 3, 227, 227]
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

def ResNet18():
    return ResNet(BasicBlock, [2,2,2,2])

def ResNet34():
    return ResNet(BasicBlock, [3,4,6,3])

def ResNet50():
    return ResNet(Bottleneck, [3,4,6,3])

def ResNet101():
    return ResNet(Bottleneck, [3,4,23,3])

def ResNet152():
    return ResNet(Bottleneck, [3,8,36,3])

# Please select a model by index
model = [ResNet18(), ResNet34(), ResNet50(), ResNet101(), ResNet152()][3]
print(model)
summary(model, torch.zeros((1, 3, 227, 227)))
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## vision-tutorial


`vision-tutorial` is a tutorial for anyone studying basic computer-vision architectures using **Pytorch** and **Keras**. Most of the vision models are implemented in fewer than **100 lines** of code (excluding comments and blank lines). The paper list follows the one recommended by Professor [Sung Kim](https://github.com/hunkim).

- The models are deliberately overfitted on a tiny dataset, [one image each of a cat and a dog](https://github.com/graykode/vision-tutorial/tree/master/data), to keep the training demos simple.

- Model accuracy is not the point of this project, since it is dominated by the data. Instead, **focus on the structure of each model, the number of parameters, the training process, and the implementation details from the papers.**



## SOTA Basic Vision Models

- How to handle images in Pytorch and Keras

  - Image resizing, cropping, and normalization

- Introduction to CNNs (Convolutional Neural Networks) in Pytorch and Keras

  - How do the number of channels, kernel (filter) size, stride, and padding affect a convolution?

  - Paper : [Object Recognition with Gradient-Based Learning](http://yann.lecun.com/exdb/publis/pdf/lecun-99.pdf)

- AlexNet(2012.09)

  - Paper : [ImageNet Classification with Deep Convolutional Neural Networks](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf)

  - Model

    ![](2.AlexNet/model.jpg)

- ZFNet(2013.11)

  - Paper : [Visualizing and Understanding Convolutional Networks](https://arxiv.org/abs/1311.2901)

- VGG16(2014.09)

  - Paper : [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556)

- Inception.v1 (a.k.a. GoogLeNet)(2014.09)

  - Paper : [Going Deeper with Convolutions](https://arxiv.org/abs/1409.4842)

- Inception.v2, v3(2015.12)

  - Paper : [Rethinking the Inception Architecture for Computer Vision](https://arxiv.org/abs/1512.00567)

- ResNet(2015.12)

  - Paper : [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385)
  - Model
    ![](7.ResNet/model.jpeg)

- Inception.v4(2016.02)

  - Paper : [Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning](https://arxiv.org/abs/1602.07261)

- DenseNet(2016.08)

  - Paper : [Densely Connected Convolutional Networks](https://arxiv.org/abs/1608.06993)
  - Model
    ![](9.DenseNet/model.jpg)

- Xception(2016.10)

  - Paper : [Xception: Deep Learning with Depthwise Separable Convolutions](https://arxiv.org/abs/1610.02357)

- MobileNet(2017.04)

  - Paper : [MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications](https://arxiv.org/abs/1704.04861)

- SENet(2017.09)

  - Paper : [Squeeze-and-Excitation Networks](https://arxiv.org/abs/1709.01507)



## To Be Continued in Other Repositories

#### Semantic Segmentation

- FCN(2014.11) : [Fully Convolutional Networks for Semantic Segmentation](https://arxiv.org/abs/1411.4038)
- U-Net(2015.05) : [U-Net: Convolutional Networks for Biomedical Image Segmentation](https://arxiv.org/abs/1505.04597)
- SegNet(2015.11) : [SegNet: A Deep Convolutional Encoder-Decoder Architecture for Image Segmentation](https://arxiv.org/abs/1511.00561)
- DeepLab(2016.06) : [DeepLab: Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs](https://arxiv.org/abs/1606.00915)
- ENet(2016.06) : [ENet: A Deep Neural Network Architecture for Real-Time Semantic Segmentation](https://arxiv.org/abs/1606.02147)
- PSPNet(2016.12) : [Pyramid Scene Parsing Network](https://arxiv.org/abs/1612.01105)
- ICNet(2017.04) : [ICNet for Real-Time Semantic Segmentation on High-Resolution Images](https://arxiv.org/abs/1704.08545)



#### Generative Adversarial Networks

- GAN(2014.06) : [Generative Adversarial Networks](https://arxiv.org/abs/1406.2661)
- DCGAN(2015.11) : [Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks](https://arxiv.org/abs/1511.06434)
- Pix2Pix(2016.11) : [Image-to-Image Translation with Conditional Adversarial Networks](https://arxiv.org/abs/1611.07004)
- WGAN(2017.01) : [Wasserstein GAN](https://arxiv.org/abs/1701.07875)
- CycleGAN(2017.03) : [Unpaired Image-to-Image Translation using Cycle-Consistent Adversarial Networks](https://arxiv.org/abs/1703.10593)



#### Object Detection

- RCNN(2013.11) : [Rich feature hierarchies for accurate object detection and semantic segmentation](https://arxiv.org/abs/1311.2524)
- Fast-RCNN(2015.04) : [Fast R-CNN](https://arxiv.org/abs/1504.08083)
- Faster-RCNN(2015.06) : [Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks](https://arxiv.org/abs/1506.01497)
- YOLO(2015.06) : [You Only Look Once: Unified, Real-Time Object Detection](https://arxiv.org/abs/1506.02640)
- SSD(2015.12) : [SSD: Single Shot MultiBox Detector](https://arxiv.org/abs/1512.02325)
- YOLO9000(2016.12) : [YOLO9000: Better, Faster, Stronger](https://arxiv.org/abs/1612.08242)
- Mask R-CNN(2017.03) : [Mask R-CNN](https://arxiv.org/abs/1703.06870)
- RetinaNet(2017.08) : [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002)



## Author

- Tae Hwan Jung(Jeff Jung) @graykode
- Author Email : [nlkey2022@gmail.com](mailto:nlkey2022@gmail.com)
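

## Quick Start (sketch)

A minimal training loop of my own, not part of the repository's scripts. It assumes you run it from the repository root and paste in the `AlexNet` class from `2.AlexNet/AlexNet-Torch.py` (any model class in this repo works the same way), then overfits the two-image dataset as described above:

```python
import torch
import torch.nn as nn
from torchvision import datasets, transforms

# two-image cat/dog dataset, resized to the 227x227 input the models expect
transform = transforms.Compose([transforms.Resize((227, 227)), transforms.ToTensor()])
dataset = datasets.ImageFolder(root='data/', transform=transform)
loader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=True)

model = AlexNet()  # assumed to be defined/pasted from 2.AlexNet/AlexNet-Torch.py
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

for epoch in range(50):  # tiny dataset, so the model overfits quickly
    for images, targets in loader:
        optimizer.zero_grad()
        loss = criterion(model(images), targets)
        loss.backward()
        optimizer.step()
```
--------------------------------------------------------------------------------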