├── .gitignore
├── data
    ├── cat
    │   └── cat.jpg
    └── dog
    │   └── dog.jpg
├── 2.AlexNet
    ├── model.jpg
    └── AlexNet-Torch.py
├── 7.ResNet
    ├── model.jpeg
    ├── spec.jpg
    └── ResNet-Torch.py
├── 9.DenseNet
    ├── spec.jpg
    ├── model.jpg
    └── DenseNet-Torch.py
├── 1.CNN-Introduce
    └── CNN-Torch.py
├── 0.Imagehandler
    ├── Imagehandler-Torch.py
    └── Imagehandler-Keras.py
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | .idea


--------------------------------------------------------------------------------
/data/cat/cat.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graykode/vision-tutorial/HEAD/data/cat/cat.jpg


--------------------------------------------------------------------------------
/data/dog/dog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graykode/vision-tutorial/HEAD/data/dog/dog.jpg


--------------------------------------------------------------------------------
/2.AlexNet/model.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graykode/vision-tutorial/HEAD/2.AlexNet/model.jpg


--------------------------------------------------------------------------------
/7.ResNet/model.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graykode/vision-tutorial/HEAD/7.ResNet/model.jpeg


--------------------------------------------------------------------------------
/7.ResNet/spec.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graykode/vision-tutorial/HEAD/7.ResNet/spec.jpg


--------------------------------------------------------------------------------
/9.DenseNet/spec.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graykode/vision-tutorial/HEAD/9.DenseNet/spec.jpg


--------------------------------------------------------------------------------
/9.DenseNet/model.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graykode/vision-tutorial/HEAD/9.DenseNet/model.jpg


--------------------------------------------------------------------------------
/1.CNN-Introduce/CNN-Torch.py:
--------------------------------------------------------------------------------
 1 | '''
 2 |     code by Tae Hwan Jung @graykode
 3 | '''
 4 | import numpy as np
 5 | import torch.nn as nn
 6 | import matplotlib.pyplot as plt
 7 | from torchvision import datasets, transforms
 8 | 
 9 | def img_show(image):
10 |     image = image / 2 + 0.5
11 |     if image.shape[0] == 3:
12 |         plt.imshow(np.transpose(image.numpy(), (1, 2, 0)))
13 |     elif image.shape[0] == 1:
14 |         plt.imshow(image.squeeze(0))
15 |     plt.show(block=False)
16 | 
# Load the sample images as 227x227 tensors.
transform = transforms.Compose([
    transforms.Resize((227, 227)),
    transforms.ToTensor(),
])
dataset = datasets.ImageFolder(root='../data/', transform=transform)
cat, dog = dataset[0][0], dataset[1][0]

# What is filter(=kernel) in CNN
print('original image')
img_show(cat)

# Each entry: (message printed before the demo, extra Conv2d arguments).
# Every convolution maps 3 input channels to 6 output channels.
conv_settings = [
    ('in_channels=3, out_channels=6, kernel_size=4 Convolution', dict(kernel_size=4)),
    ('in_channels=3, out_channels=6, kernel_size=40 Convolution', dict(kernel_size=40)),
    ('in_channels=3, out_channels=6, kernel_size=3 stride=4 Convolution', dict(kernel_size=3, stride=4)),
]

for title, conv_kwargs in conv_settings:
    print(title)
    conv = nn.Conv2d(in_channels=3, out_channels=6, **conv_kwargs)
    # Add a batch dimension, run the freshly initialised convolution and
    # take the raw output values.
    outputs = conv(cat.unsqueeze(0)).data
    for channel in range(outputs.shape[1]):
        print(channel+1,'channel')
        # Slice keeps the batch axis, so img_show receives a [1, H, W] map.
        img_show(outputs[:,channel,:,:])


--------------------------------------------------------------------------------
/2.AlexNet/AlexNet-Torch.py:
--------------------------------------------------------------------------------
 1 | '''
 2 |     code by Tae Hwan Jung @graykode
 3 |     reference : https://github.com/pytorch/vision/blob/master/torchvision/models/alexnet.py
 4 | '''
 5 | import torch
 6 | import torch.nn as nn
 7 | from modelsummary import summary
 8 | 
 9 | class AlexNet(nn.Module):
10 |     def __init__(self):
11 |         super(AlexNet, self).__init__()
12 |         self.features = nn.Sequential(
13 |             nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4),
14 |             nn.ReLU(inplace=True),
15 |             nn.MaxPool2d(kernel_size=3, stride=2),
16 |             nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2),
17 |             nn.ReLU(inplace=True),
18 |             nn.MaxPool2d(kernel_size=3, stride=2),
19 |             nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1),
20 |             nn.ReLU(inplace=True),
21 |             nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1),
22 |             nn.ReLU(inplace=True),
23 |             nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1),
24 |             nn.ReLU(inplace=True),
25 |             nn.MaxPool2d(kernel_size=3, stride=2),
26 |         )
27 |         self.classifier = nn.Sequential(
28 |             nn.Dropout(),
29 |             nn.Linear(256 * 6 * 6, 4096),
30 |             nn.ReLU(inplace=True),
31 |             nn.Dropout(),
32 |             nn.Linear(4096, 4096),
33 |             nn.ReLU(inplace=True),
34 |             nn.Linear(4096, 2),
35 |         )
36 | 
37 |     def forward(self, x): # x : [1, 3, 227, 227]
38 |         x = self.features(x)
39 |         x = x.view(x.size(0), 256 * 6 * 6)
40 |         x = self.classifier(x)
41 |         return x
42 | 
# Build the network, print its layer structure, then print a per-layer
# summary for a dummy batch holding one 3x227x227 image.
model = AlexNet()
print(model)
dummy_input = torch.zeros((1, 3, 227, 227))
summary(model, dummy_input)
46 | 


--------------------------------------------------------------------------------
/0.Imagehandler/Imagehandler-Torch.py:
--------------------------------------------------------------------------------
 1 | # code by Tae Hwan Jung(Jeff Jung) @graykode
 2 | import numpy as np
 3 | import torch
 4 | import matplotlib.pyplot as plt
 5 | from torchvision import datasets, transforms
 6 | from torch.autograd import Variable
 7 | 
 8 | # image from https://github.com/ardamavi/Dog-Cat-Classifier/tree/master/Data/Train_Data
 9 | # 0 : cat, 1 : dog
10 | 
def img_show(image):
    """Plot a channel-first image tensor with matplotlib (non-blocking).

    The values are mapped through ``x / 2 + 0.5`` and the axes are moved to
    (height, width, channel) before plotting.
    """
    rescaled = image / 2 + 0.5
    channel_last = np.transpose(rescaled.numpy(), (1, 2, 0))
    plt.imshow(channel_last)
    plt.show(block=False)
15 | 
def pick_image(data, index):
    """Show the image of the ``(image, target)`` pair at ``data[index]``.

    The target is unpacked but not used.
    """
    sample = data[index]
    image, _ = sample
    img_show(image)
19 | 
# Load the images unchanged (only converted to tensors) and show sample 1.
original_image = datasets.ImageFolder(root='../data/', transform=transforms.ToTensor())
print(original_image,'\n')
pick_image(original_image, 1)

# make transformation (resizing image)
resized_transform = transforms.Compose([
    transforms.Resize((227, 227)),
    transforms.ToTensor()
])
resized_image = datasets.ImageFolder(root='../data/', transform=resized_transform)
print('resized image to 227x227x3')
pick_image(resized_image, 1)

# make transformation (crop image)
cropped_transform = transforms.Compose([
    transforms.CenterCrop((10, 10)),
    transforms.ToTensor()
])
cropped_image = datasets.ImageFolder(root='../data/', transform=cropped_transform)
print('cropped image to 10x10x3')
pick_image(cropped_image, 1)

# make transformation (normalized image)
normalized_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

normalized_image = datasets.ImageFolder(root='../data/', transform=normalized_transform)
print('normalized image to mean and std (0.5, 0.5, 0.5)')
pick_image(normalized_image, 1)


# How to use Data Loader (Input Pipeline)
# images in the same batch must have the same height, width and channel count,
# which is why the resized dataset is the one being batched
batch_size = 2
print('Data Loader')
dataloader = torch.utils.data.DataLoader(dataset=resized_image, batch_size=batch_size, shuffle=True)

count = 0
for batch_idx, (data, target) in enumerate(dataloader):
    # Tensors are used directly: wrapping them in the deprecated Variable is
    # a no-op on current PyTorch.
    # Count the samples actually in this batch; the final batch can be
    # smaller than batch_size.
    count += data.size(0)
    print('batch :', batch_idx + 1,'    ', count, '/', len(resized_image),
          'image:', data.shape, 'target : ', target)


--------------------------------------------------------------------------------
/0.Imagehandler/Imagehandler-Keras.py:
--------------------------------------------------------------------------------
 1 | '''
 2 |     code by Tae Hwan Jung(Jeff Jung) @graykode
 3 |     code reference : https://jkjung-avt.github.io/keras-image-cropping/
 4 |                     https://github.com/zizhaozhang/unet-tensorflow-keras/blob/master/loader.py
 5 | '''
 6 | import numpy as np
 7 | import matplotlib.pyplot as plt
 8 | from keras.preprocessing.image import ImageDataGenerator
 9 | 
10 | # image from https://github.com/ardamavi/Dog-Cat-Classifier/tree/master/Data/Train_Data
11 | # 0 : cat, 1 : dog
12 | 
def img_show(image):
    """Plot an image array with matplotlib (non-blocking).

    The values are mapped through ``x / 2 + 0.5`` before plotting.
    """
    rescaled = image / 2 + 0.5
    plt.imshow(rescaled)
    plt.show(block=False)
17 | 
def random_crop(img, random_crop_size):
    """Return a randomly positioned crop of a channel-last, 3-channel image.

    Args:
        img: array indexed as (height, width, channel) with 3 channels.
        random_crop_size: ``(crop_height, crop_width)`` of the result.

    Returns:
        The slice ``img[top:top+crop_height, left:left+crop_width, :]`` with
        the offsets drawn uniformly so the crop stays inside the image.
    """
    # Note: image_data_format is 'channel_last'
    assert img.shape[2] == 3
    height, width = img.shape[:2]
    crop_h, crop_w = random_crop_size
    # Horizontal offset is drawn first, then the vertical one.
    left = np.random.randint(0, width - crop_w + 1)
    top = np.random.randint(0, height - crop_h + 1)
    return img[top:top + crop_h, left:left + crop_w, :]
26 | 
def crop_generator(batches, crop_length):
    """Yield randomly cropped versions of the batches produced by ``batches``.

    Args:
        batches: iterable of ``(batch_x, batch_y)`` pairs, e.g. a Keras
            ImageGen iterator. ``batch_x`` is indexed as
            (sample, height, width, channel) with 3 channels.
        crop_length: side length of the square crop taken from every image.

    Yields:
        ``(batch_crops, batch_y)`` where ``batch_crops`` has shape
        ``(len(batch_x), crop_length, crop_length, 3)`` and ``batch_y`` is
        passed through unchanged.

    The generator ends when ``batches`` is exhausted. Iterating with ``for``
    (rather than calling ``next`` inside ``while True``) keeps a finite input
    from surfacing as ``RuntimeError`` inside this generator; an endless
    Keras iterator still yields forever.
    """
    for batch_x, batch_y in batches:
        batch_crops = np.zeros((batch_x.shape[0], crop_length, crop_length, 3))
        for i, image in enumerate(batch_x):
            batch_crops[i] = random_crop(image, (crop_length, crop_length))
        yield (batch_crops, batch_y)
37 | 
def preprocess(img, mean, std):
    """Divide ``img`` by its maximum, then normalise each channel.

    Args:
        img: array whose last axis holds 3 channels (broadcast against a
            (1, 1, 3) array).
        mean: three per-channel values subtracted after scaling.
        std: three per-channel values the result is divided by.

    Returns:
        ``(img / img.max() - mean) / std`` with per-channel broadcasting.
    """
    scaled = img / img.max()  # largest value becomes 1
    mean_arr = np.array(mean).reshape(1, 1, 3)
    std_arr = np.array(std).reshape(1, 1, 3)
    return (scaled - mean_arr) / std_arr
42 | 
# The order of this list fixes the label indices -> 0 : cat, 1 : dog
# (it was ['dog', 'cat'] before, which made dog 0 and cat 1)
class_names = ['cat', 'dog']

# make transformation (resizing image)
resized_image = ImageDataGenerator(rescale=1./255).flow_from_directory('../data', target_size=(227, 227),
                                        classes=class_names,  batch_size=2)
data, target = next(resized_image)
print('resized image to 227x227x3')
img_show(data[1])

# make transformation (crop image)
cropped_image = ImageDataGenerator(rescale=1./255).flow_from_directory('../data',
                                        classes=class_names,  batch_size=2)
# wrap the iterator so every batch is reduced to random 10x10 crops
cropped_image = crop_generator(cropped_image, 10)
data, target = next(cropped_image)
print('cropped image to 10x10x3')
img_show(data[1])

# make transformation (normalized image)
normalized_image = ImageDataGenerator(rescale=1./255).flow_from_directory('../data',
                                        classes=class_names,  batch_size=2)
data, target = next(normalized_image)
print('normalized image to mean and std (0.5, 0.5, 0.5)')
nimage = preprocess(data[1], mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
img_show(nimage)


--------------------------------------------------------------------------------
/9.DenseNet/DenseNet-Torch.py:
--------------------------------------------------------------------------------
  1 | '''
  2 |     code by Tae Hwan Jung @graykode
  3 |     code reference : https://github.com/kuangliu/pytorch-cifar/blob/master/models/densenet.py
  4 | '''
  5 | import math
  6 | import torch
  7 | import torch.nn as nn
  8 | from modelsummary import summary
  9 | 
 10 | class Bottleneck(nn.Module):
 11 |     def __init__(self, in_channel, growth_rate):
 12 |         super(Bottleneck, self).__init__()
 13 |         self.composite1 = nn.Sequential(
 14 |             nn.BatchNorm2d(in_channel),
 15 |             nn.ReLU(),
 16 |             nn.Conv2d(in_channel, 4 * growth_rate, kernel_size=1, bias=False)
 17 |         )
 18 |         self.composite2 = nn.Sequential(
 19 |             nn.BatchNorm2d(4 * growth_rate),
 20 |             nn.ReLU(),
 21 |             nn.Conv2d(4 * growth_rate, growth_rate, kernel_size=3, padding=1, bias=False)
 22 |         )
 23 |     def forward(self, x): # in_channel > growth_rate
 24 |         out = self.composite1(x) # in_channel > 4*growth_rate
 25 |         out = self.composite2(out) # 4*growth_rate > growth_rate
 26 |         out = torch.cat([out, x], 1) # 1 dim is channel
 27 |         return out
 28 | 
 29 | class Transition(nn.Module):
 30 |     def __init__(self, in_channel, out_channel):
 31 |         super(Transition, self).__init__()
 32 |         self.composite = nn.Sequential(
 33 |             nn.BatchNorm2d(in_channel),
 34 |             nn.ReLU(),
 35 |             nn.Conv2d(in_channel, out_channel, kernel_size=1, bias=False),
 36 |             nn.AvgPool2d(kernel_size=2)
 37 |         )
 38 |     def forward(self, x):
 39 |         return self.composite(x)
 40 | 
 41 | class DenseNet(nn.Module):
 42 |     def __init__(self, type_block, num_blocks, growth_rate=12, reduction=0.5, num_classes=2):
 43 |         super(DenseNet, self).__init__()
 44 |         self.growth_rate = growth_rate
 45 | 
 46 |         in_channel = 2 * growth_rate # The initial convolution layer comprises 2k convolutions of size 7×7 with stride 2
 47 |         self.conv1 =  nn.Conv2d(3, in_channel, kernel_size=7, stride=2, padding=1, bias=False) # 112 = (227-7+2*1)/2+1
 48 |         self.max_pooling = nn.MaxPool2d(kernel_size=3, stride=2)
 49 | 
 50 |         self.dense1 = self.make_dense_layers(type_block, in_channel, num_blocks[0])
 51 |         in_channel += num_blocks[0]*growth_rate
 52 |         out_channel = int(math.floor(in_channel * reduction))
 53 |         self.trans1 = Transition(in_channel, out_channel) # Transite channel in_channel > out_channel
 54 |         in_channel = out_channel
 55 | 
 56 |         self.dense2 = self.make_dense_layers(type_block, in_channel, num_blocks[1])
 57 |         in_channel += num_blocks[1] * growth_rate
 58 |         out_channel = int(math.floor(in_channel * reduction))
 59 |         self.trans2 = Transition(in_channel, out_channel)
 60 |         in_channel = out_channel
 61 | 
 62 |         self.dense3 = self.make_dense_layers(type_block, in_channel, num_blocks[2])
 63 |         in_channel += num_blocks[2] * growth_rate
 64 |         out_planes = int(math.floor(in_channel * reduction))
 65 |         self.trans3 = Transition(in_channel, out_planes)
 66 |         in_channel = out_planes
 67 | 
 68 |         self.dense4 = self.make_dense_layers(type_block, in_channel, num_blocks[3])
 69 |         in_channel += num_blocks[3] * growth_rate
 70 | 
 71 |         self.classifier = nn.Sequential(
 72 |             nn.BatchNorm2d(in_channel),
 73 |             nn.ReLU(),
 74 |             nn.AvgPool2d(kernel_size=7)
 75 |         )
 76 |         self.linear = nn.Linear(in_channel, num_classes)
 77 | 
 78 |     def make_dense_layers(self, type_block, in_channel, num_block):
 79 |         layers = []
 80 |         for i in range(num_block):
 81 |             layers.append(type_block(in_channel, self.growth_rate))
 82 |             in_channel += self.growth_rate
 83 |         return nn.Sequential(*layers)
 84 | 
 85 |     def forward(self, x):
 86 |         out = self.max_pooling(self.conv1(x))
 87 |         out = self.trans1(self.dense1(out))
 88 |         out = self.trans2(self.dense2(out))
 89 |         out = self.trans3(self.dense3(out))
 90 |         out = self.dense4(out)
 91 |         out = self.classifier(out)
 92 |         out = out.view(out.size(0), -1)
 93 |         out = self.linear(out)
 94 |         return out
 95 | 
 96 | def DenseNet121():
 97 |     return DenseNet(Bottleneck, [6,12,24,16], growth_rate=32)
 98 | 
 99 | def DenseNet169():
100 |     return DenseNet(Bottleneck, [6,12,32,32], growth_rate=32)
101 | 
102 | def DenseNet201():
103 |     return DenseNet(Bottleneck, [6,12,48,32], growth_rate=32)
104 | 
105 | def DenseNet161():
106 |     return DenseNet(Bottleneck, [6,12,36,24], growth_rate=48)
107 | 
# Please Select model by index
# The list holds the factory functions, so only the selected network is
# built instead of all four (index 3 -> DenseNet201).
model = [DenseNet121, DenseNet161, DenseNet169, DenseNet201][3]()
print(model)
summary(model, torch.zeros((1, 3, 227, 227)))


--------------------------------------------------------------------------------
/7.ResNet/ResNet-Torch.py:
--------------------------------------------------------------------------------
  1 | '''
  2 |     code by Tae Hwan Jung @graykode
  3 |     reference : https://github.com/kuangliu/pytorch-cifar/blob/master/models/resnet.py
  4 | '''
  5 | import torch
  6 | import torch.nn as nn
  7 | from modelsummary import summary
  8 | 
  9 | class BasicBlock(nn.Module):
 10 |     expansion = 1
 11 |     def __init__(self, in_channel, out_channel, stride=1):
 12 |         super(BasicBlock, self).__init__()
 13 |         self.features = nn.Sequential(
 14 |             nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride, padding=1, bias=False),
 15 |             nn.BatchNorm2d(out_channel),
 16 |             nn.ReLU(),
 17 |             nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=1, padding=1, bias=False),
 18 |             nn.BatchNorm2d(out_channel)
 19 |         )
 20 |         # make shortcut
 21 |         self.residual = nn.Sequential(nn.ReLU())
 22 |         if stride != 1 or in_channel != self.expansion * out_channel:
 23 |             # ResNet34 fig3 in paper, case of dot-line
 24 |             self.residual = nn.Sequential(
 25 |                 # output channel is expansion * current channel
 26 |                 nn.Conv2d(in_channel, self.expansion * out_channel, kernel_size=1, stride=stride, bias=False),
 27 |                 nn.BatchNorm2d(self.expansion * out_channel),
 28 |                 nn.ReLU()
 29 |             )
 30 | 
 31 |     def forward(self, x):
 32 |         out = self.features(x)
 33 |         out += self.residual(x) # intput is not out!
 34 |         return out
 35 | 
 36 | class Bottleneck(nn.Module):
 37 |     expansion = 4
 38 |     def __init__(self, in_channel, out_channel, stride=1):
 39 |         super(Bottleneck, self).__init__()
 40 |         self.features = nn.Sequential(
 41 |             nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride, padding=1, bias=False),
 42 |             nn.BatchNorm2d(out_channel),
 43 |             nn.ReLU(),
 44 |             nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=1, padding=1, bias=False),
 45 |             nn.BatchNorm2d(out_channel),
 46 |             nn.ReLU(),
 47 |             nn.Conv2d(out_channel, self.expansion * out_channel, kernel_size=1, bias=False),
 48 |             nn.BatchNorm2d(self.expansion * out_channel)
 49 |         )
 50 |         self.residual = nn.Sequential(nn.ReLU())
 51 |         if stride != 1 or in_channel != self.expansion * out_channel:
 52 |             # ResNet34 fig3 in paper, case of dot-line
 53 |             self.residual = nn.Sequential(
 54 |                 # output channel is expansion * current channel
 55 |                 nn.Conv2d(in_channel, self.expansion * out_channel, kernel_size=1, stride=stride, bias=False),
 56 |                 nn.BatchNorm2d(self.expansion * out_channel),
 57 |                 nn.ReLU()
 58 |             )
 59 | 
 60 |     def forward(self, x):
 61 |         out = self.features(x)
 62 |         out += self.residual(x)  # intput is not out!
 63 |         return out
 64 | 
 65 | # out_channel : (width(=height) - filter_size + 2*padding)/stride + 1
 66 | class ResNet(nn.Module):
 67 |     def __init__(self, type_block , num_blocks, num_classes=2):
 68 |         super(ResNet, self).__init__()
 69 |         self.in_channel = 64
 70 |         self.features = nn.Sequential(
 71 |             nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1),
 72 |             nn.BatchNorm2d(64),
 73 |             nn.ReLU(),
 74 |             nn.AvgPool2d(kernel_size=3, stride=2),
 75 |             self.make_layer(type_block, 64,  num_blocks[0], stride=1),
 76 |             self.make_layer(type_block, 128, num_blocks[1], stride=2),
 77 |             self.make_layer(type_block, 256, num_blocks[2], stride=2),
 78 |             self.make_layer(type_block, 512, num_blocks[3], stride=2),
 79 |             nn.AvgPool2d(kernel_size=7)
 80 |         )
 81 |         self.linear = nn.Linear(512 * type_block.expansion, num_classes)
 82 | 
 83 |     def make_layer(self, type_block, in_channel, num_blocks, stride):
 84 |         strides = [stride] + [1] * (num_blocks - 1) # only stride of first layer is not one.
 85 |         layers = []
 86 |         for stride in strides:
 87 |             layers.append(type_block(self.in_channel, in_channel, stride))
 88 |             self.in_channel = in_channel * type_block.expansion
 89 |         return nn.Sequential(*layers)
 90 | 
 91 |     def forward(self, x): # x : [batch_size, 3, 227, 227]
 92 |         out = self.features(x)
 93 |         out = out.view(out.size(0), -1)
 94 |         out = self.linear(out)
 95 |         return out
 96 | 
 97 | def ResNet18():
 98 |     return ResNet(BasicBlock, [2,2,2,2])
 99 | 
100 | def ResNet34():
101 |     return ResNet(BasicBlock, [3,4,6,3])
102 | 
103 | def ResNet50():
104 |     return ResNet(Bottleneck, [3,4,6,3])
105 | 
106 | def ResNet101():
107 |     return ResNet(Bottleneck, [3,4,23,3])
108 | 
109 | def ResNet152():
110 |     return ResNet(Bottleneck, [3,8,36,3])
111 | 
# Please Select model by index
# The list holds the factory functions, so only the selected network is
# built instead of all five (index 3 -> ResNet101).
model = [ResNet18, ResNet34, ResNet50, ResNet101, ResNet152][3]()
print(model)
summary(model, torch.zeros((1, 3, 227, 227)))


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ## vision-tutorial
  2 | 
  3 | <p align="center"><img width="100" src="https://media-thumbs.golden.com/OLqzmrmwAzY1P7Sl29k2T9WjJdM=/200x200/smart/golden-storage-production.s3.amazonaws.com/topic_images/e08914afa10a4179893eeb07cb5e4713.png" /><img width="100" src="https://keras.io/img/keras-logo-small-wb.png" /></p>
  4 | 
  5 | `vision-tutorial` is a tutorial for those who are studying `Computer Vision Basic Architectures` using **Pytorch** and **Keras**. Most of the vision models are implemented in fewer than **100 lines** of code (excluding comments and blank lines). The papers covered here are from a list recommended by Professor [Sung Kim](https://github.com/hunkim).
  6 | 
  7 | - The dataset is deliberately tiny so that each model simply overfits it, which is enough to demonstrate the learning process: [one image each of a cat and a dog](https://github.com/graykode/vision-tutorial/tree/master/data)
  8 | 
  9 | - The accuracy of the model is not important in this project because it depends on the data. I recommend that you **focus on the structure of the model, the number of parameters, the learning process, and the detailed implementation of each paper.**
 10 | 
 11 |   
 12 | 
 13 | ## SOTA Basic Vision Models - Introduction
 14 | 
 15 | - How to handle image in Pytorch and Keras
 16 | 
 17 |   - Image Resizing, Cropping
 18 | 
 19 | - Introduction CNN(Convolutional Neural Networks) in Pytorch and Keras
 20 | 
 21 |   - How does number of channels, filter size (=kernel), grid, and padding affect Convolution?
 22 | 
 23 |   - Paper : [Object Recognition with Gradient-Based Learning](http://yann.lecun.com/exdb/publis/pdf/lecun-99.pdf)
 24 | 
 25 | - AlexNet(2012.09)
 26 | 
 27 |   - Paper : [ImageNet Classification with Deep Convolutional Neural Networks](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf)
 28 | 
 29 |   - Model
 30 | 
 31 |     ![](2.AlexNet/model.jpg)
 32 | 
 33 | - ZFNet(2013.11)
 34 | 
 35 |   - Paper : [Visualizing and Understanding Convolutional Networks](https://arxiv.org/abs/1311.2901)
 36 | 
 37 | - VGG16(2014.09)
 38 | 
 39 |   - Paper : [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556)
 40 | 
 41 | - Inception.v1(a.k.a GoogLeNet)(2014.09)
 42 | 
 43 |   - Paper : [Going Deeper with Convolutions](https://arxiv.org/abs/1409.4842)
 44 | 
 45 | - Inception.v2, v3(2015.12)
 46 | 
 47 |   - Paper : [Rethinking the Inception Architecture for Computer Vision](https://arxiv.org/abs/1512.00567)
 48 | 
 49 | - ResNet(2015.12)
 50 | 
 51 |   - Paper : [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385)
 52 |   - Model
 53 |     ![](7.ResNet/model.jpeg)
 54 | 
 55 | - Inception.v4(2016.02)
 56 | 
 57 |   - Paper : [Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning](https://arxiv.org/abs/1602.07261)
 58 | 
 59 | - DenseNet(2016.08)
 60 | 
 61 |   - Paper : [Densely Connected Convolutional Networks](https://arxiv.org/abs/1608.06993)
 62 |   - Model
 63 |     ![](9.DenseNet/model.jpg)
 64 | 
 65 | - Xception(2016.10)
 66 | 
 67 |   - Paper : [Xception: Deep Learning with Depthwise Separable Convolutions](https://arxiv.org/abs/1610.02357)
 68 | 
 69 | - MobileNet(2017.04)
 70 | 
 71 |   - Paper : [MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications](https://arxiv.org/abs/1704.04861)
 72 | 
 73 | - SENet(2017.09)
 74 | 
 75 |   - Paper : [Squeeze-and-Excitation Networks](https://arxiv.org/abs/1709.01507)
 76 | 
 77 | 
 78 | 
 79 | ## To Be Continued: Implementations in Other Repositories
 80 | 
 81 | #### v Semantic Segmentation
 82 | 
 83 | - FCN(2014.11) : [Fully Convolutional Networks for Semantic Segmentation](https://arxiv.org/abs/1411.4038)
 84 | - U-Net(2015.05) : [U-Net: Convolutional Networks for Biomedical Image Segmentation](https://arxiv.org/abs/1505.04597)
 85 | - SegNet(2015.11) : [SegNet: A Deep Convolutional Encoder-Decoder Architecture for Image Segmentation](https://arxiv.org/abs/1511.00561)
 86 | - DeepLab(2016.06) : [DeepLab: Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs](https://arxiv.org/abs/1606.00915)
 87 | - ENet(2016.07) : [ENet: A Deep Neural Network Architecture for Real-Time Semantic Segmentation](https://arxiv.org/abs/1606.02147)
 88 | - PSPNet(2016.12) : [Pyramid Scene Parsing Network](https://arxiv.org/abs/1612.01105)
 89 | - ICNet(2017.04) : [ICNet for Real-Time Semantic Segmentation on High-Resolution Images](https://arxiv.org/abs/1704.08545)
 90 | 
 91 | 
 92 | 
 93 | #### v Generative adversarial networks
 94 | 
 95 | - GAN(2014.06) : [Generative Adversarial Networks](https://arxiv.org/abs/1406.2661)
 96 | - DCGAN(2015.11) : [Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks](https://arxiv.org/abs/1511.06434)
 97 | - Pix2Pix(2016.11) : [Image-to-Image Translation with Conditional Adversarial Networks](https://arxiv.org/abs/1611.07004)
 98 | - WGAN(2017.01) : [Wasserstein GAN](https://arxiv.org/abs/1701.07875)
 99 | - CycleGAN(2017.05) : [Unpaired Image-to-Image Translation using Cycle-Consistent Adversarial Networks](https://arxiv.org/abs/1703.10593)
100 | 
101 | 
102 | 
103 | #### v Object Detection
104 | 
105 | - RCNN(2013.11) : [Rich feature hierarchies for accurate object detection and semantic segmentation](https://arxiv.org/abs/1311.2524)
106 | - Fast-RCNN(2015.04) : [Fast R-CNN](https://arxiv.org/abs/1504.08083)
107 | - Faster-RCNN(2015.06) : [Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks](https://arxiv.org/abs/1506.01497)
108 | - YOLO(2015.06) : [You Only Look Once: Unified, Real-Time Object Detection](https://arxiv.org/abs/1506.02640)
109 | - SSD(2015.12) : [SSD: Single Shot MultiBox Detector](https://arxiv.org/abs/1512.02325)
110 | - YOLO9000(2016.12) : [YOLO9000: Better, Faster, Stronger](https://arxiv.org/abs/1612.08242)
111 | - Mask R-CNN(2017.05) : [Mask R-CNN](https://arxiv.org/abs/1703.06870)
112 | - RetinaNet(2017.08) : [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002)
113 | 
114 | 
115 | 
116 | ## Author
117 | 
118 | - Tae Hwan Jung(Jeff Jung) @graykode
119 | - Author Email : [nlkey2022@gmail.com](mailto:nlkey2022@gmail.com)


--------------------------------------------------------------------------------