├── .gitignore ├── data ├── cat │ └── cat.jpg └── dog │ └── dog.jpg ├── 2.AlexNet ├── model.jpg └── AlexNet-Torch.py ├── 7.ResNet ├── model.jpeg ├── spec.jpg └── ResNet-Torch.py ├── 9.DenseNet ├── spec.jpg ├── model.jpg └── DenseNet-Torch.py ├── 1.CNN-Introduce └── CNN-Torch.py ├── 0.Imagehandler ├── Imagehandler-Torch.py └── Imagehandler-Keras.py └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | .idea -------------------------------------------------------------------------------- /data/cat/cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graykode/vision-tutorial/HEAD/data/cat/cat.jpg -------------------------------------------------------------------------------- /data/dog/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graykode/vision-tutorial/HEAD/data/dog/dog.jpg -------------------------------------------------------------------------------- /2.AlexNet/model.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graykode/vision-tutorial/HEAD/2.AlexNet/model.jpg -------------------------------------------------------------------------------- /7.ResNet/model.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graykode/vision-tutorial/HEAD/7.ResNet/model.jpeg -------------------------------------------------------------------------------- /7.ResNet/spec.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graykode/vision-tutorial/HEAD/7.ResNet/spec.jpg -------------------------------------------------------------------------------- /9.DenseNet/spec.jpg: -------------------------------------------------------------------------------- 
# https://raw.githubusercontent.com/graykode/vision-tutorial/HEAD/9.DenseNet/spec.jpg
# --------------------------------------------------------------------------------
# /9.DenseNet/model.jpg:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/graykode/vision-tutorial/HEAD/9.DenseNet/model.jpg
# --------------------------------------------------------------------------------
# /1.CNN-Introduce/CNN-Torch.py:
# --------------------------------------------------------------------------------
'''
  code by Tae Hwan Jung @graykode
'''
import numpy as np
import torch.nn as nn
import matplotlib.pyplot as plt
from torchvision import datasets, transforms


def img_show(image, unnormalize=False):
    """Display a channel-first image tensor with matplotlib.

    Args:
        image: tensor laid out as [channels, height, width]. Three channels
            are drawn as RGB, one channel as a single 2-D map.
        unnormalize: when True, apply ``image / 2 + 0.5`` first, i.e. undo a
            Normalize(mean=0.5, std=0.5) transform. It defaults to False
            because this script never normalizes its images; applying the
            shift unconditionally pushed the RGB picture into [0.5, 1].

    Raises:
        ValueError: if the tensor has neither 1 nor 3 channels (previously
            an empty figure was shown without any hint).
    """
    if unnormalize:
        image = image / 2 + 0.5
    if image.shape[0] == 3:
        # matplotlib expects height x width x channel
        plt.imshow(np.transpose(image.numpy(), (1, 2, 0)))
    elif image.shape[0] == 1:
        plt.imshow(image.squeeze(0))
    else:
        raise ValueError('img_show expects 1 or 3 channels, got %d' % image.shape[0])
    plt.show(block=False)


def show_conv_channels(image, **conv_kwargs):
    """Run `image` through a freshly initialised Conv2d and show every output channel.

    Args:
        image: tensor of shape [channels, height, width].
        **conv_kwargs: forwarded to ``nn.Conv2d`` (out_channels, kernel_size,
            stride, ...); ``in_channels`` is taken from `image`.
    """
    conv = nn.Conv2d(in_channels=image.shape[0], **conv_kwargs)
    # detach() drops the autograd graph so matplotlib can read the values
    outputs = conv(image.unsqueeze(0)).detach()
    for index, channel in enumerate(outputs[0], start=1):
        print(index, 'channel')
        img_show(channel.unsqueeze(0))


transform = transforms.Compose([
    transforms.Resize((227, 227)),
    transforms.ToTensor(),
])
dataset = datasets.ImageFolder(root='../data/', transform=transform)
cat, dog = dataset[0][0], dataset[1][0]

# What is filter(=kernel) in CNN
print('original image')
img_show(cat)

print('in_channels=3, out_channels=6, kernel_size=4 Convolution')
show_conv_channels(cat, out_channels=6, kernel_size=4)

print('in_channels=3, out_channels=6, kernel_size=40 Convolution')
show_conv_channels(cat, out_channels=6, kernel_size=40)

print('in_channels=3, out_channels=6, kernel_size=3 stride=4 Convolution')
show_conv_channels(cat, out_channels=6, kernel_size=3, stride=4)
# --------------------------------------------------------------------------------
# /2.AlexNet/AlexNet-Torch.py:
# --------------------------------------------------------------------------------
'''
  code by Tae Hwan Jung @graykode
  reference : https://github.com/pytorch/vision/blob/master/torchvision/models/alexnet.py
'''
import torch
import torch.nn as nn


class AlexNet(nn.Module):
    """AlexNet-style classifier for 3 x 227 x 227 inputs.

    Args:
        num_classes: size of the final linear layer (default 2, the value
            that used to be hard-coded).
    """

    def __init__(self, num_classes=2):
        super().__init__()
        # spatial size for a 227 x 227 input: 227 -> 55 -> 27 -> 27 -> 13 -> 13 -> 13 -> 13 -> 6
        self.features = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):  # x : [batch_size, 3, 227, 227]
        """Return class scores of shape [batch_size, num_classes]."""
        x = self.features(x)
        x = torch.flatten(x, 1)  # [batch_size, 256 * 6 * 6]
        return self.classifier(x)


if __name__ == "__main__":
    # modelsummary is only needed for the demo printout below
    from modelsummary import summary

    model = AlexNet()
    print(model)
    summary(model, torch.zeros((1, 3, 227, 227)))
# --------------------------------------------------------------------------------
# /0.Imagehandler/Imagehandler-Torch.py:
# --------------------------------------------------------------------------------
# code by Tae Hwan Jung(Jeff Jung) @graykode
import numpy as np
import torch
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from torch.autograd import Variable  # legacy import, no longer used below

# image from https://github.com/ardamavi/Dog-Cat-Classifier/tree/master/Data/Train_Data
# 0 : cat, 1 : dog

def img_show(image, unnormalize=False):
    """Display a [3, height, width] tensor as an RGB image.

    Args:
        image: channel-first image tensor.
        unnormalize: when True, apply ``image / 2 + 0.5`` first to undo
            Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)). It defaults to
            False; the shift used to be applied to every image, which
            brightened pictures that had never been normalized.
    """
    if unnormalize:
        image = image / 2 + 0.5
    # matplotlib expects height x width x channel
    plt.imshow(np.transpose(image.numpy(), (1, 2, 0)))
    plt.show(block=False)

def pick_image(data, index, unnormalize=False):
    """Show the image stored at `index` of dataset `data`; the label is ignored.

    `unnormalize` is forwarded to :func:`img_show`.
    """
    image, _ = data[index]
    img_show(image, unnormalize=unnormalize)

original_image = datasets.ImageFolder(root='../data/', transform=transforms.ToTensor())
print(original_image,'\n')
pick_image(original_image, 1)

# make transformation (resizing image)
resized_transform = transforms.Compose([
    transforms.Resize((227, 227)),
    transforms.ToTensor()
])
resized_image = datasets.ImageFolder(root='../data/', transform=resized_transform)
print('resized image to 227x227x3')
pick_image(resized_image, 1)

# make transformation (crop image)
cropped_transform = transforms.Compose([
    transforms.CenterCrop((10, 10)),
    transforms.ToTensor()
])
cropped_image = datasets.ImageFolder(root='../data/', transform=cropped_transform)
print('cropped image to 10x10x3')
pick_image(cropped_image, 1)

# make transformation (normalized image)
normalized_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

normalized_image = datasets.ImageFolder(root='../data/', transform=normalized_transform)
print('normalized image to mean and std (0.5, 0.5, 0.5)')
# the only dataset that was normalized, hence the only one to un-normalize
pick_image(normalized_image, 1, unnormalize=True)


# How to use Data Loader (Input Pipeline)
# same batch images should have same height, width, channel
batch_size = 2
print('Data Loader')
dataloader = torch.utils.data.DataLoader(dataset=resized_image, batch_size=batch_size, shuffle=True)

count = 0
for batch_idx, (data, target) in enumerate(dataloader):
    # the last batch may hold fewer than batch_size images, so count what arrived
    count += data.size(0)
    print('batch :', batch_idx + 1,' ', count, '/', len(dataloader.dataset),
          'image:', data.shape, 'target : ', target)
# --------------------------------------------------------------------------------
# /0.Imagehandler/Imagehandler-Keras.py:
# --------------------------------------------------------------------------------
'''
  code by Tae Hwan Jung(Jeff Jung) @graykode
  code reference : https://jkjung-avt.github.io/keras-image-cropping/
                   https://github.com/zizhaozhang/unet-tensorflow-keras/blob/master/loader.py
'''
import numpy as np
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator

# image from https://github.com/ardamavi/Dog-Cat-Classifier/tree/master/Data/Train_Data
# label order follows the `classes` list passed below -> 0 : dog, 1 : cat

def img_show(image, unnormalize=False):
    """Display a height x width x channel image array.

    Args:
        image: channel-last image array.
        unnormalize: when True, apply ``image / 2 + 0.5`` first to undo a
            mean=0.5 / std=0.5 normalization. It defaults to False because
            the generators below only rescale to [0, 1].
    """
    if unnormalize:
        image = image / 2 + 0.5
    plt.imshow(image)
    plt.show(block=False)

def random_crop(img, random_crop_size):
    """Return a random (dy, dx) window of a channel-last RGB image.

    Args:
        img: array of shape [height, width, 3].
        random_crop_size: (dy, dx) size of the crop.

    Raises:
        ValueError: if `img` is not [height, width, 3] or the crop is larger
            than the image.
    """
    # Note: image_data_format is 'channel_last'
    if img.ndim != 3 or img.shape[2] != 3:
        raise ValueError('random_crop expects an image of shape [height, width, 3]')
    height, width = img.shape[0], img.shape[1]
    dy, dx = random_crop_size
    if dy > height or dx > width:
        raise ValueError('crop size %r exceeds image size %r' % ((dy, dx), (height, width)))
    x = np.random.randint(0, width - dx + 1)
    y = np.random.randint(0, height - dy + 1)
    return img[y:(y+dy), x:(x+dx), :]

def crop_generator(batches, crop_length):
    """Take as input a Keras ImageGen (Iterator) and generate random
    crops from the image batches generated by the original iterator.

    Yields (batch_crops, batch_y) where batch_crops has shape
    [batch, crop_length, crop_length, 3]. Stops cleanly when `batches`
    is exhausted.
    """
    while True:
        try:
            batch_x, batch_y = next(batches)
        except StopIteration:
            # a StopIteration escaping a generator becomes RuntimeError (PEP 479)
            return
        batch_crops = np.zeros((batch_x.shape[0], crop_length, crop_length, 3))
        for i in range(batch_x.shape[0]):
            batch_crops[i] = random_crop(batch_x[i], (crop_length, crop_length))
        yield (batch_crops, batch_y)

def preprocess(img, mean, std):
    """Divide `img` by its maximum, then normalize each channel with `mean` and `std`.

    Args:
        img: channel-last image array.
        mean: per-channel means, one value per channel of `img`.
        std: per-channel standard deviations, same length as `mean`.
    """
    peak = img.max()
    # an all-zero image has nothing to rescale; dividing by 0 would produce NaNs
    out_img = img / (peak if peak != 0 else 1)  # scale to [0,1]
    mean = np.asarray(mean).reshape(1, 1, -1)
    std = np.asarray(std).reshape(1, 1, -1)
    return (out_img - mean) / std

# make transformation (resizing image)
resized_image = ImageDataGenerator(rescale=1./255).flow_from_directory('../data', target_size=(227, 227),
                                            classes=['dog', 'cat'], batch_size=2)
data, target = next(resized_image)
print('resized image to 227x227x3')
img_show(data[1])

# make transformation (crop image)
cropped_image = ImageDataGenerator(rescale=1./255).flow_from_directory('../data',
                                            classes=['dog', 'cat'], batch_size=2)
cropped_image = crop_generator(cropped_image, 10)
data, target = next(cropped_image)
print('cropped image to 10x10x3')
img_show(data[1])

# make transformation (normalized image)
normalized_image = ImageDataGenerator(rescale=1./255).flow_from_directory('../data',
                                            classes=['dog', 'cat'], batch_size=2)
data, target = next(normalized_image)
print('normalized image to mean and std (0.5, 0.5, 0.5)')
nimage = preprocess(data[1], mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
# the only image that was normalized, hence the only one to un-normalize
img_show(nimage, unnormalize=True)
# --------------------------------------------------------------------------------
# /9.DenseNet/DenseNet-Torch.py:
# --------------------------------------------------------------------------------
'''
  code by Tae Hwan Jung @graykode
  code reference : https://github.com/kuangliu/pytorch-cifar/blob/master/models/densenet.py
'''
import math
import torch
import torch.nn as nn


class Bottleneck(nn.Module):
    """Dense layer: BN-ReLU-Conv(1x1) then BN-ReLU-Conv(3x3), concatenated with its input.

    Args:
        in_channel: channels of the incoming feature map.
        growth_rate: number of new feature channels this layer adds.
    """

    def __init__(self, in_channel, growth_rate):
        super().__init__()
        self.composite1 = nn.Sequential(
            nn.BatchNorm2d(in_channel),
            nn.ReLU(),
            nn.Conv2d(in_channel, 4 * growth_rate, kernel_size=1, bias=False)
        )
        self.composite2 = nn.Sequential(
            nn.BatchNorm2d(4 * growth_rate),
            nn.ReLU(),
            nn.Conv2d(4 * growth_rate, growth_rate, kernel_size=3, padding=1, bias=False)
        )

    def forward(self, x):  # in_channel > in_channel + growth_rate
        out = self.composite1(x)      # in_channel > 4*growth_rate
        out = self.composite2(out)    # 4*growth_rate > growth_rate
        return torch.cat([out, x], 1)  # dim 1 is channel: growth_rate + in_channel


class Transition(nn.Module):
    """BN-ReLU-Conv(1x1) to change the channel count, then 2x2 average pooling."""

    def __init__(self, in_channel, out_channel):
        super().__init__()
        self.composite = nn.Sequential(
            nn.BatchNorm2d(in_channel),
            nn.ReLU(),
            nn.Conv2d(in_channel, out_channel, kernel_size=1, bias=False),
            nn.AvgPool2d(kernel_size=2)
        )

    def forward(self, x):
        return self.composite(x)


class DenseNet(nn.Module):
    """DenseNet with four dense blocks separated by three transitions.

    Args:
        type_block: dense layer class, called as ``type_block(in_channel, growth_rate)``.
        num_blocks: number of dense layers in each of the four dense blocks.
        growth_rate: channels added by every dense layer.
        reduction: factor applied to the channel count in each transition.
        num_classes: size of the final linear layer.
    """

    def __init__(self, type_block, num_blocks, growth_rate=12, reduction=0.5, num_classes=2):
        super().__init__()
        self.growth_rate = growth_rate

        in_channel = 2 * growth_rate  # The initial convolution layer comprises 2k convolutions of size 7x7 with stride 2
        self.conv1 = nn.Conv2d(3, in_channel, kernel_size=7, stride=2, padding=1, bias=False)  # 112 = (227-7+2*1)/2+1
        self.max_pooling = nn.MaxPool2d(kernel_size=3, stride=2)

        self.dense1 = self.make_dense_layers(type_block, in_channel, num_blocks[0])
        in_channel += num_blocks[0] * growth_rate
        out_channel = int(math.floor(in_channel * reduction))
        self.trans1 = Transition(in_channel, out_channel)  # transition: in_channel > out_channel
        in_channel = out_channel

        self.dense2 = self.make_dense_layers(type_block, in_channel, num_blocks[1])
        in_channel += num_blocks[1] * growth_rate
        out_channel = int(math.floor(in_channel * reduction))
        self.trans2 = Transition(in_channel, out_channel)
        in_channel = out_channel

        self.dense3 = self.make_dense_layers(type_block, in_channel, num_blocks[2])
        in_channel += num_blocks[2] * growth_rate
        out_channel = int(math.floor(in_channel * reduction))
        self.trans3 = Transition(in_channel, out_channel)
        in_channel = out_channel

        self.dense4 = self.make_dense_layers(type_block, in_channel, num_blocks[3])
        in_channel += num_blocks[3] * growth_rate

        self.classifier = nn.Sequential(
            nn.BatchNorm2d(in_channel),
            nn.ReLU(),
            # Global average pooling. With a 227 x 227 input the spatial size
            # shrinks 112 > 55 > 27 > 13 > 6, so the former fixed
            # AvgPool2d(kernel_size=7) had a window larger than the 6 x 6 map.
            nn.AdaptiveAvgPool2d(1)
        )
        self.linear = nn.Linear(in_channel, num_classes)

    def make_dense_layers(self, type_block, in_channel, num_block):
        """Stack `num_block` dense layers; each one widens the map by growth_rate channels."""
        layers = []
        for _ in range(num_block):
            layers.append(type_block(in_channel, self.growth_rate))
            in_channel += self.growth_rate
        return nn.Sequential(*layers)

    def forward(self, x):  # x : [batch_size, 3, height, width]
        """Return class scores of shape [batch_size, num_classes]."""
        out = self.max_pooling(self.conv1(x))
        out = self.trans1(self.dense1(out))
        out = self.trans2(self.dense2(out))
        out = self.trans3(self.dense3(out))
        out = self.dense4(out)
        out = self.classifier(out)
        out = out.view(out.size(0), -1)
        return self.linear(out)


def DenseNet121():
    return DenseNet(Bottleneck, [6,12,24,16], growth_rate=32)

def DenseNet169():
    return DenseNet(Bottleneck, [6,12,32,32], growth_rate=32)

def DenseNet201():
    return DenseNet(Bottleneck, [6,12,48,32], growth_rate=32)

def DenseNet161():
    return DenseNet(Bottleneck, [6,12,36,24], growth_rate=48)


if __name__ == "__main__":
    # modelsummary is only needed for the demo printout below
    from modelsummary import summary

    # Please Select model by index (only the selected network is built)
    model = [DenseNet121, DenseNet161, DenseNet169, DenseNet201][3]()
    print(model)
    summary(model, torch.zeros((1, 3, 227, 227)))
# --------------------------------------------------------------------------------
# /7.ResNet/ResNet-Torch.py:
# --------------------------------------------------------------------------------
'''
  code by Tae Hwan Jung @graykode
  reference : https://github.com/kuangliu/pytorch-cifar/blob/master/models/resnet.py
'''
import torch
import torch.nn as nn


def _shortcut(in_channel, out_channel, stride):
    """Build the skip path: identity when shapes match, else a 1x1 conv + BN projection."""
    if stride == 1 and in_channel == out_channel:
        return nn.Sequential()  # empty Sequential passes its input through unchanged
    # ResNet34 fig3 in paper, case of dot-line
    return nn.Sequential(
        nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=stride, bias=False),
        nn.BatchNorm2d(out_channel),
    )


class BasicBlock(nn.Module):
    """Two 3x3 convolutions plus a shortcut; ReLU is applied after the addition.

    Args:
        in_channel: channels of the incoming feature map.
        out_channel: channels produced by the block (``expansion`` is 1).
        stride: stride of the first convolution and of the projection shortcut.
    """
    expansion = 1

    def __init__(self, in_channel, out_channel, stride=1):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channel),
            nn.ReLU(),
            nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(out_channel)
        )
        # The shortcut carries no activation of its own. It used to end in
        # ReLU, which clipped the skipped signal and left the block's sum
        # without an activation.
        self.residual = _shortcut(in_channel, self.expansion * out_channel, stride)

    def forward(self, x):
        out = self.features(x) + self.residual(x)  # the shortcut takes the block input, not `out`
        return torch.relu(out)


class Bottleneck(nn.Module):
    """1x1 reduce, 3x3, 1x1 expand plus a shortcut; ReLU is applied after the addition.

    Args:
        in_channel: channels of the incoming feature map.
        out_channel: width of the inner 3x3 convolution; the block outputs
            ``expansion * out_channel`` channels.
        stride: stride of the 3x3 convolution and of the projection shortcut.
    """
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1):
        super().__init__()
        # 1x1 - 3x3 - 1x1 as in the referenced implementation; the first layer
        # used to be a second 3x3 convolution copied from BasicBlock.
        self.features = nn.Sequential(
            nn.Conv2d(in_channel, out_channel, kernel_size=1, bias=False),
            nn.BatchNorm2d(out_channel),
            nn.ReLU(),
            nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channel),
            nn.ReLU(),
            nn.Conv2d(out_channel, self.expansion * out_channel, kernel_size=1, bias=False),
            nn.BatchNorm2d(self.expansion * out_channel)
        )
        self.residual = _shortcut(in_channel, self.expansion * out_channel, stride)

    def forward(self, x):
        out = self.features(x) + self.residual(x)  # the shortcut takes the block input, not `out`
        return torch.relu(out)


# output size : (width(=height) - filter_size + 2*padding)/stride + 1
class ResNet(nn.Module):
    """Stem convolution, four residual stages, global average pooling and a linear head.

    Args:
        type_block: block class (BasicBlock or Bottleneck); must expose ``expansion``.
        num_blocks: number of blocks in each of the four stages.
        num_classes: size of the final linear layer.
    """

    def __init__(self, type_block , num_blocks, num_classes=2):
        super().__init__()
        self.in_channel = 64  # running channel count, updated by make_layer
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=3, stride=2),
            self.make_layer(type_block, 64, num_blocks[0], stride=1),
            self.make_layer(type_block, 128, num_blocks[1], stride=2),
            self.make_layer(type_block, 256, num_blocks[2], stride=2),
            self.make_layer(type_block, 512, num_blocks[3], stride=2),
            # Global average pooling: identical to the former
            # AvgPool2d(kernel_size=7) on the 7 x 7 map a 227 x 227 input
            # produces, and also valid for other input sizes.
            nn.AdaptiveAvgPool2d(1)
        )
        self.linear = nn.Linear(512 * type_block.expansion, num_classes)

    def make_layer(self, type_block, in_channel, num_blocks, stride):
        """Build one stage of `num_blocks` blocks.

        Args:
            type_block: block class to instantiate.
            in_channel: base width of the stage (passed to the block as its
                ``out_channel``); the stage outputs ``in_channel * expansion``.
            num_blocks: number of blocks in the stage.
            stride: stride of the first block; the remaining blocks use 1.
        """
        strides = [stride] + [1] * (num_blocks - 1)  # only stride of first layer is not one.
        layers = []
        for block_stride in strides:
            layers.append(type_block(self.in_channel, in_channel, block_stride))
            self.in_channel = in_channel * type_block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):  # x : [batch_size, 3, 227, 227]
        """Return class scores of shape [batch_size, num_classes]."""
        out = self.features(x)
        out = out.view(out.size(0), -1)
        return self.linear(out)


def ResNet18():
    return ResNet(BasicBlock, [2,2,2,2])

def ResNet34():
    return ResNet(BasicBlock, [3,4,6,3])

def ResNet50():
    return ResNet(Bottleneck, [3,4,6,3])

def ResNet101():
    return ResNet(Bottleneck, [3,4,23,3])

def ResNet152():
    return ResNet(Bottleneck, [3,8,36,3])


if __name__ == "__main__":
    # modelsummary is only needed for the demo printout below
    from modelsummary import summary

    # Please Select model by index (only the selected network is built)
    model = [ResNet18, ResNet34, ResNet50, ResNet101, ResNet152][3]()
    print(model)
    summary(model, torch.zeros((1, 3, 227, 227)))
# --------------------------------------------------------------------------------
# /README.md:
-------------------------------------------------------------------------------- 1 | ## vision-tutorial 2 | 3 |
![]()
